Package Biskit :: Package Mod :: Module Benchmark
[hide private]
[frames] | no frames]

Source Code for Module Biskit.Mod.Benchmark

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19   
 20  ## Contributions: Olivier PERIN 
 21  ## 
 22  ## last $Author: leckner $ 
 23  ## $Date: 2006/09/12 06:34:36 $ 
 24  ## $Revision: 1.4 $ 
 25  """ 
 26  Modeling benchmark 
 27  """ 
 28   
 29  import os 
 30  import re 
 31  import linecache 
 32  from Biskit.PDBModel import PDBModel 
 33  from Biskit.ModelList import ModelList 
 34  from Biskit.IcmCad import IcmCad as CAD 
 35   
 36  import Biskit.tools as T 
 37  import Numeric as N 
 38  from Biskit import StdLog, EHandler 
 39   
 40  from Biskit.Mod import Modeller 
 41   
42 -class Benchmark:
43 """ 44 This class regroups the different methods to analyse 45 the output files of interest from modeller 46 """ 47 # standard directory for input 48 F_INPUT_FOLDER = Modeller.F_RESULT_FOLDER 49 50 # standard directory for output 51 F_RESULT_FOLDER = '/benchmark' 52 53 # Standard Input Files 54 # PDB of the Known Structure 55 F_INPUT_REFERENCE= '/reference.pdb' 56 57 # PDBModels list Pickle 58 F_PDBModels = F_INPUT_FOLDER + '/PDBModels.list' 59 60 # Standard Output files for RMSD 61 F_RMSD_AA = F_RESULT_FOLDER +'/rmsd_aa.out' 62 F_RMSD_CA = F_RESULT_FOLDER +'/rmsd_ca.out' 63 F_RMSD_RES = '/rmsd_res' 64 65 # standard Ouput file for PDBModels 66 F_PDBModels_OUT = F_RESULT_FOLDER +'/PDBModels.list' 67 68 # standard Ouput file for final reference 69 F_FINAL_REFERENCE = F_RESULT_FOLDER +'/reference_final.pdb' 70 71
72 - def __init__(self, outFolder='.', verbose=1):
73 """ 74 @param outFolder: folder for output (default: .) 75 @type outFolder: str 76 @param verbose: write intermediary files (default: 1) 77 @type verbose: 1|0 78 """ 79 self.outFolder = T.absfile( outFolder ) 80 81 self.verbose = verbose 82 83 self.prepareFolders()
84 85
86 - def prepareFolders( self ):
87 """ 88 Create folders needed by this class. 89 """ 90 if not os.path.exists(self.outFolder + self.F_RESULT_FOLDER): 91 os.mkdir(self.outFolder + self.F_RESULT_FOLDER)
92 93
94 - def output_fittedStructures(self, pdb, reference, index, atom_mask, 95 output_folder = None):
96 """ 97 Takes a model and a reference structure, performs both a 98 normal fillting to the reference and an itterative fitting. 99 Then returns the two fitted models. 100 101 @param pdb: model 102 @type pdb: PDBModel 103 @param reference: reference model 104 @type reference: PDBModel 105 @param index: index number 106 @type index: int 107 @param atom_mask: atom mask for discharding certain atoms 108 @type atom_mask: [int] 109 @param output_folder: output folder 110 (default: None S{->} outFolder/L{F_RESULT_FOLDER} 111 @type output_folder: str 112 113 @return: pdb_if, pdb - model, fitted iteratively (n=10), 114 model, without iterative fitting 115 @rtype: PDBModel, PDBModel 116 """ 117 output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER 118 119 tmp_model = pdb.compress( atom_mask ) 120 121 ## iterative fit, max 10 itterations 122 pdb_if = pdb.transform( *tmp_model.transformation\ 123 ( reference, n_it=10, 124 profname="rms_outliers") ) 125 ## normal fit 126 pdb = pdb.transform( *tmp_model.transformation(reference, n_it=0) ) 127 128 ## info about discarded residues in iterative fit 129 pdb_if.setAtomProfile( "rms_outliers", 130 tmp_model.profile("rms_outliers"), 131 mask=atom_mask ) 132 133 ## wirte iteratively fitted structure to disc 134 pdb_if.writePdb( '%s/Fitted_%02i.pdb'%( output_folder, index ) ) 135 136 return pdb_if, pdb
137 138
139 - def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
140 """ 141 Takes the two fitted structures (with and without iterative fitting), 142 the known structure (reference), and the associated model inside the 143 pdb_list. Calculates the different RMSD and set the profiles 144 145 @param fitted_model_if: itteratively fitted model 146 @type fitted_model_if: PDBModel 147 @param fitted_model_wo_if: normaly fitted model 148 @type fitted_model_wo_if: PDBModel 149 @param reference: reference model 150 @type reference: PDBModel 151 @param model: model 152 @type model: PDBModel 153 """ 154 ## first calculate rmsd for heavy atoms and CA without 155 ## removing any residues from the model 156 mask_CA = fitted_model_wo_if.maskCA() 157 158 rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 ) 159 rmsd_ca = fitted_model_wo_if.rms( reference, mask=mask_CA, fit=1 ) 160 161 model.info["rmsd2ref_aa_wo_if"] = rmsd_aa 162 model.info["rmsd2ref_ca_wo_if"] = rmsd_ca 163 164 outliers_mask = N.logical_not(fitted_model_if.profile("rms_outliers")) 165 166 ## Now remove the residues that were outliers in the iterative fit 167 ## and calculate the rmsd again 168 fitted_model_if = fitted_model_if.compress( outliers_mask ) 169 reference = reference.compress( outliers_mask ) 170 171 mask_CA = fitted_model_if.maskCA() 172 173 rmsd_aa_if = fitted_model_if.rms( reference, fit=0 ) 174 rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 ) 175 176 model.info["rmsd2ref_aa_if"] = rmsd_aa_if 177 model.info["rmsd2ref_ca_if"] = rmsd_ca_if 178 model.info["rmsd2ref_aa_outliers"] = 1.*(len(outliers_mask) \ 179 - N.sum(outliers_mask)) / len(outliers_mask) 180 model.info["rmsd2ref_ca_outliers"] = 1.*(N.sum(mask_CA) \ 181 - N.sum(N.compress(mask_CA, outliers_mask))) \ 182 / N.sum(mask_CA)
183 184
185 - def output_rmsd_aa(self, pdb_list, output_file = None):
186 """ 187 Write a file rmsd_aa.dat that contains 188 all the global heavy atom rmsd values. 189 190 @param pdb_list: list of models 191 @type pdb_list: ModelList 192 @param output_file: output file 193 (default: None S{->} outFolder/L{F_RMSD_AA}) 194 @type output_file: str 195 """ 196 output_file = output_file or self.outFolder + self.F_RMSD_AA 197 rmsd_aa_out = open(output_file, 'w') 198 199 rmsd_aa_out.write("# Heavy atom RMSD.\n") 200 rmsd_aa_out.write("# column 1. Normal fitting\n") 201 rmsd_aa_out.write("# column 2. Itterative fitting\n") 202 rmsd_aa_out.write("# column 3. Fraction of discarded residues in 2\n") 203 for i in range(len(pdb_list)): 204 rmsd_aa_out.write("model_%02i\t%f\t%f\t%f\n"\ 205 %(i, pdb_list[i].info["rmsd2ref_aa_wo_if"], 206 pdb_list[i].info["rmsd2ref_aa_if"], 207 pdb_list[i].info["rmsd2ref_aa_outliers"])) 208 rmsd_aa_out.close()
209 210
211 - def output_rmsd_ca(self, pdb_list, output_file = None):
212 """ 213 Write a file rmsd_ca.dat that contains 214 all the global c_alpha rmsd values. 215 216 @param pdb_list: list of models 217 @type pdb_list: ModelList 218 @param output_file: output file 219 (default: None S{->} outFolder/L{F_RMSD_CA}) 220 @type output_file: str 221 """ 222 output_file = output_file or self.outFolder + self.F_RMSD_CA 223 rmsd_ca_out = open('%s'%output_file, 'w') 224 225 rmsd_ca_out.write("# Carbon alpha RMSD.\n") 226 rmsd_ca_out.write("# column 1. Normal fitting\n") 227 rmsd_ca_out.write("# column 2. Itterative fitting\n") 228 rmsd_ca_out.write("# column 3. Fraction of discarded residues in 2\n") 229 for i in range(len(pdb_list)): 230 rmsd_ca_out.write("model_%02i\t%7.3f\t%7.3f\t%7.3f\n"\ 231 %(i, pdb_list[i].info["rmsd2ref_ca_wo_if"], 232 pdb_list[i].info["rmsd2ref_ca_if"], 233 pdb_list[i].info["rmsd2ref_ca_outliers"] )) 234 rmsd_ca_out.close()
235 236
237 - def rmsd_res(self, coord1, coord2):
238 """ 239 Calculate the rsmd on residue level for c-alpha between a 240 model and its reference. 241 242 @param coord1: first set of coordinates 243 @type coord1: array 244 @param coord2: second set of coordinates 245 @type coord2: array 246 247 @return: rmsd_res: rmsd per c-alpha 248 @rtype: [float] 249 """ 250 rmsd_res = [] 251 252 for i in range( len(coord1) ): 253 rmsd = N.sqrt( (N.power(coord1[i][0]-coord2[i][0],2) + \ 254 N.power(coord1[i][1]-coord2[i][1],2 )+ \ 255 N.power(coord1[i][2]-coord2[i][2],2 ))) 256 rmsd_res.append(rmsd) 257 258 return rmsd_res
259 260
261 - def output_rmsd_res(self, pdb_list, output_folder = None):
262 """ 263 Write a file that contains the rmsd profile on a residue 264 level for c_alpha. 265 266 @param pdb_list: list of models 267 @type pdb_list: ModelList 268 @param output_folder: output folder (default: None S{->} outFolder/ 269 L{F_RESULT_FOLDER}/L{F_RMSD_RES}) 270 @type output_folder: str 271 """ 272 for m, t in zip(pdb_list, range(len(pdb_list))): 273 output_file = output_folder or self.outFolder + \ 274 self.F_RESULT_FOLDER + self.F_RMSD_RES + '_%02i'%t 275 276 file = open(output_file, 'w') 277 rmsd_res_list = m.compress(m.maskCA()).aProfiles["rmsd2ref_if"] 278 279 file.write("# Carbon alpha rmsd profile.\n") 280 for i in range(len(rmsd_res_list)): 281 file.write("%4i\t%5.3f\n"%(i+1,rmsd_res_list[i])) 282 283 file.close()
284 285
286 - def cad(self, reference, model):
287 """ 288 Calculates the CAD Contact Area Difference between 289 the model and its reference structure and set a profile 290 for the model 291 292 @param reference: reference model 293 @type reference: PDBModel 294 @param model: model 295 @type model: PDBModel 296 """ 297 reference = reference.compress(reference.maskProtein()) 298 model = model.compress(model.maskProtein()) 299 300 model_list = [] 301 model_list.append(model) 302 303 x = CAD(reference, model_list, debug=0, verbose=1) 304 r = x.run() 305 306 model.info["CAD"] = r
307 308
309 - def write_PDBModels(self, pdb_list, output_file = None):
310 """ 311 Pickles the list of PDBModels to disc. 312 313 @param pdb_list: list of models 314 @type pdb_list: ModelList 315 @param output_file: output file 316 (default: None S{->} outFolder/L{F_PDBModels_OUT}) 317 @type output_file: str 318 """ 319 output_file = output_file or self.outFolder + self.F_PDBModels_OUT 320 T.Dump(pdb_list, '%s'%(output_file))
321 322
323 - def go(self, model_list = None, reference = None):
324 """ 325 Run benchmarking. 326 327 @param model_list: list of models 328 (default: None S{->} outFolder/L{F_PDBModels}) 329 @type model_list: ModelList 330 @param reference: reference model 331 (default: None S{->} outFolder/L{F_INPUT_REFERENCE}) 332 @type reference: PDBModel 333 """ 334 model_list = model_list or self.outFolder + self.F_PDBModels 335 reference = reference or self.outFolder + self.F_INPUT_REFERENCE 336 337 pdb_list = T.Load('%s'%model_list) 338 reference = PDBModel(reference) 339 340 # check with python 2.4 341 iref, imodel = reference.compareAtoms(pdb_list[0]) 342 343 mask_casting = N.zeros(len(pdb_list[0])) 344 N.put(mask_casting, imodel, 1) 345 346 reference = reference.take(iref) 347 #reference_mask_CA = reference_rmsd.maskCA() 348 349 atom_mask = N.zeros(len(pdb_list[0])) 350 N.put(atom_mask,imodel,1) 351 352 rmask = pdb_list[0].profile2mask("n_templates", 1,1000) 353 amask = pdb_list[0].res2atomMask(rmask) 354 355 mask_final_ref = N.compress(mask_casting, amask) 356 mask_final = mask_casting * amask 357 358 reference = reference.compress(mask_final_ref) 359 360 for i in range(len(pdb_list)): 361 362 #self.cad(reference, pdb_list[i]) 363 364 pdb_list[i], pdb_wo_if = self.output_fittedStructures(\ 365 pdb_list[i], reference, i, mask_final) 366 367 fitted_model_if = pdb_list[i].compress(mask_final) 368 fitted_model_wo_if = pdb_wo_if.compress(mask_final) 369 370 coord1 = reference.getXyz() 371 coord2 = fitted_model_if.getXyz() 372 373 aprofile = self.rmsd_res(coord1,coord2) 374 375 self.calc_rmsd(fitted_model_if, fitted_model_wo_if, 376 reference, pdb_list[i]) 377 378 pdb_list[i].setAtomProfile('rmsd2ref_if', aprofile, 379 mask=mask_final, default = -1, 380 comment="rmsd to known reference structure") 381 382 self.output_rmsd_aa(pdb_list) 383 self.output_rmsd_ca(pdb_list) 384 self.output_rmsd_res(pdb_list) 385 386 self.write_PDBModels(pdb_list)
387 388 389 390 ############# 391 ## TESTING 392 ############# 393
394 -class Test:
395 """ 396 Test class 397 """ 398
399 - def run( self, local=0):
400 """ 401 run function test 402 403 @param local: transfer local variables to global and perform 404 other tasks only when run locally 405 @type local: 1|0 406 407 @return: 1 408 @rtype: int 409 """ 410 import tempfile 411 import shutil 412 from Biskit import PDBModel 413 from Biskit import Pymoler 414 415 ## collect the input files needed 416 outfolder = tempfile.mkdtemp( '_test_Benchmark' ) 417 os.mkdir( outfolder +'/modeller' ) 418 419 dir = '/Mod/project/validation/1DT7' 420 shutil.copy( T.testRoot() + dir + '/modeller/PDBModels.list', 421 outfolder + '/modeller' ) 422 shutil.copy( T.testRoot() + dir + '/reference.pdb', 423 outfolder ) 424 425 b = Benchmark( outfolder ) 426 427 b.go() 428 429 pdb = T.Load( outfolder + "/modeller/PDBModels.list" )[0] 430 431 reference = PDBModel(outfolder + "/reference.pdb" ) 432 tmp_model = pdb.clone() 433 434 reference = reference.compress( reference.maskCA() ) 435 pdb = pdb.compress( pdb.maskCA() ) 436 tmp_model = tmp_model.compress(tmp_model.maskCA()) 437 438 tm = tmp_model.transformation( reference, n_it=0, 439 profname="rms_outliers") 440 pdb = pdb.transform( tm ) 441 442 if local: 443 pm = Pymoler() 444 pm.addPdb( pdb, "m" ) 445 pm.addPdb( reference, "r" ) 446 pm.colorAtoms( "m", tmp_model.profile("rms_outliers") ) 447 pm.add('set ribbon_trace,1') 448 pm.add('show ribbon') 449 pm.show() 450 451 print 'The result from the benchmarking can be found in %s/benchmark'%outfolder 452 globals().update( locals() ) 453 454 return 1
455 456
457 - def expected_result( self ):
458 """ 459 Precalculated result to check for consistent performance. 460 461 @return: 1 462 @rtype: int 463 """ 464 return 1
465 466 467 if __name__ == '__main__': 468 469 test = Test() 470 471 assert test.run( local=1 ) == test.expected_result() 472