Biskit.Mod.Benchmark

43 """ 44 This class regroups the different methods to analyse 45 the output files of interest from modeller 46 """ 47 # standard directory for input 48 F_INPUT_FOLDER = Modeller.F_RESULT_FOLDER 49 50 # standard directory for output 51 F_RESULT_FOLDER = '/benchmark' 52 53 # Standard Input Files 54 # PDB of the Known Structure 55 F_INPUT_REFERENCE= '/reference.pdb' 56 57 # PDBModels list Pickle 58 F_PDBModels = F_INPUT_FOLDER + '/PDBModels.list' 59 60 # Standard Output files for RMSD 61 F_RMSD_AA = F_RESULT_FOLDER +'/rmsd_aa.out' 62 F_RMSD_CA = F_RESULT_FOLDER +'/rmsd_ca.out' 63 F_RMSD_RES = '/rmsd_res' 64 65 # standard Ouput file for PDBModels 66 F_PDBModels_OUT = F_RESULT_FOLDER +'/PDBModels.list' 67 68 # standard Ouput file for final reference 69 F_FINAL_REFERENCE = F_RESULT_FOLDER +'/reference_final.pdb' 70 71

72 - def __init__(self, outFolder='.', verbose=1):

73 """ 74 @param outFolder: folder for output (default: .) 75 @type outFolder: str 76 @param verbose: write intermediary files (default: 1) 77 @type verbose: 1|0 78 """ 79 self.outFolder = T.absfile( outFolder ) 80 81 self.verbose = verbose 82 83 self.prepareFolders()

84 85

86 - def prepareFolders( self ):

87 """ 88 Create folders needed by this class. 89 """ 90 if not os.path.exists(self.outFolder + self.F_RESULT_FOLDER): 91 os.mkdir(self.outFolder + self.F_RESULT_FOLDER)

92 93

94 - def output_fittedStructures(self, pdb, reference, index, atom_mask, 95 output_folder = None):

96 """ 97 Takes a model and a reference structure, performs both a 98 normal fillting to the reference and an itterative fitting. 99 Then returns the two fitted models. 100 101 @param pdb: model 102 @type pdb: PDBModel 103 @param reference: reference model 104 @type reference: PDBModel 105 @param index: index number 106 @type index: int 107 @param atom_mask: atom mask for discharding certain atoms 108 @type atom_mask: [int] 109 @param output_folder: output folder 110 (default: None S{->} outFolder/L{F_RESULT_FOLDER} 111 @type output_folder: str 112 113 @return: pdb_if, pdb - model, fitted iteratively (n=10), 114 model, without iterative fitting 115 @rtype: PDBModel, PDBModel 116 """ 117 output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER 118 119 tmp_model = pdb.compress( atom_mask ) 120 121 ## iterative fit, max 10 itterations 122 pdb_if = pdb.transform( *tmp_model.transformation\ 123 ( reference, n_it=10, 124 profname="rms_outliers") ) 125 ## normal fit 126 pdb = pdb.transform( *tmp_model.transformation(reference, n_it=0) ) 127 128 ## info about discarded residues in iterative fit 129 pdb_if.setAtomProfile( "rms_outliers", 130 tmp_model.profile("rms_outliers"), 131 mask=atom_mask ) 132 133 ## wirte iteratively fitted structure to disc 134 pdb_if.writePdb( '%s/Fitted_%02i.pdb'%( output_folder, index ) ) 135 136 return pdb_if, pdb

137 138

139 - def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):

140 """ 141 Takes the two fitted structures (with and without iterative fitting), 142 the known structure (reference), and the associated model inside the 143 pdb_list. Calculates the different RMSD and set the profiles 144 145 @param fitted_model_if: itteratively fitted model 146 @type fitted_model_if: PDBModel 147 @param fitted_model_wo_if: normaly fitted model 148 @type fitted_model_wo_if: PDBModel 149 @param reference: reference model 150 @type reference: PDBModel 151 @param model: model 152 @type model: PDBModel 153 """ 154 ## first calculate rmsd for heavy atoms and CA without 155 ## removing any residues from the model 156 mask_CA = fitted_model_wo_if.maskCA() 157 158 rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 ) 159 rmsd_ca = fitted_model_wo_if.rms( reference, mask=mask_CA, fit=1 ) 160 161 model.info["rmsd2ref_aa_wo_if"] = rmsd_aa 162 model.info["rmsd2ref_ca_wo_if"] = rmsd_ca 163 164 outliers_mask = N.logical_not(fitted_model_if.profile("rms_outliers")) 165 166 ## Now remove the residues that were outliers in the iterative fit 167 ## and calculate the rmsd again 168 fitted_model_if = fitted_model_if.compress( outliers_mask ) 169 reference = reference.compress( outliers_mask ) 170 171 mask_CA = fitted_model_if.maskCA() 172 173 rmsd_aa_if = fitted_model_if.rms( reference, fit=0 ) 174 rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 ) 175 176 model.info["rmsd2ref_aa_if"] = rmsd_aa_if 177 model.info["rmsd2ref_ca_if"] = rmsd_ca_if 178 model.info["rmsd2ref_aa_outliers"] = 1.*(len(outliers_mask) \ 179 - N.sum(outliers_mask)) / len(outliers_mask) 180 model.info["rmsd2ref_ca_outliers"] = 1.*(N.sum(mask_CA) \ 181 - N.sum(N.compress(mask_CA, outliers_mask))) \ 182 / N.sum(mask_CA)

183 184

185 - def output_rmsd_aa(self, pdb_list, output_file = None):

186 """ 187 Write a file rmsd_aa.dat that contains 188 all the global heavy atom rmsd values. 189 190 @param pdb_list: list of models 191 @type pdb_list: ModelList 192 @param output_file: output file 193 (default: None S{->} outFolder/L{F_RMSD_AA}) 194 @type output_file: str 195 """ 196 output_file = output_file or self.outFolder + self.F_RMSD_AA 197 rmsd_aa_out = open(output_file, 'w') 198 199 rmsd_aa_out.write("# Heavy atom RMSD.\n") 200 rmsd_aa_out.write("# column 1. Normal fitting\n") 201 rmsd_aa_out.write("# column 2. Itterative fitting\n") 202 rmsd_aa_out.write("# column 3. Fraction of discarded residues in 2\n") 203 for i in range(len(pdb_list)): 204 rmsd_aa_out.write("model_%02i\t%f\t%f\t%f\n"\ 205 %(i, pdb_list[i].info["rmsd2ref_aa_wo_if"], 206 pdb_list[i].info["rmsd2ref_aa_if"], 207 pdb_list[i].info["rmsd2ref_aa_outliers"])) 208 rmsd_aa_out.close()

209 210

211 - def output_rmsd_ca(self, pdb_list, output_file = None):

212 """ 213 Write a file rmsd_ca.dat that contains 214 all the global c_alpha rmsd values. 215 216 @param pdb_list: list of models 217 @type pdb_list: ModelList 218 @param output_file: output file 219 (default: None S{->} outFolder/L{F_RMSD_CA}) 220 @type output_file: str 221 """ 222 output_file = output_file or self.outFolder + self.F_RMSD_CA 223 rmsd_ca_out = open('%s'%output_file, 'w') 224 225 rmsd_ca_out.write("# Carbon alpha RMSD.\n") 226 rmsd_ca_out.write("# column 1. Normal fitting\n") 227 rmsd_ca_out.write("# column 2. Itterative fitting\n") 228 rmsd_ca_out.write("# column 3. Fraction of discarded residues in 2\n") 229 for i in range(len(pdb_list)): 230 rmsd_ca_out.write("model_%02i\t%7.3f\t%7.3f\t%7.3f\n"\ 231 %(i, pdb_list[i].info["rmsd2ref_ca_wo_if"], 232 pdb_list[i].info["rmsd2ref_ca_if"], 233 pdb_list[i].info["rmsd2ref_ca_outliers"] )) 234 rmsd_ca_out.close()

235 236

237 - def rmsd_res(self, coord1, coord2):

238 """ 239 Calculate the rsmd on residue level for c-alpha between a 240 model and its reference. 241 242 @param coord1: first set of coordinates 243 @type coord1: array 244 @param coord2: second set of coordinates 245 @type coord2: array 246 247 @return: rmsd_res: rmsd per c-alpha 248 @rtype: [float] 249 """ 250 rmsd_res = [] 251 252 for i in range( len(coord1) ): 253 rmsd = N.sqrt( (N.power(coord1[i][0]-coord2[i][0],2) + \ 254 N.power(coord1[i][1]-coord2[i][1],2 )+ \ 255 N.power(coord1[i][2]-coord2[i][2],2 ))) 256 rmsd_res.append(rmsd) 257 258 return rmsd_res

259 260

261 - def output_rmsd_res(self, pdb_list, output_folder = None):

262 """ 263 Write a file that contains the rmsd profile on a residue 264 level for c_alpha. 265 266 @param pdb_list: list of models 267 @type pdb_list: ModelList 268 @param output_folder: output folder (default: None S{->} outFolder/ 269 L{F_RESULT_FOLDER}/L{F_RMSD_RES}) 270 @type output_folder: str 271 """ 272 for m, t in zip(pdb_list, range(len(pdb_list))): 273 output_file = output_folder or self.outFolder + \ 274 self.F_RESULT_FOLDER + self.F_RMSD_RES + '_%02i'%t 275 276 file = open(output_file, 'w') 277 rmsd_res_list = m.compress(m.maskCA()).aProfiles["rmsd2ref_if"] 278 279 file.write("# Carbon alpha rmsd profile.\n") 280 for i in range(len(rmsd_res_list)): 281 file.write("%4i\t%5.3f\n"%(i+1,rmsd_res_list[i])) 282 283 file.close()

284 285

286 - def cad(self, reference, model):

287 """ 288 Calculates the CAD Contact Area Difference between 289 the model and its reference structure and set a profile 290 for the model 291 292 @param reference: reference model 293 @type reference: PDBModel 294 @param model: model 295 @type model: PDBModel 296 """ 297 reference = reference.compress(reference.maskProtein()) 298 model = model.compress(model.maskProtein()) 299 300 model_list = [] 301 model_list.append(model) 302 303 x = CAD(reference, model_list, debug=0, verbose=1) 304 r = x.run() 305 306 model.info["CAD"] = r

307 308

309 - def write_PDBModels(self, pdb_list, output_file = None):

310 """ 311 Pickles the list of PDBModels to disc. 312 313 @param pdb_list: list of models 314 @type pdb_list: ModelList 315 @param output_file: output file 316 (default: None S{->} outFolder/L{F_PDBModels_OUT}) 317 @type output_file: str 318 """ 319 output_file = output_file or self.outFolder + self.F_PDBModels_OUT 320 T.Dump(pdb_list, '%s'%(output_file))

321 322

323 - def go(self, model_list = None, reference = None):

324 """ 325 Run benchmarking. 326 327 @param model_list: list of models 328 (default: None S{->} outFolder/L{F_PDBModels}) 329 @type model_list: ModelList 330 @param reference: reference model 331 (default: None S{->} outFolder/L{F_INPUT_REFERENCE}) 332 @type reference: PDBModel 333 """ 334 model_list = model_list or self.outFolder + self.F_PDBModels 335 reference = reference or self.outFolder + self.F_INPUT_REFERENCE 336 337 pdb_list = T.Load('%s'%model_list) 338 reference = PDBModel(reference) 339 340 # check with python 2.4 341 iref, imodel = reference.compareAtoms(pdb_list[0]) 342 343 mask_casting = N.zeros(len(pdb_list[0])) 344 N.put(mask_casting, imodel, 1) 345 346 reference = reference.take(iref) 347 #reference_mask_CA = reference_rmsd.maskCA() 348 349 atom_mask = N.zeros(len(pdb_list[0])) 350 N.put(atom_mask,imodel,1) 351 352 rmask = pdb_list[0].profile2mask("n_templates", 1,1000) 353 amask = pdb_list[0].res2atomMask(rmask) 354 355 mask_final_ref = N.compress(mask_casting, amask) 356 mask_final = mask_casting * amask 357 358 reference = reference.compress(mask_final_ref) 359 360 for i in range(len(pdb_list)): 361 362 #self.cad(reference, pdb_list[i]) 363 364 pdb_list[i], pdb_wo_if = self.output_fittedStructures(\ 365 pdb_list[i], reference, i, mask_final) 366 367 fitted_model_if = pdb_list[i].compress(mask_final) 368 fitted_model_wo_if = pdb_wo_if.compress(mask_final) 369 370 coord1 = reference.getXyz() 371 coord2 = fitted_model_if.getXyz() 372 373 aprofile = self.rmsd_res(coord1,coord2) 374 375 self.calc_rmsd(fitted_model_if, fitted_model_wo_if, 376 reference, pdb_list[i]) 377 378 pdb_list[i].setAtomProfile('rmsd2ref_if', aprofile, 379 mask=mask_final, default = -1, 380 comment="rmsd to known reference structure") 381 382 self.output_rmsd_aa(pdb_list) 383 self.output_rmsd_ca(pdb_list) 384 self.output_rmsd_res(pdb_list) 385 386 self.write_PDBModels(pdb_list)

Source Code for Module Biskit.Mod.Benchmark