1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """
26 Modeling benchmark
27 """
28
29 import os
30 import re
31 import linecache
32 from Biskit.PDBModel import PDBModel
33 from Biskit.ModelList import ModelList
34 from Biskit.IcmCad import IcmCad as CAD
35
36 import Biskit.tools as T
37 import Numeric as N
38 from Biskit import StdLog, EHandler
39
40 from Biskit.Mod import Modeller
41
43 """
44 This class regroups the different methods to analyse
45 the output files of interest from modeller
46 """
47
48 F_INPUT_FOLDER = Modeller.F_RESULT_FOLDER
49
50
51 F_RESULT_FOLDER = '/benchmark'
52
53
54
55 F_INPUT_REFERENCE= '/reference.pdb'
56
57
58 F_PDBModels = F_INPUT_FOLDER + '/PDBModels.list'
59
60
61 F_RMSD_AA = F_RESULT_FOLDER +'/rmsd_aa.out'
62 F_RMSD_CA = F_RESULT_FOLDER +'/rmsd_ca.out'
63 F_RMSD_RES = '/rmsd_res'
64
65
66 F_PDBModels_OUT = F_RESULT_FOLDER +'/PDBModels.list'
67
68
69 F_FINAL_REFERENCE = F_RESULT_FOLDER +'/reference_final.pdb'
70
71
72 - def __init__(self, outFolder='.', verbose=1):
73 """
74 @param outFolder: folder for output (default: .)
75 @type outFolder: str
76 @param verbose: write intermediary files (default: 1)
77 @type verbose: 1|0
78 """
79 self.outFolder = T.absfile( outFolder )
80
81 self.verbose = verbose
82
83 self.prepareFolders()
84
85
92
93
96 """
97 Takes a model and a reference structure, performs both a
98 normal fillting to the reference and an itterative fitting.
99 Then returns the two fitted models.
100
101 @param pdb: model
102 @type pdb: PDBModel
103 @param reference: reference model
104 @type reference: PDBModel
105 @param index: index number
106 @type index: int
107 @param atom_mask: atom mask for discharding certain atoms
108 @type atom_mask: [int]
109 @param output_folder: output folder
110 (default: None S{->} outFolder/L{F_RESULT_FOLDER}
111 @type output_folder: str
112
113 @return: pdb_if, pdb - model, fitted iteratively (n=10),
114 model, without iterative fitting
115 @rtype: PDBModel, PDBModel
116 """
117 output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER
118
119 tmp_model = pdb.compress( atom_mask )
120
121
122 pdb_if = pdb.transform( *tmp_model.transformation\
123 ( reference, n_it=10,
124 profname="rms_outliers") )
125
126 pdb = pdb.transform( *tmp_model.transformation(reference, n_it=0) )
127
128
129 pdb_if.setAtomProfile( "rms_outliers",
130 tmp_model.profile("rms_outliers"),
131 mask=atom_mask )
132
133
134 pdb_if.writePdb( '%s/Fitted_%02i.pdb'%( output_folder, index ) )
135
136 return pdb_if, pdb
137
138
139 - def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
140 """
141 Takes the two fitted structures (with and without iterative fitting),
142 the known structure (reference), and the associated model inside the
143 pdb_list. Calculates the different RMSD and set the profiles
144
145 @param fitted_model_if: itteratively fitted model
146 @type fitted_model_if: PDBModel
147 @param fitted_model_wo_if: normaly fitted model
148 @type fitted_model_wo_if: PDBModel
149 @param reference: reference model
150 @type reference: PDBModel
151 @param model: model
152 @type model: PDBModel
153 """
154
155
156 mask_CA = fitted_model_wo_if.maskCA()
157
158 rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 )
159 rmsd_ca = fitted_model_wo_if.rms( reference, mask=mask_CA, fit=1 )
160
161 model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
162 model.info["rmsd2ref_ca_wo_if"] = rmsd_ca
163
164 outliers_mask = N.logical_not(fitted_model_if.profile("rms_outliers"))
165
166
167
168 fitted_model_if = fitted_model_if.compress( outliers_mask )
169 reference = reference.compress( outliers_mask )
170
171 mask_CA = fitted_model_if.maskCA()
172
173 rmsd_aa_if = fitted_model_if.rms( reference, fit=0 )
174 rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 )
175
176 model.info["rmsd2ref_aa_if"] = rmsd_aa_if
177 model.info["rmsd2ref_ca_if"] = rmsd_ca_if
178 model.info["rmsd2ref_aa_outliers"] = 1.*(len(outliers_mask) \
179 - N.sum(outliers_mask)) / len(outliers_mask)
180 model.info["rmsd2ref_ca_outliers"] = 1.*(N.sum(mask_CA) \
181 - N.sum(N.compress(mask_CA, outliers_mask))) \
182 / N.sum(mask_CA)
183
184
186 """
187 Write a file rmsd_aa.dat that contains
188 all the global heavy atom rmsd values.
189
190 @param pdb_list: list of models
191 @type pdb_list: ModelList
192 @param output_file: output file
193 (default: None S{->} outFolder/L{F_RMSD_AA})
194 @type output_file: str
195 """
196 output_file = output_file or self.outFolder + self.F_RMSD_AA
197 rmsd_aa_out = open(output_file, 'w')
198
199 rmsd_aa_out.write("# Heavy atom RMSD.\n")
200 rmsd_aa_out.write("# column 1. Normal fitting\n")
201 rmsd_aa_out.write("# column 2. Itterative fitting\n")
202 rmsd_aa_out.write("# column 3. Fraction of discarded residues in 2\n")
203 for i in range(len(pdb_list)):
204 rmsd_aa_out.write("model_%02i\t%f\t%f\t%f\n"\
205 %(i, pdb_list[i].info["rmsd2ref_aa_wo_if"],
206 pdb_list[i].info["rmsd2ref_aa_if"],
207 pdb_list[i].info["rmsd2ref_aa_outliers"]))
208 rmsd_aa_out.close()
209
210
212 """
213 Write a file rmsd_ca.dat that contains
214 all the global c_alpha rmsd values.
215
216 @param pdb_list: list of models
217 @type pdb_list: ModelList
218 @param output_file: output file
219 (default: None S{->} outFolder/L{F_RMSD_CA})
220 @type output_file: str
221 """
222 output_file = output_file or self.outFolder + self.F_RMSD_CA
223 rmsd_ca_out = open('%s'%output_file, 'w')
224
225 rmsd_ca_out.write("# Carbon alpha RMSD.\n")
226 rmsd_ca_out.write("# column 1. Normal fitting\n")
227 rmsd_ca_out.write("# column 2. Itterative fitting\n")
228 rmsd_ca_out.write("# column 3. Fraction of discarded residues in 2\n")
229 for i in range(len(pdb_list)):
230 rmsd_ca_out.write("model_%02i\t%7.3f\t%7.3f\t%7.3f\n"\
231 %(i, pdb_list[i].info["rmsd2ref_ca_wo_if"],
232 pdb_list[i].info["rmsd2ref_ca_if"],
233 pdb_list[i].info["rmsd2ref_ca_outliers"] ))
234 rmsd_ca_out.close()
235
236
238 """
239 Calculate the rsmd on residue level for c-alpha between a
240 model and its reference.
241
242 @param coord1: first set of coordinates
243 @type coord1: array
244 @param coord2: second set of coordinates
245 @type coord2: array
246
247 @return: rmsd_res: rmsd per c-alpha
248 @rtype: [float]
249 """
250 rmsd_res = []
251
252 for i in range( len(coord1) ):
253 rmsd = N.sqrt( (N.power(coord1[i][0]-coord2[i][0],2) + \
254 N.power(coord1[i][1]-coord2[i][1],2 )+ \
255 N.power(coord1[i][2]-coord2[i][2],2 )))
256 rmsd_res.append(rmsd)
257
258 return rmsd_res
259
260
262 """
263 Write a file that contains the rmsd profile on a residue
264 level for c_alpha.
265
266 @param pdb_list: list of models
267 @type pdb_list: ModelList
268 @param output_folder: output folder (default: None S{->} outFolder/
269 L{F_RESULT_FOLDER}/L{F_RMSD_RES})
270 @type output_folder: str
271 """
272 for m, t in zip(pdb_list, range(len(pdb_list))):
273 output_file = output_folder or self.outFolder + \
274 self.F_RESULT_FOLDER + self.F_RMSD_RES + '_%02i'%t
275
276 file = open(output_file, 'w')
277 rmsd_res_list = m.compress(m.maskCA()).aProfiles["rmsd2ref_if"]
278
279 file.write("# Carbon alpha rmsd profile.\n")
280 for i in range(len(rmsd_res_list)):
281 file.write("%4i\t%5.3f\n"%(i+1,rmsd_res_list[i]))
282
283 file.close()
284
285
286 - def cad(self, reference, model):
307
308
310 """
311 Pickles the list of PDBModels to disc.
312
313 @param pdb_list: list of models
314 @type pdb_list: ModelList
315 @param output_file: output file
316 (default: None S{->} outFolder/L{F_PDBModels_OUT})
317 @type output_file: str
318 """
319 output_file = output_file or self.outFolder + self.F_PDBModels_OUT
320 T.Dump(pdb_list, '%s'%(output_file))
321
322
323 - def go(self, model_list = None, reference = None):
324 """
325 Run benchmarking.
326
327 @param model_list: list of models
328 (default: None S{->} outFolder/L{F_PDBModels})
329 @type model_list: ModelList
330 @param reference: reference model
331 (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
332 @type reference: PDBModel
333 """
334 model_list = model_list or self.outFolder + self.F_PDBModels
335 reference = reference or self.outFolder + self.F_INPUT_REFERENCE
336
337 pdb_list = T.Load('%s'%model_list)
338 reference = PDBModel(reference)
339
340
341 iref, imodel = reference.compareAtoms(pdb_list[0])
342
343 mask_casting = N.zeros(len(pdb_list[0]))
344 N.put(mask_casting, imodel, 1)
345
346 reference = reference.take(iref)
347
348
349 atom_mask = N.zeros(len(pdb_list[0]))
350 N.put(atom_mask,imodel,1)
351
352 rmask = pdb_list[0].profile2mask("n_templates", 1,1000)
353 amask = pdb_list[0].res2atomMask(rmask)
354
355 mask_final_ref = N.compress(mask_casting, amask)
356 mask_final = mask_casting * amask
357
358 reference = reference.compress(mask_final_ref)
359
360 for i in range(len(pdb_list)):
361
362
363
364 pdb_list[i], pdb_wo_if = self.output_fittedStructures(\
365 pdb_list[i], reference, i, mask_final)
366
367 fitted_model_if = pdb_list[i].compress(mask_final)
368 fitted_model_wo_if = pdb_wo_if.compress(mask_final)
369
370 coord1 = reference.getXyz()
371 coord2 = fitted_model_if.getXyz()
372
373 aprofile = self.rmsd_res(coord1,coord2)
374
375 self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
376 reference, pdb_list[i])
377
378 pdb_list[i].setAtomProfile('rmsd2ref_if', aprofile,
379 mask=mask_final, default = -1,
380 comment="rmsd to known reference structure")
381
382 self.output_rmsd_aa(pdb_list)
383 self.output_rmsd_ca(pdb_list)
384 self.output_rmsd_res(pdb_list)
385
386 self.write_PDBModels(pdb_list)
387
388
389
390
391
392
393
395 """
396 Test class
397 """
398
399 - def run( self, local=0):
400 """
401 run function test
402
403 @param local: transfer local variables to global and perform
404 other tasks only when run locally
405 @type local: 1|0
406
407 @return: 1
408 @rtype: int
409 """
410 import tempfile
411 import shutil
412 from Biskit import PDBModel
413 from Biskit import Pymoler
414
415
416 outfolder = tempfile.mkdtemp( '_test_Benchmark' )
417 os.mkdir( outfolder +'/modeller' )
418
419 dir = '/Mod/project/validation/1DT7'
420 shutil.copy( T.testRoot() + dir + '/modeller/PDBModels.list',
421 outfolder + '/modeller' )
422 shutil.copy( T.testRoot() + dir + '/reference.pdb',
423 outfolder )
424
425 b = Benchmark( outfolder )
426
427 b.go()
428
429 pdb = T.Load( outfolder + "/modeller/PDBModels.list" )[0]
430
431 reference = PDBModel(outfolder + "/reference.pdb" )
432 tmp_model = pdb.clone()
433
434 reference = reference.compress( reference.maskCA() )
435 pdb = pdb.compress( pdb.maskCA() )
436 tmp_model = tmp_model.compress(tmp_model.maskCA())
437
438 tm = tmp_model.transformation( reference, n_it=0,
439 profname="rms_outliers")
440 pdb = pdb.transform( tm )
441
442 if local:
443 pm = Pymoler()
444 pm.addPdb( pdb, "m" )
445 pm.addPdb( reference, "r" )
446 pm.colorAtoms( "m", tmp_model.profile("rms_outliers") )
447 pm.add('set ribbon_trace,1')
448 pm.add('show ribbon')
449 pm.show()
450
451 print 'The result from the benchmarking can be found in %s/benchmark'%outfolder
452 globals().update( locals() )
453
454 return 1
455
456
458 """
459 Precalculated result to check for consistent performance.
460
461 @return: 1
462 @rtype: int
463 """
464 return 1
465
466
467 if __name__ == '__main__':
468
469 test = Test()
470
471 assert test.run( local=1 ) == test.expected_result()
472