1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 """
27 Interface to Modeller
28 """
29
30 import re, os, os.path, subprocess
31 import commands
32 import linecache
33 from string import *
34
35 import settings
36 import Biskit.tools as T
37
38 from Biskit.Mod.TemplateCleaner import TemplateCleaner as TC
39 from Biskit.Mod.SequenceSearcher import SequenceSearcher as SS
40 from Biskit.Mod.CheckIdentities import Check_Identities as CI
41 from Biskit.Mod.Aligner import Aligner
42 from Biskit.ModelList import ModelList
43 from Biskit.DictList import DictList
44
45 from Biskit import Executor, TemplateError
46
47 from Biskit import PDBModel
48 import glob
49
50 from Biskit import StdLog, EHandler
51
52
54 pass
55
56
58 """
59 Run a Modeller job
60 ==================
61
62 Take Alignment from t_coffee and template PDBs. Creates a modeller-script
63 and runs modeller and post-processes the results. The Modeller output ends
64 up in the output folder.
65
66 Example usage
67 -------------
68 >>> m = Modeller( outfolder )
69 >>> m.run()
70
71 References
72 ----------
73 - U{http://www.salilab.org/modeller/}
74 """
75
76 MODELLER_TEMPLATE = """
77 # Homology modelling by the MODELLER TOP routine 'model'.
78
79 INCLUDE # Include the predefined TOP routines
80
81 SET OUTPUT_CONTROL = 1 1 1 1 1 # uncomment to produce a large log file
82 SET ALNFILE = '%(f_pir)s' # alignment filename
83 SET KNOWNS = %(template_ids)s
84 SET SEQUENCE = '%(target_id)s' # code of the target
85 SET ATOM_FILES_DIRECTORY = '%(template_folder)s'# directories for input atom files
86 SET STARTING_MODEL= %(starting_model)i # index of the first model
87 SET ENDING_MODEL = %(ending_model)i # index of the last model
88 #(determines how many models to calculate)
89
90 CALL ROUTINE = 'model' # do homology modelling
91 """
92
93 F_RESULT_FOLDER = '/modeller'
94 F_MOD_SCRIPT = 'modeller.top'
95
96 F_INPUT_FOLDER = F_RESULT_FOLDER
97
98
99 F_INP = F_RESULT_FOLDER + '/' + F_MOD_SCRIPT
100
101
102 F_PDBModels = '/PDBModels.list'
103
104
105 F_SCORE_OUT = F_RESULT_FOLDER + '/Modeller_Score.out'
106
107
108 - def __init__( self, outFolder='.', log=None, fasta_target=None,
109 f_pir=None, template_folder=None, fout=None,
110 starting_model=1, ending_model=10, verbose=0,
111 host=None, **kw ):
112 """
113 @param outFolder: base folder for Modeller output
114 (default: L{F_RESULT_FOLDER})
115 @type outFolder: str
116 @param log: log file instance, if None, STDOUT is used (default: None)
117 @type log: LogFile
118 @param f_pir: PIR alignment file (default: None -> L{Aligner.F_FINAL_ALN})
119 @type f_pir: str
120 @param template_folder: folder with template coordinate files
121 (default: None -> L{TC.F_MODELLER})
122 @type template_folder: str
123 @param fout: name of modeller script (default: None -> L{F_INP})
124 @type fout: None or str
125 @param starting_model: first model (default: 1)
126 @type starting_model: int
127 @param ending_model: last model (default: 10)
128 @type ending_model: int
129 @param verbose: verbosity level (default: 1)
130 @type verbose: 1|0
131 @param host: host for calculation (None->no ssh) (default: None)
132 @type host: str
133 """
134 self.outFolder = T.absfile( outFolder )
135 self.log = log
136
137 self.verbose = verbose
138
139 self.f_inp = None
140
141 self.f_pir = f_pir
142 self.template_folder = template_folder
143 self.starting_model = starting_model
144 self.ending_model = ending_model
145
146 f_top = '%s%s'%(self.outFolder, self.F_INP)
147
148 self.template_ids = ''
149 self.target_id = ''
150
151
152 self.pir_ids = []
153
154 Executor.__init__( self, 'modeller', args=f_top, f_in=f_top,
155 template=self.MODELLER_TEMPLATE,
156 cwd='%s%s'%(self.outFolder, self.F_RESULT_FOLDER),
157 verbose=verbose, node=host, **kw )
158
159
168
169
177
178
180 """
181 Convert the list of template id's into a string in addition
182 L{Executor.generateInp} functionality.
183
184 Replace formatstr place holders in inp by fields of this class.
185
186 @return: input file name OR (if pipes=1) content of input file
187 @rtype: str
188
189 @raise TemplateError: if error while creating template file
190 """
191
192 self.template_ids = [ "'%s'"%id for id in self.template_ids ]
193 self.template_ids = ' '.join( self.template_ids )
194
195 try:
196 inp = None
197
198 if self.template:
199 inp = self.fillTemplate()
200
201 return self.convertInput( inp )
202
203 except Exception, why:
204 s = "Error while creating template file."
205 s += "\n template file: " + str( self.template )
206 s += "\n why: " + str( why )
207 s += "\n Error:\n " + t.lastError()
208 raise TemplateError, s
209
210
212 """
213 Write message to log.
214
215 @param msg: message to print
216 @type msg: str
217 """
218 if self.log:
219 self.log.add( msg )
220 else:
221 if force:
222 print msg
223
224
226 """
227 Extract names of all PDB files in a folder (without .PDB)
228
229 @return: list of PDB names
230 @rtype: [str]
231 """
232 fs = os.listdir( self.template_folder )
233 return [ f[:-4] for f in fs if f[-4:].upper()=='.PDB' ]
234
235
237 """
238 Extract (first) sequence id from fasta file. Make intelligent guess
239 if there is no exact match between target fasta and alignment file.
240
241 @param f_fasta: fasta file with target sequence
242 @type f_fasta: str
243
244 @raise ModellerError: if target ID is not found in alignment
245 """
246 ex_fasta_id = re.compile('^>([A-Za-z0-9_]+)')
247
248 title = open( f_fasta ).readline()
249
250 try:
251 id_fasta = ex_fasta_id.findall( title )[0]
252 except IndexError, why:
253 raise ModellerError( "Can't extract sequence id from "+f_fasta )
254
255 if self.pir_ids:
256
257 if id_fasta in self.pir_ids:
258 return id_fasta
259
260 guesses = [ pid for pid in self.pir_ids
261 if pid[:len(id_fasta)] == id_fasta ]
262
263 if len( guesses ) == 1:
264 return guesses[0]
265
266 raise ModellerError("Target ID %s is not found in alignment." %\
267 id_fasta )
268
269 self.logWrite('Warning: target sequence ID extracted without check'+
270 ' against alignment file. Use prepare_alignment first.')
271
272 title.close()
273
274 return id_fasta
275
276
278 """
279 Convert t_coffee - generated PIR into PIR for Modeller.
280 On the way all sequence ids are extracted into self.pir_ids.
281
282 @raise ModellerError: if can't rename file
283 """
284 ex_title = re.compile( '^>P1;([A-Za-z0-9_]+)' )
285 ex_description = re.compile( '^sequence|^structure' )
286
287 self.pir_ids = []
288
289 try:
290 bak_fname = self.f_pir + '_original'
291 os.rename( self.f_pir, bak_fname )
292 except OSError, why:
293 raise ModellerError( "Can't rename " + self.f_pir + " to "+ bak_fname )
294
295 lines = open( bak_fname ).readlines()
296
297 for i in range( len( lines ) ):
298 current = lines[ i ]
299
300 if ex_title.match( current ):
301 id = ex_title.findall( current )[0]
302
303 self.pir_ids += [ id ]
304
305 prefix = 'sequence'
306 if id in self.template_ids:
307 prefix = 'structure'
308
309
310
311 lines[i + 1] = "%s:%s: : : : : : : : \n" \
312 % (prefix, id)
313
314 else:
315 if not ex_description.match( current ):
316 lines[ i ] = current.upper()
317
318 open( self.f_pir, 'w' ).writelines( lines )
319
320
322 """
323 Prepare information needed by modeller.
324
325 @param fasta_target: target fasta file
326 (default: None -> L{SS.F_FASTA_TARGET})
327 @type fasta_target: str
328 @param fout: name of modeller script (default: None -> L{F_INP})
329 @type fout: None or str
330 """
331 fasta_target = fasta_target or self.outFolder + SS.F_FASTA_TARGET
332 self.template_folder = self.template_folder or self.outFolder + TC.F_MODELLER
333 self.f_pir = self.f_pir or self.outFolder + Aligner.F_FINAL_ALN
334
335 self.template_ids = self.get_template_ids( )
336
337
338 self.prepare_alignment( )
339
340
341 self.target_id = self.get_target_id( fasta_target )
342
343
351
352
353
354
355
357 """
358 Compile a list with the names of all models created by Modeller.
359
360 @param model_folder: folder containing model PDBs
361 (output from Modeller)
362 @type model_folder: str
363
364 @return: pdb files from modeller
365 @rtype: [str]
366 """
367 return glob.glob('%s/target.B*'%(model_folder))
368
369
371 """
372 Extract the modeller score from the models
373
374 @param f_model: file name of model pdb
375 @type f_model: str
376 @param model: model
377 @type model: PDBModel
378
379 @return: modeller score for the models
380 @rtype: float
381 """
382 r1 = re.compile(r'\w\d.*\d')
383
384 line_of_interest = linecache.getline(f_model, 2)
385 OF = r1.findall(line_of_interest)
386
387 file = open(model.validSource(),'r')
388 string = file.readlines()[:2]
389 model.info["headlines"] = string
390 file.close()
391
392 return float(OF[0])
393
394
396 """
397 Write a list of model scores to file L{F_SCORE_OUT}
398
399 @param pdb_list: list of models
400 @type pdb_list: ModelList
401 @param model_folder: ouput folder
402 @type model_folder: str
403 """
404 file_output = open(self.outFolder+self.F_SCORE_OUT,'w')
405 file_output.write("The models from modeller with their Score:\n")
406
407 for model in pdb_list:
408
409 file_output.write('%s\t%6.2f\n'%(T.stripFilename(
410 model.validSource()), model.info["mod_score"]))
411
412 file_output.close()
413
414
415 - def update_PDB(self, pdb_list, cwd, model_folder):
416 """
417 Extract number of templates per residue position from alignment.
418 Write new PDBs with number of templates in occupancy column.
419
420 @param pdb_list: list of models
421 @type pdb_list: ModelList
422 @param cwd: current project directory
423 @type cwd: str
424 @param model_folder: folder with models
425 @type model_folder: str
426 """
427 cwd = cwd or self.outFolder
428
429 ci = CI(cwd)
430 aln_dictionnary = ci.go()
431
432 template_info = aln_dictionnary["target"]["template_info"]
433
434 for model in pdb_list:
435
436 model.setResProfile("n_templates", template_info,
437 comment="number of templates for each residue")
438
439 rmask = pdb_list[0].profile2mask("n_templates", 1,1000)
440 amask = pdb_list[0].res2atomMask(rmask)
441
442
443 for i in range(len(pdb_list)):
444
445 for a,v in zip(pdb_list[i].atoms, amask):
446 a['occupancy'] = v
447
448 t = []
449
450 for string in pdb_list[i].info["headlines"]:
451
452 tuple = ()
453 tuple = string.split()[0],join(string.split()[1:])
454 t.append(tuple)
455
456 pdb_list[i].writePdb('%s/model_%02i.pdb'%(model_folder,i),
457 headlines = t)
458
459
461 """
462 Read Modeller score from the 'temperature_factor' column and
463 create a profile from it.
464
465 @param pdb_list: list of models
466 @type pdb_list: ModelList
467 """
468 for model in pdb_list:
469
470 m = model.compress( model.maskCA())
471
472 atoms = DictList( m.atoms )
473 prof = atoms.valuesOf( "temperature_factor")
474
475 model.setResProfile('mod_score', prof)
476
477
479 """
480 Dump the list of PDBModels.
481
482 @param pdb_list: list of models
483 @type pdb_list: ModelList
484 @param model_folder: ouput folder (default: None -> L{F_PDBModels})
485 @type model_folder: str
486 """
487 T.Dump(pdb_list, '%s'%(model_folder + self.F_PDBModels))
488
489
490 - def postProcess(self, model_folder=None):
491 """
492 Rename model PDBs to 'model_01.pdb' - 'model_xx.pdb' according to their
493 score, create and dump a ModelList instance containing these models
494 and info-records and profiles with the Modeller score.
495
496 @param model_folder: folder containing model PDBs
497 (default: None -> L{F_INPUT_FOLDER})
498 @type model_folder: str
499 """
500
501 model_folder = model_folder or self.outFolder + self.F_INPUT_FOLDER
502
503 model_files = self.pdb_list(model_folder)
504
505 pdb_list = ModelList( model_files )
506
507 for model in pdb_list:
508
509 model.info["mod_score"] = self.extract_modeller_score(
510 model.validSource(), model)
511
512 pdb_list = pdb_list.sortBy("mod_score")
513
514 self.update_PDB(pdb_list, self.outFolder, model_folder)
515
516 self.output_score(pdb_list,model_folder)
517
518 self.update_rProfiles(pdb_list)
519
520 self.write_PDBModels(pdb_list, model_folder)
521
522
523
524
525
526
527
529 """
530 Test class
531 """
532
533 - def run( self, local=0, run=0 ):
534 """
535 run function test
536
537 @param local: transfer local variables to global and perform
538 other tasks only when run locally
539 @type local: 1|0
540
541 @param run: run the full test (call external application) or not
542 @type run: 1|0
543
544 @return: 1
545 @rtype: int
546 """
547 import tempfile
548 import shutil
549
550
551 outfolder = tempfile.mkdtemp( '_test_Modeller' )
552 os.mkdir( outfolder +'/templates' )
553 os.mkdir( outfolder +'/t_coffee' )
554
555 shutil.copytree( T.testRoot() + '/Mod/project/templates/modeller',
556 outfolder + '/templates/modeller' )
557
558 shutil.copy( T.testRoot() + '/Mod/project/t_coffee/final.pir_aln',
559 outfolder + '/t_coffee' )
560
561 shutil.copy( T.testRoot() + '/Mod/project/target.fasta',
562 outfolder )
563
564 m = Modeller( outfolder, verbose=local )
565
566 if run:
567 print 'Running modeller job ...'
568 m.run()
569 print 'The modelling result can be found in %s/modeller'%outfolder
570
571 if local:
572 globals().update( locals() )
573
574
575 T.tryRemove( outfolder, tree=1 )
576
577 return 1
578
579
581 """
582 Precalculated result to check for consistent performance.
583
584 @return: 1
585 @rtype: int
586 """
587 return 1
588
589
590 if __name__ == '__main__':
591
592 test = Test()
593
594 assert test.run( run=1, local=1 ) == test.expected_result()
595