1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """
23 Prepare template coordinates for modelling.
24 """
25
26 import Biskit.tools as T
27 from Biskit import PDBModel, PDBCleaner, CleanerError
28
29 from TemplateSearcher import TemplateSearcher
30 import modUtils
31
32 import os, string
33
class TemplateCleaner:
    """
    Takes a list of PDB-files and chains identifiers.

    Returns:
      - cleaned PDB files with all chains
      - cleaned PDB files with needed chain for modeller.
      - CA trace PDB for T_coffee
      - sequences in fasta format for T_coffee

    Creates (folders and files), all relative to the output folder:
      - mkdir nr_cleaned .. complete PDBs but cleaned
      - mkdir modeller   .. only the chains needed
      - mkdir t_coffee   .. CA traces
      - templates.fasta
    """

    # result folder shared with TemplateSearcher; everything below lives in it
    F_RESULT_FOLDER = TemplateSearcher.F_RESULT_FOLDER

    F_CLEANED = F_RESULT_FOLDER + '/nr_cleaned/'
    F_MODELLER= F_RESULT_FOLDER + '/modeller/'
    F_COFFEE  = F_RESULT_FOLDER + '/t_coffee/'
    F_FASTA   = F_RESULT_FOLDER + '/templates.fasta'

    def __init__( self, outFolder, log=None ):
        """
        @param outFolder: output folder
        @type  outFolder: str
        @param log: None reports to STDOUT (default: None)
        @type  log: LogFile instance or None
        """
        self.outFolder = T.absfile( outFolder )
        self.log = log

        self.prepareFolders()


    def prepareFolders( self ):
        """
        Create folders needed by this class.

        Uses os.mkdir, so the parent result folder (outFolder +
        F_RESULT_FOLDER) has to exist already.
        """
        for sub in ( self.F_CLEANED, self.F_MODELLER, self.F_COFFEE ):
            folder = self.outFolder + sub
            if not os.path.exists( folder ):
                os.mkdir( folder )


    def logWrite( self, msg, force=1 ):
        """
        Write message to log.

        @param msg: message
        @type  msg: str
        @param force: if no log, print message (default: 1)
        @type  force: 1|0
        """
        if self.log:
            self.log.add( msg )
        elif force:
            # single-argument call form behaves the same as the print statement
            print( msg )


    def __chain_by_id( self, model, chainId ):
        """
        Get a PDBModel with only requested chains.

        @param model: original PDBModel
        @type  model: PDBModel
        @param chainId: chain identifier
        @type  chainId: str

        @return: PDBModel with only the specified chain
        @rtype: PDBModel
        """
        # chainId is bound as a default argument so that the mask function
        # does not depend on the enclosing scope
        chain_mask = model.mask(
            lambda a, wanted=chainId: a['chain_id'] == wanted )

        return model.compress( chain_mask )


    def fasta_sequence( self, header, s ):
        """
        Convert sequence to fasta format.

        The sequence is wrapped into lines of at most 80 characters. No
        empty line is emitted, also not for an empty sequence or a sequence
        whose length is an exact multiple of 80.

        @param header: fasta header
        @type  header: str
        @param s: sequence
        @type  s: str

        @return: fasta formatted sequence
        @rtype: str
        """
        lines = [ ">%s" % header ]
        lines += [ s[ i : i + 80 ] for i in range( 0, len( s ), 80 ) ]

        return '\n'.join( lines ) + '\n'


    def write_tcoffee_pdb( self, model, fname ):
        """
        Write CA trace PDB with SEQRES records for t_coffee.

        @param model: PDBModel
        @type  model: PDBModel
        @param fname: filename of new PDB file
        @type  fname: str
        """
        m_ca = model.compress( model.maskCA() )

        # one SEQRES line holds up to 13 residue names; round up
        n_lines = ( len( m_ca ) + 12 ) // 13

        chain_id = m_ca.getAtoms()[0]['chain_id']

        # atoms without chain identifier are labelled as chain 'A'
        # NOTE(review): the SEQRES lines below still carry the original
        # (empty) chain_id in that case -- confirm that this is intended.
        if not chain_id:
            for a in m_ca.atoms:
                a['chain_id'] = 'A'

        n_res = m_ca.lenResidues()

        head = []

        for i in range( n_lines ):
            res_from = i * 13
            res_to = min( res_from + 13, n_res )

            s = "%4i %s %4i " % (i+1, chain_id, n_res)

            for ca in m_ca.atoms[ res_from : res_to ]:
                s += " %3s" % ca['residue_name']

            head += [ ( 'SEQRES', s ) ]

        m_ca.writePdb( fname, headlines=head )


    def write_modeller_pdb( self, model, fname ):
        """
        Write a PDB file for modeller. The chain identifier of all atoms
        is blanked in a copy of the model; the given model is not modified.

        @param model: PDBModel
        @type  model: PDBModel
        @param fname: filename of new PDB file
        @type  fname: str
        """
        model = model.clone( deepcopy=1 )
        for a in model.atoms:
            a['chain_id'] = ''

        model.writePdb( fname )


    def process_all( self, file_dic, keep_hetatoms=0 ):
        """
        Process PDB files in file_dic.
        The PDB is read from:
          - L{TemplateSearcher.F_NR}

        and cleaned files are written to:
          - L{F_CLEANED}
          - L{F_COFFEE}
          - L{F_MODELLER}

        If the file L{F_CLEANED} already exists, this file is
        used to write modeller and t-coffee pdbs.

        A file that cannot be processed is reported to the log and skipped;
        the last failing cleaner and file name are kept in self.err_cleaner
        (None if the cleaner could not even be created) and self.err_file.

        @param file_dic: dictionary mapping filenames of pdb files to
                         the chains of interest, e.g. { fname : chain_id, }
        @type  file_dic: {str:str}
        @param keep_hetatoms: keep hetatoms (default: 0)
        @type  keep_hetatoms: 0|1
        """
        fasta = []

        for f, chain_id in file_dic.items():

            self.logWrite( '\nCLEANING ' + f + '...')

            # reset per file so that the error handler never sees an unbound
            # name or the cleaner of a previous file
            c = None

            try:
                # first 4 characters of the file name
                code = f.split( '/' )[-1][:4]

                c = PDBCleaner( f, self.log )

                f_cleaned = self.outFolder + self.F_CLEANED + code + '.pdb'

                if os.path.exists( f_cleaned ):
                    # re-use the already cleaned structure
                    model = PDBModel( f_cleaned )
                else:
                    model = c.process( keep_hetatoms=keep_hetatoms )

                    # write complete cleaned PDB
                    model.writePdb( f_cleaned )

                code = model.pdbCode
                title = code + '_' + chain_id

                # extract the needed chain, if one is given
                if len( chain_id ) > 0:
                    model = self.__chain_by_id( model, chain_id )

                fname = "%s%s.pdb" % (self.outFolder + self.F_MODELLER, title)
                self.write_modeller_pdb( model, fname )

                fname = "%s%s.alpha"%(self.outFolder + self.F_COFFEE, title)
                self.write_tcoffee_pdb( model, fname )

                fasta.append( self.fasta_sequence( title, model.sequence() ) )

            except Exception:
                # best effort: report and continue with the next file;
                # KeyboardInterrupt / SystemExit are allowed to propagate
                self.logWrite( 'Error cleaning ' + f)
                self.logWrite( T.lastError() )
                self.err_cleaner = c
                self.err_file = f

        fasta_out = open( self.outFolder + self.F_FASTA, 'w' )
        try:
            fasta_out.write( ''.join( fasta ) )
        finally:
            fasta_out.close()
282
283
284
285
286
287
288
class Test:
    """
    Test class
    """

    def run( self, local=0 ):
        """
        run function test

        Copies the test templates into a temporary project folder, runs
        TemplateCleaner.process_all on them and removes the temporary
        folder again, also if one of the steps fails.

        @param local: transfer local variables to global and perform
                      other tasks only when run locally
        @type  local: 1|0

        @return: 1
        @rtype: int
        """
        import tempfile
        import shutil
        from Biskit.LogFile import LogFile

        # temporary project folder
        outfolder = tempfile.mkdtemp( '_test_TemplateCleaner' )

        try:
            os.mkdir( outfolder +'/templates' )

            # log file inside the temporary folder
            f_out = outfolder + '/TemplateCleaner.log'
            l = LogFile( f_out, mode='w')

            shutil.copytree( T.testRoot() + '/Mod/project/templates/nr',
                             outfolder + '/templates/nr' )

            c = TemplateCleaner( outfolder, log=l)

            inp_dic = modUtils.parse_tabbed_file(
                T.absfile( outfolder + '/templates/nr/chain_index.txt' ) )

            c.process_all( inp_dic )

            if local:
                print( 'TemplateCleaner log file written to: %s'%f_out )
                # expose the local names for interactive inspection
                globals().update( locals() )

        finally:
            # clean up the temporary folder, whether or not the run succeeded
            T.tryRemove( outfolder, tree=1 )

        return 1


    def expected_result( self ):
        """
        Precalculated result to check for consistent performance.

        @return: 1
        @rtype: int
        """
        return 1
345
346
if __name__ == '__main__':

    # run the module self-test in local mode and compare the outcome
    # with the stored expected value
    test = Test()

    result = test.run( local=1 )
    expected = test.expected_result()

    assert result == expected
352