1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """
26 Setup directory structure for the validation.
27 """
28
29 import Biskit.tools as T
30 from Biskit.PDBModel import PDBModel
31 import re
32 import glob
33 import copy
34
35 import modUtils as MU
36
37 from TemplateSearcher import TemplateSearcher
38 from SequenceSearcher import SequenceSearcher
39 from TemplateCleaner import TemplateCleaner
40
41 import os.path
42 import os, string
43
45 """
46 Takes a TemplateSearcher result folder and creates sub-projects
47 with each template cluster center as target sequence to be modeled.
48 In each sub-project folder a folder structure analogous to the main
49 project is set up.
50 The real structure is linked into the sub-project folder as reference.pdb
51 """
52
53 F_RESULT_FOLDER = '/validation'
54 F_NR_FOLDER = SequenceSearcher.F_RESULT_FOLDER
55
56 F_ALPHA_FOLDER = TemplateCleaner.F_COFFEE
57 F_PDB_FOLDER = TemplateCleaner.F_MODELLER
58 F_PDB_LINK = F_PDB_FOLDER
59 F_ALPHA_LINK = F_ALPHA_FOLDER
60
61 F_TEMPLATE_SEQUENCE = '/target.fasta'
62 F_TCOFFEE = '/t_coffee_template_files'
63 F_TEMPLATES_FASTA = '/templates.fasta'
64 F_KNOWN_STRUCTURE = '/reference.pdb'
65
66
def __init__(self, outFolder, log=None):
    """
    Store the project folder and the log target, then call
    L{prepareFolders} to set up the folders needed for the validation.

    @param outFolder: base folder (converted with T.absfile)
    @type  outFolder: str
    @param log: log file to report to; None reports to STDOUT
    @type  log: LogFile instance or None
    """
    self.log = log
    self.outFolder = T.absfile(outFolder)

    # needs self.outFolder, so it has to come last
    self.prepareFolders()
78
79
81 """
82 Check that all needed folders exist, if not create them
83 """
84 if not os.path.exists( self.outFolder + self.F_RESULT_FOLDER ):
85 os.mkdir( self.outFolder + self.F_RESULT_FOLDER )
86
87
89 """
90 Write message to log.
91
92 @param msg: message to print
93 @type msg: str
94 """
95 if self.log:
96 self.log.add( msg )
97 else:
98 if force:
99 print msg
100
101
103 """
104 Take clustering result from the file 'chain_index.txt'
105
106 @param chain_index: file with clustering results
107 (default: None-> L{TemplateSearcher.F_CHAIN_INDEX})
108 @type chain_index: str
109
110 @return: pdb codes of templates
111 @rtype: [str]
112 """
113 chain_index = chain_index or self.outFolder + \
114 TemplateSearcher.F_NR + TemplateSearcher.F_CHAIN_INDEX
115
116 r1 = re.compile( r'([A-Z0-9]{4}).pdb' )
117 index = open( "%s"%chain_index, 'r' )
118
119 cluster_list = []
120
121 string_lines = index.readlines()
122 for i in string_lines:
123 if( r1.search(i) ):
124 code = r1.findall(i)[0]
125 cluster_list.append(code)
126
127 index.close()
128
129 return cluster_list
130
131
133 """
134 Create folders for the templates to be used for the validation.
135
136 @param validation_folder: top folder for the validation
137 @type validation_folder: str
138 @param cluster: name for validation subfolder
139 (e.g. pdb code of cluster center)
140 @type cluster: str
141 """
142 try:
143 os.mkdir( '%s/%s'%(validation_folder,cluster) )
144 except:
145 print 'Folder %s/%s alredy exists.'\
146 %(self.F_RESULT_FOLDER, cluster)
147
148
def prepare_alpha(self, cluster_list, alpha_folder=None,
                  output_folder=None):
    """
    Create a dictionary where the keys are template pdb codes and the
    values are the CA-trace files (.alpha) of all B{other} templates,
    i.e. every file whose name does not start with that pdb code.

    For each cluster the selected file names are also written, one per
    line, to '<output_folder>/<cluster>/t_coffee_template_files'
    (see L{F_TCOFFEE}). The '<output_folder>/<cluster>' folder has to
    exist already.

    @param cluster_list: pdb codes of templates
    @type  cluster_list: [str]
    @param alpha_folder: folder with template CA-trace files
                         (default: None -> L{F_ALPHA_FOLDER})
    @type  alpha_folder: str
    @param output_folder: top output folder
                          (default: None -> L{F_RESULT_FOLDER})
    @type  output_folder: str

    @return: dictionary mapping pdb code to the list of CA-trace files
             of the remaining templates
    @rtype: {str:[str]}
    """
    alpha_folder = alpha_folder or self.outFolder + self.F_ALPHA_FOLDER
    output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER

    alpha_path = glob.glob('%s/*.alpha' % alpha_folder)

    alpha_dictionary = {}
    for cluster in cluster_list:
        # Build a filtered list instead of deleting by index from a copy:
        # deleting shifted the remaining entries, so a second chain of the
        # same template removed an unrelated file (or raised IndexError).
        remaining = [path for path in alpha_path
                     if os.path.split(path)[1][0:4] != cluster]

        alpha_dictionary['%s' % cluster] = remaining

        output = open("%s/%s" % (output_folder, cluster + self.F_TCOFFEE),
                      'w')
        try:
            output.writelines([path + "\n" for path in remaining])
        finally:
            # close the file even if writing fails
            output.close()

    return alpha_dictionary
195
196
def prepare_pdb(self, cluster_list, pdb_folder=None,
                output_folder=None):
    """
    Create a dictionary where the keys are template pdb codes and the
    values are the MODELLER pdb files of all B{other} templates, i.e.
    every file whose name does not start with that pdb code.

    The complete list of pdb files found in pdb_folder is stored in
    self.pdb_path.

    @param cluster_list: pdb codes of templates
    @type  cluster_list: [str]
    @param pdb_folder: folder with Modeller pdb files
                       (default: None -> L{F_PDB_FOLDER})
    @type  pdb_folder: str
    @param output_folder: top output folder; not used by this method,
                          kept for interface compatibility
    @type  output_folder: str

    @return: dictionary mapping pdb code to the list of pdb files of the
             remaining templates
    @rtype: {str:[str]}
    """
    pdb_folder = pdb_folder or self.outFolder + self.F_PDB_FOLDER
    self.pdb_path = glob.glob('%s/*.pdb' % pdb_folder)

    pdb_dictionary = {}
    for cluster in cluster_list:
        # Build a filtered list instead of deleting by index from a copy:
        # deleting shifted the remaining entries, so a second chain of the
        # same template removed an unrelated file (or raised IndexError).
        pdb_dictionary['%s' % cluster] = [
            path for path in self.pdb_path
            if os.path.split(path)[1][0:4] != cluster]

    return pdb_dictionary
234
235
238 """
239 Create 'templates.fasta' file for each template to validate
240
241 @param cluster_list: pdb codes of templates
242 @type cluster_list: [str]
243 @param pdb_dictionary: dictionary mapping pdb code to pdb files
244 used by Modeller
245 @type pdb_dictionary: {str:str}
246 @param output_folder: top output folder
247 (default: None -> L{F_RESULT_FOLDER})
248 @type output_folder: str
249 """
250 output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER
251
252 for cluster in cluster_list:
253 folder = '%s/%s'%(output_folder, cluster + \
254 TemplateSearcher.F_RESULT_FOLDER)
255 if not os.path.exists( folder ):
256 os.mkdir( folder)
257 else:
258 print 'Directory %s exists, skipping'%( cluster + \
259 TemplateSearcher.F_RESULT_FOLDER)
260
261 pdb_path = pdb_dictionary["%s"%cluster]
262 PDBModels_list = []
263 pdb_name = []
264
265 for pdb in pdb_path:
266 PDBModels_list.append(PDBModel('%s'%pdb))
267 pdb_name.append(os.path.split(pdb)[1][:-4])
268
269 input_file = self.outFolder + self.F_RESULT_FOLDER + \
270 '/%s'%cluster + TemplateSearcher.F_RESULT_FOLDER \
271 + self.F_TEMPLATES_FASTA
272
273 templatesfasta = open("%s"%input_file,'w')
274
275 for i in range(len(PDBModels_list)):
276 templatesfasta.write(">%s\n"%pdb_name[i])
277 sequence = PDBModels_list[i].sequence()
278 sequence = MU.format_fasta(seq = sequence)
279 templatesfasta.write("%s\n"%sequence)
280
281 templatesfasta.close()
282
283
def link_pdb(self, cluster_list, pdb_dictionary, alpha_dictionary,
             output_folder=None):
    """
    Create hard links in each template folder to the pdb files for
    MODELLER and to the alpha files for T-Coffee.

    For every cluster the sub-folders '<cluster>' + L{F_PDB_LINK} and
    '<cluster>' + L{F_ALPHA_LINK} are created below output_folder if
    missing. Files that already exist in the target folder are reported
    and left untouched.

    @param cluster_list: pdb codes of templates
    @type  cluster_list: [str]
    @param pdb_dictionary: dictionary mapping pdb code to pdb files
                           used by Modeller
    @type  pdb_dictionary: {str:[str]}
    @param alpha_dictionary: dictionary mapping pdb code to CA-trace files
    @type  alpha_dictionary: {str:[str]}
    @param output_folder: top output folder
                          (default: None -> L{F_RESULT_FOLDER})
    @type  output_folder: str
    """
    output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER

    # The pdb and the alpha files are handled identically; only the source
    # dictionary and the name of the link folder differ.
    link_sets = ((pdb_dictionary, self.F_PDB_LINK),
                 (alpha_dictionary, self.F_ALPHA_LINK))

    for cluster in cluster_list:

        for dictionary, link_folder in link_sets:

            folder = '%s/%s' % (output_folder, cluster + link_folder)
            if not os.path.exists(folder):
                os.mkdir(folder)
            else:
                # single parenthesised argument: same output whether
                # print is a statement or a function
                print('Directory %s exists, skipping'
                      % (cluster + link_folder))

            for source in dictionary[cluster]:
                name = os.path.split(source)[1]
                target = '%s/%s' % (folder, name)

                if not os.path.exists(target):
                    os.link(source, target)
                else:
                    # report the file actually being processed (the alpha
                    # branch used to report the last pdb file instead)
                    print('File exists %s/%s no link made.'
                          % (link_folder, name))
343
344
346 """
347 Create the 'target.fasta' file for each template to validate
348
349 @param cluster: name of the cluster which is used for the
350 folder name in which the validation is run.
351 @type cluster: str
352 @param output_folder: top output folder
353 (default: None -> L{F_RESULT_FOLDER})
354 @type output_folder: str
355 """
356 output_folder = output_folder or self.outFolder + \
357 self.F_RESULT_FOLDER + '/%s/'%cluster
358 target = open("%s"%(output_folder + self.F_TEMPLATE_SEQUENCE),'w')
359 target.write(">target\n")
360
361 for pdb in self.pdb_path:
362 if(cluster == os.path.split(pdb)[1][0:4]):
363
364 model = PDBModel('%s'%pdb)
365 sequence = model.sequence()
366 sequence = MU.format_fasta(seq = sequence)
367 target.write("%s"%sequence)
368
369 target.close()
370
371
def prepare_sequences(self, cluster, sequences_folder=None,
                      output_folder=None):
    """
    Link the 'sequences' directory from the project directory
    into the template folder (symbolic link).

    Nothing is done, apart from a message, if the link target already
    exists. A failure to create the link is reported but not raised.

    @param cluster: name of the cluster which is used for the
                    folder name in which the validation is run.
    @type  cluster: str
    @param sequences_folder: folder with sequences (default: None ->
                             L{SequenceSearcher.F_RESULT_FOLDER})
    @type  sequences_folder: str
    @param output_folder: path of the link to create (default: None ->
                          L{F_RESULT_FOLDER}/cluster/
                          L{SequenceSearcher.F_RESULT_FOLDER})
    @type  output_folder: str
    """
    sequences_folder = sequences_folder or self.outFolder + \
                       SequenceSearcher.F_RESULT_FOLDER

    output_folder = output_folder or self.outFolder + \
                    self.F_RESULT_FOLDER + '/%s' % cluster + \
                    SequenceSearcher.F_RESULT_FOLDER

    # lexists also catches a dangling link left over from an earlier run
    if os.path.lexists(output_folder):
        print('Folder %s already exists, linking skipped.' % output_folder)
        return

    try:
        # os.symlink instead of a shell 'ln -s' command line: paths with
        # spaces or shell metacharacters are handled correctly
        os.symlink(sequences_folder, output_folder)
    except OSError:
        import sys
        # the shell command never aborted the setup either; keep going
        # but tell the user what went wrong
        print('Could not link %s to %s: %s'
              % (sequences_folder, output_folder, sys.exc_info()[1]))
403
404
407 """
408 Create a link in each template folder with their own known
409 structure 'reference.pdb'
410
411 @param cluster: name of the cluster which is used for the
412 folder name in which the validation is run.
413 @type cluster: str
414 @param input_folder: folder with pdb files
415 (default: None -> L{F_PDB_FOLDER})
416 @type input_folder: str
417 @param output_file: target file
418 @type output_file: str
419 """
420 input_folder = input_folder or self.outFolder + self.F_PDB_FOLDER
421 output_file = output_file or self.outFolder + self.F_RESULT_FOLDER +\
422 '/%s/'%cluster + self.F_KNOWN_STRUCTURE
423
424 files = os.listdir('%s'%input_folder)
425 for pdb in files:
426 if(cluster == pdb[0:4]):
427 if not os.path.exists( output_file ):
428 os.link( input_folder + pdb, output_file)
429
430
431
432 - def go(self, validation_folder = None):
456
457
458
459
460
461
463 """
464 Test class
465 """
466
def run(self, local=0):
    """
    Run the function test: copy the 'templates/nr' and
    'templates/modeller' test folders into a fresh temporary project
    folder and run ValidationSetup.go on it.

    @param local: transfer local variables to global and perform
                  other tasks only when run locally
    @type  local: 1|0

    @return: 1
    @rtype: int
    """
    import shutil
    import tempfile

    # NOTE: local names are deliberately left as they are -- with local=1
    # they are all exported into the module namespace below.
    outfolder = tempfile.mkdtemp('_test_ValidationSetup')
    os.mkdir(outfolder + '/templates')

    shutil.copytree(T.testRoot() + '/Mod/project/templates/nr',
                    outfolder + '/templates/nr')

    shutil.copytree(T.testRoot() + '/Mod/project/templates/modeller',
                    outfolder + '/templates/modeller')

    v = ValidationSetup(outFolder=outfolder)

    v.go(validation_folder=outfolder)

    if not local:
        return 1

    print('The validation project can be found in %s/validation' % outfolder)
    globals().update(locals())

    return 1
501
502
504 """
505 Precalculated result to check for consistent performance.
506
507 @return: 1
508 @rtype: int
509 """
510 return 1
511
512
513 if __name__ == '__main__':
514
515 test = Test()
516
517 assert test.run( local=1 ) == test.expected_result()
518