1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """
24 Search one or more sequence databases (SwissProt, Tremble) with a sequence
25 using the method of choice (Blast , FastA)
26 """
27
28 import Bio.SwissProt.SProt
29 from Bio.Blast import NCBIWWW
30 from Bio.Blast import NCBIXML
31 from Bio.Blast import NCBIStandalone
32 from Bio import Fasta
33
34 import Biskit.tools as T
35 from Biskit.Errors import *
36 from Biskit import StdLog, EHandler
37
38 from Biskit.Mod import settings
39
40 import re, os
41 import string, copy
42 import commands
43 import copy
44 import sys
45
47 pass
48
50 pass
51
53 """
54 Take a sequence and return a list of nonredundant homolog
55 sequences as fasta file. The search can be performed using three
56 different methods:
57
58 1. localBlast
59 Uses Bio.Blast.NCBIStandalone.blastall (Biopython) to perform
60 the search.
61
62 2. localPSIBlast
63 Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython)
64
65 3. remoteBlast
66 Uses Bio.Blast.NCBIWWW.qblast (Biopython) which performs
67 a BLAST search using the QBLAST server at NCBI.
68
69 @note: the blast sequence database has to be built with -o potion
70 e.g. cat db1.dat db2.dat | formatdb -i stdin -o T -n indexed_db
71
72 @todo: copy blast output
73 """
74
75 F_RESULT_FOLDER = '/sequences'
76
77
78 F_FASTA_ALL = F_RESULT_FOLDER + '/all.fasta'
79
80
81 F_FASTA_NR = F_RESULT_FOLDER + '/nr.fasta'
82
83
84 F_CLUSTER_RAW = F_RESULT_FOLDER + '/cluster_raw.out'
85 F_CLUSTER_LOG = F_RESULT_FOLDER + '/cluster_result.out'
86
87
88 F_BLAST_OUT = F_RESULT_FOLDER + '/blast.out'
89 F_CLUSTER_BLAST_OUT = F_RESULT_FOLDER + '/cluster_blast.out'
90
91
92 F_FASTA_TARGET = '/target.fasta'
93
94
95 - def __init__(self, outFolder='.', clusterLimit=50, verbose=0, log=None ):
96 """
97 @param outFolder: project folder (results are put into subfolder) ['.']
98 @type outFolder: str
99 @param clusterLimit: maximal number of returned sequence clusters
100 (default: 50)
101 @type clusterLimit: int
102 @param verbose: keep temporary files (default: 0)
103 @type verbose: 1|0
104 @param log: log file instance, if None, STDOUT is used (default: None)
105 @type log: LogFile
106 """
107 self.outFolder = T.absfile( outFolder )
108
109
110
111
112
113
114 self.ex_gi = re.compile( '^>(?P<db>ref|gb|ngb|emb|dbj|prf)\|{1,2}(?P<id>[A-Z_0-9.]{4,9})\|' )
115 self.ex_swiss = re.compile( '^>sp\|(?P<id>[A-Z0-9_]{5,7})\|' )
116 self.ex_swiss2= re.compile( '^>.*swiss.*\|(?P<id>[A-Z0-9_]{5,7})\|' )
117 self.ex_pdb = re.compile( '^>pdb\|(?P<id>[A-Z0-9]{4})\|' )
118 self.ex_all = re.compile( '^>.*\|(?P<id>[A-Z0-9_.]{4,14})\|' )
119
120
121
122
123
124
125
126
127 gi = '^gi\|(?P<gi>[0-9]+)\|'
128 self.ex_Rgi_1 = re.compile( gi+'(?P<db>ref|sp|pdb|gb|ngb|emb|dbj|prf|pir)\|(?P<id>[A-Z_0-9.]+)\|' )
129 self.ex_Rgi_2 = re.compile( gi+'(?P<db>ref|sp|pdb|gb|ngb|emb|dbj|prf|pir)\|{2}(?P<id>[A-Z_0-9.]+)' )
130 self.ex_Rswiss = re.compile( gi+'sp\|(?P<id>[A-Z0-9_]{5,7})\|' )
131 self.ex_Rpdb = re.compile( gi+'pdb\|(?P<id>[A-Z0-9]{4})\|' )
132
133
134
135 self.verbose = verbose
136 self.log = log or StdLog()
137
138 self.record_dic = None
139 self.clusters = None
140 self.bestOfCluster = None
141
142
143 self.clusterLimit = clusterLimit
144
145 self.clustersCurrent = None
146
147 self.prepareFolders()
148
149
151 """
152 Create needed output folders if not there.
153 """
154 if not os.path.exists(self.outFolder + self.F_RESULT_FOLDER):
155 os.mkdir( self.outFolder + self.F_RESULT_FOLDER )
156
157
159 """
160 Get all homologues.
161
162 @return: list of Bio.Fasta.Record with all found protein homologues
163 @rtype: [Bio.Fasta.Record]
164
165 @raise BlastError: if no sequences found
166 """
167 if not self.record_dic:
168 raise BlastError( "No sequences found (yet)." )
169
170 return self.record_dic.values()
171
172
174 """
175 Get representative of each cluster.
176
177 @return: list with Bio.Fasta.Record with best record of each cluster
178 @rtype: [Bio.Fasta.Record]
179
180 @raise BlastError: if called before clustering
181 """
182 if not self.clusters:
183 raise BlastError( "Sequences are not yet clustered." )
184
185 return [ self.record_dic[id] for id in self.bestOfCluster ]
186
187
189 """
190 Convert parsed blast result into dictionary of FastaRecords indexed
191 by sequence ID.
192 Writes all the sequences in fasta format to L{F_FASTA_ALL} and the
193 raw blast result to L{F_BLAST_OUT}.
194
195 @param parsed_blast: parsed blast result
196 @type parsed_blast: Bio.Blast.Record.Blast
197 @param db: database
198 @type db: str
199 """
200
201 ids = self.getSequenceIDs( parsed_blast )
202 self.record_dic = self.fastaFromIds( db, ids )
203
204 if self.verbose:
205 self.writeFasta( self.record_dic.values(),
206 self.outFolder + self.F_FASTA_ALL )
207
208 self.__writeBlastResult( parsed_blast,
209 self.outFolder + self.F_BLAST_OUT)
210
211
212
213 - def remoteBlast( self, seqFile, db, method, e=0.01, **kw ):
214 """
215 Perform a remote BLAST search using the QBLAST server at NCBI.
216 Uses Bio.Blast.NCBIWWW.qblast (Biopython) for the search
217
218 @param seqFile: file name with search sequence as FASTA
219 @type seqFile: str
220 @param db: database(s) to search in, e.g. ['swissprot', 'pdb']
221 @type db: [str]
222 @param method: search method, e.g. 'blastp', 'fasta'
223 @type method: str
224 @param e: expectation value cutoff
225 @type e: float
226 @param kw: optional keywords::
227 program BLASTP, BLASTN, BLASTX, TBLASTN, or TBLASTX.
228 database Which database to search against.
229 sequence The sequence to search.
230 ncbi_gi TRUE/FALSE whether to give 'gi' identifier.
231 (default: FALSE)
232 descriptions Number of descriptions to show. Def 500.
233 alignments Number of alignments to show. Def 500.
234 expect An expect value cutoff. Def 10.0.
235 matrix Specify an alt. matrix
236 (PAM30, PAM70, BLOSUM80, BLOSUM45).
237 filter 'none' turns off filtering. Default uses 'seg'
238 or 'dust'.
239 format_type 'HTML', 'Text', 'ASN.1', or 'XML'. Def. 'HTML
240 @type kw: any
241
242
243 @note: Using the remoteBlast is asking for trouble, as
244 every change in the output file might kill the
245 parser. If you still want to use remoteBlast we
246 strongly recomend that you install BioPython
247 from CVS. Information on how to do this you
248 will find on the BioPython homepage.
249
250 @todo: Remote Blasting is running as expected but sequences
251 are still retrieved from a local database. Implement remote
252 collection of fasta seuqences from NCBI (there should be
253 something like in Biopython). Otherwise something like this
254 will also work::
255
256 ## collect the entry with gi 87047648
257 url = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?CMD=text&db=protein&dopt=FASTA&dispmax=20&uid=87047648'
258 import urllib
259 handle = urllib.urlopen(url)
260 lines = handle.readlines()
261 seq = ''
262 for i in range(len(lines)):
263 if re.match( "[<]pre[>][<]div class='recordbody'[>]{2}gi", l[i] ):
264 i+= 1
265 while not re.match( "^[<]/pre[>][<]/div[>]", l[i]):
266 seq += l[i][:-1]
267 i+= 1
268 print seq
269 """
270 fasta = Fasta.Iterator( open(seqFile) )
271 query = fasta.next()
272
273 blast_result = NCBIWWW.qblast( program=method, database=db,
274 sequence=query, expect=e,
275 ncbi_gi='FALSE', **kw )
276
277 p = NCBIXML.BlastParser()
278
279 parsed = p.parse( blast_result )
280
281
282 self.__blast2dict( parsed, db )
283
284 - def localBlast( self, seqFile, db, method='blastp',
285 resultOut=None, e=0.01, **kw ):
286 """
287 Performa a local blast search (requires that the blast binaries
288 and databases are installed localy).
289 Uses Bio.Blast.NCBIStandalone.blastall (Biopython) for the search.
290
291 @param seqFile: file name with search sequence in FASTA format
292 @type seqFile: str
293 @param db: database(s) to search, e.g. ['swissprot', 'pdb']
294 @type db: [str]
295 @param method: search program to use, e.g. 'blastp', 'fasta'
296 (default: blastp)
297 @type method: str
298 @param e: expectation value cutoff
299 @type e: float
300 @param resultOut: save blast output to this new file
301 @type resultOut: str
302 @param kw: optional keywords::
303 --- Scoring ---
304 matrix Matrix to use (default BLOSUM62).
305 gap_open Gap open penalty (default 0).
306 gap_extend Gap extension penalty (default 0).
307
308 --- Algorithm ---
309 gapped Whether to do a gapped alignment. T/F
310 (default T)
311 wordsize Word size (blastp default 11).
312 keep_hits Number of best hits from a region to keep
313 (default off).
314 xdrop Dropoff value (bits) for gapped alignments
315 (blastp default 25).
316 hit_extend Threshold for extending hits (blastp default 11)
317
318 --- Processing ---
319 filter Filter query sequence? (T/F, default F)
320 restrict_gi Restrict search to these GI's.
321 believe_query Believe the query defline? (T/F, default F)
322 nprocessors Number of processors to use (default 1).
323
324 --- Formatting ---
325 alignments Number of alignments. (default 250)
326 @type kw: any
327
328 @raise BlastError: if program call failes
329 """
330 results = err = p = None
331 try:
332 self.l = {"blast.bin":settings.blast_bin,
333 "m":method,
334 "db":db,
335 "s":seqFile,
336 "e":e,
337 "k":kw}
338
339 results, err = NCBIStandalone.blastall( settings.blast_bin,
340 method, db, seqFile,
341 expectation=e, **kw)
342
343 p = NCBIStandalone.BlastParser()
344
345
346
347
348 parsed = p.parse( results )
349
350 self.__blast2dict( parsed, db )
351
352 except Exception, why:
353 self.error = { 'results':results,
354
355 'parser':p,
356 'blast_bin':settings.blast_bin, 'seqFile':seqFile,
357 'method':method, 'db':db,'kw':kw,
358 'exception':str(why) }
359 print T.lastErrorTrace()
360 raise BlastError( str(why) + "\n" +
361 str( self.error ) )
362
363
364 - def localPSIBlast( self, seqFile, db, resultOut=None, e=0.01, **kw ):
365 """
366 Performa a local psi-blast search (requires that the blast binaries
367 and databases are installed localy).
368 Uses Bio.Blast.NCBIStandalone.blastpgp (Biopython) for the search
369
370 @param seqFile: file name with search sequence in FASTA format
371 @type seqFile: str
372 @param db: database(s) to search e.g. ['swissprot', 'pdb']
373 @type db: [str]
374 @param e: expectation value cutoff (default: 0.001)
375 @type e: float
376 @param resultOut: save blast output to this new file
377 @type resultOut: str
378
379 @param kw: optional keywords::
380 --- Scoring ---
381 matrix Matrix to use (default BLOSUM62).
382 gap_open Gap open penalty (default 11).
383 gap_extend Gap extension penalty (default 1).
384 window_size Multiple hits window size (default 40).
385 npasses Number of passes (default 1).
386 passes Hits/passes (Integer 0-2, default 1).
387
388 --- Algorithm ---
389 gapped Whether to do a gapped alignment (T/F, default T).
390 wordsize Word size (default 3).
391 keep_hits Number of beset hits from a region to keep (def 0)
392 xdrop Dropoff value (bits) for gapped alignments
393 (def 15)
394 hit_extend Threshold for extending hits (default 11).
395 nbits_gapping Number of bits to trigger gapping (default 22).
396 pseudocounts Pseudocounts constants for multiple passes
397 (def 9).
398 xdrop_final X dropoff for final gapped alignment (default 25).
399 xdrop_extension Dropoff for blast extensions (default 7).
400 model_threshold E-value threshold to include in multipass model
401 (default 0.005).
402 required_start Start of required region in query (default 1).
403 required_end End of required region in query (default -1).
404
405 --- Processing ---
406 filter Filter query sequence with SEG? (T/F, default F)
407 believe_query Believe the query defline? (T/F, default F)
408 nprocessors Number of processors to use (default 1).
409
410 --- Formatting ---
411 alignments Number of alignments (default 250).
412 @type kw: any
413
414 @raise BlastError: if program call failes
415 """
416 results = err = p = None
417 try:
418 results, err = NCBIStandalone.blastpgp( settings.psi_blast_bin,
419 db, seqFile,
420 expectation=e, **kw)
421
422 p = NCBIStandalone.PSIBlastParser()
423
424 parsed = p.parse( results )
425
426 self.__blast2dict( parsed.rounds[-1], db )
427
428 except Exception, why:
429 self.error = { 'results':results,'err':err.read(), 'parser':p,
430 'blast_bin':settings.blast_bin, 'seqFile':seqFile,
431 'db':db,'kw':kw,'exception':str(why) }
432 T.errWriteln( "BlastError " + T.lastErrorTrace() )
433 print T.lastErrorTrace()
434 raise BlastError( str(why) + "\n" +
435 str( self.error ) )
436
437
438
440 """
441 Extract sequence ids from BlastParser result.
442
443 Use::
444 getSequenceIDs( Bio.Blast.Record.Blast ) -> [ str ]
445
446 @param blast_records: blast search result
447 @type blast_records: Bio.Blast.Record.Blast
448
449 @return: list of sequence IDs
450 @rtype: [str]
451
452 @raise BlastError: if can't find ID
453 """
454 result = []
455
456 for a in blast_records.alignments:
457 for pattern in [self.ex_gi, self.ex_swiss, self.ex_swiss2,
458 self.ex_pdb, self.ex_all,
459 self.ex_Rgi_1, self.ex_Rgi_2,
460 self.ex_Rswiss, self.ex_Rpdb]:
461
462 match = re.search( pattern, a.title )
463
464 if match:
465 break
466
467 if (not match) or (not match.group('id')):
468 raise BlastError( "Couldn't find ID in " + a.title +\
469 "with pattern " + str(pattern))
470
471 result += [ match.group('id') ]
472
473 return result
474
475
477 """
478 Use::
479 fastaRecordFromId( db, id ) -> Bio.Fasta.Record
480
481 @param db: database
482 @type db: str
483 @param id: sequence database ID
484 @type id: str
485
486 @return: fasta record
487 @rtype: Bio.Fasta.Record
488
489 @raise BlastError: if can't fetch fasta record from database
490 """
491 cmd = settings.fastacmd_bin + ' -d %s -s %s' % (db, id)
492
493 err, o = commands.getstatusoutput( cmd )
494 if err:
495 EHandler.warning('%s returned error: %r' % (cmd, err) )
496 raise BlastError( err )
497
498 frecord = Fasta.Record()
499 frecord.title = id
500
501 try:
502 for line in o.split('\n'):
503 if line[0] == '>':
504 frecord.annotation = line[1:]
505 else:
506 frecord.sequence += line.strip()
507
508 except IndexError:
509 raise InternalError, "Couldn't fetch fasta record %s from database %s" \
510 % (id,db)
511
512 return frecord
513
514
516 """
517 Use::
518 fastaFromIds( id_lst, fastaOut ) -> { str: Bio.Fasta.Record }
519
520 @param db: database
521 @type db: str
522 @param id_lst: sequence database IDs
523 @type id_lst: [str]
524
525 @return: dictionary mapping IDs to Bio.Fasta.Records
526 @rtype: {str: Bio.Fasta.Record}
527
528 @raise BlastError: if couldn't fetch record
529 """
530 result = {}
531 for i in id_lst:
532 try:
533 r = self.fastaRecordFromId( db, i )
534 result[i] = r
535 except BlastError, why:
536 EHandler.warning("couldn't fetch %s"%str(i),trace=0 )
537
538 return result
539
540
542 """
543 Write clustering results to file.
544
545 @param raw: write raw clustering result to disk (default: None)
546 @type raw: 1|0
547 """
548 if self.verbose and raw:
549 f = open( self.outFolder + self.F_CLUSTER_RAW, 'w', 1)
550
551 f.write(raw)
552 f.close()
553
554
556 """
557 Report the clustering result.
558
559 Writes:
560 - clustering results to L{F_CLUSTER_LOG}
561 - blast records to L{F_BLAST_OUT}
562 - blast records of centers to L{F_CLUSTER_BLAST_OUT}
563 - raw clustering results to L{F_CLUSTER_RAW} if raw not None
564
565 @param raw: write raw clustering result to disk (default: None)
566 @type raw: 1|0
567 """
568 try:
569 if self.verbose:
570 f = open( self.outFolder + self.F_CLUSTER_LOG, 'w', 1)
571
572 for cluster in self.clusters:
573
574 f.write( "%i\t%s\n" % ( len( cluster ), str( cluster )))
575
576 f.close()
577
578
579 centers = [ c[0] for c in self.clusters ]
580
581 self.writeClusteredBlastResult( \
582 self.outFolder + self.F_BLAST_OUT,
583 self.outFolder + self.F_CLUSTER_BLAST_OUT, centers )
584
585
586 self.copyClusterOut( raw=raw )
587
588 except IOError, why:
589 EHandler.warning( "Can't write cluster report." + str(why) )
590
591
592 - def clusterFasta( self, fastaIn=None, simCut=1.75, lenCut=0.9, ncpu=1 ):
593 """
594 Cluster sequences. The input fasta titles must be the IDs.
595 fastaClust( fastaIn [, simCut, lenCut, ncpu] )
596
597 @param fastaIn: name of input fasta file
598 @type fastaIn: str
599 @param simCut: similarity threshold (score < 3 or %identity)
600 (default: 1.75)
601 @type simCut: double
602 @param lenCut: length threshold (default: 0.9)
603 @type lenCut: double
604 @param ncpu: number of CPUs
605 @type ncpu: int
606
607 @raise BlastError: if fastaIn is empty
608 """
609 fastaIn = fastaIn or self.outFolder + self.F_FASTA_ALL
610
611 if T.fileLength( fastaIn ) < 1:
612 raise IOError( "File %s empty. Nothing to cluster"%fastaIn )
613
614 self.log.add( "\nClustering sequences:\n%s"%('-'*20) )
615
616 cmd = settings.blastclust_bin + ' -i %s -S %f -L %f -a %i' %\
617 (fastaIn, simCut, lenCut, ncpu)
618
619 if self.verbose:
620 self.log.add("- Command: %s"%cmd)
621
622
623 tmp = os.environ.get( 'TMPDIR', None )
624 if tmp:
625 del os.environ['TMPDIR']
626
627 err, o = commands.getstatusoutput( cmd )
628 if err:
629 raise BlastError( err )
630
631 if tmp:
632 os.environ['TMPDIR'] = tmp
633
634
635
636 lines = [ l.split() for l in o.split('\n') ]
637 dateline = [ l[-1] for l in lines ].index('queries')
638 self.clusters = lines[dateline+1:]
639
640 self.reportClustering( raw=o )
641
642 self.bestOfCluster = [ self.selectFasta( ids )
643 for ids in self.clusters ]
644
645
648 """
649 Run cluterFasta iteratively, with tighter clustering settings, until
650 the number of clusters are less than self.clusterLimit.
651
652 @note: Older versions of T-Coffee can't handle more that approx.
653 50 sequences (out of memory). This problem is taken care of
654 in versions > 3.2 of T-Coffee.
655
656 @param fastaIn: name of input fasta file
657 @type fastaIn: str
658 @param simCut: similarity threshold (score < 3 or %identity)
659 (default: 1.75)
660 @type simCut: double
661 @param lenCut: length threshold (default: 0.9)
662 @type lenCut: double
663 @param ncpu: number of CPUs
664 @type ncpu: int
665 """
666 iter = 1
667 while self.clustersCurrent > self.clusterLimit \
668 or self.clustersCurrent == None:
669
670 self.clusterFasta( fastaIn, simCut, lenCut, ncpu )
671 self.clustersCurrent = len( self.clusters )
672
673 self.log.add( "- Clustering iteration %i produced %i clusters." \
674 %( iter, len( self.clusters) ))
675 self.log.add( "\tusing a simCut of %.2f and a lenCut of %.3f.\n" \
676 %( simCut, lenCut ) )
677
678 simCut -= 0.15
679 lenCut -= 0.015
680 iter += 1
681
682
684 """
685 Select one member of cluster of sequences.
686
687 @param ids_from_cluster: list of sequence ids defining the cluster
688 @type ids_from_cluster: [str]
689 @return: id of best in cluster
690 @rtype: str
691 """
692 return ids_from_cluster[0]
693
694
696 """
697 Create fasta file for given set of records.
698
699 @param frecords: list of Bio.Blast.Records
700 @type frecords: [Bio.Blast.Record]
701 @param fastaOut: file name
702 @type fastaOut: str
703 """
704 f = open( T.absfile(fastaOut), 'w' )
705 for r in frecords:
706 f.write( str( r ) + '\n' )
707 f.close()
708
709
711 """
712 Write all found template sequences to fasta file.
713
714 @param fastaOut: write all fasta records to file
715 (default: L{F_FASTA_ALL})
716 @type fastaOut: str OR None
717 """
718 fastaOut = fastaOut or self.outFolder + self.F_FASTA_ALL
719 self.writeFasta( self.frecords, fastaOut )
720
721
723 """
724 Write non-redundant set of template sequences to fasta file.
725
726 @param fastaOut: write non-redundant fasta records to file
727 (default: L{F_FASTA_NR})
728 @type fastaOut: str
729 """
730 fastaOut = fastaOut or self.outFolder + self.F_FASTA_NR
731
732 self.writeFasta( self.getClusteredRecords(), fastaOut )
733
734
736 """
737 Write the result from the blast search to file (similar to the
738 output produced by a regular blast run).
739
740 writeBlastResult( parsed_blast, outFile )
741
742 @param parsed_blast: Bio.Blast.Record.Blast
743 @type parsed_blast: Bio.Blast.Record.Blast
744 @param outFile: file to write the blast result to
745 @type outFile: str
746 """
747 f = open( T.absfile( outFile ), 'w' )
748
749 i=1
750 for alignment in parsed_blast.alignments:
751 for hsp in alignment.hsps:
752 s = string.replace(alignment.title,'\n',' ')
753 s = string.replace(s, 'pdb|', '\npdb|')
754 f.write('Sequence %i: %s\n'%(i,s))
755 f.write('Length: %i \tScore: %3.1f \tE-value: %2.1e\n'\
756 %(hsp.identities[1], hsp.score, hsp.expect))
757 f.write( 'Identities: %i \tPositives: %i \tGaps: %i\n'\
758 %(hsp.identities[0], hsp.positives[0],
759 hsp.gaps[0] or 0 ))
760
761 f.write( '%s\n'%hsp.query )
762 f.write( '%s\n'%hsp.match )
763 f.write( '%s\n\n'%hsp.sbjct )
764 i += 1
765 f.close()
766
767
769 """
770 Reads the blast.out file and keeps only centers.
771
772 @param allFile: all blast results
773 @type allFile: file
774 @param clustFile: output file name
775 @type clustFile: str
776 @param selection: write only sequences in list
777 @type selection: list
778 """
779 f = open( clustFile, 'w' )
780
781 b = open( allFile, 'r' )
782 line = b.readlines()
783
784 j=0
785 sel = copy.copy(selection)
786 for l in line:
787 for s in sel:
788
789 if re.search( s, l[:30]):
790 sel.remove(s)
791 f.write( '\nCluster center %i:\n'%(selection.index(s)+1) )
792 f.writelines( l )
793
794
795 while j < len(line)-1:
796 j+=1
797
798 if len( line[j] )<4:
799 break
800
801 if line[j][:4] == 'pdb|':
802 pass
803 else:
804 f.writelines( line[j] )
805
806 f.close()
807 b.close()
808
809
810
811
812
813
814
815
817 """
818 Test class
819 """
820
821 - def run( self, local=0, flavour='blastp' ):
822 """
823 run function test
824
825 @param local: transfer local variables to global and perform
826 other tasks only when run locally
827 @type local: 1|0
828 @param flavour: flavour of blast to test, blastp for localBlast
829 OR blastpgp for localPSIBlast (default: blastp)
830 @type flavour: str
831
832 @return: 1
833 @rtype: int
834 """
835 import tempfile
836 import shutil
837 from Biskit.LogFile import LogFile
838
839 query = T.testRoot() + '/Mod/project/target.fasta'
840 outfolder = tempfile.mkdtemp( '_test_SequenceSearcher' )
841 shutil.copy( query, outfolder )
842
843
844 f_out = outfolder+'/SequenceSearcher.log'
845 l = LogFile( f_out, mode='w')
846
847 searcher = SequenceSearcher( outFolder=outfolder, verbose=1, log=l )
848
849
850
851 db = settings.db_swiss
852
853 f_target = searcher.outFolder + searcher.F_FASTA_TARGET
854
855 if local:
856 globals().update( locals() )
857
858
859
860
861 if flavour == 'blastpgp':
862 searcher.localPSIBlast( f_target, db, 'blastpgp', npasses=2 )
863 else:
864 searcher.localBlast( f_target, db, 'blastp', alignments=500, e=0.01 )
865
866 searcher.clusterFasta()
867
868 searcher.writeFastaClustered()
869
870 if local:
871 print '\nThe clustered result from the search can be found in %s'%f_out
872 print '\nSequenceSearche log file written to: %s'%f_out
873 globals().update( locals() )
874
875
876 T.tryRemove( outfolder, tree=1 )
877
878 return 1
879
880
882 """
883 Precalculated result to check for consistent performance.
884
885 @return: 1
886 @rtype: int
887 """
888 return 1
889
890
891
892 if __name__ == '__main__':
893
894 test = Test()
895
896
897 assert test.run( local=1 ) == test.expected_result()
898
899
900 assert test.run( local=1, flavour='blastpgp') == test.expected_result()
901