1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """
26 Blast 2 sequences against each other,
27 Return sequence identity
28
29 @note: argueably obsolete - use BioPython instead
30 """
31
32 from Biskit import Executor, TemplateError
33 import tools as T
34 import tempfile, re
35 import os.path
36 from Biskit.Errors import BiskitError
37
38
40 pass
41
42
44 """
45 Determine sequence identity between 2 protein sequences
46 """
47
49 """
50 @param seq1: sequence string
51 @type seq1: str
52 @param seq2: sequence string
53 @type seq2: str
54 """
55 self.seq1 = seq1
56 self.seq2 = seq2
57
58 self.inp1 = tempfile.mktemp('_seq1.fasta')
59 self.inp2 = tempfile.mktemp('_seq2.fasta')
60
61
62 self.ex_identity = re.compile('.+ Identities = (\d+)/(\d+) ')
63 self.ex_expect = re.compile('.+ Expect = ([\d\-e\.]+)')
64
65 blastcmd = '-i %s -j %s -M BLOSUM62 -p blastp -F F'\
66 %(self.inp1, self.inp2)
67
68 Executor.__init__( self, 'bl2seq', blastcmd, catch_out=1, **kw )
69
70
72 """
73 create temporary fasta files for bl2seq
74 """
75 f1 = open(self.inp1, 'w')
76 f2 = open(self.inp2, 'w')
77 f1.write(">Sequence 1\n"+self.seq1)
78 f2.write(">Sequence 2\n"+self.seq2)
79 f1.close()
80 f2.close()
81
82
92
93
95 """
96 Extract sequence identity and overlap length from one single
97 bl2seq hit
98
99 @return: blast results, e.g. {'aln_id':1.0, 'aln_len':120}
100 @rtype: dict
101 """
102
103 if not os.path.exists( self.f_out ):
104 raise Blast2SeqError,\
105 'Hmmersearch result file %s does not exist.'%self.f_out
106
107 if T.fileLength( self.f_out ) < 10:
108 raise Blast2SeqError,\
109 'Hmmersearch result file %s seems incomplete.'%self.f_out
110
111 out = open( self.f_out, 'r' )
112 hitStr = out.read()
113 out.close()
114
115
116 hitStr = hitStr.replace( '\n', '#' )
117
118 try:
119 _id = self.ex_identity.match(hitStr)
120 if (_id == None):
121 return {'res_id':0, 'aln_id':0, 'aln_len':0}
122 res_identical, res_number = int(_id.group(1)), int(_id.group(2))
123 id = 1.0 * res_identical/res_number
124 return {'res_id':res_identical, 'aln_id':id, 'aln_len':res_number}
125 except:
126 T.errWriteln("Error 1 in filterBlastHit:")
127 T.errWriteln("Hit Parsed: " + hitStr)
128 return {}
129
130
137
138
139
140
141
142
143 import Biskit.test as BT
144
145 -class Test(BT.BiskitTest):
146 """Test Blast2Seq"""
147
148 TAGS = [BT.EXE]
149
151 """Blast2Seq test """
152 self.blaster = Blast2Seq("AAAFDASEFFGIGHHSFKKEL",
153 "AAAFDASEFFGIGHHSAKK")
154
155 self.r = self.blaster.run()
156
157 if self.local:
158 print self.r
159
160 self.assertEqual( self.r,{'aln_len': 19, 'aln_id': 0.94736842105263153,
161 'res_id': 18} )
162
163
164 if __name__ == '__main__':
165
166 BT.localTest()
167