1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """
24 Parse a PDB file into a PDBModel.
25
26 @see L{PDBModel}
27 @see L{PDBParserFactory}
28 """
29 import Scientific.IO.PDB as IO
30 import Numeric as N
31
32 import Biskit.tools as T
33 import Biskit as B
34 from Biskit.PDBParser import PDBParser, PDBParserError
35
36
38
@staticmethod
def supports(source):
    """
    Tell whether this parser implementation can handle the given source.

    The method is static and can thus be called directly with the parser
    class rather than with an instance::

        >>> if PDBParser.supports('myfile.pdb'):
        >>>     ...

    @param source: candidate source; accepted are a str or a
                   B.LocalPath ending (case-insensitive) in '.pdb'
                   or '.pdb.gz'
    @type  source: str or LocalPath

    @return: True if the given source is supported by this parser
             implementation
    @rtype: bool
    """
    # NOTE(review): the 'def' line is absent from the reviewed listing;
    # name and parameter were rebuilt from the docstring example and the
    # body -- confirm against the real file.
    acceptable = type(source) is str or isinstance(source, B.LocalPath)

    # anything that is neither a plain str nor a LocalPath is rejected
    # before its ending is inspected
    if not acceptable:
        return acceptable

    return (source[-4:].upper() == '.PDB'
            or source[-7:].upper() == '.PDB.GZ')
55
56
57 @staticmethod
59 """
60 The method is static and can thus be called directly with the parser
61 class rather than with an instance::
62
63 >>> if PDBParser.description('myfile.pdb'):
64 >>> ...
65
66 @return: short free text description of the supported format
67 @rtype: str
68 """
69 return 'PDB file'
70
71
def idFromName(self, fname):
    """
    Extract PDB code from file name.

    @param fname: file name
    @type  fname: str

    @return: first 4 letters of the name returned by T.stripFilename,
             or '' if that name has fewer than 4 letters
    @rtype: str
    """
    # NOTE(review): the 'def' line is absent from the reviewed listing;
    # signature rebuilt from the call self.idFromName( model.fileName )
    # in update() -- confirm.
    stem = T.stripFilename(fname)

    # too short to contain a 4-letter code
    if len(stem) < 4:
        return ''

    return stem[:4]
86
87
def update(self, model, source, skipRes=None, lookHarder=0):
    """
    Update empty or missing fields of model from the source and
    register the source with the model via model.setSource().

    Fields are only filled if self.needsUpdate( model ) is true and
    only where the model does not already carry a (true) value.

    @param model: existing model
    @type  model: PDBModel
    @param source: source PDB file
    @type  source: str
    @param skipRes: list residue names that should not be parsed
    @type  skipRes: [ str ]
    @param lookHarder: ignored
    @type  lookHarder: 1|0

    @raise PDBParserError: if something is wrong with the source file
    """
    try:

        if self.needsUpdate(model):

            atoms, xyz = self.__collectAll(source, skipRes)

            model.atoms = model.atoms or atoms

            # NOTE(review): 'or' relies on the truth value of the
            # coordinate array -- confirm that this is what is wanted
            # for the array type in use.
            model.xyz = model.xyz or xyz

            # NOTE(review): inside a class body Python mangles
            # 'model.__terAtoms' with the name of the *enclosing* class,
            # not with 'PDBModel' -- confirm that this is the attribute
            # PDBModel actually reads.
            model.__terAtoms = model._PDBModel__pdbTer()

            model.fileName = model.fileName or source

            model.pdbCode = model.pdbCode or \
                            self.idFromName(model.fileName)

    except (KeyboardInterrupt, SystemExit):
        ## never disguise a user interrupt or interpreter exit as a
        ## parsing problem
        raise

    except:
        ## fetch the description of the original error first; the
        ## diagnostic below may raise and handle an error of its own
        error = T.lastError()

        ## best-effort hint only: the helper re-opens the source and
        ## must not replace the PDBParserError if that fails as well
        try:
            msg = self.__xplorAtomIndicesTest(source) or ' '
        except Exception:
            msg = ' '

        ## '\\ERROR: ' yields exactly the text of the former '\ERROR: '
        ## literal without relying on an invalid escape sequence
        raise PDBParserError('Cannot read ' + str(source) + ' as PDB\n'
                             '\\ERROR: ' + error + msg)

    model.setSource(source)
128
129
def __xplorAtomIndicesTest(self, source):
    """
    Look for atom records whose atom index was replaced by '*****'.

    In some cases the setup with parallel xplor trajectories
    runs out of atom indices when writing the pdb files to disc.
    When this happens (usually for the TIP3 waters in the later
    of the 10 parallel trajectories) the atom indices get
    replaced with ***** which will cause the parsing to fail.
    The error message received is quite cryptic - this function
    is here to give a more comprehensible message.

    @param source: file that failed to be parsed
    @type  source: str

    @return: explanatory message naming the first offending line
             (counted from 1), or None if no such line is found
    @rtype: str or None
    """
    # NOTE(review): the 'def' line is absent from the reviewed listing;
    # signature rebuilt from the call in update() -- confirm. The exact
    # whitespace inside the message literal below should also be checked
    # against the real file.
    import re

    f = open(source, 'r')
    try:
        lines = f.readlines()
    finally:
        ## release the handle even if reading fails
        f.close()

    ## raw string: same pattern as before, without invalid escapes
    badIndex = re.compile(r'^ATOM\s{2}\*{5}')

    for i, line in enumerate(lines):
        if badIndex.match(line):
            ## i is 0-based; report the line number as an editor shows it
            msg = """
Line %i to %i of the file %s contains invalid atom indices!

In some cases the setup with parallell xplor trajectories run out of atom indices when writing the pdb files to disc. When this happens (usualy for the TIP3 waters in the later of the 10 parallell trajectories) the atom indices get replaced with ***** which will cause the parsing to fail.

REMEDY: run the script fixAtomIndices.py
""" % (i + 1, len(lines), source)

            return msg

    return None
159
def __firstLetter(self, aName):
    """
    Return first letter in a string (e.g. atom name).

    Leading characters that int() accepts as a number are skipped.

    @param aName: atom name
    @type  aName: str

    @return: first character of aName that is not a number, or ''
             if aName is empty or consists of numbers only
    @rtype: str
    """
    # NOTE(review): the 'def' line is absent from the reviewed listing;
    # signature rebuilt from the call in __collectAll() -- confirm.
    for letter in aName:
        try:
            int(letter)
        except ValueError:
            ## not a number -> this is the character we are looking for
            return letter

    ## nothing but numbers (or nothing at all): no letter to report
    return ''
175
176
def __collectAll(self, fname, skipRes=None):
    """
    Parse ATOM/HETATM lines from PDB. Collect coordinates plus
    dictionaries with the other pdb records of each atom.
    Records of any other type (REMARK, HEADER, etc.) are ignored;
    reading stops at the first END or ENDMDL record.

    Some changes are made to the dictionary from PDBFile.readLine()::
      - the 'position' entry (with the coordinates) is removed
      - leading and trailing spaces are removed from 'name' ..
      - .. but a 'name_original' entry keeps the old name with spaces
      - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
      - an 'after_ter' entry with value 1 is added to an atom that
        directly follows a 'TER' line (other atoms do not get the key)
      - empty 'element' entries are filled with the first non-number
        letter from the atom 'name'

    A line for which the reader raises ValueError is reported to
    self.log and skipped.

    @param fname: name of pdb file
    @type  fname: str
    @param skipRes: list with residue names that should be skipped
    @type  skipRes: list of str

    @return: tuple of list of dictionaries from PDBFile.readLine()
             and xyz array N x 3
    @rtype: ( list, array )

    @raise PDBParserError: if parsing fails or no atom is found
    """
    # NOTE(review): the 'def' line is absent from the reviewed listing;
    # signature rebuilt from the call self.__collectAll( source, skipRes )
    # in update(); the default of skipRes is an assumption -- confirm.
    import sys   ## only used to fetch the active exception portably

    items = []
    xyz = []

    f = IO.PDBFile(fname)

    try:
        try:
            line, i = ('', ''), 0

            while line[0] != 'END' and line[0] != 'ENDMDL':

                i += 1
                try:
                    line = f.readLine()

                    newChain = line[0] == 'TER'
                    if newChain:
                        ## the TER record occupies a line of its own; the
                        ## record behind it is read under the same guard
                        ## so that a bad line is skipped here as well
                        i += 1
                        line = f.readLine()

                except ValueError:
                    what = sys.exc_info()[1]
                    self.log.add('Warning: Error parsing line %i of %s' %
                                 (i, T.stripFilename(fname)))
                    self.log.add('\tError: ' + str(what))
                    continue

                if line[0] in ['ATOM', 'HETATM']:

                    a = line[1]

                    if skipRes and a['residue_name'] in skipRes:
                        continue

                    a['name_original'] = a['name']
                    a['name'] = a['name'].strip()

                    a['type'] = line[0]
                    if newChain:
                        a['after_ter'] = 1

                    if a['element'] == '':
                        a['element'] = self.__firstLetter(a['name'])

                    ## positions flagged as vector are unpacked into a
                    ## plain 3-item list, anything else is taken as it is
                    if a['position'].is_vector:
                        xyz.append([a['position'][0],
                                    a['position'][1],
                                    a['position'][2]])
                    else:
                        xyz.append(a['position'])

                    del a['position']

                    items.append(a)

        except (KeyboardInterrupt, SystemExit):
            ## never disguise an interrupt as a parsing problem
            raise

        except:
            raise PDBParserError("Error parsing file " + str(fname) + ": "
                                 + T.lastError())

    finally:
        ## always release the file; a failing close is deliberately
        ## ignored (best effort, as before)
        try:
            f.close()
        except Exception:
            pass

    if not xyz:
        raise PDBParserError("Error parsing file " + str(fname) + ": " +
                             "Couldn't find any atoms.")

    return items, N.array(xyz, 'f')
267
268
270 """
271 Test class
272 """
273
def run(self, local=0):
    """
    Run the function test: parse the structure rec/1A2P.pdb found below
    T.testRoot() and sum up what its centerOfMass() returns.

    @param local: print a progress message and transfer the local
                  variables to the globals (only useful when run locally)
    @type  local: 1|0

    @return: sum over the center of mass coordinates of the parsed model
    @rtype: number
    """
    if local:
        print('Loading pdb file ..')

    ## p and m must keep these names: they are exported to the module
    ## globals below
    p = PDBParseFile()
    m = p.parse2new(T.testRoot() + '/rec/1A2P.pdb')

    if local:
        globals().update(locals())

    return N.sum(m.centerOfMass())
297
298
def expected_result(self):
    """
    Precalculated result to check for consistent performance.

    @return: sum over the three reference center of mass coordinates
    @rtype: number
    """
    # NOTE(review): the 'def' line is absent from the reviewed listing;
    # signature rebuilt from the call test.expected_result() -- confirm.
    reference = N.array([29.53385022, 46.39655482, 37.75218589])

    return N.sum(reference)
307
308
if __name__ == '__main__':

    ## stand-alone self test: the freshly parsed result has to agree with
    ## the stored reference value within 1e-8
    test = Test()

    assert abs(test.run(local=1) - test.expected_result()) < 1e-8
314