1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """
26 Clean PDB-files so that they can be used for MD.
27 """
28
29 import Biskit.molUtils as MU
30 import Biskit.tools as t
31 from Biskit.PDBModel import PDBModel
32
33 import Numeric as N
34
35 import copy
36
38 pass
39
41 """
42 Remove HETAtoms from PDB, replace non-standard AA by closest standard AA.
43 Remove non-standard atoms from standard AA residues (and more).
44 """
45
47 """
48 @param fpdb: pdb file OR PDBModel
49 @type fpdb: str
50 @param log: LogFile object
51 @type log: object
52 """
53 self.model = PDBModel( fpdb )
54 self.log = log
55
56
58 if self.log:
59 self.log.add( msg )
60 else:
61 if force:
62 print msg
63
65 """
66 Keep only atoms with alternate A field (well, or no alternate).
67 """
68 self.logWrite( self.model.pdbCode +
69 ': Removing multiple occupancies of atoms ...')
70
71 i = 0
72 to_be_removed = []
73
74 for a in self.model.getAtoms():
75
76 if a['alternate']:
77 try:
78 str_id = "%i %s %s %i" % (a['serial_number'], a['name'],
79 a['residue_name'],
80 a['residue_number'])
81
82 if a['alternate'].upper() == 'A':
83 a['alternate'] = ''
84
85 else:
86 if float( a['occupancy'] ) < 1.0:
87 to_be_removed += [ i ]
88 self.logWrite(
89 'removing %s (%s %s)' % (str_id,a['alternate'],
90 a['occupancy']))
91 else:
92 self.logWrite(
93 ('keeping non-A duplicate %s because of 1.0 '+
94 'occupancy') % str_id )
95
96 except:
97 self.logWrite("Error removing duplicate: "+t.lastError() )
98 i+=1
99
100 try:
101 self.model.remove( to_be_removed )
102 self.logWrite('Removed %i atoms' % len( to_be_removed ) )
103
104 except:
105 self.logWrite('No atoms with multiple occupancies to remove' )
106
107
109 """
110 Replace amino acids with none standard names with standard
111 amino acids according to L{MU.nonStandardAA}
112
113 @param amber: don't rename HID, HIE, HIP, CYX, NME, ACE [0]
114 @type amber: 1||0
115 @param keep: names of residues to keep
116 @type keep: [ str ]
117 """
118 standard = MU.atomDic.keys() + keep
119
120 if amber:
121 standard.extend( ['HID', 'HIE', 'HIP', 'CYX', 'NME', 'ACE'] )
122
123 replaced = 0
124
125 self.logWrite(self.model.pdbCode +
126 ': Looking for non-standard residue names...')
127
128 for a in self.model.getAtoms():
129
130 resname = a['residue_name'].upper()
131
132 if resname not in standard:
133 if resname in MU.nonStandardAA:
134 a['residue_name'] = MU.nonStandardAA[ resname ]
135
136 self.logWrite('renamed %s %i to %s' % \
137 (resname, a['residue_number'],
138 MU.nonStandardAA[ resname ]))
139 else:
140 a['residue_name'] = 'ALA'
141
142 self.logWrite('Warning: unknown residue name %s %i: ' \
143 % (resname, a['residue_number'] ) )
144 self.logWrite('\t->renamed to ALA.')
145
146 replaced += 1
147
148 self.logWrite('Found %i atoms with non-standard residue names.'% \
149 replaced )
150
151
153 """
154 Check if resname is a standard residue (according to L{MU.atomDic})
155 if not return the closest standard residue (according to
156 L{MU.nonStandardAA}).
157
158 @param resname: 3-letter residue name
159 @type resname: str
160
161 @return: name of closest standard residue or resname itself
162 @rtype: str
163 """
164 if resname in MU.atomDic:
165 return resname
166
167 if resname in MU.nonStandardAA:
168 return MU.nonStandardAA[ resname ]
169
170 return resname
171
172
174 """
175 First missing standard atom triggers removal of standard atoms that
176 follow in the standard order. All non-standard atoms are removed too.
177 Data about standard atoms are taken from L{MU.atomDic} and symomym
178 atom name is defined in L{MU.atomSynonyms}.
179
180 @return: number of atoms removed
181 @rtype: int
182 """
183 mask = []
184
185 self.logWrite("Checking content of standard amino-acids...")
186
187 for res in self.model.resList():
188
189 resname = self.__standard_res( res[0]['residue_name'] ).upper()
190 standard = copy.copy( MU.atomDic[ resname ] )
191
192
193 for a in res:
194 n = a['name']
195 if not n in standard and MU.atomSynonyms.get(n,0) in standard:
196 a['name'] = MU.atomSynonyms[n]
197 self.logWrite('%s: renaming %s to %s in %s %i' %\
198 ( self.model.pdbCode, n, a['name'],
199 a['residue_name'], a['residue_number'] ) )
200
201 anames = [ a['name'] for a in res ]
202 keep = 1
203
204
205 rm = []
206 for n in standard:
207 if not n in anames:
208 keep = 0
209
210 if not keep:
211 rm += [ n ]
212
213 for n in rm:
214 standard.remove( n )
215
216
217 for a in res:
218
219 if a['name'] not in standard:
220 mask += [1]
221 self.logWrite('%s: removing atom %s in %s %i '%\
222 ( self.model.pdbCode, a['name'],
223 a['residue_name'], a['residue_number'] ) )
224 else:
225 mask += [0]
226
227 self.model.remove( mask )
228
229 self.logWrite('Removed ' + str(N.sum(mask)) +
230 ' atoms because they were non-standard' +
231 ' or followed a missing atom.' )
232
233 return N.sum( mask )
234
235
236 - def process( self, keep_hetatoms=0, amber=0, keep_xaa=[] ):
237 """
238 Remove Hetatoms, waters. Replace non-standard names.
239 Remove non-standard atoms.
240
241 @param keep_hetatoms: option
242 @type keep_hetatoms: 0||1
243 @param amber: don't rename amber residue names (HIE, HID, CYX,..)
244 @type amber: 0||1
245 @param keep_xaa: names of non-standard residues to be kept
246 @type keep_xaa: [ str ]
247
248 @return: PDBModel (reference to internal)
249 @rtype: PDBModel
250
251 @raise CleanerError: if something doesn't go as expected ...
252 """
253 try:
254 if not keep_hetatoms:
255 self.model.remove( self.model.maskHetatm() )
256
257 self.model.remove( self.model.maskH2O() )
258
259 self.model.remove( self.model.maskH() )
260
261 self.remove_multi_occupancies()
262
263 self.replace_non_standard_AA( amber=amber, keep=keep_xaa )
264
265 self.remove_non_standard_atoms()
266
267
268 except KeyboardInterrupt, why:
269 raise KeyboardInterrupt( why )
270 except Exception, why:
271 raise CleanerError( 'Error cleaning model: %r' % why )
272
273 return self.model
274
275
276
277
278
279
280
282 """
283 Test class
284 """
285
286
287 - def run( self, local=0 ):
288 """
289 run function test
290
291 @param local: transfer local variables to global and perform
292 other tasks only when run locally
293 @type local: 1|0
294
295 @return: molecular mass
296 @rtype: float
297 """
298 from Biskit.LogFile import LogFile
299 import tempfile
300
301 f_out = tempfile.mktemp( '_test_PDBCleaner' )
302
303 l = LogFile( f_out, mode='w')
304
305
306 c = PDBCleaner( t.testRoot() + '/rec/1A2P_rec_original.pdb',
307 log=l )
308
309 m = c.process()
310
311 if local:
312 print 'PDBCleaner log file written to: %s'%f_out
313 globals().update( locals() )
314
315
316 t.tryRemove( f_out )
317
318 return N.sum( m.masses())
319
320
322 """
323 Precalculated result to check for consistent performance.
324
325 @return: molecular mass
326 @rtype: float
327 """
328 return 34029.0115499993
329
330
331 if __name__ == '__main__':
332
333 test = Test()
334
335 assert abs( test.run( local=1 ) - test.expected_result() ) < 1e-8
336