1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """
24 Fill in missing atoms, and report low occupancies
25
26 Vintage code used by pdb2xplor script -- use L{Biskit.PDBCleaner} for
27 new implementations.
28 """
29
30 from ChainSeparator import ChainSeparator
31 from Scientific.IO.PDB import *
32 import tools as T
33
35 """
36 Take with each call to next() one chain from ChainSeparator;
37 Return this chain with completed residues.
38 """
39
41 """
42 Clean up separate chains.
43
44 @param chainSeparator: ChainSeparator
45 @type chainSeparator: module
46 """
47 self.reader = chainSeparator
48 self.pdbname = self.reader.pdbname()
49 self.log = self.reader.log
50
51 self.aminoAcidDict = {'GLY':['N','CA','C','O'],
52 'ALA':['N','CA','C','O','CB'],
53 'VAL':['N','CA','C','O','CB','CG1','CG2'],
54 'LEU':['N','CA','C','O','CB','CG','CD1','CD2'],
55 'ILE':['N','CA','C','O','CB','CG1','CG2','CD1'],
56 'MET':['N','CA','C','O','CB','CG','SD','CE'],
57 'PRO':['N','CA','C','O','CB','CG','CD'],
58 'PHE':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ'],
59 'TRP':['N','CA','C','O','CB','CG','CD1','CD2','NE1','CE2','CE3',
60 'CZ2','CZ3','CH2'],
61 'SER':['N','CA','C','O','CB','OG'],
62 'THR':['N','CA','C','O','CB','OG1','CG2'],
63 'ASN':['N','CA','C','O','CB','CG','OD1','ND2'],
64 'GLN':['N','CA','C','O','CB','CG','CD','OE1','NE2'],
65 'TYR':['N','CA','C','O','CB','CG','CD1','CD2','CE1','CE2','CZ','OH'],
66 'CYS':['N','CA','C','O','CB','SG'],
67 'LYS':['N','CA','C','O','CB','CG','CD','CE','NZ'],
68 'ARG':['N','CA','C','O','CB','CG','CD','NE','CZ','NH1','NH2'],
69 'HIS':['N','CA','C','O','CB','CG','ND1','CD2','CE1','NE2'],
70 'ASP':['N','CA','C','O','CB','CG','OD1','OD2'],
71 'GLU':['N','CA','C','O','CB','CG','CD','OE1','OE2'],
72 'ACE':['CA', 'C', 'O'],
73 'NME':['N', 'CA'] }
74
75
77 """
78 Tweak list of allowed atom names to one expected for a
79 C-terminal residue.
80
81 @param aName_list: list of allowed atom names
82 e.g. ['N','CA','C','O','CB']
83 @type aName_list: list of strings
84
85 @return: e.g. ['N','CA','C','OT1','CB','OT2']
86 @rtype: list of str
87 """
88 result = []
89 result += aName_list
90 try:
91 result[result.index('O')] = 'OT1'
92 result += ['OT2']
93 except:
94 pass
95 return result
96
97
99 """
100 Add atom with given name to residue.
101
102 @param residue: Scientific.IO.PDB.Residue object
103 @type residue: object
104 @param atom_name: atom name
105 @type atom_name: str
106 """
107 if len(atom_name) < 2:
108
109 atom_name = atom_name + ' '
110
111 residue.atom_list.append( Atom(atom_name, Vector(0,0,0),\
112 element=atom_name[0] ) )
113
114
116 """
117 Look for missing or unknown atom names, add missing atoms,
118 report unknown atoms.
119
120 @param chain: Scientific.IO.PDB.PeptideChain object
121 @type chain: object
122
123 @return: Scientific.IO.PDB.PeptideChain object
124 @rtype: chain object
125 """
126 chain.deleteHydrogens()
127 i = 0
128 self.log.add("Checking atoms of chain "+chain.segment_id)
129
130 for res in chain:
131 try:
132 if i < len(chain)-1:
133 alowed = self.aminoAcidDict[res.name]
134 else:
135 alowed = self._res2Terminal(self.aminoAcidDict[res.name])
136
137 name_list = []
138
139 for atom in res.atom_list:
140
141 name_list = name_list + [atom.name]
142 if not (atom.name in alowed):
143 self.log.add('\tunknown atom: ' + atom.name + ' : '+\
144 res.name+ str(res.number))
145
146 for name in alowed:
147 if not (name in name_list):
148
149 self._addMissing(res, name)
150 self.log.add('\tadded missing atom -> '+ name+ ' : '+\
151 res.name+ str(res.number))
152
153 except:
154 s = "\ncompleteResidues(): Error while checking atoms.\n"
155 s = s + "residue " + str(i)+ " :"+ str(res) + "\n"
156 s = s + T.lastError()
157 T.errWriteln(
158 "Error while completing residues, check log for details.")
159 self.log.add(s)
160
161 i = i+1
162
163 return chain
164
165
167 """
168 Check and report atoms with ocupancies that is not 100%
169 Scientific.PDB.IO will only take one of the atoms even if there are
170 alternate locations indicated in the PDB-file. The code below does only
171 check for none 100% occupancies and report them to the log-file.
172
173 @param chain: Scientific.IO.PDB.PeptideChain object
174 @type chain: chain object
175
176 @return: Scientific.IO.PDB.PeptideChain object
177 @rtype: chain object
178 """
179 self.log.add("Checking atom occupancies of chain "+chain.segment_id)
180 for res in chain:
181 for atom in res:
182 if atom.properties.get('occupancy',1.0) != 1.0:
183 self.log.add('\tOccupancy: '+\
184 str(atom.properties['occupancy']) \
185 + ' : ' + atom.name + ' : '+ res.name+ ' ' +\
186 str(res.number))
187 return chain
188
189
191 """
192 Change name of atoms in specified residue.
193
194 @param chain: Scientific.IO.PDB.PeptideChain object
195 @type chain: chain object
196 @param oldAtomName: atom name
197 @type oldAtomName: str
198 @param residueNum: residue number
199 @type residueNum: int
200 @param newAtomName: atom name
201 @type newAtomName: str
202
203 @return: Scientific.IO.PDB.PeptideChain object
204 @rtype: chain object
205 """
206 changeRes = chain.residues[residueNum]
207 if changeRes.atoms.has_key(oldAtomName) == 1:
208 changeRes.atoms[oldAtomName].name = newAtomName
209 return chain
210
211
213 """
214 Terminal amino acid can't have atom type O and OXT - have to be
215 OT1 and OT2
216
217 @param chain: Scientific.IO.PDB.PeptideChain object
218 @type chain: chain object
219
220 @return: Scientific.IO.PDB.PeptideChain object
221 @rtype: chain object
222 """
223 self._find_and_change(chain, 'O', -1, 'OT1')
224 self._find_and_change(chain, 'OXT', -1, 'OT2')
225 return chain
226
227
229 """
230 Obtain next chain from ChainSeparator; Add missing atoms.
231
232 @return: Scientific.IO.PDB.PeptideChain, completed chain OR
233 if no chain is left
234 @rtype: chain object OR None
235 """
236 chain = self.reader.next()
237 if (chain == None):
238
239 self.reader.extractWaters( )
240
241 return None
242 self.log.add("\nCleaning up chain "+chain.segment_id+": ")
243
244
245 chain = self._checkOccupancy(chain)
246
247
248 chain = self._correct_Cterm(chain)
249 chain = self._completeResidues(chain)
250
251 return chain
252
253
254
255
256
257 import Biskit.test as BT
258
259 -class Test(BT.BiskitTest):
260 """ Test ChainCleaner"""
261
263 self.fname = T.testRoot() + '/rec/1A2P_rec_original.pdb'
264 self.outPath = T.tempDir()
265
268
269
271 """ChainCleaner test"""
272 self.cleaner = ChainCleaner( ChainSeparator(self.fname, self.outPath) )
273 self.cleaner.next()
274
275 if self.local:
276 print 'Wrote log: %s'%(self.cleaner.log.fname)
277
278
279 f= open( self.cleaner.log.fname, 'r')
280 self.result = f.readlines()
281 f.close()
282
283 self.assertEqual(self.result, self.EXPECTED)
284
285
286 EXPECTED = ['\n', 'WARNING! The PDB-file contains coordinates for none water HETATMs.\n', 'If you want to keep the HETATM - prepare the file for Xplor manualy \n', '\n', 'String found in line(s): \n', '\n', 'HETATM \n', 'HETATM 881 ZN ZN A 112 33.898 42.301 55.562 0.30 41.80 ZN \n', 'HETATM 1891 ZN ZN B 112 8.869 30.194 74.011 0.35 32.13 ZN \n', 'HETATM 2908 ZN ZN C 112 39.324 15.495 56.124 1.00 12.86 ZN \n', '\n', '--------------------------------------------------------------------------------\n', '\n', 'Separate chains: \n', '------------------\n', '\n', " Chain ID's of compared chains: ['A', 'B', 'C']\n", ' Cross-Identity between chains:\n', '[[ 1. 1. 1.]\n', ' [ 0. 1. 1.]\n', ' [ 0. 0. 1.]]\n', ' Identity threshold used: 0.9\n', 'NOTE: Identity matrix will be used for removing identical chains.\n', '2 chains have been removed.\n', '\n', '0 breaks found in chain (108 residues) A: []\n', 'changed segment ID of chain A to 1A2A\n', '\n', 'Cleaning up chain 1A2A: \n', 'Checking atom occupancies of chain 1A2A\n', '\tOccupancy: 0.3 : CD2 : TYR 17\n', '\tOccupancy: 0.8 : CE2 : TYR 17\n', '\tOccupancy: 0.5 : CZ : TYR 17\n', '\tOccupancy: 0.5 : OH : TYR 17\n', '\tOccupancy: 0.5 : OD1 : ASP 22\n', '\tOccupancy: 0.5 : OD2 : ASP 22\n', '\tOccupancy: 0.5 : CB : SER 28\n', '\tOccupancy: 0.5 : OG : SER 28\n', '\tOccupancy: 0.5 : CB : GLN 31\n', '\tOccupancy: 0.5 : CG : GLN 31\n', '\tOccupancy: 0.5 : CD : GLN 31\n', '\tOccupancy: 0.5 : OE1 : GLN 31\n', '\tOccupancy: 0.5 : NE2 : GLN 31\n', '\tOccupancy: 0.5 : CB : SER 38\n', '\tOccupancy: 0.5 : OG : SER 38\n', '\tOccupancy: 0.5 : N : ARG 59\n', '\tOccupancy: 0.5 : CA : ARG 59\n', '\tOccupancy: 0.5 : C : ARG 59\n', '\tOccupancy: 0.5 : O : ARG 59\n', '\tOccupancy: 0.0 : CB : ARG 59\n', '\tOccupancy: 0.0 : CG : ARG 59\n', '\tOccupancy: 0.0 : CD : ARG 59\n', '\tOccupancy: 0.0 : NE : ARG 59\n', '\tOccupancy: 0.0 : CZ : ARG 59\n', '\tOccupancy: 0.0 : NH1 : ARG 59\n', '\tOccupancy: 0.0 : NH2 : ARG 59\n', '\tOccupancy: 0.5 : OE1 : GLU 60\n', '\tOccupancy: 0.5 : OE2 : GLU 60\n', '\tOccupancy: 0.5 : CE : LYS 66\n', '\tOccupancy: 0.5 : NZ : LYS 66\n', '\tOccupancy: 0.5 : CB : SER 85\n', '\tOccupancy: 0.5 : OG : SER 85\n', '\tOccupancy: 0.5 : CB : ILE 96\n', '\tOccupancy: 0.5 : CG1 : ILE 96\n', '\tOccupancy: 0.5 : CG2 : ILE 96\n', '\tOccupancy: 0.5 : CD1 : ILE 96\n', 'Checking atoms of chain 1A2A\n']
287
288
289 if __name__ == '__main__':
290
291 BT.localTest()
292