1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """
25 Store and manipulate coordinates and atom information.
26 """
27
28 import tools as t
29 import molUtils
30 import mathUtils
31 import match2seq
32 import rmsFit
33 from LocalPath import LocalPath
34 from Errors import BiskitError
35 from Biskit import EHandler
36 from ProfileCollection import ProfileCollection, ProfileError
37 from PDBParserFactory import PDBParserFactory
38
39 import Numeric as N
40 import MLab
41 import os, sys
42 import copy
43 import time
44 import string
45 import types
46 import Scientific.IO.PDB as IO
47
48 from multiarray import arraytype
49
50
54
56 pass
57
59 """
60 Store and manipulate coordinates and atom infos stemming from a
61 PDB file. Coordinates are stored in the Numeric array 'xyz'; the
62 additional atom infos from the PDB are stored in the list of
63 dictionaries 'atoms'.
64 Methods are provided to remove items from both atoms and xyz
65 simultaniously, to restore the PDB file, to get masks for certain
66 atom types/names/residues, to iterate over residues, sort atoms etc.
67
68 Atom- or residue-related values can be put into 'profile' arrays.
69 See setAtomProfile() and setResProfile(). Any reordering or removal of
70 atoms is also applied to the profiles so that they should always match
71 the current atom/residue order.
72
73 The object remembers its source (a PDB file or a PDBModel in memory
74 or a pickled PDBModel on disc) and keeps track of whether atoms, xyz, or
75 (which) profiles have been changed with respect to the source.
76 The slim() method is called before pickling a PDBModel. It sets to None
77 the atoms and/or xyz array or any profiles if they have not been changed
78 since beeing read from a source on disc. The change status of xyz and
79 atoms is reported by isChanged() (with respect to the direct source) and
80 isChangedFromDisc() (with respect to the source of the source... just in
81 case).
82 You can trick this mechanism by setting atomsChanged or xyzChanged back
83 to 0 if you want to make only temporary changes that are lost after a
84 call to slim().
85
86 Additional infos about the model can be put into a dictionary 'info'.
87
88 @todo: clean up, rename returnPdb()
89 @todo: clean up the __terAtoms mess
90 @todo: use Profiles instead of space consuming atom dictionaries
91 """
92
93 - def __init__( self, source=None, pdbCode=None, noxyz=0, skipRes=None ):
94 """
95 - PDBModel() creates an empty Model to which coordinates (field xyz)
96 and PDB infos (field atoms) have still to be added.
97 - PDBModel( file_name ) creates a complete model with coordinates
98 and PDB infos taken from file_name (pdb, pdb.gz, pickled PDBModel)
99 - PDBModel( PDBModel ) creates a copy of the given model
100 - PDBModel( PDBModel, noxyz=1 ) creates a copy without coordinates
101
102 @param source: str, file name of pdb/pdb.gz file OR pickled PDBModel OR
103 PDBModel, template structure to copy atoms/xyz field from
104 @type source: str or PDBModel
105 @param pdbCode: PDB code, is extracted from file name otherwise
106 @type pdbCode: str or None
107 @param noxyz: 0 (default) || 1, create without coordinates
108 @type noxyz: 0||1
109
110 @raise PDBError: if file exists but can't be read
111 """
112 self.source = source
113 if type( self.source ) is str:
114 self.source = LocalPath( self.source )
115
116 self.__validSource = 0
117 self.fileName = None
118 self.pdbCode = pdbCode
119 self.xyz = None
120 self.atoms = None
121
122
123 self.rProfiles = PDBProfiles()
124 self.aProfiles = PDBProfiles()
125
126
127 self.caMask = None
128 self.bbMask = None
129 self.heavyMask = None
130
131
132 self.__resMap = None
133 self.__resIndex = None
134
135
136 self.__terAtoms = []
137
138 if noxyz:
139
140 self.xyz = 0
141
142
143 self.xyzChanged = 0
144 self.atomsChanged = 0
145
146 self.forcePickle = 0
147
148
149 self.initVersion = self.version()
150
151
152 self.info = { 'date':t.dateSortString() }
153
154 if source <> None:
155
156 self.update( skipRes=skipRes, lookHarder=1 )
157
158 if noxyz:
159
160 self.xyz = None
161
162
164 return 'PDBModel $Revision: 2.12 $'
165
166
168 """
169 called before pickling the object.
170 """
171 self.slim()
172 self.forcePickle = 0
173 return self.__dict__
174
176 """
177 called for unpickling the object.
178 """
179 self.__dict__ = state
180
181 self.__defaults()
182
185
187 """
188 backwards compatibility to earlier pickled models
189 """
190
191 self.__resMap = getattr( self, '_PDBModel__resMap', None)
192 self.__resIndex = getattr( self, '_PDBModel__resIndex', None)
193
194 if type( self.source ) == str:
195 self.source = LocalPath( self.source )
196
197 self.__validSource = getattr( self, '_PDBModel__validSource', 0)
198
199 self.initVersion = getattr( self, 'initVersion', 'old PDBModel')
200
201
202 if 'resProfiles' in self.__dict__:
203 self.rProfiles=PDBProfiles(
204 profiles=getattr(self,'resProfiles',{}),
205 infos=getattr(self,'resProfiles_info',{}) )
206 try:
207 del self.resProfiles; del self.resProfiles_info
208 except: pass
209
210 if 'atomProfiles' in self.__dict__:
211 self.aProfiles=PDBProfiles(
212 profiles=getattr(self,'atomProfiles',{}),
213 infos=getattr(self,'atomProfiles_info',{}) )
214 try:
215 del self.atomProfiles; del self.atomProfiles_info
216 except: pass
217
218
219 if not getattr( self, 'aProfiles', None):
220 self.aProfiles = PDBProfiles()
221 if not getattr( self, 'rProfiles', None):
222 self.rProfiles = PDBProfiles()
223
224 self.forcePickle = getattr( self, 'forcePickle', 0 )
225
226 self.info = getattr( self, 'info', { 'date':t.dateSortString() } )
227
228
229
230 if getattr( self, '_PDBModel__terAtoms', -1) == -1:
231 self.__terAtoms = self.__pdbTer(1)
232
233
234 if getattr( self, 'xyx', 0 ):
235 del self.xyx
236
237
238 - def update( self, skipRes=None, lookHarder=0, force=0 ):
239 """
240 Read coordinates, atoms, fileName, etc. from PDB or
241 pickled PDBModel - but only if they are currently empty.
242 The atomsChanged and xyzChanged flags are not changed.
243
244 @param skipRes: names of residues to skip if updating from PDB
245 @type skipRes: list of str
246 @param lookHarder: 0(default): update only existing profiles
247 @type lookHarder: 0|1
248 @param force: ignore invalid source (0) or report error (1)
249 @type force: 0|1
250
251 @raise PDBError: if file can't be unpickled or read:
252 """
253 source = self.validSource()
254
255 if source is None and force:
256 raise PDBError( str(self.source) + ' is not a valid source.')
257
258 if source is None:
259 return
260
261 parser = PDBParserFactory.getParser( source )
262 parser.update(self, source, skipRes=skipRes, lookHarder=lookHarder )
263
264
266 """
267 @param rmOld: 1, remove after_ter=0 flags from all atoms
268 @type rmOld: 1||0
269
270 @return: list of atom indices that are followed by a TER record
271 (marked with 'after_ter' flag of the next atom
272 by __collectAll).
273 @rtype: list of int
274 """
275 atoms = self.getAtoms()
276
277
278 ater = [ i-1 for i in range( self.lenAtoms() )
279 if atoms[i].get('after_ter',0)]
280
281
282 if rmOld:
283 for a in self.atoms:
284 if a.has_key('after_ter') and a['after_ter']==0:
285 del a['after_ter']
286
287
288 return N.compress( N.greater( ater, -1 ), ater )
289
290
292 """
293 Replace coordinates.
294
295 @param xyz: Numpy array ( 3 x N_atoms ) of float
296 @type xyz: array
297
298 @return: N.array( 3 x N_atoms ) or None, old coordinates
299 @rtype: array
300 """
301 old = self.xyz
302 self.xyz = xyz
303
304 self.xyzChanged = self.xyzChanged or \
305 not mathUtils.arrayEqual(self.xyz,old )
306 return old
307
308
310 """
311 Replace atoms list of dictionaries.
312 self.__terAtoms is re-created from the 'after_ter' records in atoms
313
314 @param atoms: list of dictionaries as returned by PDBFile.readLine()
315 (nearly, for differences see L{__collectAll()} )
316 @type atoms: list of dictionaries
317
318 @return: [ dict ], old atom dictionaries
319 @rtype: list of dict
320 """
321 old = self.atoms
322 self.atoms = atoms
323
324 changedNow = not ( self.atoms == old )
325
326 if self.atomsChanged:
327 self.__resMap = self.__resIndex = None
328 self.caMask = None
329 self.bbMask = None
330 self.heavyMask = None
331 self.__terAtoms = self.__pdbTer()
332
333 self.atomsChanged = self.atomsChanged or changedNow
334
335 return old
336
337
339 """
340 @param source: LocalPath OR PDBModel OR str
341 """
342 if type( source ) == str:
343 self.source = LocalPath( source )
344 else:
345 self.source = source
346 self.__validSource = 0
347
348
349 - def getXyz( self, mask=None ):
350 """
351 Get coordinates, fetch from source PDB or pickled PDBModel,
352 if necessary.
353
354 @param mask: atom mask
355 @type mask: list of int OR array of 1||0
356
357 @return: xyz-coordinates, N.array( 3 x N_atoms, 'f' )
358 @rtype: array
359 """
360 if self.xyz is None:
361 self.update( force=1 )
362
363 if self.xyz is None:
364 return N.array( [], 'f' )
365
366 if mask is None:
367 return self.xyz
368
369 return N.compress( mask, self.xyz, 0 )
370
371
373 """
374 Get atom dictionaries, fetch from source PDB or pickled PDBModel,
375 if necessary. See also L{__collectAll()}.
376
377 @param mask: atom mask
378 @type mask: list of int OR array of 1||0
379
380 @return: atom dictionary, list of dictionaries from PDBFile.readline()
381 @rtype: list of dic
382 """
383 if self.atoms is None:
384 self.update( force=1 )
385
386 if self.atoms is None:
387 return []
388
389 if mask is None:
390 return self.atoms
391
392 return [ self.atoms[i] for i in N.nonzero( mask ) ]
393
394
395 - def setResProfile( self, name, prof, mask=None, default=None, asarray=1,
396 comment=None, **moreInfo):
397 """
398 Add/override residue-based profile.
399
400 @param name: name to access profile
401 @type name: str
402 @param prof: list/array of values
403 @type prof: list OR array
404 @param mask: list/array 1 x N_residues of 0|1, N.sum(mask)==len(prof)
405 @type mask: list OR array
406 @param default: value for masked residues
407 default: None for lists, 0 for arrays
408 @type default: any
409 @param asarray: store as list (0), as array (2) or store numbers as
410 array but everything else as list (1) default: 1
411 @type asarray: 0|1|2
412 @param comment: goes into aProfiles_info[name]['comment']
413 @type comment: str
414 @param moreInfo: additional key-value pairs for aProfiles_info[name]
415 @type moreInfo: (key, value)
416
417 @raise ProfileError: if length of prof != N_atoms
418 """
419 self.rProfiles.set( name, prof, mask=mask, default=default,
420 asarray=asarray, comment=comment, **moreInfo )
421
422 - def setAtomProfile( self, name, prof, mask=None, default=None, asarray=1,
423 comment=None, **moreInfo):
424 """
425 Add/override atom-based profile.
426
427 @param name: name to access profile
428 @type name: str
429 @param prof: list/array of values
430 @type prof: list OR array
431 @param mask: list/array 1 x N_residues of 0 || 1, profile items
432 @type mask: list OR array
433 @param default: value for masked residues
434 default: None for lists, 0 for arrays
435 @type default: any
436 @param asarray: store as list (0), as array (2) or store numbers as
437 array but everything else as list (1) default: 1
438 @type asarray: 0|1|2
439 @param comment: goes into aProfiles_info[name]['comment']
440 @type comment: str
441 @param moreInfo: additional key-value pairs for aProfiles_info[name]
442 @type moreInfo: (key, value)
443
444 @raise ProfileError: if length of prof
445 """
446 self.aProfiles.set( name, prof, mask=mask, default=default,
447 asarray=asarray, comment=comment, **moreInfo )
448
449
451 """
452 Use::
453 resProfile( profile_name ) -> array 1 x N_res with residue values
454
455 @param name: name to access profile
456 @type name: str
457
458 @return: residue profile array 1 x N_res with residue values
459 @rtype: array
460
461 @raise ProfileError: if rProfiles contains no entry for |name|
462 """
463 return self.rProfiles.get( name, default=default )
464
465
467 """
468 Use::
469 atomProfile( profile_name ) -> array 1 x N_atoms with atom values
470
471 @param name: name to access profile
472 @type name: str
473
474 @return: atom profile array 1 x N_atoms with atom values
475 @rtype: array
476
477 @raise ProfileError: if aProfiles contains no entry for |name|
478 """
479 return self.aProfiles.get( name, default=default )
480
481
482 - def profile( self, name, default=None, lookHarder=0 ):
483 """
484 Use::
485 profile( name, lookHarder=0) -> atom or residue profile
486
487 @param name: name to access profile
488 @type name: str
489 @param default: default result if no profile is found,
490 if None, raise exception
491 @type default: any
492 @param lookHarder: update from source before reporting missing profile
493 @type lookHarder: 0||1
494
495 @raise ProfileError: if neither atom- nor rProfiles
496 contains |name|
497 """
498 if lookHarder and not name in self.aProfiles and \
499 not name in self.rProfiles:
500 self.update()
501
502 if name in self.aProfiles:
503 return self.aProfiles[ name ]
504
505 return self.rProfiles.get( name, default=default )
506
507
509 """
510 Use::
511 profileInfo( name ) -> dict with infos about profile
512
513 @param name: name to access profile
514 @type name: str
515 @param lookHarder:update from source before reporting missing profile::
516 Guaranteed infos: 'version'->str,
517 'comment'->str,
518 'changed'->1||0
519 @type lookHarder: 0|1
520
521 @raise ProfileError: if neither atom - nor rProfiles
522 contains |name|
523 """
524 if lookHarder and not name in self.Profiles and \
525 not name in self.rProfiles:
526 self.update()
527
528 if name in self.aProfiles:
529 return self.aProfiles.getInfo( name )
530
531 if name in self.rProfiles:
532 return self.rProfiles.getInfo( name )
533
534 raise ProfileError( 'No profile info found with name '+str(name))
535
536
538 """
539 Add/Override infos about a given profile
540 E.g. setProfileInfo('relASA', comment='new', params={'bin':'whatif'})
541
542 @param name: name to access profile
543 @type name: str
544
545 @raise ProfileError: if neither atom
546 """
547 d = self.profileInfo( name )
548 for key, value in args.items():
549 d[key] = value
550
551
553 """
554 Remove residue or atom profile(s)
555
556 Use::
557 removeProfile( str_name [,name2, name3] ) -> 1|0,
558
559 @param names: name or list of residue or atom profiles
560 @type names: str OR list of str
561
562 @return: 1 if at least 1 profile has been deleted,
563 0 if none has been found
564 @rtype: int
565 """
566 r = 0
567
568 for n in names:
569 if n in self.aProfiles:
570 del self.aProfiles[n]
571 r = 1
572
573 if n in self.rProfiles:
574 del self.rProfiles[n]
575 r = 1
576 return r
577
578
580 """
581 Tell if xyz or atoms have been changed compared to source file or
582 source object (which can be still in memory).
583
584 @return: (1,1)..both xyz and atoms field have been changed
585 @rtype: (1||0, 1||0)
586 """
587 if (self.xyzChanged==0 or self.atomsChanged==0) \
588 and ( self.source is not None and self.validSource() is None ):
589 raise PDBError('Invalid source: '+str(self.source) )
590
591 if self.validSource() is None:
592 return (1,1)
593
594 return (self.xyzChanged, self.atomsChanged)
595
596
598 """
599 Tell whether xyz and atoms can currently be reconstructed from a
600 source on disc. Same as isChanged() unless source is another not yet
601 saved PDBModel instance that made changes relative to its own source
602 ...
603
604 @return: (1,1)..both xyz and atoms field have been changed
605 @rtype: (1||0, 1||0)
606 """
607 if isinstance( self.source, PDBModel ):
608 xChanged = self.isChanged()[0] or \
609 self.source.isChangedFromDisc()[0]
610 aChanged = self.isChanged()[1] or \
611 self.source.isChangedFromDisc()[1]
612
613 return xChanged, aChanged
614
615 return self.isChanged()
616
617
619 """
620 Check if profile has changed compared to source.
621
622 @return: 1, if profile |pname| can currently not be
623 reconstructed from a source on disc.
624 @rtype: int
625
626 @raise ProfileError: if there is no atom or res profile with pname
627 """
628 if self.validSource() is None:
629 return 1
630
631 if isinstance( self.source, PDBModel ):
632 return self.profileInfo( pname )['changed'] or \
633 self.source.profileChangedFromDisc( pname )
634
635 return self.profileInfo( pname )['changed']
636
637
639 """
640 Remove profiles, that haven't been changed from a direct
641 or indirect source on disc
642 B{AUTOMATICALLY CALLED BEFORE PICKLING}
643 """
644 for key in self.rProfiles:
645
646 if not self.profileChangedFromDisc( key ):
647 self.rProfiles[ key ] = None
648
649 if type( self.rProfiles[ key ] ) == arraytype:
650 self.rProfiles[ key ] = self.rProfiles[key].tolist()
651
652 for key in self.aProfiles:
653
654 if not self.profileChangedFromDisc( key ):
655 self.aProfiles[ key ] = None
656
657 if type( self.aProfiles[ key ] ) == arraytype:
658 self.aProfiles[ key ] = self.aProfiles[key].tolist()
659
661 """
662 Remove xyz array and list of atoms if they haven't been changed and
663 could hence be loaded from the source file (only if there is a source
664 file...). Remove any unchanged profiles.
665 B{AUTOMATICALLY CALLED BEFORE PICKLING}
666 """
667
668
669
670 if not self.forcePickle:
671
672 xChanged, aChanged = self.isChangedFromDisc()
673
674 if not xChanged:
675 self.xyz = None
676
677 if type( self.xyz ) == arraytype and self.xyz.typecode() != 'f':
678 self.xyz = self.xyz.astype('f')
679
680 if not aChanged:
681 self.atoms = None
682
683 if type( self.atoms ) == arraytype:
684 self.atoms = self.atoms.tolist()
685
686 self.__slimProfiles()
687
688 self.__resMap = self.__resIndex = None
689 self.caMask = self.bbMask = self.heavyMask = None
690 self.__validSource = 0
691
692
694 """
695 Check for a valid source on disk.
696
697 @return: str or PDBModel, None if this model has no valid source
698 @rtype: str or PDBModel or None
699 """
700 if self.__validSource == 0:
701
702 if isinstance( self.source, LocalPath ) and self.source.exists():
703 self.__validSource = self.source.local()
704 else:
705 if isinstance( self.source, PDBModel ):
706 self.__validSource = self.source
707 else:
708 self.__validSource = None
709
710 return self.__validSource
711
712
714 """
715 Name of pickled source or PDB file.
716
717 @return: file name of pickled source or PDB file
718 @rtype: str
719
720 @raise PDBError: if there is no valid source
721 """
722 s = self.validSource()
723
724 if s is None:
725 raise PDBError('no valid source')
726
727 if type( s ) == str:
728 return s
729
730 return self.source.sourceFile()
731
732
734 """
735 Disconnect this model from its source (if any).
736
737 @note: If this model has an (in-memory) PDBModel instance as source,
738 the entries of 'atoms' could still reference the same dictionaries.
739 """
740 self.update()
741
742 try:
743 self.fileName = self.fileName or self.sourceFile()
744 except:
745 pass
746
747 self.setSource( None )
748
749 self.xyzChanged, self.atomsChanged = 1, 1
750
751 for p in self.rProfiles:
752 self.setProfileInfo( p, changed=1 )
753 for p in self.aProfiles:
754 self.setProfileInfo( p, changed=1 )
755
756
758 """
759 Return pdb code of model.
760
761 @return: pdb code
762 @rtype: str
763 """
764 return self.pdbCode
765
767 """
768 Set model pdb code.
769
770 @param code: new pdb code
771 @type code: str
772 """
773 self.pdbCode = code
774
775
777 """
778 Amino acid sequence in one letter code.
779
780 @param mask: atom mask, to apply before (default None)
781 @type mask: list or array
782 @param xtable: dict {str:str}, additional residue:single_letter mapping
783 for non-standard residues (default molUtils.xxDic)
784 @type xtable: dict
785
786 @return: 1-letter-code AA sequence (based on first atom of each res).
787 @rtype: str
788 """
789 mask = mask or N.ones( self.lenAtoms(), 'i' )
790
791 firstAtm = N.zeros( self.lenAtoms() )
792 N.put( firstAtm, self.resIndex(), 1 )
793
794
795 mask = mask * firstAtm
796
797 l = [ a['residue_name'] for a in N.compress( mask, self.getAtoms()) ]
798
799 return ''.join( molUtils.singleAA( l ) )
800
801
803 """
804 Rename atoms so that tleap from Amber can read the PDB.
805 If HIS residues contain atoms named HE2 or/and HD2, the residue
806 name is changed to HIE or HID or HIP, respectively. Disulfide bonds
807 are not yet identified - CYS -> CYX renaming must be done manually
808 (see AmberParmBuilder for an example).
809 Internally amber uses H atom names ala HD21 while standard pdb files
810 use 1HD2. By default, ambpdb produces 'standard' pdb atom names but
811 it gives the less ambiguous amber names with switch -aatm.
812
813 @param change: change this model's atoms directly (default:1)
814 @type change: 1|0
815 @param aatm: use, for example, HG23 instead of 3HG2 (default:1)
816 @type aatm: 1|0
817
818 @return: [ {..} ], list of atom dictionaries
819 @rtype: list of atom dictionaries
820 """
821 numbers = map( str, range(10) )
822
823 residues = self.resList()
824
825 if not change:
826 residues = copy.deepcopy( residues )
827
828 atoms = []
829
830 for r in residues:
831
832 for a in r:
833
834 if aatm and len(a['name'])>2 and a['name'][0] in numbers:
835 a['name'] = a['name'][1:] + a['name'][0]
836
837 resname = r[0]['residue_name']
838 if resname == 'HIS':
839 anames = [ a['name'] for a in r ]
840
841 if 'HE2' in anames: resname = 'HIE'
842 if 'HD1' in anames: resname = 'HID'
843 if 'HE2' in anames and 'HD1' in anames:
844 resname = 'HIP'
845
846 for a in r:
847 a['residue_name'] = resname
848
849 atoms.extend( r )
850
851 return atoms
852
853
854 - def writePdb( self, fname, ter=1, amber=0, original=0, left=0, wrap=0,
855 headlines=None, taillines=None):
856 """
857 Save model as PDB file.
858
859 @param fname: name of new file
860 @type fname: str
861 @param ter: Option of how to treat the retminal record::
862 0, don't write any TER statements
863 1, restore original TER statements (doesn't work,
864 if preceeding atom has been deleted) [default]
865 2, put TER between all detected chains
866 3, as 2 but also detect and split discontinuous chains
867 @type ter: 0, 1, 2 or 3
868 @param amber: amber formatted atom names
869 (ter=3, left=1, wrap=0) (default 0)
870 @type amber: 1||0
871 @param original: revert atom names to the ones parsed in from PDB
872 (default 0)
873 @type original: 1||0
874 @param left: left-align atom names (as in amber pdbs)(default 0)
875 @type left: 1||0
876 @param wrap: write e.g. 'NH12' as '2NH1' (default 0)
877 @type wrap: 1||0
878 @param headlines: [( str, dict or str)], list of record / data tuples::
879 e.g. [ ('SEQRES', ' 1 A 22 ALA GLY ALA'), ]
880 @type headlines: list of tuples
881 @param taillines: same as headlines but appended at the end of file
882 @type taillines: list of tuples
883 """
884 try:
885 f = IO.PDBFile( fname, mode='w' )
886
887 numbers = map( str, range(10) )
888
889 atoms = self.getAtoms()
890 if amber:
891 atoms = self.xplor2amber( change=0 )
892 ter = 3
893 wrap = 0
894 left = 1
895
896 if ter == 2 or ter == 3:
897 terIndex = N.array( self.chainIndex( breaks=(ter==3) )[1:] ) -1
898 if ter == 1:
899 terIndex = self.__terAtoms
900
901 if headlines:
902 for l in headlines:
903 f.writeLine( l[0], l[1] )
904
905 if taillines:
906 for l in taillines:
907 f.writeLine( l[0], l[1] )
908
909 for i in range( len( atoms ) ):
910
911 a = atoms[i]
912
913
914 a['position'] = self.xyz[ i ]
915
916
917 real_name = a['name']
918 aname = real_name
919
920 if original and not amber:
921 aname = a['name_original']
922
923
924 if wrap and len(aname) == 4 and aname[0] in numbers:
925 aname = aname[1:] + aname[0]
926 if not left and len(aname) < 4:
927 aname = ' ' + aname.strip()
928
929 a['name'] = aname
930
931
932 f.writeLine( a['type'], a )
933
934
935 a['name'] = real_name
936 del( a['position'] )
937
938
939 if (ter>0 and i in terIndex):
940 f.writeLine('TER', a )
941
942 f.close()
943
944 except:
945 EHandler.error( "Error writing "+fname )
946
947
948 - def returnPdb( self, out=None, ter=1, headlines=None, taillines=None):
949 """
950 Restore PDB file from (possibly transformed) coordinates and pdb line
951 dictionaries in self.atoms. This is an older version of writePdb that
952 returns a list of PDB lines instead of writing to a file.
953
954 @param out: stdout or None, if None a list is returned
955 @type out: stdout or None
956 @param ter: Option of how to treat the retminal record::
957 0, don't write any TER statements
958 1, restore original TER statements (doesn't work,
959 if preceeding atom has been deleted)
960 2, put TER between all detected chains
961 @type ter: 0, 1 or 2
962 @param headlines: [( str, dict or str)], list of record / data tuples::
963 e.g. [ ('SEQRES', ' 1 A 22 ALA GLY ALA'), ]
964 @type headlines: list of tuples
965 @param taillines: same as headlines but appended at the end of file
966 @type taillines: list of tuples
967
968 @return: [ str ], lines of a PDB file
969 @rtype: list of strings
970 """
971 try:
972 pdb_lst=[]
973 i=0
974
975 if ter == 2:
976 terIndex = N.array( self.chainIndex()[1:] ) - 1
977 if ter == 1:
978 terIndex = self.__terAtoms
979
980 if headlines:
981 for l in headlines:
982 pdb_lst += [ '%6s%s\n'%(l[0],l[1]) ]
983
984 if taillines:
985 for l in taillines:
986 pdb_lst += [ '%6s%s\n'%(l[0],l[1]) ]
987
988 for a in self.getAtoms():
989
990
991 a['positionX'], a['positionY'], a['positionZ'] = self.xyz[ i ]
992
993
994 a['name'], a['name_original'] = a['name_original'], a['name']
995
996
997 atom_line = 'ATOM %(serial_number)5i %(name)-4s %(residue_name)3s %(chain_id)1s%(residue_number)4i%(insertion_code)1s %(positionX)8.3f%(positionY)8.3f%(positionZ)8.3f%(occupancy)6.2f%(temperature_factor)6.2f %(segment_id)-4s%(element)2s\n'
998 pdb_lst += [ atom_line%a ]
999
1000
1001 a['name'], a['name_original'] = a['name_original'], a['name']
1002 del( a['positionX'], a['positionY'], a['positionZ'] )
1003
1004
1005 if (ter and i in terIndex):
1006 ter_line = 'TER %(serial_number)5i %(residue_name)3s %(chain_id)1s%(residue_number)4i%(insertion_code)1s\n'
1007 pdb_lst += [ ter_line % self.getAtoms()[i] ]
1008
1009 i += 1
1010
1011 if out == 'stdout':
1012 sys.stdout.writelines( pdb_lst )
1013 else:
1014 return pdb_lst
1015
1016 except:
1017 EHandler.error( "PDBModel.returnPdb(): " )
1018
1019
1021 """
1022 Pickle this PDBModel to a file, set the 'source' field to
1023 this file name and mark atoms, xyz, and profiles as unchanged.
1024 Normal pickling of the object will only dump those data that can not
1025 be reconstructed from the source of this model (if any).
1026 saveAs creates a 'new source' without further dependencies.
1027
1028 @param path: target file name
1029 @type path: str OR LocalPath instance
1030 """
1031 try:
1032 self.update()
1033
1034
1035 self.forcePickle = 1
1036
1037 self.setSource( path )
1038
1039 self.xyzChanged, self.atomsChanged = 0, 0
1040
1041 for p in self.rProfiles:
1042 self.setProfileInfo( p, changed=0 )
1043 for p in self.aProfiles:
1044 self.setProfileInfo( p, changed=0 )
1045
1046 t.Dump( self, str(path) )
1047
1048 except IOError, err:
1049 raise PDBError("Can't open %s for writing." % t.absfile(str(path)))
1050
1051
1052 - def maskF(self, atomFunction, numpy=1 ):
1053 """
1054 Create list whith result of atomFunction( atom ) for each atom.
1055
1056 @param atomFunction: function( dict_from_PDBFile.readline() ),
1057 true || false (Condition)
1058 @type atomFunction: 1||0
1059 @param numpy: 1(default)||0, convert result to Numpy array of int
1060 @type numpy: int
1061
1062 @return: Numpy N.array( [0,1,1,0,0,0,1,0,..], 'i') or list
1063 @rtype: array or list
1064 """
1065 try:
1066 result = map( atomFunction, self.getAtoms() )
1067 except:
1068
1069
1070
1071 EHandler.warning("mask(): Error while mapping funtion "+
1072 "to all atoms.")
1073 result = []
1074
1075 for a in self.getAtoms():
1076 try:
1077 result.append( atomFunction( a ) )
1078
1079 except :
1080 EHandler.warning("mask(): Error while save-mapping ")
1081 result.append(0)
1082
1083 if numpy:
1084 return N.array( result )
1085 return result
1086
1087
1088 - def maskCA( self, force=0 ):
1089 """
1090 Short cut for mask of all CA atoms.
1091
1092 @param force: force calculation even if cached mask is available
1093 @type force: 0||1
1094
1095 @return: N.array( 1 x N_atoms ) of 0||1
1096 @rtype: array
1097 """
1098 if self.caMask == None or force:
1099 self.caMask = self.maskF( lambda a: a['name'] == 'CA' )
1100
1101 return self.caMask
1102
1103
1104 - def maskBB( self, force=0 ):
1105 """
1106 Short cut for mask of all backbone atoms.
1107
1108 @param force: force calculation even if cached mask is available
1109 @type force: 0||1
1110
1111 @return: N.array( 1 x N_atoms ) of 0||1
1112 @rtype: array
1113 """
1114 if self.bbMask == None or force:
1115 self.bbMask = self.maskF(
1116 lambda a: a['name'] in ['CA', 'C', 'N', 'O', 'H','OXT'] )
1117
1118 return self.bbMask
1119
1120
1122 """
1123 Short cut for mask of all heavy atoms. ('element' <> H)
1124
1125 @param force: force calculation even if cached mask is available
1126 @type force: 0||1
1127
1128 @return: N.array( 1 x N_atoms ) of 0||1
1129 @rtype: array
1130 """
1131 if self.heavyMask == None or force:
1132 self.heavyMask = self.maskF( lambda a: a.get('element','') <> 'H' )
1133
1134 return self.heavyMask
1135
1137 """
1138 Short cut for mask of hydrogens. ('element' == H)
1139
1140 @return: N.array( 1 x N_atoms ) of 0||1
1141 @rtype: array
1142 """
1143 return N.logical_not( self.maskHeavy() )
1144
1145
1147 """
1148 Short cut for mask of all CB I{and} CA of GLY.
1149
1150 @return: mask of all CB and CA of GLY
1151 @rtype: array
1152 """
1153 f = lambda a: a['name'] == 'CB' or\
1154 a['residue_name'] == 'GLY' and a['name'] == 'CA'
1155
1156 return self.maskF( f )
1157
1158
1160 """
1161 Short cut for mask of all atoms in residues named TIP3, HOH and WAT
1162
1163 @return: N.array( 1 x N_atoms ) of 0||1
1164 @rtype: array
1165 """
1166 return self.maskF( lambda a: a['residue_name'] in ['TIP3','HOH','WAT'])
1167
1169 """
1170 Short cut for mask of all atoms in residues named
1171 TIP3, HOH, WAT, Na+, Cl-
1172
1173 @return: N.array( 1 x N_atoms ) of 0||1
1174 @rtype: array
1175 """
1176 return self.maskF( lambda a: a['residue_name'] in ['TIP3','HOH','WAT',
1177 'Na+', 'Cl-'] )
1179 """
1180 Short cut for mask of all HETATM
1181
1182 @return: N.array( 1 x N_atoms ) of 0||1
1183 @rtype: array
1184 """
1185 return self.maskF( lambda a: a['type'] == 'HETATM' )
1186
1188 """
1189 Short cut for mask containing all atoms of amino acids.
1190
1191 @param standard: only standard residue names (not CYX, NME,..)
1192 (default 0)
1193 @type standard: 0|1
1194
1195 @return: N.array( 1 x N_atoms ) of 0||1,
1196 mask of all protein atoms (based on residue name)
1197 @rtype: array
1198 """
1199 d = molUtils.aaDic
1200 if standard:
1201 d = molUtils.aaDicStandard
1202
1203 names = map( string.upper, d.keys() )
1204 return self.maskF( lambda a, n=names: a['residue_name'].upper() in n )
1205
1206
1208 """
1209 Get atom indices conforming condition
1210
1211 @param what: Selection::
1212 - function applied to each atom entry,
1213 e.g. lambda a: a['residue_name']=='GLY'
1214 - list of str, allowed atom names
1215 - list of int, allowed atom indices OR mask with only 1 and 0
1216 - int, single allowed atom index
1217 @type what: function OR list of str or int OR int
1218
1219 @return: N_atoms x 1 (0||1 )
1220 @rtype: Numeric array
1221
1222 @raise PDBError: if what is neither of above
1223 """
1224
1225 if type( what ) == types.FunctionType:
1226 return N.nonzero( self.maskF( what) )
1227
1228 if type( what ) == list or type( what ) == type( N.zeros(1) ):
1229
1230
1231 if type( what[0] ) == str:
1232 return self.indices(
1233 lambda a, allowed=what: a['name'] in allowed )
1234
1235 if type( what[0] ) == int:
1236
1237 if len( what ) == self.lenAtoms() and max( what ) < 2:
1238 return N.nonzero( what )
1239
1240 else:
1241 return what
1242
1243
1244 if type( what ) == int:
1245 return N.array( [what], 'i' )
1246
1247 raise PDBError("PDBModel.indices(): Could not interpret condition ")
1248
1249
1250 - def mask( self, what, numpy=1 ):
1251 """
1252 Get atom mask.
1253
1254 @param what: Selection::
1255 - function applied to each atom entry,
1256 e.g. lambda a: a['residue_name']=='GLY'
1257 - list of str, allowed atom names
1258 - list of int, allowed atom indices OR mask with
1259 only 1 and 0
1260 - int, single allowed atom index
1261 @type what: function OR list of str or int OR int
1262
1263 @return: N_atoms x 1 (0||1 )
1264 @rtype: Numeric array
1265
1266 @raise PDBError: if what is neither of above
1267 """
1268
1269 if type( what ) == types.FunctionType:
1270 return self.maskF( what, numpy )
1271
1272 if type( what ) == list or type( what ) == type( N.zeros(1) ):
1273
1274
1275 if type( what[0] ) == str:
1276 return self.mask(
1277 lambda a, allowed=what: a['name'] in allowed, numpy)
1278
1279 if type( what[0] ) == int:
1280
1281 if len( what ) == self.lenAtoms() and max( what ) < 2:
1282 return what
1283
1284 else:
1285 r = N.zeros( self.lenAtoms(),'i' )
1286 N.put( r, what, 1 )
1287 return r
1288
1289
1290 if type( what ) == int:
1291 return self.mask( [what] )
1292
1293 raise PDBError, "PDBModel.mask(): Could not interpret condition "
1294
1295
1297 """
1298 Mask (0) residues for which all atoms are masked (0) in atomMask.
1299
1300 @param atomMask: list/array of int, 1 x N_atoms
1301 @type atomMask: list/array of int
1302
1303 @return: 1 x N_residues (0||1 )
1304 @rtype: array of int
1305 """
1306 result = N.zeros( self.lenResidues(), 'i' )
1307
1308 resMap = self.resMap()
1309
1310 if len( resMap ) == 0:
1311 return result
1312
1313 for res in range( 0, resMap[-1]+1 ):
1314
1315 subMask = N.take( atomMask, N.nonzero( N.equal( resMap, res ) ) )
1316 result[res] = N.sum( subMask ) > 0
1317
1318 return result
1319
1320
1322 """
1323 Get list of indices of residue for which any atom is in indices.
1324
1325 @param indices: list of atom indices
1326 @type indices: list of int
1327
1328 @return: indices of residues
1329 @rtype: list of int
1330 """
1331 result = []
1332 rI = self.resIndex()
1333
1334 for i in range( len( rI ) ):
1335
1336 a = rI[i]
1337
1338 if i < len( rI )-1:
1339 e = rI[i+1]
1340 else:
1341 e = self.lenAtoms()
1342
1343 if N.sum( N.less( indices, e ) * N.greater( indices, a-1 ) ):
1344 result += [i]
1345
1346 return result
1347
1348
1350 """
1351 convert residue mask to atom mask.
1352
1353 @param resMask: list/array of int, 1 x N_residues
1354 @type resMask: list/array of int
1355
1356 @return: 1 x N_atoms
1357 @rtype: array of int
1358 """
1359 result = N.zeros( self.lenAtoms(), 'i')
1360
1361 resMap = self.resMap()
1362
1363 if len( resMap ) == 0:
1364 return result
1365
1366 if len( resMask ) != resMap[-1]+1:
1367 raise PDBError, "invalid residue mask"
1368
1369 N.put( result, self.res2atomIndices( N.nonzero( resMask ) ), 1 )
1370
1371 return result
1372
1373
1375 """
1376 Convert residue indices to atom indices.
1377
1378 @param indices: list/array of residue indices
1379 @type indices: list/array of int
1380
1381 @return: list of atom indices
1382 @rtype: list of int
1383 """
1384 result = []
1385 resMap = self.resMap()
1386
1387 if max( indices ) > resMap[-1] or min( indices ) < 0:
1388 raise PDBError, "invalid residue indices"
1389
1390 for res in indices:
1391 result += N.nonzero( N.equal( resMap, res ) ).tolist()
1392
1393 return result
1394
1395
1397 """
1398 Get an atom profile where each atom has the value its residue has
1399 in the residue profile.
1400
1401 @param p: name of existing residue profile OR ...
1402 [ any ], list of lenResidues() length
1403 @type p: str
1404
1405 @return: [ any ] OR array, atom profile
1406 @rtype: list or array
1407 """
1408 if type( p ) is str:
1409 p = self.resProfile( p )
1410
1411 isArray = isinstance( p, N.arraytype )
1412
1413 resMap = self.resMap()
1414
1415 r = [ p[ resMap[a] ] for a in range( len(resMap) ) ]
1416
1417 if isArray:
1418 r = N.array( r )
1419
1420 return r
1421
1422
1424 """
1425 Convert atom indices to chain indices. Each chain is only
1426 returned once.
1427
1428 @param indices: list of atom indices
1429 @type indices: list of int
1430 @param breaks: look for chain breaks in backbone coordinates (def. 0)
1431 @type breaks: 0||1
1432
1433 @return: chains any atom which is in indices
1434 @rtype: list of int
1435 """
1436 cm = self.chainMap( breaks=breaks )
1437 r = []
1438
1439 for i in indices:
1440 chain = cm[i]
1441 if not chain in r:
1442 r += [ chain ]
1443
1444 return r
1445
1446
1448 """
1449 Convert chain indices into atom indices.
1450
1451 @param indices: list of chain indices
1452 @type indices: list of int
1453 @param breaks: look for chain breaks in backbone coordinates (def. 0)
1454 @type breaks: 0||1
1455
1456 @return: all atoms belonging to the given chains
1457 @rtype: list of int
1458 """
1459 cm = self.chainMap( breaks=breaks )
1460
1461 r = []
1462 for i in indices:
1463 r += N.nonzero( N.equal( cm, i ) ).tolist()
1464
1465 return r
1466
1467
1468 - def profile2mask(self, profName, cutoff_min=None, cutoff_max=None ):
1469 """
1470 profile2mask( str_profname, [cutoff_min, cutoff_max=None])
1471
1472 @param cutoff_min: low value cutoff
1473 @type cutoff_min: float
1474 @param cutoff_max: high value cutoff
1475 @type cutoff_max: float
1476
1477 @return: mask len( profile(profName) ) x 1||0
1478 @rtype: array
1479
1480 @raise ProfileError: if no profile is found with name profName
1481 """
1482 p = self.profile( profName )
1483
1484 cutoff_min = cutoff_min or min( p ) - 1
1485 cutoff_max = cutoff_max or max( p ) + 1
1486
1487 return N.greater( p, cutoff_min ) * N.less( p, cutoff_max )
1488
1489
1491 """
1492 profile2atomMask( str_profname, [cutoff_min, cutoff_max=None])
1493 Same as L{profile2mask}, but converts residue mask to atom mask.
1494
1495 @param cutoff_min: low value cutoff
1496 @type cutoff_min: float
1497 @param cutoff_max: high value cutoff
1498 @type cutoff_max: float
1499
1500 @return: mask N_atoms x 1|0
1501 @rtype: array
1502
1503 @raise ProfileError: if no profile is found with name profName
1504 """
1505 r = self.profile2mask( profName, cutoff_min, cutoff_max )
1506
1507 if len( r ) == self.lenResidues():
1508 r = self.res2atomMask( r )
1509
1510 return r
1511
1512
1514 """
1515 Translate atom positions so that they point to the same atoms after
1516 a call to N.take() (if they are kept at all).
1517
1518 @param oldI: indices to translate
1519 @type oldI: list of int
1520 @param takeI: indices for N
1521 @type takeI: list of int
1522
1523 @return: indices of current atoms after take, resorted
1524 @rtype: list of int
1525 """
1526 ind = N.take( range( self.lenAtoms() ), takeI )
1527
1528 return [ i for i in range( len(ind) ) if ind[i] in oldI ]
1529
1530
1532
1533 ind = N.take( range( self.lenResidues() ), takeI )
1534 return [ i for i in range( len(ind) ) if ind[i] in oldI ]
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574 - def concat( self, *models ):
1575 """
1576 Concatenate atoms, coordinates and profiles. source and fileName
1577 are lost, so are profiles that are not available in all models.
1578 model0.concat( model1 [, model2, ..]) -> single PDBModel.
1579
1580 @param models: models to concatenate
1581 @type models: model OR list of models
1582
1583 @note: info records of given models are lost.
1584 """
1585
1586 if len( models ) == 0:
1587 return self
1588
1589 m = models[0]
1590
1591 r = self.__class__()
1592
1593 r.setXyz( N.concatenate( ( self.getXyz(), m.getXyz() ) ) )
1594 r.setAtoms( self.getAtoms() + m.getAtoms() )
1595 r.setPdbCode( self.pdbCode )
1596
1597 if None in self.rProfiles.values() or \
1598 None in self.aProfiles.values():
1599 self.update()
1600
1601 r.rProfiles = self.rProfiles.concat( m.rProfiles )
1602
1603 r.aProfiles = self.aProfiles.concat( m.aProfiles )
1604
1605 r.__terAtoms = self.__terAtoms +\
1606 (N.array( m.__terAtoms )+self.lenAtoms()).tolist()
1607
1608 return r.concat( *models[1:] )
1609
1610
1611 - def take( self, i, deepcopy=0 ):
1612 """
1613 All fields of the result model are shallow copies of this model's
1614 fields. I.e. removing or reordering of atoms does not affect the
1615 original model but changes to entries in the atoms dictionaries
1616 would also change the atom dictionaries of this model.
1617
1618 take( atomIndices, [deepcopy=0] ) -> PDBModel / sub-class.
1619
1620 @note: the position of TER records is translated to the new atoms.
1621 Chain boundaries can hence be lost (if the terminal atoms are not
1622 taken) or move into the middle of a residue (if atoms are resorted).
1623
1624 @param i: atomIndices, positions to take in the order to take
1625 @type i: list/array of int
1626 @param deepcopy: deepcopy atom dictionaries (default 0)
1627 @type deepcopy: 0||1
1628
1629 @return: PDBModel / sub-class
1630 @rtype: PDBModel
1631 """
1632
1633 rIndices = self.atom2resIndices( i )
1634
1635 r = self.__class__()
1636
1637 r.source = self.source
1638
1639 r.xyz = N.take( self.getXyz(), i, 0 )
1640 r.xyzChanged = self.xyzChanged or not (r.xyz == self.xyz)
1641
1642 if deepcopy:
1643 r.atoms = [ copy.copy( a ) for a in self.getAtoms() ]
1644 r.atoms = N.take( r.atoms, i ).tolist()
1645 else:
1646 r.atoms = N.take( self.getAtoms(), i ).tolist()
1647 r.atomsChanged = self.atomsChanged or not (r.atoms == self.atoms)
1648
1649 r.aProfiles = self.aProfiles.take( i )
1650 r.rProfiles = self.rProfiles.take( rIndices )
1651
1652 r.info = copy.deepcopy( self.info )
1653
1654 if self.__terAtoms:
1655 l = len( self.__terAtoms)
1656 r.__terAtoms = self.__takeAtomIndices( self.__terAtoms, i )
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671 r.pdbCode = self.pdbCode
1672 r.fileName = self.fileName
1673
1674 return r
1675
1676
1677 - def keep( self, i ):
1678 """
1679 Replace atoms,coordinates,profiles of this(!) model with sub-set.
1680 (in-place version of N.take() )
1681
1682 @param i: lst/array of int
1683 @type i:
1684 """
1685 if len(i)==self.lenAtoms() and max(i)<2:
1686 EHandler.warning('dont use PDBModel.keep() with mask.', trace=0)
1687
1688 r = self.take( i )
1689
1690 self.atoms = r.atoms
1691 self.xyz = r.xyz
1692 self.atomsChanged = r.atomsChanged
1693 self.xyzChanged = r.xyzChanged
1694
1695 self.aProfiles = r.aProfiles
1696 self.rProfiles = r.rProfiles
1697
1698 self.info = r.info
1699
1700 self.caMask = self.bbMask = self.heavyMask = None
1701 self.__resMap = self.__resIndex = None
1702
1703 self.__terAtoms = r.__terAtoms
1704
1705
1706 - def clone( self, deepcopy=0 ):
1707 """
1708 Clone PDBModel.
1709
1710 @return: PDBModel / subclass, copy of this model,
1711 see comments to Numeric.take()
1712 @rtype: PDBModel
1713 """
1714 return self.take( range( self.lenAtoms() ), deepcopy )
1715
1716
1717 - def compress( self, mask, deepcopy=0 ):
1718 """
1719 Compress PDBmodel using mask.
1720 compress( mask, [deepcopy=0] ) -> PDBModel
1721
1722 @param mask: N.array( 1 x N_atoms of 1 or 0 )
1723 1 .. keep this atom
1724 @type mask: array
1725 @param deepcopy: deepcopy atom dictionaries of this model and result
1726 (default 0 )
1727 @type deepcopy: 1||0
1728
1729 @return: compressed PDBModel using mask
1730 @rtype: PDBModel
1731 """
1732 return self.take( N.nonzero( mask ), deepcopy )
1733
1734
1736 """
1737 Convenience access to the 3 different remove methods.
1738 The mask used to remove atoms is returned. This mask can be used
1739 to apply the same change to another array of same dimension as
1740 the old(!) xyz and atoms.
1741
1742 @param what: Decription of what to remove::
1743 - function( atom_dict ) -> 1 || 0 (1..remove) OR
1744 - list of int [4, 5, 6, 200, 201..], indices of atoms to remove
1745 - list of int [11111100001101011100..N_atoms], mask (1..remove)
1746 - int, remove atom with this index
1747 @type what: list of int or int
1748
1749 @return: N.array(1 x N_atoms_old) of 0||1, mask used to compress the
1750 atoms and xyz arrays.
1751 @rtype: array
1752
1753 @raise PDBError: if what is neither of above
1754 """
1755 mask = N.logical_not( self.mask( what ) )
1756 self.keep( N.nonzero(mask) )
1757 return mask
1758
1759
1760 - def takeChains( self, chainLst, deepcopy=0, breaks=0, maxDist=None ):
1761 """
1762 Get copy of this model with only the given chains.
1763
1764 @param chainLst: list of chains
1765 @type chainLst: list of int
1766 @param deepcopy: deepcopy atom dictionaries (default 0)
1767 @type deepcopy: 0||1
1768 @param breaks: split chains at chain breaks (default 0)
1769 @type breaks: 0|1
1770 @param maxDist: (if breaks=1) chain break threshold in Angstrom
1771 @type maxDist: float
1772
1773 @return: PDBModel (by default, all fields are shallow copies,
1774 see Numeric.take() )
1775 @rtype: PDBModel
1776 """
1777 chainMap = self.chainMap( breaks=breaks, maxDist=None )
1778
1779 atomMask = map( lambda i, allowed=chainLst: i in allowed, chainMap)
1780
1781 return self.compress( atomMask, deepcopy )
1782
1783
1785 """
1786 Takes the last letter of the segment ID and adds it as chain ID.
1787 """
1788 for a in self.getAtoms():
1789
1790 try:
1791 a['chain_id'] = a['segment_id'][-1]
1792 except:
1793 if verbose:
1794 EHandler.warning("addChainId(): Problem with atom "+str(a))
1795
1796 self.atomsChanged = 1
1797
1798
1799 - def addChainId( self, first_id=None, keep_old=0, breaks=0 ):
1800 """
1801 Assign consecutive chain identifiers A - Z to all atoms.
1802
1803 @param first_id: str (A - Z), first letter instead of 'A'
1804 @type first_id: str
1805 @param keep_old: don't override existing chain IDs (default 0)
1806 @type keep_old: 1|0
1807 @param breaks: consider chain break as start of new chain (default 0)
1808 @type breaks: 1|0
1809 """
1810 atoms = self.getAtoms()
1811
1812 old_chains = []
1813 if keep_old:
1814 old_chains = [ atoms[i]['chain_id'] for i in self.resIndex(breaks)]
1815 old_chains = mathUtils.nonredundant( old_chains )
1816 if '' in old_chains: old_chains.remove('')
1817
1818 letters = string.uppercase
1819 if first_id:
1820 letters = letters[ letters.index( first_id ): ]
1821 letters = mathUtils.difference( letters, old_chains )
1822
1823 chainMap = self.chainMap( breaks=breaks )
1824
1825 for i in range( len( atoms ) ):
1826
1827 if not (keep_old and atoms[i]['chain_id'] in old_chains):
1828 atoms[i]['chain_id'] = letters[ chainMap[i] ]
1829
1830 self.atomsChanged = 1
1831
1832
1834 """
1835 Make all residue numbers consecutive and remove any insertion code
1836 letters. Note that a backward jump in residue numbering is interpreted
1837 as end of chain by chainMap() and chainIndex(). Chain borders might
1838 hence get lost if there is no change in chain label or segid.
1839
1840 @param mask: [ 0||1 x N_atoms ] atom mask to apply BEFORE
1841 @type mask: list of int
1842 @param start: starting number (default 1)
1843 @type start: int
1844 @param addChainId: add chain IDs if they are missing
1845 @type addChainId: 1|0
1846 """
1847 if addChainId:
1848 self.addChainId( keep_old=1, breaks=0 )
1849
1850 i = start
1851 for res in self.resList( mask ):
1852
1853 for a in res:
1854 a['residue_number'] = i
1855 a['insertion_code'] = ''
1856
1857 i += 1
1858
1859
1861 """
1862 Number of atoms in model.
1863
1864 @return: number of atoms
1865 @rtype: int
1866 """
1867 if not self.xyz is None:
1868 return len( self.xyz )
1869
1870 if not self.atoms is None:
1871 return len( self.atoms )
1872
1873 if self.source is None:
1874 return 0
1875
1876 return len( self.getXyz() )
1877
1878
1880 """
1881 Number of resudies in model.
1882
1883 @return: total number of residues
1884 @rtype: int
1885 """
1886 try:
1887 return self.resMap()[-1] + 1
1888 except IndexError:
1889 return 0
1890
1891
1892 - def lenChains( self, breaks=0, maxDist=None ):
1893 """
1894 Number of chains in model.
1895
1896 @param breaks: detect chain breaks from backbone atom distances (def 0)
1897 @type breaks: 0||1
1898 @param maxDist: maximal distance between consequtive residues
1899 [ None ] .. defaults to twice the average distance
1900 @type maxDist: float
1901
1902 @return: total number of chains
1903 @rtype: int
1904 """
1905 try:
1906 return self.chainMap( breaks=breaks, maxDist=maxDist )[-1] + 1
1907 except IndexError:
1908 return 0
1909
1910
1912 """
1913 Return list of lists of atom dictionaries per residue,
1914 which allows to iterate over residues and atoms of residues.
1915
1916 @param mask: [ 0||1 x N_atoms ] atom mask to apply BEFORE
1917 @type mask:
1918
1919 @return: A list of dictionaries::
1920 [ [ {'name':'N', 'residue_name':'LEU', ..},
1921 {'name':'CA','residue_name':'LEU', ..} ],
1922 [ {'name':'CA', 'residue_name':'GLY', ..}, .. ] ]
1923 @rtype: list of dictionaries
1924 """
1925 ri = N.concatenate( (self.resIndex( mask=mask ), [self.lenAtoms()] ) )
1926 resLen = len( ri ) - 1
1927 atoms = self.getAtoms()
1928 if mask:
1929 atoms = N.compress( mask, atoms ).tolist()
1930
1931 return [ atoms[ ri[res] : ri[res+1] ] for res in range( resLen ) ]
1932
1933
1935 """
1936 Creates a PDBModel per residue in PDBModel.
1937
1938 @return: list of PDBModels, one for each residue
1939 @rtype: list of PDBModels
1940 """
1941 ri = self.resIndex()
1942 resLen = len( ri )
1943 atoms = self.getAtoms()
1944 xyz = self.getXyz()
1945
1946 result = []
1947 for res in range( resLen ):
1948 m = PDBModel()
1949 a = ri[res]
1950 if res == resLen - 1:
1951 e = self.lenAtoms()
1952 else:
1953 e = ri[res + 1]
1954 m.setAtoms( atoms[a:e] )
1955 m.setXyz( xyz[a:e] )
1956 result += [m]
1957 return result
1958
1959
1961 """
1962 Generate list to map from any atom to its ORIGINAL(!) PDB
1963 residue number.
1964
1965 @param mask: [00111101011100111...] consider atom: yes or no
1966 len(mask) == N_atoms
1967 @type mask: list of int (1||0)
1968
1969 @return: list all [000111111333344444..] with residue number
1970 for each atom
1971 @rtype: list of int
1972 """
1973 result = []
1974 for a in self.getAtoms():
1975 result.append( a['residue_number'] )
1976
1977
1978 mask = mask or N.ones( len(self.getAtoms() ) , 'i' )
1979
1980
1981 return N.compress( mask, N.array(result, 'i') )
1982
1983
1985 """
1986 Create a map of residue residue for atoms in model.
1987
1988 @param mask: atom mask
1989 @type mask: list of int (1||0)
1990
1991 @return: array [00011111122223333..], residue index for each
1992 unmasked atom
1993 @rtype: list of int
1994 """
1995
1996 mask = mask or N.ones( self.lenAtoms() )
1997
1998 result = []
1999
2000 lastResNumber = -100
2001 lastResName = ''
2002 index = -1
2003 lastAlt = 'x'
2004
2005
2006 for a in N.compress( mask, self.getAtoms() ):
2007
2008 if lastResNumber <> a['residue_number'] or \
2009 lastResName <> a['residue_name'] or \
2010 lastAlt <> a.get('insertion_code', None ):
2011
2012
2013 lastResNumber = a['residue_number']
2014 lastResName = a['residue_name']
2015 lastAlt = a.get( 'insertion_code', None )
2016 index += 1
2017
2018 result.append( index )
2019
2020 return N.array(result, 'i')
2021
2022
2023 - def resMap( self, mask=None, force=0, cache=1 ):
2024 """
2025 Get list to map from any atom to a continuous residue numbering
2026 (starting with 0). A new residue is assumed to start whenever the
2027 'residue_number' or the 'residue_name' record changes between 2
2028 atoms. The mask is applied BEFORE looking for residue borders,
2029 i.e. it can change the residue numbering.
2030
2031 See L{resList()} for an example of how to use the residue map.
2032
2033 @param mask: [0000011111001111...] include atom: yes or no
2034 len(atom_mask) == number of atoms in self.atoms/self.xyz
2035 @type mask: list of int (1||0)
2036 @param force: recalculate map even if cached one is available (def 0)
2037 @type force: 0||1
2038 @param cache: cache new map (def 1)
2039 @type cache: 0||1
2040
2041 @return: array [00011111122223333..], residue index for each
2042 unmasked atom
2043 @rtype: list of int
2044 """
2045 if self.__resMap and not force and mask == None:
2046 return self.__resMap
2047
2048 result = self.__calcResMap( mask )
2049
2050 if cache and mask == None:
2051 self.__resMap = result
2052
2053 return result
2054
2055
2056 - def resIndex( self, mask=None, force=0, cache=1 ):
2057 """
2058 Get the position of the each residue's first atom. The result is by
2059 default cached. That's not really necessary - The calculation is fast.
2060
2061 @param force: re-calculate even if cached result is available (def 0)
2062 @type force: 1||0
2063 @param cache: cache the result (def 1)
2064 @type cache: 1||0
2065 @param mask: atom mask to apply before (i.e. result indices refer to
2066 compressed model)
2067 @type mask: list of int (1||0)
2068
2069 @return: index of the first atom of each residue
2070 @rtype: list of int
2071 """
2072
2073 if self.__resIndex and not force and mask == None:
2074 return self.__resIndex
2075
2076 cache = cache and mask == None
2077
2078 m = self.resMap()
2079 if mask:
2080 m = N.compress( mask, m )
2081
2082 r = [ i for i in range(len(m)) if i==0 or m[i] != m[i-1] ]
2083
2084 if cache:
2085 self__resIndex = r
2086
2087 return r
2088
2089
2091 """
2092 Get the position of the each residue's last atom.
2093
2094 @param mask: atom mask to apply before (i.e. result indices refer to
2095 compressed model)
2096 @type mask: list of int (1||0)
2097
2098 @return: index of the last atom of each residue
2099 @rtype: list of int
2100 """
2101
2102 m = self.resMap()
2103 if mask:
2104 m = N.compress( mask, m )
2105
2106 l = len(m)
2107 return [ i-1 for i in range(1, l) if i==l or m[i] != m[i-1] ]
2108
2109
2110 - def chainMap( self, breaks=0, maxDist=None ):
2111 """
2112 Get chain index of each atom. A new chain is started between 2 atoms if
2113 the chain_id or segment_id changes, the residue numbering jumps back or
2114 a TER record was found.
2115
2116 @param breaks: split chains at chain breaks (def 0)
2117 @type breaks: 1||0
2118 @param maxDist: (if breaks=1) chain break threshold in Angstrom
2119 @type maxDist: float
2120
2121 @return: array 1 x N_atoms of int, e.g. [000000011111111111122222...]
2122 @rtype: list of int
2123 """
2124 result = []
2125
2126 i = 0
2127 lastChain = -1
2128 lastResidue = -100
2129 lastChainID = None
2130 lastSegID = None
2131
2132 ter_atoms = self.__terAtoms
2133 if breaks:
2134 ter_atoms = N.concatenate( (ter_atoms,
2135 self.chainBreaks( maxDist=maxDist )) )
2136
2137 for a in self.getAtoms() :
2138
2139 if a.get('chain_id',None) <> lastChainID or \
2140 a.get('segment_id', None) <> lastSegID or \
2141 a['residue_number'] < lastResidue or \
2142 i-1 in ter_atoms:
2143
2144 lastChain += 1
2145
2146 result.append( lastChain )
2147
2148 lastResidue = a['residue_number']
2149 lastChainID = a.get('chain_id',None)
2150 lastSegID = a.get( 'segment_id', None )
2151 i += 1
2152
2153 return N.array( result, 'i' )
2154
2155
2157 """
2158 Get indices of first atom of each chain.
2159
2160 @param breaks: split chains at chain breaks (def 0)
2161 @type breaks: 1||0
2162 @param maxDist: (if breaks=1) chain break threshold in Angstrom
2163 @type maxDist: float
2164
2165 @return: array (1 x N_chains) of int
2166 @rtype: list of int
2167 """
2168 ci = self.chainMap( breaks=breaks, maxDist=maxDist )
2169
2170 result = []
2171
2172 try:
2173 for chain in range( 0, ci[-1]+1 ):
2174 result.append( N.nonzero( N.equal( ci, chain ))[0] )
2175
2176 except IndexError:
2177 pass
2178
2179 return N.array( result, 'i' )
2180
2181
2182 - def chainBreaks( self, breaks_only=1, maxDist=None ):
2183 """
2184 Identify discontinuities in the molecule's backbone.
2185
2186 @param breaks_only: don't report ends of regular chains (def 1)
2187 @type breaks_only: 1|0
2188 @param maxDist: maximal distance between consequtive residues
2189 [ None ] .. defaults to twice the average distance
2190 @type maxDist: float
2191
2192 @return: atom indices of last atom before a probable chain break
2193 @rtype: list of int
2194 """
2195 i_bb = N.nonzero( self.maskBB() )
2196 bb = self.take( i_bb )
2197 bb_ri= bb.resIndex()
2198 xyz = [ bb.xyz[ bb_ri[i] : bb_ri[i+1] ] for i in range(len(bb_ri)-1) ]
2199 xyz +=[ bb.xyz[ bb_ri[-1]: len(bb) ] ]
2200
2201
2202 dist = lambda a,b: N.sqrt( N.sum( N.power( a - b ,2 ) ) )
2203
2204 d = [ dist( xyz[i][-1], xyz[i+1][0] ) for i in range(len(bb_ri)-1) ]
2205
2206
2207 cutoff = maxDist or MLab.mean(d)*2
2208 breaks = N.nonzero( N.greater( d, cutoff ) )
2209
2210 if not breaks:
2211 return N.array( [] )
2212
2213 ri = self.resIndex()
2214 ri_to_e = {}
2215 for i in range( len(ri)-1 ):
2216 ri_to_e[ ri[i] ] = ri[ i+1 ]-1
2217
2218
2219 r = [ ri_to_e[ i_bb[ bb_ri[i] ] ] for i in breaks ]
2220
2221 if breaks_only:
2222 ri = self.chainIndex( breaks=0 )
2223 r = [ x for x in r if not x+1 in ri ]
2224
2225 return N.array( r )
2226
2227
2229 """
2230 Remove all atoms with a certain residue name.
2231
2232 @param resname: name of residue to be removed
2233 @type resname: str OR list of str
2234 """
2235 resname = t.toList( resname )
2236 self.remove( lambda a, res=resname: a['residue_name'] in res )
2237
2238
2239 - def rms( self, other, mask=None, mask_fit=None, fit=1, n_it=1 ):
2240 """
2241 Rmsd between two PDBModels.
2242
2243 @param other: other model to compare this one with
2244 @type other: PDBModel
2245 @param mask: atom mask for rmsd calculation
2246 @type mask: list of int
2247 @param mask_fit: atom mask for superposition (default: same as mask)
2248 @type mask_fit: list of int
2249 @param fit: superimpose first (default 1)
2250 @type fit: 1||0
2251 @param n_it: number of fit iterations::
2252 1 - classic single fit (default)
2253 0 - until convergence, kicking out outliers on the way
2254 @type n_it: int
2255
2256 @return: rms in Angstrom
2257 @rtype: float
2258 """
2259 x, y = self.getXyz(), other.getXyz()
2260
2261 mask_fit = mask_fit or mask
2262
2263 if fit:
2264
2265 fx, fy = x, y
2266
2267 if mask_fit:
2268 fx = N.compress( mask_fit, x, 0 )
2269 fy = N.compress( mask_fit, y, 0 )
2270
2271 r, t = rmsFit.match( fx, fy, n_iterations=n_it )[0]
2272
2273
2274 y = N.dot(y, N.transpose(r)) + t
2275
2276 if mask:
2277 x = N.compress( mask, x, 0 )
2278 y = N.compress( mask, y, 0 )
2279
2280
2281 d = N.sqrt(N.sum(N.power(x - y, 2), 1))
2282
2283 return N.sqrt( N.average(d**2) )
2284
2285
2334
2335
2361
2362
2363 - def fit( self, refModel, mask=None, n_it=1,
2364 z=2, eps_rmsd=0.5, eps_stdv=0.05,
2365 profname='rms_outlier'):
2366 """
2367 Least-square fit this model onto refMode
2368
2369 @param refModel: reference PDBModel
2370 @type refModel: PDBModel
2371 @param mask: atom mask for superposition
2372 @type mask: list of int (1||0)
2373 @param n_it: number of fit iterations::
2374 1 - classic single fit (default)
2375 0 - until convergence
2376 @type n_it: int
2377 @param z: number of standard deviations for outlier definition
2378 (default 2)
2379 @type z: float
2380 @param eps_rmsd: tolerance in rmsd (default 0.5)
2381 @type eps_rmsd: float
2382 @param eps_stdv: tolerance in standard deviations (default 0.05)
2383 @type eps_stdv: float
2384 @param profname: name of new atom profile containing outlier flag
2385 @type profname: str
2386
2387 @return: PDBModel with transformed coordinates
2388 @rtype: PDBModel
2389 """
2390 return self.transform(
2391 self.transformation( refModel, mask, n_it, eps_rmsd=eps_rmsd,
2392 eps_stdv=eps_stdv, profname=profname ) )
2393
2394
2395 - def magicFit( self, refModel, mask=None ):
2396 """
2397 Superimpose this model onto a ref. model with similar atom content.
2398 magicFit( refModel [, mask ] ) -> PDBModel (or subclass )
2399
2400 @param refModel: reference PDBModel
2401 @type refModel: PDBModel
2402 @param mask: atom mask to use for the fit
2403 @type mask: list of int (1||0)
2404
2405 @return: fitted PDBModel or sub-class
2406 @rtype: PDBModel
2407 """
2408 if mask != None:
2409 m_this = self.compress( mask )
2410 else:
2411 m_this = self
2412
2413 i_this, i_ref = m_this.compareAtoms( refModel )
2414
2415 m_this = m_this.take( i_this )
2416 m_ref = refModel.take(i_ref )
2417
2418
2419 r,t = rmsFit.findTransformation( m_ref.getXyz(), m_this.getXyz() )
2420
2421 result = self.transform( r, t )
2422
2423 return result
2424
2425
2427 """
2428 Get model with centered coordinates.
2429
2430 @param mask: atom mask applied before calculating the center
2431 @type mask: list of int (1||0)
2432
2433 @return: model with centered coordinates
2434 @rtype: PDBModel
2435 """
2436 r = self.clone()
2437 mask = mask or N.ones( len(self) )
2438
2439 avg = N.average( N.compress( mask, r.getXyz(), 0 ) )
2440
2441 r.setXyz( r.getXyz() - avg )
2442
2443 return r
2444
2445
2446 - def center( self, mask=None ):
2447 """
2448 Geometric centar of model.
2449
2450 @param mask: atom mask applied before calculating the center
2451 @type mask: list of int (1||0)
2452
2453 @return: xyz coordinates of center
2454 @rtype: (float, float, float)
2455 """
2456 if mask == None:
2457 return N.average( self.getXyz() )
2458
2459 return N.average( N.compress( mask, self.getXyz(), axis=0 ) )
2460
2461
2463 """
2464 Center of mass of PDBModel.
2465
2466 @return: array('f')
2467 @rtype: (float, float, float)
2468 """
2469 M = self.masses()
2470 return mathUtils.wMean( self.getXyz(), M )
2471
2472
2474 """
2475 Collect the molecular weight of all atoms in PDBModel.
2476
2477 @return: 1-D array with mass of every atom in 1/12 of C12 mass.
2478 @rtype: array of floats
2479
2480 @raise PDBError: if the model contains elements of unknown mass
2481 """
2482 try:
2483 M = [ molUtils.atomMasses[a['element']]for a in self.getAtoms() ]
2484
2485 except KeyError, why:
2486 raise PDBError('Cannot find mass for '+str(why))
2487
2488 return N.array( M )
2489
2490
2492 """
2493 Molecular weight of PDBModel.
2494
2495 @return: total mass in 1/12 of C12 mass
2496 @rtype: float
2497
2498 @raise PDBError: if the model contains elements of unknown mass
2499 """
2500 return N.sum( self.masses() )
2501
2502
2504 """
2505 Take list of value per atom, return list where all atoms of any
2506 residue are set to the highest value of any atom in that residue.
2507 (after applying mask)
2508
2509 @param atomValues: values per atom
2510 @type atomValues: list
2511 @param mask: atom mask
2512 @type mask: list of int (1||0)
2513
2514 @return: array with values set to the maximal intra-residue value
2515 @rtype: array of float
2516 """
2517 if mask == None:
2518 mask = N.ones( len(atomValues) )
2519
2520
2521 masked = atomValues * mask
2522
2523 result = []
2524
2525
2526 for res in range( 0, self.lenResidues() ):
2527
2528
2529 resAtoms = N.compress( N.equal( self.resMap(), res ), masked )
2530
2531
2532 masterValue = max( resAtoms )
2533
2534 result += resAtoms * 0.0 + masterValue
2535
2536 return N.array( result, 'f' )
2537
2538
2539 - def argsort( self, cmpfunc=None ):
2540 """
2541 Prepare sorting atoms within residues according to comparison function.
2542
2543 @param cmpfunc: function( self.atoms[i], self.atoms[j] ) -> -1, 0, +1
2544 @type cmpfunc: function
2545
2546 @return: suggested position of each atom in re-sorted model
2547 ( e.g. [2,1,4,6,5,0,..] )
2548 @rtype: list of int
2549 """
2550
2551 cmpfunc = cmpfunc or ( lambda a1, a2: cmp(a1['name'],a2['name']) )
2552
2553 result = []
2554 pos = 0
2555
2556
2557 for resAtoms in self.resList():
2558
2559 resIndex = range(0, len( resAtoms ) )
2560
2561
2562 f_cmp = lambda i,j,atoms=resAtoms : cmpfunc( atoms[i], atoms[j] )
2563
2564
2565 resIndex.sort( f_cmp )
2566
2567
2568 resIndex = map( lambda i, delta=pos: i + delta, resIndex )
2569
2570
2571 result += resIndex
2572
2573 pos += len( resAtoms )
2574
2575 return result
2576
2577
2578 - def sort( self, sortArg=None, deepcopy=0 ):
2579 """
2580 Apply a given sort list to the atoms of this model.
2581
2582 @param sortArg: comparison function
2583 @type sortArg: function
2584 @param deepcopy: deepcopy atom dictionaries (default 0)
2585 @type deepcopy: 0||1
2586
2587 @return: copy of this model with re-sorted atoms (see Numeric.take() )
2588 @rtype: PDBModel
2589 """
2590 sortArg = sortArg or self.argsort()
2591
2592 terA = self.__terAtoms
2593
2594 r = self.take( sortArg, deepcopy )
2595
2596 r.__terAtoms = terA
2597
2598 return r
2599
2600
2601 - def unsort( self, sortList ):
2602 """
2603 Undo a previous sorting on the model itself (no copy).
2604
2605 @param sortList: sort list used for previous sorting.
2606 @type sortList: list of int
2607
2608 @return: the (back)sort list used ( to undo the undo...)
2609 @rtype: list of int
2610
2611 @raise PDBError: if sorting changed atom number
2612 """
2613
2614 backSortList = range(0, self.lenAtoms() )
2615 backSortList.sort( lambda i,j, l=sortList: cmp(l[i],l[j]) )
2616
2617
2618 self.keep( backSortList )
2619
2620 return backSortList
2621
2622
2623 - def atomNames(self, start=None, stop=None):
2624 """
2625 Return a list of atom names from start to stop RESIDUE index
2626
2627 @param start: index of first residue
2628 @type start: int
2629 @param stop: index of last residue
2630 @type stop: int
2631
2632 @return: ['C','CA','CB' .... ]
2633 @rtype: list of str
2634 """
2635
2636 start = start or 0
2637
2638 if stop is None:
2639 stop = self.lenResidues()-1
2640
2641
2642 i = self.resIndex()[start]
2643
2644 if stop == self.lenResidues()-1:
2645 j = self.lenAtoms()
2646 else:
2647 j = self.resIndex()[stop+1]
2648
2649 return [ a['name'] for a in self.getAtoms()[i:j] ]
2650
2651
2653 """
2654 Test if B{all} key-value pairs of condition are matched in dic
2655
2656 @param condition: {..}, key-value pairs to be matched
2657 @type condition: dictionary
2658 @param dic: {..}, dictionary to be tested
2659 @type dic: dictionary
2660
2661 @return: 1|0, 1 if all key-value pairs of condition are matched in dic
2662 @rtype: 1|0
2663 """
2664 for k,v in condition.items():
2665 if dic.get( k, None ) not in v:
2666 return 0
2667 return 1
2668
2669
2671 """
2672 Test if B{any} key-value pairs of condition are matched in dic
2673
2674 @param condition: {..}, key-value pairs to be matched
2675 @type condition: dictionary
2676 @param dic: {..}, dictionary to be tested
2677 @type dic: dictionary
2678
2679 @return: 1|0, 1 if any key-value pairs of condition are matched in dic
2680 @rtype: 1|0
2681 """
2682 for k,v in condition.items():
2683 if dic.get( k, None ) in v:
2684 return 1
2685 return 0
2686
2687
2689 """
2690 Get atom positions that match a combination of key=values.
2691 E.g. filter( chain_id='A', name=['CA','CB'] ) -> index
2692
2693 @param mode: 0 combine with AND (default), 1 combine with OR
2694 @type mode: 0||1
2695 @param kw: combination of atom dictionary keys and
2696 values/list of values that will be used to filter
2697 @type kw: filter options, see example
2698
2699 @return: sort list
2700 @rtype: list of int
2701 """
2702
2703 atoms = self.getAtoms()
2704 if mode == 0:
2705 f_test = self.__testDict_and
2706 else:
2707 f_test = self.__testDict_or
2708
2709 for k in kw:
2710 kw[ k ] = t.toList( kw[ k ] )
2711
2712 r = [ i for i in range(self.lenAtoms()) if f_test( atoms[i], kw ) ]
2713 return r
2714
2715
2716 - def filter( self, mode=0, **kw):
2717 """
2718 Extract atoms that match a combination of key=values.
2719 E.g. filter( chain_id='A', name=['CA','CB'] ) -> PDBModel
2720
2721 @param mode: 0 combine with AND (default), 1 combine with OR
2722 @type mode: 0||1
2723 @param kw: combination of atom dictionary keys and
2724 values/list of values that will be used to filter
2725 @type kw: filter options, see example
2726
2727 @return: filterd PDBModel
2728 @rtype: PDBModel
2729 """
2730 return self.take( self.filterIndex( mode=mode, **kw ) )
2731
2732
2733 - def equals(self, ref, start=None, stop=None):
2734 """
2735 Compares the residue and atom sequence in the given range.
2736 Coordinates are not checked, profiles are not checked.
2737
2738 @param start: index of first residue
2739 @type start: int
2740 @param stop: index of last residue
2741 @type stop: int
2742
2743 @return: [ 1||0, 1||0 ],
2744 first position sequence identity 0|1,
2745 second positio atom identity 0|1
2746 @rtype: list if int
2747 """
2748
2749 start = start or 0
2750 if stop == None:
2751 stop = self.lenResidues()
2752
2753
2754 seqID, atmID = 0, 0
2755
2756
2757 if self.sequence()[start:stop] == ref.sequence()[start:stop]:
2758 seqID = 1
2759
2760
2761 if self.atomNames(start,stop-1) == ref.atomNames(start,stop-1):
2762 atmID = 1
2763
2764 return [seqID, atmID]
2765
2766
2768 """
2769 Apply to SORTED models without HETATOMS. Coordinates are not checked.
2770
2771 @note: in some rare cases m1.equalAtoms( m2 ) gives a different result
2772 than m2.equalAtoms( m1 ). This is due to the used
2773 SequenceMatcher class.
2774 @todo: option to make sure atoms are also in same order
2775
2776 @param ref: reference PDBModel
2777 @type ref: PDBModel
2778
2779 @return: (mask, mask_ref), two atom masks for all equal (1) atoms
2780 in models
2781 @rtype: (array, array)
2782 """
2783
2784 seqMask, seqMask_ref = match2seq.compareModels(self, ref)
2785
2786
2787 mask = self.res2atomMask(seqMask)
2788 mask_ref = ref.res2atomMask(seqMask_ref)
2789
2790
2791 equal = N.nonzero(seqMask)
2792 equal_ref = N.nonzero(seqMask_ref)
2793
2794
2795 for i in range(0, len(equal)):
2796
2797
2798 aa = self.atomNames(equal[i],equal[i])
2799 aa_ref = ref.atomNames(equal_ref[i],equal_ref[i])
2800
2801
2802 ind = self.resIndex()[ equal[i] ]
2803 ind_ref = ref.resIndex()[ equal_ref[i] ]
2804
2805
2806 for j in range( len(aa) ):
2807
2808 try:
2809 mask[ind] = aa[j] == aa_ref[j]
2810 ind += 1
2811
2812 except IndexError:
2813 mask[ind] = 0
2814 ind += 1
2815
2816 for j in range( len(aa_ref) ):
2817
2818 try:
2819 mask_ref[ind_ref] = aa_ref[j] == aa[j]
2820 ind_ref += 1
2821 except IndexError:
2822 mask_ref[ind_ref] = 0
2823 ind_ref += 1
2824
2825
2826 return mask, mask_ref
2827
2828
2830 """
2831 Get list of atom indices for this and reference model that converts
2832 both into 2 models with identical residue and atom content.
2833
2834 E.g.
2835 >>> m2 = m1.sort() ## m2 has now different atom order
2836 >>> i2, i1 = m2.compareAtoms( m1 )
2837 >>> m1 = m1.take( i1 ); m2 = m2.take( i2 )
2838 >>> m1.atomNames() == m2.atomNames() ## m2 has again same atom order
2839
2840 @return: indices, indices_ref
2841 @rtype: ([int], [int])
2842 """
2843
2844 seqMask, seqMask_ref = match2seq.compareModels(self, ref)
2845
2846
2847 equal = N.nonzero(seqMask)
2848 equal_ref = N.nonzero(seqMask_ref)
2849
2850 result, result_ref = [], []
2851
2852
2853 for i in range(0, len(equal)):
2854
2855
2856 aa = self.atomNames( equal[i],equal[i] )
2857 aa_ref = ref.atomNames( equal_ref[i],equal_ref[i] )
2858
2859
2860 ind = self.resIndex()[ equal[i] ]
2861 ind_ref = ref.resIndex()[ equal_ref[i] ]
2862
2863 for j in range( len(aa_ref) ):
2864
2865 try:
2866
2867 if aa_ref[j] == aa[j]:
2868 result += [ind + j]
2869 result_ref += [ind_ref + j]
2870 continue
2871
2872 except IndexError:
2873 pass
2874
2875 try:
2876 pos = aa.index( aa_ref[j] )
2877
2878 result += [ind + pos]
2879 result_ref += [ind_ref + j]
2880
2881 except ValueError:
2882 pass
2883
2884 return result, result_ref
2885
2886
2888 """
2889 Look how well a given chain matches a continuous stretch of residues
2890 in ref.
2891
2892 @param chain: chain index
2893 @type chain: int
2894 @param ref: reference PDBModel
2895 @type ref: PDBModel
2896
2897 @return: average relative length of matching chain fragments
2898 @rtype: float
2899 """
2900 m = self.takeChains([chain])
2901 l_0 = len( m )
2902 m_cast = m.take( m.compareAtoms( ref )[0] )
2903
2904 f = 1. * len( m_cast ) / m_cast.lenChains( breaks=1, maxDist=5.)
2905 f = f / len( m )
2906
2907 return f
2908
2909
2911 """
2912 Get list of corresponding chain indices for this and reference model.
2913 Use takeChains() to create two models with identical chain content and
2914 order from the result of this function.
2915
2916 @param ref: reference PDBModel
2917 @type ref: PDBModel
2918 @param breaks: look for chain breaks in backbone coordinates
2919 @type breaks: 1||0
2920 @param fractLimit:
2921 @type fractLimit: float
2922
2923 @return: chainIndices, chainIndices_ref
2924 @rtype: ([int], [int])
2925 """
2926 i, i_ref = self.compareAtoms( ref )
2927
2928 c0 = self.atom2chainIndices( i, breaks=breaks )
2929 c_r = ref.atom2chainIndices( i_ref, breaks=breaks )
2930
2931
2932 c0 = [ c for c in c0 \
2933 if self.__chainFraction( c, ref ) > fractLimit ]
2934 c_r= [ c for c in c_r \
2935 if ref.__chainFraction( c, self ) > fractLimit ]
2936
2937 return c0, c_r
2938
2939
2940
2941
2942
2943
2944
2946 """
2947 Test class
2948 """
2949
2950 - def run( self, local=0 ):
2951 """
2952 run function test
2953
2954 @param local: transfer local variables to global and perform
2955 other tasks only when run locally
2956 @type local: 1|0
2957
2958 @return: coordinates of center of mass
2959 @rtype: array
2960 """
2961
2962 if local: print 'Loading pdb file ..'
2963 m = PDBModel( t.testRoot()+'/rec/1A2P.pdb')
2964
2965
2966 if local: print "removing waters.."
2967 m.removeRes(['TIP3', 'HOH'])
2968
2969
2970
2971
2972 m.addChainFromSegid()
2973
2974
2975 chainIdx = m.chainIndex().tolist()
2976
2977
2978 if local:
2979 print 'The molecule consists of %i chains'% m.lenChains()
2980 print '\tChainId \tFirst atom'
2981 for i in chainIdx:
2982 print '\t%s \t\t%i'%(m.atoms[i]['chain_id'], int(i))
2983
2984
2985 for c in range( 0, len( chainIdx ) ):
2986
2987 if local:
2988 print "chain ", c, " starts with ",
2989 print m.atoms[ chainIdx[c] ]['residue_name'],
2990
2991 print " and has sequence: "
2992
2993
2994 chainMask = N.equal( m.chainMap( breaks=1 ), c )
2995 if local: print m.sequence( chainMask )
2996
2997
2998 if local: print "sorting atoms alphabetically..."
2999 sort = m.argsort()
3000 m2 = m.sort( sort )
3001
3002 if local:
3003 globals().update( locals() )
3004
3005 return N.sum( m2.centerOfMass() )
3006
3007
3009 """
3010 Precalculated result to check for consistent performance.
3011
3012 @return: coordinates of center of mass
3013 @rtype: array
3014 """
3015 return N.sum( N.array([ 29.13901386, 46.70021977, 38.34113311]) )
3016
3017
3018
3019 if __name__ == '__main__':
3020
3021 test = Test()
3022
3023 assert abs( test.run( local=1 ) - test.expected_result() ) < 1e-8
3024
3025
3026