1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """
25 Trajectory - Collection of coordinate frames of a molecule
26 """
27
28 import Numeric as N
29
30
31 import rmsFit
32
33 import tools as T
34 import mathUtils as MU
35 from Biskit.Errors import BiskitError
36 from Biskit import EHandler
37 from PDBModel import PDBModel
38 from ProfileCollection import ProfileCollection
39
40 import string
41 import re
42 import copy
43 import tempfile, os, types
44
45
46 import LinearAlgebra as LA
47
49 pass
50
51
53
55 """
56 Version of class.
57
58 @return: version
59 @rtype: str
60 """
61 return 'Trajectory $Revision: 2.8 $'
62
63
65 """
66 Manage many conformations of one molecule.
67 Read coordinates and PDB infos from many PDB files,
68 superimpose all coordinates to reference structure,
69 calculate pairwise RMSD of all conformations ..
70 """
71
72
73
74 ex_numbers = re.compile('\D*([0-9]+)\D*')
75
76 - def __init__( self, pdbs=None, refpdb=None, rmwat=1,
77 castAll=0, verbose=1 ):
78 """
79 Collect coordinates into Numpy array. By default, the atom content
80 of the first PDB is compared to the reference PDB to look for atoms
81 that have to be removed or re-ordered. It is assumed that the other
82 PDB files in the list have an identical atom content/layout and the
83 same re-ordering / removing is applied to all of them. Set castAll
84 to 1, in order to check each PDB seperately.
85
86 @param pdbs: file names of all conformations OR PDBModels
87 @type pdbs: [ str ] OR [ PDBModel ]
88 @param refpdb: file name of reference pdb
89 @type refpdb: str
90 @param rmwat: skip all TIP3, HOH, Cl-, Na+ from all files (default: 1)
91 @type rmwat: 0|1
92 @param castAll: re-analyze atom content of each frame (default: 0)
93 @type castAll: 0|1
94 @param verbose: verbosity level (default: 1)
95 @type verbose: 1|0
96 """
97 self.ref = None
98 self.frames = None
99 self.resIndex = None
100 self.frameNames = None
101 self.pc = None
102 self.profiles = TrajProfiles()
103 self.verbose= verbose
104
105 if pdbs != None:
106 refpdb = refpdb or pdbs[0]
107
108 self.__create( pdbs, refpdb, rmwat=rmwat, castAll=castAll )
109
110
111 self.initVersion = T.dateString() + ';' + self.version()
112
113
115 """
116 Version of class.
117
118 @return: version
119 @rtype: str
120 """
121 return 'Trajectory $Revision: 2.8 $'
122
123
124 - def __create( self, pdbs, refpdb, rmwat=0, castAll=0 ):
125 """
126 Initiate and create necessary variables.
127
128 @param pdbs: file names of all conformations OR PDBModels
129 @type pdbs: [ str ] OR [ PDBModel ]
130 @param refpdb: file name of reference pdb
131 @type refpdb: str
132 @param rmwat: skip all TIP3, HOH, Cl-, Na+ from all files (default: 1)
133 @type rmwat: 0|1
134 @param castAll: re-analyze atom content of each frame (default: 0)
135 @type castAll: 0|1
136 """
137
138
139 self.setRef( PDBModel( refpdb ) )
140
141
142 if rmwat:
143 wat = ['TIP3', 'HOH', 'WAT', 'Na+', 'Cl-' ]
144 self.ref.remove( lambda a, wat=wat: a['residue_name'] in wat )
145
146
147 self.frames = self.__collectFrames( pdbs, castAll )
148 self.frames.savespace()
149
150
151 self.resIndex = self.ref.resMap()
152
153
154 self.frameNames = pdbs
155
156
158 """
159 Get a single frame and facilitete slicing of tajectories.
160
161 @param i: index OR SliceTyp
162 @type i: int OR [int]
163
164 @return: model OR trajectory
165 @rtype: PDBModel OR Trajectory
166
167 @raise TrajError: if out of memory OR invalid index
168 """
169 try:
170 if type( i ) is int:
171 return self.getPDBModel( i )
172
173 if type( i ) is types.SliceType:
174 start = i.start or 0
175 stop = i.stop or self.lenFrames()
176 if stop > self.lenFrames():
177 stop = self.lenFrames()
178 step = i.step or 1
179 return self.takeFrames( range( start, stop, step ) )
180
181 except MemoryError:
182 raise TrajError, "out of memory, cannot extract frames %s" % str(i)
183
184 raise TrajError, "unsupported index or slice: %s" % str( i )
185
186
188 """
189 Number of frames in the trajectory.
190
191 @return: length
192 @rtype: int
193 """
194 return self.lenFrames()
195
196
198 """
199 called for unpickling the object.
200 """
201 self.__dict__ = state
202
203 self.__defaults()
204
205
207 """
208 backwards compatibility to earlier pickled trajectories
209 """
210 self.pc = getattr( self, 'pc', None )
211 self.frameNames = getattr( self, 'frameNames', None)
212 self.profiles = getattr( self, 'profiles', TrajProfiles() )
213 try:
214 self.frames.savespace()
215 except:
216 pass
217
218
220 """
221 Called before pickling the object.
222 """
223 try:
224 if type( self.frames ) == list or self.frames.typecode() == 'd':
225 EHandler.warning("Converting coordinates to float array.")
226 self.frames = N.array( self.frames ).astype('f')
227 except:
228 EHandler.warning('Could not convert frames to float array.', 1)
229
230 return self.__dict__
231
232
234 """
235 Returna a PDBModel with coordinates that are the average of
236 all frames.
237
238 @return: PDBModel with average structure of trajectory (no fitting!)
239 this trajectory's ref is the source of result model
240 @rtype: PDBModel
241 """
242 result = PDBModel( self.getRef(), noxyz=1 )
243 result.setXyz( N.average( self.frames ) )
244
245 return result
246
247
249 """
250 Read coordinates from list of pdb files.
251
252 @param pdbs: list of file names
253 @type pdbs: [str]
254 @param castAll: analyze atom content of each frame for casting
255 (default: 0)
256 @type castAll: 0|1
257
258 @return: frames x (N x 3) Numpy array (of float)
259 @rtype: array
260 """
261 frameList = []
262 i = 0
263 atomCast = None
264
265 if self.verbose: T.errWrite('reading %i pdbs...' % len(pdbs) )
266
267 refNames = self.ref.atomNames()
268
269 for f in pdbs:
270
271
272 m = PDBModel(f)
273
274
275 if castAll or i==0:
276 atomCast, castRef = m.compareAtoms( self.ref )
277
278 if castRef != range( len( self.ref ) ):
279
280 raise TrajError("Reference PDB doesn't match %s."
281 %m.fileName)
282
283 if atomCast == range( len( m ) ):
284 atomCast = None
285 else:
286 if self.verbose: T.errWrite(' casting ')
287
288
289 if atomCast:
290 m = m.take( atomCast )
291
292
293 if i%100 == 0 and m.atomNames() <> refNames:
294 raise TrajError("%s doesn't match reference pdb."%m.fileName )
295
296 frameList.append( m.xyz )
297
298 i += 1
299 if i%10 == 0 and self.verbose:
300 T.errWrite('#')
301
302 if self.verbose: T.errWrite( 'done\n' )
303
304
305 return N.array(frameList).astype('f')
306
307
309 """
310 @return: reference PDBModel
311 @rtype: PDBModel
312 """
313 return self.ref
314
315
316 - def setRef( self, refModel ):
317 """
318 Assign new reference model.
319
320 @param refModel: PDBModel with same number of atoms as in the frames.
321 @type refModel: PDBModel
322
323 @return: old reference PDBModel
324 @rtype: PDBModel
325 """
326 if (self.ref and refModel.equals( self.ref ) != [1,1] ) or \
327 (self.frames and refModel.lenAtoms() != len( self.frames[0] ) ):
328
329 raise TrajError(\
330 "Atoms of reference model don't match trajectory frames.")
331
332 old, self.ref = self.ref, refModel
333
334 return old
335
336
338 """
339 Concatenate this with other trajectories. The ref model of the
340 new Trajectory is a 'semi-deep' copy of this trajectorie's model.
341 (see L{PDBModel.take()} )::
342 concat( traj [, traj2, traj3, ..] ) -> Trajectory
343
344 @param traj: one or more Trajectory with identical atoms as this one
345 @type traj: Trajectories
346
347 @return: concatenated trajecties
348 @rtype: Trajectory
349 """
350 if len( traj ) == 0:
351 return self
352
353 r = self.__class__()
354
355 r.frames = N.concatenate( (self.frames, traj[0].frames), 0 )
356
357 r.setRef( self.ref.clone())
358
359 if self.frameNames and traj[0].frameNames:
360 r.frameNames = self.frameNames + traj[0].frameNames
361
362 try:
363 if self.pc and traj[0].pc:
364 r.pc['p'] = N.concatenate( (self.pc['p'], traj[0].pc['p']),0)
365 r.pc['u'] = N.concatenate( (self.pc['u'], traj[0].pc['u']),0)
366 except TypeError, why:
367 EHandler.error('cannot concat PC '+str(why) )
368
369 r.profiles = self.profiles.concat( traj[0].profiles )
370
371
372 return r.concat( *traj[1:] )
373
374
376 """
377 Concatenate 2 trajectories of same (frame) length 'horizontally', i.e.
378 for each frame the coordinates of one are appended to the coordinates
379 of the other. The ref model of the new trajectory is a 'semi-deep' copy
380 of this trajectory's model (see L{PDBModel.take()} )::
381 concatAtoms( traj1 [traj2, traj3..]) -> Trajectory
382
383 @param traj: one or more Trajectory of the same number of frames
384 @type traj: Trajectories
385
386 @return: trajectory with concatenated atoms
387 @rtype: Trajectory
388 """
389 if len( traj ) == 0:
390 return self
391
392 r = self.__class__()
393
394 r.frames = N.concatenate( (self.frames, traj[0].frames), 1 )
395 r.setRef( self.ref.concat( traj[0].getRef() ) )
396
397 r.profiles = self.profiles.clone()
398
399 r.frameNames = self.frameNames
400
401 return r.concatAtoms( *traj[1:] )
402
403
405 """
406 @return: number of frames in trajectory
407 @rtype: int
408 """
409 return len( self.frames )
410
411
413 """
414 @return: number of atoms in frames
415 @rtype: int
416 """
417 return N.shape( self.frames )[1]
418
419
421 """
422 Get atom mask.
423
424 @param what: Create the mask using::
425 - funct( self.ref.atoms[i] ) -> int
426 - list int ( indices )
427 - mask
428 - list of str (atom names)
429 @type what: any
430
431 @return: list of 1|0 1 x N_atoms
432 @rtype: list[1|0]
433 """
434 return self.getRef().mask( what )
435
436
438 """
439 A list with residue numbers mapped to each position. i.e.
440 C{ [00001111222222222333..] }
441
442 @param force: calculate even if it already been calculated
443 @type force: 1|0
444
445 @return: list of int
446 @rtype: [int]
447 """
448 if not self.resIndex or not force:
449 self.resIndex = self.getRef().resMap()
450
451 return self.resIndex
452
453
455 """
456 Return a copy of the trajectory containing only the specified frames.
457
458 @param indices: positions to take
459 @type indices: [int]
460
461 @return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
462 @rtype: Trajectory
463 """
464
465 indices = N.compress( N.less( indices, len( self.frames) ), indices )
466
467 r = self.__class__()
468
469
470 r.frames = N.take( self.frames, indices, 0 )
471
472
473 r.setRef( self.ref.take( range( self.ref.lenAtoms() )) )
474
475 if self.frameNames != None:
476 r.frameNames = N.take( self.frameNames, indices, 0 )
477 r.frameNames = map( ''.join, r.frameNames.tolist() )
478
479 r.pc = self.__takePca( indices )
480
481 r.profiles = self.profiles.take( indices )
482
483 r.resIndex = self.resIndex
484
485 return r
486
487
489 """
490 Copy trajectory.
491
492 @return: Trajectory (or sub-class), copy of this trajectory
493 @rtype: Trajectory
494 """
495 return self.takeFrames( range( self.lenFrames() ) )
496
497
499 """
500 Compress trajectory with a frame mask.
501
502 @param mask: frame mask, 1 x N_frames
503 @type mask: [1|0]
504
505 @return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
506 @rtype: Trajectory
507 """
508 return self.takeFrames( N.nonzero( mask ) )
509
510
511 - def replaceContent( self, traj ):
512 """
513 Replace content of this trajectory by content of given traj.
514 No deep-copying, only references are taken.
515
516 @param traj: trajectory
517 @type traj: trajectory
518 """
519 self.frames = traj.frames
520 self.ref = traj.ref
521 self.frameNames = traj.frameNames
522 self.pc = traj.pc
523 self.profiles = traj.profiles
524
525
527 """
528 in-place version of L{takeFrames}. keepFrames( indices ) -> None
529
530 @param indices: frame numbers
531 @type indices: [int]
532 """
533 r = self.takeFrames( indices )
534 self.replaceContent( r )
535
536
538 """
539 Remove given frames from this trajectory object.
540
541 @param indices: frame numbers
542 @type indices: [int]
543 """
544 i = range( self.lenFrames() )
545 i.remove( N.array(indices) )
546 self.keepFrames( i )
547
548
549 - def takeAtoms( self, indices, returnClass=None ):
550 """
551 Take atoms from frames::
552 takeAtoms( indices, type=None ) -> copy of Trajectory
553
554 @param indices: list of atom indices
555 @type indices: [int]
556 @param returnClass: default: None, same class as this object
557 @type returnClass: class OR None
558
559 @return: copy of this Trajectory (with fewer atoms)
560 @rtype: Trajectory
561 """
562
563 returnClass = returnClass or self.__class__
564 r = returnClass()
565
566
567 r.__dict__.update( self.__dict__ )
568 r.frames = r.ref = r.frameNames = r.profiles = None
569
570 r.frames = N.take( self.frames, indices, 1 )
571
572 r.setRef( self.ref.take( indices ) )
573
574 r.frameNames = copy.copy( self.frameNames )
575 r.resIndex = None
576
577 r.profiles = self.profiles.clone()
578
579 r.pc = self.pc
580
581 return r
582
583
585 """
586 Get copy of this trajectory with only atoms marked 1 in aMask.
587
588 @param aMask: atom mask [10011100101111...],
589 lst 1 x N_atoms of 1(keep) or 0
590 @type aMask: [1|0]
591 @param returnClass: default: None, same class as this object
592 @type returnClass: class
593
594 @return: copy of Trajectory with fewer atoms
595 @rtype: Trajectory
596 """
597 return self.takeAtoms( N.nonzero( aMask ), returnClass )
598
599
601 """
602 in-place version of L{takeAtoms}. keepAtoms( indices ) -> None
603
604 @param indices: atom numbers
605 @type indices: [int]
606 """
607 r = self.takeAtoms( indices )
608 self.replaceContent( r )
609
610
612 """
613 Remove atoms from all frames of trajectory and from reference
614 structure.
615
616 @param what: Specify what atoms to remove::
617 - function( atom_dict ) -> 1 || 0 or (1..remove)
618 - list of int [4, 5, 6, 200, 201..], indices of atoms
619 to remove
620 - list of int [11111100001101011100..N_atoms], mask
621 (1..remove)
622 - int, remove atom with this index
623 @type what: any
624
625
626 @return: N.array(1 x N_atoms_old) of 0||1, mask used to compress the
627 atoms and xyz arrays. This mask can be used to apply the
628 same change to another array of same dimension as the
629 old(!) xyz and atoms.
630 @rtype: array
631 """
632
633 mask = N.logical_not( self.atomMask( what ) )
634
635 self.keepAtoms( N.nonzero( mask ) )
636
637 return mask
638
639
640 - def takeChains(self, chainLst, returnClass=None ):
641 """
642 Extract some chains from complete Trajectory.
643
644 @param chainLst: chains to isolate
645 @type chainLst: [int]
646 @param returnClass: default: None, same class as this object
647 @type returnClass: class
648
649 @return: Trajectory with only those chains
650 @rtype: Trajectory
651 """
652 chainMap = self.ref.chainMap()
653
654 atomMask = map( lambda i, allowed=chainLst: i in allowed, chainMap)
655
656 return self.compressAtoms( atomMask, returnClass )
657
658
659 - def fit( self, mask=None, ref=None, n_it=1,
660 prof='rms', verbose=1, fit=1, **profInfos ):
661 """
662 Superimpose all coordinate frames on reference coordinates. Put rms
663 values in a profile. If n_it > 1, the fraction of atoms considered
664 for the fit is put into a profile called |prof|_considered
665 (i.e. by default 'rms_considered').
666
667 @param mask: atom mask, atoms to consider default: [all]
668 @type mask: [1|0]
669 @param ref: use as reference, default: None, average Structure
670 @type ref: PDBModel
671 @param n_it: number of fit iterations, kicking out outliers on the way
672 1 -> classic single fit, 0 -> until convergence
673 (default: 1)
674 @type n_it: int
675 @param prof: save rms per frame in profile of this name, ['rms']
676 @type prof: str
677 @param verbose: print progress info to STDERR (default: 1)
678 @type verbose: 1|0
679 @param fit: transform frames after match, otherwise just calc rms
680 (default: 1)
681 @type fit: 1|0
682 @param profInfos: additional key=value pairs for rms profile info []
683 @type profInfos: key=value
684 """
685 if ref == None:
686 refxyz = N.average( self.frames, 0 )
687 else:
688 refxyz = ref.getXyz()
689
690 if mask == None:
691 mask = N.ones( len( refxyz ) )
692
693 refxyz = N.compress( mask, refxyz, 0 )
694
695 if verbose: T.errWrite( "rmsd fitting..." )
696
697 superImposed = []
698 rms = []
699 non_outliers = []
700 iterations = []
701
702 for i in range(0, len( self.frames) ):
703
704 xyz = self.frames[i]
705
706 if n_it != 1:
707 (r, t), rmsdList = rmsFit.match( refxyz,
708 N.compress( mask, xyz, 0), n_it)
709 iterations.append( len( rmsdList ) )
710 non_outliers.append( rmsdList[-1][0] )
711
712 xyz_transformed = N.dot( xyz, N.transpose(r)) + t
713
714 rms += [ rmsdList[-1][1] ]
715
716 else:
717 r, t = rmsFit.findTransformation( refxyz,
718 N.compress( mask, xyz, 0) )
719
720 xyz_transformed = N.dot( xyz, N.transpose(r)) + t
721
722 d = N.sqrt(N.sum(N.power( N.compress(mask, xyz_transformed,0)\
723 - refxyz, 2), 1))
724 rms += [ N.sqrt( N.average(d**2) ) ]
725
726 if fit:
727 self.frames[i] = xyz_transformed.astype('f')
728
729 if verbose and i%100 == 0:
730 T.errWrite( '#' )
731
732 self.setProfile( prof, rms, n_iterations=n_it, **profInfos )
733
734 if non_outliers:
735 self.setProfile( prof+'_considered', non_outliers,
736 n_iterations=n_it,
737 comment='fraction of atoms considered for iterative fit' )
738
739 if verbose: T.errWrite( 'done\n' )
740
741
761
762
763
764
765
766 - def blockFit2ref( self, refModel=None, mask=None, conv=1e-6 ):
767 """
768 Fit trajectory until convergence onto it's own average and then
769 transform the average of all frames onto the reference. To be used
770 with parallell trajectories.
771
772 @param refModel: Reference model (default: None)
773 @type refModel: PDBModel
774 @param mask: atom mask to apply before fitting
775 (default: None, all atoms)
776 @type mask: [1|0]
777 @param conv: convergence cutoff creterion (default: 1e-6)
778 @type conv: float
779 """
780 self.fit( ref=self.ref )
781
782 m_avg = self.avgModel()
783
784
785 d = 1.
786 dd= 1.
787 while dd >= conv:
788
789 self.fit( ref=m_avg, mask=mask )
790 m_new_avg = self.avgModel()
791
792 oldD, d = d, m_avg.rms( m_new_avg, mask=mask )
793 T.flushPrint( "rms difference: %f" % d )
794
795 dd = oldD - d
796 m_avg = m_new_avg
797
798
799 if refModel:
800 T.flushPrint('fitting trajectory en-block onto reference...')
801
802 if refModel.atomNames() != self.ref.atomNames():
803 ref_i, i = refModel.compareAtoms( m_avg )
804
805 refModel = refModel.take( ref_i )
806 m_avg = m_avg.take( i )
807
808 r, t = m_avg.transformation( refModel, mask )
809 self.transform( r, t )
810
811
813 """
814 Write (possibly transformed) coordinates back to pdb.
815
816 @param index: frame index in trajectory
817 @type index: int
818 @param fname: name of new file
819 @type fname: str
820 """
821 try:
822 self.getPDBModel( index ).writePdb( fname )
823 except:
824 EHandler.error('Error writing %s.' % fname)
825
826
828 """
829 Write coordinates to an NMR-style MODEL/ENDMDL pdb file.
830
831 @param fname: name of new file
832 @type fname: str
833 @param frames: frame indices (default: None, all)
834 @type frames: [int]
835 """
836
837 if frames is None:
838 frames = range( self.lenFrames() )
839
840
841 out = open(fname, 'w')
842
843 tmpFile = tempfile.mktemp()
844
845 for n in frames:
846
847
848 self.writePdb( n, tmpFile )
849
850 pdb_temp = open( tmpFile )
851
852 out.write("MODEL%6i\n" % n)
853 for line in pdb_temp:
854 if line[:3] <> 'END':
855 out.write(line)
856 out.write("ENDMDL\n")
857
858
859 pdb_temp.close()
860 os.remove( tmpFile )
861
862 out.write("END")
863 out.close()
864
865
866 - def writeCrd( self, fname, frames=None ):
867 """
868 Write frames to Amber crd file (w/o box info).
869
870 @param fname: output file name
871 @type fname: str
872 @param frames: frame indices (default: all)
873 @type frames: [int]
874 """
875 if frames == None:
876 frames = range( self.lenFrames() )
877
878 template = " %7.3f" * 10 + '\n'
879
880
881 out = open( T.absfile(fname), 'w')
882 __write = out.write
883
884 __write('\n')
885
886 n_lines = None
887
888 for fi in frames:
889 f = N.ravel( self.frames[ fi ] )
890
891 if n_lines is None:
892 n_lines = n_lines or len( f ) / 10
893 overhang= ( 0 != len( f ) % 10 )
894 i_lines = range( n_lines )
895
896 for i in i_lines:
897 __write( template % tuple( f[10*i:10*i+10] ) )
898
899 if overhang:
900 for x in f[i*10+10:]:
901 __write(" %7.3f" % x )
902 __write('\n')
903
904 out.close()
905
906
908 """
909 Get PDBModel object for a particular frame of the trajectory.
910
911 @param index: frame index
912 @type index: int
913
914 @return: model
915 @rtype: PDBModel
916 """
917 s = PDBModel( self.ref, noxyz=1 )
918 s.setXyz( self.frames[ index ] )
919 return s
920
921
922 - def setProfile( self, name, prof, mask=None, default=None, asarray=1,
923 comment=None, **moreInfo ):
924 """
925 Add/override profile.
926
927 @param name: profile name
928 @type name: str
929 @param prof: list of values
930 @type prof: [any]
931
932 @param mask: list 1 x N_items of 0|1, if there are less values
933 than items, provide mask for missing values,
934 N.sum(mask)==N_items
935 @type mask: [0|1]
936 @param default: value for items masked.
937 @type default: any
938 @param asarray: store as list (0), as array (2) or store numbers as
939 array but everything else as list (1) (default: 1)
940 @type asarray: 0|1|2
941 @param comment: goes into info[name]['comment']
942 @type comment: str
943 @param moreInfo: additional key-value pairs for info[name]
944 @type moreInfo:
945
946 @raise ProfileError: if length of prof != N_residues
947 """
948 self.profiles.set( name, prof, mask, default, asarray=asarray,
949 comment=comment, **moreInfo )
950
951
952 - def profile( self, name, default=None ):
953 """
954 Get the values of a profile::
955 get( name ) -> list of values
956
957 @param name: profile name
958 @type name: str
959 @param default: default result if no profile is found
960 @type default: any
961
962 @raise ProfileError: if no profile is found with |name|
963 """
964 return self.profiles.get( name, default )
965
966
968 """
969 GEt information associated with a profile::
970 profileInfo( name ) -> dict with infos about profile
971 Guaranteed infos: 'version'->str, 'comment'->str, 'changed'->1|0
972
973 @param name: profile name
974 @type name: str
975
976 @raise ProfileError: if no profile is found with |name|
977 """
978 return self.profiles.getInfo( name )
979
980
982 """
983 Add/Override infos about a given profile::
984 e.g. setInfo('relASA', comment='new', params={'bin':'whatif'})
985
986 @param name: profile name
987 @type name: str
988
989 @raise ProfileError: if no profile is found with |name|
990 """
991 self.profiles.setInfo( name, **args )
992
993
994 - def profile2mask(self, profName, cutoff_min=None, cutoff_max=None ):
995 """
996 Create a mask from a profile with optional cutoff values.
997
998 @param profName: profile name
999 @type profName: str
1000 @param cutoff_min: lower cutoff value (default: None)
1001 @type cutoff_min: float
1002 @param cutoff_max: upper cutoff value (default: None)
1003 @type cutoff_max: float
1004
1005 @return: mask lenFrames x 1|0
1006 @rtype: [1|0]
1007 """
1008 return self.profiles.profile2mask( profName, cutoff_min, cutoff_max )
1009
1010
1012 """
1013 Create a biggles.FramedPlot objetc from a profile. Display the plot
1014 with biggles.FramedPlot.show()::
1015 plotProfile(name1, [name2, ..],[arg1=x, arg2=y])->biggles.FramedPlot
1016
1017 @param name: profile name
1018 @type name: str
1019
1020 @return: plot object
1021 @rtype: biggles.FramedPlot
1022 """
1023 return self.profiles.plot( *name, **args )
1024
1025
1027 """
1028 Calculate rmsd between each 2 coordinate frames.
1029
1030 @param aMask: atom mask
1031 @type aMask: [1|0]
1032 @return: frames x frames array of float
1033 @rtype: array
1034 """
1035 frames = self.frames
1036
1037 if aMask != None:
1038 frames = N.compress( aMask, frames, 1 )
1039
1040 result = N.zeros( (len( frames ), len( frames )), 'f' )
1041
1042 for i in range(0, len( frames ) ):
1043
1044 for j in range( i+1, len( frames ) ):
1045 if noFit:
1046 d = N.sqrt(N.sum(N.power(frames[i]-frames[j], 2), 1))
1047 result[i,j] = result[j,i] = N.sqrt( N.average(d**2) )
1048
1049 else:
1050 rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
1051 result[i,j] = result[j,i] = rmsdLst[0][1]
1052
1053 return result
1054
1055
1057 """
1058 Get RMS of each atom from it's average position in trajectory.
1059 The frames should be superimposed (fit() ) to a reference.
1060
1061 @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
1062 atoms to be considered.
1063 @type mask: [1|0]
1064
1065 @return: Numpy array ( N_unmasked x 1 ) of float.
1066 @rtype: array
1067 """
1068 frames = self.frames
1069 if mask != None:
1070 frames = N.compress( mask, frames, 1 )
1071
1072
1073 avg = N.average( frames )
1074
1075 return N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2) ))
1076
1077
1078 - def __resWindow( self, res, n_neighbores, rchainMap=None,
1079 left_allowed=None, right_allowed=None ):
1080 """
1081 Get indices of all atoms of a residue and some atoms of its
1082 neighboring residues (if they belong to the same chain).
1083
1084 @param res: residue index
1085 @type res: int
1086 @param n_neighbores: number of residues to include right and left
1087 @type n_neighbores: int
1088 @param right_allowed: array 1 x N_atoms of 1|0, possible neighbore
1089 atoms
1090 @type right_allowed: array
1091 @param left_allowed: array 1 x N_atoms of 1|0, possible neighbore atoms
1092 @type left_allowed: array
1093 @param rchainMap: array 1 x N_residues of int, chain id of each res
1094 @type rchainMap: array
1095
1096 @return: atoms of res, atoms of neighbores
1097 @rtype: [ int ], [ int ]
1098 """
1099
1100 if rchainMap is None:
1101 rchainMap = N.take( self.chainMap(), self.resIndex() )
1102
1103 left_allowed = left_allowed or N.nonzero( self.ref.maskBB() )
1104 right_allowed= right_allowed or N.nonzero( self.ref.maskBB() )
1105
1106
1107 result = self.ref.res2atomIndices( [ res ] )
1108
1109
1110 l = self.ref.lenResidues()
1111 chain = rchainMap[res]
1112
1113 outer_left = range( res-n_neighbores, res )
1114 outer_right= range( res+1, res+n_neighbores+1 )
1115
1116 outer_left = [ i for i in outer_left if i > 0 and rchainMap[i]==chain]
1117 outer_right= [ i for i in outer_right if i < l and rchainMap[i]==chain]
1118
1119
1120 if outer_left:
1121 outer_left = self.ref.res2atomIndices( outer_left )
1122 outer_left = MU.intersection( left_allowed, outer_left )
1123
1124 if outer_right:
1125 outer_right= self.ref.res2atomIndices( outer_right)
1126 outer_right= MU.intersection( right_allowed, outer_right)
1127
1128 return result, outer_left + outer_right
1129
1130
1131 - def getFluct_local( self, mask=None, border_res=1,
1132 left_atoms=['C'], right_atoms=['N'], verbose=1 ):
1133 """
1134 Get mean displacement of each atom from it's average position after
1135 fitting of each residue to the reference backbone coordinates of itself
1136 and selected atoms of neighboring residues to the right and left.
1137
1138 @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
1139 should be calculated
1140 @type mask: array
1141 @param border_res: number of neighboring residues to use for fitting
1142 @type border_res: int
1143 @param left_atoms: atoms (names) to use from these neighbore residues
1144 @type left_atoms: [str]
1145 @param right_atoms: atoms (names) to use from these neighbore residues
1146 @type right_atoms: [str]
1147
1148 @return: Numpy array ( N_unmasked x 1 ) of float
1149 @rtype: array
1150 """
1151 if mask == None:
1152 mask = N.ones( len( self.frames[0] ), 'i' )
1153
1154 if verbose: T.errWrite( "rmsd fitting per residue..." )
1155
1156 residues = N.nonzero( self.ref.atom2resMask( mask ) )
1157
1158
1159 fit_atoms_right = N.nonzero( self.ref.mask( right_atoms ) )
1160 fit_atoms_left = N.nonzero( self.ref.mask( left_atoms ) )
1161
1162 rchainMap = N.take( self.ref.chainMap(), self.ref.resIndex() )
1163
1164 result = []
1165
1166 for res in residues:
1167
1168 i_res, i_border = self.__resWindow(res, border_res, rchainMap,
1169 fit_atoms_left, fit_atoms_right)
1170
1171 try:
1172 if not len( i_res ): raise PDBError, 'empty residue'
1173
1174 t_res = self.takeAtoms( i_res + i_border )
1175
1176 i_center = range( len( i_res ) )
1177
1178 mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()
1179
1180
1181 t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )
1182
1183 frames = N.take( t_res.frames, i_center, 1 )
1184
1185 avg = N.average( frames )
1186
1187 rmsd = N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2) ))
1188
1189 result.extend( rmsd )
1190
1191 if verbose: T.errWrite('#')
1192
1193 except ZeroDivisionError:
1194 result.extend( N.zeros( len(i_res), 'f' ) )
1195 T.errWrite('?' + str( res ))
1196
1197 if verbose: T.errWriteln( "done" )
1198
1199 return result
1200
1201
1203 """
1204 Take list of value per atom, return list where all atoms of any
1205 residue are set to the highest value of any atom in that residue.
1206 (after applying mask)
1207
1208 @param atomValues: list 1 x N, values per atom
1209 @type atomValues: [ float ]
1210 @param mask: list 1 x N, 0|1, 'master' atoms of each residue
1211 @type mask: [1|0]
1212
1213 @return: Numpy array 1 x N of float
1214 @rtype: array
1215 """
1216 if mask == None:
1217 mask = N.ones( len( self.frames[0] ), 'i' )
1218
1219
1220 masked = atomValues * mask
1221
1222 result = []
1223
1224
1225 for res in range( 0, self.resMap()[-1]+1 ):
1226
1227
1228 resAtoms = N.compress( N.equal( self.resMap(), res ), masked )
1229
1230
1231 masterValue = max( resAtoms )
1232
1233 result += resAtoms * 0.0 + masterValue
1234
1235 return N.array( result )
1236
1237
1239 """
1240 Set value of all atoms of each residue to fluctuation of
1241 its gamma atom ( CG, SG, OG ).
1242
1243 @param fluctList: 1x N, precalculated list of values or
1244 None (will be calculated new)
1245 @type fluctList: [float]
1246
1247 @return: Numpy array 1 x N of float
1248 @rtype: [float]
1249 """
1250 if fluctList == None:
1251 fluctList = self.getFluct_local()
1252
1253
1254 select = lambda a: a['name'] in ['CG', 'CG1', 'CG2', 'OG',
1255 'OG1', 'OG2','SG']
1256 CG_mask = self.mask( select )
1257
1258 return self.residusMaximus( fluctList, CG_mask )
1259
1260
1262 """
1263 Convert list of atomic fluctuations to list of residue
1264 fluctuation.
1265
1266 @param atomFluctList: array 1 x N_atoms of float
1267 @type atomFluctList: [float]
1268
1269 @return: array 1 x N_residues of float
1270 @rtype: [float]
1271
1272 @raise TrajError: if result length <> N_residues:
1273 """
1274 if atomFluctList == None:
1275 atomFluctList = self.getFluct_global()
1276
1277
1278
1279 result = self.residusMaximus( atomFluctList, self.ref.maskBB() )
1280
1281
1282 result = N.take( result, self.ref.resIndex() )
1283
1284
1285
1286 if len( result ) <> self.ref.lenResidues():
1287 raise TrajError(
1288 "getResFluct(): Length of result list (%i) <>" % len(result)+
1289 " number of residues (%i)." % self.ref.lenResidues() )
1290
1291 return result
1292
1293
1295 """
1296 Compare to lists by their first, then second, etc item.
1297
1298 @param l1: list
1299 @type l1: [float]
1300 @param l2: list
1301 @type l2: [float]
1302
1303 @return: result of comparison (-1 == l1[i] < l2[i])
1304 @rtype: [-1|0|1]
1305 """
1306 result = cmp( l1[0], l2[0] )
1307
1308 if result == 0 and len(l1) > 1 and len(l2) > 1:
1309 return self.__cmpLists( l1[1:], l2[1:] )
1310
1311 return result
1312
1313
1315 """
1316 Compare 2 file names by the numbers contained in them using the
1317 regular expression L{ex_numbers} (or as strings, if no numbers are
1318 found)::
1319 f1, f2 - strings, e.g. 'frame_1_188.pdb'
1320
1321 @param f1: file name
1322 @type f1: str
1323 @param f2: file name
1324 @type f2: str
1325
1326 @return: result of comparison (-1 == f1 < f2)
1327 @rtype: -1|0|+1
1328 """
1329 try:
1330
1331 l1 = self.ex_numbers.findall( f1 )
1332 l2 = self.ex_numbers.findall( f2 )
1333
1334 l1 = map( float, l1 )
1335 l2 = map( float, l2 )
1336
1337 if len( l1 ) > 0 and len( l2 ) > 0:
1338 return self.__cmpLists( l1, l2 )
1339
1340 except:
1341 pass
1342
1343 return cmp( l1, l2 )
1344
1345
1347 """
1348 Prepare sorting list by file names. Assuming the file names contains
1349 some numbers seperated by non-number characters. Sorting is done after
1350 Nr1 then Nr2 then..
1351
1352 @return: list of frame sort order
1353 @rtype: [int]
1354 """
1355 names = self.frameNames
1356
1357 result = range(0, len(names) )
1358
1359
1360 f_cmp = lambda i,j, ns=names: self.__cmpFileNames( ns[i], ns[j])
1361
1362 result.sort( f_cmp )
1363
1364 return result
1365
1366
1368 """
1369 Apply to a trajectory object to sort the frames and names according
1370 to the sortList.
1371
1372 @param sortList: list to sort after (default: None)
1373 @type sortList: [int]
1374
1375 @raise TrajError: if sortList doesn't fit number of frames or names
1376 """
1377 if sortList == None:
1378 sortList = self.argsortFrames()
1379
1380 if len(sortList) != len(self.frames) or\
1381 len(sortList) != len(self.frameNames) :
1382 raise TrajError("Unequal number of frames and names when sorting")
1383
1384
1385 self.keepFrames( sortList )
1386
1387
1389 """
1390 Sorts atoms B{WITHIN} residues in reference and all frames.
1391
1392 @param f_cmp: atom comparison function
1393 C{ f_cmp( atoms[i], atoms[j]) -> -1|0|+1 }
1394 (default: alphabetic ordering by atom['name'] )
1395 @type f_cmp: function
1396
1397 @raise PDBModelError: if sorting has changed number of atoms in
1398 reference
1399 """
1400
1401 atomSortList = self.ref.argsort( f_cmp )
1402
1403 self.keepAtoms( atomSortList )
1404
1405
1407 """
1408 extract PCA results for certain frames.
1409
1410 @param indices: frame indecies
1411 @type indices: [int]
1412
1413 @return: list of pca values
1414 @rtype: [float]
1415 """
1416 result = copy.deepcopy( getattr(self, 'pc', None ))
1417
1418 if result != None:
1419
1420 result['p'] = N.take( result['p'], indices, 0 )
1421
1422 result['u'] = N.take( result['u'], indices, 0 )
1423
1424 if result['fMask'] != None:
1425 result['fMask'] = N.take( result['fMask'], indices, 0 )
1426
1427 return result
1428
1429
1431 """
1432 Compress PCA results using a frame mask.
1433
1434 @param fMask: frame mask
1435 @type fMask: [1|0]
1436
1437 @return: list of pca values
1438 @rtype: [float]
1439 """
1440 return self.__takePca( N.nonzero( fMask ) )
1441
1442
1443 - def getPca( self, aMask=None, fMask=None, fit=1 ):
1444 """
1445 Get the results form a principal component analysis.
1446
1447 @param aMask: 1 x N_atoms of 1|0, atom mask, default: last one used
1448 @type aMask: [1|0]
1449 @param fMask: 1 x N_frames of 1|0, frame mask, default: all
1450 @type fMask: [1|0]
1451 @param fit: fit to average structure before doing the PC analysis
1452 (default: 1)
1453 @type fit: 1|0
1454
1455 @return: Dictionary with results from the PC analysis::
1456 dic {'p': projection of each frame in PC space,
1457 'e': list of eigen values,
1458 'fit':.., 'aMask':.., 'fMask':.. parameters used}
1459 @rtype: dict
1460 """
1461 if aMask == None:
1462 aMask = N.ones( self.getRef().lenAtoms() )
1463
1464 pc = getattr(self, 'pc', None)
1465
1466
1467 if pc != None and pc['fMask'] == fMask and pc['fit'] == fit and \
1468 aMask == pc['aMask']:
1469
1470 return pc
1471
1472 evectors, proj, evalues = self.pca( aMask, fMask, fit )
1473
1474 pc = {}
1475 pc['aMask'] = aMask
1476 pc['fMask'] = fMask
1477 pc['fit'] = fit
1478 pc['p'] = proj
1479 pc['e'] = evalues
1480 pc['u'] = evectors
1481
1482 self.pc = pc
1483
1484 return pc
1485
1486
1487 - def pca( self, atomMask=None, frameMask=None, fit=1 ):
1488 """
1489 Calculate principal components of trajectory frames.
1490
1491 @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
1492 (default: all)
1493 @type atomMask: [1|0]
1494 @param frameMask: 1 x N_frames, [001111..] frames to consider
1495 (default all )
1496 @type frameMask: [1|0]
1497
1498 @return: (N_frames x N_frames), (1 x N_frames),
1499 projection of each frame in PC space, eigenvalue of each PC
1500 @rtype: array, array, array
1501 """
1502 frameMask = frameMask or N.ones( len( self.frames ) )
1503
1504 atomMask = atomMask or N.ones( self.getRef().lenAtoms() )
1505
1506 if fit:
1507 self.fit( atomMask )
1508
1509 refxyz = N.average( self.frames, 0 )
1510
1511 data = N.compress( frameMask, self.frames, 0 )
1512
1513 data = data - refxyz
1514
1515 data = N.compress( atomMask, data, 1 )
1516
1517
1518 data = N.array( map( N.ravel, data ) )
1519
1520 V, L, U = LA.singular_value_decomposition( data )
1521
1522 return U, V * L, N.power(L, 2)
1523
1524
1525 - def pcMovie( self, ev, steps, factor=1., ref=0, morph=1 ):
1526 """
1527 Morph between the two extreme values of a single principal
1528 component.
1529
1530 @param ev: EigenVector to visualize
1531 @type ev: int
1532 @param steps: number of intermediate frames
1533 @type steps: int
1534 @param factor: exageration factor (default: 1 = No exageration)
1535 @type factor: float
1536 @param ref: take other eigenvecors from this frame (default: 1)
1537 @type ref: int
1538 @param morph: morph between min and max (1) or take real values (0)
1539 (default: 1)
1540 @type morph: 1|0
1541
1542 @return: Trajectory with frames visualizing the morphing.
1543 @rtype: Trajectory
1544 """
1545 fit = 1
1546 if self.pc:
1547 fit = self.pc['fit']
1548 pc = self.getPca( fit=fit )
1549
1550
1551 U = pc['u']
1552
1553
1554 x_avg = N.average(self.frames, 0)
1555 X = N.array( [N.ravel(x) for x in self.frames - x_avg] )
1556
1557
1558 alpha_0 = N.dot( X[ref], U[ev] )
1559
1560
1561 alpha_range = N.dot(X, U[ev]) - alpha_0
1562
1563
1564 if morph:
1565 a_min = factor * min(alpha_range)
1566 a_max = factor * max(alpha_range)
1567 delta = (a_max - a_min) / steps
1568 alpha_range = [ a_min + i*(delta) for i in range(0, steps) ]
1569 else:
1570 alpha_range = N.sort( alpha_range )
1571 delta = len(alpha_range) / (steps * 1.0)
1572 alpha_range = [ alpha_range[ int(round( i*delta )) ]
1573 for i in range(0,steps) ]
1574
1575
1576 Y = N.array( [ X[ref] + alpha * U[ev] for alpha in alpha_range] )
1577
1578
1579 Y = N.reshape(Y, (Y.shape[0], -1, 3))
1580 Y = x_avg + Y
1581
1582 result = self.__class__()
1583 result.ref = self.ref
1584
1585 result.frames = Y
1586 return result
1587
1588
1589
1590
1591
1592
1593
1595 """
1596 Test class
1597 """
1598
1599
1600 - def run( self, local=0 ):
1646
1647
1649 """
1650 Precalculated result to check for consistent performance.
1651
1652 @return: sum of all residue rmsd values
1653 @rtype: float
1654 """
1655 return 58.101235746353879
1656
1657
1658 if __name__ == '__main__':
1659
1660 test = Test()
1661
1662 assert abs( test.run( local=1 ) - test.expected_result() ) < 1e-6
1663