Package Biskit :: Module EnsembleTraj
[hide private]
[frames] | no frames]

Source Code for Module Biskit.EnsembleTraj

  1  ## Automatically adapted for numpy.oldnumeric Mar 26, 2007 by alter_code1.py 
  2   
  3  ## 
  4  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  5  ## Copyright (C) 2004-2006 Raik Gruenberg & Johan Leckner 
  6  ## 
  7  ## This program is free software; you can redistribute it and/or 
  8  ## modify it under the terms of the GNU General Public License as 
  9  ## published by the Free Software Foundation; either version 2 of the 
 10  ## License, or any later version. 
 11  ## 
 12  ## This program is distributed in the hope that it will be useful, 
 13  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 15  ## General Public License for more details. 
 16  ## 
 17  ## You find a copy of the GNU General Public License in the file 
 18  ## license.txt along with this program; if not, write to the Free 
 19  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 20  ## 
 21  ## 
 22   
 23  ## last $Author: graik $ 
 24  ## last $Date: 2007/04/06 10:16:06 $ 
 25  ## $Revision: 2.12 $ 
 26   
 27  """ 
 28  Multi-copy trajectory 
 29  """ 
 30   
 31  from Trajectory import Trajectory, TrajError 
 32  from Biskit import EHandler 
 33   
 34  import Biskit.mathUtils as M 
 35   
 36  import types 
 37  import numpy.oldnumeric as N 
 38  import Biskit.tools as T 
 39   
 40  try: 
 41      import biggles 
 42  except: 
 43      bigges = 0 
 44   
 45   
46 -def traj2ensemble( traj, members=10 ):
47 """ 48 Cast normal Trajectory into EnsembleTrajectory. Resort frames 49 alphabetically by their frame name. This must lead to a sorting first by 50 time step and then by member. 51 52 @param members: number of member trajectories in EnsembleTraj (default: 10) 53 @type members: int 54 55 @return: converted trajectory 56 @rtype: EnsembleTraj 57 """ 58 result = EnsembleTraj( n_members=members ) 59 60 result.__dict__.update( traj.__dict__ ) 61 62 result.sortFrames() 63 64 return result
65 66
67 -class EnsembleTrajError( TrajError ):
68 pass
69
70 -class EnsembleTraj( Trajectory ):
71 """ 72 Multiple copy trajectory. The order of the frame names is assumed to be 73 as follows:: 74 time0_member1, time0_member2, .., time1_member1, time1_member2, .. 75 i.e. frames must be sorted first by time then by member. 76 """ 77
78 - def __init__(self, pdbs=None, refpdb=None, n_members=10, **kw ):
79 """ 80 Create EnsembleTraj from PDB files or pickled PDBModels. The file names 81 are used for alphabetical sorting and must lead to an ordering first 82 by time then by member (i.e. frame_10_01.pdb where 10 is the time step 83 and 01 is the member index). Unlike normal alphabetical order, 84 frame_2_2 sorts before frame_10_10, i.e. we attempt to interprete 85 numbers contained in the frame name strings. 86 87 @param pdbs: file names of all conformations OR PDBModels 88 @type pdbs: [ str ] OR [ PDBModel ] 89 @param refpdb: file name of reference pdb 90 @type refpdb: str 91 @param n_members: number or tjajectories in the ensemble 92 @type n_members: int 93 @param kw: optional key=value pairs, see L{Biskit.Trajectory}. 94 @type kw: key=value 95 96 @raise EnsembleTrajError: if member trajectories don't have 97 equal number of frame 98 """ 99 100 Trajectory.__init__( self, pdbs, refpdb, **kw ) 101 102 self.n_members = n_members 103 104 if self.frameNames != None: 105 self.sortFrames() 106 107 if self.n_members and self.frames is not None and \ 108 len( self ) % self.n_members != 0: 109 raise EnsembleTrajError,\ 110 'Member trajectories must have equal number of frames.'
111 112
113 - def version( self ):
114 """ 115 Version of class. 116 117 @return: version 118 @rtype: str 119 """ 120 return Trajectory.version(self) + '; EnsembleTraj $Revision: 2.12 $'
121 122
123 - def replaceContent( self, traj ):
124 """ 125 Replace content of this trajectory by content of given traj. 126 No deep-copying, only references are taken. 127 128 @param traj: trajectory 129 @type traj: trajectory 130 131 @note: Overrides Trajectory method. 132 """ 133 Trajectory.replaceContent( self, traj ) 134 self.n_members = traj.n_members
135 136
137 - def thin( self, step=1 ):
138 """ 139 Keep only each step'th frame from trajectory with 10 ensemble members. 140 141 @param step: 1..keep all frames, 2..skip first and every second, .. 142 (default: 1) 143 @type step: int 144 145 @return: reduced EnsembleTraj 146 @rtype: EnsembleTraj 147 """ 148 T.ensure( step, int, forbidden=[0] ) 149 150 ## 10 x lenFrames/10, frame indices of each member 151 mI = [ self.memberIndices( i ) for i in range(self.n_members) ] 152 153 mI = N.array( mI ) 154 155 mI = N.take( mI, range( -1, N.shape( mI )[1], step )[1:], 1 ) 156 157 mI = N.transpose( mI ) 158 159 return self.takeFrames( N.ravel( mI ))
160 161
162 - def memberList( self, step = 1 ):
163 """ 164 Extract ensemble member Trajectories. 165 166 @todo: cast member Trajectories back to normal Trajectory object 167 168 @param step: leave out each step-1 frame (for each ensemble member) 169 (default: 1) 170 @type step: int 171 172 @return: [EnsembleTraj] 173 i.e. [ traj_member1, traj_member2, .. traj_member10 ] 174 @rtype: [trajectory] 175 """ 176 ## set of 10 trajectories, one for each ensemble member 177 tm = [self.takeFrames( range(i, self.lenFrames(),self.n_members*step )) 178 for i in range(0,self.n_members) ] 179 180 return tm
181 182
183 - def memberIndices( self, member, step=1 ):
184 """ 185 List of frame indices for this member:: 186 memberIndices( int_member, [int_step] ) 187 188 @param member: member trajectory 189 @type member: int 190 @param step: return only every i'th frame (default: 1) 191 @type step: int 192 193 @return: indices for members 194 @rtype: [int] 195 """ 196 r = range( member, self.lenFrames(), self.n_members ) 197 if step != 1: 198 r = N.take( r, range( 0, len( r ), step ) ).tolist() 199 return r
200 201
202 - def memberMask( self, member ):
203 """ 204 Get mask for all frames belonging to a given ensemble member. 205 206 @param member: member index starting with 0 207 @type member: int 208 209 @return: member mask, N.array( N_frames x 1) of 1||0 210 @rtype: [1|0] 211 """ 212 result = N.zeros( self.lenFrames() ) 213 214 if isinstance( member , types.IntType): 215 N.put( result, self.memberIndices( member ), 1 ) 216 217 if type( member ) == types.ListType: 218 for m in member: 219 N.put( result, self.memberIndices( m ), 1 ) 220 221 return result
222 223
224 - def takeFrames( self, indices ):
225 """ 226 Return a copy of the trajectory containing only the specified frames. 227 228 @param indices: positions to take 229 @type indices: [int] 230 231 @return: copy of this Trajectory (fewer frames, semi-deep copy of ref) 232 @rtype: Trajectory 233 """ 234 r = Trajectory.takeFrames( self, indices ) 235 236 r.n_members = self.n_members 237 return r
238 239
240 - def takeMember( self, i ):
241 """ 242 Take all frames belonging to member:: 243 takeMember( int ) -> EnsembleTraj, single member trajectory 244 245 @param i: member trajectory 246 @type i: int 247 248 @return: trajectory containing member i 249 @rtype: Trajectory 250 """ 251 r = self.takeFrames( self.memberIndices( i ) ) 252 r.n_members = 1 253 return r
254 255
256 - def takeMembers( self, mIndices ):
257 """ 258 Take all frames belonging to the members in mIndices:: 259 takeMembers( mIndices ) -> EnsembleTraj with frames of given members 260 261 @param mIndices: list of member indices 262 @type mIndices: [int] OR array('i') 263 264 @return: EnsembleTraj with specified members 265 @rtype: EnsembleTraj 266 267 @todo: return self.__class__ instead of EnsembleTraj 268 """ 269 try: 270 ## assumes that each member traj has same number of frames 271 fi = N.array( [ self.memberIndices( i ) for i in mIndices ] ) 272 fi = N.ravel( N.transpose( fi ) ) 273 274 n_members = len( mIndices ) 275 276 ## has wrong n_members and member order 277 t = self.takeFrames( fi ) 278 279 result = EnsembleTraj( n_members=n_members ) 280 281 result.__dict__.update( t.__dict__ ) 282 result.n_members = n_members 283 result.resetFrameNames() 284 285 return result 286 287 except TypeError: 288 raise EnsembleTrajError, 'takeMembers TypeError '+\ 289 str(mIndices)+\ 290 "\nlenFrames: %i; n_members: %i" %(len(self), self.n_members)
291 292
293 - def concatEnsembles( self, *traj ):
294 """ 295 Concatenate this with other trajectories in a zig zac manner, 296 resulting in an ensembleTraj with additional members. 297 The ref model of the new Trajectory is a 'semi-deep' copy of this 298 trajectorie's model.(see L{PDBModel.take()} ):: 299 concat( traj [, traj2, traj3, ..] ) -> Trajectory 300 301 @param traj: with identical atoms as this one 302 @type traj: one or more EnsembleTrajectory 303 304 @todo: fix so that pc, and profiles are not lost 305 """ 306 if len( traj ) == 0: 307 return self 308 309 r = self.__class__( n_members = self.n_members + traj[0].n_members ) 310 311 min_members = min( self.n_members, traj[0].n_members ) 312 min_frames = min( self.lenFrames(), traj[0].lenFrames() ) 313 314 steps = self.lenFrames()/self.n_members + \ 315 traj[0].lenFrames()/traj[0].n_members 316 317 def __everyOther( traj_0, traj_1, list_0, list_1, minMembers, 318 minFrames, loops ): 319 result = [] 320 for j in range( 0, minMembers/2 ): 321 322 for i in range( j*loops , j*loops + minFrames*2/minMembers ): 323 result += [ list_0[i] ] 324 result += [ list_1[i] ] 325 326 while i < j*traj_0.n_members: 327 result += [ list_0[i] ] 328 329 while i < j*traj_1.n_members: 330 result += [ list_1[i] ] 331 332 return result
333 334 frames = __everyOther( self, traj[0], self.frames, 335 traj[0].frames, min_members, 336 min_frames, steps ) 337 338 r.frames = N.array(frames) 339 r.setRef( self.ref.clone()) 340 341 if self.frameNames and traj[0].frameNames: 342 r.frameNames = __everyOther( self, traj[0], self.frameNames, 343 traj[0].frameNames, min_members, 344 min_frames, steps ) 345 try: 346 # NOT TESTED!! 347 if self.pc and traj[0].pc: 348 r.pc['p'] = __everyOther( self, traj[0], self.pc['p'], 349 traj[0].pc['p'], min_members, steps ) 350 351 r.pc['u'] = __everyOther( self, traj[0], self.pc['u'], 352 traj[0].pc['u'], min_members, steps ) 353 354 # r.pc['p'] = N.concatenate( (self.pc['p'], traj[0].pc['p']),0) 355 # r.pc['u'] = N.concatenate( (self.pc['u'], traj[0].pc['u']),0) 356 except TypeError, why: 357 EHandler.error('cannot concat PC '+str(why) ) 358 359 # r.profiles = self.profiles.concat( traj[0].profiles ) 360 361 ## recursively add other trajectories 362 return r.concat( *traj[1:] )
363 364
365 - def compressMembers( self, mask ):
366 """ 367 Apply mask to member trajectories. 368 369 @param mask: positions in trajectory list to keep or remove 370 @type mask: [1|0] 371 372 @return: compressed EnsembleTraj 373 @rtype: EnsembleTraj 374 """ 375 return self.takeMembers( N.nonzero( mask ) )
376 377
378 - def keepMembers( self, indices ):
379 """ 380 in-place version of takeMembers. keepMembers( indices ) -> None 381 382 @param indices: member numbers 383 @type indices: [int] 384 """ 385 r = self.takeMembers( indices ) 386 self.replaceContent( r )
387 388
389 - def removeMembers( self, indices ):
390 """ 391 Remove given member trajectories from this ensemble. 392 393 @param indices: trajectory (member) numbers 394 @type indices: [int] 395 """ 396 i = range( self.n_members ) 397 i.remove( N.array(indices) ) 398 self.keepMembers( i )
399 400
401 - def resetFrameNames( self ):
402 """ 403 Reset frame names to t_00_m_00 .. t_50_m_10. 404 """ 405 n = self.n_members 406 steps = self.lenFrames() / n 407 408 time_member = [] 409 for s in range( steps ): 410 for m in range( n ): 411 time_member += [ (s,m) ] 412 413 self.frameNames = [ 't_%03i_m_%02i' % tm for tm in time_member ]
414 415
416 - def argsortMember( self, inverse_time=0, inverse_member=0, step=1 ):
417 """ 418 Get list of frame indices sorted first by member, then by time (unlike 419 the normal sorting of EnsembleTraj, which is first by time then by 420 member). Default is ascending order (first frame is first time step 421 of member 0). 422 423 @param inverse_time: descending time order (last frame first) 424 (default: 0) 425 @type inverse_time: 1|0 426 @param inverse_member: descending member order (last member first) 427 (default: 0) 428 @type inverse_member: 1|0 429 @param step: take only every step frame (default: 1) 430 @type step: int 431 432 @return: length is N_frames (only if step is 1 ) 433 @rtype: [int] 434 """ 435 r = [] 436 437 a, e, j = 0, self.n_members, 1 438 if inverse_member: 439 a, e, j = self.n_members, 0, -1 440 441 for i in range( a, e, j ): 442 443 mi = self.memberIndices( i, step ) 444 445 if inverse_time: mi.reverse() 446 447 r.extend( mi ) 448 449 return r
450 451
452 - def plotMemberProfiles( self, *name, **arg ):
453 """ 454 Plot profiles of all member trajectories seperately:: 455 plotMemberProfiles( name1, [name2, .. ],[ arg1=x,..]) 456 -> biggles.Table 457 458 @param name: profile name(s) 459 @type name: str 460 @param arg: pairs for biggles.Curve() and/or xlabel=..,ylabel=.. 461 @type arg: key=value 462 463 @return: biggles plot object 464 @rtype: biggles.FramedArray() 465 """ 466 if not biggles: 467 raise ImportError, 'biggles module could not be imported.' 468 469 rows = self.n_members / 2 + self.n_members % 2 470 page = biggles.FramedArray( rows , 2) 471 472 biggles.configure('fontsize_min', 1) 473 colors = T.colorSpectrum( len( name ) , '00FF00', 'FF00FF') 474 475 ml = self.memberList() 476 477 i = 0 478 minV = maxV = None 479 480 for t in ml: 481 482 for j in range( len(name)): 483 484 p = t.profile( name[j] ) 485 486 if minV is None or minV > min( p ): 487 minV = min(p) 488 if maxV is None or maxV < max( p ): 489 maxV = max(p) 490 491 page[i/2, i%2].add( biggles.Curve( range( len(p) ), list(p), 492 color=colors[j], **arg ) ) 493 494 page[i/2, i%2].add( biggles.PlotLabel( 0.8, 0.8-j/8.0, name[j], 495 color=colors[j]) ) 496 497 page[i/2, i%2].add( biggles.PlotLabel( 0.1, 0.9, 'Traj %i' % i)) 498 499 i += 1 500 501 if self.n_members % 2 != 0: 502 line = biggles.Line( (0,minV), (len(p),maxV) ) 503 page[ self.n_members/2, 1 ].add( line ) 504 505 page.uniform_limits = 1 506 page.xlabel = arg.get('xlabel',None) 507 page.ylabel = arg.get('ylabel',None) 508 509 return page
510 511
512 - def fitMembers( self, refIndex=None, refModel=None, mask=None, n_it=1, 513 prof='rms', verbose=1, **profInfos ):
514 """ 515 RMSD-fit each member trajectory seperately onto one of its frames 516 or its average structure. 517 518 @param refIndex: index of reference frame within member traj. 519 If None -> fit to average coordinates 520 (if refModel == None) 521 @type refIndex: int OR None 522 @param refModel: fit to this structure (default: None) 523 @type refModel: PDBModel 524 @param mask: atoms to consider (default: None, all heavy) 525 @type mask: [1|0] OR None 526 @param n_it: number of fit iterations, kicking out outliers 527 on the way (default: 1):: 528 1 -> classic single fit, 529 0 -> until convergence 530 @type n_it: 1|0 531 @param prof: save rms per frame in profile of this name (default: rms) 532 @type prof: str 533 @param profInfos: description key-value pairs for profile 534 @type profInfos: key=value 535 """ 536 ml = self.memberList() 537 538 for m in ml: 539 if refIndex == None: 540 if refModel==None: 541 ref = None 542 else: 543 ref = refModel 544 else: 545 ref = m[ refIndex ] 546 547 m.fit( ref=ref, mask=mask, n_it=n_it, prof=prof, 548 verbose=verbose, **profInfos ) 549 550 ## ToDo for memory efficiency: replace frame chunks on the fly 551 552 self.replaceContent( ml[0].concat( *ml[1:] ) ) 553 self.sortFrames()
554 555
556 - def blockFit( self, ref=None, mask=None ):
557 """ 558 RMSD-fit the average of each member trajectory (i.e. the trajectory 559 en block) onto the overall average (default) or a given structure. 560 561 @param ref: reference structure (default: average structure) 562 @type ref: PDBModel 563 @param mask: atoms to consider (default: None, all heavy) 564 @type mask: [1|0] OR None 565 """ 566 ref = ref or self.avgModel() 567 568 for m in range( self.n_members ): 569 570 indices = self.memberIndices( m ) 571 572 ## get a copy of this member's Trajectory 573 traj = self.takeFrames( indices ) 574 m_avg = traj.avgModel() 575 576 r, t = m_avg.transformation( ref, mask ) 577 traj.transform( r, t ) 578 579 ## replace original frames of this member 580 N.put( self.frames, indices, traj.frames )
581 582 583
584 - def outliers( self, z=1.0, mask=None, prof='rmsCA_last', 585 last=10, step=1, verbose=1 ):
586 """ 587 Identify outlier trajectories. First we calculate the CA-RMS of every 588 |step|th frame to the last frame. Outliers are member trajectories for 589 which the slope of this rms profile is z standard deviations below the 590 mean of all members. 591 592 @param z: z-value threshold 593 @type z: float 594 @param mask: atom mask used (default: ref.maskCA()) 595 @type mask: [int] 596 @param prof: name of pre-calculated profile to use 597 (default: 'rmsCA_last') 598 @type prof: str 599 @param last: skip |last| last frames from linear regression 600 @type last: int 601 @param step: frame offset 602 @type step: int 603 604 @return: member mask of outlier trajectories 605 @rtype: [0|1] 606 """ 607 if mask is None: mask = self.ref.maskCA() 608 609 traj = self.compressAtoms( mask ) 610 if step != 1: 611 traj = traj.thin( step ) 612 613 if not prof in traj.profiles: 614 traj.fitMembers( refIndex=-1, prof=prof, verbose=verbose ) 615 616 p_all = traj.profiles[ prof ] 617 n = traj.n_members 618 l = len( traj ) 619 620 pm = [ p_all[ member : l : n ][:-last] for member in range( n ) ] 621 622 slopes = [ M.linfit( range( l/n - last ), p )[0] for p in pm ] 623 624 mean, sd = N.average( slopes ), M.SD( slopes ) 625 626 return [ r - mean < - z * sd for r in slopes ]
627 628 629 ############# 630 ## TESTING 631 ############# 632 import Biskit.test as BT 633
634 -class Test(BT.BiskitTest):
635 """EnsembleTraj test""" 636
637 - def prepare(self):
638 self.tr = T.Load( T.testRoot() + '/lig_pcr_00/traj.dat')
639
640 - def test_EnsembleTraj( self ):
641 """EnsembleTraj.fit/fitMembers/plotMembers test """ 642 ## The second part of the test will fail with the slimmed 643 ## down test trajectory of T.testRoot(). To run the full 644 ## test pease select a larger trajectory. 645 646 self.tr = traj2ensemble( self.tr ) 647 648 mask = self.tr.memberMask( 1 ) 649 650 self.tr.fit( ref=self.tr.ref, 651 mask=self.tr.ref.maskCA(), 652 prof='rms_CA_ref', 653 verbose=self.local ) 654 655 self.tr.fitMembers( mask=self.tr.ref.maskCA(), 656 prof='rms_CA_0', refIndex=0, 657 verbose=self.local ) 658 659 self.tr.fitMembers( mask=self.tr.ref.maskCA(), 660 prof='rms_CA_av', 661 verbose=self.local ) 662 663 self.p = self.tr.plotMemberProfiles( 'rms_CA_av', 'rms_CA_0', 664 'rms_CA_ref', xlabel='frame' ) 665 if self.local or self.VERBOSITY > 2: 666 self.p.show() 667 668 self.assertAlmostEqual( 26.19851, 669 N.sum( self.tr.profile('rms_CA_av') ), 2 )
670
671 - def test_outliers(self, traj=None):
672 """EnsembleTraj.outliers/concat test""" 673 674 self.t2 = self.tr.concat( self.tr ) 675 676 self.o = self.t2.outliers( z=1.2, mask=self.tr.ref.maskCA(), 677 verbose=self.local ) 678 if self.local: 679 print self.o 680 681 self.t = self.t2.compressMembers( N.logical_not( self.o ) ) 682 683 self.p2 = self.t.plotMemberProfiles( 'rms', xlabel='frame' ) 684 685 if self.local or self.VERBOSITY > 2: 686 self.p2.show() 687 688 self.assertEqual( self.o, 10 * [False] )
689 690 691 if __name__ == '__main__': 692 693 BT.localTest() 694