1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 """
28 Run ptraj entropy analysis on Trajectory instance.
29 """
30
31 import tempfile, os
32 import numpy.oldnumeric as N
33 import random, time
34
35 import Biskit.tools as t
36 import Biskit.mathUtils as MU
37
38 from Biskit.Errors import BiskitError
39 from Biskit.AmberCrdEntropist import AmberCrdEntropist, EntropistError
40 from Biskit.AmberParmBuilder import AmberParmBuilder
41 from Biskit.PDBModel import PDBModel
42 from Biskit.Trajectory import Trajectory
43 from Biskit.EnsembleTraj import EnsembleTraj
44 from Biskit.LocalPath import LocalPath
45 from Biskit.Dock.Complex import Complex
46 from Biskit import EHandler
47
49 """
50 Run ptraj entropy analysis on Trajectory instance.
51 """
52
53 - def __init__( self, traj=None, parm=None, crd=None, ref=None, cast=0,
54 chains=None, border=None, split=0, shift=0, shuffle=0,
55 thin=None,
56 s=0, e=None, ss=0, se=None, step=1, atoms=None, heavy=0,
57 ex=[], ex_n=0, ex3=None, ex1=None,
58 fit_s=None, fit_e=None, memsave=1,
59 **kw ):
60 """
61 @param traj: path to 1 or 2 pickled Trajectory instances
62 (2 separated by '+', e.g. 'rec.traj+lig.traj')
63 @type traj: str
64 @param parm: try using existing parm file & keep it [create+discard]
65 @type parm: str
66 @param crd: target file for amber crd & keep it (default: discard)
67 @type crd: str
68 @param ref: superimpose onto this structure
69 @type ref: str|PDBModel|Complex
70 @param cast: equalize atom content against ref (if given) (default: 1)
71 @type cast: 0|1
72
73 @param chains: extract chains from traj (default: None, all chains)
74 @type chains: [int]
75 @param border: 1st chain of 2nd molecule; required for split, shift,
76 shuffle if traj is not already a tuple of trajectories
77 @type border: int
78 @param split: split trajectory after *border* and fit the two halfs
79 separately (default: 0)
80 @type split: 1|0
81 @param shift: recombine rec and lig member trajectories, should
82 disrupt correlations between rec and lig, requires
83 *chains* or 2 traj files to identify rec (default: 0)
84 @type shift: int
85 @param shuffle: shuffle the order of frames for one trajectory half,
86 requires *border* or 2 traj files to identify rec
87 @type shuffle: 0|1
88 @param s: start frame of complete traj (default: 0)
89 @type s: int
90 @param e: stop frame of complete traj (default: None)
91 @type e: int
92 @param ss: start frame of single member trajectories (only works
93 with EnsembleTraj; overrides s,e) (default: 0)
94 @type ss: int
95 @param se: stop frame of single member trajectories (only works
96 with EnsembleTraj; overrides s,e) (default: None)
97 @type se: int
98 @param step: frame offset (default: 1, no offset)
99 @type step: int
100 @param thin: use only randomly distributed fraction of frames
101 (default: all)
102 @type thin: float
103 @param atoms: atom names to consider (default: all)
104 @type atoms: [str]
105 @param heavy: remove hydrogens (default: 0)
106 @type heavy: 1|0
107 @param ex: exclude member trajectories
108 @type ex: [int] OR ([int],[int])
109 @param ex_n: exclude last n members OR...
110 @type ex_n: int
111 @param ex3: exclude *ex3*rd tripple of trajectories (default: 0)
112 (index starts with 1! 0 to exclude nothing) OR....
113 @type ex3: int
114 @param ex1: exclude *ex1*-th member remaining after applying *ex*
115 (default: None)(index starts with 1! 0 to exclude nothing)
116 @type ex1: int
117 @param fit_s: fit to average of different frame slice
118 @type fit_s: int|None
119 @param fit_e: fit to average of different frame slice
120 @type fit_e: int|None
121 @param memsave: delete internal trajectory after writing crd
122 (default: 1)
123 @type memsave: 1|0
124
125 @param kw: additional key=value parameters for AmberCrdEntropist
126 and Executor:
127 @type kw: key=value pairs
128 ::
129 ... parameters for AmberCrdEntropist
130 f_template - str, alternative ptraj input template
131
132 ... and key=value parameters for Executor:
133 f_out - str, target name for ptraj output file (default: discard)
134 debug - 0|1, keep all temporary files (default: 0)
135 verbose - 0|1, print progress messages to log (log != STDOUT)
136 node - str, host for calculation (None->local) NOT TESTED
137 (default: None)
138 nice - int, nice level (default: 0)
139 log - Biskit.LogFile, program log (None->STOUT) (default: None)
140 """
141
142
143 f_crd = crd or tempfile.mktemp('.crd')
144 self.keep_crd = crd is not None
145
146 f_parm = t.absfile( parm or tempfile.mktemp( '.parm' ) )
147 self.keep_parm = parm is not None
148
149 self.parmcrd = tempfile.mktemp('_ref.crd')
150
151 AmberCrdEntropist.__init__( self, f_parm, f_crd,
152 s=s, e=e, step=step, **kw )
153
154 self.fit_s = fit_s
155 self.fit_e = fit_e
156
157 self.cast = cast
158 self.chains = chains
159 self.border = border
160 self.split = split
161 self.shift = shift
162 self.shuffle= shuffle
163 self.thin = thin
164 self.thin_i = None
165 self.exclude= ex
166 self.ex_n = ex_n
167
168 self.sstart = ss
169 self.sstop = se
170 self.heavy = heavy
171 self.atoms = atoms
172 self.memsave= memsave
173
174 if ex3 in [0, None]:
175 self.ex3 = None
176 else:
177 self.ex3 = ex3-1
178 if ex3 != None: self.ex_n = 0
179
180 if ex1 != None:
181 self.ex3 = None
182 self.ex_n = 0
183 if ex1 in [0, None]:
184 self.ex1 = None
185 else:
186 self.ex1 = ex1-1
187
188
189 self.ref = self.prepareRef( ref )
190
191
192 self.traj = self.prepareTraj( traj, self.ref, cast=self.cast )
193 self.nframes = len( self.traj )
194
195
196 self.start = 0
197 self.stop = len( self.traj )
198 self.step = 1
199
200
202 """
203 Overrides Executor method.
204 """
205
206 if not os.path.exists( self.f_parm ):
207 self.buildParm()
208 else:
209 self.log.add('using existing %s' % self.f_parm)
210
211
212 if not os.path.exists( self.f_crd ):
213 self.log.add_nobreak('Writing amber crd file...')
214 self.traj.writeCrd( self.f_crd )
215 self.log.add('done')
216
217 if self.memsave: self.traj = None
218 else:
219 self.log.add('using existing %s' % self.f_crd)
220
221
223 """
224 Build amber topology.
225 """
226 a = AmberParmBuilder( self.traj.ref, verbose=self.verbose,
227 debug=self.debug )
228 self.log.add_nobreak('Building amber topology...')
229
230 a.parmMirror( self.f_parm, self.parmcrd )
231 self.log.add('Topology built')
232
233
234 - def fit( self, traj, refModel=None, mask=None, conv=1e-6 ):
235 """
236 Fit trajectory until convergence onto it's own average and then
237 transform the average of all frames onto the reference.
238
239 @param traj: trajectory in which to fit frames
240 @type traj: Trajectory
241 @param refModel: reference PDBModel
242 @type refModel: PDBModel
243 @param mask: atom mask for superposition (default: all)
244 @type mask: [1|0]
245 @param conv: convergence criteria (default: 1e-6)
246 @type conv: float
247 """
248 self.fit_e = self.fit_e or len( traj )
249 self.fit_s = self.fit_s or 0
250
251 traj.fit( ref=traj.ref, mask=mask )
252
253 m_avg = traj[self.fit_s : self.fit_e ].avgModel()
254
255
256 d = 1.
257 dd= 1.
258 while dd >= conv:
259
260 traj.fit( ref=m_avg, mask=mask )
261 m_new_avg = traj[self.fit_s : self.fit_e].avgModel()
262
263 oldD, d = d, m_avg.rms( m_new_avg, mask=mask )
264 self.log.add( "rms difference: %f" % d )
265
266 dd = oldD - d
267 m_avg = m_new_avg
268
269
270 if refModel:
271 self.log.add('fitting trajectory en-block onto reference...')
272
273 if refModel.atomNames() != traj.ref.atomNames():
274 self.log.add('casting ref for fitting...')
275 ref_i, i = refModel.compareAtoms( m_avg )
276
277 refModel = refModel.take( ref_i )
278 m_avg = m_avg.take( i )
279
280 if not mask is None: mask = N.take( mask, i )
281
282 r, t = m_avg.transformation( refModel, mask )
283 traj.transform( r, t )
284
285
287 """
288 Split file name::
289 split(traj1.dat+traj2.dat) -> (traj1.dat, traj2.dat)
290
291 @param f: file name
292 @type f: str
293
294 @return: split filename
295 @rtype: str, str
296 """
297 if f.find("+") != -1 :
298 split = f.find("+")
299 f1 = f[:split]
300 f2 = f[split+1:]
301
302 return f1, f2
303 return None
304
305
307 """
308 Remove non protein atoms and H if needed.
309
310 @param m: model to clean
311 @type m: PDBModel
312
313 @return: cleaned model
314 @rtype: PDBModel
315 """
316 m.keep( N.nonzero( m.maskProtein() ) )
317 if self.heavy:
318 m.keep( N.nonzero( m.maskHeavy() ) )
319 return m
320
321
323 """
324 Load PDBModel directly or extract it from Trajectory.
325
326 @param f: file name of PDB file, pickled PDBModel, or Trajectory
327 @type f: str
328
329 @return: model
330 @rtype: PDBModel
331
332 @raise IOError: if file does not exist
333 """
334 p = LocalPath( f )
335
336 isPdb = (f[-4:].upper() == '.PDB' or f[-7:].upper() == '.PDB.GZ')
337
338 if isPdb and p.exists():
339 return p.local()
340
341 o = p.load()
342
343 if isinstance( o, PDBModel ):
344 return o
345 if isinstance( o, Trajectory ):
346 return o.ref
347
348 raise EntropistError, 'unknown reference type'
349
350
352 """
353 Prepare reference model.
354
355 @param fname: file name
356 @type fname: str
357
358 @return: reference structure
359 @rtype: PDBModel|Complex
360
361 @raise EntropistError: if unknown reference type
362 """
363 if not fname:
364 return None
365
366 if self.__splitFilenames( fname ):
367 f1, f2 = self.__splitFilenames( fname )
368 m1, m2 = PDBModel( self.__getModel(f1) ), \
369 PDBModel( self.__getModel(f2) )
370
371 ref = Complex( m1, m2 )
372 else:
373 ref = t.Load( fname )
374
375 if isinstance( ref, Trajectory ):
376 ref = ref.ref
377
378 if isinstance( ref, PDBModel ):
379 return self.__cleanAtoms( ref )
380
381 if isinstance( ref, Complex ):
382 self.__cleanAtoms( ref.rec_model )
383 self.__cleanAtoms( ref.lig_model )
384 ref.lig_model_transformed = None
385 return ref
386
387 raise EntropistError, 'unknown reference type'
388
389
390 - def __add3( self, n_members, excluded, trippleIndex ):
391 """
392 Add a tripple of numbers from range( n_members ) to be
393 excluded for error estimation. Tripples are chosen to have
394 minimal overlap. For 10 trajectories (*n_members*=10), the
395 first 3 tripples will be (1,2,3), (4,5,6), (7,8,9).
396
397 @param n_members: number of member trajectories
398 @type n_members: int
399 @param excluded: excluded member trajectories
400 @type excluded: [ int ]
401 @param trippleIndex:
402 @type trippleIndex: int
403
404 @return: the indices of all excluded member trajectories
405 @rtype: [ int ]
406 """
407 remaining = MU.difference( range( n_members ), excluded )
408 tripple = self.tripples( remaining, trippleIndex+1 )[-1]
409 return MU.union( excluded, list(tripple) )
410
411
412 - def __add1( self, n_members, excluded, index ):
413 """
414 Add one number from range( n_members ) to list of excluded indices
415
416 @param n_members: number of member trajectories
417 @type n_members: int
418 @param excluded: excluded member trajectories
419 @type excluded: [ int ]
420 @param index:
421 @type index: int
422
423 @return: the indices of all excluded member trajectories
424 @rtype: [ int ]
425 """
426 remaining = MU.difference( range( n_members ), excluded )
427 new_i = remaining[index]
428 return excluded + [ new_i ]
429
430
432 """
433 Exclude members from a (set of) Trajectory.
434 @param traj: input trajectory
435 @type traj: EnsembleTraj
436 @param exclude: set of indices to be excluded
437 @type exclude: [ int ]
438
439 @return:
440 @rtype: EnsembleTraj
441 """
442 if exclude == None or len( exclude ) == 0:
443 return traj
444
445 members = range( traj.n_members )
446 self.log.add("excluding members: " + str(exclude))
447
448 return traj.takeMembers( MU.difference( members, exclude ) )
449
450
452 """
453 Some individual trajectories may have to be excluded as
454 outliers or for error estimation (depending on the parameters
455 passed to AmberEntropist).
456
457 @param t: one or two ensembles of trajectories
458 @type t: EnsembleTraj OR (EnsembleTraj, EnsembleTraj )
459
460 @return: t with some member trajectories excluded, if needed
461 @rtype: EnsembleTraj OR (EnsembleTraj, EnsembleTraj )
462 """
463 if self.ex_n:
464 self.exclude = range( self.ex_n )
465
466 if type( t ) is tuple and not type( self.exclude ) is tuple:
467 self.exclude = ( self.exclude, self.exclude )
468
469 if type( t ) is tuple:
470 n_memb = ( t[0].n_members, t[1].n_members )
471 else:
472 n_memb = t.n_members
473
474 if self.ex3 != None and type( self.exclude ) is tuple:
475 self.exclude = self.__add3(n_memb[0], self.exclude[0], self.ex3),\
476 self.__add3(n_memb[1], self.exclude[1], self.ex3)
477
478 if self.ex3 != None and type( self.exclude ) is list:
479 self.exclude = self.__add3( n_memb, self.exclude, self.ex3 )
480
481 if self.ex1 != None and type( self.exclude ) is tuple:
482 self.exclude = self.__add1(n_memb[0], self.exclude[0], self.ex1),\
483 self.__add1(n_memb[1], self.exclude[1], self.ex1)
484
485 if self.ex1 != None and type( self.exclude ) is list:
486 self.exclude = self.__add1( n_memb, self.exclude, self.ex1 )
487
488 if type( t ) is tuple:
489 if not type( self.exclude ) is tuple:
490 self.exclude = ( self.exclude, self.exclude )
491 t = self.__exclude( t[0], self.exclude[0] ),\
492 self.__exclude( t[1], self.exclude[1] )
493 else:
494 t = self.__exclude( t, self.exclude )
495
496 return t
497
498
500 """
501 Prepare trajectory for Amber.
502
503 @param fname: path to EnsembleTraj OR ( EnsembleTraj, EnsembleTraj )
504 @type fname: str OR (str,str)
505 @param ref: reference structure
506 @type ref: EnsembleTraj
507 @param cast: cast to reference (same atom content) (default: 1)
508 @type cast: 1|0
509
510 @return: split, fitted or shuffled, etc. trajectory instance
511 @rtype: EnsembleTraj OR (EnsembleTraj, EnsembleTraj )
512 """
513
514 if self.__splitFilenames( fname ):
515 f1, f2 = self.__splitFilenames( fname )
516 t = self.loadTraj( f1 ), self.loadTraj( f2 )
517 else:
518 t = self.loadTraj( fname )
519 if self.chains:
520 t = t.takeChains( self.chains )
521
522
523 if not type(t) is tuple and self.border:
524 lig = range( self.border, t.ref.lenChains() )
525 t = t.takeChains( range(self.border) ), t.takeChains( lig )
526
527
528 if not type(t) is tuple and (self.shift or self.shuffle or self.split):
529 raise EntropistError,'split,shift,shuffle require -border.'
530
531
532 if ref and type(t) is tuple and not isinstance( ref, Complex ):
533 rec = ref.takeChains( range(t[0].lenChains()) )
534 lig = ref.takeChains( range(t[0].lenChains(), t[1].lenChains()) )
535 ref = Complex( rec, lig )
536
537 if ref and type(t) is not tuple and isinstance( ref, Complex ):
538 ref = ref.rec_model.concat( ref.lig() )
539
540
541 t = self.__removeMembers( t )
542
543
544 if cast and type( t ) is not tuple:
545 self.castTraj( t, ref )
546
547 if cast and type( t ) is tuple:
548 self.castTraj( t[0], ref.rec_model )
549 self.castTraj( t[1], ref.lig_model )
550
551
552 if self.shift:
553 t = self.shiftTraj( t[0], self.shift ), t[1]
554
555 if self.shuffle:
556 t = self.shuffleTraj( t[0] ), self.shuffleTraj( t[1] )
557
558
559 if self.split and ref:
560 self.fit( t[0], ref.rec() )
561 self.fit( t[1], ref.lig() )
562 if self.split and not ref:
563 self.fit( t[0] )
564 self.fit( t[1] )
565
566 if type( t ) is tuple:
567 t = t[0].concatAtoms( t[1] )
568 ref = ref.rec_model.concat( ref.lig() )
569
570
571 if not self.split:
572 self.fit( t, ref )
573
574 self.log.add( 'Analysing trajectory with %i atoms and %i frames.' \
575 % (t.lenAtoms(), t.lenFrames()))
576
577 return t
578
579
581 """
582 wait with unpickling until another Entropist has finished.
583
584 @param fname: file name
585 @type fname: str
586
587 @return: trajectroy
588 @rtype: Trajectroy
589 """
590 flock = fname + '__locked'
591
592 while os.path.exists( flock ):
593 self.log.add_nobreak('~')
594 time.sleep( random.random() * 10 )
595 self.log.add('')
596
597 try:
598 f = open( flock, 'w' )
599 f.write('1')
600 f.close()
601
602 r = t.Load(fname)
603
604 finally:
605 t.tryRemove( flock )
606
607 return r
608
609
664
665
667 """
668 reorder member trajectories
669 """
670 if not shift:
671 return traj
672
673 if not isinstance( traj, EnsembleTraj):
674 raise EntropistError, 'shift requires EnsembleTraj'
675
676 r = range( shift, traj.n_members ) + range( shift )
677 self.log.add('reorder member trajectories: %s...' % str( r ) )
678
679 return traj.takeMembers( r )
680
681
683 """
684 reorder all frames at random
685 """
686 r = range( len( traj ) )
687 random.shuffle( r )
688 return traj.takeFrames( r )
689
690
692 """
693 Equalize atom content of traj to refModel.
694 """
695 l = traj.lenAtoms()
696 lref = len( refModel )
697
698 self.log.add('comparing traj with %i atoms to reference of %i atoms.'%\
699 (l, lref))
700 i, iRef = traj.ref.compareAtoms( refModel )
701
702 refModel.keep( iRef )
703 self.log.add("%i atoms deleted from reference."%(lref-len(refModel)))
704
705 traj.keepAtoms( i )
706 self.log.add( "%i atoms deleted from trajectory."% (l-len(i) ) )
707
708
710 """
711 Group items of lst into n tripples with minimal overlap.
712 """
713 all = []
714 l = len( lst )
715
716
717 for i in range( l ):
718 for j in range( i+1, l ):
719 for k in range( j+1, l ):
720 all += [ ( lst[i], lst[j], lst[k] ) ]
721
722
723 pw = N.zeros( (len(all), len(all)), N.Float32 )
724 for i in range( len( all ) ):
725 for j in range( i, len(all) ):
726 pw[i,j] = pw[j,i] = len( MU.intersection(all[i],all[j]) )**2
727
728 pos = 0
729 r = []
730
731 while len( r ) < n:
732
733 r += [ pos ]
734
735 overlap = N.sum( N.array( [ pw[ i ] for i in r ] ) )
736
737 pos = N.argmin( overlap )
738
739 return N.take( all, r )
740
741
743 """
744 Remove temporary files.
745 """
746 AmberCrdEntropist.cleanup( self )
747
748 if not self.debug:
749 t.tryRemove( self.parmcrd )
750
751 if not self.keep_crd:
752 t.tryRemove( self.f_crd )
753 if not self.keep_parm:
754 t.tryRemove( self.f_parm )
755
756
758 """
759 Called when done.
760 """
761 AmberCrdEntropist.finish( self )
762
763
764 if self.result['nframes'] != self.nframes:
765 raise EntropistError, 'incorrect number of frames: %i instead %i'%\
766 ( self.result['nframes'], self.nframes )
767