1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """
26 Parallellized AmberEntropist calculation.
27 """
28
29 import os.path, copy
30 import numpy.oldnumeric as N
31
32 import Biskit.tools as T
33 import Biskit.settings as settings
34 import Biskit.mathUtils as MU
35
36 from Biskit.PVM.TrackingJobMaster import TrackingJobMaster
37 from Biskit.hosts import cpus_all, nice_dic
38 from Biskit import PDBModel, PDBProfiles, EHandler, StdLog
39
40 from Biskit.Dock import Complex
41
42 slave_path = T.projectRoot()+"/Biskit/AmberEntropySlave.py"
43
45 """
46 Run many AmberEntropist calculations on many nodes. The Master has
47 a standard set of 13 protocols to run on rec, lig, and com
48 trajectories, as well as on every single member trajectory - in
49 total 113. It accepts one variable parameter, e.g. s(tart). Each
50 protocol is then run for all values of the variable parameter. A
51 protocol is simply a set of options that are passed on to the
52 AmberEntropist (which is run from within AmberEntropySlave).
53 Comparing the different protocols allows to more or less separate
54 random from real correlations, rigid body from intermolecular
55 vibrations, etc.
56
57 Results are put into a tree-shaped dictionary of dictionaries. The
58 first dimension/key is the member index -- None for the complete
59 ensemble trajectory, 0 for the first member, etc. The second
60 dimension/key is the name of the protocol, e.g. 'com_split' for
61 the complex trajectory with seperately fitted receptor and
62 ligand. The last dimension contains the different values obtained
63 from the ptraj run, e.g. 'S_total' points to the total entropy in
64 cal/mol/K, 'contributions' to the entropy contribution of each
65 mode, 'T' to the assumed temperature, 'vibes' gives the number of
66 vibrations with too low frequencies (according to ptraj). All these
67 are lists of values - one for each value of the variable option.
68
69 Example::
70 * r[None]['fcom']['S_vibes'][0] -> float
71 first vibr. Entropy of free fake complex for complete ensemble
72 * r[0]['com']['S_total'] -> [ float, float, .. ]
73 the total entropies of the complex calculated for the first
74 ensemble member and the different values of the variable option
75 """
76
77 - def __init__(self, rec=None, lig=None, com=None, out=None,
78 cr=None, var='s', vrange=[0], jack=0,
79 zfilter=None, clean=0, all=1,
80 exrec=[], exlig=[], excom=[],
81 hosts=cpus_all,
82 niceness=nice_dic,
83 w=0, a=1, debug=0,
84 restart=0,
85 **kw ):
86 """
87 @param rec: free rec trajectory [required]
88 @type rec: str
89 @param lig: free lig trajectory [required]
90 @type lig: str
91 @param com: complex trajectory [required]
92 @type com: str
93 @param out: file name for pickled result [required]
94 @type out: str
95 @param cr: chains of receptor in complex trajectory [n_chains rec]
96 @type cr: [int]
97
98 @param var: name of variable option [ s ]
99 @type var: str
100 @param vrange: set of values used for variable option
101 OR 'start:stop:step', string convertable to
102 range() input
103 @type vrange: [any]
104 @param jack: set up leave-one-trajectory-out jackknife test
105 (default: 0) (replaces var with 'ex1' and vrange with
106 range(1,n_members+1))
107 @type jack: [0|1]
108
109 @param zfilter: kick out outlyer trajectories using z-score threshold
110 on RMSD trace (default: None->don't)
111 @type zfilter: float
112 @param clean: remove pickled ref models and member trajectories
113 (default: 0)
114 @type clean: 0|1
115 @param all: skip single member trajs (default: 1)
116 @type all: 0|1
117
118 @param exrec: exclude certain members of receptor ensemble [[]]
119 @type exrec: [int]
120 @param exlig: exclude certain members of ligand ensemble [[]]
121 @type exlig: [int]
122 @param excom: exclude certain members of complex ensemble [[]]
123 @type excom: [int]
124
125 @param hosts: nodes to be used (default: all known)
126 @type hosts: [str]
127 @param debug: don't delete output files (default: 0)
128 @type debug: 1|0
129
130
131 @param kw: additional key=value parameters for AmberEntropist,
132 AmberCrdEntropist, Executor and Master.
133 @type kw: key=value pairs
134 ::
135 ... parameters for AmberEntropist
136 cast - 1|0, equalize free and bound atom content [1]
137 s,e - int, start and stop frame [0, to end]
138 atoms - [ str ], names of atoms to consider [all]
139 step - int, frame offset [no offset]
140 thin - float, use randomly distributed fraction of frames [all]
141 (similar to step but perhaps better for entropy
142 calculations)
143 ex - [int] OR ([int],[int]), exclude member trajectories [[]]
144 ex_n - int, exclude last n members OR... [None]
145 ex3 - int, exclude |ex3|rd tripple of trajectories [0]
146 (index starts with 1! 0 to exclude nothing)
147
148 ... parameters for AmberCrdEntropist
149 f_template - str, alternative ptraj input template [default]
150
151 ... parameters for Executor:
152 log - Biskit.LogFile, program log (None->STOUT) [None]
153 verbose - 0|1, print progress messages to log [log != STDOUT]
154
155 ... parameters for Master
156 w - 0|1, show X window for each slave [0]
157 a - 0|1, add hosts to PVM [1]
158 """
159
160 self.fout = T.absfile( out )
161 self.ferror = os.path.dirname(self.fout) +'/AmberEntropy_errors.log'
162 self.debug = debug
163
164 self.log = StdLog()
165
166
167 self.rec = T.absfile( rec, 0 )
168 self.lig = T.absfile( lig, 0 )
169 self.com = T.absfile( com, 0 )
170 self.cr = cr
171 self.cl = None
172 self.var = var
173 self.vrange = self.__vrange( vrange )
174 self.jack = jack
175 self.zfilter = zfilter
176 self.n_members = None
177 self.clean = clean
178 self.all = all
179
180
181 self.ex_frec = exrec
182 self.ex_flig = exlig
183 self.ex_com = excom
184
185
186 self.ref_frec = self.ref_flig = None
187 self.ref_brec = self.ref_blig = self.ref_com = None
188
189
190 self.members_frec = self.members_flig = []
191 self.members_brec = self.members_blig = []
192
193
194 self.options = kw
195
196 if not restart:
197
198 self.processTrajs()
199
200
201 self.protocols = self.protocols_var_range( **kw )
202 self.saveProtocols()
203
204 TrackingJobMaster.__init__(self, self.protocols,
205 chunk_size=1,
206 hosts=hosts,
207 niceness=niceness,
208 slave_script=slave_path,
209 show_output=w,
210 add_hosts=a)
211
212 print "JobMaster initialized."
213
214
216 """
217 Interprete the vrange option -> [ int ] or [ float ]
218
219 @param v: vrange option
220 @type v: lst OR str
221
222 @return: range option
223 @rtype: [int] OR [float]
224 """
225 if type( v ) is list:
226 return [ self.__float_int(x) for x in v ]
227 if type( v ) is str and ':' in v:
228 v = tuple( [ self.__float_int(x) for x in v.split(':') ] )
229 return N.arange( *v )
230
231 return self.__float_int( v )
232
233
235 """
236 Convert v to int or, if necessary, float
237
238 @param v: value
239 @type v: any
240
241 @return: converted value
242 @rtype: int OR float
243 """
244 if float(v) % 1. != 0:
245 return float( v )
246 return int( float(v) )
247
248
249 - def loadTraj( self, fname, outliers=[], refname=None ):
250 """
251 Load trajectory from file.
252
253 @param fname: path to trajectory
254 @type fname: str
255 @param outliers: Identify outlier trajectories (default: [], identify)
256 @type outliers: [int] OR []
257 @param refname: name of reference (efault: None)
258 @type refname: str
259
260 @return: t, outliers, members
261 @rtype: trajectoty, [int], [int]
262 """
263 self.log.add('Loading ' + fname )
264 t = T.Load( fname )
265
266 t.ref.addChainId()
267 t = t.compressAtoms( t.ref.maskProtein() )
268
269 outliers = self.getOutliers( t, outliers )
270
271 if refname:
272 self.dumpMissing( t.ref, refname )
273
274 members = None
275 if not self.all:
276 members = self.dumpMembers( t, self.rec )
277
278 return t, outliers, members
279
280
282 """
283 Extract reference model and member trajectories from rec, lig, and
284 com trajectories. Identify outlier member trajectories, if requested.
285 """
286
287 self.ref_frec = self.nameRef( self.rec )
288
289 t, self.ex_frec, self.members_frec = self.loadTraj(
290 self.rec, self.ex_frec, self.ref_frec )
291
292 n_rec_members = t.n_members
293 self.cr = self.cr or range( t.ref.lenChains( breaks=0 ) )
294 del t
295
296
297 self.ref_flig = self.nameRef( self.lig )
298
299 t, self.ex_flig, self.members_flig = self.loadTraj(
300 self.lig, self.ex_flig, self.ref_flig )
301
302 n_lig_members = t.n_members
303 del t
304
305
306 fname = T.stripSuffix( T.absfile( self.com, resolveLinks=0 ) )
307 self.ref_com = fname + '_ref.complex'
308 self.ref_blig= fname + '_blig.model'
309 self.ref_brec= fname + '_brec.model'
310
311 t, self.ex_com, self.members_com = self.loadTraj(
312 self.com, self.ex_com )
313
314 n_com_members = t.n_members
315
316 self.cl = self.cl or MU.difference( range(t.ref.lenChains()), self.cr)
317 rec = t.ref.takeChains( self.cr, breaks=0 )
318 lig = t.ref.takeChains( self.cl, breaks=0 )
319
320 del t
321 self.dumpMissing( Complex( rec, lig ), self.ref_com )
322 self.dumpMissing( rec, self.ref_brec )
323 self.dumpMissing( lig, self.ref_blig )
324
325 self.equalizeMemberCount( n_rec_members, n_lig_members, n_com_members )
326
327 if self.jack: self.prepareJackknife()
328
329
331 """
332 Ensure we keep equal number of members trajectories from frec,
333 flig, and com.
334
335 @param n_rec: number of receptor trajectories
336 @type n_rec: int
337 @param n_lig: number of ligand trajectories
338 @type n_lig: int
339 @param n_com: number of complex trajectories
340 @type n_com: int
341 """
342 ex = [ self.ex_frec, self.ex_flig, self.ex_com ]
343 n_members = [ n_rec, n_lig, n_com ]
344
345
346 ex = [ ( ex[i], n_members[i] - len(ex[i]) ) for i in range(3) ]
347
348
349 n_min = min( [ x[1] for x in ex ] )
350
351 self.log.add('excluding non-outliers to match member count: ')
352
353 label = ['com','lig','rec']
354
355 for x, n in ex:
356 i = 0
357 s = label.pop()
358
359 while n > n_min:
360 self.log.add_nobreak( '%s: ' % s )
361 if not i in x:
362 x.append( i )
363 n -= 1
364 self.log.add_nobreak('%i, ' % i )
365 i += 1
366
367 self.log.add('')
368
369 self.n_members = n_min
370
371
373 """
374 Prepare leave-one-trajectory-out jackknife test.
375 """
376 self.vrange = range( self.n_members + 1 )
377 self.var = 'ex1'
378
379
381 fname = T.stripSuffix( T.absfile( fname, resolveLinks=0 ) )
382 return fname + '_ref.model'
383
384
388
389
391 """
392 Pickle *o* to path *fname*, if it is not already there.
393
394 @param o: object to dump
395 @type o: any
396 @param fname: file name
397 @type fname: str
398
399 @return: file name
400 @rtype: str
401 """
402 if os.path.exists( fname ):
403 self.log.add('using existing ' + fname )
404 else:
405 self.log.add('Saving ' + fname )
406 T.Dump( o, fname )
407
408 return fname
409
410
412 """
413 Identify member trajectories that haved moved much further than normal.
414
415 @param traj: Trajectory to analyze
416 @type traj: Trajectory
417 @param outlaws: members already marked for exclusion
418 @type outlaws: [int]
419
420 @return: member indices of outlyer trajectories (plus outlaws)
421 @rtype: [int]
422 """
423 if not self.zfilter:
424 return outlaws
425
426 outliers = N.nonzero( traj.outliers( z=self.zfilter,
427 mask=traj.ref.maskCA(), step=10) )
428 self.log.add('identified %i outliers with z-threshold %3.1f' %\
429 ( len(outliers), self.zfilter ) )
430
431 return MU.union( outliers, outlaws )
432
433
435 """
436 Dump ensemble member trajectories
437
438 @param traj: Trajectory to dump
439 @type traj: Trajectory
440 @param fname: trajectory file name - used to derrive name for members
441 @type fname: str'
442
443 @return: list of trajectory files
444 @rtype: [str]
445 """
446 fname = T.stripSuffix( T.absfile( fname, resolveLinks=0 ) )
447 members = range( traj.n_members )
448
449 r = []
450 for n in members:
451 f = fname + '_member_%02i.traj' % n
452 if os.path.exists( f ):
453 self.log.add('using existing ' + f )
454 else:
455 self.log.add_nobreak('saving ' + f + '...')
456 m = traj.takeMember( n )
457 T.Dump( m, f )
458 self.log.add('done')
459 r += [ f ]
460
461 return r
462
463
465 """
466 hand over parameters to slave once.
467
468 @param slave_tid: slave task id
469 @type slave_tid: int
470
471 @return: dictionary with init parameters
472 @rtype: {param:value}
473 """
474 host = self.hostnameFromTID( slave_tid )
475 nice = self.niceness.get( host, self.niceness.get('default',0) )
476
477 return {'ferror':self.ferror,
478 'debug':self.debug, 'nice':nice, 'host':host}
479
480
482 """
483 Tidy up
484 """
485 if self.clean:
486 self.cleanCache()
487
489 """
490 Remove left-over cache files
491 """
492 fs = [ self.ref_frec, self.ref_flig, self.ref_com, self.ref_brec,
493 self.ref_blig ]
494 fs.extend( self.members_frec + self.members_flig )
495 fs.extend( self.members_brec + self.members_blig )
496 fs.extend( self.members_com )
497
498 for f in fs:
499 self.log.add('removing %s: %i' % (f, T.tryRemove(f)) )
500
501
503 """
504 Save protocol to file.
505 """
506 f_prot = T.stripSuffix( T.absfile(self.fout) ) + '_protocols.dat'
507 self.log.add_nobreak( 'Saving parameters to %s...' % f_prot )
508 T.Dump( self.protocols, f_prot )
509
510
512 """
513 Write result to file.
514 """
515 tree = self.getResult()
516 self.log.add("Saving result to %s..." % self.fout)
517 T.Dump( tree, self.fout )
518 self.log.add( "Done" )
519
520
521
522
523
525 """
526 Merge 2 dictionaries *d1* and *d2* and return a copy
527 """
528 r = copy.copy( d1 )
529 r.update( d2 )
530 return r
531
532 - def protocols_standard( self, trec, tlig, tcom,
533 ex_frec=None, ex_flig=None, ex_com=None,
534 doshift=1,
535 **options ):
536 """
537 Create 13 parameter sets for AmberEntropist that cover the calculation
538 of rec, lig, com and fcom entropies with and without splitting of the
539 complex, with and without shifting and shuffling of frames.
540
541 @param options: additional options (like cast, s, e, atoms, thin, step)
542 that are the same in all parameter sets
543 @type options: key=value
544
545 @return: each value of the returned dict contains a set of
546 arguments for one AmberEntropist run
547 @rtype: dict of dict
548 """
549 fcp = self.__cpupdate
550 r = {}
551 S = self
552
553 d = { 'ref':None, 'cast':1, 'chains':None,
554 'split':0, 'shift':0, 'shuffle':0, 'ex_n':0, 'ex3':None,
555 'thin':None, 'step':1, 'ss':0, 'se':None, 'atoms':None }
556 d.update( options )
557
558 r['frec'] = fcp( d, {'traj':trec, 'ref':S.ref_brec, 'ex':ex_frec } )
559 r['flig'] = fcp( d, {'traj':tlig, 'ref':S.ref_blig, 'ex':ex_flig } )
560 r['brec'] = fcp( d, {'traj':tcom, 'ref':S.ref_frec, 'ex':ex_com,
561 'chains':S.cr } )
562 r['blig'] = fcp( d, {'traj':tcom, 'ref':S.ref_flig, 'ex':ex_com,
563 'chains':S.cl } )
564
565 r['fcom'] = fcp( d, {'traj':'%s+%s'%(trec, tlig),
566 'ex':(ex_frec, ex_flig),
567 'ref':S.ref_com, 'split':1 } )
568
569
570
571
572 r['fcom_shuff'] = fcp( r['fcom'], {'shuffle':1 } )
573
574 r['com'] = fcp( d, {'traj':tcom, 'ex':ex_com,
575 'ref':'%s+%s' % (S.ref_frec, S.ref_flig) } )
576
577 r['com_split'] = fcp( r['com'], { 'split':1, 'border':S.cl[0] } )
578
579 r['com_split_shuff'] = fcp( r['com'],
580 {'split':1,'shuffle':1,'border':S.cl[0] } )
581 if doshift:
582
583 r['com_split_shift'] = fcp( r['com'],
584 {'split':1,'shift':1, 'border':S.cl[0] } )
585
586 return r
587
588
590 """
591 Set of protocols for all-member trajectories AND single-member traj.
592 with the different shuffle, shift, split settings.
593 Usually 11 x 13 protocols for AmberEntropist (10 members and 1 for all)
594
595 @param options: additional options (like cast, s, e, atoms, thin, step)
596 that are the same in all parameter sets
597 @type options: key=value
598
599 @return: each value of the returned dict contains a set of arguments
600 for one AmberEntropist run, each key is a tuple of the
601 member index and the protocol name, i.e. (0, 'fcom_shuffle')
602 The set of protocols for all-member trajectories has member
603 index None.
604 @rtype: dict of dict
605 """
606 r = {}
607
608 prots = self.protocols_standard( self.rec, self.lig, self.com,
609 self.ex_frec, self.ex_flig, self.ex_com,
610 **options )
611 for k,p in prots.items():
612 r[ (None, k) ] = p
613
614 if not self.all:
615
616 for i in range( len( self.members_frec ) ):
617 prots = self.protocols_standard(self.members_frec[i],
618 self.members_flig[i],
619 self.members_com[i], doshift=0,
620 **options )
621 for k, p in prots.items():
622 r[ (i, k) ] = p
623
624 return r
625
626
628 """
629 Complete set of protocols also considering different values of the
630 variable option.
631 """
632 self.log.add( 'variable option %s with %i values' \
633 % (self.var, len(self.vrange)))
634
635 r = {}
636 for v in self.vrange:
637 d = copy.copy( options )
638 d[ self.var ] = v
639
640 prots = self.protocols_single_all( **d )
641
642 for k, p in prots.items():
643 r[ (v,) + k ] = p
644
645 return r
646
647
648
649
651 """
652 Take dict with tuple keys (value, int_member, str_protocol) and build
653 a tree-like dict of dicts in which the values of d can be accessed
654 like::
655 d[value][int_member][str_protocol]
656
657 @param d: the raw results accumulated from the slave nodes
658 @type d: dict
659
660 @return: tree-like dict ordered by variable value, member, protocol
661 @rtype: dict of dict of dict of dict
662 """
663 r = {}
664
665 keys = d.keys()
666
667
668 if len( keys[0] ) == 1:
669 for k in keys:
670 r[ k[0] ] = d[ k ]
671 return r
672
673 x_values = MU.nonredundant( [ k[0] for k in keys ] )
674
675 for x in x_values:
676
677 sub_keys = [ k for k in keys if k[0] == x ]
678 y_values = MU.nonredundant( [ k[1:] for k in sub_keys] )
679
680 r[ x ] = {}
681 for y in y_values:
682 r[x][y] = d[ (x,) + y ]
683
684 r[ x ] = self.dictionate( r[x] )
685
686 return r
687
688
690 """
691 Collapse the results for different values of the variable parameter
692 into lists and put the results into a tree ala::
693 r[ member_index ][ protocol_name ][ result_field ] -> [ values ]
694
695 @return: tree-like dict ordered by variable value, member, protocol
696 @rtype: dict of dict of dict of lists
697 """
698 tree = self.dictionate( self.result )
699
700 vvalues = tree.keys()
701 vvalues.sort()
702
703 keys = self.result.keys()
704 sub_keys = [ k for k in keys if k[0] == vvalues[0] ]
705
706 r = {}
707 for v, member, protcl in sub_keys:
708
709 try:
710 if not member in r:
711 r[member] = {}
712
713 r[member][protcl] = {}
714
715 run_dic = tree[v][member][protcl]
716
717 for k in run_dic.keys():
718 r[member][protcl][k] = [ tree[v][member][protcl][k] \
719 for v in vvalues ]
720 except:
721 EHandler.warning('missing result: ' + str(T.lastError()))
722
723 r['var'] = self.var
724 r['vrange']= self.vrange
725 r['protocols'] = self.protocols
726
727 self.result_tree = r
728 return r
729
730
731
732
733
734
735 if __name__ == '__main__':
736 niceness = {'default': 0}
737 hosts = cpus_all[:80]
738
739 f = T.testRoot() + '/Amber/AmberEntropyMaster/'
740
741 rec = f + 'rec/traj.dat'
742 lig = f + 'lig/traj.dat'
743 com = f + 'com/traj.dat'
744 out = f + 'entropy.out'
745
746 master = AmberEntropyMaster( rec, lig, com, out, step=1,
747 atoms=['CA','CB'],
748 var='ex1', vrange='0:10',
749 exrec=[1],exlig=[0],
750 all=1,
751 hosts=hosts, niceness=niceness,
752 w=1 )
753
754 master.start()
755