Package Biskit :: Module AmberParmBuilder
[hide private]
[frames] | no frames]

Source Code for Module Biskit.AmberParmBuilder

  1  ## Automatically adapted for numpy.oldnumeric Mar 26, 2007 by alter_code1.py 
  2   
  3  ## 
  4  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  5  ## Copyright (C) 2004-2006 Raik Gruenberg & Johan Leckner 
  6  ## 
  7  ## This program is free software; you can redistribute it and/or 
  8  ## modify it under the terms of the GNU General Public License as 
  9  ## published by the Free Software Foundation; either version 2 of the 
 10  ## License, or any later version. 
 11  ## 
 12  ## This program is distributed in the hope that it will be useful, 
 13  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 15  ## General Public License for more details. 
 16  ## 
 17  ## You find a copy of the GNU General Public License in the file 
 18  ## license.txt along with this program; if not, write to the Free 
 19  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 20  ## 
 21  ## 
 22  ## $Revision: 2.9 $ 
 23  ## last $Date: 2007/04/06 10:16:06 $ 
 24  ## last $Author: graik $ 
 25   
 26  """ 
 27  Create Amber topology and coordinate file from PDB. 
 28  """ 
 29   
 30  import os, tempfile, copy 
 31  import numpy.oldnumeric as N 
 32   
 33  import Biskit.tools as t 
 34  import Biskit.settings as s 
 35  import Biskit.mathUtils as MU 
 36  from Biskit.LogFile import LogFile, StdLog 
 37  from Biskit.PDBModel import PDBModel 
 38  from Biskit.Errors import BiskitError 
 39  from Biskit.PDBCleaner import PDBCleaner 
 40  from Biskit import Executor 
 41   
42 -class AmberError( BiskitError ):
43 pass
44
45 -class AmberParmBuilder:
46 """ 47 AmberParmBuilder 48 ================ 49 Create Amber topology and coordinate file from PDB. 50 51 - parmMirror(): 52 ...builds a fake parm that exactly mirrors a given PDB file. 53 This parm can be used for ptraj but not for simulations. 54 Currently, parmMirror only accepts amber-formatted PDBs as 55 input. It should be possible to create topologies that have 56 the same content and order of atoms as an xplor PDB but 57 some atoms will have different names. 58 59 - parmSolvated(): 60 ...builds a solvated system for PME simulations (incl. closing 61 of S-S bonds, capping of chain breaks). parmSolvated accepts 62 both xplor and amber-formatted PDBs as input. 63 64 Requires the amber programs C{tleap} and C{ambpdb}. 65 Requires leap template files in C{biskit/external/amber/leap/}. 66 67 @note: The design of AmberParmBuilder is less than elegant. It 68 would make more sense to split it into two classes that 69 are both derrived from Executor. 70 """ 71 72 ## script to create a parm that exactly mirrors a given PDB 73 script_mirror_pdb = """ 74 logFile %(leap_out)s 75 source %(leaprc)s 76 %(fmod)s 77 %(fprep)s 78 p = loadPdb %(in_pdb)s 79 %(delete_atoms)s 80 saveAmberParm p %(out_parm)s %(out_crd)s 81 quit 82 """ 83 84 ## tleap command to close a single S-S bond 85 ss_bond = "bond p.%i.SG p.%i.SG\n" 86 87 ## leap script for solvated topology 88 F_leap_in = t.projectRoot() + '/external/amber/leap/solvate_box.leap' 89 ## PDB with ACE capping residue 90 F_ace_cap = t.projectRoot() + '/external/amber/leap/ace_cap.pdb' 91 ## PDB with NME capping residue 92 F_nme_cap = t.projectRoot() + '/external/amber/leap/nme_cap.pdb' 93
94 - def __init__( self, model, 95 leap_template=F_leap_in, 96 leaprc=s.leaprc, 97 leap_out=None, leap_in=None, 98 leap_pdb=None, 99 log=None, 100 debug=0, 101 verbose=0, 102 **kw ):
103 """ 104 @param model: model 105 @type model: PDBModel or str 106 @param leap_template: path to template file for leap input 107 @type leap_template: str 108 @param leaprc: path to parameter file for leap 109 @type leaprc: str 110 @param leap_out: target file for leap.log (default: discard) 111 @type leap_out: str 112 @param leap_in: target file for leap.in script (default: discard) 113 @type leap_in: str 114 @param kw: kw=value pairs for additional options in the leap_template 115 @type kw: key=value 116 """ 117 self.m = PDBModel( model ) 118 119 self.leap_template = t.absfile( leap_template ) 120 self.leaprc = leaprc 121 122 self.leap_in = leap_in or tempfile.mktemp( '_leap_in' ) 123 self.keep_leap_in = leap_in is not None 124 125 self.leap_pdb = leap_pdb or tempfile.mktemp( '_leap_pdb' ) 126 self.keep_leap_pdb = leap_pdb is not None 127 128 self.leap_out= leap_out or tempfile.mktemp( '_leap_log' ) 129 self.keep_leap_out = leap_out is not None 130 131 self.log = log or StdLog() 132 133 self.debug = debug 134 self.verbose = verbose 135 136 self.__dict__.update( kw )
137 138
139 - def __runLeap( self, in_script, norun=0, **kw ):
140 """ 141 Create script file and run Leap. 142 143 @param in_script: content of ptraj script with place holders 144 @type in_script: str 145 @param norun: 1 - only create leap scrip (default: 0) 146 @type norun: 1|0 147 @param kw: key=value pairs for filling place holders in script 148 @type kw: key=value 149 150 @raise AmberError: if missing option for leap input file or 151 if could not create leap input file 152 """ 153 ## create leap script 154 try: 155 ## use own fields and given kw as parameters for leap script 156 d = copy.copy( self.__dict__ ) 157 d.update( kw ) 158 159 in_script = in_script % d 160 f = open( self.leap_in, 'w') 161 f.write( in_script ) 162 f.close() 163 164 if self.verbose: 165 self.log.add('leap-script: ') 166 self.log.add( in_script ) 167 168 except IOError: 169 raise AmberError('Could not create leap input file') 170 except: 171 raise AmberError('missing option for leap input file\n'+\ 172 'available: %s' % (str( d.keys() ) )) 173 174 ## run tleap 175 args = '-f %s' % self.leap_in 176 177 if not norun: 178 self.exe = Executor('tleap', args, log=self.log,verbose=1, 179 catch_out=0) 180 self.output, self.error, self.status = self.exe.run() 181 182 if not os.path.exists( kw['out_parm'] ): 183 raise AmberError, "tleap failed" 184 185 ## clean up 186 187 if not self.keep_leap_in and not self.debug: 188 t.tryRemove( self.leap_in ) 189 if not self.keep_leap_out and not self.debug: 190 t.tryRemove( self.leap_out)
191 192
193 - def parm2pdb( self, f_parm, f_crd, f_out, aatm=0 ):
194 """ 195 Use ambpdb to build PDB from parm and crd. 196 197 @param f_parm: existing parm file 198 @type f_parm: str 199 @param f_crd: existing crd file 200 @type f_crd: str 201 @param f_out: target file name for PDB 202 @type f_out: str 203 204 @return: f_out, target file name for PDB 205 @rtype: str 206 207 @raise AmberError: if ambpdb fail 208 """ 209 ## cmd = '%s -p %s -aatm < %s > %s' % \ 210 args = '-p %s %s' % (f_parm, '-aatm'*aatm ) 211 212 x = Executor('ambpdb', args, f_in=f_crd, f_out=f_out, 213 log=self.log, verbose=1) 214 215 output,error,status = x.run() 216 217 if not os.path.exists( f_out ): 218 raise AmberError, 'ambpdb failed.' 219 220 return f_out
221 222
223 - def __ssBonds( self, model, cutoff=4. ):
224 """ 225 Identify disulfide bonds. 226 227 @param model: model 228 @type model: PDBModel 229 @param cutoff: distance cutoff for S-S distance (default: 4.0) 230 @type cutoff: float 231 232 @return: list with numbers of residue pairs forming S-S 233 @rtype: [(int, int)] 234 """ 235 m = model.compress( model.mask( ['SG'] ) ) 236 237 if len( m ) < 2: 238 return [] 239 240 pw = MU.pairwiseDistances( m.xyz, m.xyz ) 241 242 pw = N.less( pw, cutoff ) 243 244 r = [] 245 for i in range( len( pw ) ): 246 for j in range( i+1, len(pw) ): 247 if pw[i,j]: 248 r += [ (m.aProfiles['residue_number'][i], 249 m.aProfiles['residue_number'][j]) ] 250 return r
251 252
253 - def __cys2cyx( self, model, ss_residues ):
254 """ 255 Rename all S-S bonded CYS into CYX. 256 257 @param model: model 258 @type model: PDBModel 259 @param ss_residues: original residue numbers of S-S pairs 260 @type ss_residues: [(int, int)] 261 """ 262 ss = [] 263 for a,b in ss_residues: 264 ss += [a,b] 265 266 for a in model: 267 if a['residue_number'] in ss: 268 a['residue_name'] = 'CYX'
269 270
271 - def capACE( self, model, chain ):
272 """ 273 Cap N-terminal of given chain. 274 275 @param model: model 276 @type model: PDBMode 277 @param chain: index of chain to be capped 278 @type chain: int 279 """ 280 self.log.add('Capping N-terminal of chain %i.' % chain ) 281 m_ace = PDBModel( self.F_ace_cap ) 282 283 chains_before = model.takeChains( range(chain), breaks=1 ) 284 m_chain = model.takeChains( [chain], breaks=1 ) 285 chains_after = model.takeChains( range(chain+1, model.lenChains(1)), 286 breaks=1 ) 287 288 m_term = m_chain.resModels()[0] 289 290 ## we need 3 atoms for superposition, CB might mess things up but 291 ## could help if there is no HN 292 if 'HN' in m_term.atomNames(): 293 m_ace.remove( ['CB'] ) 294 295 ## rename overhanging residue in cap PDB 296 for a in m_ace: 297 if a['residue_name'] != 'ACE': 298 a['residue_name'] = m_term.aProfiles['residue_name'][0] 299 else: 300 a['residue_number'] = m_term.aProfiles['residue_number'][0]-1 301 a['chain_id'] = m_term.aProfiles['chain_id'][0] 302 a['segment_id'] = m_term.aProfiles['segment_id'][0] 303 304 ## fit cap onto first residue of chain 305 m_ace = m_ace.magicFit( m_term ) 306 307 ## concat cap on chain 308 m_chain = m_ace.resModels()[0].concat( m_chain ) 309 310 ## re-assemble whole model 311 return chains_before.concat( m_chain, chains_after )
312 313
314 - def capNME( self, model, chain ):
315 """ 316 Cap C-terminal of given chain. 317 318 @param model: model 319 @type model: PDBMode 320 @param chain: index of chain to be capped 321 @type chain: int 322 """ 323 self.log.add('Capping C-terminal of chain %i.' % chain ) 324 m_nme = PDBModel( self.F_nme_cap ) 325 326 chains_before = model.takeChains( range(chain), breaks=1 ) 327 m_chain = model.takeChains( [chain], breaks=1 ) 328 chains_after = model.takeChains( range(chain+1, model.lenChains(1)), 329 breaks=1 ) 330 331 m_term = m_chain.resModels()[-1] 332 333 ## rename overhanging residue in cap PDB, renumber cap residue 334 for a in m_nme: 335 if a['residue_name'] != 'NME': 336 a['residue_name'] = m_term.aProfiles['residue_name'][0] 337 else: 338 a['residue_number'] = m_term.aProfiles['residue_number'][0]+1 339 a['chain_id'] = m_term.aProfiles['chain_id'][0] 340 a['segment_id'] = m_term.aProfiles['segment_id'][0] 341 342 ## chain should not have any terminal O after capping 343 m_chain.remove( ['OXT'] ) 344 345 ## fit cap onto last residue of chain 346 m_nme = m_nme.magicFit( m_term ) 347 348 ## concat cap on chain 349 m_chain = m_chain.concat( m_nme.resModels()[-1] ) 350 351 if m_chain._PDBModel__terAtoms != []: 352 m_chain._PDBModel__terAtoms = [ len( m_chain ) - 1 ] 353 354 ## re-assemble whole model 355 return chains_before.concat( m_chain, chains_after )
356 357
358 - def centerModel( self, model ):
359 """ 360 Geometric centar of model. 361 362 @param model: model 363 @type model: PDBMode 364 """ 365 center = N.average( model.getXyz() ) 366 model.setXyz( model.xyz - center )
367 368
369 - def leapModel( self, hetatm=0 ):
370 """ 371 Get a clean PDBModel for input into leap. 372 373 @param hetatm: keep HETATM records (default: 0) 374 @type hetatm: 1|0 375 376 @return: model 377 @rtype: PDBMod 378 """ 379 m = self.m.clone() 380 m.xplor2amber() 381 382 cleaner = PDBCleaner( m, log=self.log ) 383 m = cleaner.process( keep_hetatoms=hetatm, amber=1 ) 384 385 m.renumberResidues( addChainId=1 ) 386 387 self.centerModel( m ) 388 389 return m
390 391
392 - def __fLines( self, template, values ):
393 if not type( values ) is list: 394 values = [ values ] 395 396 return ''.join( [ template % v for v in values ] )
397 398
399 - def parmSolvated( self, f_out, f_out_crd=None, f_out_pdb=None, 400 hetatm=0, norun=0, 401 cap=0, capN=[], capC=[], 402 fmod=[], fprep=[], 403 box=10.0, **kw ):
404 """ 405 @param f_out: target file for parm (topology) 406 @type f_out: str 407 @param f_out_crd: target file for crd (coordinates) 408 (default:|f_out_base|.crd) 409 @type f_out_crd: str 410 @param f_out_pdb: target file for pdb (default:|f_out_base|.pdb) 411 @type f_out_pdb: str 412 @param hetatm: keep hetero atoms (default: 0) 413 @type hetatm: 1|0 414 @param cap: put ACE and NME capping residue on chain breaks 415 (default: 0) 416 @type cap: 1|0 417 @param capN: indices of chains that should get ACE cap (default: []) 418 @type capN: [int] 419 @param capC: indices of chains that should get NME cap (default: []) 420 @type capC: [int] 421 @param box: minimal distance of solute from box edge (default: 10.0) 422 @type box: float 423 @param fmod: list of files with amber parameter modifications 424 (to be loaded into leap with loadAmberParams) (default:[]) 425 @type fmod: [str] 426 @param fprep: list of files with amber residue definitions 427 (to be loaded into leap with loadAmberPrep) (default: []) 428 @type fprep: [str] 429 @param kw: additional key=value pairs for leap input template 430 @type kw: key=value 431 432 @raise IOError: 433 """ 434 f_out = t.absfile( f_out ) 435 f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd' 436 f_out_pdb = t.absfile( f_out_pdb ) or t.stripSuffix( f_out ) +\ 437 '_leap.pdb' 438 439 fmod = [ t.absfile( f ) for f in t.toList( fmod ) ] 440 fprep = [ t.absfile( f ) for f in t.toList( fprep ) ] 441 442 try: 443 self.log.add( 'Cleaning PDB file for Amber:' ) 444 m = self.leapModel( hetatm=hetatm ) 445 446 if cap: 447 end_broken = m.atom2chainIndices( m.chainBreaks() ) 448 capC = MU.union( capC, end_broken ) 449 capN = MU.union( capN, N.array( end_broken ) + 1 ) 450 451 for i in capN: 452 m = self.capACE( m, i ) 453 for i in capC: 454 m = self.capNME( m, i ) 455 456 m.renumberResidues( addChainId=1 ) ## again, to accomodate capping 457 458 template = open( self.leap_template ).read() 459 460 leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod ) 461 leap_prep= self.__fLines( 'loadAmberPrep %s\n', fprep ) 462 463 ss = self.__ssBonds( m, cutoff=4. ) 464 self.__cys2cyx( m, ss ) 465 leap_ss = self.__fLines( self.ss_bond, ss ) 466 self.log.add('Found %i disulfide bonds: %s' % (len(ss),str(ss))) 467 468 self.log.add( 'writing cleaned PDB to %s' % self.leap_pdb ) 469 m.writePdb( self.leap_pdb, ter=3 ) 470 471 self.__runLeap( template, in_pdb=self.leap_pdb, 472 out_parm=f_out, out_crd=f_out_crd, 473 ss_bonds=leap_ss, fmod=leap_mod, 474 fprep=leap_prep, norun=norun, 475 box=box, **kw ) 476 477 if not norun: 478 parm_pdb = self.parm2pdb( f_out, f_out_crd, f_out_pdb ) 479 480 if not self.keep_leap_pdb and not self.debug: 481 t.tryRemove( self.leap_pdb ) 482 483 except IOError, why: 484 raise IOError, why
485 486
487 - def __deleteAtoms( self, m, i_atoms ):
488 """ 489 Delet atoms 490 491 @param m: model 492 @type m: PDBMode 493 @param i_atoms: atom index 494 @type i_atoms: [int] 495 496 @return: leap statements for deleting given atoms 497 @rtype: [str] 498 """ 499 cmd = 'remove p.%(res)i p.%(res)i.%(atom)i' 500 501 rM = m.resMap() 502 rI = m.resIndex() 503 504 s = [] 505 for i in i_atoms: 506 res = m.aProfiles['residue_number'][i] 507 ## substract atom index of first atom in of this residue 508 atm = i - rI[ rM[ i ] ] + 1 509 s += [ cmd % {'res':res, 'atom':atm} ] 510 511 return '\n'.join( s )
512 513
514 - def __inverseIndices( self, model, i_atoms ):
515 """ 516 @param model: model 517 @type model: PDBMode 518 @param i_atoms: atom index 519 @type i_atoms: [int] 520 521 @return: remaining atom indices of m that are NOT in i_atoms 522 @rtype: [int] 523 """ 524 mask = N.zeros( len( model ),N.Int ) 525 N.put( mask, i_atoms, 1 ) 526 return N.nonzero( N.logical_not( mask ) )
527 528
529 - def parmMirror( self, f_out, f_out_crd=None, fmod=[], fprep=[], **kw ):
530 """ 531 Create a parm7 file whose atom content (and order) exactly mirrors 532 the given PDBModel. This requires two leap runs. First we get a 533 temporary topology, then we identify all atoms added by leap and 534 build a final topology where these atoms are deleted. 535 This parm is hence NOT suited for simulations but can be used to parse 536 e.g. a trajectory or PDB into ptraj. 537 538 @param f_out: target parm file 539 @type f_out: str 540 @param f_out_crd: target crd file (default: f_out but ending .crd) 541 @type f_out_crd: str 542 """ 543 f_out = t.absfile( f_out ) 544 f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd' 545 546 ## if there are hydrogens, recast them to standard amber names 547 aatm = 'HA' in self.m.atomNames() ## 'HB2' in self.m.atomNames() 548 549 ## First leap round ## 550 m_ref = self.m.clone() 551 m_ref.xplor2amber( aatm=aatm ) 552 tmp_in = tempfile.mktemp( 'leap_in0.pdb' ) 553 m_ref.writePdb( tmp_in, ter=3 ) 554 555 tmp_parm = tempfile.mktemp( '_parm0' ) 556 tmp_crd = tempfile.mktemp( '_crd0' ) 557 558 leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod ) 559 leap_prep= self.__fLines( 'loadAmberPrep %s\n', fprep ) 560 561 self.__runLeap( self.script_mirror_pdb, 562 leaprc=self.leaprc, fmod=leap_mod, fprep=leap_prep, 563 in_pdb=tmp_in, out_parm=tmp_parm, out_crd=tmp_crd, 564 delete_atoms='' ) 565 566 tmp_pdb = self.parm2pdb( tmp_parm, tmp_crd, 567 tempfile.mktemp( 'leap_out.pdb' ), aatm=aatm ) 568 569 if not self.debug: 570 t.tryRemove( tmp_parm ) 571 t.tryRemove( tmp_crd ) 572 t.tryRemove( tmp_in ) 573 574 ## load model with missing atoms added by leap 575 m_leap = PDBModel( tmp_pdb ) 576 577 ## compare atom content 578 iLeap, iRef = m_leap.compareAtoms( m_ref ) 579 580 ## check that ref model doesn't need any change 581 if iRef != range( len( m_ref ) ): 582 raise AmberError, "Cannot create exact mirror of %s.\n" % tmp_in +\ 583 "Leap has renamed/deleted original atoms in %s."% tmp_pdb 584 585 ## indices of atoms that were added by leap 586 delStr = self.__deleteAtoms( m_leap, 587 self.__inverseIndices( m_leap, iLeap ) ) 588 589 ## Second leap round ## 590 self.__runLeap( self.script_mirror_pdb, leaprc=self.leaprc, 591 in_pdb=tmp_pdb, fmod=leap_mod, fprep=leap_prep, 592 out_parm=f_out, out_crd=f_out_crd, 593 delete_atoms=delStr ) 594 595 if not self.debug: 596 t.tryRemove( tmp_pdb )
597 598 599 ############# 600 ## TESTING ## 601 if __name__ == '__main__': 602 603 f = t.testRoot() + '/Amber/' 604 605 traj = t.Load( f + 'lig.etraj') 606 m = traj.ref 607 608 ## create solvated topology from xplor PDB 609 a = AmberParmBuilder( m, verbose=1, debug=1 ) 610 611 a.parmSolvated( f + 'lig_solvated.parm', box=2.5, 612 f_out_pdb = f + 'lig_solvated.pdb', 613 f_out_crd = f + 'lig_solvated.crd') 614 615 616 ## create amber pdb without water and hydrogen 617 m = m.compress( m.maskProtein() ) 618 m = m.compress( m.maskHeavy() ) 619 620 f_in = f + '/AmberParmBuilder/lig_stripped.pdb' 621 622 m.writePdb( f_in ) 623 624 ## create mirror parm for this stripped PDB 625 626 a = AmberParmBuilder( f_in, verbose=1, debug=1 ) 627 628 a.parmMirror( f + '/AmberParmBuilder/lig_stripped.parm' ) 629