1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """
25 Analyze a structure using Prosa2003.
26 """
27
28 import os.path
29 import string
30 import Numeric as N
31 import tempfile
32
33 import Biskit.tools as T
34
35 from Biskit import Executor
36 from Biskit import TemplateError
37
38 from Biskit.Errors import BiskitError
39 import time
40 import subprocess
41
43 pass
44
45
47 """
48 Analyze energies using Prosa2003 for a given PDBModels.
49 =======================================================
50
51 Reference
52 ---------
53 - U{http://lore.came.sbg.ac.at/}
54 - U{http://www.proceryon.com/solutions/prosa.html}
55 - Sippl,M.J. Recognition of Errors in Three-Dimensional
56 Structures of Proteins.Proteins, 17: 355-362 (1993).
57
58 Example usage
59 -------------
60 >>> x = Prosa2003( ref_model, [ m1, m2 ], verbose=1 )
61 >>> result = x.run()
62
63 Settings
64 --------
65 - I{lower_k = |integer| } and I{upper_k = |integer| }
66 - Pair interaction energies are calculated for residue pairs
67 whose distance k along the sequence is lower_k < k < upper_k.
68 Default values are lower_k = 1, upper_k = 600. For example if
69 you want to see only the short range energy contributions
70 (e.g. sequence separation k < 9) set lower_k = 1, upper_k= 9.
71 These parameters affect pair interactions only.
72
73 - I{pot_lb = |float| } and I{pot_ub = |float| }
74 - Pair energies are calculated in the distance range
75 [pot_lb, pot_ub] A. Outside this range energies are zero.
76 You can set the desired distance range of pair potential
77 calculations. For example if you are not interested in
78 energy contributions of close contacts, set pot lb = 4. The
79 energy of pair interactions in the intervall [0; pot_lb] is
80 then zero. Default: pot_lb = 0, pot_ub = 15. In addition the
81 upper bound depends on the pair potentials used. The current
82 maximum range is 15A.
83
84 Structure of the input file
85 ---------------------------
86 ::
87 pair potential --- read in other than default potential
88 surface potential
89 pdb_dir --- directory with pdb file
90 read pdb |filename| |object name|
91 lower_k |int|
92 upper_k |int|
93 pot_lb |float|
94 pot_ub |float|
95 analyse energy |object name|
96 print energy |object name| |filename(.ana)|
97 exit --- exit without launching graphical mode
98 """
99
100
101 inp = \
102 """pair potential $PROSA_BASE/%(pairPot)s pcb
103 surface potential $PROSA_BASE/%(surfPot)s scb
104 pdb_dir = %(temp_dir)s
105 read pdb %(prosaPdbFile)s %(objectName)s
106 lower_k = %(lower_k)i
107 upper_k = %(upper_k)i
108 pot_lb = %(pot_lb)3.1f
109 pot_ub = %(pot_ub)3.1f
110 analyse energy %(objectName)s
111 print energy %(objectName)s %(prosaOutput)s
112 exit\n
113 """
114
116 """
117 @param models: if more than one model is given they are
118 concatenated and the energy is calculated
119 for the two together.
120 @type models: PDBModels
121
122
123 @param kw: additional key=value parameters for Executor:
124 @type kw: key=value pairs
125 ::
126 debug - 0|1, keep all temporary files (default: 0)
127 verbose - 0|1, print progress messages to log (log != STDOUT)
128 node - str, host for calculation (None->local) NOT TESTED
129 (default: None)
130 nice - int, nice level (default: 0)
131 log - Biskit.LogFile, program log (None->STOUT) (default: None)
132 """
133
134 self.models = models
135
136
137 self.pairPot = 'prosa2003.pair-cb'
138 self.surfPot = 'prosa2003.surf-cb'
139
140
141 self.prosaPdbFile = tempfile.mktemp('_prosa2003.pdb')
142 self.prosaOutput = tempfile.mktemp('_prosa2003.out')
143 self.temp_dir = T.tempDir()
144
145 prosaInput = tempfile.mktemp('_prosa2003.inp')
146
147
148 self.objectName = 'obj1'
149 self.lower_k = 1
150 self.upper_k = 600
151 self.pot_lb = 0.
152 self.pot_ub = 15.
153
154 Executor.__init__( self, 'prosa2003', template=self.inp,
155 f_in=prosaInput, **kw )
156
157
158 self.checkPotentials()
159
160
162 """
163 Run Prosa2003.
164
165 @note: Was forced to overwrite execute function of Executor to get
166 prosa2003 running (the code below equals an os.system call).
167
168 @return: duration of calculation in seconds
169 @rtype: float
170
171 @raise Prosa2003_Error: if execution failed
172 """
173 start_time = time.time()
174
175 cmd = self.command()
176
177 shellexe = None
178 if self.exe.shell and self.exe.shellexe:
179 shellexe = self.exe.shellexe
180
181 stdin = stdout = stderr = None
182
183 if self.exe.pipes:
184 stdin = subprocess.PIPE
185 stdout= subprocess.PIPE
186 stderr= subprocess.PIPE
187 else:
188 inp = None
189 if self.f_in:
190 stdin = open( self.f_in )
191 if self.f_out:
192 stdout= open( self.f_out, 'w' )
193 if self.f_err and self.catch_err:
194 stderr= open( self.f_err, 'w' )
195
196 if self.verbose:
197 self.log.add('executing: %s' % cmd)
198 self.log.add('in folder: %s' % self.cwd )
199 self.log.add('input: %r' % stdin )
200 self.log.add('output: %r' % stdout )
201 self.log.add('errors: %r' % stderr )
202 self.log.add('wrapped: %r'% self.exe.shell )
203 self.log.add('shell: %r' % shellexe )
204 self.log.add('environment: %r' % self.environment() )
205 if self.exe.pipes and inp:
206 self.log.add('%i byte of input pipe' % len(str(inp)))
207
208 try:
209 retcode = subprocess.call( '%s %s > %s'\
210 %(self.exe.bin, self.f_in, self.f_out),
211 shell=True)
212 if retcode < 0:
213 raise Prosa2003_Error, "Child was terminated by signal" \
214 + str(retcode)
215
216 except Prosa2003_Error, why:
217 raise Prosa2003_Error, "Execution of Prosa2003 failed: " + str(why)
218
219 return time.time() - start_time
220
221
223 """
224 Check that the environment variable $PROSA_BASE is correct
225 and that the potential files are there.
226 """
227 sysPotDir = os.environ['PROSA_BASE']
228 altPotDir = '/Bis/shared/centos-3/prosa2003/ProSaData/'
229
230 if not os.path.exists( sysPotDir + self.pairPot + '.frq' ):
231 if os.path.exists( altPotDir + self.pairPot + '.frq' ):
232 os.environ['PROSA_BASE'] = altPotDir
233
234 if not os.path.exists( sysPotDir + self.surfPot + '.sff' ):
235 if os.path.exists( altPotDir + self.surfPot + '.sff' ):
236 os.environ['PROSA_BASE'] = altPotDir
237
238
240 """
241 Make and write a PROSA compatible pdb file.
242 - consecutive residue numbering
243 - same chainId troughout the model
244
245 @note: Overrides Executor method.
246 """
247 model=self.models[0]
248
249 if len(self.models)> 1:
250 for i in range( 1, len( self.models )):
251 model.concat(self.models[i])
252
253 model.renumberResidues()
254 for a in model.getAtoms():
255 a['chain_id'] = 'P'
256 model.writePdb( self.prosaPdbFile, ter=0 )
257
258
266
267
269 """
270 Parse the Prosa2003 output file.
271
272 @return: dictionary with the calculated potential profiles
273 and the parameters used
274 @rtype: dict
275 """
276 prosa_pair = []
277 prosa_surf = []
278 prosa_tot = []
279
280 prosaout = self.prosaOutput + '.ana'
281
282 lines = []
283 try:
284 lines = open( prosaout ).readlines()
285 if not lines:
286 raise IOError, 'File %s is empty'%( prosaout )
287 except IOError, why:
288 raise IOError, "Couldn't read Prosa result: " + str( why ) \
289 + '\n Check the Prosa license!'
290
291
292 for i in range( len(lines) ):
293 if lines[i][0] != '#':
294 nr, pair, surf, tot = string.split( lines[i])
295 prosa_pair += [ float( pair ) ]
296 prosa_surf += [ float( surf ) ]
297 prosa_tot += [ float( tot ) ]
298
299
300 result = {'prosa_pair':N.array(prosa_pair),
301 'prosa_surf':N.array(prosa_surf),
302 'prosa_tot':N.array(prosa_tot),
303 'ProsaInfo':{ 'lower_k':self.lower_k,
304 'upper_k':self.upper_k,
305 'pot_lb':self.pot_lb,
306 'pot_ub':self.pot_ub } }
307
308 return result
309
310
312 """
313 Sum of energy profiles.
314
315 @return: sum of the three energy profiles
316 [ E_prosa_pair, E_prosa_surf, E_prosa_tot]
317 @rtype: [float]
318 """
319
320 r = []
321 for k in ['prosa_pair', 'prosa_surf', 'prosa_tot']:
322 r += [ self.result[k] ]
323
324 return N.sum( r, 1 )
325
326
328 """
329 @note: Overrides Executor method
330 """
331 return not self.error is None
332
333
340
341
342
343
344
345
347 """
348 Test class
349 """
350
351 - def run( self, local=0 ):
352 """
353 Prosa2003 function test
354
355 @param local: transfer local variables to global and perform
356 other tasks only when run locally
357 @type local: 1|0
358
359 @return: list of energies
360 @rtype: [ float ]
361 """
362 from Biskit import PDBModel
363
364
365 ml = PDBModel( T.testRoot()+'/lig/1A19.pdb' )
366 ml = ml.compress( ml.maskProtein() )
367
368 mr = PDBModel( T.testRoot()+'/rec/1A2P.pdb' )
369 mr = mr.compress( mr.maskProtein() )
370
371
372 prosa = Prosa2003( [ml, mr], debug=0, verbose=0 )
373
374
375 ene = prosa.run()
376
377 result = prosa.prosaEnergy()
378
379 if local:
380 print "Result: ", result
381 globals().update( locals() )
382
383 return result
384
385
387 """
388 Precalculated result to check for consistent performance.
389
390 @return: list of energies
391 @rtype: [ float ]
392 """
393 return [ -94.568, -64.903, -159.463 ]
394
395
396 if __name__ == '__main__':
397
398 test = Test( )
399
400 assert test.run( local=1 ) == test.expected_result()
401