Package Biskit :: Module TrajCluster
[hide private]
[frames] | no frames]

Source Code for Module Biskit.TrajCluster

  1  ## 
  2  ## Biskit, a toolkit for the manipulation of macromolecular structures 
  3  ## Copyright (C) 2004-2005 Raik Gruenberg & Johan Leckner 
  4  ## 
  5  ## This program is free software; you can redistribute it and/or 
  6  ## modify it under the terms of the GNU General Public License as 
  7  ## published by the Free Software Foundation; either version 2 of the 
  8  ## License, or any later version. 
  9  ## 
 10  ## This program is distributed in the hope that it will be useful, 
 11  ## but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
 13  ## General Public License for more details. 
 14  ## 
 15  ## You find a copy of the GNU General Public License in the file 
 16  ## license.txt along with this program; if not, write to the Free 
 17  ## Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
 18  ## 
 19  ## 
 20  ## last $Author: leckner $ 
 21  ## last $Date: 2006/09/13 10:28:13 $ 
 22  ## $Revision: 2.7 $ 
 23   
 24  """ 
 25  Cluster the members of a trajectory. 
 26  """ 
 27   
 28  import Numeric as N 
 29  import tools as T 
 30   
 31  from mathUtils import aboveDiagonal, SD 
 32  from FuzzyCluster import FuzzyCluster 
 33  import rmsFit 
 34  import types 
 35   
 36  from Errors import BiskitError 
 37   
38 -class ClusterError( BiskitError ):
39 pass
40
41 -class TrajCluster:
42
43 - def __init__( self, traj, verbose=1 ):
44 """ 45 @param traj: Trajectory to cluster (has to be fitted before hand) 46 @type traj: Trajectory 47 """ 48 self.traj = traj 49 ## atom mask applied before clustering 50 self.aMask = None 51 52 ## last clustering parameters 53 self.n_clusters = 0 54 self.fcWeight = 1.13 55 self.fcConverged = 1e-10 56 57 ## last clustering results 58 self.fc = None 59 self.fcCenters = None 60 61 ## by default prints clustering status to stdout 62 self.verbose = verbose
63 64
65 - def __raveled( self ):
66 """ 67 Apply current atom mask and return list of raveled frames. 68 """ 69 t = self.traj.compressAtoms( self.aMask ) 70 return N.array( map( N.ravel, t.frames ) )
71 72
73 - def cluster( self, n_clusters, weight=1.13, converged=1e-11, 74 aMask=None, force=0 ):
75 """ 76 Calculate new clusters. 77 78 @param n_clusters: number of clusters 79 @type n_clusters: int 80 @param weight: fuzziness weigth 81 @type weight: float (default: 1.13) 82 @param converged: stop iteration if min dist changes less than 83 converged (default: 1e-11) 84 @type converged: float 85 @param aMask: atom mask applied before clustering 86 @type aMask: [1|0] 87 @param force: re-calculate even if parameters haven't changed 88 (default:0) 89 @type force: 1|0 90 """ 91 if aMask == None: 92 aMask = N.ones( self.traj.getRef().lenAtoms() ) 93 94 if self.fc == None or force or self.fcWeight != weight \ 95 or self.n_clusters != n_clusters or self.aMask != aMask \ 96 or self.fcConverged != converged: 97 98 self.n_clusters = n_clusters 99 self.fcWeight = weight 100 self.aMask = aMask 101 102 self.fc = FuzzyCluster( self.__raveled(), self.n_clusters, 103 self.fcWeight ) 104 105 self.fcCenters = self.fc.go( self.fcConverged, 106 1000, nstep=10, 107 verbose=self.verbose )
108 109
110 - def calcClusterNumber( self, min_clst=5, max_clst=30, rmsLimit=1.0, 111 weight=1.13, converged=1e-11, aMask=None, force=0 ):
112 """ 113 Calculate the approximate number of clusters needed to pass 114 the average intra-cluster rmsd limit. 115 116 @param min_clst: lower limit for clusters (default: 5) 117 @type min_clst: int 118 @param max_clst: upper limit for clusters (default: 30 ) 119 @type max_clst: int 120 @param rmsLimit: rmsd criteria that the average of all clusters 121 must meet in Angstrom (default: 1.0) 122 @type rmsLimit: float 123 @param weight: fuzziness weigth (default: 1.13) 124 @type weight: float 125 @param converged: stop iteration if min dist changes less than 126 converged (default: 1e-11) 127 @type converged: float 128 @param force: re-calculate even if parameters haven't changed 129 (default: 0) 130 @type force: 1|0 131 132 @return: number of clusters 133 @rtype: int 134 135 @raise ClusterError: if can't determining number of clusters 136 """ 137 pos = [ min_clst, max_clst ] 138 139 while 1: 140 clst = int( N.average(pos) ) 141 self.cluster( clst, weight, converged, aMask, force=1 ) 142 rmsLst = [ self.avgRmsd(i, aMask)[0] for i in range(clst)] 143 144 if N.average( rmsLst ) > rmsLimit: 145 pos[0] = clst 146 else: 147 pos[1] = clst 148 149 if pos[1]-pos[0] == 1: 150 if self.verbose: 151 T.flushPrint('Converged at %i clusters, current average cluster rmsd %.2f\n'%( clst, N.average( rmsLst ) )) 152 return pos[1] 153 154 if pos[1]-pos[0] != 1: 155 if self.verbose: 156 T.flushPrint('Current cluster setting %i, current average cluster rmsd %.2f\n'%( clst, N.average( rmsLst ) )) 157 158 if pos[1]-pos[0]<= 0 or pos[0]<min_clst or pos[1]>max_clst: 159 raise ClusterError, "Error determining number of clusters"
160 161
162 - def memberships( self ):
163 """ 164 Get degree of membership of each frame to each cluster. 165 166 @return: N.array( n_clusters x n_frames ) 167 @rtype: array 168 """ 169 return self.fc.getMembershipMatrix()
170 171
172 - def maxMemberships(self):
173 """ 174 Get maximum membership value for each frame. 175 176 @return: list of float 177 @rtype: [float] 178 """ 179 msm = self.memberships() 180 return map( lambda x: max( msm[:,x]), range(0, self.traj.lenFrames() ))
181 182
183 - def centers( self ):
184 """ 185 Get 'center structure' for each cluster. 186 187 @return: N.array( n_clusters x n_atoms_masked x 3 ) 188 @rtype: array 189 """ 190 lenAtoms = N.shape( self.fcCenters )[1] / 3 191 return N.reshape( self.fcCenters, ( self.n_clusters, lenAtoms, 3))
192 193
194 - def centerFrames( self ):
195 """ 196 Get indices for frame nearest to each cluster center. 197 198 @return: list of cluster center indecies 199 @rtype: [int] 200 """ 201 return N.argmax( self.memberships(), 1 )
202 203
204 - def memberFrames( self, threshold=0. ):
205 """ 206 Get indices of all frames belonging to each cluster. Each frame 207 is guaranteed to belong, at least, to the cluster for which it has 208 its maximum membership. If threshold > 0, it can additionally pop 209 up in other clusters. 210 211 @param threshold: minimal cluster membership or 0 to consider 212 only max membership (default: 0) 213 @type threshold: float 214 215 @return: n_cluster, lst of lst of int, frame indices 216 @rtype: [[int]] 217 """ 218 ## best cluster for each frame 219 msm = self.memberships() 220 maxMemb = N.argmax( msm, 0 ) 221 222 r = [N.nonzero( N.equal(maxMemb, i) ) for i in range(0, self.n_clusters)] 223 r = [ x.tolist() for x in r ] 224 225 ## same thing but now taking all above threshold 226 ## -> same frame can end up in several clusters 227 if threshold > 0.: 228 r2 = [ N.nonzero( N.greater( l, threshold) ) for l in msm ] 229 230 ## add only additional frames 231 for i in range(0, len( r ) ): 232 try: 233 frames = r[i].tolist() 234 except: 235 frames = r[i] 236 237 r[i] = frames + [ fr for fr in r2[i] if fr not in r[i] ] 238 239 ## sort frames within each cluster by their membership 240 r = [ self.membershipSort( r[i], i) for i in range(0, len(r) )] 241 242 return r
243 244
245 - def membershipSort( self, frames, cluster ):
246 """ 247 Sort given list of frame indices by their membership in cluster. 248 249 @param frames: list of frame indecies 250 @type frames: [int] 251 @param cluster: cluster number 252 @type cluster: int 253 254 @return: indecies sorted by ther membership to cluster 255 @rtype: [int] 256 """ 257 msm = self.memberships() 258 259 pairs = [ (-msm[cluster, fr], fr) for fr in frames ] 260 pairs.sort() 261 return [ x[1] for x in pairs ]
262 263
264 - def memberTraj( self, cluster, threshold=0. ):
265 """ 266 Get trajectory with all frames belonging to this cluster, sorted 267 by their membership-degree (highest first). 268 269 @param cluster: cluster number 270 @type cluster: int 271 @param threshold: float 0-1, minimal cluster membership, 272 see L{memberFrames()} 273 @type threshold: float 274 275 @return: Trajectory with all members of a cluster, sorted by membership 276 @rtype: Trajectory 277 """ 278 mf = self.memberFrames( threshold )[cluster] 279 return self.traj.takeFrames( mf )
280 281
282 - def memberTrajs(self, threshold=0.):
283 """ 284 Get member Trajectories for each cluster. Frames are sorted by their 285 membership-degree (highest first). 286 287 @param threshold: float 0-1, minimal cluster membership, 288 see L{memberFrames()} 289 @type threshold: 290 291 @return: lst of Trajectories, with members of each cluster, 292 sorted by membership 293 @rtype: [Trajectory] 294 """ 295 mf = self.memberFrames( threshold ) 296 return [ self.traj.takeFrames(m) for m in mf ]
297 298
299 - def avgRmsd( self, cluster, aMask=None, threshold=0. ):
300 """ 301 Claculate the average pairwise rmsd (in Angstrom) for members 302 of a cluter. 303 304 @param cluster: cluster number 305 @type cluster: int 306 @param aMask: atom mask applied before calculation 307 @type aMask: [1|0] 308 @param threshold: float 0-1, minimal cluster membership, 309 see L{memberTraj()} 310 @type threshold: float 311 312 @return: average rmsd and the standard deviation 313 @rtype: float, float 314 """ 315 try: 316 rms = self.memberTraj(cluster,threshold).pairwiseRmsd( aMask ) 317 rms = aboveDiagonal( rms ) 318 except: 319 rms = [] 320 321 if len(N.ravel(rms)) == 1: 322 return N.average(rms)[0], 0.0 323 if len(N.ravel(rms)) == 0: 324 return 0.0, 0.0 325 326 return N.average( rms ), SD( rms )
327 328
329 - def avgRmsd2Ref( self, cluster, ref, avg=1 ):
330 """ 331 Claculate the rmsd (or average rmsd) of all frames belonging to a 332 cluster to a reference structure (in Angstrom). 333 334 @param cluster: cluster number 335 @type cluster: int 336 @param ref: reference structure 337 @type ref: model 338 @param avg: return the average rmsd (1) OR a list with all rmsds (0) 339 (default: 1) 340 @type avg: float OR [float] 341 342 """ 343 eTraj = self.memberTraj(cluster,threshold=0) 344 rms = [] 345 346 if type(ref) == types.InstanceType: 347 ref = ref.xyz 348 349 for frame in eTraj.frames: 350 rt, rmsdLst = rmsFit.match( ref, frame) 351 rms += [ rmsdLst[0][1] ] 352 if avg ==1: 353 return N.average(rms) 354 return rms
355 356 357 ############# 358 ## TESTING 359 ############# 360
361 -class Test:
362 """ 363 Test class 364 """ 365
366 - def run( self, local=0 ):
367 """ 368 run function test 369 370 @param local: transfer local variables to global and perform 371 other tasks only when run locally 372 @type local: 1|0 373 374 @return: 1 375 @rtype: int 376 """ 377 from Biskit.EnsembleTraj import traj2ensemble 378 379 traj = T.Load( T.testRoot()+'/lig_pcr_00/traj.dat') 380 381 traj = traj2ensemble( traj ) 382 383 aMask = traj.ref.mask( lambda a: a['name'] in ['CA','CB','CG'] ) 384 385 traj = traj.thin( 1 ) 386 387 if local: 388 traj.fit( aMask ) 389 tc = TrajCluster( traj ) 390 else: 391 traj.fit( aMask, verbose=0 ) 392 tc = TrajCluster( traj, verbose=0 ) 393 394 ## check how many clusters that are needed with the given criteria 395 n_clusters = tc.calcClusterNumber( min_clst=3, max_clst=15, 396 rmsLimit=0.7, aMask=aMask ) 397 398 ## cluster 399 tc.cluster( n_clusters, aMask=aMask ) 400 401 if local: 402 member_frames = tc.memberFrames() 403 404 print 'There are %i clusters where the members are:'%n_clusters 405 for i in range(n_clusters): 406 print 'Cluster %i (%i members): %s'%( i+1, 407 len(member_frames[i]), 408 member_frames[i] ) 409 410 globals().update( locals() ) 411 412 return 1
413 414
415 - def expected_result( self ):
416 """ 417 Precalculated result to check for consistent performance. 418 419 @return: 1 420 @rtype: int 421 """ 422 return 1
423 424 425 426 if __name__ == '__main__': 427 428 test = Test() 429 430 assert test.run( local=1 ) == test.expected_result() 431