1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """
25 Cluster the members of a trajectory.
26 """
27
28 import Numeric as N
29 import tools as T
30
31 from mathUtils import aboveDiagonal, SD
32 from FuzzyCluster import FuzzyCluster
33 import rmsFit
34 import types
35
36 from Errors import BiskitError
37
39 pass
40
42
44 """
45 @param traj: Trajectory to cluster (has to be fitted before hand)
46 @type traj: Trajectory
47 """
48 self.traj = traj
49
50 self.aMask = None
51
52
53 self.n_clusters = 0
54 self.fcWeight = 1.13
55 self.fcConverged = 1e-10
56
57
58 self.fc = None
59 self.fcCenters = None
60
61
62 self.verbose = verbose
63
64
66 """
67 Apply current atom mask and return list of raveled frames.
68 """
69 t = self.traj.compressAtoms( self.aMask )
70 return N.array( map( N.ravel, t.frames ) )
71
72
73 - def cluster( self, n_clusters, weight=1.13, converged=1e-11,
74 aMask=None, force=0 ):
75 """
76 Calculate new clusters.
77
78 @param n_clusters: number of clusters
79 @type n_clusters: int
80 @param weight: fuzziness weigth
81 @type weight: float (default: 1.13)
82 @param converged: stop iteration if min dist changes less than
83 converged (default: 1e-11)
84 @type converged: float
85 @param aMask: atom mask applied before clustering
86 @type aMask: [1|0]
87 @param force: re-calculate even if parameters haven't changed
88 (default:0)
89 @type force: 1|0
90 """
91 if aMask == None:
92 aMask = N.ones( self.traj.getRef().lenAtoms() )
93
94 if self.fc == None or force or self.fcWeight != weight \
95 or self.n_clusters != n_clusters or self.aMask != aMask \
96 or self.fcConverged != converged:
97
98 self.n_clusters = n_clusters
99 self.fcWeight = weight
100 self.aMask = aMask
101
102 self.fc = FuzzyCluster( self.__raveled(), self.n_clusters,
103 self.fcWeight )
104
105 self.fcCenters = self.fc.go( self.fcConverged,
106 1000, nstep=10,
107 verbose=self.verbose )
108
109
110 - def calcClusterNumber( self, min_clst=5, max_clst=30, rmsLimit=1.0,
111 weight=1.13, converged=1e-11, aMask=None, force=0 ):
112 """
113 Calculate the approximate number of clusters needed to pass
114 the average intra-cluster rmsd limit.
115
116 @param min_clst: lower limit for clusters (default: 5)
117 @type min_clst: int
118 @param max_clst: upper limit for clusters (default: 30 )
119 @type max_clst: int
120 @param rmsLimit: rmsd criteria that the average of all clusters
121 must meet in Angstrom (default: 1.0)
122 @type rmsLimit: float
123 @param weight: fuzziness weigth (default: 1.13)
124 @type weight: float
125 @param converged: stop iteration if min dist changes less than
126 converged (default: 1e-11)
127 @type converged: float
128 @param force: re-calculate even if parameters haven't changed
129 (default: 0)
130 @type force: 1|0
131
132 @return: number of clusters
133 @rtype: int
134
135 @raise ClusterError: if can't determining number of clusters
136 """
137 pos = [ min_clst, max_clst ]
138
139 while 1:
140 clst = int( N.average(pos) )
141 self.cluster( clst, weight, converged, aMask, force=1 )
142 rmsLst = [ self.avgRmsd(i, aMask)[0] for i in range(clst)]
143
144 if N.average( rmsLst ) > rmsLimit:
145 pos[0] = clst
146 else:
147 pos[1] = clst
148
149 if pos[1]-pos[0] == 1:
150 if self.verbose:
151 T.flushPrint('Converged at %i clusters, current average cluster rmsd %.2f\n'%( clst, N.average( rmsLst ) ))
152 return pos[1]
153
154 if pos[1]-pos[0] != 1:
155 if self.verbose:
156 T.flushPrint('Current cluster setting %i, current average cluster rmsd %.2f\n'%( clst, N.average( rmsLst ) ))
157
158 if pos[1]-pos[0]<= 0 or pos[0]<min_clst or pos[1]>max_clst:
159 raise ClusterError, "Error determining number of clusters"
160
161
163 """
164 Get degree of membership of each frame to each cluster.
165
166 @return: N.array( n_clusters x n_frames )
167 @rtype: array
168 """
169 return self.fc.getMembershipMatrix()
170
171
173 """
174 Get maximum membership value for each frame.
175
176 @return: list of float
177 @rtype: [float]
178 """
179 msm = self.memberships()
180 return map( lambda x: max( msm[:,x]), range(0, self.traj.lenFrames() ))
181
182
184 """
185 Get 'center structure' for each cluster.
186
187 @return: N.array( n_clusters x n_atoms_masked x 3 )
188 @rtype: array
189 """
190 lenAtoms = N.shape( self.fcCenters )[1] / 3
191 return N.reshape( self.fcCenters, ( self.n_clusters, lenAtoms, 3))
192
193
195 """
196 Get indices for frame nearest to each cluster center.
197
198 @return: list of cluster center indecies
199 @rtype: [int]
200 """
201 return N.argmax( self.memberships(), 1 )
202
203
205 """
206 Get indices of all frames belonging to each cluster. Each frame
207 is guaranteed to belong, at least, to the cluster for which it has
208 its maximum membership. If threshold > 0, it can additionally pop
209 up in other clusters.
210
211 @param threshold: minimal cluster membership or 0 to consider
212 only max membership (default: 0)
213 @type threshold: float
214
215 @return: n_cluster, lst of lst of int, frame indices
216 @rtype: [[int]]
217 """
218
219 msm = self.memberships()
220 maxMemb = N.argmax( msm, 0 )
221
222 r = [N.nonzero( N.equal(maxMemb, i) ) for i in range(0, self.n_clusters)]
223 r = [ x.tolist() for x in r ]
224
225
226
227 if threshold > 0.:
228 r2 = [ N.nonzero( N.greater( l, threshold) ) for l in msm ]
229
230
231 for i in range(0, len( r ) ):
232 try:
233 frames = r[i].tolist()
234 except:
235 frames = r[i]
236
237 r[i] = frames + [ fr for fr in r2[i] if fr not in r[i] ]
238
239
240 r = [ self.membershipSort( r[i], i) for i in range(0, len(r) )]
241
242 return r
243
244
246 """
247 Sort given list of frame indices by their membership in cluster.
248
249 @param frames: list of frame indecies
250 @type frames: [int]
251 @param cluster: cluster number
252 @type cluster: int
253
254 @return: indecies sorted by ther membership to cluster
255 @rtype: [int]
256 """
257 msm = self.memberships()
258
259 pairs = [ (-msm[cluster, fr], fr) for fr in frames ]
260 pairs.sort()
261 return [ x[1] for x in pairs ]
262
263
265 """
266 Get trajectory with all frames belonging to this cluster, sorted
267 by their membership-degree (highest first).
268
269 @param cluster: cluster number
270 @type cluster: int
271 @param threshold: float 0-1, minimal cluster membership,
272 see L{memberFrames()}
273 @type threshold: float
274
275 @return: Trajectory with all members of a cluster, sorted by membership
276 @rtype: Trajectory
277 """
278 mf = self.memberFrames( threshold )[cluster]
279 return self.traj.takeFrames( mf )
280
281
283 """
284 Get member Trajectories for each cluster. Frames are sorted by their
285 membership-degree (highest first).
286
287 @param threshold: float 0-1, minimal cluster membership,
288 see L{memberFrames()}
289 @type threshold:
290
291 @return: lst of Trajectories, with members of each cluster,
292 sorted by membership
293 @rtype: [Trajectory]
294 """
295 mf = self.memberFrames( threshold )
296 return [ self.traj.takeFrames(m) for m in mf ]
297
298
299 - def avgRmsd( self, cluster, aMask=None, threshold=0. ):
300 """
301 Claculate the average pairwise rmsd (in Angstrom) for members
302 of a cluter.
303
304 @param cluster: cluster number
305 @type cluster: int
306 @param aMask: atom mask applied before calculation
307 @type aMask: [1|0]
308 @param threshold: float 0-1, minimal cluster membership,
309 see L{memberTraj()}
310 @type threshold: float
311
312 @return: average rmsd and the standard deviation
313 @rtype: float, float
314 """
315 try:
316 rms = self.memberTraj(cluster,threshold).pairwiseRmsd( aMask )
317 rms = aboveDiagonal( rms )
318 except:
319 rms = []
320
321 if len(N.ravel(rms)) == 1:
322 return N.average(rms)[0], 0.0
323 if len(N.ravel(rms)) == 0:
324 return 0.0, 0.0
325
326 return N.average( rms ), SD( rms )
327
328
330 """
331 Claculate the rmsd (or average rmsd) of all frames belonging to a
332 cluster to a reference structure (in Angstrom).
333
334 @param cluster: cluster number
335 @type cluster: int
336 @param ref: reference structure
337 @type ref: model
338 @param avg: return the average rmsd (1) OR a list with all rmsds (0)
339 (default: 1)
340 @type avg: float OR [float]
341
342 """
343 eTraj = self.memberTraj(cluster,threshold=0)
344 rms = []
345
346 if type(ref) == types.InstanceType:
347 ref = ref.xyz
348
349 for frame in eTraj.frames:
350 rt, rmsdLst = rmsFit.match( ref, frame)
351 rms += [ rmsdLst[0][1] ]
352 if avg ==1:
353 return N.average(rms)
354 return rms
355
356
357
358
359
360
362 """
363 Test class
364 """
365
366 - def run( self, local=0 ):
367 """
368 run function test
369
370 @param local: transfer local variables to global and perform
371 other tasks only when run locally
372 @type local: 1|0
373
374 @return: 1
375 @rtype: int
376 """
377 from Biskit.EnsembleTraj import traj2ensemble
378
379 traj = T.Load( T.testRoot()+'/lig_pcr_00/traj.dat')
380
381 traj = traj2ensemble( traj )
382
383 aMask = traj.ref.mask( lambda a: a['name'] in ['CA','CB','CG'] )
384
385 traj = traj.thin( 1 )
386
387 if local:
388 traj.fit( aMask )
389 tc = TrajCluster( traj )
390 else:
391 traj.fit( aMask, verbose=0 )
392 tc = TrajCluster( traj, verbose=0 )
393
394
395 n_clusters = tc.calcClusterNumber( min_clst=3, max_clst=15,
396 rmsLimit=0.7, aMask=aMask )
397
398
399 tc.cluster( n_clusters, aMask=aMask )
400
401 if local:
402 member_frames = tc.memberFrames()
403
404 print 'There are %i clusters where the members are:'%n_clusters
405 for i in range(n_clusters):
406 print 'Cluster %i (%i members): %s'%( i+1,
407 len(member_frames[i]),
408 member_frames[i] )
409
410 globals().update( locals() )
411
412 return 1
413
414
416 """
417 Precalculated result to check for consistent performance.
418
419 @return: 1
420 @rtype: int
421 """
422 return 1
423
424
425
426 if __name__ == '__main__':
427
428 test = Test()
429
430 assert test.run( local=1 ) == test.expected_result()
431