1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 """
27 Implementation of the fuzzy c-means algorithm
28
29 Author: Wolfgang Rieping 1998, 2002
30
31 Reference::
32 Heather L. Gordon, Rajmund L. Somorjai
33 Fuzzy Cluster Analysis of Molecular Dynamics Trajectories
34 PROTEINS: Structure, Function and Genetics 14:249-264 1992
35 """
36
37 import numpy.oldnumeric as N
38 import Biskit.mathUtils as MU
39 import tools
40
41 from numpy.oldnumeric.random_array import seed, random
42
43
44
45
46
47
48
49
50
51
52
53
54
56
57 d1 = N.diagonal(N.dot(x, N.transpose(x)))
58 d2 = N.diagonal(N.dot(y, N.transpose(y)))
59
60 a1 = N.add.outer(d1,d2)
61 a2 = N.dot(x, N.transpose(y))
62
63 return a1 - 2 * a2
64
65
67 return N.sqrt(squared_distance_matrix(x, y))
68
69
71 - def __init__(self, data, n_cluster, weight, seedx = 0, seedy = 0):
72 """
73 @param data: cluster this
74 @type data: [float] OR array
75 @param n_cluster: number of clusters
76 @type n_cluster: int
77 @param weight: fuzziness weigth
78 @type weight: float
79 @param seedx: random seed value for RandomArray.seed (default: 0)
80 @type seedx: int OR 0
81 @param seedy: random seed value for RandomArray.seed
82 (default: 0, set seed from clock)
83 @type seedy: int OR 0
84 """
85 self.data = N.array(data, N.Float)
86 self.w = weight
87 self.n_cluster = n_cluster
88 self.npoints, self.dimension = N.shape(data)
89 self.seedx = seedx
90 self.seedy = seedy
91
92
94
95 d2 = N.clip( d2, N.power(1e200, 1-self.w), 1e300 )
96 q = N.power(d2, 1. / (1. - self.w))
97 return q / N.sum(q)
98
99
101 p = N.power(msm, self.w)
102 ccenter = N.transpose(N.dot(p, self.data))
103 return N.transpose(ccenter / N.sum(p, 1))
104
105
107 return squared_distance_matrix(self.cluster_center, self.data)
108
109
111 """
112 @param centers: array with cluster centers
113 @type centers: array('f')
114
115 @return: distance to the centers, membership matrix, array of cenetrs
116 @rtype: array, array, array
117 """
118 d2 = squared_distance_matrix(centers, self.data)
119 msm = self.calc_membership_matrix(d2)
120 centers = self.calc_cluster_center(msm)
121
122 return d2, msm, centers
123
124
125 - def error(self, msm, d2):
126 """
127 @param msm: membership matrix
128 @type msm: array('f')
129 @param d2: distance from data to the centers
130 @type d2: array('f')
131
132 @return: weighted error
133 @rtype: float
134 """
135 p = N.power(msm, self.w)
136 product = N.dot(p, N.transpose(d2))
137 return N.trace(product)
138
139
141 """
142 Create a random membership matrix.
143
144 @return: random array of shape length of data to
145 cluster times number of clusters
146 @rtype: array('f')
147 """
148 seed(self.seedx, self.seedy)
149
150 r = random((self.npoints, self.n_cluster))
151 return N.transpose(r / N.sum(r))
152
153
154 - def go(self, errorthreshold, n_iterations=1e10, nstep=10, verbose=1):
155 """
156 Start the cluestering. Run until the error is below the error
157 treshold or the max number of iterations have been run.
158
159 @param errorthreshold: treshold value for error
160 @type errorthreshold: float
161 @param n_iterations: treshold value for number of iterations
162 (default: 1e10)
163 @type n_iterations: int
164 @param nstep: print information for every n'th step in the iteration
165 @type nstep: int
166
167 @return: array with cluster centers
168 @rtype: array('f')
169 """
170 iteration = 0
171 rel_err = 1e10
172 error = 1.e10
173
174 msm = self.create_membership_matrix()
175 centers = self.calc_cluster_center(msm)
176
177 while rel_err > errorthreshold and iteration < n_iterations:
178 d2, msm, centers = self.iterate(centers)
179 old_error = error
180 error = self.error(msm, d2)
181 rel_err = abs(1. - error/old_error)
182 iteration = iteration+1
183 if not iteration % nstep and verbose:
184 tools.errWrite( "%i %f\n" % (iteration, error) )
185
186 self.centers = centers
187 self.msm = msm
188 self.d2 = d2
189
190 return centers
191
192
194 centropy = N.diagonal(N.dot(self.msm,
195 N.transpose(N.log(self.msm))))
196 return -1/float(self.npoints)*centropy
197
198
201
202
204 p = N.power(self.msm, self.w)
205 return (self.n_cluster*N.sum(N.sum(p))-
206 self.npoints)/(self.npoints*(self.n_cluster-1))
207
208
210 return N.sum(N.power(self.msm, self.w), 1)/self.npoints
211
212
215
216
218 return self.msm
219
220
222 return self.cluster_center
223
224
226 centropy = N.sum(-N.log(self.msm)*\
227 self.msm)/float(self.n_cluster)
228 return MU.SD(centropy)
229
230
232 sd = MU.SD(self.msm)
233 return sd
234
235
236
237
238
239 import Biskit.test as BT
240
241 -class Test(BT.BiskitTest):
242 """FuzzyCluster test"""
243
245 """FuzzyCluster test"""
246 import gnuplot as G
247
248 x1 = random((500,2))
249 x2 = random((500,2)) + 1
250 x3 = random((500,2)) + 2
251
252 self.x = N.concatenate((x1, x2, x3))
253
254 self.fuzzy = FuzzyCluster(self.x, n_cluster=5, weight=1.5)
255
256 self.centers = self.fuzzy.go(1.e-30, n_iterations=50, nstep=10,
257 verbose=self.local)
258
259 if self.local:
260 print "cluster centers are displayed in green"
261 G.scatter( self.x, self.centers )
262
263 self.assertEqual( N.shape(self.centers), (5, 2) )
264
265 if __name__ == '__main__':
266
267 BT.localTest()
268