1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """
26 Analyze a density distribution of values.
27 """
28
29 import Numeric as N
30 import math
31 import Biskit.hist as H
32
34 """
35 Analyze a density distribution of values.
36 Can be created from a list of samples or from a discrete distribution::
37
38 Density( values = [ float ] )
39 or
40 Density( p = array(2xN,'f') )
41 """
42
43 - def __init__(self, p = None, values = None, bins = 20):
44 """
45 @param p: discrete distribution, array (2 x len(data) ) with
46 start of bin and witdh of bin (default: None)
47 @type p: array
48 @param values: list of samples (default: None)
49 @type values: [float]
50 @param bins: number of bins (default: 20)
51 @type bins: int
52 """
53 if p is None and values is None:
54 raise ValueError, 'Either a discrete distribution or ' + \
55 'a list of samples must be specified'
56
57 if values is not None:
58
59 p = H.density(values, bins, steps = 0)
60
61 self.store(p)
62
63 self.verbose = 0
64
65
67 """
68 Analyze distribution data.
69
70 @param val: array (2 x len(data) ) with start of bin and witdh
71 of bin (default: None)
72 @type val: array
73 """
74 self.val = N.array(val, N.Float)
75 self.x = self.val[:,0]
76 self.p = self.val[:,1]
77
78 self.delta_x = abs(self.x[0] - self.x[1])
79
80 Z = N.sum(self.p) * self.delta_x
81
82 self.p /= Z
83
84
86 return self.val
87
88
90 from operator import getslice
91 return getslice(self.val, *args, **kw)
92
93
95 from operator import getitem
96 return getitem(self.val, *args, **kw)
97
98
100 """
101 confidenceInterval(self, level)
102
103 @param level: confidence level (e.g. 0.68 for stdev interval)
104 @type level: float
105
106 @return: start and end of the confidence interval
107 containing |level|*100 % of the probability
108 @rtype: float, float
109 """
110 order = N.argsort(self.p).tolist()
111 cumulative = N.add.accumulate(N.take(self.p, order)) * self.delta_x
112
113 ind = N.nonzero(N.greater_equal(cumulative, 1. - level))
114
115 sub_set = order[ind[0]:]
116
117 intervals = self.__find_intervals(sub_set)
118
119 boundaries = [(self.x[i[0]], self.x[i[-1]]) for i in intervals]
120
121 return tuple(boundaries)
122
123
125 """
126 findConfidenceInterval(self, x)
127 Find the smallest possible density interval that still includes x.
128
129 @param x: value
130 @type x: float
131
132 @return: convidence level, interval start and end
133 @rtype: float, (float,float)
134 """
135 closest = N.argmin(abs(self.x - x))
136
137 ind = N.nonzero(N.greater_equal(self.p, self.p[closest])).tolist()
138
139 intervals = self.__find_intervals(ind)
140
141 lens = N.array([len(i) for i in intervals])
142 levels = [N.sum(N.take(self.p, i)) for i in intervals]
143 level = N.sum(levels) * self.delta_x
144
145 boundaries = [(self.x[i[0]], self.x[i[-1]]) for i in intervals]
146
147 return level, tuple(boundaries)
148
149
158
159
161 """
162 Average of distribution.
163 """
164 return self.delta_x * N.sum(self.p * self.x)
165
166
168 """
169 Max height of distribution.
170 """
171 index = N.argmax(self.p)
172 return self.x[index]
173
174
176 l = N.array(l)
177 l = N.take(l, N.argsort(l))
178
179 break_points = N.nonzero(N.greater(l[1:] - l[:-1], 1))
180
181 start = 0
182 intervals = []
183
184 for i in range(len(break_points)):
185 index = break_points[i]
186 intervals.append(tuple(N.take(l, range(start, index + 1))))
187 start = index + 1
188
189 intervals.append(tuple(l[start:]))
190
191 return intervals
192
193
195 """
196 Get the probability of x from a lognormal density distribution that
197 is described by two parameters alpha and beta. Alpha and beta are not
198 the usual mean and standard dev of the lognormal distribution itself but
199 are the mean and stdev of the distribution after log-transformation.
200 The two parameters can hence be calculated from n sample values v::
201
202 alpha = 1/n N.sum( ln(vi) )
203 beta = N.sqrt( 1/(n-1) N.sum( ln(vi) - alpha )^2 )
204
205 @param x: value
206 @type x: float
207 @param alpha: mean of the log-transformed random variable
208 @type alpha: float
209 @param beta: stdev of the log-transformed random variable
210 @type beta: float
211
212 @return: probability of x
213 @rtype: float
214 """
215 return 1. / math.sqrt(2. * math.pi) / beta / x * \
216 math.exp(-0.5 / beta ** 2 * (math.log(x) - alpha) ** 2)
217
218
220 """
221 Estimate the probability of x NOT beeing a random observation from a
222 lognormal distribution that is described by a set of random values.
223 The exact solution to this problem is in L{Biskit.Statistics.lognormal}.
224
225 @param x: observed value
226 @type x: float
227 @param R: sample of random values; 0 -> don't clip (default: 1e-32)
228 @type R: [float]
229 @param clip: clip zeros at this value
230 @type clip: float
231
232 @return: confidence that x is not random, mean of random distrib.
233 @rtype: (float, float)
234 """
235 if clip and 0 in R:
236 R = N.clip( R, clip, max( R ) )
237
238 mean = N.average( N.log( R ) )
239
240 n = len( R )
241
242 stdv = N.sqrt(N.sum(N.power(N.log( R ) - mean, 2)) / (n - 1.))
243
244
245 stop = max( R ) * 50.0
246 step = stop / 100000
247 start = step / 10.0
248
249 X = [(v, p_lognormal(v, mean, stdv) ) for v in N.arange(start, stop, step)]
250
251
252 d = Density( X )
253
254 return d.findConfidenceInterval( x * 1.0 )[0], d.average()
255
256
257
258
259
260
262 """
263 Test class
264 """
265
266 - def run( self, local=0 ):
267 """
268 run function test
269
270 @param local: transfer local variables to global and perform
271 other tasks only when run locally
272 @type local: 1|0
273
274 @return: 1
275 @rtype: int
276 """
277 import random
278
279
280
281 X = [ (x, p_lognormal(x, 1.0, 0.5)) for x in N.arange(0.00001, 50, 0.001)]
282
283 for i in range( 5 ):
284
285
286 alpha = 2.
287 beta = 0.6
288
289 R = [ random.lognormvariate( alpha, beta ) for i in range( 10000 ) ]
290
291 if local: print logConfidence( 6.0, R )[0]
292
293 if local:
294 globals().update( locals() )
295
296 return 1
297
298
300 """
301 Precalculated result to check for consistent performance.
302
303 @return: 1
304 @rtype: int
305 """
306 return 1
307
308
309 if __name__ == '__main__':
310
311 test = Test()
312
313 assert test.run( local=1 ) == test.expected_result()
314