1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 """
25 pstat.py module
26
27 ::
28 #################################################
29 ####### Written by: Gary Strangman ###########
30 ####### Last modified: Jun 29, 2001 ###########
31 #################################################
32
33 This module provides some useful list and array manipulation routines
34 modeled after those found in the |Stat package by Gary Perlman, plus a
35 number of other useful list/file manipulation functions. The list-based
36 functions include::
37
38 abut (source,*args)
39 simpleabut (source, addon)
40 colex (listoflists,cnums)
41 collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
42 dm (listoflists,criterion)
43 flat (l)
44 linexand (listoflists,columnlist,valuelist)
45 linexor (listoflists,columnlist,valuelist)
46 linedelimited (inlist,delimiter)
47 lineincols (inlist,colsize)
48 lineincustcols (inlist,colsizes)
49 list2string (inlist,delimit=' ')
50 makelol(inlist)
51 makestr(x)
52 printcc (lst,extra=2)
53 printincols (listoflists,colsize)
54 pl (listoflists)
55 printl(listoflists)
56 replace (lst,oldval,newval)
57 recode (inlist,listmap,cols=None)
58 remap (listoflists,criterion)
59 roundlist (inlist,num_digits_to_round_floats_to)
60 sortby(listoflists,sortcols)
61 unique (inlist)
62 duplicates(inlist)
63 writedelimited (listoflists, delimiter, file, writetype='w')
64
65 Some of these functions have alternate versions which are defined only if
66 Numeric (NumPy) can be imported. These functions are generally named as
67 above, with an 'a' prefix.::
68
69 aabut (source, *args)
70 acolex (a,indices,axis=1)
71 acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
72 adm (a,criterion)
73 alinexand (a,columnlist,valuelist)
74 alinexor (a,columnlist,valuelist)
75 areplace (a,oldval,newval)
76 arecode (a,listmap,col='all')
77 arowcompare (row1, row2)
78 arowsame (row1, row2)
79 asortrows(a,axis=0)
80 aunique(inarray)
81 aduplicates(inarray)
82
83 Currently, the code is all but completely un-optimized. In many cases, the
84 array versions of functions amount simply to aliases to built-in array
85 functions/methods. Their inclusion here is for function name consistency.
86 """
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108 import stats
109 import string, copy
110 from types import *
111
112 __version__ = 0.4
113
114
115
116
117
118
119
def _recycle (seq,length):
    """
    Repeats seq end-to-end until it holds at least 'length' items and then
    trims it to exactly 'length' items. Helper for abut.

    Usage:   _recycle(seq,length)     seq=non-empty list or tuple
    Returns: a sequence of the same type as seq, 'length' items long
    """
    repeats = -(-length // len(seq))      # ceiling division, integers only
    origseq = copy.deepcopy(seq)          # repeats are copies, not aliases
    for i in range(repeats-1):
        seq = seq + origseq
    return seq[0:length]


def abut (source,*args):
    """
    Like the |Stat abut command. It concatenates two lists side-by-side
    and returns the result. '2D' lists are also accommodated for either
    argument (source or addon). CAUTION: If one list is shorter, it will be
    repeated until it is as long as the longest list. If this behavior is not
    desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'
    """
    if type(source) not in (list,tuple):
        source = [source]
    for addon in args:
        if type(addon) not in (list,tuple):
            addon = [addon]
        # stretch whichever side is shorter so both have the same length
        if len(addon) < len(source):
            addon = _recycle(addon,len(source))
        elif len(source) < len(addon):
            source = _recycle(source,len(addon))
        source = simpleabut(source,addon)
    return source
165
166
def simpleabut (source, addon):
    """
    Concatenates two lists as columns and returns the result. '2D' lists
    are also accommodated for either argument (source or addon). This DOES NOT
    repeat either list to make the 2 lists of equal length. Beware of list
    pairs with different lengths ... the resulting list will be the length of
    the FIRST list passed; rows of source beyond the end of addon are returned
    (as copies) without anything attached.

    Whether an argument is treated as '2D' is decided from its first element
    only. Tuples are accepted wherever lists are.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if type(source) not in (list,tuple):
        source = [source]
    if type(addon) not in (list,tuple):
        addon = [addon]
    # always build a real list so rows can be assigned even for tuple input
    result = list(copy.deepcopy(source))
    minlen = min(len(source),len(addon))
    if minlen == 0:
        return result                      # nothing to attach
    source_is_2d = type(source[0]) in (list,tuple)
    addon_is_2d = type(addon[0]) in (list,tuple)
    for i in range(minlen):
        left = source[i]
        if not source_is_2d:
            left = [left]
        right = addon[i]
        if not addon_is_2d:
            right = [right]
        if type(left) != type(right):
            # list + tuple cannot be concatenated directly
            left, right = list(left), list(right)
        result[i] = left + right
    return result
201
202
def colex (listoflists,cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'. cnums can be
    an integer, a sequence of integers, or a string holding an index/slice
    expression that is appended directly to each row ... e.g., '[3:]' will
    colex columns 3 onward from the listoflists.

    NOTE: a string cnums is passed to eval(); only use trusted expressions.

    Usage:   colex (listoflists,cnums)
    Returns: a list-of-lists corresponding to the columns from listoflists
             specified by cnums, in the order the column numbers appear in
             cnums (a plain list of values when only one column is requested)
    """
    if type(cnums) in (list,tuple):
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column,[row[col] for row in listoflists])
    elif type(cnums) == str:
        column = eval('[x'+cnums+' for x in listoflists]')
    else:
        column = [row[cnums] for row in listoflists]
    return column
229
230
def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values). Setting fcn1
    and/or fcn2 to point to a function rather than None (e.g., stats.sterr,
    len) will append those results (e.g., the sterr, N) after each calculated
    mean; if such a function raises, 'N/A' is stored instead. cfcn is the
    collapse function to apply (defaults to a mean defined here, but
    harmonicmean or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: a list of lists with all unique permutations of entries appearing
             in columns ("conditions") specified by keepcols, abutted with the
             result of cfcn (if cfcn=None, defaults to the mean) of each
             column specified by collapsecols. With no keepcols, a list with
             one entry per collapsed column is returned instead.
    """
    def collmean (inlist):
        s = 0
        for item in inlist:
            s = s + item
        return s/float(len(inlist))

    def safecall (fcn,column):
        # extra statistics are best-effort: failures become 'N/A'
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    if type(keepcols) not in (list,tuple):
        keepcols = [keepcols]
    if type(collapsecols) not in (list,tuple):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean
    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists,col)
            entry = cfcn(avgcol)
            if fcn1:
                entry = [entry, safecall(fcn1,avgcol)]
            if fcn2:
                test = safecall(fcn2,avgcol)
                try:
                    entry = entry + [test]     # entry is already a list
                except TypeError:
                    entry = [entry, test]      # entry is still a bare value
            means.append(entry)
        return means
    else:
        values = colex(listoflists,keepcols)
        uniques = unique(values)
        uniques.sort()
        newlist = []
        for item in uniques:
            if type(item) not in (list,tuple):
                item = [item]
            tmprows = linexand(listoflists,keepcols,item)
            for col in collapsecols:
                avgcol = colex(tmprows,col)
                item.append(cfcn(avgcol))
                if fcn1 is not None:
                    item.append(safecall(fcn1,avgcol))
                if fcn2 is not None:
                    item.append(safecall(fcn2,avgcol))
            newlist.append(item)
        return newlist
307
308
def dm (listoflists,criterion):
    """
    Selects the rows of a list of lists for which a criterion holds. The
    criterion is a string written as an expression in x, where x stands for
    one row; e.g., 'x[3]>=9' keeps all rows whose 4th column is >=9 and
    "x[2]=='N'" keeps rows whose column 2 equals the string 'N'.

    NOTE: criterion is passed to eval(); only use trusted expressions.

    Usage:   dm (listoflists, criterion)
    Returns: rows from listoflists that meet the specified criterion.
    """
    keep = eval('lambda x: '+criterion)
    return filter(keep,listoflists)
322
323
def flat (l):
    """
    Flattens a '2D' list by one level: the elements of every row are
    collected, in order, into a single new list.

    Usage:   flat(l)
    Returns: a new 1D list holding every element of every row of l
    """
    return [element for row in l for element in row]
336
337
def linexand (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist). Values are compared with ==
    directly, so strings containing quotes and floats are matched exactly.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    if type(columnlist) not in (list,tuple):
        columnlist = [columnlist]
    if type(valuelist) not in (list,tuple):
        valuelist = [valuelist]
    # indexing (rather than zip) keeps the IndexError for a short valuelist
    pairs = [(columnlist[i],valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col,val in pairs:
            if not (row[col] == val):
                break
        else:
            lines.append(row)              # every pair matched
    return lines
362
363
def linexor (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist. If only one value
    exists for columnlist but multiple values appear in valuelist, the
    valuelist values are all assumed to pertain to the same column. Values
    are compared with == directly, so strings containing quotes and floats
    are matched exactly.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    if type(columnlist) not in (list,tuple):
        columnlist = [columnlist]
    if type(valuelist) not in (list,tuple):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    # indexing (rather than zip) keeps the IndexError for a short valuelist
    pairs = [(columnlist[i],valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col,val in pairs:
            if row[col] == val:
                lines.append(row)          # one match is enough
                break
    return lines
392
393
def linedelimited (inlist,delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter' (of any length, including empty). Non-string
    elements are converted with str(). Use '\\t' for tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the delimited string (no trailing delimiter)
    """
    pieces = []
    for item in inlist:
        if type(item) != str:
            item = str(item)
        pieces.append(item)
    return delimiter.join(pieces)
409
410
def lineincols (inlist,colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize. Elements longer than colsize
    are cut to exactly colsize characters so later columns stay aligned.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: formatted string created from inlist
    """
    cells = []
    for item in inlist:
        if type(item) != str:
            item = str(item)
        if len(item) <= colsize:
            cells.append(item.rjust(colsize))
        else:
            cells.append(item[0:colsize])
    return ''.join(cells)
430
431
def lineincustcols (inlist,colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes. The
    length of colsizes must be greater than or equal to the number of columns
    in inlist. Elements longer than their column are cut to exactly the
    column width so later columns stay aligned.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    cells = []
    for i in range(len(inlist)):
        item = inlist[i]
        if type(item) != str:
            item = str(item)
        width = colsizes[i]
        if len(item) <= width:
            cells.append(item.rjust(width))
        else:
            cells.append(item[0:width])
    return ''.join(cells)
456
457
def list2string (inlist,delimit=' '):
    """
    Joins the items of a 1D list into one long string for file output. Each
    item is first passed through makestr; the pieces are then joined with
    'delimit' between them.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    pieces = [makestr(item) for item in inlist]
    return delimit.join(pieces)
468
469
def makelol (inlist):
    """
    Wraps every item of a 1D list in its own one-element list, giving a 2D
    list (i.e., a list-of-lists). Useful when you want to write a 1D list one
    item per line.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[item] for item in inlist]
482
483
def makestr (x):
    """
    Returns x unchanged if it is already a string, otherwise str(x).

    Usage:   makestr(x)
    """
    if type(x) == str:
        return x
    return str(x)
488
489
def printcc (lst,extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of
    spaces). Use 'dashes' or '\\n' in the list-of-lists to print dashes or
    blank lines, respectively. Such separator rows (given either bare or
    wrapped in a one-item list) are ignored when column widths are measured.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    if len(lst) == 0:
        return None
    if type(lst[0]) not in (list,tuple):
        lst = [lst]
    blanks = (['\n'], '\n', '', [''])
    rules = (['dashes'], 'dashes')
    # only real data rows take part in the width calculation
    datarows = [row for row in lst if row not in blanks and row not in rules]
    maxsize = []
    if datarows:
        for col in range(len(datarows[0])):
            items = [makestr(item) for item in colex(datarows,col)]
            maxsize.append(max([len(item) for item in items]) + extra)
    for row in lst:
        if row in blanks:
            print('')
        elif row in rules:
            # dashes span the widest item, leaving the 'extra' padding blank
            dashes = ['-'*(width-extra) for width in maxsize]
            print(lineincustcols(dashes,maxsize))
        else:
            print(lineincustcols(row,maxsize))
    return None
526
527
def printincols (listoflists,colsize):
    """
    Prints each row of a list of lists on its own line, formatted by
    lineincols into columns of (fixed) colsize width, where colsize is an
    integer.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for row in listoflists:
        line = lineincols(row,colsize)
        print(line)
    return None
539
540
541 -def pl (listoflists):
542 """
543 Prints a list of lists, 1 list (row) at a time.
544
545 Usage: pl(listoflists)
546 Returns: None
547 """
548 for row in listoflists:
549 if row[-1] == '\n':
550 print row,
551 else:
552 print row
553 return None
554
555
def printl (listoflists):
    """
    Alias for pl: prints a list of lists, one row at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
    return
560
561
def replace (inlst,oldval,newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively (nested
    lists and tuples are searched too). The input is not modified; a tuple
    input gives a tuple result, anything else gives a list.

    Usage:   replace (inlst,oldval,newval)
    Returns: a new sequence with every oldval swapped for newval
    """
    newitems = []
    for item in inlst:
        if type(item) in (list,tuple):
            newitems.append(replace(item,oldval,newval))
        elif item == oldval:
            newitems.append(newval)
        else:
            newitems.append(item)
    if type(inlst) == tuple:
        return tuple(newitems)             # tuples cannot be edited in place
    return newitems
575
576
def recode (inlist,listmap,cols=None):
    """
    Changes the values in a list of lists to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers). Each row of
    listmap is an [oldvalue, newvalue] pair; values that do not appear as an
    oldvalue are left alone. cols defaults to None (meaning all columns of
    every row are recoded). The input is not modified.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]     # looked up once, not per cell
    if cols is not None and type(cols) not in (list,tuple):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))           # every column of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:
                continue                        # value has no mapping
            row[col] = listmap[idx][1]
    return lst
606
607
def remap (listoflists,criterion):
    """
    Computes a new value from every row of a 2D list (listoflists). The
    criterion is a string written as an expression in x, where x stands for
    one row; the value of that expression for each row is collected, i.e. the
    result matches map(lambda x: <criterion>,listoflists).

    NOTE: criterion is passed to eval(); only use trusted expressions.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists
    """
    transform = eval('lambda x: '+criterion)
    return [transform(row) for row in listoflists]
620
621
def roundlist (inlist,digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the following
    function to all elements of float type ... round(element,digits). A list
    whose first element is an int or float is taken to be 1D and is treated
    as a single row. The input is not modified.

    Usage:   roundlist(inlist,digits)
    Returns: a new 2D list (one entry per row) with rounded floats
    """
    if len(inlist) == 0:
        return []
    if type(inlist[0]) in (int,float):
        inlist = [inlist]
    rounded = []
    for row in inlist:
        if type(row) not in (list,tuple):
            rounded.append(row)            # not a row we can rebuild
            continue
        newrow = []
        for item in row:
            if type(item) == float:
                item = round(item,digits)
            newrow.append(item)
        if type(row) == tuple:
            newrow = tuple(newrow)
        rounded.append(newrow)
    return rounded
638
639
def sortby (listoflists,sortcols):
    """
    Sorts a list of lists on the column(s) specified in the sequence
    sortcols. The sort columns are abutted in front of each row, the
    combined rows are sorted, and the prepended columns are then sliced
    off again.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    try:
        numcols = len(sortcols)
    except TypeError:
        numcols = 1                        # a single column number
    keyed = abut(colex(listoflists,sortcols),listoflists)
    keyed.sort()
    return colex(keyed,'[' + str(numcols) + ':]')
657
658
def unique (inlist):
    """
    Collects each distinct item of the passed list once, in order of first
    appearance. If a list-of-lists is passed, unique LISTS are found (i.e.,
    items in the first dimension are compared).

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
673
def duplicates (inlist):
    """
    Returns the items in the FIRST dimension of the passed list that occur
    again later in the list. An item is reported once for every occurrence
    that is followed by another equal item.

    Usage:   duplicates (inlist)
    """
    return [inlist[pos] for pos in range(len(inlist))
            if inlist[pos] in inlist[pos+1:]]
685
686
def nonrepeats (inlist):
    """
    Returns the items that occur exactly once in the first dimension of the
    passed list, in their original order.

    Usage:   nonrepeats (inlist)
    """
    return [item for item in inlist if inlist.count(item) == 1]
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717 try:
718 import Numeric
719 N = Numeric
720
721 - def aabut (source, *args):
722 """
723 Like the |Stat abut command. It concatenates two arrays column-wise
724 and returns the result. CAUTION: If one array is shorter, it will be
725 repeated until it is as long as the other.
726
727 Usage: aabut (source, args) where args=any # of arrays
728 Returns: an array as long as the LONGEST array past, source appearing on the
729 'left', arrays in <args> attached on the 'right'.
730 """
731 if len(source.shape)==1:
732 width = 1
733 source = N.resize(source,[source.shape[0],width])
734 else:
735 width = source.shape[1]
736 for addon in args:
737 if len(addon.shape)==1:
738 width = 1
739 addon = N.resize(addon,[source.shape[0],width])
740 else:
741 width = source.shape[1]
742 if len(addon) < len(source):
743 addon = N.resize(addon,[source.shape[0],addon.shape[1]])
744 elif len(source) < len(addon):
745 source = N.resize(source,[addon.shape[0],source.shape[1]])
746 source = N.concatenate((source,addon),1)
747 return source
748
749
750 - def acolex (a,indices,axis=1):
751 """
752 Extracts specified indices (a list) from passed array, along passed
753 axis (column extraction is default). BEWARE: A 1D array is presumed to be a
754 column-array (and that the whole array will be returned as a column).
755
756 Usage: acolex (a,indices,axis=1)
757 Returns: the columns of a specified by indices
758 """
759 if type(indices) not in [ListType,TupleType,N.ArrayType]:
760 indices = [indices]
761 if len(N.shape(a)) == 1:
762 cols = N.resize(a,[a.shape[0],1])
763 else:
764 cols = N.take(a,indices,axis)
765 return cols
766
767
768 - def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
769 """
770 Averages data in collapsecol, keeping all unique items in keepcols
771 (using unique, which keeps unique LISTS of column numbers), retaining
772 the unique sets of values in keepcols, the mean for each. If stderror or
773 N of the mean are desired, set either or both parameters to 1.
774
775 Usage: acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
776 Returns: unique 'conditions' specified by the contents of columns specified
777 by keepcols, abutted with the mean(s) of column(s) specified by collapsecols
778 """
779 def acollmean (inarray):
780 return N.sum(N.ravel(inarray))
781
782 if cfcn == None:
783 cfcn = acollmean
784 if keepcols == []:
785 avgcol = acolex(a,collapsecols)
786 means = N.sum(avgcol)/float(len(avgcol))
787 if fcn1<>None:
788 try:
789 test = fcn1(avgcol)
790 except:
791 test = N.array(['N/A']*len(means))
792 means = aabut(means,test)
793 if fcn2<>None:
794 try:
795 test = fcn2(avgcol)
796 except:
797 test = N.array(['N/A']*len(means))
798 means = aabut(means,test)
799 return means
800 else:
801 if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
802 keepcols = [keepcols]
803 values = colex(a,keepcols)
804 uniques = unique(values)
805 uniques.sort()
806 newlist = []
807 for item in uniques:
808 if type(item) not in [ListType,TupleType,N.ArrayType]:
809 item =[item]
810 tmprows = alinexand(a,keepcols,item)
811 for col in collapsecols:
812 avgcol = acolex(tmprows,col)
813 item.append(acollmean(avgcol))
814 if fcn1<>None:
815 try:
816 test = fcn1(avgcol)
817 except:
818 test = 'N/A'
819 item.append(test)
820 if fcn2<>None:
821 try:
822 test = fcn2(avgcol)
823 except:
824 test = 'N/A'
825 item.append(test)
826 newlist.append(item)
827 try:
828 new_a = N.array(newlist)
829 except TypeError:
830 new_a = N.array(newlist,'O')
831 return new_a
832
833
834 - def adm (a,criterion):
835 """
836 Returns rows from the passed list of lists that meet the criteria in
837 the passed criterion expression (a string as a function of x).
838
839 Usage: adm (a,criterion) where criterion is like 'x[2]==37'
840 """
841 function = 'filter(lambda x: '+criterion+',a)'
842 lines = eval(function)
843 try:
844 lines = N.array(lines)
845 except:
846 lines = N.array(lines,'O')
847 return lines
848
849
851 if type(x)==StringType:
852 return 1
853 else:
854 return 0
855
856
858 """
859 Returns the rows of an array where col (from columnlist) = val
860 (from valuelist). One value is required for each column in columnlist.
861
862 Usage: alinexand (a,columnlist,valuelist)
863 Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
864 """
865 if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
866 columnlist = [columnlist]
867 if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
868 valuelist = [valuelist]
869 criterion = ''
870 for i in range(len(columnlist)):
871 if type(valuelist[i])==StringType:
872 critval = '\'' + valuelist[i] + '\''
873 else:
874 critval = str(valuelist[i])
875 criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
876 criterion = criterion[0:-3]
877 return adm(a,criterion)
878
879
881 """
882 Returns the rows of an array where col (from columnlist) = val (from
883 valuelist). One value is required for each column in columnlist.
884 The exception is if either columnlist or valuelist has only 1 value,
885 in which case that item will be expanded to match the length of the
886 other list.
887
888 Usage: alinexor (a,columnlist,valuelist)
889 Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
890 """
891 if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
892 columnlist = [columnlist]
893 if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
894 valuelist = [valuelist]
895 criterion = ''
896 if len(columnlist) == 1 and len(valuelist) > 1:
897 columnlist = columnlist*len(valuelist)
898 elif len(valuelist) == 1 and len(columnlist) > 1:
899 valuelist = valuelist*len(columnlist)
900 for i in range(len(columnlist)):
901 if type(valuelist[i])==StringType:
902 critval = '\'' + valuelist[i] + '\''
903 else:
904 critval = str(valuelist[i])
905 criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
906 criterion = criterion[0:-2]
907 return adm(a,criterion)
908
909
911 """
912 Replaces all occurrences of oldval with newval in array a.
913
914 Usage: areplace(a,oldval,newval)
915 """
916 newa = N.not_equal(a,oldval)*a
917 return newa+N.equal(a,oldval)*newval
918
919
920 - def arecode (a,listmap,col='all'):
921 """
922 Remaps the values in an array to a new set of values (useful when
923 you need to recode data from (e.g.) strings to numbers as most stats
924 packages require. Can work on SINGLE columns, or 'all' columns at once.
925
926 Usage: arecode (a,listmap,col='all')
927 Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
928 """
929 ashape = a.shape
930 if col == 'all':
931 work = a.flat
932 else:
933 work = acolex(a,col)
934 work = work.flat
935 for pair in listmap:
936 if type(pair[1]) == StringType or work.typecode()=='O' or a.typecode()=='O':
937 work = N.array(work,'O')
938 a = N.array(a,'O')
939 for i in range(len(work)):
940 if work[i]==pair[0]:
941 work[i] = pair[1]
942 if col == 'all':
943 return N.reshape(work,ashape)
944 else:
945 return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
946 else:
947 work = N.where(N.equal(work,pair[0]),pair[1],work)
948 return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
949
950
952 """
953 Compares two rows from an array, regardless of whether it is an
954 array of numbers or of python objects (which requires the cmp function).
955
956 Usage: arowcompare(row1,row2)
957 Returns: an array of equal length containing 1s where the two rows had
958 identical elements and 0 otherwise
959 """
960 if row1.typecode()=='O' or row2.typecode=='O':
961 cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2))))
962 else:
963 cmpvect = N.equal(row1,row2)
964 return cmpvect
965
966
968 """
969 Compares two rows from an array, regardless of whether it is an
970 array of numbers or of python objects (which requires the cmp function).
971
972 Usage: arowsame(row1,row2)
973 Returns: 1 if the two rows are identical, 0 otherwise.
974 """
975 cmpval = N.alltrue(arowcompare(row1,row2))
976 return cmpval
977
978
980 """
981 Sorts an array "by rows". This differs from the Numeric.sort() function,
982 which sorts elements WITHIN the given axis. Instead, this function keeps
983 the elements along the given axis intact, but shifts them 'up or down'
984 relative to one another.
985
986 Usage: asortrows(a,axis=0)
987 Returns: sorted version of a
988 """
989 if axis != 0:
990 a = N.swapaxes(a, axis, 0)
991 l = a.tolist()
992 l.sort()
993 y = N.array(l)
994 if axis != 0:
995 y = N.swapaxes(y, axis, 0)
996 return y
997
998
1000 """
1001 Returns unique items in the FIRST dimension of the passed array. Only
1002 works on arrays NOT including string items.
1003
1004 Usage: aunique (inarray)
1005 """
1006 uniques = N.array([inarray[0]])
1007 if len(uniques.shape) == 1:
1008 for item in inarray[1:]:
1009 if N.add.reduce(N.equal(uniques,item).flat) == 0:
1010 try:
1011 uniques = N.concatenate([uniques,N.array[N.NewAxis,:]])
1012 except TypeError:
1013 uniques = N.concatenate([uniques,N.array([item])])
1014 else:
1015 if inarray.typecode() != 'O':
1016 for item in inarray[1:]:
1017 if not N.sum(N.alltrue(N.equal(uniques,item),1)):
1018 try:
1019 uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
1020 except TypeError:
1021 uniques = N.concatenate([uniques,N.array([item])])
1022 else:
1023 pass
1024 else:
1025 for item in inarray[1:]:
1026 newflag = 1
1027 for unq in uniques:
1028 test = N.sum(abs(N.array(map(cmp,item,unq))))
1029 if test == 0:
1030 newflag = 0
1031 break
1032 if newflag == 1:
1033 try:
1034 uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
1035 except TypeError:
1036 uniques = N.concatenate([uniques,N.array([item])])
1037 return uniques
1038
1039
1041 """
1042 Returns duplicate items in the FIRST dimension of the passed array. Only
1043 works on arrays NOT including string items.
1044
1045 Usage: aunique (inarray)
1046 """
1047 inarray = N.array(inarray)
1048 if len(inarray.shape) == 1:
1049 dups = []
1050 inarray = inarray.tolist()
1051 for i in range(len(inarray)):
1052 if inarray[i] in inarray[i+1:]:
1053 dups.append(inarray[i])
1054 dups = aunique(dups)
1055 else:
1056 dups = []
1057 aslist = inarray.tolist()
1058 for i in range(len(aslist)):
1059 if aslist[i] in aslist[i+1:]:
1060 dups.append(aslist[i])
1061 dups = unique(dups)
1062 dups = N.array(dups)
1063 return dups
1064
1065 except ImportError:
1066 pass
1067