author | A.M. Thurnherr <athurnherr@yahoo.com> |
Wed, 16 Jul 2014 09:46:31 -0400 | |
changeset 11 | 56799f01321a |
parent 5 | 7d6e11d484ec |
child 10 | 3dfa16523886 |
permissions | -rw-r--r-- |
0 | 1 |
#====================================================================== |
2 |
# L I B S T A T S . P L |
|
3 |
# doc: Wed Mar 24 13:59:27 1999 |
|
5 | 4 |
# dlm: Sun Nov 24 23:23:11 2013 |
0 | 5 |
# (c) 1999 A.M. Thurnherr |
5 | 6 |
# uE-Info: 192 1 NIL 0 0 72 2 2 4 NIL ofnI |
0 | 7 |
#====================================================================== |
8 |
||
9 |
# HISTORY: |
|
10 |
# Mar 24, 1999: - created for the ANO paper |
|
11 |
# Mar 27, 1999: - extended |
|
12 |
# Sep 18, 1999: - argument typechecking |
|
13 |
# Sep 30, 1999: - added gauss() |
|
14 |
# Oct 04, 1999: - moved gauss() to [./libfuns.pl] (changed from specfuns) |
|
15 |
# Oct 20, 1999: - changed, 'cause I understand it better |
|
16 |
# Oct 21, 1999: - changed &Fishers_z to &r2z(); added &z2r() |
|
17 |
# - added &sig_rr(), &sig_rrtrue |
|
18 |
# Jan 22, 2002: - added N(), avg(), stddev(), min(), max() |
|
19 |
# Feb 27, 2006: - adjusted median() for compat with NR (even # of points) |
|
20 |
# - added medianF() |
|
21 |
# Jun 27, 2006: - added medianFNaN() |
|
22 |
# Jul 1, 2006: - Version 3.3 [HISTORY] |
|
23 |
# - made median respect nan based on perlfunc(1) |
|
24 |
# Apr 25, 2008: - added &bootstrap() |
|
25 |
# Oct 24, 2010: - replaced grep { $_ == $_ } by grep { numberp($_) } everywhere |
|
26 |
# - added &fixLowSampStat() |
|
27 |
# Nov 5, 2010: - added std (stderr would have been better but that's used in libPOSIX.pl |
|
28 |
# Dec 18, 2010: - added stddev2, mad, mad2 |
|
29 |
# Dec 31, 2010: - added rms() |
|
30 |
# Jul 2, 2011: - added mad2F() |
|
31 |
# Mar 10, 2012: - medianF() -> medianAnts_(); mad2F() -> mad2Ants_() |
|
32 |
# - added sum() |
|
33 |
# Apr 26, 2012: - BUG: std() did not allow nan as stddev input |
|
3 | 34 |
# Oct 15, 2012: - added max_i(), min_i() |
5 | 35 |
# Nov 24, 2013: - renamed N to ndata |
36 |
# - added fdiff() |
|
0 | 37 |
|
38 |
require "$ANTS/libfuns.pl"; |
|
39 |
||
40 |
#---------------------------------------------------------------------- |
|
41 |
# estimate stderr given stddev & degrees of freedom |
|
42 |
# - return nan for dof <= 0 |
|
43 |
#---------------------------------------------------------------------- |
|
44 |
||
45 |
sub std(@) |
|
46 |
{ |
|
47 |
my($sig,$dof) = |
|
48 |
&antsFunUsage(2,".c","stddev, deg_of_freedom",@_); |
|
49 |
return nan unless ($dof > 0); |
|
50 |
return $sig / sqrt($dof); |
|
51 |
} |
|
52 |
||
53 |
#---------------------------------------------------------------------- |
|
54 |
# calc standard stats from vector of vals |
|
5 | 55 |
# - used, e.g., in [rfilt] |
0 | 56 |
#---------------------------------------------------------------------- |
57 |
||
58 |
sub min(@) |
|
59 |
{ |
|
60 |
my($min) = 9e99; |
|
61 |
for (my($i)=0; $i<=$#_; $i++) { |
|
62 |
$min = $_[$i] if (numberp($_[$i]) && $_[$i] < $min); |
|
63 |
} |
|
64 |
return $min<9e99 ? $min : nan; |
|
65 |
} |
|
66 |
||
3 | 67 |
sub min_i(@) |
68 |
{ |
|
69 |
my($min) = 9e99; |
|
70 |
my($min_i); |
|
71 |
||
72 |
for (my($i)=0; $i<=$#_; $i++) { |
|
73 |
$min_i=$i,$min=$_[$i] if (numberp($_[$i]) && $_[$i] < $min); |
|
74 |
} |
|
75 |
return $min<9e99 ? $min_i : nan; |
|
76 |
} |
|
77 |
||
0 | 78 |
sub max(@) |
79 |
{ |
|
80 |
my($max) = -9e99; |
|
81 |
for (my($i)=0; $i<=$#_; $i++) { |
|
82 |
$max = $_[$i] if (numberp($_[$i]) && $_[$i] > $max); |
|
83 |
} |
|
84 |
return $max>-9e99 ? $max : nan; |
|
85 |
} |
|
86 |
||
3 | 87 |
sub max_i(@) |
88 |
{ |
|
89 |
my($max) = -9e99; |
|
90 |
my($max_i); |
|
91 |
||
92 |
for (my($i)=0; $i<=$#_; $i++) { |
|
93 |
$max_i=$i,$max=$_[$i] if (numberp($_[$i]) && $_[$i] > $max); |
|
94 |
} |
|
95 |
return $max>-9e99 ? $max_i : nan; |
|
96 |
} |
|
97 |
||
5 | 98 |
sub ndata(@) |
0 | 99 |
{ |
100 |
my($N) = 0; |
|
101 |
for (my($i)=0; $i<=$#_; $i++) { $N++ if (numberp($_[$i])); } |
|
102 |
return $N; |
|
103 |
} |
|
104 |
||
105 |
sub sum(@) |
|
106 |
{ |
|
107 |
my($N) = my($sum) = 0; |
|
4
ff72b00b4342
after adding $pi and some other stuff
A.M. Thurnherr <athurnherr@yahoo.com>
parents:
3
diff
changeset
|
108 |
@_ = split(/\s+/,$_[0]) if (@_ == 1 && !numberp($_[0])); |
0 | 109 |
for (my($i)=0; $i<=$#_; $i++) { $N++,$sum+=$_[$i] if (numberp($_[$i])); } |
110 |
return ($N>0)?$sum:nan; |
|
111 |
} |
|
112 |
||
113 |
sub avg(@) |
|
114 |
{ |
|
115 |
my($N) = my($sum) = 0; |
|
116 |
for (my($i)=0; $i<=$#_; $i++) { $N++,$sum+=$_[$i] if (numberp($_[$i])); } |
|
117 |
return ($N>0)?$sum/$N:nan; |
|
118 |
} |
|
119 |
||
120 |
sub stddev2(@) # avg, val, val, val, ... |
|
121 |
{ |
|
122 |
my($N) = my($sum) = 0; |
|
123 |
for (my($i)=1; $i<=$#_; $i++) { |
|
124 |
$N++,$sum+=($_[0]-$_[$i])**2 if (numberp($_[$i])); |
|
125 |
} |
|
126 |
return ($N>1)?sqrt($sum/($N-1)):nan; |
|
127 |
} |
|
128 |
||
129 |
sub stddev(@) |
|
130 |
{ |
|
131 |
my($avg) = &avg(@_); |
|
132 |
return numberp($avg) ? stddev2($avg,@_) : nan; |
|
133 |
} |
|
134 |
||
135 |
sub rms(@) |
|
136 |
{ |
|
137 |
my($N) = my($sum) = 0; |
|
138 |
for (my($i)=0; $i<=$#_; $i++) { $N++,$sum+=$_[$i]**2 if (numberp($_[$i])); } |
|
139 |
return ($N>0)?sqrt($sum/$N):nan; |
|
140 |
} |
|
141 |
||
142 |
sub median(@) |
|
143 |
{ |
|
144 |
my(@svals) = sort {$a <=> $b} grep { numberp($_) } @_; |
|
145 |
return nan if (@svals == 0); |
|
146 |
return (@svals & 1) ? |
|
147 |
$svals[$#svals/2] : |
|
148 |
0.5 * ($svals[$#svals/2] + $svals[$#svals/2+1]); |
|
149 |
} |
|
150 |
||
151 |
sub medianAnts_($) |
|
152 |
{ |
|
153 |
my($fnr) = @_; |
|
154 |
my(@svals) = sort {@{$a}[$fnr] <=> @{$b}[$fnr]} grep { numberp(@{$_}[$fnr]) } @ants_; |
|
155 |
return nan if (@svals == 0); |
|
156 |
return (@svals & 1) ? |
|
157 |
$svals[$#svals/2][$fnr] : |
|
158 |
0.5 * ($svals[$#svals/2][$fnr] + $svals[$#svals/2+1][$fnr]); |
|
159 |
} |
|
160 |
||
161 |
sub mad2(@) # avg, val, val, val, ... |
|
162 |
{ |
|
163 |
my($N) = my($sum) = 0; |
|
164 |
for (my($i)=1; $i<=$#_; $i++) { |
|
165 |
$N++,$sum+=abs($_[0]-$_[$i]) if (numberp($_[$i])); |
|
166 |
} |
|
167 |
return ($N>0)?sqrt($sum/$N):nan; |
|
168 |
} |
|
169 |
||
170 |
sub mad(@) |
|
171 |
{ |
|
172 |
my($median) = &median(@_); |
|
173 |
return numberp($median) ? mad2($median,@_) : nan; |
|
174 |
} |
|
175 |
||
176 |
sub mad2Ants_($$) |
|
177 |
{ |
|
178 |
my($median,$fnr) = @_; |
|
179 |
my($sum,$n); |
|
180 |
||
181 |
for (my($r)=0; $r<@ants_; $r++) { |
|
182 |
next unless numberp($ants_[$r][$fnr]); |
|
183 |
$sum += abs($median - $ants_[$r][$fnr]); |
|
184 |
$n++ |
|
185 |
} |
|
186 |
return ($n>0) ? $sum/$n : nan; |
|
187 |
} |
|
188 |
||
5 | 189 |
sub fdiff(@) # finite difference, e.g. for [rfilt] |
190 |
{ |
|
191 |
return (numberp($_[0]) && numberp($_[$#_])) ? $_[$#_] - $_[0] : nan; |
|
192 |
} |
|
193 |
||
194 |
||
0 | 195 |
#---------------------------------------------------------------------- |
196 |
# &bootstrap(nDraw,cLim,statFun,val[,...]) |
|
197 |
# nDraw number of synthetic samples to draw |
|
198 |
# cLim confidence limit (e.g. 0.95) |
|
199 |
# statFun pointer to stats function |
|
200 |
# val[,...] data values |
|
201 |
# |
|
202 |
# e.g. bootstrap(1000,.5,\&avg,1,2,1000) |
|
203 |
#---------------------------------------------------------------------- |
|
204 |
||
205 |
sub bootstrap($$$@) |
|
206 |
{ |
|
207 |
my($nDraw,$cLim,$statFun,@vals) = @_; |
|
208 |
my(@sv,@stats); |
|
209 |
||
210 |
for (my($s)=0; $s<$nDraw; $s++) { |
|
211 |
for (my($i)=0; $i<@vals; $i++) { |
|
212 |
$sv[$i] = $vals[int(rand(@vals))]; |
|
213 |
} |
|
214 |
$stats[$s] = &$statFun(@sv); |
|
215 |
} |
|
216 |
@stats = sort {$a <=> $b} grep { numberp($_) } @stats; |
|
217 |
my($cli) = int($nDraw*(1-$cLim)/2); |
|
218 |
return ($stats[$cli],$stats[$#stats-$cli]); |
|
219 |
} |
|
220 |
||
221 |
#---------------------------------------------------------------------- |
|
222 |
# &fixLowSampStat(statRef,nsamp[]) |
|
223 |
# - replace stat (variance, stddev, stderr) based on small (<10) samples |
|
224 |
# with median calculated from all stats |
|
225 |
# - median of all stats is chosen to allow routine to work even if all |
|
226 |
# stats are based on small samples |
|
227 |
#---------------------------------------------------------------------- |
|
228 |
||
229 |
sub fixLowSampStat($@) |
|
230 |
{ |
|
231 |
my($statR,@nsamp) = @_; |
|
232 |
||
233 |
my($medStat) = median(@{$statR}); |
|
234 |
for (my($i)=0; $i<@{$statR}; $i++) { |
|
235 |
$statR->[$i] = $medStat |
|
236 |
unless ($nsamp[$i]>=10 || !defined($statR->[$i]) || $statR->[$i]>$medStat); |
|
237 |
} |
|
238 |
} |
|
239 |
||
240 |
#---------------------------------------------------------------------- |
|
241 |
# significance of difference of means (NR, 2nd ed, 14.2) |
|
242 |
#---------------------------------------------------------------------- |
|
243 |
||
244 |
sub Students_t(@) |
|
245 |
{ |
|
246 |
my($mu1,$sig1,$N1,$mu2,$sig2,$N2) = |
|
247 |
&antsFunUsage(6,"ffcffc","mu1, sigma1, N1, mu2, sigma2, N2",@_); |
|
248 |
my($var1) = $sig1 * $sig1; |
|
249 |
my($var2) = $sig2 * $sig2; |
|
250 |
my($sd) = sqrt($var1 + $var2 / ($N1+$N2-2) * (1/$N1 + 1/$N2)); |
|
251 |
return ($mu1-$mu2) / $sd; |
|
252 |
} |
|
253 |
||
254 |
sub slevel_mudiff1(@) |
|
255 |
{ |
|
256 |
my($mu1,$sig1,$N1,$mu2,$sig2,$N2) = |
|
257 |
&antsFunUsage(6,"ffcffc","mean1, sqrt(var1), N1, mean2, sqrt(var2), N2",@_); |
|
258 |
my($df) = $N1 + $N2 - 2; |
|
259 |
return &betai(0.5*$df,0.5, |
|
260 |
$df/($df + &Students_t(mu1,$sig1,$N1,$mu2,$sig2,$N2)**2)); |
|
261 |
} |
|
262 |
||
263 |
#---------------------------------------------------------------------- |
|
264 |
# significance of correlation coefficient (NR, 2nd ed, 14.5) |
|
265 |
#---------------------------------------------------------------------- |
|
266 |
||
267 |
sub slevel_r(@) |
|
268 |
{ |
|
269 |
my($r,$N) = &antsFunUsage(2,"fc","r, N",@_); |
|
270 |
return &erfcc(abs($r) * sqrt($N/2)); |
|
271 |
} |
|
272 |
||
273 |
#---------------------------------------------------------------------- |
|
274 |
# significance of difference btw two measured correlation coeffs |
|
275 |
# using Fisher's z (from NR, 2nd ed, 14.5). NB: averaging correlation |
|
276 |
# coefficients is done using [avgr] |
|
277 |
# NB: significance level is only good if correlated variables form |
|
278 |
# a binormal distribution |
|
279 |
#---------------------------------------------------------------------- |
|
280 |
||
281 |
sub r2z(@) |
|
282 |
{ |
|
283 |
my($r) = &antsFunUsage(1,"f","<r>",@_); |
|
284 |
return 0.5 * log((1+$r)/(1-$r)); |
|
285 |
} |
|
286 |
||
287 |
sub z2r(@) |
|
288 |
{ |
|
289 |
my($z) = &antsFunUsage(1,"f","<z>",@_); |
|
290 |
my($e) = exp(2*$z); |
|
291 |
return ($e-1) / ($e+1); |
|
292 |
} |
|
293 |
||
294 |
sub slevel_rrtrue(@) |
|
295 |
{ |
|
296 |
my($r,$N,$rtrue) = &antsFunUsage(3,"fcf","r, N, r_true",@_); |
|
297 |
croak("$0 (libstats.pl): N (=$N) < 10 in &slevel_rrtrue()\n") |
|
298 |
if ($N < 10); |
|
299 |
return &erfcc(abs(&r2z($r) - (&r2z($rtrue) + $rtrue/(2*$N-2))) * |
|
300 |
sqrt($N - 3) / sqrt(2)); |
|
301 |
} |
|
302 |
||
303 |
sub slevel_zz(@) |
|
304 |
{ |
|
305 |
my($z1,$N1,$z2,$N2) = &antsFunUsage(4,"fcfc","z1, N1, z2, N2",@_); |
|
306 |
croak("$0 (libstats.pl): N (=$N1,$N2) < 10 in &slevel_zz()\n") |
|
307 |
if ($N1 < 10 || $N2 < 10); |
|
308 |
return &erfcc(abs($z1-$z2) / sqrt(2/($N1-3) + 2/($N2-3))); |
|
309 |
} |
|
310 |
||
311 |
sub slevel_rr(@) |
|
312 |
{ |
|
313 |
my($r1,$N1,$r2,$N2) = &antsFunUsage(4,"fcfc","r1, N1, r2, N2",@_); |
|
314 |
return &slevel_zz(&r2z($r1),$N1,&r2z($r2),$N2); |
|
315 |
} |
|
316 |
||
317 |
#---------------------------------------------------------------------- |
|
318 |
# significance of difference btw two measured correlation coeffs |
|
319 |
# from brookes+dick63, p.216f |
|
320 |
# NB: result is returned as ratio of difference in Fisher's z to |
|
321 |
# standard error of Fisher's z; values >> 1 indicate that difference |
|
322 |
# is significant |
|
323 |
#---------------------------------------------------------------------- |
|
324 |
||
325 |
sub sig_rrtrue(@) |
|
326 |
{ |
|
327 |
my($r,$N,$rtrue) = &antsFunUsage(3,"fcf","r, N, r_true",@_); |
|
328 |
return abs(&r2z($r) - &r2z($rtrue)) * sqrt($N-3); |
|
329 |
||
330 |
} |
|
331 |
||
332 |
sub sig_rr(@) |
|
333 |
{ |
|
334 |
my($r1,$N1,$r2,$N2) = &antsFunUsage(4,"fcfc","r1, N1, r2, N2",@_); |
|
335 |
return abs(&r2z($r1) - &r2z($r2)) / (1/sqrt($N1-3)+1/sqrt($N2-3)); |
|
336 |
} |
|
337 |
||
338 |
#---------------------------------------------------------------------- |
|
339 |
||
340 |
1; |
|
341 |