time_lag.pl
changeset 49 5006e9158207
parent 48 d9309804b6cf
child 56 8f120b9f795a
--- a/time_lag.pl	Thu Mar 16 11:53:27 2017 -0400
+++ b/time_lag.pl	Tue Nov 27 16:59:05 2018 -0500
@@ -1,9 +1,9 @@
 #======================================================================
 #                    T I M E _ L A G . P L 
 #                    doc: Fri Dec 17 21:59:07 2010
-#                    dlm: Tue Mar  7 09:03:20 2017
+#                    dlm: Tue Oct 16 16:26:06 2018
 #                    (c) 2010 A.M. Thurnherr
-#                    uE-Info: 73 63 NIL 0 0 72 2 2 4 NIL ofnI
+#                    uE-Info: 80 38 NIL 0 0 72 2 2 4 NIL ofnI
 #======================================================================
 
 # HISTORY:
@@ -71,6 +71,13 @@
 #	Mar  7, 2016: - BUG: editing did not work correctly in all cases
 #	Mar  6, 2017: - BUG: assertion in mad_w failed with 2017 P18 DL#206
 #	Mar  9, 2017: - tightened timelag editing (good_diff: 2->1)
+#	Mar 22, 2018: - re-wrote heuristics to remove lags with large mads
+#				  - BUG: bestLag with 1 valid sample returned 0 mad
+#				  - BUG: timelag editing did not work correctly when there was not a sufficiently long valid lag
+#	Mar 27, 2018: - BUG: re-written heuristic could fail when there was a valid but unpopular lag with very low mad.
+#						 Solution: remove very unpopular lags first
+#	Oct  4, 2018: - added timelagging debug code
+#	Oct 16, 2018: - removed debug code
 
 # DIFFICULT STATIONS:
 #	NBP0901#131		this requires the search-radius doubling heuristic
@@ -102,22 +109,21 @@
 		$CTD_mean_w   += $CTD{W}[$s+$so];
 		$nsamp++;
 	}
-	return 9e99 unless ($nsamp);
+	return 9e99 unless ($nsamp > 1);
 	$LADCP_mean_w /= $nsamp;
 	$CTD_mean_w /= $nsamp;
 
-	my($sad) = 0;															# now, calculate mad
+	my($sad) = $nsamp = 0;													# now, calculate mad
 	for (my($e)=$fe; $e<=$le; $e++) {			
-		next unless numberp($LADCP{ENSEMBLE}[$e]->{REFLR_W});
 		my($s) = int(($LADCP{ENSEMBLE}[$e]->{ELAPSED} + $CTD{TIME_LAG} - $CTD{ELAPSED}[0]) / $CTD{DT} + 0.5);
 		next unless ($s>=0 && $s<=$#{$CTD{ELAPSED}});
 		next unless numberp($LADCP{ENSEMBLE}[$e]->{REFLR_W});
 		my($dw) = $LADCP{ENSEMBLE}[$e]->{REFLR_W}-$LADCP_mean_w - ($CTD{W}[$s+$so]-$CTD_mean_w);
 #		print(STDERR "dw = $dw ($LADCP{ENSEMBLE}[$e]->{REFLR_W}-$LADCP_mean_w - ($CTD{W}[$s+$so]-$CTD_mean_w)\n");
 		next unless (abs($dw) <= $w_max_lim);
-		$sad += abs($dw);
+		$sad += abs($dw); $nsamp++;
 	}
-	return $sad/$nsamp;
+	return $nsamp ? $sad/$nsamp : 9e99;
 }
 
 
@@ -198,18 +204,34 @@
 		$n_valid_windows++;
 		$nBest{$so}++; $madBest{$so} += $mad;
 	}
+	my($maxN) = 0;
 	foreach my $i (keys(%nBest)) {
+		$maxN = $nBest{$i} if ($nBest{$i} > $maxN);
 		$madBest{$i} /= $nBest{$i};
 	}
 
-	my($med_mad) = median(values(%madBest));								# remove lags with large mads
-	my($mad_mad) = mad2($med_mad,values(%madBest));
+	foreach my $lag (keys(%nBest)) {										# remove unpopular lags
+		next if ($nBest{$lag} >= $maxN/10);
+		$n_valid_windows -= $nBest{$lag};
+		$nBest{$lag} = 0; $madBest{$lag} = 9e99;
+	}
+	
+##	my($med_mad) = median(values(%madBest));								# remove lags with large mads
+##	my($mad_mad) = mad2($med_mad,values(%madBest));
+##	foreach my $lag (keys(%nBest)) {
+##		next if ($madBest{$lag} <= $med_mad+$mad_mad);
+##		$n_valid_windows -= $nBest{$lag};
+##		$nBest{$lag} = 0;
+##  }
+
+	my($min_mad) = min(values(%madBest));									# remove lags with large mads
 	foreach my $lag (keys(%nBest)) {
-		next if ($madBest{$lag} <= $med_mad+$mad_mad);
+		next if ($madBest{$lag} <= 3*$min_mad);
 		$n_valid_windows -= $nBest{$lag};
 		$nBest{$lag} = 0;
 	}
 
+
 	my(@best_lag);															# find 3 most popular lags
 	foreach my $lag (keys(%nBest)) {
 		$best_lag[0] = $lag if ($nBest{$lag} > $nBest{$best_lag[0]});
@@ -236,7 +258,6 @@
 			$best_lag[0],int(($nBest{$best_lag[0]}/$n_valid_windows)*100+0.5),100*$madBest{$best_lag[0]});
 	}
 
-																			
 	unless ($nBest{$best_lag[0]}+$nBest{$best_lag[1]}+$nBest{$best_lag[2]}	# require quorum
 				>= $opt_3*$n_valid_windows) {
 		if (max(@best_lag)-min(@best_lag) > $TL_max_allowed_three_lag_spread) {
@@ -278,6 +299,16 @@
 
 	#----------------------------------------------------
 	# Here, either $failed is set, or we have a valid lag.
+	#----------------------------------------------------
+
+#	if ($failed) {
+#		for (my($wi)=0; $wi<$n_windows; $wi++) {
+#			print(STDERR "$wi $so[$wi] $mad[$wi]\n");
+#		}
+#	}
+
+	#----------------------------------------------------
+	# Here, either $failed is set, or we have a valid lag.
 	# If we have a valid lag, a continuous range of good 
 	# lags is determined using a finite-state machine.
 	# 	state == 0		no good run found yet
@@ -354,6 +385,7 @@
 					   ($lg[$i] == $n_windows-1) ? $LADCP{ENSEMBLE}[$lastGoodEns]->{ELAPSED}  : $elapsed[$lg[$i]]);
 		}
 #		print(STDERR "elim = @elim\n");
+		$failed = 1 unless (@elim);
 		$nerm = $failed
 			  ? editBadTimeLagging($first_ens,$last_ens,-1)
 			  : editBadTimeLagging($first_ens,$last_ens,@elim);