2011_07_10/LADCP2CTDmatch
changeset 4 0d31245172fa
new file mode 100755
--- /dev/null
+++ b/2011_07_10/LADCP2CTDmatch
@@ -0,0 +1,333 @@
+#!/usr/bin/perl
+#======================================================================
+#                    L A D C P 2 C T D M A T C H 
+#                    doc: Thu Sep 23 22:01:19 2010
+#                    dlm: Tue Sep 28 21:43:29 2010
+#                    (c) 2010 A.M. Thurnherr
+#                    uE-Info: 312 33 NIL 0 0 70 10 2 4 NIL ofnI
+#======================================================================
+
+$antsSummary = 'match LADCP to CTD time series';
+
+# HISTORY:
+#	Sep 23, 2010: - incepted
+
+($ANTS) = ($0 =~ m{^(.*)/[^/]*$});
+$PERL_TOOLS = '/Data/LADCP/Software/Thurnherr/perl-tools';
+
+require "$ANTS/ants.pl";
+require "$PERL_TOOLS/RDI_BB_Read.pl";
+require "$PERL_TOOLS/RDI_Coords.pl";
+require "$PERL_TOOLS/RDI_Utils.pl";
+
+$antsParseHeader = 0;
+&antsUsage('dw:n:',2,
+	'[-d)ebug] ',
+	'[-w)indow <size[30s]>] [-n) <windows[20]]',
+	'<RDI file> <SeaBird file>');
+
+&antsCardOpt(\$opt_w,30);
+&antsCardOpt(\$opt_n,20);
+
+$LADCP_file = &antsFileArg();
+$CTD_file 	= &antsFileArg();
+
+&antsAddParams('LADCP_file',$LADCP_file,'CTD_file',$CTD_file);
+@antsNewLayout = ('elapsed','CTD_w','LADCP_w');
+&antsActivateOut();
+
+#----------------------------------------------------------------------
+# Step 1: Read Data
+#----------------------------------------------------------------------
+
+print(STDERR "Reading LADCP data ($LADCP_file)...");
+readData($LADCP_file,\%LADCP);
+printf(STDERR "\n\t%d ensembles\n",scalar(@{$LADCP{ENSEMBLE}}));
+
+print(STDERR "Reading CTD data ($CTD_file)...");
+open(F,$CTD_file) || croak("$CTD_file: $!\n");
+while (1) {											# parse header
+	chomp($hdr = <F>);
+	$hdr =~ s/\r*$//;
+	croak("$0: unexpected EOF (format error)\n") unless defined($hdr);
+	last if ($hdr eq '*END*');
+	
+	$CTD_nfields = $',next if ($hdr =~ /nquan = /);		# Layout
+	$CTD_nrecs = $',next if ($hdr =~ /nvalues = /);
+	$pressF = $1,next if ($hdr =~ /name (\d+) = prDM:/);
+
+	&antsAddParams('start_time',$1),next			# selected metadata
+		if ($hdr =~ /start_time = (.*)/);
+
+	&antsAddParams('station',$1),next
+		if ($hdr =~ /Station\s*:\s*(.*)/);
+	&antsAddParams('ship',$1),next
+		if ($hdr =~ /Ship\s*:\s*(.*)\s*$/);
+	&antsAddParams('cruise',$1),next
+		if ($hdr =~ /Cruise\s*:\s*(.*)\s*$/);
+	&antsAddParams('time',$1),next
+		if ($hdr =~ /Time\s*:\s*(.*)/);
+	&antsAddParams('date',$1),next
+		if ($hdr =~ /Date\s*:\s*(.*)/);
+
+	if ($hdr =~ /Latitude\s*[=:]\s*/) {
+		($deg,$min,$NS) = split(/ /,$');
+		$lat = $deg + $min/60;
+		$lat *= -1 if ($NS eq 'S');
+		&antsAddParams('lat',$lat);
+		next;
+	}
+	if ($hdr =~ /Longitude\s*[=:]\s*/) {
+		($deg,$min,$EW) = split(/ /,$');
+		$lon = $deg + $min/60;
+		$lon *= -1 if ($EW eq 'W');
+		&antsAddParams('lon',$lon);
+		next;
+	}
+	
+	if ($hdr =~ /interval = seconds: /) {
+		$CTD_sampint = 1*$';
+		&antsAddParams('CTD_interval',1/$CTD_sampint);
+		next;
+	}
+	
+	$CTD_badval = $',next
+		if ($hdr =~ /bad_flag = /);	
+	$CTD_file_type = $',next
+		if ($hdr =~ /file_type = /);	
+}
+
+croak("$CTD_file: cannot determine CTD file layout\n")
+	unless ($CTD_nfields && $CTD_nrecs);
+croak("$CTD_file: cannot determine missing value\n")
+	unless defined($CTD_badval);
+croak("$CTD_file: not a CTD time series file\n")
+	unless ($CTD_sampint);
+croak("$CTD_file: no pressure field\n")
+	unless defined($pressF);
+
+croak("$0: unknown latitude\n") unless defined($lat);
+
+if ($CTD_file_type eq 'ascii') {
+	while (1) {
+		last unless (@rec = &antsFileIn(F));
+		push(@CTD_press,($rec[$pressF] == $CTD_badval) ? nan : $rec[$pressF]);
+	}
+} elsif ($CTD_file_type eq 'binary') {
+
+	my($fbits) = 8 * length(pack('f',0));
+	croak(sprintf("$0: incompatible native CPU float representation (%d instead of 32bits)\n",fbits))
+		unless ($fbits == 32);	
+		
+	croak("$CTD_file: can't read binary data\n")
+		unless (read(F,$dta,4*$CTD_nfields*$CTD_nrecs) == 4*$CTD_nfields*$CTD_nrecs);
+	print(STDERR "$CTD_file: WARNING: extraneous data at EOF\n") unless eof(F);
+
+	$dta = pack('V*',unpack('N*',$dta))				# big-endian CPU
+		if (unpack('h*', pack('s', 1)) =~ /01/);	# c.f. perlport(1)
+	
+	@dta = unpack("f*",$dta);
+
+	for ($r=0; $r<$CTD_nrecs; $r++) {
+		push(@CTD_press,($dta[$r*$CTD_nfields+$pressF] == $CTD_badval) ? nan : $dta[$r*$CTD_nfields+$pressF]);
+	}
+} else {
+	croak("$CTD_file: unknown CTD file type $CTD_file_type\n");
+}
+
+printf(STDERR "\n\t%d scans\n",scalar(@CTD_press));
+
+#----------------------------------------------------------------------
+# Step 2a: Pre-Process CTD Data
+#----------------------------------------------------------------------
+
+printf(STDERR "Pre-processing data...");
+printf(STDERR "\n\tCTD...");
+
+#------------------------------------
+# calculate w and find deepest record
+#------------------------------------
+$CTD_maxpress = -9e99;
+for (my($r)=1; $r<$CTD_nrecs-1; $r++) {
+	print(STDERR '.') if ($r%500==0);
+	$CTD_w[$r] = 0.99*($CTD_press[$r+1] - $CTD_press[$r-1]) / (2*$CTD_sampint);
+	if ($CTD_press[$r] > $CTD_maxpress) {
+		$CTD_maxpress = $CTD_press[$r];
+		$CTD_bottom = $r;
+    }										
+}
+
+#----------------------------------------------------------------------
+# Step 2b: Pre-Process LADCP Data
+#----------------------------------------------------------------------
+
+print(STDERR "\n\tLADCP...");
+
+#------------------------------------------------------
+# construct a depth-vs-time "profile" from the raw data
+#------------------------------------------------------
+
+my($LADCP_start,$LADCP_end,$LADCP_bottom,$w_gap_time,$zErr,$maxz) =
+    mk_prof(\%LADCP,0,undef,1,6,70,0.1,120);
+croak("\n$LADCP_file: no good ensembles found\n")
+    unless defined($LADCP_start);
+    
+if ($opt_d) {
+	printf(STDERR "\n\t\tStart of cast    : %s (#%5d) at %6.1fm\n",
+	                    $LADCP{ENSEMBLE}[$LADCP_start]->{TIME},
+	                    $LADCP{ENSEMBLE}[$LADCP_start]->{NUMBER},
+	                    $LADCP{ENSEMBLE}[$LADCP_start]->{DEPTH});
+	printf(STDERR "\t\tBottom of cast   : %s (#%5d) at %6.1fm\n",
+	                    $LADCP{ENSEMBLE}[$LADCP_bottom]->{TIME},
+	                    $LADCP{ENSEMBLE}[$LADCP_bottom]->{NUMBER},
+	                    $LADCP{ENSEMBLE}[$LADCP_bottom]->{DEPTH});
+	if (defined($water_depth)) {
+	    printf(STDERR "\t\tSeabed           :                      at %6.1fm (+-%dm)\n",$water_depth,$sig_wd);
+	} else {
+	    print(STDERR "\t\tSeabed           : not found\n");
+	}
+	printf(STDERR "\t\tEnd of cast      : %s (#%5d) at %6.1fm\n",
+	                    $LADCP{ENSEMBLE}[$LADCP_end]->{TIME},
+	                    $LADCP{ENSEMBLE}[$LADCP_end]->{NUMBER},
+	                    $LADCP{ENSEMBLE}[$LADCP_end]->{DEPTH});
+}
+
+#------------------------------------------------------------------------
+# find 1st rec & ensemble >=80% down to max depth & make 1st guess at lag
+#------------------------------------------------------------------------
+
+my($CTD_80pct_down) = 0;
+$CTD_80pct_down++
+	while ($CTD_press[$CTD_80pct_down] < 0.8*$CTD_maxpress);
+
+my($LADCP_80pct_down) = 0;
+$LADCP_80pct_down++
+	while ($LADCP{ENSEMBLE}[$LADCP_80pct_down]->{DEPTH} < 0.8*$LADCP{ENSEMBLE}[$LADCP_bottom]->{DEPTH});
+
+
+my($first_guess_lag) = $LADCP{ENSEMBLE}[$LADCP_80pct_down]->{ELAPSED_TIME} -
+					   $CTD_80pct_down*$CTD_sampint;
+
+printf(STDERR "\t\t1st guess offset = %ds\n",$first_guess_lag*$CTD_sampint)
+	if ($opt_d);
+
+#------------------------------------------------------------------------------------
+# Linearly interpolate LADCP time series onto a new grid with $CTD_sampint resolution
+#	ALSO: apply first_guess_lag to make lags small, which keeps the bestlag data
+#		  chunks large
+#------------------------------------------------------------------------------------
+
+my($nGaps) = 0;
+
+sub interp_LADCP_w($$)
+{
+	my($elapsed,$ens) = @_;
+	my($sc) = ($elapsed - $LADCP{ENSEMBLE}[$ens-1]->{ELAPSED_TIME}) /
+			  ($LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME} -
+					$LADCP{ENSEMBLE}[$ens-1]->{ELAPSED_TIME});
+	unless (numberp($LADCP{ENSEMBLE}[$ens-1]->{W})) {
+		$nGaps++;
+		croak("$0: cannot (yet) handle double gaps --- request feature upgrade\n")
+			unless numberp($LADCP{ENSEMBLE}[$ens]->{W});
+		return $LADCP{ENSEMBLE}[$ens]->{W};
+	}
+	return $LADCP{ENSEMBLE}[$ens-1]->{W} +
+				$sc * ($LADCP{ENSEMBLE}[$ens]->{W} - $LADCP{ENSEMBLE}[$ens-1]->{W});
+}
+
+for (my($ens)=$LADCP_start,my($r)=0; $ens<=$LADCP_end; $ens++) {
+	while ($r*$CTD_sampint < $LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME}) {
+		$LADCP_w[$r-$first_guess_lag] = interp_LADCP_w($r*$CTD_sampint,$ens)
+			unless ($first_guess_lag > $r);
+		$r++;
+    }
+}
+
+print(STDERR "\t\t$nGaps gaps in w timeseries")
+	if ($opt_d);
+
+print(STDERR "\n");
+
+#----------------------------------------------------------------------
+# Step 3: Calculate Lags
+#----------------------------------------------------------------------
+
+printf(STDERR "Calculating $opt_n lags from %ds-long windows [s]: ",$opt_w);
+$opt_w = int($opt_w / $CTD_sampint);
+
+sub bestLag($)
+{
+	my($ws) = @_;														# window start index
+
+	my($best);
+	my($bestmad) = 9e99;												# mean absolute deviation
+	for (my($Llag)=-int($opt_w/2); $Llag<int($opt_w/2); $Llag++) {
+		my($sad) = my($nad) = 0;
+		for (my($Ci)=0; $Ci<$opt_w; $Ci++) {
+			my($Li) = $Ci + $Llag;
+			next if ($Li<0 || $Li>=$opt_w);
+			next unless numberp($CTD_w[$ws+$Ci]) && numberp($LADCP_w[$ws+$Li]);
+			$sad += abs($CTD_w[$ws+$Ci] - $LADCP_w[$ws+$Li]);
+			$nad++;
+		}
+		next unless ($nad > 0);
+		if ($sad/$nad < $bestmad) {
+			$best = $Llag;
+			$bestmad = $sad/$nad;
+		}
+	}
+	return $best;	
+}
+
+#---------------------------------------------------------------
+# carry out opt_n lag correlations and keep tally of the results
+#---------------------------------------------------------------
+my(@nBest);
+for (my($window)=0; $window<$opt_n; $window++) {
+	my($ws) = $window * int(@LADCP_w/$opt_n);			# window start
+	$ws = @LADCP_w-$opt_w if ($ws+$opt_w >= @LADCP_w);
+
+	$bestLag = bestLag($ws);
+	if (defined($bestLag)) {
+		printf(STDERR "%d ",$bestLag*$CTD_sampint);
+		$nBest{$bestLag}++;
+	} else {
+		printf(STDERR "nan ");
+	}
+}
+
+#----------------------
+# find most popular lag
+#----------------------
+my($best_lag);
+foreach my $i (keys(%nBest)) {
+	$best_lag = $i if ($nBest{$i} > $nBest{$best_lag});
+}
+croak("\n$0: cannot determine a valid lag\n")
+	unless ($nBest{$best_lag} > 1);
+print(STDERR "\n\n\tWARNING: only $nBest{$best_lag} of the lag estimates agree!\n")
+	if ($nBest{$best_lag} < $opt_n/2);
+
+if ($nBest{$best_lag} == $opt_n) {
+	printf(STDERR "\n\tunanimous lag = %ds\n",$best_lag*$CTD_sampint);
+} else {
+	printf(STDERR "\n\tmost popular lag = %ds\n",$best_lag*$CTD_sampint);
+}
+
+#----------------------------------------------------------------------
+# Step 4: associate CTD data with each LADCP ensemble & write time series
+#----------------------------------------------------------------------
+
+print(STDERR "Associating CTD data with LADCP ensembles...\n");
+
+my($overall_dt) = $CTD_sampint * ($best_lag + $first_guess_lag);
+for (my($ens)=$LADCP_start; $ens<=$LADCP_end; $ens++) {
+	my($r) = int(($LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME} - $overall_dt) / $CTD_sampint);
+	$LADCP{ENSEMBLE}[$ens]->{CTD_W} = $CTD_w[$r];
+	&antsOut($LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME},
+			 $LADCP{ENSEMBLE}[$ens]->{CTD_W},
+			 $LADCP{ENSEMBLE}[$ens]->{W});
+}
+
+&antsExit();
+