LADCP2CTDmatch
author A.M. Thurnherr <athurnherr@yahoo.com>
Tue, 06 Aug 2013 21:00:01 +0000
changeset 21 ec19ba9622f3
parent 0 de00d0f32431
permissions -rwxr-xr-x
V1.2

#!/usr/bin/perl
#======================================================================
#                    L A D C P 2 C T D M A T C H 
#                    doc: Thu Sep 23 22:01:19 2010
#                    dlm: Tue Sep 28 21:43:29 2010
#                    (c) 2010 A.M. Thurnherr
#                    uE-Info: 312 33 NIL 0 0 70 10 2 4 NIL ofnI
#======================================================================

$antsSummary = 'match LADCP to CTD time series';

# HISTORY:
#	Sep 23, 2010: - incepted

($ANTS) = ($0 =~ m{^(.*)/[^/]*$});
$PERL_TOOLS = '/Data/LADCP/Software/Thurnherr/perl-tools';

require "$ANTS/ants.pl";
require "$PERL_TOOLS/RDI_BB_Read.pl";
require "$PERL_TOOLS/RDI_Coords.pl";
require "$PERL_TOOLS/RDI_Utils.pl";

$antsParseHeader = 0;
&antsUsage('dw:n:',2,
	'[-d)ebug] ',
	'[-w)indow <size[30s]>] [-n) <windows[20]]',
	'<RDI file> <SeaBird file>');

&antsCardOpt(\$opt_w,30);
&antsCardOpt(\$opt_n,20);

$LADCP_file = &antsFileArg();
$CTD_file 	= &antsFileArg();

&antsAddParams('LADCP_file',$LADCP_file,'CTD_file',$CTD_file);
@antsNewLayout = ('elapsed','CTD_w','LADCP_w');
&antsActivateOut();

#----------------------------------------------------------------------
# Step 1: Read Data
#----------------------------------------------------------------------

print(STDERR "Reading LADCP data ($LADCP_file)...");
readData($LADCP_file,\%LADCP);
printf(STDERR "\n\t%d ensembles\n",scalar(@{$LADCP{ENSEMBLE}}));

print(STDERR "Reading CTD data ($CTD_file)...");
open(F,$CTD_file) || croak("$CTD_file: $!\n");
while (1) {											# parse header
	chomp($hdr = <F>);
	$hdr =~ s/\r*$//;
	croak("$0: unexpected EOF (format error)\n") unless defined($hdr);
	last if ($hdr eq '*END*');
	
	$CTD_nfields = $',next if ($hdr =~ /nquan = /);		# Layout
	$CTD_nrecs = $',next if ($hdr =~ /nvalues = /);
	$pressF = $1,next if ($hdr =~ /name (\d+) = prDM:/);

	&antsAddParams('start_time',$1),next			# selected metadata
		if ($hdr =~ /start_time = (.*)/);

	&antsAddParams('station',$1),next
		if ($hdr =~ /Station\s*:\s*(.*)/);
	&antsAddParams('ship',$1),next
		if ($hdr =~ /Ship\s*:\s*(.*)\s*$/);
	&antsAddParams('cruise',$1),next
		if ($hdr =~ /Cruise\s*:\s*(.*)\s*$/);
	&antsAddParams('time',$1),next
		if ($hdr =~ /Time\s*:\s*(.*)/);
	&antsAddParams('date',$1),next
		if ($hdr =~ /Date\s*:\s*(.*)/);

	if ($hdr =~ /Latitude\s*[=:]\s*/) {
		($deg,$min,$NS) = split(/ /,$');
		$lat = $deg + $min/60;
		$lat *= -1 if ($NS eq 'S');
		&antsAddParams('lat',$lat);
		next;
	}
	if ($hdr =~ /Longitude\s*[=:]\s*/) {
		($deg,$min,$EW) = split(/ /,$');
		$lon = $deg + $min/60;
		$lon *= -1 if ($EW eq 'W');
		&antsAddParams('lon',$lon);
		next;
	}
	
	if ($hdr =~ /interval = seconds: /) {
		$CTD_sampint = 1*$';
		&antsAddParams('CTD_interval',1/$CTD_sampint);
		next;
	}
	
	$CTD_badval = $',next
		if ($hdr =~ /bad_flag = /);	
	$CTD_file_type = $',next
		if ($hdr =~ /file_type = /);	
}

croak("$CTD_file: cannot determine CTD file layout\n")
	unless ($CTD_nfields && $CTD_nrecs);
croak("$CTD_file: cannot determine missing value\n")
	unless defined($CTD_badval);
croak("$CTD_file: not a CTD time series file\n")
	unless ($CTD_sampint);
croak("$CTD_file: no pressure field\n")
	unless defined($pressF);

croak("$0: unknown latitude\n") unless defined($lat);

if ($CTD_file_type eq 'ascii') {
	while (1) {
		last unless (@rec = &antsFileIn(F));
		push(@CTD_press,($rec[$pressF] == $CTD_badval) ? nan : $rec[$pressF]);
	}
} elsif ($CTD_file_type eq 'binary') {

	my($fbits) = 8 * length(pack('f',0));
	croak(sprintf("$0: incompatible native CPU float representation (%d instead of 32bits)\n",fbits))
		unless ($fbits == 32);	
		
	croak("$CTD_file: can't read binary data\n")
		unless (read(F,$dta,4*$CTD_nfields*$CTD_nrecs) == 4*$CTD_nfields*$CTD_nrecs);
	print(STDERR "$CTD_file: WARNING: extraneous data at EOF\n") unless eof(F);

	$dta = pack('V*',unpack('N*',$dta))				# big-endian CPU
		if (unpack('h*', pack('s', 1)) =~ /01/);	# c.f. perlport(1)
	
	@dta = unpack("f*",$dta);

	for ($r=0; $r<$CTD_nrecs; $r++) {
		push(@CTD_press,($dta[$r*$CTD_nfields+$pressF] == $CTD_badval) ? nan : $dta[$r*$CTD_nfields+$pressF]);
	}
} else {
	croak("$CTD_file: unknown CTD file type $CTD_file_type\n");
}

printf(STDERR "\n\t%d scans\n",scalar(@CTD_press));

#----------------------------------------------------------------------
# Step 2a: Pre-Process CTD Data
#----------------------------------------------------------------------

printf(STDERR "Pre-processing data...");
printf(STDERR "\n\tCTD...");

#------------------------------------
# calculate w and find deepest record
#------------------------------------
$CTD_maxpress = -9e99;
for (my($r)=1; $r<$CTD_nrecs-1; $r++) {
	print(STDERR '.') if ($r%500==0);
	$CTD_w[$r] = 0.99*($CTD_press[$r+1] - $CTD_press[$r-1]) / (2*$CTD_sampint);
	if ($CTD_press[$r] > $CTD_maxpress) {
		$CTD_maxpress = $CTD_press[$r];
		$CTD_bottom = $r;
    }										
}

#----------------------------------------------------------------------
# Step 2b: Pre-Process LADCP Data
#----------------------------------------------------------------------

print(STDERR "\n\tLADCP...");

#------------------------------------------------------
# construct a depth-vs-time "profile" from the raw data
#------------------------------------------------------

my($LADCP_start,$LADCP_end,$LADCP_bottom,$w_gap_time,$zErr,$maxz) =
    mk_prof(\%LADCP,0,undef,1,6,70,0.1,120);
croak("\n$LADCP_file: no good ensembles found\n")
    unless defined($LADCP_start);
    
if ($opt_d) {
	printf(STDERR "\n\t\tStart of cast    : %s (#%5d) at %6.1fm\n",
	                    $LADCP{ENSEMBLE}[$LADCP_start]->{TIME},
	                    $LADCP{ENSEMBLE}[$LADCP_start]->{NUMBER},
	                    $LADCP{ENSEMBLE}[$LADCP_start]->{DEPTH});
	printf(STDERR "\t\tBottom of cast   : %s (#%5d) at %6.1fm\n",
	                    $LADCP{ENSEMBLE}[$LADCP_bottom]->{TIME},
	                    $LADCP{ENSEMBLE}[$LADCP_bottom]->{NUMBER},
	                    $LADCP{ENSEMBLE}[$LADCP_bottom]->{DEPTH});
	if (defined($water_depth)) {
	    printf(STDERR "\t\tSeabed           :                      at %6.1fm (+-%dm)\n",$water_depth,$sig_wd);
	} else {
	    print(STDERR "\t\tSeabed           : not found\n");
	}
	printf(STDERR "\t\tEnd of cast      : %s (#%5d) at %6.1fm\n",
	                    $LADCP{ENSEMBLE}[$LADCP_end]->{TIME},
	                    $LADCP{ENSEMBLE}[$LADCP_end]->{NUMBER},
	                    $LADCP{ENSEMBLE}[$LADCP_end]->{DEPTH});
}

#------------------------------------------------------------------------
# find 1st rec & ensemble >=80% down to max depth & make 1st guess at lag
#------------------------------------------------------------------------

my($CTD_80pct_down) = 0;
$CTD_80pct_down++
	while ($CTD_press[$CTD_80pct_down] < 0.8*$CTD_maxpress);

my($LADCP_80pct_down) = 0;
$LADCP_80pct_down++
	while ($LADCP{ENSEMBLE}[$LADCP_80pct_down]->{DEPTH} < 0.8*$LADCP{ENSEMBLE}[$LADCP_bottom]->{DEPTH});


my($first_guess_lag) = $LADCP{ENSEMBLE}[$LADCP_80pct_down]->{ELAPSED_TIME} -
					   $CTD_80pct_down*$CTD_sampint;

printf(STDERR "\t\t1st guess offset = %ds\n",$first_guess_lag*$CTD_sampint)
	if ($opt_d);

#------------------------------------------------------------------------------------
# Linearly interpolate LADCP time series onto a new grid with $CTD_sampint resolution
#	ALSO: apply first_guess_lag to make lags small, which keeps the bestlag data
#		  chunks large
#------------------------------------------------------------------------------------

my($nGaps) = 0;

sub interp_LADCP_w($$)
{
	my($elapsed,$ens) = @_;
	my($sc) = ($elapsed - $LADCP{ENSEMBLE}[$ens-1]->{ELAPSED_TIME}) /
			  ($LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME} -
					$LADCP{ENSEMBLE}[$ens-1]->{ELAPSED_TIME});
	unless (numberp($LADCP{ENSEMBLE}[$ens-1]->{W})) {
		$nGaps++;
		croak("$0: cannot (yet) handle double gaps --- request feature upgrade\n")
			unless numberp($LADCP{ENSEMBLE}[$ens]->{W});
		return $LADCP{ENSEMBLE}[$ens]->{W};
	}
	return $LADCP{ENSEMBLE}[$ens-1]->{W} +
				$sc * ($LADCP{ENSEMBLE}[$ens]->{W} - $LADCP{ENSEMBLE}[$ens-1]->{W});
}

for (my($ens)=$LADCP_start,my($r)=0; $ens<=$LADCP_end; $ens++) {
	while ($r*$CTD_sampint < $LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME}) {
		$LADCP_w[$r-$first_guess_lag] = interp_LADCP_w($r*$CTD_sampint,$ens)
			unless ($first_guess_lag > $r);
		$r++;
    }
}

print(STDERR "\t\t$nGaps gaps in w timeseries")
	if ($opt_d);

print(STDERR "\n");

#----------------------------------------------------------------------
# Step 3: Calculate Lags
#----------------------------------------------------------------------

printf(STDERR "Calculating $opt_n lags from %ds-long windows [s]: ",$opt_w);
$opt_w = int($opt_w / $CTD_sampint);

sub bestLag($)
{
	my($ws) = @_;														# window start index

	my($best);
	my($bestmad) = 9e99;												# mean absolute deviation
	for (my($Llag)=-int($opt_w/2); $Llag<int($opt_w/2); $Llag++) {
		my($sad) = my($nad) = 0;
		for (my($Ci)=0; $Ci<$opt_w; $Ci++) {
			my($Li) = $Ci + $Llag;
			next if ($Li<0 || $Li>=$opt_w);
			next unless numberp($CTD_w[$ws+$Ci]) && numberp($LADCP_w[$ws+$Li]);
			$sad += abs($CTD_w[$ws+$Ci] - $LADCP_w[$ws+$Li]);
			$nad++;
		}
		next unless ($nad > 0);
		if ($sad/$nad < $bestmad) {
			$best = $Llag;
			$bestmad = $sad/$nad;
		}
	}
	return $best;	
}

#---------------------------------------------------------------
# carry out opt_n lag correlations and keep tally of the results
#---------------------------------------------------------------
my(@nBest);
for (my($window)=0; $window<$opt_n; $window++) {
	my($ws) = $window * int(@LADCP_w/$opt_n);			# window start
	$ws = @LADCP_w-$opt_w if ($ws+$opt_w >= @LADCP_w);

	$bestLag = bestLag($ws);
	if (defined($bestLag)) {
		printf(STDERR "%d ",$bestLag*$CTD_sampint);
		$nBest{$bestLag}++;
	} else {
		printf(STDERR "nan ");
	}
}

#----------------------
# find most popular lag
#----------------------
my($best_lag);
foreach my $i (keys(%nBest)) {
	$best_lag = $i if ($nBest{$i} > $nBest{$best_lag});
}
croak("\n$0: cannot determine a valid lag\n")
	unless ($nBest{$best_lag} > 1);
print(STDERR "\n\n\tWARNING: only $nBest{$best_lag} of the lag estimates agree!\n")
	if ($nBest{$best_lag} < $opt_n/2);

if ($nBest{$best_lag} == $opt_n) {
	printf(STDERR "\n\tunanimous lag = %ds\n",$best_lag*$CTD_sampint);
} else {
	printf(STDERR "\n\tmost popular lag = %ds\n",$best_lag*$CTD_sampint);
}

#----------------------------------------------------------------------
# Step 4: associate CTD data with each LADCP ensemble & write time series
#----------------------------------------------------------------------

print(STDERR "Associating CTD data with LADCP ensembles...\n");

my($overall_dt) = $CTD_sampint * ($best_lag + $first_guess_lag);
for (my($ens)=$LADCP_start; $ens<=$LADCP_end; $ens++) {
	my($r) = int(($LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME} - $overall_dt) / $CTD_sampint);
	$LADCP{ENSEMBLE}[$ens]->{CTD_W} = $CTD_w[$r];
	&antsOut($LADCP{ENSEMBLE}[$ens]->{ELAPSED_TIME},
			 $LADCP{ENSEMBLE}[$ens]->{CTD_W},
			 $LADCP{ENSEMBLE}[$ens]->{W});
}

&antsExit();