#!/usr/bin/perl
# a band pass filter that can be applied to x,y,z data columns
# Ben Smith - S/V Mother of Perl - 2006
############################################
use strict;
use warnings;

use Getopt::Long;
use Data::Dumper;

our(@min,@max,$errorFile,$help) = undef;
our($inputFile,$outputFile);
our(@sigma) = undef ;
our($verbose) = undef ;

for(0..3) { # initialize lists elements to undef
    $min[$_] = undef;
    $max[$_] = undef;
    $sigma[$_] = undef;
}

my $opts = GetOptions (
		       "input|i=s" => \$inputFile,
		       "output|o=s" => \$outputFile,
		       "errorFile|e=s" => \$errorFile,
		       "sigmaX|X=s" => sub { $sigma[0] = h2f($_[1]); },
		       "sigmaY|Y=s" => sub { $sigma[1] = h2f($_[1]); },
		       "sigmaZ|Z=s" => sub { $sigma[2] = h2f($_[1]); },
		       "sigmaT|T=s" => sub { $sigma[3] = h2f($_[1]); },
		       "minX|minLong=s" => sub { $min[0] = h2f($_[1]); },
		       "maxX|maxLong=s" => sub { $max[0] = h2f($_[1]); },
		       "minY|minLat=s" => sub { $min[1] = h2f($_[1]); },
		       "maxY|maxLat=s" => sub { $max[1] = h2f($_[1]); },
		       "minZ|minDepth=s" => sub { $min[2] = h2f($_[1]); },
		       "maxZ|maxDepth=s" => sub { $max[2] = h2f($_[1]); },
		       "minT|minTime=s" => sub { $min[3] = h2f($_[1]); },
		       "maxT|maxTime=s" => sub { $max[3] = h2f($_[1]); },
		       "verbose|v" => \$verbose,
		       "help|h" => \$help,

		       );
if($help || !$inputFile) {
    print STDERR <<"EOT";
usage: BandPass -i <inputFile> -o <outputFile> [<options>]
  ( input and output files must be specified )
  options:
    -e | --errorFile <errorFile> for rejected records
    -X | --sigmaX <value>
    -Y | --sigmaY <value>
    -Z | --sigmaZ <value>
    -T | --sigmaT <value>
    --minX | --minLong <value>    
    --maxX | --maxLong <value>
    --minY | --minLat <value>
    --maxY | --maxLat <value>
    --minZ | --minDepth <value>
    --maxZ | --maxDepth <value>
    --minT | --minTime <value>
    --maxT | --maxTime <value>
    -v | --verbose
    -h | --help
EOT
exit;
}

# test to see if the input file exists
my %stats;

if($sigma[0] || $sigma[1] || $sigma[2] || $sigma[3]) { # we need statistics 
    %stats = StatAnalyze($inputFile);
    print Data::Dumper->Dump([\%stats]),"\n" if $verbose;
}
my $pass = 0;
my $fail = 0;

open(INPUT,"<$inputFile") || die "cannot open input file \"$inputFile\":$!\n";
open(OUTPUT,">$outputFile") || 
    die "cannot open output file \"$outputFile\":$!\n";
if(defined $errorFile) {
    open ERRORS, ">$errorFile" || 
	die "cannot open error file \"$errorFile\":$!\n";
}

while(my $line = <INPUT>){
    chomp($line);
    my (@data) = split(/\t/,$line);
    my $test = 1;
    my $errorLn;

    
    for(my $n = 0; $n < 3; ++ $n) { # evaluate each data field
	if($sigma[$n]) { # statistical filter
	    my $diff = $data[$n] - $stats{mean}[$n] ;
	    my $statTest = $sigma[$n] * $stats{stdev}[$n];
	    print "$n\t$diff\t$statTest\n" if $verbose;
	    if($diff > $statTest) {
		$test = 0;
		$errorLn = join("\t",@data,($n+1),$diff,$statTest);
		last;
	    }
	}
	# range filter
	if( (defined($min[$n]) && ($data[$n] < $min[$n])) || 
	    (defined($max[$n]) && ($data[$n] > $max[$n])) ) {
	    $test = 0;

	    my($minStr) = (defined $min[$n]) ? $min[$n] : "undef";
	    my($maxStr) = (defined $max[$n]) ? $max[$n] : "undef";
	    $errorLn = join("\t",@data,($n+1),"$minStr","$maxStr");
	    last;
	}
    }
    if($test) {
	print OUTPUT "$line\n";
	++ $pass;
    } else {
	print ERRORS "$errorLn\n" if defined $errorFile;
	++ $fail;
    }
}

print STDERR "Bandpass filter: $pass passed; $fail failed\n";


exit;

############################## subroutines #################

sub h2f { # convert from hh:mm:ss format numbers to hh.hhhhh numbers
    my $hstring = shift;
    my($h,$m,$s) = split(/:/,$hstring);

    if(defined $s || defined $m){
	my($sign) = ($h > 0) ? 1 : -1;
	return sprintf("%.5f",$h + ($sign * $m/60) + ($sign * $s/3600));
    } else {
	return $h;
    }
}

sub StatAnalyze {
    # returns a hash of statistics for each field found
    my $inputFile = shift;
    my %results ;

    my(@fields,$count)=0;

    open INPUT, "<$inputFile" || 
	die "cannot open input file \"$inputFile\" for statistics: $!\n";
    while(<INPUT>) {
	chomp;
	my @fields = split;
	next if not scalar @fields;
	++ $count;
	for(my $n=0; $n< scalar @fields; ++ $n) {
	    if((not defined $results{max}[$n]) ||
	       ($fields[$n] > $results{max}[$n])) {
		$results{max}[$n] = $fields[$n];
	    } 
	    if((not defined $results{min}[$n]) ||
	       ($fields[$n] < $results{min}[$n])) {
		$results{min}[$n] = $fields[$n];
	    }
	    $results{accum}[$n] = 0 unless defined $results{accum}[$n];
	    $results{accum}[$n] += $fields[$n];
	}
    } # while
    close INPUT;

    my $nfields = scalar @{$results{accum}};
    for( my $n=0; $n < $nfields; ++ $n) {
	$results{mean}[$n] = $results{accum}[$n] / $count;
    }
    
    # figure standard deviation for each field
    open INPUT, "<$inputFile" || 
	die "cannot open input file \"$inputFile\" for statistics: $!\n";
    while(<INPUT>) {
	chomp;
	my @fields = split;
	next if not scalar @fields;
	++ $count;
	for(my $n=0; $n< scalar @fields; ++ $n) {
	    $results{var}[$n] = 0 unless defined $results{var}[$n];
	    my $diff = $fields[$n] - $results{mean}[$n]; 
	    $results{var}[$n] += $diff * $diff;
	}
    }
    close INPUT;

    for( my $n=0; $n < $nfields; ++ $n) {
	$results{stdev}[$n] = sqrt($results{var}[$n] / $count);
    }

    return %results;
}
Home
 Boat
 Log
 Photos
 Videos
 Hydrography
   nmea2xyzt
   NMEAfields
   Datalogger
  Cleaning
   BandPass
  Analysis
  GridPlot
 Blog
 ForSale
 SiteMap