#!/usr/bin/perl
# a band pass filter that can be applied to x,y,z data columns
# Ben Smith - S/V Mother of Perl - 2006
############################################
use strict;
use warnings;
use Getopt::Long;
use Data::Dumper;
our(@min,@max,$errorFile,$help) = undef;
our($inputFile,$outputFile);
our(@sigma) = undef ;
our($verbose) = undef ;
for(0..3) { # initialize lists elements to undef
$min[$_] = undef;
$max[$_] = undef;
$sigma[$_] = undef;
}
my $opts = GetOptions (
"input|i=s" => \$inputFile,
"output|o=s" => \$outputFile,
"errorFile|e=s" => \$errorFile,
"sigmaX|X=s" => sub { $sigma[0] = h2f($_[1]); },
"sigmaY|Y=s" => sub { $sigma[1] = h2f($_[1]); },
"sigmaZ|Z=s" => sub { $sigma[2] = h2f($_[1]); },
"sigmaT|T=s" => sub { $sigma[3] = h2f($_[1]); },
"minX|minLong=s" => sub { $min[0] = h2f($_[1]); },
"maxX|maxLong=s" => sub { $max[0] = h2f($_[1]); },
"minY|minLat=s" => sub { $min[1] = h2f($_[1]); },
"maxY|maxLat=s" => sub { $max[1] = h2f($_[1]); },
"minZ|minDepth=s" => sub { $min[2] = h2f($_[1]); },
"maxZ|maxDepth=s" => sub { $max[2] = h2f($_[1]); },
"minT|minTime=s" => sub { $min[3] = h2f($_[1]); },
"maxT|maxTime=s" => sub { $max[3] = h2f($_[1]); },
"verbose|v" => \$verbose,
"help|h" => \$help,
);
if($help || !$inputFile) {
print STDERR <<"EOT";
usage: BandPass -i <inputFile> -o <outputFile> [<options>]
( input and output files must be specified )
options:
-e | --errorFile <errorFile> for rejected records
-X | --sigmaX <value>
-Y | --sigmaY <value>
-Z | --sigmaZ <value>
-T | --sigmaT <value>
--minX | --minLong <value>
--maxX | --maxLong <value>
--minY | --minLat <value>
--maxY | --maxLat <value>
--minZ | --minDepth <value>
--maxZ | --maxDepth <value>
--minT | --minTime <value>
--maxT | --maxTime <value>
-v | --verbose
-h | --help
EOT
exit;
}
# test to see if the input file exists
my %stats;
if($sigma[0] || $sigma[1] || $sigma[2] || $sigma[3]) { # we need statistics
%stats = StatAnalyze($inputFile);
print Data::Dumper->Dump([\%stats]),"\n" if $verbose;
}
my $pass = 0;
my $fail = 0;
open(INPUT,"<$inputFile") || die "cannot open input file \"$inputFile\":$!\n";
open(OUTPUT,">$outputFile") ||
die "cannot open output file \"$outputFile\":$!\n";
if(defined $errorFile) {
open ERRORS, ">$errorFile" ||
die "cannot open error file \"$errorFile\":$!\n";
}
while(my $line = <INPUT>){
chomp($line);
my (@data) = split(/\t/,$line);
my $test = 1;
my $errorLn;
for(my $n = 0; $n < 3; ++ $n) { # evaluate each data field
if($sigma[$n]) { # statistical filter
my $diff = $data[$n] - $stats{mean}[$n] ;
my $statTest = $sigma[$n] * $stats{stdev}[$n];
print "$n\t$diff\t$statTest\n" if $verbose;
if($diff > $statTest) {
$test = 0;
$errorLn = join("\t",@data,($n+1),$diff,$statTest);
last;
}
}
# range filter
if( (defined($min[$n]) && ($data[$n] < $min[$n])) ||
(defined($max[$n]) && ($data[$n] > $max[$n])) ) {
$test = 0;
my($minStr) = (defined $min[$n]) ? $min[$n] : "undef";
my($maxStr) = (defined $max[$n]) ? $max[$n] : "undef";
$errorLn = join("\t",@data,($n+1),"$minStr","$maxStr");
last;
}
}
if($test) {
print OUTPUT "$line\n";
++ $pass;
} else {
print ERRORS "$errorLn\n" if defined $errorFile;
++ $fail;
}
}
print STDERR "Bandpass filter: $pass passed; $fail failed\n";
exit;
############################## subroutines #################
sub h2f { # convert from hh:mm:ss format numbers to hh.hhhhh numbers
my $hstring = shift;
my($h,$m,$s) = split(/:/,$hstring);
if(defined $s || defined $m){
my($sign) = ($h > 0) ? 1 : -1;
return sprintf("%.5f",$h + ($sign * $m/60) + ($sign * $s/3600));
} else {
return $h;
}
}
sub StatAnalyze {
# returns a hash of statistics for each field found
my $inputFile = shift;
my %results ;
my(@fields,$count)=0;
open INPUT, "<$inputFile" ||
die "cannot open input file \"$inputFile\" for statistics: $!\n";
while(<INPUT>) {
chomp;
my @fields = split;
next if not scalar @fields;
++ $count;
for(my $n=0; $n< scalar @fields; ++ $n) {
if((not defined $results{max}[$n]) ||
($fields[$n] > $results{max}[$n])) {
$results{max}[$n] = $fields[$n];
}
if((not defined $results{min}[$n]) ||
($fields[$n] < $results{min}[$n])) {
$results{min}[$n] = $fields[$n];
}
$results{accum}[$n] = 0 unless defined $results{accum}[$n];
$results{accum}[$n] += $fields[$n];
}
} # while
close INPUT;
my $nfields = scalar @{$results{accum}};
for( my $n=0; $n < $nfields; ++ $n) {
$results{mean}[$n] = $results{accum}[$n] / $count;
}
# figure standard deviation for each field
open INPUT, "<$inputFile" ||
die "cannot open input file \"$inputFile\" for statistics: $!\n";
while(<INPUT>) {
chomp;
my @fields = split;
next if not scalar @fields;
++ $count;
for(my $n=0; $n< scalar @fields; ++ $n) {
$results{var}[$n] = 0 unless defined $results{var}[$n];
my $diff = $fields[$n] - $results{mean}[$n];
$results{var}[$n] += $diff * $diff;
}
}
close INPUT;
for( my $n=0; $n < $nfields; ++ $n) {
$results{stdev}[$n] = sqrt($results{var}[$n] / $count);
}
return %results;
}