Design of Smooth
The filter is conceptually a moving average, but instead of just being historical moving average, it looks ahead and behind so that the relationship of the smoothed column does not change with the other columns in the table. There is a price to be paid at either end of the data set, and that price is that rows for half of the width of span of the averaging is lost at either end (top and bottom) of the table.
The averaging can be weighted (as is the default). You can specify your own weighting with the filter | f command line argument. All filters are considered symetrical around the center point, and therefor only half of the filter is specified. For example, a 20 record wide boxcar (uniformly weighted) filter would be specified with
1111111111. The default filter (which can also change by editing the script, is a somewhat Gaussian filter with the specification:
10 10 9 9 9 9 9 8 8 8 8 7 7 7 6 5 4 3 3 2 2 1 1It is 41 records wide.
By default, the averaging assumes the data records are evenly spaced, but you may specify a "time" column for data that arbitrary spacing, in which case the averaging is based on interpolation between known points.
Usage
usage: Smooth [options]
options:
filter | f <value list> one half of a filter weighting curve
valueCol | v <column with values to be smoothed, 1 is first column>
timeCol | t <column with values representing time, 1 is first column>
if time column is specified, the weighting will be based
on even increments of time
threshold | T <minimum for triggering smoothing>
help | h this display
input | i <input file name>
output | o <output file name>
The smooth Perl Script
#!/usr/bin/perl
# $Id: smooth.PL,v 1.2 2007/08/01 21:06:38 ben Exp ben $
# Ben Smith, S/V Mother of Perl, 2007
########################################################
use strict;
use warnings;
use Getopt::Long;
my @filterSpec = ();
my @defFilterSpec = qw(10 10 9 9 9 9 9 8 8 8 8 7 7 7 6 5 4 3 3 2 2 1 1);
# data table columns
my $timeCol = 1;
my $valueCol = 2;
my $threshold = 0; # replace spurious data with interpolation if not zero
my $help = undef;
my $inputFile = undef;
my $outputFile = undef;
my $result = GetOptions(
"filter|f=i{0,20}" => \@filterSpec,
"valueCol|v=i" => \$valueCol,
"timeCol|t=i" => \$timeCol,
"threshold|T=f" => \$threshold,
"help|h" => \$help,
"input|i=s" => \$inputFile,
"output|o=s" => \$outputFile,
);
if($help) {
print STDERR <<"EOT";
usage: Smooth [options]
options:
filter | f <value list> one half of a filter weighting curve
valueCol | v <column with values to be smoothed, 1 is first column>
timeCol | t <column with values representing time, 1 is first column>
if time column is specified, the weighting will be based
on even increments of time
threshold | T <minimum for triggering smoothing>
help | h this display
input | i <input file name>
output | o <output file name>
EOT
exit;
}
my $timeLast = undef;
my $timeThis = undef;
my $Last = undef;
my $This = undef;
my $TrendLast = 0;
my $TrendThis = 0;
$threshold = abs($threshold); #insure we don't have a negative threshold
#use the default filter spec if non specified on command line
@filterSpec = @defFilterSpec unless scalar @filterSpec;
my $filter = Lowpass->new($threshold,@filterSpec);
# deterine source and destination
if (defined $inputFile) {
open(STDIN, "<$inputFile");
}
if (defined $outputFile) {
open(STDOUT, ">$outputFile");
}
# the loop
while(<>){
chomp;
my(@fields) = split;
my $time = $fields[$timeCol-1];
my $value = $fields[$valueCol-1];
# skip lines with invalid time or value
next if $time =~ m/[a-zA-Z]/;
next if $value =~ m/[a-zA-Z]/;
# rounding
$value = sprintf("%.3f",$value);
# apply smoothing
($This,$timeThis) = $filter->smooth($value,$time);
if(defined $This){
$fields[$valueCol-1] = $This;
$fields[$timeCol-1] = $timeThis;
}
print join("\t",@fields) . "\n" if defined $This;
}
package Lowpass;
sub new {
my $class = shift;
my $threshold = shift;
my @weightModel = @_;
my $self = {};
bless($self,$class);
if(defined $threshold) {$self->setThreshold($threshold);}
if(scalar @weightModel) {$self->setWeights(@weightModel);}
return $self;
}
sub setThreshold {
my $self = shift;
my $threshold = shift;
if(defined $threshold) { $self->{'threshold'} = $threshold; }
return $self;
}
sub threshold {
my $self = shift;
return $self->{'threshold'};
}
sub setWeights {
my $self = shift;
my @weightModel = @_; # the rest of the argument list determines
# the size and weighting of the elements.
# from the center to the end
# form the real weighting list, the sum of which must be 1
my @copy = @weightModel; shift @copy; # loose the center
my @weights = (reverse(@copy),@weightModel);
# sum the parts and divide each element by the sum
my $sum = 0;
for(@weights) { $sum += $_; } # sum them up
# divide by sum to make new sum equal 1
for(my $n=0; $n <= $#weights; ++$n) { $weights[$n] = $weights[$n]/$sum; }
$self->{'weights'} = [@weights];
$self->{'ListSize'} = scalar @weights;
$self->{'center'} = ($self->ListSize - 1) / 2; # index starts at zero
$self->{'dataList'} = ();
return $self;
}
sub push {
my $self = shift;
my $newValue = shift;
my $key = shift;
push(@{$self->{'dataList'}},[$newValue,$key]);
if ($self->dataListCurr > $self->ListSize) {
shift @{$self->{'dataList'}};
}
return $self;
}
sub pop {
my $self = shift;
return(pop @{$self->{'dataList'}});
}
sub ListSize {
my $self = shift;
return $self->{'ListSize'};
}
sub center {
my $self = shift;
return $self->{'center'};
}
sub dataList {
my $self = shift;
return @{$self->{'dataList'}};
}
sub dataListCurr {
my $self = shift;
return scalar $self->dataList;
}
sub dataSet {
my $self = shift;
my $index = shift;
return @{$self->{'dataList'}}[$index];
}
sub value { # retrieve or set value at index
my $self = shift;
my $index = shift;
my $value = shift;
if (defined $value) {
${$self->{'dataList'}}[$index][0] = $value;
}
return @{$self->dataSet($index)}[0];
}
sub key {
my $self = shift;
my $index = shift;
return @{$self->dataSet($index)}[1];
}
sub weights {
my $self = shift;
my $index = shift;
return $self->{'weights'};
}
sub weight {
my $self = shift;
my $index = shift;
return @{$self->weights}[$index];
}
sub avg {
my $self = shift;
my $sum = 0;
my $key = $self->key($self->center);
my $max = $self->ListSize - 1;
for(my $n = 0; $n <= $max; ++ $n) {
$sum += $self->value($n) * $self->weight($n);
}
return ($sum,$key);
}
sub smooth {
my $self = shift;
my $newValue = shift;
my $key = shift;
# push new value into smoothing list dropping oldest value
$self->push($newValue,$key);
# filter out spikes on previous data value
if($self->threshold && (scalar $self->dataList > 3)) {
my $nearAvg = ($self->value(-3) + $self->value(-1))/2;
if(abs($nearAvg - $self->value(-2)) > $self->{'threshold'}) {
# replace previous value with nearAvg
$self->value(-2,$nearAvg);
}
}
# applied weighted averaging to smoothing list and return center value
if($self->dataListCurr == $self->ListSize) { #list full, ready to average
return($self->avg);
} else { # not full, just return undef
return(undef, undef);
}
}
1;