--- a +++ b/HRV.src/filt.c @@ -0,0 +1,277 @@ +/* filt.c Joe Mietus Apr 1 2011 */ + +/* filt.c Joe Mietus Oct 7 2008 */ +/* getline renamed to lineget May 18 2010 */ + +/* +filt : +Usage: filt filt hwin [options] + Reads 1 (y) or 2 (x,y) columns from stdin and + filters outliers by deleting those intervals outside + of `filt' range of the average within a window + `hwin' distance on either side of the + current interval. + options are : + [-x min max] : exclude date outside min - max + [-n] : print ratio of nout to nin + [-p file] : print excluded points to file + not including start/end hwin buffer + excluded points may not be printed in time + sequential order if -x opption is used +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#define MAXSTR 256 +#define MAXWIN 1024 + +char *prog, line[MAXSTR], tmp[MAXSTR]; +int ncols; +static double x[MAXWIN+1], y[MAXWIN+1]; +FILE *fp; + + +main(argc, argv) +int argc; +char * argv[]; +{ + int hwin, win, i, j, k, nin, nout; + int xflag, nflag, pflag; + double filt, filtmin, filtmax, min, max, mtmp, sum, av; + + xflag = 0; + nflag = pflag = nin = nout = 0; + sum = 0.0; + prog = argv[0]; + + if (argc < 3) { + usage(); + exit(1); + } + + i = 1; + if ((filt = atof(argv[i])) <= 0) { + fprintf(stderr, "%s: filt must be greater than 0\n", + prog); + exit(2); + } + if ((hwin = atoi(argv[++i])) <= 0) { + fprintf(stderr, "%s: hwin must be integer greater than 0\n", + prog); + exit(2); + } + if ((win = 2*hwin) > MAXWIN) { + fprintf(stderr, "%s: max hwin = %d\n", prog, MAXWIN/2); + exit(2); + } + + for ( ; ++i < argc && *argv[i] == '-'; ) { + switch(argv[i][1]) { + case 'x': if (i+3 > argc) { + usage(); + exit(1); + } + min = atof(argv[++i]); + max = atof(argv[++i]); + if (min > max) { + mtmp = min; + min = max; + max = mtmp; + } + xflag = 1; + break; + case 'n': nflag = 1; + break; + case 'p': if ((fp = fopen(argv[++i], "w")) == NULL) + fprintf(stderr, "%s : Can't open %s\n", + argv[0], argv[i]); + else + pflag = 1; + break; + default: usage(); + exit(1); + } + } + + i = 0; + if (fgets(line, MAXSTR, stdin) == NULL) { + fprintf(stderr, "%s : no data read\n", prog); + exit(2); + } + if (sscanf(line, "%lf %lf %s", &x[i], &y[i], tmp) == 3) { + fprintf(stderr, "%s : incorrectly formatted data\n", prog); + exit(2); + } + else if (sscanf(line, "%lf %lf", &x[i], &y[i]) == 2) { + ncols = 2; + } + else if (sscanf(line, "%lf %s", &x[i], tmp) == 2) { + fprintf(stderr, "%s : incorrectly formatted data\n", prog); + exit(2); + } + else if (sscanf(line, "%lf", &y[i]) == 1) { + ncols = 1; + x[0] = 0; + } + else { + fprintf(stderr, "%s : incorrectly formatted data\n", prog); + exit(2); + } + + nin++; + if (pflag) + fprintline(i); + + i++; + while (i<=win) { + if (lineget(i) == EOF) { + fprintf(stderr, "%s : insufficient data\n", prog); + exit(2); + } + nin++; + + if (xflag && (y[i]<min || y[i]>max)) { + if (pflag) + fprintline(i); + } + else { + if (pflag && i<hwin) + fprintline(i); + i++; + } + } + + i = 0; + j = hwin; + + for (i=0; i<=win; i++) + sum += y[i]; + sum -= y[j]; + av = sum/win; + sum += y[j] - y[0]; + + filtmax = filtmin = filt * av; + + if (y[j] <= av+filtmax && y[j] >= av-filtmin) { + printline(j); + nout++; + } + else if (pflag) + fprintline(j); + + i = 0; + + while (lineget(i) != EOF) { + nin++; + + if (xflag && (y[i]<min || y[i]>max)) { + if (pflag) + fprintline(i); + } + else { + if (++j > win) + j = 0; + + sum += y[i] - y[j]; + av = sum/win; + if (++i > win) + i = 0; + sum += y[j] - y[i]; + + filtmax = filtmin = filt * av; + + if (y[j] <= av+filtmax && y[j] >= av-filtmin) { + printline(j); + nout++; + } + else { + if (pflag) + fprintline(j); + } + } + } + + if (pflag) { + for (i=0; i<hwin; i++) { + if (++j > win) + j = 0; + fprintline(j); + } + } + + if (nflag) + fprintf(stderr, "nout : nin = %d : %d = %g\n", + nout, nin, (double)nout/nin); + + exit(0); +} + + + +lineget(i) +int i; +{ + int j; + static int n = 1; + + if (fgets(line, MAXSTR, stdin) == NULL) + return(EOF); + + if (ncols == 1) { + if ((j = sscanf(line, "%lf %s", &y[i], tmp)) != 1) { + fprintf(stderr, "%s: incorrectly formatted data : ", prog); + fprintf(stderr, "line = %d\n", n+1); + exit(2); + } + x[i] = n; + } + else if (ncols == 2) { + if ((j = sscanf(line, "%lf %lf %s", &x[i], &y[i], tmp)) != 2) { + fprintf(stderr, "%s: incorrectly formatted data : ", prog); + fprintf(stderr, "line = %d\n", n+1); + exit(2); + } + } + + n++; + + return(j); +} + + +printline(i) +int i; +{ + if (ncols == 2) + printf("%.9g ", x[i]); + printf("%g\n", y[i]); +} + + +fprintline(i) +int i; +{ + if (ncols == 2) + fprintf(fp, "%.9g ", x[i]); + fprintf(fp, "%g\n", y[i]); +} + + +usage() +{ + fprintf(stderr, "Usage: %s filt hwin [options]\n", prog); + fprintf(stderr, " Reads 1 (y) or 2 (x,y) columns from stdin and\n"); + fprintf(stderr, " filters outliers by deleting those intervals outside\n"); + fprintf(stderr, " of `filt' range of the average within a window\n"); + fprintf(stderr, " `hwin' distance on either side of the\n"); + fprintf(stderr, " current interval.\n\n"); + fprintf(stderr, " options are :\n"); + fprintf(stderr, " [-x min max] : exclude date outside min - max\n"); + fprintf(stderr, " [-n] : print ratio of nout to nin\n"); + fprintf(stderr, " [-p file] : print excluded points to file\n"); + fprintf(stderr, " not including start/end hwin buffer\n"); + fprintf(stderr, " excluded points may not be printed in time\n"); + fprintf(stderr, " sequential order if -x opption is used\n"); +}