/* file: gmse.c                 M. Costa                3 January 2019
                                Last revised            8 January 2019 (Benjamin Moody)
-------------------------------------------------------------------------------
gmse: calculates multiscale entropy with different coarse-graining methods (generalized multiscale entropy, GMSE)
Copyright (C) 2019 Madalena Costa

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, see <https://www.gnu.org/licenses/>.

You may contact the author by e-mail (mcosta3@bidmc.harvard.edu).  For updates
to this software, please visit PhysioNet (https://www.physionet.org/).
_______________________________________________________________________________
*/


#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#define MAXSTR 1250     /* maximum string size */
#define M_MAX 10        /* maximum value of the parameter m */
#define SCALE_MAX 100   /* maximum scale value */
#define J_MIN 5         /* starting scale */
#define D 50            /* minimum number of matches assumed necessary
                        for "reliable" computation of SampEn */

/* Global variables */
char *prog, line[MAXSTR];

double *t, *rr, *cg;
int rr_nlin, rr_bufsize, *x, flag_time_rr=0;

/*
  The program reads input files comprising either one column of
  numbers (flag_time_rr=0) or two columns of numbers, time and RR
  intervals (flag_time_rr=1).

  The program derives coarse-grained time series (cg) that represent
  the system's dynamics on different time scales. Beat number
  information is saved in x.

  Next, sample entropy is calculated for each cg time series.

  The output comprises four columns: scale, sample entropy, number of
  matches and r*sd values.
*/

/* Function prototypes */
void alloc_double (double **array, int n);
void alloc_int (int **array, int n);
double StandardDeviation (double *a, int n);
double SampleEntropy (double r, int j, int m, int n);
int CG_mean (int j);
int CG_SD (int j);
int CG_var (int j);
int CG_MAD (int j);
void usage ();

int main(int argc, char *argv[])
{

  int i, j, m, i_min, i_max, scale_step, scale_max,
    flag_r = 0, flag_x = 0, cg_meth, cg_nlin;
  double r, sd;

  /* The parameters for the computation of sample entropy are
     m (vector length) and r (noise tolerance).

     The variable cg_nlin is the length of the coarse-grained time
     series (varies with scale).

     The variable cg_meth takes the values 1 to 4 depending on the
     coarse-graining procedure that is selected (1 mean; 2 sd; 3
     variance; 4 mean absolute deviation.) */

  prog = argv[0];
  scale_max = 20;
  scale_step = 1;
  m = 2;
  i_min = 0;
  i_max = INT_MAX;
  r = 0.15;
  cg_meth = 2;

  i = 0;
  while (++i < argc && *argv[i] == '-') {
    switch (argv[i][1]) {
    case 'n':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      scale_max = atoi(argv[++i]);
      break;

    case 'a':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      scale_step = atoi(argv[++i]);
      break;

    case 'm':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      m = atoi(argv[++i]);
      break;

    case 'r':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      flag_r = 1;
      r = atof(argv[++i]);
      break;

    case 'x':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      flag_x=1;
      r = atof(argv[++i]);
      break;

    case 'i':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      i_min = atoi(argv[++i]);
      break;

    case 'I':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      i_max = atoi(argv[++i]);
      break;

    case 'c':
      if (i + 1 >= argc) {
        fprintf(stderr, "%s: %s: requires an argument\n", prog, argv[i]);
        exit(1);
      }
      cg_meth = atoi(argv[++i]);
      break;

    default:
      usage();
      exit(1);
    }
  }

  if (scale_max <= 0 || scale_max > SCALE_MAX) {
    fprintf(stderr, "%s: -n: invalid argument (must be between 1 and %d)\n",
            prog, SCALE_MAX);
    exit(1);
  }
  if (scale_step <= 0 || scale_step > scale_max - 1) {
    fprintf(stderr, "%s: -a: invalid argument (must be between 1 and %d)\n",
            prog, scale_max - 1);
    exit(1);
  }
  if (m <= 0 || m >= M_MAX) {
    fprintf(stderr, "%s: -m: invalid argument (must be between 1 and %d)\n",
            prog, M_MAX - 1);
    exit(1);
  }
  if (flag_r && flag_x) {
    fprintf(stderr, "%s: cannot use both -r and -x\n", prog);
    exit(1);
  }
  if (r <= 0) {
    fprintf(stderr, "%s: %s: invalid argument (must be greater than 0)\n",
            prog, (flag_r ? "-r" : "-x"));
    exit(1);
  }
  if (cg_meth < 1 || cg_meth > 4) {
    fprintf(stderr, "%s: -c: invalid argument (must be between 1 and 4)\n",
            prog);
    exit(1);
  }

  /* Memory Allocation */
  rr_bufsize = 1024;
  rr_nlin = 0;
  alloc_double(&t, rr_bufsize);
  alloc_double(&rr, rr_bufsize);
  alloc_double(&cg, rr_bufsize);
  alloc_int(&x, rr_bufsize);

  /* Data Read from stdin */
  j = -1;
  while (fgets(line, MAXSTR, stdin) != NULL){
    j++;
    if (rr_nlin >= rr_bufsize) {
      rr_bufsize *= 2;
      alloc_double(&t, rr_bufsize);
      alloc_double(&rr, rr_bufsize);
      alloc_double(&cg, rr_bufsize);
      alloc_int(&x, rr_bufsize);
    }
    if (j >= i_min && j <= i_max) {
      if (sscanf(line, "%lf %lf", &t[j-i_min], &rr[j-i_min]) == 2){
        rr_nlin = j - i_min + 1;
        flag_time_rr = 1;
      }
      else if (sscanf(line, "%lf", &rr[j-i_min]) == 1)
        rr_nlin = j - i_min + 1;
      else {
        fprintf(stderr, "The input file should contain two columns: time and rr interval.\n");
        exit(1);
      }
    }
  }

  /* Deriving the coarse-grained time series & calculating sample entropy */
  switch (cg_meth) {

  case 1:
    for (j = 1; j <= scale_max; j += scale_step) {
      cg_nlin = CG_mean (j);
      if (flag_x == 0 && j == 1){
        sd = StandardDeviation (rr, rr_nlin);
        r = r * sd;
      }
      SampleEntropy (r, j, m, cg_nlin);
    }
    break;

  case 2:
    for (j = J_MIN; j <= scale_max; j += scale_step){
      cg_nlin = CG_SD (j);
      if (flag_x == 0 && j == J_MIN){
        sd = StandardDeviation (cg, cg_nlin);
	r = r * sd;
      }
      SampleEntropy (r, j, m, cg_nlin);
    }
    break;

  case 3:
    for (j = J_MIN; j <= scale_max; j += scale_step) {
      cg_nlin = CG_var (j);
      if (flag_x == 0 && j == J_MIN){
        sd = StandardDeviation (cg, cg_nlin);
        r = r * sd;
      }
      SampleEntropy (r, j, m, cg_nlin);
    }
    break;

  case 4:
    for (j = J_MIN; j <= scale_max; j += scale_step){
      cg_nlin = CG_MAD (j);
      if (flag_x == 0 && j == J_MIN){
        sd = StandardDeviation (cg, cg_nlin);
        r = r * sd;
      }
      SampleEntropy (r, j, m, cg_nlin);
    }
    break;

  default:
    usage();
    exit(1);
  }

  /* Free memory */
  free(rr);
  free(cg);
  free(t);
  free(x);
  return 0;
}

void alloc_double(double **array, int n)
{
  double *a;

  if ((a = realloc(*array, n * sizeof (double))) == NULL) {
    fprintf(stderr, "%s : insufficient memory\n", prog);
    exit(2);
  }
  *array = a;
}

void alloc_int(int **array, int n)
{
  int *a;

  if ((a = realloc(*array, n * sizeof (int))) == NULL) {
    fprintf(stderr, "%s : insufficient memory\n", prog);
    exit(2);
  }
  *array = a;
}

double StandardDeviation(double *a, int count)
{
  double sum = 0.0, sum2 = 0.0, sd;
  int j;

  for (j = 0; j < count; j++){
    sum += a[j];
    sum2 += a[j] * a[j];
  }
  sd = sqrt((sum2 - sum * sum / count) / (count - 1));

  return (sd);
}

double SampleEntropy(double r, int j, int m, int cg_nlin)
{
  int i, k, l, cont[SCALE_MAX][M_MAX+1],
    discontinuity_t = 0, discontinuity_m = 0;
  double SE[SCALE_MAX];

  cg_nlin = cg_nlin - m; /* for 1<=i<=cg_nlin both vectors of length
                            m_max and m_max+1 are defined */

  for (i = 0; i < M_MAX; i++)
    cont[j][i] = 0;

  for (i = 0; i < cg_nlin; ++i) {
    discontinuity_t = 0;
    /* checking if the components of the template vector are
       consecutive */
    for (k = 0; k < m; k++)
      if (x[i+k+1] - x[i+k] != j)
        discontinuity_t = 1;
      
    if (discontinuity_t == 0 || flag_time_rr == 0) {
      for (l = i + 1; l < cg_nlin; ++l) { /* self-matches are not counted */
        /* checking if the components of the matching vector are
           consecutive */
        discontinuity_m = 0;
        for (k = 0; k < m; k++)
          if (x[l+k+1] - x[l+k] != j)
            discontinuity_m = 1;
	
        if (discontinuity_m == 0 || flag_time_rr == 0){
          k = 0;
          while (k <= m && fabs(cg[i+k] - cg[l+k]) <= r){
            k++;
            cont[j][k]++;
          }
        }
      }
    }
  }

  if (cont[j][m+1] == 0 || cont[j][m] == 0)
    SE[j] = -log((double) 1 / ((cg_nlin) * (cg_nlin - 1)));
  else
    SE[j] = -log((double) cont[j][m+1] / cont[j][m]);

  if (cont[j][m+1] < D) {
    fprintf(stderr,
            "%s: error: At scale %d, the number of matches (with r*sd=%.3f)\n"
            "      for %d-component vectors is %d.  The time series length\n"
            "      may be too short, or the r value too restrictive, to\n"
            "      analyze this scale.\n\n",
            prog, j, r, m + 1, cont[j][m+1]);
    exit(1);
  }

  printf ("%d\t%.4lf\t%d/%d\t\t%lf\n", j, SE[j], cont[j][m+1], cont[j][m], r);

  return SE[j];
}

int CG_mean(int j)
{
  int i = 0, l, n = 0, length, discontinuity;

  while (i < rr_nlin - j + 1) {
    cg[n] = 0;
    discontinuity = 0;

    if (flag_time_rr == 0)
      length = j;
    else{
      length = 0;

      /* check if rr intervals are consecutive */
      for (l = i; l < i + j && discontinuity == 0; l++) {
	if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
	  length += 1;
	else{
	  discontinuity = 1;
	  length +=1;
	}
      }
    }

    if (discontinuity == 0) {
      for (l = 0; l < j; l++)
        cg[n] += rr[i+l] / j;
      x[n] = i;
     n++;
    }

    i += (length);
  }

  return n;
}

int CG_SD(int j)
{
  double sum, sum2;
  int i = 0, l, n = 0, length, discontinuity;

  while (i < rr_nlin - j + 1) {
    sum = 0;
    sum2 = 0;
    discontinuity = 0;

    if (flag_time_rr == 0)
      length = j;
    else{
      length = 0;
      
      /* check if rr intervals are consecutive */
      for (l = i; l < i + j && discontinuity == 0; l++) {
	if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
	  length += 1;
	else{	  
	  discontinuity = 1;
	  length += 1;
	}
      }
    }

    if (discontinuity == 0){
      for (l = 0; l < j; l++){
        sum += rr[i+l];
        sum2 += rr[i+l] * rr[i+l];
      }
      x[n] = i;
      if ( (sum2 - sum * sum / j) / (j - 1) > 0)
	cg[n] = sqrt((sum2 - sum * sum / j) / (j - 1));
      else cg[n]=0;
      
      n++;
    }

    i += (length);
  }
  return n;
}

int CG_var(int j)
{
  double sum, sum2;
  int i = 0, l, n = 0, length, discontinuity;

  while (i < rr_nlin - j + 1) {
    sum = 0;
    sum2 = 0;
    discontinuity = 0;

    if (flag_time_rr == 0)
      length = j;
    else{
      length = 0;

      /* check if rr intervals are consecutive */
      for (l = i; l < i + j && discontinuity == 0; l++) {
	if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
          length += 1;
	else{
	  discontinuity = 1;
	  length += 1;
	}
      }
    }

    if (discontinuity == 0){
      for (l = 0; l < j; l++) {
        sum += rr[i+l];
        sum2 += rr[i+l] * rr[i+l];
      }
      x[n] = i;
      cg[n] = (sum2 - sum * sum / j) / (j-1);
      n++;
    }
    i += (length);
  }
  return n;
}

int CG_MAD(int j)
{
  double sum, sum2;
  int i = 0, l, n = 0, length, discontinuity;

  while (i < rr_nlin - j + 1) {
    sum = 0;
    sum2 = 0;
    discontinuity = 0;

    if (flag_time_rr == 0)
      length = j;
    else{
      length = 0;

      /* check if rr intervals are consecutive */
      for (l = i; l < i + j && discontinuity == 0; l++) {
	if (fabs(t[l+1] - t[l] - rr[l]) < 0.1)
	  length += 1;
	else{
	  discontinuity = 1;
	  length += 1;
	}
      }
    }

    if (discontinuity == 0){
      for (l = 0; l < j; l++)
        sum += rr[i+l] / j;
      for (l = 0; l < j; l++)
        sum2 += fabs(rr[i+l] - sum);
      x[n] = i;
      cg[n] = sum2 / j;
      n++;
    }
    i += (length);
  }
  return n;
}

void usage()
{
  fprintf(stderr, "%s [options]\n", prog);
  fprintf(stderr, " -n SCALE   maximum scale factor [default: 20]\n");
  fprintf(stderr, " -a STEP    difference between consecutive scale\n");
  fprintf(stderr, "            factors [default: 1]\n");
  fprintf(stderr, " -m COUNT   template length (m value) [default: 2]\n");
  fprintf(stderr, " -r FACTOR  tolerance level in proportion to the\n");
  fprintf(stderr, "            standard deviation [default: 0.15]\n");
  fprintf(stderr, " -x VALUE   absolute tolerance level\n");
  fprintf(stderr, " -i N       skip first N input values\n");
  fprintf(stderr, " -I N       process at most N input values\n");
  fprintf(stderr, " -c [1-4]   coarse-graining function:\n");
  fprintf(stderr, "             -c 1: mean\n");
  fprintf(stderr, "             -c 2: standard deviation [default]\n");
  fprintf(stderr, "             -c 3: variance\n");
  fprintf(stderr, "             -c 4: mean absolute deviation\n");
}