Squashed commit of the sb-vbs branch.

Includes the SD-VBS benchmarks modified to: - Use libextra to loop as realtime jobs - Preallocate memory before starting their main computation - Accept input via stdin instead of via argc Does not include the SD-VBS matlab code. Fixes libextra execution in LITMUS^RT.
author: Leo Chan <leochanj@live.unc.edu> 2020-10-22 01:53:21 -0400
committer: Joshua Bakita <jbakita@cs.unc.edu> 2020-10-22 01:56:35 -0400
commit: d17b33131c14864bd1eae275f49a3f148e21cf29 (patch)
tree: 0d8f77922e8d193cb0f6edab83018f057aad64a0 /SD-VBS/benchmarks/pca/src
parent: 601ed25a4c5b66cb75315832c15613a727db2c26 (diff)
2 files changed, 694 insertions, 0 deletions
diff --git a/SD-VBS/benchmarks/pca/src/pca b/SD-VBS/benchmarks/pca/src/pca
new file mode 100755
index 0000000..15b5908
--- /dev/null
+++ b/SD-VBS/benchmarks/pca/src/pca
Binary files differ
diff --git a/SD-VBS/benchmarks/pca/src/pca.c b/SD-VBS/benchmarks/pca/src/pca.c
new file mode 100644
index 0000000..4637e7c
--- /dev/null
+++ b/SD-VBS/benchmarks/pca/src/pca.c
@@ -0,0 +1,694 @@
+/***********************  Contents  ****************************************
+ * Principal Components Analysis: C, 638 lines. ****************************
+ * Sample input data set (final 36 lines). *********************************
+ ***************************************************************************
+ */
+/*********************************/
+/* Principal Components Analysis */
+/*********************************/
+/*********************************************************************/
+/* Principal Components Analysis or the Karhunen-Loeve expansion is a
+   classical method for dimensionality reduction or exploratory data
+   analysis.  One reference among many is: F. Murtagh and A. Heck,
+   Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987.
+Author:
+F. Murtagh
+Phone:        + 49 89 32006298 (work)
+ 49 89 965307 (home)
+Earn/Bitnet:  fionn@dgaeso51,  fim@dgaipp1s,  murtagh@stsci
+Span:         esomc1::fionn
+Internet:     murtagh@scivax.stsci.edu
+F. Murtagh, Munich, 6 June 1989                                   */   
+/*********************************************************************/
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )
+main(argc, argv)
+  int argc;
+  char *argv[];
+{
+  FILE *stream;
+  int  n, m,  i, j, k, k2;
+  float **data, **matrix(), **symmat, **symmat2, *vector(), *evals, *interm;
+  void free_matrix(), free_vector(), corcol(), covcol(), scpcol();
+  void tred2(), tqli();
+  float in_value;
+  char option, *strncpy();
+  /*********************************************************************
+    Get from command line:
+    input data file name, #rows, #cols, option.
+    Open input file: fopen opens the file whose name is stored in the
+    pointer argv[argc-1]; if unsuccessful, error message is printed to
+    stderr.
+   *********************************************************************/
+  if (argc !=  5)
+  {
+    printf("Syntax help: PCA filename #rows #cols option\n\n");
+    printf("(filename -- give full path name,\n");
+    printf(" #rows                          \n");
+    printf(" #cols    -- integer values,\n");                  
+    printf(" option   -- R (recommended) for correlation analysis,\n");
+    printf("             V for variance/covariance analysis\n");
+    printf("             S for SSCP analysis.)\n");
+    exit(1);
+  }
+  n = atoi(argv[2]);              /* # rows */
+  m = atoi(argv[3]);              /* # columns */
+  strncpy(&option,argv[4],1);     /* Analysis option */
+  printf("No. of rows: %d, no. of columns: %d.\n",n,m);
+  printf("Input file: %s.\n",argv[1]);
+  if ((stream = fopen(argv[1],"r")) == NULL)
+  {
+    fprintf(stderr, "Program %s : cannot open file %s\n",
+        argv[0], argv[1]);
+    fprintf(stderr, "Exiting to system.");
+    exit(1);
+    /* Note: in versions of DOS prior to 3.0, argv[0] contains the
+       string "C". */
+  }
+  /* Now read in data. */
+  data = matrix(n, m);  /* Storage allocation for input data */
+  for (i = 1; i <= n; i++)
+  {
+    for (j = 1; j <= m; j++)
+    {
+      fscanf(stream, "%f", &in_value);
+      data[i][j] = in_value;
+      printf("at row %d column %d is %lf\n",i,j,data[i][j]);
+    }
+  }
+  /* Check on (part of) input data.
+     for (i = 1; i <= 18; i++) {for (j = 1; j <= 8; j++)  {
+     printf("%7.1f", data[i][j]);  }  printf("\n");  }
+     */
+  symmat = matrix(m, m);  /* Allocation of correlation (etc.) matrix */
+  /* Look at analysis option; branch in accordance with this. */
+  switch(option)
+  {
+    case 'R':
+    case 'r':
+      printf("Analysis of correlations chosen.\n");
+      corcol(data, n, m, symmat);
+      /* Output correlation matrix.
+         for (i = 1; i <= m; i++) {
+         for (j = 1; j <= 8; j++)  {
+         printf("%7.4f", symmat[i][j]);  }
+         printf("\n");  }
+         */
+      break;
+    case 'V':
+    case 'v':
+      printf("Analysis of variances-covariances chosen.\n");
+      covcol(data, n, m, symmat);
+      /* Output variance-covariance matrix.
+         for (i = 1; i <= m; i++) {
+         for (j = 1; j <= 8; j++)  {
+         printf("%7.1f", symmat[i][j]);  }
+         printf("\n");  }
+         */
+      break;
+    case 'S':
+    case 's':
+      printf("Analysis of sums-of-squares-cross-products");
+      printf(" matrix chosen.\n");
+      scpcol(data, n, m, symmat);
+      /* Output SSCP matrix.
+         for (i = 1; i <= m; i++) {
+         for (j = 1; j <= 8; j++)  {
+         printf("%7.1f", symmat[i][j]);  }
+         printf("\n");  }
+         */
+      break;
+    default:
+      printf("Option: %s\n",option);
+      printf("For option, please type R, V, or S\n");
+      printf("(upper or lower case).\n");
+      printf("Exiting to system.\n");
+      exit(1);
+      break;
+  }
+  /*********************************************************************
+    Eigen-reduction
+   **********************************************************************/
+  /* Allocate storage for dummy and new vectors. */
+  evals = vector(m);     /* Storage alloc. for vector of eigenvalues */
+  printf("the vector storage size is %d\n",m);
+  interm = vector(m);    /* Storage alloc. for 'intermediate' vector */
+  symmat2 = matrix(m, m);  /* Duplicate of correlation (etc.) matrix */
+  for (i = 1; i <= m; i++) {
+    for (j = 1; j <= m; j++) {
+      symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
+    }
+  }
+  tred2(symmat, m, evals, interm);  /* Triangular decomposition */
+  printf("eval value at 0 is %lf\n",evals[0]);
+  printf("eval value at 1 is %lf\n",evals[1]);
+  printf("eval value at 2 is %lf\n",evals[2]);
+  printf("m/height is %lf \n",m);
+  printf("m/height is %d \n",m);
+  printf("m/height is %d \n",n);
+  printf("m/height is %d \n",n);
+  tqli(evals, interm, m, symmat);   /* Reduction of sym. trid. matrix */
+  /* evals now contains the eigenvalues,
+     columns of symmat now contain the associated eigenvectors. */
+  printf("\nEigenvalues:\n");
+  for (j = m; j >= 1; j--) {
+    printf("%18.5f\n", evals[j]); }
+  printf("\n(Eigenvalues should be strictly positive; limited\n");
+  printf("precision machine arithmetic may affect this.\n");
+  printf("Eigenvalues are often expressed as cumulative\n");
+  printf("percentages, representing the 'percentage variance\n");
+  printf("explained' by the associated axis or principal component.)\n");
+  printf("\nEigenvectors:\n");
+  printf("(First three; their definition in terms of original vbes.)\n");
+  for (j = 1; j <= m; j++) {
+    for (i = 1; i <= 3; i++)  {
+      printf("%12.4f", symmat[j][m-i+1]);  }
+    printf("\n");  }
+  /* Form projections of row-points on first three prin. components. */
+  /* Store in 'data', overwriting original data. */
+  for (i = 1; i <= n; i++) {
+    for (j = 1; j <= m; j++) {
+      interm[j] = data[i][j];
+     printf("Iteration i=%d j=%d data = %lf\n\n", i,j,data[i][j]);
+    }   
+    for (k = 1; k <= 3; k++) {
+      data[i][k] = 0.0;
+      for (k2 = 1; k2 <= m; k2++) {
+        data[i][k] += interm[k2] * symmat[k2][m-k+1]; 
+     printf("Iteration i= %d, j = %d k=%d, k2=%d : data = %lf\n",i,j,k, k2,data[i][k]);
+      }
+     printf("\n");
+    }
+  }
+  printf("\nProjections of row-points on first 3 prin. comps.:\n");
+  for (i = 1; i <= n; i++) {
+    for (j = 1; j <= 3; j++)  {
+      printf("%12.4f", data[i][j]);  }
+    printf("\n");  }
+return;
+  /* Form projections of col.-points on first three prin. components. */
+  /* Store in 'symmat2', overwriting what was stored in this. */
+  for (j = 1; j <= m; j++) {
+    for (k = 1; k <= m; k++) {
+      interm[k] = symmat2[j][k]; }  /*symmat2[j][k] will be overwritten*/
+    for (i = 1; i <= 3; i++) {
+      symmat2[j][i] = 0.0;
+      for (k2 = 1; k2 <= m; k2++) {
+        symmat2[j][i] += interm[k2] * symmat[k2][m-i+1]; }
+      if (evals[m-i+1] > 0.0005)   /* Guard against zero eigenvalue */
+        symmat2[j][i] /= sqrt(evals[m-i+1]);   /* Rescale */
+      else
+        symmat2[j][i] = 0.0;    /* Standard kludge */
+    }
+  }
+  printf("\nProjections of column-points on first 3 prin. comps.:\n");
+  for (j = 1; j <= m; j++) {
+    for (k = 1; k <= 3; k++)  {
+      printf("%12.4f", symmat2[j][k]);  }
+    printf("\n");  }
+  free_matrix(data, n, m);
+  free_matrix(symmat, m, m);
+  free_matrix(symmat2, m, m);
+  free_vector(evals, m);
+  free_vector(interm, m);
+}
+/**  Correlation matrix: creation  ***********************************/
+void corcol(data, n, m, symmat)
+  float **data, **symmat;
+  int n, m;
+  /* Create m * m correlation matrix from given n * m data matrix. */
+{
+  float eps = 0.005;
+  float x, *mean, *stddev, *vector();
+  int i, j, j1, j2;
+  /* Allocate storage for mean and std. dev. vectors */
+  mean = vector(m);
+  stddev = vector(m);
+  /* Determine mean of column vectors of input data matrix */
+  for (j = 1; j <= m; j++)
+  {
+    mean[j] = 0.0;
+    for (i = 1; i <= n; i++)
+    {
+      mean[j] += data[i][j];
+    }
+    mean[j] /= (float)n;
+  }
+  printf("\nMeans of column vectors:\n");
+  for (j = 1; j <= m; j++)  {
+    printf("%7.1f",mean[j]);  }   printf("\n");
+  /* Determine standard deviations of column vectors of data matrix. */
+  for (j = 1; j <= m; j++)
+  {
+    stddev[j] = 0.0;
+    for (i = 1; i <= n; i++)
+    {
+      stddev[j] += (   ( data[i][j] - mean[j] ) *
+          ( data[i][j] - mean[j] )  );
+    }
+    stddev[j] /= (float)n;
+    stddev[j] = sqrt(stddev[j]);
+    /* The following in an inelegant but usual way to handle
+       near-zero std. dev. values, which below would cause a zero-
+       divide. */
+    if (stddev[j] <= eps) stddev[j] = 1.0;
+  }
+  printf("\nStandard deviations of columns:\n");
+  for (j = 1; j <= m; j++) { printf("%7.1f", stddev[j]); }
+  printf("\n");
+  /* Center and reduce the column vectors. */
+  for (i = 1; i <= n; i++)
+  {
+    for (j = 1; j <= m; j++)
+    {
+      data[i][j] -= mean[j];
+      x = sqrt((float)n);
+      x *= stddev[j];
+      data[i][j] /= x;
+      printf("value is %lf\n",data[i][j]);
+    }
+  }
+  /* Calculate the m * m correlation matrix. */
+  for (j1 = 1; j1 <= m-1; j1++)
+  {
+    symmat[j1][j1] = 1.0;
+    for (j2 = j1+1; j2 <= m; j2++)
+    {
+      symmat[j1][j2] = 0.0;
+      for (i = 1; i <= n; i++)
+      {
+        symmat[j1][j2] += ( data[i][j1] * data[i][j2]);
+          printf("multiplying values [%d][%d] * [%d][%d]\n",i,j1,i,j2);
+          printf("Multiplying %lf and %lf\n",data[i][j1],data[i][j2]);
+          printf("Value at %d %d = %lf\n",j1,j2,symmat[j1][j2]);
+      }
+      printf("**SPLIT**\n");
+      printf("swapping [%d] [%d] = [%d] [%d]\n",j2,j1, j1,j2);
+      symmat[j2][j1] = symmat[j1][j2];
+    }
+  }
+  symmat[m][m] = 1.0;
+  return;
+}
+/**  Variance-covariance matrix: creation  *****************************/
+void covcol(data, n, m, symmat)
+  float **data, **symmat;
+  int n, m;
+  /* Create m * m covariance matrix from given n * m data matrix. */
+{
+  float *mean, *vector();
+  int i, j, j1, j2;
+  /* Allocate storage for mean vector */
+  mean = vector(m);
+  /* Determine mean of column vectors of input data matrix */
+  for (j = 1; j <= m; j++)
+  {
+    mean[j] = 0.0;
+    for (i = 1; i <= n; i++)
+    {
+      mean[j] += data[i][j];
+    }
+    mean[j] /= (float)n;
+  }
+  printf("\nMeans of column vectors:\n");
+  for (j = 1; j <= m; j++)  {
+    printf("%7.1f",mean[j]);  }   printf("\n");
+  /* Center the column vectors. */
+  for (i = 1; i <= n; i++)
+  {
+    for (j = 1; j <= m; j++)
+    {
+      data[i][j] -= mean[j];
+    }
+  }
+  /* Calculate the m * m covariance matrix. */
+  for (j1 = 1; j1 <= m; j1++)
+  {
+    for (j2 = j1; j2 <= m; j2++)
+    {
+      symmat[j1][j2] = 0.0;
+      for (i = 1; i <= n; i++)
+      {
+        symmat[j1][j2] += data[i][j1] * data[i][j2];
+      }
+      symmat[j2][j1] = symmat[j1][j2];
+    }
+  }
+  return;
+}
+/**  Sums-of-squares-and-cross-products matrix: creation  **************/
+void scpcol(data, n, m, symmat)
+  float **data, **symmat;
+  int n, m;
+  /* Create m * m sums-of-cross-products matrix from n * m data matrix. */
+{
+  int i, j1, j2;
+  /* Calculate the m * m sums-of-squares-and-cross-products matrix. */
+  for (j1 = 1; j1 <= m; j1++)
+  {
+    for (j2 = j1; j2 <= m; j2++)
+    {
+      symmat[j1][j2] = 0.0;
+      for (i = 1; i <= n; i++)
+      {
+        symmat[j1][j2] += data[i][j1] * data[i][j2];
+      }
+      symmat[j2][j1] = symmat[j1][j2];
+    }
+  }
+  return;
+}
+/**  Error handler  **************************************************/
+void erhand(err_msg)
+  char err_msg[];
+  /* Error handler */
+{
+  fprintf(stderr,"Run-time error:\n");
+  fprintf(stderr,"%s\n", err_msg);
+  fprintf(stderr,"Exiting to system.\n");
+  exit(1);
+}
+/**  Allocation of vector storage  ***********************************/
+float *vector(n)
+  int n;
+  /* Allocates a float vector with range [1..n]. */
+{
+  float *v;
+  v = (float *) malloc ((unsigned) n*sizeof(float));
+  if (!v) erhand("Allocation failure in vector().");
+  return v-1;
+}
+/**  Allocation of float matrix storage  *****************************/
+float **matrix(n,m)
+  int n, m;
+  /* Allocate a float matrix with range [1..n][1..m]. */
+{
+  int i;
+  float **mat;
+  /* Allocate pointers to rows. */
+  mat = (float **) malloc((unsigned) (n)*sizeof(float*));
+  if (!mat) erhand("Allocation failure 1 in matrix().");
+  mat -= 1;
+  /* Allocate rows and set pointers to them. */
+  for (i = 1; i <= n; i++)
+  {
+    mat[i] = (float *) malloc((unsigned) (m)*sizeof(float));
+    if (!mat[i]) erhand("Allocation failure 2 in matrix().");
+    mat[i] -= 1;
+  }
+  /* Return pointer to array of pointers to rows. */
+  return mat;
+}
+/**  Deallocate vector storage  *********************************/
+void free_vector(v,n)
+  float *v;
+  int n;
+  /* Free a float vector allocated by vector(). */
+{
+  free((char*) (v+1));
+}
+/**  Deallocate float matrix storage  ***************************/
+void free_matrix(mat,n,m)
+  float **mat;
+  int n, m;
+  /* Free a float matrix allocated by matrix(). */
+{
+  int i;
+  for (i = n; i >= 1; i--)
+  {
+    free ((char*) (mat[i]+1));
+  }
+  free ((char*) (mat+1));
+}
+/**  Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */
+void tred2(a, n, d, e)
+  float **a, *d, *e;
+  /* float **a, d[], e[]; */
+  int n;
+  /* Householder reduction of matrix a to tridiagonal form.
+Algorithm: Martin et al., Num. Math. 11, 181-195, 1968.
+Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide
+Springer-Verlag, 1976, pp. 489-494.
+W H Press et al., Numerical Recipes in C, Cambridge U P,
+1988, pp. 373-374.  */
+{
+  int l, k, j, i;
+  float scale, hh, h, g, f;
+  for (i = n; i >= 2; i--)
+  {
+    l = i - 1;
+    h = scale = 0.0;
+    if (l > 1)
+    {
+      for (k = 1; k <= l; k++)
+        scale += fabs(a[i][k]);
+      if (scale == 0.0)
+        e[i] = a[i][l];
+      else
+      {
+        for (k = 1; k <= l; k++)
+        {
+          a[i][k] /= scale;
+          h += a[i][k] * a[i][k];
+        }
+        f = a[i][l];
+        g = f>0 ? -sqrt(h) : sqrt(h);
+        e[i] = scale * g;
+        h -= f * g;
+        a[i][l] = f - g;
+        f = 0.0;
+        for (j = 1; j <= l; j++)
+        {
+          a[j][i] = a[i][j]/h;
+          g = 0.0;
+          for (k = 1; k <= j; k++)
+            g += a[j][k] * a[i][k];
+          for (k = j+1; k <= l; k++)
+            g += a[k][j] * a[i][k];
+          e[j] = g / h;
+          f += e[j] * a[i][j];
+        }
+        hh = f / (h + h);
+        for (j = 1; j <= l; j++)
+        {
+          f = a[i][j];
+          e[j] = g = e[j] - hh * f;
+          for (k = 1; k <= j; k++)
+            a[j][k] -= (f * e[k] + g * a[i][k]);
+        }
+      }
+    }
+    else
+      e[i] = a[i][l];
+    d[i] = h;
+  }
+  d[1] = 0.0;
+  e[1] = 0.0;
+  for (i = 1; i <= n; i++)
+  {
+    l = i - 1;
+    if (d[i])
+    {
+      for (j = 1; j <= l; j++)
+      {
+        g = 0.0;
+        for (k = 1; k <= l; k++)
+          g += a[i][k] * a[k][j];
+        for (k = 1; k <= l; k++)
+          a[k][j] -= g * a[k][i];
+      }
+    }
+    d[i] = a[i][i];
+    a[i][i] = 1.0;
+    for (j = 1; j <= l; j++)
+      a[j][i] = a[i][j] = 0.0;
+  }
+}
+/**  Tridiagonal QL algorithm -- Implicit  **********************/
+void tqli(d, e, n, z)
+  float d[], e[], **z;
+  int n;
+{
+  int m, l, iter, i, k;
+  float s, r, p, g, f, dd, c, b;
+  void erhand();
+  for (i = 2; i <= n; i++)
+    e[i-1] = e[i];
+  e[n] = 0.0;
+  for (l = 1; l <= n; l++)
+  {
+    iter = 0;
+    do
+    {
+      for (m = l; m <= n-1; m++)
+      {
+        dd = fabs(d[m]) + fabs(d[m+1]);
+        if (fabs(e[m]) + dd == dd) break;
+      }
+      if (m != l)
+      {
+        if (iter++ == 30) erhand("No convergence in TLQI.");
+        g = (d[l+1] - d[l]) / (2.0 * e[l]);
+        r = sqrt((g * g) + 1.0);
+        g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
+        s = c = 1.0;
+        p = 0.0;
+        for (i = m-1; i >= l; i--)
+        {
+          f = s * e[i];
+          b = c * e[i];
+          if (fabs(f) >= fabs(g))
+          {
+            c = g / f;
+            r = sqrt((c * c) + 1.0);
+            e[i+1] = f * r;
+            c *= (s = 1.0/r);
+          }
+          else
+          {
+            s = f / g;
+            r = sqrt((s * s) + 1.0);
+            e[i+1] = g * r;
+            s *= (c = 1.0/r);
+          }
+          g = d[i+1] - p;
+          r = (d[i] - g) * s + 2.0 * c * b;
+          p = s * r;
+          d[i+1] = g + p;
+          g = c * r - b;
+          for (k = 1; k <= n; k++)
+          {
+            f = z[k][i+1];
+            z[k][i+1] = s * z[k][i] + c * f;
+            z[k][i] = c * z[k][i] - s * f;
+          }
+        }
+        d[l] = d[l] - p;
+        e[l] = g;
+        e[m] = 0.0;
+      }
+    }  while (m != l);
+  }
+}
author	Leo Chan <leochanj@live.unc.edu>	2020-10-22 01:53:21 -0400
committer	Joshua Bakita <jbakita@cs.unc.edu>	2020-10-22 01:56:35 -0400
commit	d17b33131c14864bd1eae275f49a3f148e21cf29 (patch)
tree	0d8f77922e8d193cb0f6edab83018f057aad64a0 /SD-VBS/benchmarks/pca/src
parent	601ed25a4c5b66cb75315832c15613a727db2c26 (diff)

diff --git a/SD-VBS/benchmarks/pca/src/pca b/SD-VBS/benchmarks/pca/src/pca new file mode 100755 index 0000000..15b5908 --- /dev/null +++ b/SD-VBS/benchmarks/pca/src/pca
Binary files differ


diff --git a/SD-VBS/benchmarks/pca/src/pca.c b/SD-VBS/benchmarks/pca/src/pca.c new file mode 100644 index 0000000..4637e7c --- /dev/null +++ b/SD-VBS/benchmarks/pca/src/pca.c
@@ -0,0 +1,694 @@
	1	/********************* Contents **************************************
	2	* Principal Components Analysis: C, 638 lines. ****************************
	3	* Sample input data set (final 36 lines). *********************************
	4	***************************************************************************
	5	*/
	6
	7	/*********************************/
	8	/* Principal Components Analysis */
	9	/*********************************/
	10
	11	/*********************************************************************/
	12	/* Principal Components Analysis or the Karhunen-Loeve expansion is a
	13	classical method for dimensionality reduction or exploratory data
	14	analysis. One reference among many is: F. Murtagh and A. Heck,
	15	Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987.
	16
	17	Author:
	18	F. Murtagh
	19	Phone: + 49 89 32006298 (work)
	20	+ 49 89 965307 (home)
	21	Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci
	22	Span: esomc1::fionn
	23	Internet: murtagh@scivax.stsci.edu
	24
	25	F. Murtagh, Munich, 6 June 1989 */
	26	/*********************************************************************/
	27
	28	#include <stdio.h>
	29	#include <string.h>
	30	#include <math.h>
	31
	32	#define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )
	33
	34	main(argc, argv)
	35	int argc;
	36	char *argv[];
	37
	38	{
	39	FILE *stream;
	40	int n, m, i, j, k, k2;
	41	float data, matrix(), symmat, symmat2, vector(), evals, *interm;
	42	void free_matrix(), free_vector(), corcol(), covcol(), scpcol();
	43	void tred2(), tqli();
	44	float in_value;
	45	char option, *strncpy();
	46
	47	/*********************************************************************
	48	Get from command line:
	49	input data file name, #rows, #cols, option.
	50
	51	Open input file: fopen opens the file whose name is stored in the
	52	pointer argv[argc-1]; if unsuccessful, error message is printed to
	53	stderr.
	54	*********************************************************************/
	55
	56	if (argc != 5)
	57	{
	58	printf("Syntax help: PCA filename #rows #cols option\n\n");
	59	printf("(filename -- give full path name,\n");
	60	printf(" #rows \n");
	61	printf(" #cols -- integer values,\n");
	62	printf(" option -- R (recommended) for correlation analysis,\n");
	63	printf(" V for variance/covariance analysis\n");
	64	printf(" S for SSCP analysis.)\n");
	65	exit(1);
	66	}
	67
	68	n = atoi(argv[2]); /* # rows */
	69	m = atoi(argv[3]); /* # columns */
	70	strncpy(&option,argv[4],1); /* Analysis option */
	71
	72	printf("No. of rows: %d, no. of columns: %d.\n",n,m);
	73	printf("Input file: %s.\n",argv[1]);
	74
	75	if ((stream = fopen(argv[1],"r")) == NULL)
	76	{
	77	fprintf(stderr, "Program %s : cannot open file %s\n",
	78	argv[0], argv[1]);
	79	fprintf(stderr, "Exiting to system.");
	80	exit(1);
	81	/* Note: in versions of DOS prior to 3.0, argv[0] contains the
	82	string "C". */
	83	}
	84
	85	/* Now read in data. */
	86
	87	data = matrix(n, m); /* Storage allocation for input data */
	88
	89	for (i = 1; i <= n; i++)
	90	{
	91	for (j = 1; j <= m; j++)
	92	{
	93	fscanf(stream, "%f", &in_value);
	94	data[i][j] = in_value;
	95	printf("at row %d column %d is %lf\n",i,j,data[i][j]);
	96	}
	97	}
	98
	99
	100
	101	/* Check on (part of) input data.
	102	for (i = 1; i <= 18; i++) {for (j = 1; j <= 8; j++) {
	103	printf("%7.1f", data[i][j]); } printf("\n"); }
	104	*/
	105
	106
	107
	108	symmat = matrix(m, m); /* Allocation of correlation (etc.) matrix */
	109
	110	/* Look at analysis option; branch in accordance with this. */
	111
	112	switch(option)
	113	{
	114	case 'R':
	115	case 'r':
	116	printf("Analysis of correlations chosen.\n");
	117	corcol(data, n, m, symmat);
	118
	119	/* Output correlation matrix.
	120	for (i = 1; i <= m; i++) {
	121	for (j = 1; j <= 8; j++) {
	122	printf("%7.4f", symmat[i][j]); }
	123	printf("\n"); }
	124	*/
	125	break;
	126	case 'V':
	127	case 'v':
	128	printf("Analysis of variances-covariances chosen.\n");
	129	covcol(data, n, m, symmat);
	130
	131	/* Output variance-covariance matrix.
	132	for (i = 1; i <= m; i++) {
	133	for (j = 1; j <= 8; j++) {
	134	printf("%7.1f", symmat[i][j]); }
	135	printf("\n"); }
	136	*/
	137	break;
	138	case 'S':
	139	case 's':
	140	printf("Analysis of sums-of-squares-cross-products");
	141	printf(" matrix chosen.\n");
	142	scpcol(data, n, m, symmat);
	143
	144	/* Output SSCP matrix.
	145	for (i = 1; i <= m; i++) {
	146	for (j = 1; j <= 8; j++) {
	147	printf("%7.1f", symmat[i][j]); }
	148	printf("\n"); }
	149	*/
	150	break;
	151	default:
	152	printf("Option: %s\n",option);
	153	printf("For option, please type R, V, or S\n");
	154	printf("(upper or lower case).\n");
	155	printf("Exiting to system.\n");
	156	exit(1);
	157	break;
	158	}
	159
	160	/*********************************************************************
	161	Eigen-reduction
	162	**********************************************************************/
	163
	164	/* Allocate storage for dummy and new vectors. */
	165	evals = vector(m); /* Storage alloc. for vector of eigenvalues */
	166	printf("the vector storage size is %d\n",m);
	167	interm = vector(m); /* Storage alloc. for 'intermediate' vector */
	168	symmat2 = matrix(m, m); /* Duplicate of correlation (etc.) matrix */
	169	for (i = 1; i <= m; i++) {
	170	for (j = 1; j <= m; j++) {
	171	symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
	172	}
	173	}
	174	tred2(symmat, m, evals, interm); /* Triangular decomposition */
	175	printf("eval value at 0 is %lf\n",evals[0]);
	176	printf("eval value at 1 is %lf\n",evals[1]);
	177	printf("eval value at 2 is %lf\n",evals[2]);
	178
	179	printf("m/height is %lf \n",m);
	180
	181	printf("m/height is %d \n",m);
	182	printf("m/height is %d \n",n);
	183	printf("m/height is %d \n",n);
	184
	185
	186	tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */
	187	/* evals now contains the eigenvalues,
	188	columns of symmat now contain the associated eigenvectors. */
	189
	190	printf("\nEigenvalues:\n");
	191	for (j = m; j >= 1; j--) {
	192	printf("%18.5f\n", evals[j]); }
	193	printf("\n(Eigenvalues should be strictly positive; limited\n");
	194	printf("precision machine arithmetic may affect this.\n");
	195	printf("Eigenvalues are often expressed as cumulative\n");
	196	printf("percentages, representing the 'percentage variance\n");
	197	printf("explained' by the associated axis or principal component.)\n");
	198
	199	printf("\nEigenvectors:\n");
	200	printf("(First three; their definition in terms of original vbes.)\n");
	201	for (j = 1; j <= m; j++) {
	202	for (i = 1; i <= 3; i++) {
	203	printf("%12.4f", symmat[j][m-i+1]); }
	204	printf("\n"); }
	205
	206
	207
	208
	209	/* Form projections of row-points on first three prin. components. */
	210	/* Store in 'data', overwriting original data. */
	211	for (i = 1; i <= n; i++) {
	212	for (j = 1; j <= m; j++) {
	213	interm[j] = data[i][j];
	214	printf("Iteration i=%d j=%d data = %lf\n\n", i,j,data[i][j]);
	215
	216	}
	217	for (k = 1; k <= 3; k++) {
	218	data[i][k] = 0.0;
	219	for (k2 = 1; k2 <= m; k2++) {
	220	data[i][k] += interm[k2] * symmat[k2][m-k+1];
	221	printf("Iteration i= %d, j = %d k=%d, k2=%d : data = %lf\n",i,j,k, k2,data[i][k]);
	222
	223	}
	224	printf("\n");
	225
	226	}
	227	}
	228
	229
	230
	231
	232
	233
	234
	235
	236
	237
	238
	239	printf("\nProjections of row-points on first 3 prin. comps.:\n");
	240	for (i = 1; i <= n; i++) {
	241	for (j = 1; j <= 3; j++) {
	242	printf("%12.4f", data[i][j]); }
	243	printf("\n"); }
	244	return;
	245
	246	/* Form projections of col.-points on first three prin. components. */
	247	/* Store in 'symmat2', overwriting what was stored in this. */
	248	for (j = 1; j <= m; j++) {
	249	for (k = 1; k <= m; k++) {
	250	interm[k] = symmat2[j][k]; } /symmat2[j][k] will be overwritten/
	251	for (i = 1; i <= 3; i++) {
	252	symmat2[j][i] = 0.0;
	253	for (k2 = 1; k2 <= m; k2++) {
	254	symmat2[j][i] += interm[k2] * symmat[k2][m-i+1]; }
	255	if (evals[m-i+1] > 0.0005) /* Guard against zero eigenvalue */
	256	symmat2[j][i] /= sqrt(evals[m-i+1]); /* Rescale */
	257	else
	258	symmat2[j][i] = 0.0; /* Standard kludge */
	259	}
	260	}
	261
	262	printf("\nProjections of column-points on first 3 prin. comps.:\n");
	263	for (j = 1; j <= m; j++) {
	264	for (k = 1; k <= 3; k++) {
	265	printf("%12.4f", symmat2[j][k]); }
	266	printf("\n"); }
	267
	268	free_matrix(data, n, m);
	269	free_matrix(symmat, m, m);
	270	free_matrix(symmat2, m, m);
	271	free_vector(evals, m);
	272	free_vector(interm, m);
	273
	274	}
	275
	276	/ Correlation matrix: creation *********************************/
	277
	278	void corcol(data, n, m, symmat)
	279	float data, symmat;
	280	int n, m;
	281	/* Create m * m correlation matrix from given n * m data matrix. */
	282	{
	283	float eps = 0.005;
	284	float x, mean, stddev, *vector();
	285	int i, j, j1, j2;
	286
	287	/* Allocate storage for mean and std. dev. vectors */
	288
	289	mean = vector(m);
	290	stddev = vector(m);
	291
	292	/* Determine mean of column vectors of input data matrix */
	293
	294	for (j = 1; j <= m; j++)
	295	{
	296	mean[j] = 0.0;
	297	for (i = 1; i <= n; i++)
	298	{
	299	mean[j] += data[i][j];
	300	}
	301	mean[j] /= (float)n;
	302	}
	303
	304	printf("\nMeans of column vectors:\n");
	305	for (j = 1; j <= m; j++) {
	306	printf("%7.1f",mean[j]); } printf("\n");
	307
	308	/* Determine standard deviations of column vectors of data matrix. */
	309
	310	for (j = 1; j <= m; j++)
	311	{
	312	stddev[j] = 0.0;
	313	for (i = 1; i <= n; i++)
	314	{
	315	stddev[j] += ( ( data[i][j] - mean[j] ) *
	316	( data[i][j] - mean[j] ) );
	317	}
	318	stddev[j] /= (float)n;
	319	stddev[j] = sqrt(stddev[j]);
	320	/* The following in an inelegant but usual way to handle
	321	near-zero std. dev. values, which below would cause a zero-
	322	divide. */
	323	if (stddev[j] <= eps) stddev[j] = 1.0;
	324	}
	325
	326	printf("\nStandard deviations of columns:\n");
	327	for (j = 1; j <= m; j++) { printf("%7.1f", stddev[j]); }
	328	printf("\n");
	329
	330	/* Center and reduce the column vectors. */
	331
	332	for (i = 1; i <= n; i++)
	333	{
	334	for (j = 1; j <= m; j++)
	335	{
	336	data[i][j] -= mean[j];
	337	x = sqrt((float)n);
	338	x *= stddev[j];
	339	data[i][j] /= x;
	340	printf("value is %lf\n",data[i][j]);
	341	}
	342	}
	343
	344	/* Calculate the m * m correlation matrix. */
	345	for (j1 = 1; j1 <= m-1; j1++)
	346	{
	347	symmat[j1][j1] = 1.0;
	348	for (j2 = j1+1; j2 <= m; j2++)
	349	{
	350	symmat[j1][j2] = 0.0;
	351	for (i = 1; i <= n; i++)
	352	{
	353	symmat[j1][j2] += ( data[i][j1] * data[i][j2]);
	354	printf("multiplying values [%d][%d] * [%d][%d]\n",i,j1,i,j2);
	355	printf("Multiplying %lf and %lf\n",data[i][j1],data[i][j2]);
	356
	357	printf("Value at %d %d = %lf\n",j1,j2,symmat[j1][j2]);
	358	}
	359	printf("SPLIT\n");
	360	printf("swapping [%d] [%d] = [%d] [%d]\n",j2,j1, j1,j2);
	361
	362	symmat[j2][j1] = symmat[j1][j2];
	363	}
	364	}
	365	symmat[m][m] = 1.0;
	366
	367
	368	return;
	369
	370	}
	371
	372	/ Variance-covariance matrix: creation ***************************/
	373
	374	void covcol(data, n, m, symmat)
	375	float data, symmat;
	376	int n, m;
	377	/* Create m * m covariance matrix from given n * m data matrix. */
	378	{
	379	float mean, vector();
	380	int i, j, j1, j2;
	381
	382	/* Allocate storage for mean vector */
	383
	384	mean = vector(m);
	385
	386	/* Determine mean of column vectors of input data matrix */
	387
	388	for (j = 1; j <= m; j++)
	389	{
	390	mean[j] = 0.0;
	391	for (i = 1; i <= n; i++)
	392	{
	393	mean[j] += data[i][j];
	394	}
	395	mean[j] /= (float)n;
	396	}
	397
	398	printf("\nMeans of column vectors:\n");
	399	for (j = 1; j <= m; j++) {
	400	printf("%7.1f",mean[j]); } printf("\n");
	401
	402	/* Center the column vectors. */
	403
	404	for (i = 1; i <= n; i++)
	405	{
	406	for (j = 1; j <= m; j++)
	407	{
	408	data[i][j] -= mean[j];
	409	}
	410	}
	411
	412	/* Calculate the m * m covariance matrix. */
	413	for (j1 = 1; j1 <= m; j1++)
	414	{
	415	for (j2 = j1; j2 <= m; j2++)
	416	{
	417	symmat[j1][j2] = 0.0;
	418	for (i = 1; i <= n; i++)
	419	{
	420	symmat[j1][j2] += data[i][j1] * data[i][j2];
	421	}
	422	symmat[j2][j1] = symmat[j1][j2];
	423	}
	424	}
	425
	426	return;
	427
	428	}
	429
	430	/ Sums-of-squares-and-cross-products matrix: creation ************/
	431
	432	void scpcol(data, n, m, symmat)
	433	float data, symmat;
	434	int n, m;
	435	/* Create m * m sums-of-cross-products matrix from n * m data matrix. */
	436	{
	437	int i, j1, j2;
	438
	439	/* Calculate the m * m sums-of-squares-and-cross-products matrix. */
	440
	441	for (j1 = 1; j1 <= m; j1++)
	442	{
	443	for (j2 = j1; j2 <= m; j2++)
	444	{
	445	symmat[j1][j2] = 0.0;
	446	for (i = 1; i <= n; i++)
	447	{
	448	symmat[j1][j2] += data[i][j1] * data[i][j2];
	449	}
	450	symmat[j2][j1] = symmat[j1][j2];
	451	}
	452	}
	453
	454	return;
	455
	456	}
	457
	458	/ Error handler ************************************************/
	459
	460	void erhand(err_msg)
	461	char err_msg[];
	462	/* Error handler */
	463	{
	464	fprintf(stderr,"Run-time error:\n");
	465	fprintf(stderr,"%s\n", err_msg);
	466	fprintf(stderr,"Exiting to system.\n");
	467	exit(1);
	468	}
	469
	470	/ Allocation of vector storage *********************************/
	471
	472	float *vector(n)
	473	int n;
	474	/* Allocates a float vector with range [1..n]. */
	475	{
	476
	477	float *v;
	478
	479	v = (float ) malloc ((unsigned) nsizeof(float));
	480	if (!v) erhand("Allocation failure in vector().");
	481	return v-1;
	482
	483	}
	484
	485	/ Allocation of float matrix storage ***************************/
	486
	487	float **matrix(n,m)
	488	int n, m;
	489	/* Allocate a float matrix with range [1..n][1..m]. */
	490	{
	491	int i;
	492	float **mat;
	493
	494	/* Allocate pointers to rows. */
	495	mat = (float *) malloc((unsigned) (n)sizeof(float*));
	496	if (!mat) erhand("Allocation failure 1 in matrix().");
	497	mat -= 1;
	498
	499	/* Allocate rows and set pointers to them. */
	500	for (i = 1; i <= n; i++)
	501	{
	502	mat[i] = (float ) malloc((unsigned) (m)sizeof(float));
	503	if (!mat[i]) erhand("Allocation failure 2 in matrix().");
	504	mat[i] -= 1;
	505	}
	506
	507	/* Return pointer to array of pointers to rows. */
	508	return mat;
	509
	510	}
	511
	512	/ Deallocate vector storage *******************************/
	513
	514	void free_vector(v,n)
	515	float *v;
	516	int n;
	517	/* Free a float vector allocated by vector(). */
	518	{
	519	free((char*) (v+1));
	520	}
	521
	522	/ Deallocate float matrix storage *************************/
	523
	524	void free_matrix(mat,n,m)
	525	float **mat;
	526	int n, m;
	527	/* Free a float matrix allocated by matrix(). */
	528	{
	529	int i;
	530
	531	for (i = n; i >= 1; i--)
	532	{
	533	free ((char*) (mat[i]+1));
	534	}
	535	free ((char*) (mat+1));
	536	}
	537
	538	/** Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */
	539
	540	void tred2(a, n, d, e)
	541	float *a, d, *e;
	542	/* float *a, d[], e[]; /
	543	int n;
	544	/* Householder reduction of matrix a to tridiagonal form.
	545	Algorithm: Martin et al., Num. Math. 11, 181-195, 1968.
	546	Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide
	547	Springer-Verlag, 1976, pp. 489-494.
	548	W H Press et al., Numerical Recipes in C, Cambridge U P,
	549	1988, pp. 373-374. */
	550	{
	551	int l, k, j, i;
	552	float scale, hh, h, g, f;
	553
	554	for (i = n; i >= 2; i--)
	555	{
	556	l = i - 1;
	557	h = scale = 0.0;
	558	if (l > 1)
	559	{
	560	for (k = 1; k <= l; k++)
	561	scale += fabs(a[i][k]);
	562	if (scale == 0.0)
	563	e[i] = a[i][l];
	564	else
	565	{
	566	for (k = 1; k <= l; k++)
	567	{
	568	a[i][k] /= scale;
	569	h += a[i][k] * a[i][k];
	570	}
	571	f = a[i][l];
	572	g = f>0 ? -sqrt(h) : sqrt(h);
	573	e[i] = scale * g;
	574	h -= f * g;
	575	a[i][l] = f - g;
	576	f = 0.0;
	577	for (j = 1; j <= l; j++)
	578	{
	579	a[j][i] = a[i][j]/h;
	580	g = 0.0;
	581	for (k = 1; k <= j; k++)
	582	g += a[j][k] * a[i][k];
	583	for (k = j+1; k <= l; k++)
	584	g += a[k][j] * a[i][k];
	585	e[j] = g / h;
	586	f += e[j] * a[i][j];
	587	}
	588	hh = f / (h + h);
	589	for (j = 1; j <= l; j++)
	590	{
	591	f = a[i][j];
	592	e[j] = g = e[j] - hh * f;
	593	for (k = 1; k <= j; k++)
	594	a[j][k] -= (f * e[k] + g * a[i][k]);
	595	}
	596	}
	597	}
	598	else
	599	e[i] = a[i][l];
	600	d[i] = h;
	601	}
	602	d[1] = 0.0;
	603	e[1] = 0.0;
	604	for (i = 1; i <= n; i++)
	605	{
	606	l = i - 1;
	607	if (d[i])
	608	{
	609	for (j = 1; j <= l; j++)
	610	{
	611	g = 0.0;
	612	for (k = 1; k <= l; k++)
	613	g += a[i][k] * a[k][j];
	614	for (k = 1; k <= l; k++)
	615	a[k][j] -= g * a[k][i];
	616	}
	617	}
	618	d[i] = a[i][i];
	619	a[i][i] = 1.0;
	620	for (j = 1; j <= l; j++)
	621	a[j][i] = a[i][j] = 0.0;
	622	}
	623	}
	624
	625	/ Tridiagonal QL algorithm -- Implicit ********************/
	626
	627	void tqli(d, e, n, z)
	628	float d[], e[], **z;
	629	int n;
	630	{
	631	int m, l, iter, i, k;
	632	float s, r, p, g, f, dd, c, b;
	633	void erhand();
	634
	635	for (i = 2; i <= n; i++)
	636	e[i-1] = e[i];
	637	e[n] = 0.0;
	638	for (l = 1; l <= n; l++)
	639	{
	640	iter = 0;
	641	do
	642	{
	643	for (m = l; m <= n-1; m++)
	644	{
	645	dd = fabs(d[m]) + fabs(d[m+1]);
	646	if (fabs(e[m]) + dd == dd) break;
	647	}
	648	if (m != l)
	649	{
	650	if (iter++ == 30) erhand("No convergence in TLQI.");
	651	g = (d[l+1] - d[l]) / (2.0 * e[l]);
	652	r = sqrt((g * g) + 1.0);
	653	g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
	654	s = c = 1.0;
	655	p = 0.0;
	656	for (i = m-1; i >= l; i--)
	657	{
	658	f = s * e[i];
	659	b = c * e[i];
	660	if (fabs(f) >= fabs(g))
	661	{
	662	c = g / f;
	663	r = sqrt((c * c) + 1.0);
	664	e[i+1] = f * r;
	665	c *= (s = 1.0/r);
	666	}
	667	else
	668	{
	669	s = f / g;
	670	r = sqrt((s * s) + 1.0);
	671	e[i+1] = g * r;
	672	s *= (c = 1.0/r);
	673	}
	674	g = d[i+1] - p;
	675	r = (d[i] - g) * s + 2.0 * c * b;
	676	p = s * r;
	677	d[i+1] = g + p;
	678	g = c * r - b;
	679	for (k = 1; k <= n; k++)
	680	{
	681	f = z[k][i+1];
	682	z[k][i+1] = s * z[k][i] + c * f;
	683	z[k][i] = c * z[k][i] - s * f;
	684	}
	685	}
	686	d[l] = d[l] - p;
	687	e[l] = g;
	688	e[m] = 0.0;
	689	}
	690	} while (m != l);
	691	}
	692	}
	693
	694