/* Please note: * This code is the optimized version of the first version of Matrix * Stressmark. It uses less temporary vectors and vsariables, thus reduce * memory allocation/deallocation overhead. the simulation is faster */ /* * Sample code for the DIS Matrix Stressmark * * This source code is the completely correct source code based on * the example codes provided by Atlantic Aerospace Division, Titan * Systems Corporation, 2000. * * If you just compile and generate the executables from this source * code, this code would be enough. However, if you wish to get a complete * understanding of this stressmark, it is strongly suggested that you * read the Benchmark Analysis and Specifications Document Version 1.0 * before going on since the detailed comments are given in this documents. * the comments are not repeated here. */ /* * The Sparse Matrix Storage is implemented by Compact Row Storage Scheme * In the code, the data is first generated by randomNonzeroFloat() * the data is first stored in a full-space matrix with size of dim*dim * then the data is transfered to the Compact Row Matrix, * the data value is kept in *value, * the columns corresponding to the value are stored in *col_ind, * the start element of each row is stored in *row_start. */ /* * Please note: * the total number of data is numberNonzero +dim * among which, NumberNonzero because this is symmetric matrix * dim because the diagonal elements */ #include "DISstressmarkRNG.h" #include "extra.h" #include #include #include #include #include #define MIN_SEED -2147483647 #define MAX_SEED -1 #define MIN_DIM 1 #define MAX_DIM 32768 #define MAX_ITERATIONS 65536 #define MIN_TOLERANCE 1e-7 // 0.000007 #define MAX_TOLERANCE 0.5 #define MIN_NUMBER -3.4e10 / dim #define MAX_NUMBER 3.4e10 / dim #define EPSI 1.0e-10 #define MIN_DIG_NUMBER 1.0e-10 #define MAX_DIG_NUMBER 3.4e10 /* * External variable, dimension */ static int dim; /* * matrix * vector */ void matrixMulvector(double *value, int *col_ind, int *row_start, double *vector, double *out) { int l, ll; double sum; int tmp_rs, tmp_re; for (l = 0; l < dim; l++) { *(out + l) = 0; tmp_rs = row_start[l]; if (tmp_rs != -1) { tmp_re = row_start[l + 1]; /* *get the start and ending elements of * each row */ for (ll = tmp_rs; ll < tmp_re; ll++) { *(out + l) += value[ll] * vector[col_ind[ll]]; } } } return; } /* * vector1 - vector2 */ void vectorSub(double *vector1, double *vector2, double *vector) { int l; for (l = 0; l < dim; l++) { *(vector + l) = *(vector1 + l) - *(vector2 + l); } return; } /* * vector1 + vector2 */ void vectorAdd(double *vector1, double *vector2, double *vector) { int l; for (l = 0; l < dim; l++) { *(vector + l) = *(vector1 + l) + *(vector2 + l); } return; } /* * vector1 * vector2 */ double vectorMul(double *vector1, double *vector2) { int l; double product; product = 0; for (l = 0; l < dim; l++) { product += (*(vector1 + l)) * (*(vector2 + l)); } return product; } /* * /vector/ */ double vectorValue(double *vector) { double value; int l; value = 0; for (l = 0; l < dim; l++) { value += (*(vector + l)) * (*(vector + l)); } return (sqrt(value)); } /* * transpose(vector) * In fact, we return the original vector here */ void transpose(double *vector, double *vect) { int l; for (l = 0; l < dim; l++) { *(vect + l) = *(vector + l); } return; } /* * value * */ void valueMulvector(double value, double *vector, double *vect) { int l; int lll, i; double tmp; for (l = 0; l < dim; l++) { *(vect + l) = *(vector + l) * value; } return; } /* * generate the data distributed sparsely in matrix */ void initMatrix(double *matrix, int dim, int numberNonzero) { int k, l, ll; int i, j; int lll; double sum; for (k = 0; k < dim * dim; k++) { *(matrix + k) = 0; } for (l = 0; l < numberNonzero / 2; l++) { i = randomUInt(1, dim - 1); j = randomUInt(0, i - 1); while (*(matrix + i * dim + j) != 0) { i++; if (i == dim) { j++; if (j == dim - 1) { j = 0; i = 1; } else { i = j + 1; } } } if (*(matrix + i * dim + j) == 0) { *(matrix + i * dim + j) = (double)randomNonZeroFloat(MIN_NUMBER, MAX_NUMBER, EPSI); *(matrix + j * dim + i) = *(matrix + i * dim + j); } } for (ll = 0; ll < dim; ll++) { *(matrix + ll * dim + ll) = (double)randomNonZeroFloat( -MAX_DIG_NUMBER, MAX_DIG_NUMBER, MIN_DIG_NUMBER); sum = 0; for (lll = 0; lll < dim; lll++) { if (lll != ll) { sum += *(matrix + lll * dim + ll); } } if (*(matrix + ll * dim + ll) < sum) { *(matrix + ll * dim + ll) += sum; } } return; } /* * generate the data value in the vectors */ void initVector(double *vector, int dim) { int l; for (l = 0; l < dim; l++) { *(vector + l) = (double)randomFloat(MIN_NUMBER, MAX_NUMBER); } return; } /* * make a vector contains value of zero */ void zeroVector(double *vector, int dim) { int l; for (l = 0; l < dim; l++) { *(vector + l) = 0; } return; } /* * return a vector which is the copy of the vect */ void equalVector(double *vect, double *vect1) { int l; for (l = 0; l < dim; l++) { *(vect1 + l) = *(vect + l); } return; } void biConjugateGradient(double *value, int *col_ind, int *row_start, double *vectorB, double *vectorX, double errorTolerance, int maxIterations, double *actualError, int *actualIteration, int dim, // Start internal matrixes double *vectorP, double *vectorR, double *nextVectorR, double *tmpVector1, double *tmpVector2, double *tmpVector3) /* * in the code, we use a lot of temparary vectors and variables * this is just for simple and clear * you can optimize these temporary variables and vectors * based on your need * */ { double error; int iteration; double alpha, beta; double tmpValue1, tmpValue2; int i; int l; int ll; alpha = 0; beta = 0; /* * vectorR = vectorB - matrixA*vectorX */ matrixMulvector(value, col_ind, row_start, vectorX, tmpVector1); vectorSub(vectorB, tmpVector1, vectorR); /* * vectorP = vectorR */ equalVector(vectorR, vectorP); /* * error = |matrixA * vectorX - vectorB| / |vectorB| */ vectorSub(tmpVector1, vectorB, tmpVector1); error = vectorValue(tmpVector1) / vectorValue(vectorB); iteration = 0; while ((iteration < maxIterations) && (error > errorTolerance)) { /* * alpha = (transpose(vectorR) * vectorR) / * (transpose(vectorP) * (matrixA * vectorP) */ matrixMulvector(value, col_ind, row_start, vectorP, tmpVector1); transpose(vectorR, tmpVector2); transpose(vectorP, tmpVector3); tmpValue1 = vectorMul(tmpVector3, tmpVector1); tmpValue2 = vectorMul(tmpVector2, vectorR); alpha = tmpValue2 / tmpValue1; /* * nextVectorR = vectorR - alpha*(matrixA * vectorP) */ valueMulvector(alpha, tmpVector1, tmpVector2); vectorSub(vectorR, tmpVector2, tmpVector1); equalVector(tmpVector1, nextVectorR); /* * beta = (transpose(nextVectorR) * nextVectorR) / * (transpose(vectorR) * vectorR) */ transpose(nextVectorR, tmpVector3); tmpValue1 = vectorMul(tmpVector3, nextVectorR); transpose(vectorR, tmpVector2); tmpValue2 = vectorMul(tmpVector2, vectorR); beta = tmpValue1 / tmpValue2; /* * vectorX = vectorX + alpha * vectorP */ valueMulvector(alpha, vectorP, tmpVector1); vectorAdd(vectorX, tmpVector1, vectorX); /* *vectorP = nextVectorR + beta*vectorP */ valueMulvector(beta, vectorP, tmpVector1); vectorAdd(nextVectorR, tmpVector1, tmpVector1); for (ll = 0; ll < dim; ll++) { *(vectorP + ll) = *(tmpVector1 + ll); } /* * vectorR = nextVectorR */ for (l = 0; l < dim; l++) { *(vectorR + l) = *(nextVectorR + l); } /* * error = |matrixA * vectorX - vectorB| / |vectorB| */ matrixMulvector(value, col_ind, row_start, vectorX, tmpVector1); vectorSub(tmpVector1, vectorB, tmpVector1); error = vectorValue(tmpVector1) / vectorValue(vectorB); iteration++; } *actualError = error; *actualIteration = iteration; return; } /* * This is the function to transfer the data from the matrix of dense storage * to Compact Row Storage */ void create_CRS(double *matrixA, double *value, int *col_ind, int *row_start, int dim, int numberNonzero) { int i, j, k; int cnt; double tmp; /* *initialize the row_start */ for (k = 0; k < dim; k++) { row_start[k] = -1; } /* * make the end of the last row to be numberNonzero + dim. */ row_start[dim] = numberNonzero + dim; /* * initialize the col_ind */ for (k = 0; k < numberNonzero + dim; k++) { col_ind[k] = -1; } cnt = 0; for (i = 0; (cnt < numberNonzero + dim) && (i < dim); i++) { for (j = 0; (cnt < numberNonzero + dim) && (j < dim); j++) { tmp = *(matrixA + i * dim + j); if (tmp != 0) { value[cnt] = tmp; col_ind[cnt] = j; if (row_start[i] == -1) row_start[i] = cnt; cnt += 1; } } } row_start[i] = cnt; return; } int main(int argc, char **argv) { int seed; int numberNonzero; int maxIterations; float errorTolerance; double actualError; int actualIteration; time_t beginTime; time_t endTime; double *matrixA; double *vectorB; double *vectorX; double *value; int *col_ind; int *row_start; double sum; int k; // Internal vects for biConj double *vectorR, *vectorP, *nextVectorR; double *tmpVector1, *tmpVector2, *tmpVector3; SET_UP assert(fscanf(stdin, "%d %d %d %d %f", &seed, &dim, &numberNonzero, &maxIterations, &errorTolerance) == 5); assert((seed > MIN_SEED) && (seed < MAX_SEED)); assert((dim > MIN_DIM) && (dim < MAX_DIM)); assert((numberNonzero > dim) && (numberNonzero < dim * dim)); assert((maxIterations > 0) && (maxIterations < MAX_ITERATIONS)); assert((errorTolerance > MIN_TOLERANCE) && (errorTolerance < MAX_TOLERANCE)); matrixA = (double *)malloc(dim * dim * sizeof(double)); vectorB = (double *)malloc(dim * sizeof(double)); vectorX = (double *)malloc(dim * sizeof(double)); value = (double *)malloc((numberNonzero + dim) * sizeof(double)); col_ind = (int *)malloc((numberNonzero + dim) * sizeof(int)); row_start = (int *)malloc((dim + 1) * sizeof(int)); // Internal matricies for biConj vectorP = (double *)malloc(dim * sizeof(double)); vectorR = (double *)malloc(dim * sizeof(double)); nextVectorR = (double *)malloc(dim * sizeof(double)); tmpVector1 = (double *)malloc(dim * sizeof(double)); tmpVector2 = (double *)malloc(dim * sizeof(double)); tmpVector3 = (double *)malloc(dim * sizeof(double)); randInit(seed); for_each_job { initMatrix(matrixA, dim, numberNonzero); create_CRS(matrixA, value, col_ind, row_start, dim, numberNonzero); initVector(vectorB, dim); zeroVector(vectorX, dim); beginTime = time(NULL); actualError = 0; actualIteration = 0; biConjugateGradient(value, col_ind, row_start, vectorB, vectorX, errorTolerance, maxIterations, &actualError, &actualIteration, dim, vectorP, vectorR, nextVectorR, tmpVector1, tmpVector2, tmpVector3); } endTime = time(NULL); free(tmpVector1); free(tmpVector2); free(tmpVector3); free(vectorR); free(vectorP); sum = 0; for (k = 1; k < dim; k++) { sum += sum + *(vectorX + k); } //fprintf(stdout, "sum = %f, actualError = %e, actualIteration = %d\n", sum, // actualError, actualIteration); volatile double _stop_optimizer = sum + actualError + actualIteration; fprintf(stderr, "time for matrix stressmark = %f seconds.\n", difftime(endTime, beginTime)); WRITE_TO_FILE return (0); }