2 files changed, 28 insertions, 9 deletions
diff --git a/dis/WSS_DOCS.md b/dis/WSS_DOCS.md
index da5e066..3f151b6 100644
--- a/dis/WSS_DOCS.md
+++ b/dis/WSS_DOCS.md
@@ -1,4 +1,10 @@
 # Documentation Mapping DIS Stressmark Parameters to WSS
+This details exactly how the input parameters of each Stressmark translate to
+its dynamic allocation size. With some algebra, we then convert those
+equations such that we can determine the needed parameters for a specific
+desired dynamic allocation size.
+The equations teased out here are implemented in `gen_input.py`.
 ## Field
 1 allocation in main()
@@ -18,6 +24,15 @@ sizeof(double) * (dim^2+3dim+numberNonzero) + sizeof(int) * (2dim+1+numberNonzer
 *Allocations in biConjugateGradient()*
 sizeof(double) * 7dim
+*Vectors*
+sizeof(double) * 8dim
+*Matrix*
+sizeof(double) * dim * dim
+*Helpers*
+sizeof(int) * (dim+1)
+sizeof(int) * (nnZR + dim)
+sizeof(double) * (nnZR + dim)
 ## Neighborhood
 1 allocation in createImage, 2 allocations in neighborhoodCalculation
diff --git a/dis/gen_input.py b/dis/gen_input.py
index c7821b0..67ee316 100755
--- a/dis/gen_input.py
+++ b/dis/gen_input.py
@@ -24,19 +24,23 @@ def setup_field(params, wss):
    return params
 def setup_matrix(params, wss):
-    nnZR = 0.08 # 8% seems average
+    nnZR = 0.80 # 8% seems average, but it's too fast to compute. Use 80%.
-    # This formula is out of a solver
    si = sizeof(c_int)
    sd = sizeof(c_double)
-    d = (sqrt((si**2) * (-(nnZR-1)) - si*sd*(nnZR-9) + si*wss*nnZR + sd*(25*sd+wss*nnZR+wss)) - si - 5*sd) / (si*nnZR + sd*nnZR + sd)
+    # s is size double, i is size int, z is nnZR
+    # This formula is out of a solver
+    d = (sqrt(sd*sd*(81-4*nnZR) + 4*sd*(wss - si*nnZR + 9*si) - 4) - 9*sd - 2*si) / (2*sd)
    params[1] = floor(d);
-    params[2] = floor(d*d*nnZR);
+    # Make sure that we don't add more elements than the matrix size
+    params[2] = floor(d*d*nnZR)
+    # Set error as low as possible to slow things down
+    params[4] = 1.0e-7
    if params[1] <= 0 or params[2] <= 0:
        raise Exception("WSS too small for matrix benchmark!")
    return params
 def setup_neighborhood(params, wss):
-    bitDepth = 8
+    bitDepth = 15
    bitDepthAlloc = sizeof(c_int) * (2**(bitDepth + 1) - 1)
    dim = sqrt((wss - bitDepthAlloc) / sizeof(c_short))
    params[1] = bitDepth
@@ -51,7 +55,7 @@ def setup_neighborhood(params, wss):
    return params
 def setup_pointer(params, wss):
-    n = 10;
+    n = 1; # Only use one "thread"
    f = (wss - sizeof(c_int) * 4 * n) / sizeof(c_int)
    params[0] = floor(f)
    params[4] = floor(n)
@@ -73,8 +77,8 @@ def setup_update(params, wss):
    params[0] = floor(f)
    if params[0] <= 0:
        raise Exception("WSS too small for update benchmark!")
-    # Don't do more than 100M hops (keeps time array feasible)
+    # Don't do more than 10M hops (keeps time array feasible)
-    params[2] = min(100000000, int(params[2]))
+    params[2] = min(10000000, int(params[2]))
    # Enforce size requirements
    params[4] = min(params[0]-1, int(params[4]))
    params[5] = min(params[0]-1, int(params[5]))
@@ -103,7 +107,7 @@ with open(sys.argv[2], "r") as template:
    params = template.readline().split()
    mutated_params = BENCH_TO_PARAMS[benchmark_name](params, wss);
    print(" ".join(map(lambda x: str(x), mutated_params)))
-    print(" ".join(map(lambda x: str(x), mutated_params)), file=sys.stderr)
+    print("Using", " ".join(map(lambda x: str(x), mutated_params)), "for", benchmark_name, "stressmark", file=sys.stderr)
    if benchmark_name == "pointer":
        # Clone the data format used in the template
        for i in range(0,10):