diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2020-10-17 14:54:28 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2020-10-17 14:54:28 -0400 |
commit | cef282a003b65df452a51f12773799f83c3cd627 (patch) | |
tree | 4be64982f8184df9b3ad23c4b7a3fa04b498ed30 | |
parent | a3886552003d031acb9039e920b7c9ddce946ad6 (diff) |
Fix WSS -> argument generation for Matrix and tune others
Now as used for (rejected) RTSS'20 paper
-rw-r--r-- | dis/WSS_DOCS.md | 15 | ||||
-rwxr-xr-x | dis/gen_input.py | 22 |
2 files changed, 28 insertions, 9 deletions
diff --git a/dis/WSS_DOCS.md b/dis/WSS_DOCS.md index da5e066..3f151b6 100644 --- a/dis/WSS_DOCS.md +++ b/dis/WSS_DOCS.md | |||
@@ -1,4 +1,10 @@ | |||
1 | # Documentation Mapping DIS Stressmark Parameters to WSS | 1 | # Documentation Mapping DIS Stressmark Parameters to WSS |
2 | This details exactly how the input parameters of each Stressmark translate to | ||
3 | their dynamic allocation size. With some algebra, we then convert those | ||
4 | equations such that we can determine the needed parameters for a specific | ||
5 | desired dynamic allocation size. | ||
6 | |||
7 | The equations teased out here are implemented in `gen_input.py`. | ||
2 | 8 | ||
3 | ## Field | 9 | ## Field |
4 | 1 allocation in main() | 10 | 1 allocation in main() |
@@ -18,6 +24,15 @@ sizeof(double) * (dim^2+3dim+numberNonzero) + sizeof(int) * (2dim+1+numberNonzer | |||
18 | *Allocations in biConjugateGradient()* | 24 | *Allocations in biConjugateGradient()* |
19 | sizeof(double) * 7dim | 25 | sizeof(double) * 7dim |
20 | 26 | ||
27 | *Vectors* | ||
28 | sizeof(double) * 8dim | ||
29 | *Matrix* | ||
30 | sizeof(double) * dim * dim | ||
31 | *Helpers* | ||
32 | sizeof(int) * (dim+1) | ||
33 | sizeof(int) * (nnZR + dim) | ||
34 | sizeof(double) * (nnZR + dim) | ||
35 | |||
21 | ## Neighborhood | 36 | ## Neighborhood |
22 | 1 allocation in createImage, 2 allocations in neighborhoodCalculation | 37 | 1 allocation in createImage, 2 allocations in neighborhoodCalculation |
23 | 38 | ||
diff --git a/dis/gen_input.py b/dis/gen_input.py index c7821b0..67ee316 100755 --- a/dis/gen_input.py +++ b/dis/gen_input.py | |||
@@ -24,19 +24,23 @@ def setup_field(params, wss): | |||
24 | return params | 24 | return params |
25 | 25 | ||
26 | def setup_matrix(params, wss): | 26 | def setup_matrix(params, wss): |
27 | nnZR = 0.08 # 8% seems average | 27 | nnZR = 0.80 # 8% seems average, but it's too fast to compute. Use 80%. |
28 | # This formula is out of a solver | ||
29 | si = sizeof(c_int) | 28 | si = sizeof(c_int) |
30 | sd = sizeof(c_double) | 29 | sd = sizeof(c_double) |
31 | d = (sqrt((si**2) * (-(nnZR-1)) - si*sd*(nnZR-9) + si*wss*nnZR + sd*(25*sd+wss*nnZR+wss)) - si - 5*sd) / (si*nnZR + sd*nnZR + sd) | 30 | # s is size double, i is size int, z is nnZR |
31 | # This formula is out of a solver | ||
32 | d = (sqrt(sd*sd*(81-4*nnZR) + 4*sd*(wss - si*nnZR + 9*si) - 4) - 9*sd - 2*si) / (2*sd) | ||
32 | params[1] = floor(d); | 33 | params[1] = floor(d); |
33 | params[2] = floor(d*d*nnZR); | 34 | # Make sure that we don't add more elements than the matrix size |
35 | params[2] = floor(d*d*nnZR) | ||
36 | # Set error as low as possible to slow things down | ||
37 | params[4] = 1.0e-7 | ||
34 | if params[1] <= 0 or params[2] <= 0: | 38 | if params[1] <= 0 or params[2] <= 0: |
35 | raise Exception("WSS too small for matrix benchmark!") | 39 | raise Exception("WSS too small for matrix benchmark!") |
36 | return params | 40 | return params |
37 | 41 | ||
38 | def setup_neighborhood(params, wss): | 42 | def setup_neighborhood(params, wss): |
39 | bitDepth = 8 | 43 | bitDepth = 15 |
40 | bitDepthAlloc = sizeof(c_int) * (2**(bitDepth + 1) - 1) | 44 | bitDepthAlloc = sizeof(c_int) * (2**(bitDepth + 1) - 1) |
41 | dim = sqrt((wss - bitDepthAlloc) / sizeof(c_short)) | 45 | dim = sqrt((wss - bitDepthAlloc) / sizeof(c_short)) |
42 | params[1] = bitDepth | 46 | params[1] = bitDepth |
@@ -51,7 +55,7 @@ def setup_neighborhood(params, wss): | |||
51 | return params | 55 | return params |
52 | 56 | ||
53 | def setup_pointer(params, wss): | 57 | def setup_pointer(params, wss): |
54 | n = 10; | 58 | n = 1; # Only use one "thread" |
55 | f = (wss - sizeof(c_int) * 4 * n) / sizeof(c_int) | 59 | f = (wss - sizeof(c_int) * 4 * n) / sizeof(c_int) |
56 | params[0] = floor(f) | 60 | params[0] = floor(f) |
57 | params[4] = floor(n) | 61 | params[4] = floor(n) |
@@ -73,8 +77,8 @@ def setup_update(params, wss): | |||
73 | params[0] = floor(f) | 77 | params[0] = floor(f) |
74 | if params[0] <= 0: | 78 | if params[0] <= 0: |
75 | raise Exception("WSS too small for update benchmark!") | 79 | raise Exception("WSS too small for update benchmark!") |
76 | # Don't do more than 100M hops (keeps time array feasible) | 80 | # Don't do more than 10M hops (keeps time array feasible) |
77 | params[2] = min(100000000, int(params[2])) | 81 | params[2] = min(10000000, int(params[2])) |
78 | # Enforce size requirements | 82 | # Enforce size requirements |
79 | params[4] = min(params[0]-1, int(params[4])) | 83 | params[4] = min(params[0]-1, int(params[4])) |
80 | params[5] = min(params[0]-1, int(params[5])) | 84 | params[5] = min(params[0]-1, int(params[5])) |
@@ -103,7 +107,7 @@ with open(sys.argv[2], "r") as template: | |||
103 | params = template.readline().split() | 107 | params = template.readline().split() |
104 | mutated_params = BENCH_TO_PARAMS[benchmark_name](params, wss); | 108 | mutated_params = BENCH_TO_PARAMS[benchmark_name](params, wss); |
105 | print(" ".join(map(lambda x: str(x), mutated_params))) | 109 | print(" ".join(map(lambda x: str(x), mutated_params))) |
106 | print(" ".join(map(lambda x: str(x), mutated_params)), file=sys.stderr) | 110 | print("Using", " ".join(map(lambda x: str(x), mutated_params)), "for", benchmark_name, "stressmark", file=sys.stderr) |
107 | if benchmark_name == "pointer": | 111 | if benchmark_name == "pointer": |
108 | # Clone the data format used in the template | 112 | # Clone the data format used in the template |
109 | for i in range(0,10): | 113 | for i in range(0,10): |