diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-01-10 17:48:39 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-01-10 17:48:39 -0500 |
commit | 629486d62ae22c33251d3c367af3febff5fe1e28 (patch) | |
tree | ef78fc8235c61f8ba37d109ea04266b6ce49b804 /gpu | |
parent | 1bf0f0094cd9671adfc07cf840bde67cd4cc0c38 (diff) |
Clean up GPU test code placement.
Diffstat (limited to 'gpu')
-rw-r--r-- | gpu/aux_threads.c | 313 | ||||
-rw-r--r-- | gpu/dgl.c | 251 | ||||
-rw-r--r-- | gpu/ikglptest.c | 633 | ||||
-rw-r--r-- | gpu/locktest.c | 206 | ||||
-rw-r--r-- | gpu/nested.c | 245 | ||||
-rw-r--r-- | gpu/normal_task.c | 84 | ||||
-rw-r--r-- | gpu/rtspin_fake_cuda.cpp | 1169 |
7 files changed, 2901 insertions, 0 deletions
diff --git a/gpu/aux_threads.c b/gpu/aux_threads.c new file mode 100644 index 0000000..6636f36 --- /dev/null +++ b/gpu/aux_threads.c | |||
@@ -0,0 +1,313 @@ | |||
1 | /* based_mt_task.c -- A basic multi-threaded real-time task skeleton. | ||
2 | * | ||
3 | * This (by itself useless) task demos how to setup a multi-threaded LITMUS^RT | ||
4 | * real-time task. Familiarity with the single threaded example (base_task.c) | ||
5 | * is assumed. | ||
6 | * | ||
7 | * Currently, liblitmus still lacks automated support for real-time | ||
8 | * tasks, but internally it is thread-safe, and thus can be used together | ||
9 | * with pthreads. | ||
10 | */ | ||
11 | |||
12 | #include <stdio.h> | ||
13 | #include <stdlib.h> | ||
14 | #include <unistd.h> | ||
15 | |||
16 | #include <fcntl.h> | ||
17 | #include <sys/stat.h> | ||
18 | #include <sys/time.h> | ||
19 | #include <sys/resource.h> | ||
20 | |||
21 | /* Include gettid() */ | ||
22 | #include <sys/types.h> | ||
23 | |||
24 | /* Include threading support. */ | ||
25 | #include <pthread.h> | ||
26 | |||
27 | /* Include the LITMUS^RT API.*/ | ||
28 | #include "litmus.h" | ||
29 | |||
/* Job parameters handed to sporadic_global(). */
//#define PERIOD 500
#define PERIOD 10
//#define EXEC_COST 10
#define EXEC_COST 1

/* Number of aux threads to spawn; overridden by the -t option. */
int NUM_AUX_THREADS = 2;

/* NOTE(review): currently unused in this file. */
#define LITMUS_STATS_FILE "/proc/litmus/stats"

/* The information passed to each thread. Could be anything. */
struct thread_context {
	int id;				/* thread index, assigned by main() */
	struct timeval total_time;	/* user+sys CPU time, recorded at thread exit */
};

/* The real-time thread program. Doesn't have to be the same for
 * all threads. Here, we only have one that will invoke job().
 */
void* rt_thread(void *tcontext);
void* aux_thread(void *tcontext);

/* Declare the periodically invoked job.
 * Returns 1 -> task should exit.
 *         0 -> task should continue.
 */
int job(void);


/* Catch errors: run `exp` and report its success/failure to stderr.
 */
#define CALL( exp ) do { \
		int ret; \
		ret = exp; \
		if (ret != 0) \
			fprintf(stderr, "%s failed: %m\n", #exp);\
		else \
			fprintf(stderr, "%s ok.\n", #exp); \
	} while (0)

/* Cleared by main() to tell the spinning aux threads to stop. */
int gRun = 1;

pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;	/* shared contention point */
pthread_barrier_t gBar;	/* startup barrier; the waits on it are currently commented out */

#define OPTSTR "t:fcb"	/* getopt() option string for main() */
75 | |||
76 | int main(int argc, char** argv) | ||
77 | { | ||
78 | int i; | ||
79 | struct thread_context *ctx; | ||
80 | pthread_t *task; | ||
81 | |||
82 | int opt; | ||
83 | int before = 0; | ||
84 | int aux_flags = 0; | ||
85 | int do_future = 0; | ||
86 | |||
87 | while ((opt = getopt(argc, argv, OPTSTR)) != -1) { | ||
88 | switch(opt) | ||
89 | { | ||
90 | case 't': | ||
91 | NUM_AUX_THREADS = atoi(optarg); | ||
92 | printf("%d aux threads\n", NUM_AUX_THREADS); | ||
93 | break; | ||
94 | case 'f': | ||
95 | aux_flags |= AUX_FUTURE; | ||
96 | do_future = 1; | ||
97 | break; | ||
98 | case 'c': | ||
99 | aux_flags |= AUX_CURRENT; | ||
100 | break; | ||
101 | case 'b': | ||
102 | before = 1; | ||
103 | printf("Will become real-time before spawning aux threads.\n"); | ||
104 | break; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | if (aux_flags == 0) { | ||
109 | printf("Must specify -c (AUX_CURRENT) and/or -f (AUX_FUTURE) for aux tasks.\n"); | ||
110 | return -1; | ||
111 | } | ||
112 | |||
113 | ctx = calloc(NUM_AUX_THREADS, sizeof(struct thread_context)); | ||
114 | task = calloc(NUM_AUX_THREADS, sizeof(pthread_t)); | ||
115 | |||
116 | //lt_t delay = ms2lt(1000); | ||
117 | |||
118 | /***** | ||
119 | * 3) Initialize LITMUS^RT. | ||
120 | * Task parameters will be specified per thread. | ||
121 | */ | ||
122 | init_litmus(); | ||
123 | |||
124 | { | ||
125 | pthread_barrierattr_t battr; | ||
126 | pthread_barrierattr_init(&battr); | ||
127 | pthread_barrier_init(&gBar, &battr, (NUM_AUX_THREADS)+1); | ||
128 | } | ||
129 | |||
130 | if(before) | ||
131 | { | ||
132 | CALL( init_rt_thread() ); | ||
133 | CALL( sporadic_global(EXEC_COST, PERIOD) ); | ||
134 | CALL( task_mode(LITMUS_RT_TASK) ); | ||
135 | } | ||
136 | |||
137 | |||
138 | if(do_future && before) | ||
139 | { | ||
140 | CALL( enable_aux_rt_tasks(aux_flags) ); | ||
141 | } | ||
142 | |||
143 | // printf("Red Leader is now real-time!\n"); | ||
144 | |||
145 | for (i = 0; i < NUM_AUX_THREADS; i++) { | ||
146 | ctx[i].id = i; | ||
147 | pthread_create(task + i, NULL, aux_thread, (void *) (ctx + i)); | ||
148 | } | ||
149 | |||
150 | // pthread_barrier_wait(&gBar); | ||
151 | |||
152 | // sleep(1); | ||
153 | |||
154 | if(!before) | ||
155 | { | ||
156 | CALL( init_rt_thread() ); | ||
157 | CALL( sporadic_global(EXEC_COST, PERIOD) ); | ||
158 | CALL( task_mode(LITMUS_RT_TASK) ); | ||
159 | } | ||
160 | |||
161 | // secondary call *should* be harmless | ||
162 | CALL( enable_aux_rt_tasks(aux_flags) ); | ||
163 | |||
164 | { | ||
165 | int last = time(0); | ||
166 | // struct timespec sleeptime = {0, 1000}; // 1 microsecond | ||
167 | // for(i = 0; i < 24000; ++i) { | ||
168 | for(i = 0; i < 2000; ++i) { | ||
169 | sleep_next_period(); | ||
170 | // printf("RED LEADER!\n"); | ||
171 | |||
172 | // nanosleep(&sleeptime, NULL); | ||
173 | |||
174 | pthread_mutex_lock(&gMutex); | ||
175 | |||
176 | if((i%(10000/PERIOD)) == 0) { | ||
177 | int now = time(0); | ||
178 | printf("hearbeat %d: %d\n", i, now - last); | ||
179 | last = now; | ||
180 | } | ||
181 | |||
182 | pthread_mutex_unlock(&gMutex); | ||
183 | } | ||
184 | } | ||
185 | |||
186 | CALL( disable_aux_rt_tasks(aux_flags) ); | ||
187 | gRun = 0; | ||
188 | |||
189 | CALL( task_mode(BACKGROUND_TASK) ); | ||
190 | |||
191 | /***** | ||
192 | * 5) Wait for RT threads to terminate. | ||
193 | */ | ||
194 | for (i = 0; i < NUM_AUX_THREADS; i++) { | ||
195 | if (task[i] != 0) { | ||
196 | float time; | ||
197 | pthread_join(task[i], NULL); | ||
198 | time = ctx[i].total_time.tv_sec + ctx[i].total_time.tv_usec / (float)(1e6); | ||
199 | printf("child %d: %fs\n", i, time); | ||
200 | } | ||
201 | } | ||
202 | |||
203 | |||
204 | /***** | ||
205 | * 6) Clean up, maybe print results and stats, and exit. | ||
206 | */ | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | |||
211 | |||
212 | /* A real-time thread is very similar to the main function of a single-threaded | ||
213 | * real-time app. Notice, that init_rt_thread() is called to initialize per-thread | ||
214 | * data structures of the LITMUS^RT user space library. | ||
215 | */ | ||
216 | void* aux_thread(void *tcontext) | ||
217 | { | ||
218 | struct thread_context *ctx = (struct thread_context *) tcontext; | ||
219 | int count = 0; | ||
220 | |||
221 | // pthread_barrier_wait(&gBar); | ||
222 | |||
223 | while(gRun) | ||
224 | { | ||
225 | if(count++ % 100000 == 0) { | ||
226 | pthread_mutex_lock(&gMutex); | ||
227 | pthread_mutex_unlock(&gMutex); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | { | ||
232 | struct rusage use; | ||
233 | long int sec; | ||
234 | |||
235 | getrusage(RUSAGE_THREAD, &use); | ||
236 | |||
237 | ctx->total_time.tv_usec = use.ru_utime.tv_usec + use.ru_stime.tv_usec; | ||
238 | sec = ctx->total_time.tv_usec / (long int)(1e6); | ||
239 | ctx->total_time.tv_usec = ctx->total_time.tv_usec % (long int)(1e6); | ||
240 | ctx->total_time.tv_sec = use.ru_utime.tv_sec + use.ru_stime.tv_sec + sec; | ||
241 | } | ||
242 | |||
243 | return ctx; | ||
244 | } | ||
245 | |||
246 | |||
247 | /* A real-time thread is very similar to the main function of a single-threaded | ||
248 | * real-time app. Notice, that init_rt_thread() is called to initialize per-thread | ||
249 | * data structures of the LITMUS^RT user space library. | ||
250 | */ | ||
/* Real-time thread body (not spawned by main() in this test, which uses
 * aux_thread; kept as reference).  Becomes a LITMUS^RT sporadic task,
 * runs jobs until gRun is cleared, then drops back to background mode
 * and records its total CPU time in the shared context.
 *
 * tcontext: pointer to this thread's struct thread_context.
 * Returns the same context pointer.
 */
void* rt_thread(void *tcontext)
{
	struct thread_context *ctx = (struct thread_context *) tcontext;

	/* Make presence visible. */
	printf("RT Thread %d active.\n", ctx->id);

	/*****
	 * 1) Initialize real-time settings.
	 */
	CALL( init_rt_thread() );
	/* Vary the period per thread so tasks are not all in phase. */
	CALL( sporadic_global(EXEC_COST, PERIOD + ctx->id * 10) );


	/*****
	 * 2) Transition to real-time mode.
	 */
	CALL( task_mode(LITMUS_RT_TASK) );


	/* Block until the synchronous task-system release. */
	wait_for_ts_release();

	/* The task is now executing as a real-time task if the call didn't fail.
	 */


	/*****
	 * 3) Invoke real-time jobs until main() clears gRun.
	 */
	while(gRun) {
		/* Wait until the next job is released. */
		sleep_next_period();
		printf("%d: task.\n", ctx->id);
	}

	/*****
	 * 4) Transition to background mode.
	 */
	CALL( task_mode(BACKGROUND_TASK) );

	{
		/* Record user+system CPU time, normalized into a timeval. */
		struct rusage use;
		long int sec;

		getrusage(RUSAGE_THREAD, &use);
		ctx->total_time.tv_usec = use.ru_utime.tv_usec + use.ru_stime.tv_usec;
		sec = ctx->total_time.tv_usec / (long int)(1e6);
		ctx->total_time.tv_usec = ctx->total_time.tv_usec % (long int)(1e6);
		ctx->total_time.tv_sec = use.ru_utime.tv_sec + use.ru_stime.tv_sec + sec;
	}

	return ctx;
}
306 | |||
/* Periodic job body.  Returns nonzero when the task should terminate;
 * this skeleton never requests termination. */
int job(void)
{
	/* Real-time computation would go here. */

	/* Keep running. */
	return 0;
}
diff --git a/gpu/dgl.c b/gpu/dgl.c new file mode 100644 index 0000000..a045879 --- /dev/null +++ b/gpu/dgl.c | |||
@@ -0,0 +1,251 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <stdint.h> | ||
4 | #include <unistd.h> | ||
5 | #include <assert.h> | ||
6 | #include <errno.h> | ||
7 | #include <sys/types.h> | ||
8 | #include <sys/stat.h> | ||
9 | #include <fcntl.h> | ||
10 | |||
11 | /* Include gettid() */ | ||
12 | #include <sys/types.h> | ||
13 | |||
14 | /* Include threading support. */ | ||
15 | #include <pthread.h> | ||
16 | |||
17 | /* Include the LITMUS^RT API.*/ | ||
18 | #include "litmus.h" | ||
19 | |||
/* Catch errors: run `exp` and report its success/failure to stderr.
 */
#define CALL( exp ) do { \
		int ret; \
		ret = exp; \
		if (ret != 0) \
			fprintf(stderr, "%s failed: %m\n", #exp);\
		else \
			fprintf(stderr, "%s ok.\n", #exp); \
	} while (0)

/* Like CALL, but tags the message with the calling thread's id
 * (expects a `struct thread_context *ctx` in scope). */
#define TH_CALL( exp ) do { \
		int ret; \
		ret = exp; \
		if (ret != 0) \
			fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
		else \
			fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
	} while (0)

/* Like TH_CALL, but also announces the call before making it. */
#define TH_SAFE_CALL( exp ) do { \
		int ret; \
		fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
		ret = exp; \
		if (ret != 0) \
			fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
		else \
			fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
	} while (0)


/* these are only default values; overridden by command-line options */
int NUM_THREADS=3;		/* -t */
int NUM_SEMS=1;			/* -s */
int NUM_REPLICAS=1;		/* -k: IKGLP replica count */
int NEST_DEPTH=1;		/* -d: max semaphores locked at once */

int SLEEP_BETWEEN_JOBS = 1;	/* cleared by -f */

#define MAX_SEMS 1000
#define MAX_NEST_DEPTH 10


// 1000 = 1us
#define EXEC_COST 1000*1
#define PERIOD 1000*10

/* The information passed to each thread. Could be anything. */
struct thread_context {
	int id;			/* thread index */
	int fd;			/* fd of the shared semaphore namespace file */
	int ikglp;		/* od of the shared IKGLP semaphore */
	int od[MAX_SEMS];	/* ods of the nested RSM semaphores */
	int count;		/* jobs completed so far */
	unsigned int rand;	/* per-thread rand_r() state */
};

void* rt_thread(void* _ctx);
int nested_job(struct thread_context* ctx, int *count, int *next);
int job(struct thread_context*);

#define OPTSTR "t:k:s:d:f"	/* getopt() option string for main() */
82 | |||
83 | int main(int argc, char** argv) | ||
84 | { | ||
85 | int i; | ||
86 | struct thread_context* ctx; | ||
87 | pthread_t* task; | ||
88 | int fd; | ||
89 | |||
90 | int opt; | ||
91 | while((opt = getopt(argc, argv, OPTSTR)) != -1) { | ||
92 | switch(opt) { | ||
93 | case 't': | ||
94 | NUM_THREADS = atoi(optarg); | ||
95 | break; | ||
96 | case 'k': | ||
97 | NUM_REPLICAS = atoi(optarg); | ||
98 | assert(NUM_REPLICAS > 0); | ||
99 | break; | ||
100 | case 's': | ||
101 | NUM_SEMS = atoi(optarg); | ||
102 | assert(NUM_SEMS >= 0 && NUM_SEMS <= MAX_SEMS); | ||
103 | break; | ||
104 | case 'd': | ||
105 | NEST_DEPTH = atoi(optarg); | ||
106 | assert(NEST_DEPTH >= 1 && NEST_DEPTH <= MAX_NEST_DEPTH); | ||
107 | break; | ||
108 | case 'f': | ||
109 | SLEEP_BETWEEN_JOBS = 0; | ||
110 | break; | ||
111 | default: | ||
112 | fprintf(stderr, "Unknown option: %c\n", opt); | ||
113 | exit(-1); | ||
114 | break; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context)); | ||
119 | task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t)); | ||
120 | |||
121 | srand(0); /* something repeatable for now */ | ||
122 | |||
123 | fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR); | ||
124 | |||
125 | CALL( init_litmus() ); | ||
126 | |||
127 | for (i = 0; i < NUM_THREADS; i++) { | ||
128 | ctx[i].id = i; | ||
129 | ctx[i].fd = fd; | ||
130 | ctx[i].rand = rand(); | ||
131 | CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) ); | ||
132 | } | ||
133 | |||
134 | |||
135 | for (i = 0; i < NUM_THREADS; i++) | ||
136 | pthread_join(task[i], NULL); | ||
137 | |||
138 | |||
139 | return 0; | ||
140 | } | ||
141 | |||
/* Real-time thread body: becomes a sporadic LITMUS^RT task, opens the
 * shared IKGLP semaphore and the nested RSM semaphores, then repeatedly:
 *   acquire an IKGLP replica -> DGL-lock a random window of RSM sems
 *   -> job() -> DGL-unlock -> release the replica
 * until job() signals completion.
 *
 * _ctx: pointer to this thread's struct thread_context.
 * Returns NULL.
 */
void* rt_thread(void* _ctx)
{
	int i;
	int do_exit = 0;

	struct thread_context *ctx = (struct thread_context*)_ctx;

	TH_CALL( init_rt_thread() );

	/* Vary period a little bit. */
	TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*ctx->id, 0, 0, LITMUS_LOWEST_PRIORITY,
			RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 0) );

	/* Name 0 in the namespace file is the shared IKGLP semaphore. */
	ctx->ikglp = open_ikglp_sem(ctx->fd, 0, (void*)&NUM_REPLICAS);
	if(ctx->ikglp < 0)
		perror("open_ikglp_sem");
	else
		printf("ikglp od = %d\n", ctx->ikglp);

	/* Names 1..NUM_SEMS are the nestable RSM semaphores. */
	for (i = 0; i < NUM_SEMS; i++) {
		ctx->od[i] = open_rsm_sem(ctx->fd, i+1);
		if(ctx->od[i] < 0)
			perror("open_rsm_sem");
		else
			printf("rsm[%d] od = %d\n", i, ctx->od[i]);
	}

	TH_CALL( task_mode(LITMUS_RT_TASK) );


	printf("[%d] Waiting for TS release.\n ", ctx->id);
	wait_for_ts_release();
	ctx->count = 0;

	do {
		int replica = -1;
		/* Pick a random window [first, last] of at most NEST_DEPTH sems. */
		int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0)));
		int last = (first + NEST_DEPTH - 1 >= NUM_SEMS) ? NUM_SEMS - 1 : first + NEST_DEPTH - 1;
		int dgl_size = last - first + 1;
		int dgl[dgl_size];

		// construct the DGL
		for(i = first; i <= last; ++i) {
			dgl[i-first] = ctx->od[i];
		}

		/* Acquire a k-exclusion replica first, then the whole DGL at once. */
		replica = litmus_lock(ctx->ikglp);
		printf("[%d] got ikglp replica %d.\n", ctx->id, replica);
		fflush(stdout);


		litmus_dgl_lock(dgl, dgl_size);
		printf("[%d] acquired dgl.\n", ctx->id);
		fflush(stdout);


		do_exit = job(ctx);


		/* Release in reverse order of acquisition. */
		printf("[%d] unlocking dgl.\n", ctx->id);
		fflush(stdout);
		litmus_dgl_unlock(dgl, dgl_size);


		printf("[%d]: freeing ikglp replica %d.\n", ctx->id, replica);
		fflush(stdout);
		litmus_unlock(ctx->ikglp);

		if(SLEEP_BETWEEN_JOBS && !do_exit) {
			sleep_next_period();
		}
	} while(!do_exit);

	/*****
	 * 4) Transition to background mode.
	 */
	TH_CALL( task_mode(BACKGROUND_TASK) );


	return NULL;
}
224 | |||
/* Touch roughly `kb` KB of stack (1 KB per recursion level) to dirty the
 * cache.  The reads of the uninitialized buffer are intentional: only the
 * memory traffic matters, not the values.
 *
 * kb: number of KB to touch; kb <= 0 is now a no-op.
 */
void dirty_kb(int kb)
{
	int32_t one_kb[256];
	int32_t sum = 0;
	int32_t i;

	/* BUGFIX: dirty_kb(0) used to decrement kb to -1 and then recurse
	 * until stack overflow, since `if (kb)` is true for negative kb. */
	if (kb <= 0)
		return;

	for (i = 0; i < 256; i++)
		sum += one_kb[i];
	kb--;
	/* prevent tail recursion */
	if (kb)
		dirty_kb(kb);
	for (i = 0; i < 256; i++)
		sum += one_kb[i];
}
240 | |||
241 | int job(struct thread_context* ctx) | ||
242 | { | ||
243 | /* Do real-time calculation. */ | ||
244 | dirty_kb(8); | ||
245 | |||
246 | /* Don't exit. */ | ||
247 | //return ctx->count++ > 100; | ||
248 | //return ctx->count++ > 12000; | ||
249 | //return ctx->count++ > 120000; | ||
250 | return ctx->count++ > 50000; // controls number of jobs per task | ||
251 | } | ||
diff --git a/gpu/ikglptest.c b/gpu/ikglptest.c new file mode 100644 index 0000000..5f566d5 --- /dev/null +++ b/gpu/ikglptest.c | |||
@@ -0,0 +1,633 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <string.h> | ||
4 | #include <stdint.h> | ||
5 | #include <unistd.h> | ||
6 | #include <assert.h> | ||
7 | #include <errno.h> | ||
8 | #include <sys/types.h> | ||
9 | #include <sys/stat.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <time.h> | ||
12 | #include <math.h> | ||
13 | |||
14 | /* Include gettid() */ | ||
15 | #include <sys/types.h> | ||
16 | |||
17 | /* Include threading support. */ | ||
18 | #include <pthread.h> | ||
19 | |||
20 | /* Include the LITMUS^RT API.*/ | ||
21 | #include "litmus.h" | ||
22 | |||
/* Catch errors: run `exp` and report its success/failure to stderr.
 * Flip the #if to 0 to compile all checks out.
 */
#if 1
#define CALL( exp ) do { \
		int ret; \
		ret = exp; \
		if (ret != 0) \
			fprintf(stderr, "%s failed: %m\n", #exp);\
		else \
			fprintf(stderr, "%s ok.\n", #exp); \
	} while (0)

/* Like CALL, but tags the message with the calling thread's id
 * (expects a `struct thread_context *ctx` in scope). */
#define TH_CALL( exp ) do { \
		int ret; \
		ret = exp; \
		if (ret != 0) \
			fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
		else \
			fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
	} while (0)

/* Like TH_CALL, but also announces the call before making it. */
#define TH_SAFE_CALL( exp ) do { \
		int ret; \
		fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
		ret = exp; \
		if (ret != 0) \
			fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
		else \
			fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
	} while (0)
#else
#define CALL( exp )
#define TH_CALL( exp )
#define TH_SAFE_CALL( exp )
#endif

/* these are only default values; overridden by options (see OPTSTR) */
int NUM_THREADS=3;		/* -t: real-time worker threads */
int NUM_AUX_THREADS=0;		/* -A: aux (helper) threads */
int NUM_SEMS=1;			/* -s: nested RSM semaphores */
int NUM_GPUS=1;			/* -k: GPU (replica) count */
int GPU_OFFSET=0;		/* -o: index of first GPU */
int NUM_SIMULT_USERS = 1;	/* -z: simultaneous users per GPU */
int ENABLE_AFFINITY = 0;	/* -a */
int NEST_DEPTH=1;		/* -d */
int USE_KFMLP = 0;		/* -l: KFMLP instead of IKGLP */
int RELAX_FIFO_MAX_LEN = 0;	/* -r */
int USE_DYNAMIC_GROUP_LOCKS = 0;	/* -y */

int SLEEP_BETWEEN_JOBS = 1;	/* cleared by -f */

/* Cleared by main() (between __sync_synchronize() fences) to stop the aux threads. */
int gAuxRun = 1;
pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;	/* shared contention point */

#define MAX_SEMS 1000

// 1000 = 1us
#define EXEC_COST 1000*1
#define PERIOD 2*1000*100

/* The information passed to each thread. Could be anything. */
struct thread_context {
	int id;			/* thread index */
	int fd;			/* fd of the shared semaphore namespace file */
	int kexclu;		/* od of the k-exclusion (KFMLP/IKGLP) semaphore */
	int od[MAX_SEMS];	/* ods of the nested RSM semaphores */
	int count;		/* jobs completed so far */
	unsigned int rand;	/* per-thread rand_r() state */
	int mig_count[5];	/* histogram of migration distances (see affinity_distance) */
};

void* rt_thread(void* _ctx);
void* aux_thread(void* _ctx);
int nested_job(struct thread_context* ctx, int *count, int *next, int runfactor);
int job(struct thread_context* ctx, int runfactor);


/* Result of one feedback() simulation: mean absolute relative error
 * (percent) and its standard deviation. */
struct avg_info
{
	float avg;
	float stdev;
};
105 | |||
106 | struct avg_info feedback(int _a, int _b) | ||
107 | { | ||
108 | fp_t a = _frac(_a, 10000); | ||
109 | fp_t b = _frac(_b, 10000); | ||
110 | int i; | ||
111 | |||
112 | fp_t actual_fp; | ||
113 | |||
114 | fp_t _est, _err; | ||
115 | |||
116 | int base = 1000000; | ||
117 | //int range = 40; | ||
118 | |||
119 | fp_t est = _integer_to_fp(base); | ||
120 | fp_t err = _fp(base/2); | ||
121 | |||
122 | #define NUM_SAMPLES 10000 | ||
123 | |||
124 | float samples[NUM_SAMPLES] = {0.0}; | ||
125 | float accu_abs, accu; | ||
126 | float avg; | ||
127 | float devsum; | ||
128 | float stdev; | ||
129 | struct avg_info ret; | ||
130 | |||
131 | for(i = 0; i < NUM_SAMPLES; ++i) { | ||
132 | int num = ((rand()%40)*(rand()%2 ? -1 : 1)/100.0)*base + base; | ||
133 | float rel_err; | ||
134 | |||
135 | actual_fp = _integer_to_fp(num); | ||
136 | |||
137 | // printf("Before: est = %d\terr = %d\n", (int)_fp_to_integer(est), (int)_fp_to_integer(err)); | ||
138 | |||
139 | _err = _sub(actual_fp, est); | ||
140 | _est = _add(_mul(a, _err), _mul(b, err)); | ||
141 | |||
142 | rel_err = _fp_to_integer(_mul(_div(_err, est), _integer_to_fp(10000)))/10000.0; | ||
143 | rel_err *= 100.0; | ||
144 | //printf("%6.2f\n", rel_err); | ||
145 | samples[i] = rel_err; | ||
146 | |||
147 | est = _est; | ||
148 | err = _add(err, _err); | ||
149 | |||
150 | if((int)_fp_to_integer(est) <= 0) { | ||
151 | est = actual_fp; | ||
152 | err = _div(actual_fp, _integer_to_fp(2)); | ||
153 | } | ||
154 | |||
155 | //printf("After: est = %d\terr = %d\n", (int)_fp_to_integer(est), (int)_fp_to_integer(err)); | ||
156 | } | ||
157 | |||
158 | accu_abs = 0.0; | ||
159 | accu = 0.0; | ||
160 | for(i = 0; i < NUM_SAMPLES; ++i) { | ||
161 | accu += samples[i]; | ||
162 | accu_abs += abs(samples[i]); | ||
163 | } | ||
164 | |||
165 | avg = accu_abs/NUM_SAMPLES; | ||
166 | devsum = 0; | ||
167 | for(i = 0; i < NUM_SAMPLES; ++i) { | ||
168 | float dev = samples[i] - avg; | ||
169 | dev *= dev; | ||
170 | devsum += dev; | ||
171 | } | ||
172 | |||
173 | stdev = sqrtf(devsum/(NUM_SAMPLES-1)); | ||
174 | |||
175 | ret.avg = avg; | ||
176 | ret.stdev = stdev; | ||
177 | |||
178 | //printf("AVG: %6.2f\tw/ neg: %6.2f\n", accu_abs/NUM_SAMPLES, accu/NUM_SAMPLES); | ||
179 | |||
180 | //return (accu_abs/NUM_SAMPLES); | ||
181 | return(ret); | ||
182 | } | ||
183 | |||
184 | |||
185 | |||
186 | #define OPTSTR "t:k:o:z:s:d:lfaryA:" | ||
187 | |||
188 | int main(int argc, char** argv) | ||
189 | { | ||
190 | int i; | ||
191 | struct thread_context* ctx; | ||
192 | struct thread_context* aux_ctx; | ||
193 | pthread_t* task; | ||
194 | pthread_t* aux_task; | ||
195 | int fd; | ||
196 | |||
197 | int opt; | ||
198 | while((opt = getopt(argc, argv, OPTSTR)) != -1) { | ||
199 | switch(opt) { | ||
200 | case 't': | ||
201 | NUM_THREADS = atoi(optarg); | ||
202 | break; | ||
203 | case 'A': | ||
204 | NUM_AUX_THREADS = atoi(optarg); | ||
205 | break; | ||
206 | case 'k': | ||
207 | NUM_GPUS = atoi(optarg); | ||
208 | assert(NUM_GPUS > 0); | ||
209 | break; | ||
210 | case 'z': | ||
211 | NUM_SIMULT_USERS = atoi(optarg); | ||
212 | assert(NUM_SIMULT_USERS > 0); | ||
213 | break; | ||
214 | case 'o': | ||
215 | GPU_OFFSET = atoi(optarg); | ||
216 | assert(GPU_OFFSET >= 0); | ||
217 | break; | ||
218 | case 's': | ||
219 | NUM_SEMS = atoi(optarg); | ||
220 | assert(NUM_SEMS >= 0 && NUM_SEMS < MAX_SEMS); | ||
221 | break; | ||
222 | case 'd': | ||
223 | NEST_DEPTH = atoi(optarg); | ||
224 | assert(NEST_DEPTH >= 0); | ||
225 | break; | ||
226 | case 'f': | ||
227 | SLEEP_BETWEEN_JOBS = 0; | ||
228 | break; | ||
229 | case 'a': | ||
230 | ENABLE_AFFINITY = 1; | ||
231 | break; | ||
232 | case 'l': | ||
233 | USE_KFMLP = 1; | ||
234 | break; | ||
235 | case 'y': | ||
236 | USE_DYNAMIC_GROUP_LOCKS = 1; | ||
237 | break; | ||
238 | case 'r': | ||
239 | RELAX_FIFO_MAX_LEN = 1; | ||
240 | break; | ||
241 | default: | ||
242 | fprintf(stderr, "Unknown option: %c\n", opt); | ||
243 | exit(-1); | ||
244 | break; | ||
245 | } | ||
246 | } | ||
247 | |||
248 | #if 0 | ||
249 | int best_a = 0, best_b = 0; | ||
250 | int first = 1; | ||
251 | int TRIALS = 15; | ||
252 | |||
253 | int a, b, t; | ||
254 | |||
255 | struct avg_info best = {0.0,0.0}, second_best; | ||
256 | |||
257 | int second_best_a, second_best_b; | ||
258 | |||
259 | srand(time(0)); | ||
260 | |||
261 | int step = 50; | ||
262 | |||
263 | for(b = 2000; b < 5000; b += step) { | ||
264 | for(a = 1500; a < b; a += (step/4)) { | ||
265 | float std_accum = 0; | ||
266 | float avg_accum = 0; | ||
267 | for(t = 0; t < TRIALS; ++t) { | ||
268 | struct avg_info temp; | ||
269 | temp = feedback(a, b); | ||
270 | std_accum += temp.stdev; | ||
271 | avg_accum += temp.avg; | ||
272 | } | ||
273 | |||
274 | float avg_std = std_accum / TRIALS; | ||
275 | |||
276 | if(first || avg_std < best.stdev) { | ||
277 | second_best_a = best_a; | ||
278 | second_best_b = best_b; | ||
279 | second_best = best; | ||
280 | |||
281 | best.stdev = avg_std; | ||
282 | best.avg = avg_accum / TRIALS; | ||
283 | best_a = a; | ||
284 | best_b = b; | ||
285 | |||
286 | first = 0; | ||
287 | } | ||
288 | } | ||
289 | } | ||
290 | |||
291 | printf("Best:\ta = %d\tb = %d\t(b-a) = %d\tavg = %6.2f\tstdev = %6.2f\n", best_a, best_b, best_b - best_a, best.avg, best.stdev); | ||
292 | printf("2nd:\ta = %d\tb = %d\t(b-a) = %d\tavg = %6.2f\tstdev = %6.2f\n", second_best_a, second_best_b, second_best_b - second_best_a, second_best.avg, second_best.stdev); | ||
293 | |||
294 | |||
295 | a = 14008; | ||
296 | b = 16024; | ||
297 | float std_accum = 0; | ||
298 | float avg_accum = 0; | ||
299 | for(t = 0; t < TRIALS; ++t) { | ||
300 | struct avg_info temp; | ||
301 | temp = feedback(a, b); | ||
302 | std_accum += temp.stdev; | ||
303 | avg_accum += temp.avg; | ||
304 | } | ||
305 | |||
306 | printf("Aaron:\tavg = %6.2f\tstd = %6.2f\n", avg_accum/TRIALS, std_accum/TRIALS); | ||
307 | |||
308 | |||
309 | |||
310 | |||
311 | return 0; | ||
312 | #endif | ||
313 | |||
314 | |||
315 | |||
316 | |||
317 | ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context)); | ||
318 | task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t)); | ||
319 | |||
320 | if (NUM_AUX_THREADS) { | ||
321 | aux_ctx = (struct thread_context*) calloc(NUM_AUX_THREADS, sizeof(struct thread_context)); | ||
322 | aux_task = (pthread_t*) calloc(NUM_AUX_THREADS, sizeof(pthread_t)); | ||
323 | } | ||
324 | |||
325 | srand(0); /* something repeatable for now */ | ||
326 | |||
327 | fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR); | ||
328 | |||
329 | CALL( init_litmus() ); | ||
330 | |||
331 | for (i = 0; i < NUM_AUX_THREADS; i++) { | ||
332 | aux_ctx[i].id = i; | ||
333 | CALL( pthread_create(aux_task + i, NULL, aux_thread, ctx + i) ); | ||
334 | } | ||
335 | |||
336 | for (i = 0; i < NUM_THREADS; i++) { | ||
337 | ctx[i].id = i; | ||
338 | ctx[i].fd = fd; | ||
339 | ctx[i].rand = rand(); | ||
340 | memset(&ctx[i].mig_count, 0, sizeof(ctx[i].mig_count)); | ||
341 | CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) ); | ||
342 | } | ||
343 | |||
344 | if (NUM_AUX_THREADS) { | ||
345 | TH_CALL( init_rt_thread() ); | ||
346 | TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*NUM_THREADS+1, 0, 0, | ||
347 | LITMUS_LOWEST_PRIORITY, RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 1) ); | ||
348 | TH_CALL( task_mode(LITMUS_RT_TASK) ); | ||
349 | |||
350 | printf("[MASTER] Waiting for TS release.\n "); | ||
351 | wait_for_ts_release(); | ||
352 | |||
353 | CALL( enable_aux_rt_tasks(AUX_CURRENT) ); | ||
354 | |||
355 | for(i = 0; i < 25000; ++i) { | ||
356 | sleep_next_period(); | ||
357 | pthread_mutex_lock(&gMutex); | ||
358 | pthread_mutex_unlock(&gMutex); | ||
359 | } | ||
360 | |||
361 | CALL( disable_aux_rt_tasks(AUX_CURRENT) ); | ||
362 | __sync_synchronize(); | ||
363 | gAuxRun = 0; | ||
364 | __sync_synchronize(); | ||
365 | |||
366 | for (i = 0; i < NUM_AUX_THREADS; i++) | ||
367 | pthread_join(aux_task[i], NULL); | ||
368 | |||
369 | TH_CALL( task_mode(BACKGROUND_TASK) ); | ||
370 | } | ||
371 | |||
372 | for (i = 0; i < NUM_THREADS; i++) | ||
373 | pthread_join(task[i], NULL); | ||
374 | |||
375 | return 0; | ||
376 | } | ||
377 | |||
378 | int affinity_cost[] = {1, 4, 8, 16}; | ||
379 | |||
380 | int affinity_distance(struct thread_context* ctx, int a, int b) | ||
381 | { | ||
382 | int i; | ||
383 | int dist; | ||
384 | |||
385 | if(a >= 0 && b >= 0) { | ||
386 | for(i = 0; i <= 3; ++i) { | ||
387 | if(a>>i == b>>i) { | ||
388 | dist = i; | ||
389 | goto out; | ||
390 | } | ||
391 | } | ||
392 | dist = 0; // hopefully never reached. | ||
393 | } | ||
394 | else { | ||
395 | dist = 0; | ||
396 | } | ||
397 | |||
398 | out: | ||
399 | //printf("[%d]: distance: %d -> %d = %d\n", ctx->id, a, b, dist); | ||
400 | |||
401 | ++(ctx->mig_count[dist]); | ||
402 | |||
403 | return dist; | ||
404 | |||
405 | // int groups[] = {2, 4, 8}; | ||
406 | // int i; | ||
407 | // | ||
408 | // if(a < 0 || b < 0) | ||
409 | // return (sizeof(groups)/sizeof(groups[0])); // worst affinity | ||
410 | // | ||
411 | // // no migration | ||
412 | // if(a == b) | ||
413 | // return 0; | ||
414 | // | ||
415 | // for(i = 0; i < sizeof(groups)/sizeof(groups[0]); ++i) { | ||
416 | // if(a/groups[i] == b/groups[i]) | ||
417 | // return (i+1); | ||
418 | // } | ||
419 | // assert(0); | ||
420 | // return -1; | ||
421 | } | ||
422 | |||
423 | |||
424 | void* aux_thread(void* _ctx) | ||
425 | { | ||
426 | struct thread_context *ctx = (struct thread_context*)_ctx; | ||
427 | |||
428 | while (gAuxRun) { | ||
429 | pthread_mutex_lock(&gMutex); | ||
430 | pthread_mutex_unlock(&gMutex); | ||
431 | } | ||
432 | |||
433 | return ctx; | ||
434 | } | ||
435 | |||
/* Real-time thread body for the GPU k-exclusion test.
 *
 * Each job: (1) acquires one GPU "replica" through the k-exclusion lock
 * (k-FMLP or IKGLP, with affinity support enabled via ENABLE_AFFINITY),
 * (2) acquires a contiguous window of up to NEST_DEPTH RSM semaphores,
 * either atomically as a dynamic group lock (DGL) or one at a time,
 * (3) runs job() with a cost scaled by the affinity distance of the GPU
 * migration, and (4) releases everything in reverse order.
 */
void* rt_thread(void* _ctx)
{
	int i;
	int do_exit = 0;
	int last_replica = -1; /* replica held by the previous job; -1 = none yet */

	struct thread_context *ctx = (struct thread_context*)_ctx;

	TH_CALL( init_rt_thread() );

	/* Vary period a little bit. */
	TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*ctx->id, 0, 0,
		LITMUS_LOWEST_PRIORITY, RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 1) );

	/* open the shared k-exclusion GPU lock */
	if(USE_KFMLP) {
		ctx->kexclu = open_kfmlp_gpu_sem(ctx->fd,
			0, /* name */
			NUM_GPUS,
			GPU_OFFSET,
			NUM_SIMULT_USERS,
			ENABLE_AFFINITY
			);
	}
	else {
//		ctx->kexclu = open_ikglp_sem(ctx->fd, 0, &NUM_GPUS);
		ctx->kexclu = open_ikglp_gpu_sem(ctx->fd,
			0, /* name */
			NUM_GPUS,
			GPU_OFFSET,
			NUM_SIMULT_USERS,
			ENABLE_AFFINITY,
			RELAX_FIFO_MAX_LEN
			);
	}
	if(ctx->kexclu < 0)
		perror("open_kexclu_sem");
	else
		printf("kexclu od = %d\n", ctx->kexclu);

	/* RSM semaphore names are placed just past the k-exclusion lock's od */
	for (i = 0; i < NUM_SEMS; ++i) {
		ctx->od[i] = open_rsm_sem(ctx->fd, i + ctx->kexclu + 2);
		if(ctx->od[i] < 0)
			perror("open_rsm_sem");
		else
			printf("rsm[%d] od = %d\n", i, ctx->od[i]);
	}

	TH_CALL( task_mode(LITMUS_RT_TASK) );

	printf("[%d] Waiting for TS release.\n ", ctx->id);
	wait_for_ts_release();
	ctx->count = 0;

//	if (ctx->id == 0 && NUM_AUX_THREADS) {
//		CALL( enable_aux_rt_tasks() );
//	}

	do {
		/* pick a random contiguous window [first, last] of RSM semaphores,
		 * clamped to the end of the array */
		int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0)));
		int last = (first + NEST_DEPTH - 1 >= NUM_SEMS) ? NUM_SEMS - 1 : first + NEST_DEPTH - 1;
		int dgl_size = last - first + 1;
		int replica = -1;
		int distance;

		int dgl[dgl_size];

		// construct the DGL
		for(i = first; i <= last; ++i) {
			dgl[i-first] = ctx->od[i];
		}

		/* blocking request; return value is the GPU replica granted */
		replica = litmus_lock(ctx->kexclu);

		//printf("[%d] got kexclu replica %d.\n", ctx->id, replica);
		//fflush(stdout);

		/* affinity distance between this job's GPU and the last one */
		distance = affinity_distance(ctx, replica, last_replica);

		if(USE_DYNAMIC_GROUP_LOCKS) {
			litmus_dgl_lock(dgl, dgl_size);
		}
		else {
			/* acquire individually, in index order */
			for(i = 0; i < dgl_size; ++i) {
				litmus_lock(dgl[i]);
			}
		}

		//do_exit = nested_job(ctx, &count, &first, affinity_cost[distance]);
		do_exit = job(ctx, affinity_cost[distance]);

		if(USE_DYNAMIC_GROUP_LOCKS) {
			litmus_dgl_unlock(dgl, dgl_size);
		}
		else {
			/* release in reverse (LIFO) order */
			for(i = dgl_size - 1; i >= 0; --i) {
				litmus_unlock(dgl[i]);
			}
		}

		//printf("[%d]: freeing kexclu replica %d.\n", ctx->id, replica);
		//fflush(stdout);

		litmus_unlock(ctx->kexclu);

		last_replica = replica;

		if(SLEEP_BETWEEN_JOBS && !do_exit) {
			sleep_next_period();
		}
	} while(!do_exit);

//	if (ctx->id == 0 && NUM_AUX_THREADS) {
//		gAuxRun = 0;
//		__sync_synchronize();
//		CALL( disable_aux_rt_tasks() );
//	}

	/*****
	 * 4) Transition to background mode.
	 */
	TH_CALL( task_mode(BACKGROUND_TASK) );

	/* per-distance migration counters (presumably updated inside
	 * affinity_distance() -- its body is not visible here; verify) */
	for(i = 0; i < sizeof(ctx->mig_count)/sizeof(ctx->mig_count[0]); ++i)
	{
		printf("[%d]: mig_count[%d] = %d\n", ctx->id, i, ctx->mig_count[i]);
	}

	return NULL;
}
565 | |||
566 | //int nested_job(struct thread_context* ctx, int *count, int *next, int runfactor) | ||
567 | //{ | ||
568 | // int ret; | ||
569 | // | ||
570 | // if(*count == 0 || *next == NUM_SEMS) | ||
571 | // { | ||
572 | // ret = job(ctx, runfactor); | ||
573 | // } | ||
574 | // else | ||
575 | // { | ||
576 | // int which_sem = *next; | ||
577 | // int rsm_od = ctx->od[which_sem]; | ||
578 | // | ||
579 | // ++(*next); | ||
580 | // --(*count); | ||
581 | // | ||
582 | // //printf("[%d]: trying to get semaphore %d.\n", ctx->id, which_sem); | ||
583 | // //fflush(stdout); | ||
584 | // litmus_lock(rsm_od); | ||
585 | // | ||
586 | // //printf("[%d] got semaphore %d.\n", ctx->id, which_sem); | ||
587 | // //fflush(stdout); | ||
588 | // ret = nested_job(ctx, count, next, runfactor); | ||
589 | // | ||
590 | // //printf("[%d]: freeing semaphore %d.\n", ctx->id, which_sem); | ||
591 | // //fflush(stdout); | ||
592 | // litmus_unlock(rsm_od); | ||
593 | // } | ||
594 | // | ||
595 | //return(ret); | ||
596 | //} | ||
597 | |||
598 | |||
/* Touch `kb` kilobytes of stack (one fresh KB per recursion level) to
 * pollute the cache.  The sums are computed from uninitialized storage
 * and discarded; only the memory traffic matters. */
void dirty_kb(int kb)
{
	int32_t buf[256];	/* one KB of stack */
	int32_t acc = 0;
	int32_t j;

	if (!kb)
		return;

	for (j = 0; j < 256; j++)
		acc += buf[j];
	kb--;
	/* recurse deliberately (not tail-recursive) so every level keeps
	 * its own KB of stack live */
	if (kb)
		dirty_kb(kb);
	for (j = 0; j < 256; j++)
		acc += buf[j];
}
617 | |||
618 | int job(struct thread_context* ctx, int runfactor) | ||
619 | { | ||
620 | //struct timespec tosleep = {0, 100000}; // 0.1 ms | ||
621 | |||
622 | //printf("[%d]: runfactor = %d\n", ctx->id, runfactor); | ||
623 | |||
624 | //dirty_kb(8 * runfactor); | ||
625 | dirty_kb(1 * runfactor); | ||
626 | //nanosleep(&tosleep, NULL); | ||
627 | |||
628 | /* Don't exit. */ | ||
629 | //return ctx->count++ > 100; | ||
630 | //return ctx->count++ > 12000; | ||
631 | //return ctx->count++ > 120000; | ||
632 | return ctx->count++ > 25000; // controls number of jobs per task | ||
633 | } | ||
diff --git a/gpu/locktest.c b/gpu/locktest.c new file mode 100644 index 0000000..bc4fc54 --- /dev/null +++ b/gpu/locktest.c | |||
@@ -0,0 +1,206 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <stdint.h> | ||
4 | #include <unistd.h> | ||
5 | #include <assert.h> | ||
6 | #include <errno.h> | ||
7 | #include <sys/types.h> | ||
8 | #include <sys/stat.h> | ||
9 | #include <fcntl.h> | ||
10 | |||
11 | /* Include gettid() */ | ||
12 | #include <sys/types.h> | ||
13 | |||
14 | /* Include threading support. */ | ||
15 | #include <pthread.h> | ||
16 | |||
17 | /* Include the LITMUS^RT API.*/ | ||
18 | #include "litmus.h" | ||
19 | |||
20 | /* Catch errors. | ||
21 | */ | ||
22 | #define CALL( exp ) do { \ | ||
23 | int ret; \ | ||
24 | ret = exp; \ | ||
25 | if (ret != 0) \ | ||
26 | fprintf(stderr, "%s failed: %m\n", #exp);\ | ||
27 | else \ | ||
28 | fprintf(stderr, "%s ok.\n", #exp); \ | ||
29 | } while (0) | ||
30 | |||
31 | #define TH_CALL( exp ) do { \ | ||
32 | int ret; \ | ||
33 | ret = exp; \ | ||
34 | if (ret != 0) \ | ||
35 | fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \ | ||
36 | else \ | ||
37 | fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \ | ||
38 | } while (0) | ||
39 | |||
40 | #define TH_SAFE_CALL( exp ) do { \ | ||
41 | int ret; \ | ||
42 | fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \ | ||
43 | ret = exp; \ | ||
44 | if (ret != 0) \ | ||
45 | fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \ | ||
46 | else \ | ||
47 | fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \ | ||
48 | } while (0) | ||
49 | |||
50 | |||
51 | /* these are only default values */ | ||
52 | int NUM_THREADS=3; | ||
53 | int NUM_SEMS=10; | ||
54 | |||
55 | #define MAX_SEMS 1000 | ||
56 | |||
57 | #define EXEC_COST 10 | ||
58 | #define PERIOD 100 | ||
59 | |||
60 | /* The information passed to each thread. Could be anything. */ | ||
61 | struct thread_context { | ||
62 | int id; | ||
63 | int fd; | ||
64 | int od[MAX_SEMS]; | ||
65 | int count; | ||
66 | unsigned int rand; | ||
67 | }; | ||
68 | |||
69 | void* rt_thread(void* _ctx); | ||
70 | int nested_job(struct thread_context* ctx, int *count, int *next); | ||
71 | int job(struct thread_context*); | ||
72 | |||
73 | #define OPTSTR "t:s:" | ||
74 | |||
75 | int main(int argc, char** argv) | ||
76 | { | ||
77 | int i; | ||
78 | struct thread_context* ctx; | ||
79 | pthread_t* task; | ||
80 | int fd; | ||
81 | |||
82 | int opt; | ||
83 | while((opt = getopt(argc, argv, OPTSTR)) != -1) { | ||
84 | switch(opt) { | ||
85 | case 't': | ||
86 | NUM_THREADS = atoi(optarg); | ||
87 | break; | ||
88 | case 's': | ||
89 | NUM_SEMS = atoi(optarg); | ||
90 | assert(NUM_SEMS <= MAX_SEMS); | ||
91 | break; | ||
92 | default: | ||
93 | fprintf(stderr, "Unknown option: %c\n", opt); | ||
94 | exit(-1); | ||
95 | break; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context)); | ||
100 | task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t)); | ||
101 | |||
102 | srand(0); /* something repeatable for now */ | ||
103 | |||
104 | fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR); | ||
105 | |||
106 | CALL( init_litmus() ); | ||
107 | |||
108 | for (i = 0; i < NUM_THREADS; i++) { | ||
109 | ctx[i].id = i; | ||
110 | ctx[i].fd = fd; | ||
111 | ctx[i].rand = rand(); | ||
112 | CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) ); | ||
113 | } | ||
114 | |||
115 | |||
116 | for (i = 0; i < NUM_THREADS; i++) | ||
117 | pthread_join(task[i], NULL); | ||
118 | |||
119 | |||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | void* rt_thread(void* _ctx) | ||
124 | { | ||
125 | int i; | ||
126 | int do_exit = 0; | ||
127 | |||
128 | struct thread_context *ctx = (struct thread_context*)_ctx; | ||
129 | |||
130 | TH_CALL( init_rt_thread() ); | ||
131 | |||
132 | /* Vary period a little bit. */ | ||
133 | TH_CALL( sporadic_global(EXEC_COST, PERIOD + 10*ctx->id) ); | ||
134 | |||
135 | for (i = 0; i < NUM_SEMS; i++) { | ||
136 | ctx->od[i] = open_fmlp_sem(ctx->fd, i); | ||
137 | if(ctx->od[i] < 0) | ||
138 | perror("open_fmlp_sem"); | ||
139 | } | ||
140 | |||
141 | TH_CALL( task_mode(LITMUS_RT_TASK) ); | ||
142 | |||
143 | |||
144 | printf("[%d] Waiting for TS release.\n ", ctx->id); | ||
145 | wait_for_ts_release(); | ||
146 | ctx->count = 0; | ||
147 | |||
148 | do { | ||
149 | int which_sem = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0))); | ||
150 | |||
151 | printf("[%d]: trying to get semaphore %d.\n", ctx->id, which_sem); | ||
152 | fflush(stdout); | ||
153 | |||
154 | TH_SAFE_CALL ( litmus_lock(which_sem) ); | ||
155 | |||
156 | printf("[%d] got semaphore %d.\n", ctx->id, which_sem); | ||
157 | fflush(stdout); | ||
158 | |||
159 | do_exit = job(ctx); | ||
160 | |||
161 | printf("[%d]: freeing semaphore %d.\n", ctx->id, which_sem); | ||
162 | fflush(stdout); | ||
163 | |||
164 | TH_SAFE_CALL ( litmus_unlock(which_sem) ); | ||
165 | |||
166 | if(!do_exit) { | ||
167 | sleep_next_period(); | ||
168 | } | ||
169 | } while(!do_exit); | ||
170 | |||
171 | /***** | ||
172 | * 4) Transition to background mode. | ||
173 | */ | ||
174 | TH_CALL( task_mode(BACKGROUND_TASK) ); | ||
175 | |||
176 | |||
177 | return NULL; | ||
178 | } | ||
179 | |||
/* Touch `kb` kilobytes of stack (1 KB per recursion level) purely for
 * its cache-dirtying side effect; the computed sum is discarded.
 * BUGFIX: guard against kb <= 0 -- dirty_kb(0) previously decremented
 * past zero and recursed without bound until the stack overflowed
 * (the copy in ikglptest.c already carries this guard). */
void dirty_kb(int kb)
{
	int32_t one_kb[256];
	int32_t sum = 0;
	int32_t i;

	if (kb <= 0)
		return;

	for (i = 0; i < 256; i++)
		sum += one_kb[i];
	kb--;
	/* prevent tail recursion */
	if (kb)
		dirty_kb(kb);
	for (i = 0; i < 256; i++)
		sum += one_kb[i];
}
195 | |||
196 | int job(struct thread_context* ctx) | ||
197 | { | ||
198 | /* Do real-time calculation. */ | ||
199 | dirty_kb(8); | ||
200 | |||
201 | /* Don't exit. */ | ||
202 | //return ctx->count++ > 100; | ||
203 | //return ctx->count++ > 12000; | ||
204 | //return ctx->count++ > 120000; | ||
205 | return ctx->count++ > 30000; // controls number of jobs per task | ||
206 | } | ||
diff --git a/gpu/nested.c b/gpu/nested.c new file mode 100644 index 0000000..07e237b --- /dev/null +++ b/gpu/nested.c | |||
@@ -0,0 +1,245 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <stdint.h> | ||
4 | #include <unistd.h> | ||
5 | #include <assert.h> | ||
6 | #include <errno.h> | ||
7 | #include <sys/types.h> | ||
8 | #include <sys/stat.h> | ||
9 | #include <fcntl.h> | ||
10 | |||
11 | /* Include gettid() */ | ||
12 | #include <sys/types.h> | ||
13 | |||
14 | /* Include threading support. */ | ||
15 | #include <pthread.h> | ||
16 | |||
17 | /* Include the LITMUS^RT API.*/ | ||
18 | #include "litmus.h" | ||
19 | |||
20 | /* Catch errors. | ||
21 | */ | ||
22 | #define CALL( exp ) do { \ | ||
23 | int ret; \ | ||
24 | ret = exp; \ | ||
25 | if (ret != 0) \ | ||
26 | fprintf(stderr, "%s failed: %m\n", #exp);\ | ||
27 | else \ | ||
28 | fprintf(stderr, "%s ok.\n", #exp); \ | ||
29 | } while (0) | ||
30 | |||
31 | #define TH_CALL( exp ) do { \ | ||
32 | int ret; \ | ||
33 | ret = exp; \ | ||
34 | if (ret != 0) \ | ||
35 | fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \ | ||
36 | else \ | ||
37 | fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \ | ||
38 | } while (0) | ||
39 | |||
40 | #define TH_SAFE_CALL( exp ) do { \ | ||
41 | int ret; \ | ||
42 | fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \ | ||
43 | ret = exp; \ | ||
44 | if (ret != 0) \ | ||
45 | fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \ | ||
46 | else \ | ||
47 | fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \ | ||
48 | } while (0) | ||
49 | |||
50 | |||
51 | #define NUM_CPUS 4 | ||
52 | |||
53 | //#define NUM_THREADS 3 | ||
54 | int NUM_THREADS=3; | ||
55 | |||
56 | /* NEST_DEPTH may not be greater than NUM_SEMS. */ | ||
57 | //#define NUM_SEMS 10 | ||
58 | int NUM_SEMS=10; | ||
59 | |||
60 | int SLEEP_BETWEEN_JOBS = 1; | ||
61 | |||
62 | #define MAX_SEMS 1000 | ||
63 | |||
64 | //#define NEST_DEPTH 5 | ||
65 | int NEST_DEPTH=5; | ||
66 | |||
67 | #define EXEC_COST 1000*1 | ||
68 | #define PERIOD 1000*10 | ||
69 | |||
70 | /* The information passed to each thread. Could be anything. */ | ||
71 | struct thread_context { | ||
72 | int id; | ||
73 | int fd; | ||
74 | int od[MAX_SEMS]; | ||
75 | int count; | ||
76 | unsigned int rand; | ||
77 | }; | ||
78 | |||
79 | void* rt_thread(void* _ctx); | ||
80 | int nested_job(struct thread_context* ctx, int *count, int *next); | ||
81 | int job(struct thread_context*); | ||
82 | |||
83 | #define OPTSTR "t:s:d:f" | ||
84 | |||
85 | int main(int argc, char** argv) | ||
86 | { | ||
87 | int i; | ||
88 | struct thread_context* ctx; //[NUM_THREADS]; | ||
89 | pthread_t* task; //[NUM_THREADS]; | ||
90 | int fd; | ||
91 | |||
92 | int opt; | ||
93 | while((opt = getopt(argc, argv, OPTSTR)) != -1) { | ||
94 | switch(opt) { | ||
95 | case 't': | ||
96 | NUM_THREADS = atoi(optarg); | ||
97 | break; | ||
98 | case 's': | ||
99 | NUM_SEMS = atoi(optarg); | ||
100 | assert(NUM_SEMS <= MAX_SEMS); | ||
101 | break; | ||
102 | case 'd': | ||
103 | NEST_DEPTH = atoi(optarg); | ||
104 | break; | ||
105 | case 'f': | ||
106 | SLEEP_BETWEEN_JOBS = 0; | ||
107 | break; | ||
108 | default: | ||
109 | fprintf(stderr, "Unknown option: %c\n", opt); | ||
110 | exit(-1); | ||
111 | break; | ||
112 | } | ||
113 | } | ||
114 | |||
115 | ctx = (struct thread_context*) calloc(NUM_THREADS, sizeof(struct thread_context)); | ||
116 | task = (pthread_t*) calloc(NUM_THREADS, sizeof(pthread_t)); | ||
117 | |||
118 | srand(0); /* something repeatable for now */ | ||
119 | |||
120 | fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR); | ||
121 | |||
122 | CALL( init_litmus() ); | ||
123 | |||
124 | for (i = 0; i < NUM_THREADS; i++) { | ||
125 | ctx[i].id = i; | ||
126 | ctx[i].fd = fd; | ||
127 | ctx[i].rand = rand(); | ||
128 | CALL( pthread_create(task + i, NULL, rt_thread, ctx + i) ); | ||
129 | } | ||
130 | |||
131 | |||
132 | for (i = 0; i < NUM_THREADS; i++) | ||
133 | pthread_join(task[i], NULL); | ||
134 | |||
135 | |||
136 | return 0; | ||
137 | } | ||
138 | |||
139 | void* rt_thread(void* _ctx) | ||
140 | { | ||
141 | int i; | ||
142 | int do_exit = 0; | ||
143 | |||
144 | struct thread_context *ctx = (struct thread_context*)_ctx; | ||
145 | |||
146 | /* Make presence visible. */ | ||
147 | //printf("RT Thread %d active.\n", ctx->id); | ||
148 | |||
149 | TH_CALL( init_rt_thread() ); | ||
150 | TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*ctx->id, 0, 0, | ||
151 | LITMUS_LOWEST_PRIORITY, RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 0) ); | ||
152 | |||
153 | for (i = 0; i < NUM_SEMS; i++) { | ||
154 | ctx->od[i] = open_rsm_sem(ctx->fd, i); | ||
155 | if(ctx->od[i] < 0) | ||
156 | perror("open_rsm_sem"); | ||
157 | //printf("[%d] ctx->od[%d]: %d\n", ctx->id, i, ctx->od[i]); | ||
158 | } | ||
159 | |||
160 | TH_CALL( task_mode(LITMUS_RT_TASK) ); | ||
161 | |||
162 | |||
163 | printf("[%d] Waiting for TS release.\n ", ctx->id); | ||
164 | wait_for_ts_release(); | ||
165 | ctx->count = 0; | ||
166 | |||
167 | do { | ||
168 | int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0))); | ||
169 | int count = NEST_DEPTH; | ||
170 | do_exit = nested_job(ctx, &count, &first); | ||
171 | |||
172 | if(SLEEP_BETWEEN_JOBS && !do_exit) { | ||
173 | sleep_next_period(); | ||
174 | } | ||
175 | } while(!do_exit); | ||
176 | |||
177 | /***** | ||
178 | * 4) Transition to background mode. | ||
179 | */ | ||
180 | TH_CALL( task_mode(BACKGROUND_TASK) ); | ||
181 | |||
182 | |||
183 | return NULL; | ||
184 | } | ||
185 | |||
186 | |||
187 | int nested_job(struct thread_context* ctx, int *count, int *next) | ||
188 | { | ||
189 | int ret; | ||
190 | |||
191 | if(*count == 0 || *next == NUM_SEMS) /* base case */ | ||
192 | { | ||
193 | ret = job(ctx); | ||
194 | } | ||
195 | else | ||
196 | { | ||
197 | int which_sem = ctx->od[*next]; | ||
198 | |||
199 | ++(*next); | ||
200 | --(*count); | ||
201 | |||
202 | printf("[%d]: trying to get semaphore %d.\n", ctx->id, which_sem); | ||
203 | fflush(stdout); | ||
204 | TH_SAFE_CALL ( litmus_lock(which_sem) ); | ||
205 | printf("[%d] got semaphore %d.\n", ctx->id, which_sem); | ||
206 | fflush(stdout); | ||
207 | ret = nested_job(ctx, count, next); | ||
208 | TH_SAFE_CALL ( litmus_unlock(which_sem) ); | ||
209 | fflush(stdout); | ||
210 | } | ||
211 | |||
212 | return(ret); | ||
213 | } | ||
214 | |||
215 | |||
216 | |||
/* Touch `kb` kilobytes of stack (1 KB per recursion level) purely for
 * its cache-dirtying side effect; the computed sum is discarded.
 * BUGFIX: guard against kb <= 0 -- dirty_kb(0) previously decremented
 * past zero and recursed without bound until the stack overflowed
 * (the copy in ikglptest.c already carries this guard). */
void dirty_kb(int kb)
{
	int32_t one_kb[256];
	int32_t sum = 0;
	int32_t i;

	if (kb <= 0)
		return;

	for (i = 0; i < 256; i++)
		sum += one_kb[i];
	kb--;
	/* prevent tail recursion */
	if (kb)
		dirty_kb(kb);
	for (i = 0; i < 256; i++)
		sum += one_kb[i];
}
232 | |||
233 | |||
234 | |||
235 | int job(struct thread_context* ctx) | ||
236 | { | ||
237 | /* Do real-time calculation. */ | ||
238 | dirty_kb(8); | ||
239 | |||
240 | /* Don't exit. */ | ||
241 | //return ctx->count++ > 100; | ||
242 | //return ctx->count++ > 12000; | ||
243 | //return ctx->count++ > 120000; | ||
244 | return ctx->count++ > 30000; | ||
245 | } | ||
diff --git a/gpu/normal_task.c b/gpu/normal_task.c new file mode 100644 index 0000000..ffc95b1 --- /dev/null +++ b/gpu/normal_task.c | |||
@@ -0,0 +1,84 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdlib.h> | ||
3 | #include <string.h> | ||
4 | #include <stdint.h> | ||
5 | #include <unistd.h> | ||
6 | #include <assert.h> | ||
7 | #include <errno.h> | ||
8 | #include <sys/types.h> | ||
9 | #include <sys/stat.h> | ||
10 | #include <fcntl.h> | ||
11 | #include <time.h> | ||
12 | #include <math.h> | ||
13 | |||
14 | /* Include gettid() */ | ||
15 | #include <sys/types.h> | ||
16 | |||
17 | /* Include threading support. */ | ||
18 | #include <pthread.h> | ||
19 | |||
20 | /* Include the LITMUS^RT API.*/ | ||
21 | #include "litmus.h" | ||
22 | |||
23 | /* Catch errors. | ||
24 | */ | ||
25 | #if 1 | ||
26 | #define CALL( exp ) do { \ | ||
27 | int ret; \ | ||
28 | ret = exp; \ | ||
29 | if (ret != 0) \ | ||
30 | fprintf(stderr, "%s failed: %m\n", #exp);\ | ||
31 | else \ | ||
32 | fprintf(stderr, "%s ok.\n", #exp); \ | ||
33 | } while (0) | ||
34 | |||
35 | #define TH_CALL( exp ) do { \ | ||
36 | int ret; \ | ||
37 | ret = exp; \ | ||
38 | if (ret != 0) \ | ||
39 | fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \ | ||
40 | else \ | ||
41 | fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \ | ||
42 | } while (0) | ||
43 | |||
44 | #define TH_SAFE_CALL( exp ) do { \ | ||
45 | int ret; \ | ||
46 | fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \ | ||
47 | ret = exp; \ | ||
48 | if (ret != 0) \ | ||
49 | fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \ | ||
50 | else \ | ||
51 | fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \ | ||
52 | } while (0) | ||
53 | #else | ||
54 | #define CALL( exp ) | ||
55 | #define TH_CALL( exp ) | ||
56 | #define TH_SAFE_CALL( exp ) | ||
57 | #endif | ||
58 | |||
59 | /* these are only default values */ | ||
60 | // 1000 = 1us | ||
61 | #define EXEC_COST 1000*1 | ||
62 | #define PERIOD 2*1000*100 | ||
63 | |||
64 | |||
/* Minimal "normal" (non-GPU) participant task: registers the process as
 * a soft real-time sporadic task, blocks until the synchronous task-set
 * release, prints a message, and exits.  The commented-out calls mark
 * where a real workload would enter/leave real-time mode. */
int main(int argc, char** argv)
{
	CALL( init_litmus() );

	CALL( init_rt_thread() );
	/* EXEC_COST/PERIOD are nanoseconds (see "1000 = 1us" above);
	 * lowest priority, soft RT class, no enforcement, no signals. */
	CALL( sporadic_task_ns(EXEC_COST, PERIOD, 0, 0,
		LITMUS_LOWEST_PRIORITY, RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 1) );
	//CALL( task_mode(LITMUS_RT_TASK) );

	fprintf(stdout, "Waiting for TS release.\n ");
	wait_for_ts_release();

	fprintf(stdout, "Released!\n");

	//sleep_next_period();
	//CALL( task_mode(BACKGROUND_TASK) );

	return 0;
}
84 | |||
diff --git a/gpu/rtspin_fake_cuda.cpp b/gpu/rtspin_fake_cuda.cpp new file mode 100644 index 0000000..667c675 --- /dev/null +++ b/gpu/rtspin_fake_cuda.cpp | |||
@@ -0,0 +1,1169 @@ | |||
1 | #include <sys/time.h> | ||
2 | |||
3 | #include <stdint.h> | ||
4 | #include <stdio.h> | ||
5 | #include <stdlib.h> | ||
6 | #include <unistd.h> | ||
7 | #include <time.h> | ||
8 | #include <assert.h> | ||
9 | #include <fcntl.h> | ||
10 | #include <errno.h> | ||
11 | |||
12 | #include <blitz/array.h> | ||
13 | |||
14 | #include <boost/interprocess/managed_shared_memory.hpp> | ||
15 | #include <boost/interprocess/sync/interprocess_barrier.hpp> | ||
16 | #include <boost/interprocess/sync/interprocess_mutex.hpp> | ||
17 | |||
18 | #include "litmus.h" | ||
19 | |||
20 | using namespace blitz; | ||
21 | using namespace std; | ||
22 | using namespace boost::interprocess; | ||
23 | |||
24 | #define RESET_RELEASE_ON_MISS | ||
25 | |||
26 | |||
// Report `msg` (with the current errno text) via perror() and terminate
// the process, encoding the negated errno in the exit status.
void bail_out(const char* msg)
{
	perror(msg);
	exit(-errno);
}
32 | |||
33 | |||
/* Print the reason the command line was rejected (`error`), followed by
 * the usage synopsis, then terminate with EXIT_FAILURE.  Never returns. */
static void usage(char *error) {
	fprintf(stderr, "Error: %s\n", error);
	fprintf(stderr,
		"Usage:\n"
		" rt_spin [COMMON-OPTS] WCET PERIOD DURATION\n"
		" rt_spin [COMMON-OPTS] -f FILE [-o COLUMN] WCET PERIOD\n"
		" rt_spin -l\n"
		"\n"
		"COMMON-OPTS = [-w] [-p PARTITION] [-c CLASS] [-s SCALE]\n"
		"\n"
		"WCET and PERIOD are milliseconds, DURATION is seconds.\n");
	exit(EXIT_FAILURE);
}
47 | |||
48 | #define NUMS 4096 | ||
49 | static int num[NUMS]; | ||
50 | |||
51 | #define PAGE_SIZE (1024*4) | ||
52 | |||
53 | bool ENABLE_WAIT = true; | ||
54 | bool GPU_TASK = false; | ||
55 | bool ENABLE_AFFINITY = false; | ||
56 | bool USE_KFMLP = false; | ||
57 | bool RELAX_FIFO_MAX_LEN = false; | ||
58 | bool USE_DYNAMIC_GROUP_LOCKS = false; | ||
59 | bool BROADCAST_STATE = false; | ||
60 | bool ENABLE_CHUNKING = false; | ||
61 | bool MIGRATE_VIA_SYSMEM = false; | ||
62 | |||
63 | int GPU_PARTITION = 0; | ||
64 | int GPU_PARTITION_SIZE = 0; | ||
65 | int NUM_SIMULT_USERS = 1; | ||
66 | size_t SEND_SIZE = 0; | ||
67 | size_t RECV_SIZE = 0; | ||
68 | size_t STATE_SIZE = 0; | ||
69 | size_t CHUNK_SIZE = PAGE_SIZE; | ||
70 | |||
71 | |||
72 | #define MAX_GPUS 8 | ||
73 | |||
74 | int KEXCLU_LOCK; | ||
75 | int EE_LOCKS[MAX_GPUS]; | ||
76 | int CE_SEND_LOCKS[MAX_GPUS]; | ||
77 | int CE_RECV_LOCKS[MAX_GPUS]; | ||
78 | |||
79 | int CUR_DEVICE = -1; | ||
80 | int LAST_DEVICE = -1; | ||
81 | |||
82 | bool useEngineLocks() | ||
83 | { | ||
84 | return(NUM_SIMULT_USERS != 1); | ||
85 | } | ||
86 | |||
87 | int gpuCyclesPerSecond = 0; | ||
88 | |||
89 | uint64_t *init_release_time = NULL; | ||
90 | barrier *release_barrier = NULL; | ||
91 | barrier *gpu_barrier = NULL; | ||
92 | interprocess_mutex *gpu_mgmt_mutexes = NULL; | ||
93 | managed_shared_memory *segment_ptr = NULL; | ||
94 | managed_shared_memory *release_segment_ptr = NULL; | ||
95 | |||
96 | // observed average rate when four GPUs on same node in use from pagelocked memory. | ||
97 | // about 1/3 to 1/4 this when there is no bus contention. | ||
98 | //const double msPerByte = 4.22e-07; | ||
99 | //const double transOverhead = 0.01008; // also observed. | ||
100 | |||
101 | |||
102 | |||
103 | char *d_send_data[MAX_GPUS] = {0}; | ||
104 | char *d_recv_data[MAX_GPUS] = {0}; | ||
105 | char *d_state_data[MAX_GPUS] = {0}; | ||
106 | |||
107 | //cudaStream_t streams[MAX_GPUS]; | ||
108 | |||
109 | char *h_send_data = 0; | ||
110 | char *h_recv_data = 0; | ||
111 | char *h_state_data = 0; | ||
112 | |||
113 | |||
114 | #include <sys/mman.h> | ||
115 | #define USE_PAGE_LOCKED_MEMORY | ||
116 | #ifdef USE_PAGE_LOCKED_MEMORY | ||
117 | #define c_malloc(s) \ | ||
118 | mmap(NULL, s , \ | ||
119 | PROT_READ | PROT_WRITE, \ | ||
120 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, \ | ||
121 | -1, 0) | ||
122 | #else | ||
123 | #define c_malloc(s) malloc(s) | ||
124 | #endif | ||
125 | |||
126 | typedef int cudaError_t; | ||
127 | #define cudaSuccess 0 | ||
128 | |||
129 | enum cudaMemcpyKind { | ||
130 | cudaMemcpyHostToDevice = 0, | ||
131 | cudaMemcpyDeviceToHost = 1, | ||
132 | cudaMemcpyDeviceToDevice = 2, | ||
133 | }; | ||
134 | |||
// Fake-CUDA stub: the real API returns the runtime's last error; this
// build performs no actual CUDA calls, so it always reports success.
cudaError_t cudaGetLastError()
{
	return cudaSuccess;
}
139 | |||
140 | //////////////////////////////////////////////////////////////////////// | ||
141 | //////////////////////////////////////////////////////////////////////// | ||
142 | //////////////////////////////////////////////////////////////////////// | ||
143 | //////////////////////////////////////////////////////////////////////// | ||
144 | |||
145 | struct ce_lock_state | ||
146 | { | ||
147 | int locks[2]; | ||
148 | size_t num_locks; | ||
149 | size_t budget_remaining; | ||
150 | bool locked; | ||
151 | |||
152 | ce_lock_state(int device_a, enum cudaMemcpyKind kind, size_t size, int device_b = -1) { | ||
153 | num_locks = (device_a != -1) + (device_b != -1); | ||
154 | |||
155 | if(device_a != -1) { | ||
156 | locks[0] = (kind == cudaMemcpyHostToDevice) ? | ||
157 | CE_SEND_LOCKS[device_a] : CE_RECV_LOCKS[device_a]; | ||
158 | } | ||
159 | |||
160 | if(device_b != -1) { | ||
161 | assert(kind == cudaMemcpyDeviceToDevice); | ||
162 | |||
163 | locks[1] = CE_RECV_LOCKS[device_b]; | ||
164 | |||
165 | if(locks[1] < locks[0]) { | ||
166 | int temp = locks[1]; | ||
167 | locks[1] = locks[0]; | ||
168 | locks[0] = temp; | ||
169 | } | ||
170 | } | ||
171 | |||
172 | if(!ENABLE_CHUNKING) | ||
173 | budget_remaining = size; | ||
174 | else | ||
175 | budget_remaining = CHUNK_SIZE; | ||
176 | } | ||
177 | |||
178 | void lock() { | ||
179 | if(USE_DYNAMIC_GROUP_LOCKS) { | ||
180 | litmus_dgl_lock(locks, num_locks); | ||
181 | } | ||
182 | else | ||
183 | { | ||
184 | for(int l = 0; l < num_locks; ++l) | ||
185 | { | ||
186 | litmus_lock(locks[l]); | ||
187 | } | ||
188 | } | ||
189 | locked = true; | ||
190 | } | ||
191 | |||
192 | void unlock() { | ||
193 | if(USE_DYNAMIC_GROUP_LOCKS) { | ||
194 | litmus_dgl_unlock(locks, num_locks); | ||
195 | } | ||
196 | else | ||
197 | { | ||
198 | // reverse order | ||
199 | for(int l = num_locks - 1; l >= 0; --l) | ||
200 | { | ||
201 | litmus_unlock(locks[l]); | ||
202 | } | ||
203 | } | ||
204 | locked = false; | ||
205 | } | ||
206 | |||
207 | void refresh() { | ||
208 | budget_remaining = CHUNK_SIZE; | ||
209 | } | ||
210 | |||
211 | bool budgetIsAvailable(size_t tosend) { | ||
212 | return(tosend >= budget_remaining); | ||
213 | } | ||
214 | |||
215 | void decreaseBudget(size_t spent) { | ||
216 | budget_remaining -= spent; | ||
217 | } | ||
218 | }; | ||
219 | |||
220 | // precondition: if do_locking == true, locks in state are held. | ||
221 | cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count, | ||
222 | enum cudaMemcpyKind kind, | ||
223 | ce_lock_state* state) | ||
224 | { | ||
225 | cudaError_t ret = cudaSuccess; | ||
226 | int remaining = count; | ||
227 | |||
228 | char* dst = (char*)a_dst; | ||
229 | const char* src = (const char*)a_src; | ||
230 | |||
231 | // disable chunking, if needed, by setting chunk_size equal to the | ||
232 | // amount of data to be copied. | ||
233 | int chunk_size = (ENABLE_CHUNKING) ? CHUNK_SIZE : count; | ||
234 | int i = 0; | ||
235 | |||
236 | while(remaining != 0) | ||
237 | { | ||
238 | int bytesToCopy = std::min(remaining, chunk_size); | ||
239 | |||
240 | if(state && state->budgetIsAvailable(bytesToCopy) && state->locked) { | ||
241 | //cutilSafeCall( cudaStreamSynchronize(streams[CUR_DEVICE]) ); | ||
242 | ret = cudaGetLastError(); | ||
243 | |||
244 | if(ret != cudaSuccess) | ||
245 | { | ||
246 | break; | ||
247 | } | ||
248 | |||
249 | state->unlock(); | ||
250 | state->refresh(); // replentish. | ||
251 | // we can only run out of | ||
252 | // budget if chunking is enabled. | ||
253 | // we presume that init budget would | ||
254 | // be set to cover entire memcpy | ||
255 | // if chunking were disabled. | ||
256 | } | ||
257 | |||
258 | if(state && !state->locked) { | ||
259 | state->lock(); | ||
260 | } | ||
261 | |||
262 | //ret = cudaMemcpy(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind); | ||
263 | //cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, streams[CUR_DEVICE]); | ||
264 | |||
265 | if(state) { | ||
266 | state->decreaseBudget(bytesToCopy); | ||
267 | } | ||
268 | |||
269 | // if(ret != cudaSuccess) | ||
270 | // { | ||
271 | // break; | ||
272 | // } | ||
273 | |||
274 | ++i; | ||
275 | remaining -= bytesToCopy; | ||
276 | } | ||
277 | return ret; | ||
278 | } | ||
279 | |||
280 | cudaError_t chunkMemcpy(void* a_dst, const void* a_src, size_t count, | ||
281 | enum cudaMemcpyKind kind, | ||
282 | int device_a = -1, // device_a == -1 disables locking | ||
283 | bool do_locking = true, | ||
284 | int device_b = -1) | ||
285 | { | ||
286 | cudaError_t ret; | ||
287 | if(!do_locking || device_a == -1) { | ||
288 | ret = __chunkMemcpy(a_dst, a_src, count, kind, NULL); | ||
289 | //cutilSafeCall( cudaStreamSynchronize(streams[CUR_DEVICE]) ); | ||
290 | if(ret == cudaSuccess) | ||
291 | ret = cudaGetLastError(); | ||
292 | } | ||
293 | else { | ||
294 | ce_lock_state state(device_a, kind, count, device_b); | ||
295 | state.lock(); | ||
296 | ret = __chunkMemcpy(a_dst, a_src, count, kind, &state); | ||
297 | //cutilSafeCall( cudaStreamSynchronize(streams[CUR_DEVICE]) ); | ||
298 | if(ret == cudaSuccess) | ||
299 | ret = cudaGetLastError(); | ||
300 | state.unlock(); | ||
301 | } | ||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | |||
306 | //////////////////////////////////////////////////////////////////////// | ||
307 | //////////////////////////////////////////////////////////////////////// | ||
308 | //////////////////////////////////////////////////////////////////////// | ||
309 | |||
310 | |||
// Convert a timespec to a single nanosecond count.
// BUGFIX: use 64-bit integer arithmetic; the previous double-based
// expression (tv_sec*1e9 + tv_nsec) loses nanosecond precision once the
// value exceeds 2^53 ns (~104 days of uptime).
inline uint64_t timespec_to_ns(const struct timespec& t)
{
	return (uint64_t)t.tv_sec * 1000000000ull + (uint64_t)t.tv_nsec;
}
315 | |||
// Convert a nanosecond count back into a timespec.
// BUGFIX: the nanosecond field was computed as `ns - ns/1e9`, which is
// nearly the entire value (and the seconds field went through lossy
// double arithmetic).  It must be the remainder modulo one second.
inline struct timespec ns_to_timespec(const uint64_t& ns)
{
	struct timespec temp = {
		(time_t)(ns / 1000000000ull),
		(long)(ns % 1000000000ull)
	};
	return(temp);
}
321 | |||
322 | inline uint64_t clock_gettime_ns(clockid_t clk_id) | ||
323 | { | ||
324 | struct timespec temp; | ||
325 | clock_gettime(clk_id, &temp); | ||
326 | return timespec_to_ns(temp); | ||
327 | } | ||
328 | |||
329 | |||
330 | |||
331 | static int loop_once(void) | ||
332 | { | ||
333 | int i, j = 0; | ||
334 | for (i = 0; i < NUMS; i++) | ||
335 | j += num[i]++; | ||
336 | return j; | ||
337 | } | ||
338 | |||
/* Busy-loop for approximately `exec_time` seconds of *execution* time
 * (as reported by cputime()), by repeatedly calling loop_once().  The
 * loop stops early once one more average-length iteration would
 * overshoot the target.  `emergency_exit`, when non-zero, is an
 * absolute wall-clock deadline past which we bail out -- that should
 * only happen if the kernel's execution-time accounting is broken.
 * Returns a junk accumulator so the work cannot be optimized away. */
static int loop_for(double exec_time, double emergency_exit)
{
	double last_loop = 0, loop_start;
	int tmp = 0;

	double start = cputime();
	double now = cputime();

	while (now + last_loop < start + exec_time) {
		loop_start = now;
		tmp += loop_once();
		now = cputime();
		last_loop = now - loop_start;
		if (emergency_exit && wctime() > emergency_exit) {
			/* Oops --- this should only be possible if the execution time tracking
			 * is broken in the LITMUS^RT kernel. */
			fprintf(stderr, "!!! rtspin/%d emergency exit!\n", getpid());
			fprintf(stderr, "Something is seriously wrong! Do not ignore this.\n");
			break;
		}
	}

	return tmp;
}
363 | |||
/* Open/create every LITMUS^RT lock this task uses:
 *  - KEXCLU_LOCK: the k-exclusion GPU allocator (k-FMLP or IKGLP,
 *    selected by USE_KFMLP), shared by all tasks in this GPU partition;
 *  - per-GPU engine locks (EE_LOCKS, CE_SEND_LOCKS, CE_RECV_LOCKS) when
 *    more than one task may use a GPU simultaneously.
 * Lock identifiers are derived from GPU_PARTITION so tasks in the same
 * partition attach to the same semaphores. */
static void allocate_locks()
{
	// allocate k-FMLP lock
	// Shared backing file for all semaphore namespaces.
	int fd = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);

	// Per-partition namespace for lock names.
	int base_name = GPU_PARTITION * 1000;

	if(USE_KFMLP) {
		KEXCLU_LOCK = open_kfmlp_gpu_sem(fd,
						base_name, /* name */
						GPU_PARTITION_SIZE,
						GPU_PARTITION*GPU_PARTITION_SIZE,
						NUM_SIMULT_USERS,
						ENABLE_AFFINITY
						);
	}
	else {
		KEXCLU_LOCK = open_ikglp_gpu_sem(fd,
						base_name, /* name */
						GPU_PARTITION_SIZE,
						GPU_PARTITION*GPU_PARTITION_SIZE,
						NUM_SIMULT_USERS,
						ENABLE_AFFINITY,
						RELAX_FIFO_MAX_LEN
						);
	}
	if(KEXCLU_LOCK < 0)
		perror("open_kexclu_sem");

	// Engine locks are only needed when GPUs can be shared.
	if(NUM_SIMULT_USERS > 1)
	{
		// allocate the engine locks.
		// NOTE(review): locks are created for all MAX_GPUS devices, not
		// just this partition's -- presumably so lock ids are globally
		// consistent across partitions; confirm.
		for (int i = 0; i < MAX_GPUS; ++i)
		{
			// Execution engine lock: name (i+1)*10 + base_name.
			EE_LOCKS[i] = open_rsm_sem(fd, (i+1)*10 + base_name);
			if(EE_LOCKS[i] < 0)
				perror("open_rsm_sem");

			// Copy engine (send direction): name offset +1.
			CE_SEND_LOCKS[i] = open_rsm_sem(fd, (i+1)*10 + base_name + 1);
			if(CE_SEND_LOCKS[i] < 0)
				perror("open_rsm_sem");

			if(NUM_SIMULT_USERS == 3)
			{
				// allocate a separate lock for the second copy engine
				CE_RECV_LOCKS[i] = open_rsm_sem(fd, (i+1)*10 + base_name + 2);
				if(CE_RECV_LOCKS[i] < 0)
					perror("open_rsm_sem");
			}
			else
			{
				// share a single lock for the single copy engine
				CE_RECV_LOCKS[i] = CE_SEND_LOCKS[i];
			}
		}
	}
}
421 | |||
422 | static void allocate_host_memory() | ||
423 | { | ||
424 | // round up to page boundaries | ||
425 | size_t send_alloc_bytes = SEND_SIZE + (SEND_SIZE%PAGE_SIZE != 0)*PAGE_SIZE; | ||
426 | size_t recv_alloc_bytes = RECV_SIZE + (RECV_SIZE%PAGE_SIZE != 0)*PAGE_SIZE; | ||
427 | size_t state_alloc_bytes = STATE_SIZE + (STATE_SIZE%PAGE_SIZE != 0)*PAGE_SIZE; | ||
428 | |||
429 | printf("Allocating host memory. send = %dB, recv = %dB, state = %dB\n", | ||
430 | send_alloc_bytes, recv_alloc_bytes, state_alloc_bytes); | ||
431 | |||
432 | // if(send_alloc_bytes > 0) | ||
433 | // { | ||
434 | // h_send_data = (char *)c_malloc(send_alloc_bytes); | ||
435 | // memset(h_send_data, 0x55, send_alloc_bytes); // write some random value | ||
436 | // // this will open a connection to GPU 0 if there is no active context, so | ||
437 | // // expect long stalls. LAME. | ||
438 | // cutilSafeCall( cudaHostRegister(h_send_data, send_alloc_bytes, cudaHostRegisterPortable) ); | ||
439 | // } | ||
440 | // | ||
441 | // if(recv_alloc_bytes > 0) | ||
442 | // { | ||
443 | // h_recv_data = (char *)c_malloc(recv_alloc_bytes); | ||
444 | // memset(h_recv_data, 0xAA, recv_alloc_bytes); | ||
445 | // cutilSafeCall( cudaHostRegister(h_recv_data, recv_alloc_bytes, cudaHostRegisterPortable) ); | ||
446 | // } | ||
447 | // | ||
448 | // if(state_alloc_bytes > 0) | ||
449 | // { | ||
450 | // h_state_data = (char *)c_malloc(state_alloc_bytes); | ||
451 | // memset(h_state_data, 0xCC, state_alloc_bytes); // write some random value | ||
452 | // cutilSafeCall( cudaHostRegister(h_state_data, state_alloc_bytes, cudaHostRegisterPortable) ); | ||
453 | // } | ||
454 | |||
455 | printf("Host memory allocated.\n"); | ||
456 | } | ||
457 | |||
/* Pre-allocate per-device send/recv/state buffers on every GPU in this
 * task's partition. In this fake-CUDA build all device work is commented
 * out; only the progress messages are live. */
static void allocate_device_memory()
{
	printf("Allocating device memory.\n");
	// establish a connection to each GPU.
//	for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
//	{
//		int which_device = GPU_PARTITION*GPU_PARTITION_SIZE + i;
//
//		if(ENABLE_WAIT) gpu_mgmt_mutexes[which_device].lock();
//
//		cutilSafeCall( cudaSetDevice(which_device) );
//		cutilSafeCall( cudaDeviceSetLimit(cudaLimitPrintfFifoSize, 0) );
//		cutilSafeCall( cudaDeviceSetLimit(cudaLimitMallocHeapSize, 0) );
//
//		cutilSafeCall( cudaStreamCreate(&streams[which_device]) );
//
//		/* pre-allocate memory, pray there's enough to go around */
//		if(SEND_SIZE > 0) {
//			cutilSafeCall( cudaMalloc((void**)&d_send_data[which_device], SEND_SIZE) );
//		}
//		if(RECV_SIZE > 0) {
//			// NOTE(review): the two cudaMalloc calls below target
//			// h_recv_data/h_state_data (host arrays) -- they look like they
//			// were meant to be d_recv_data/d_state_data; verify before
//			// re-enabling this code.
//			cutilSafeCall( cudaMalloc((void**)&h_recv_data[which_device], RECV_SIZE) );
//		}
//		if(STATE_SIZE > 0) {
//			cutilSafeCall( cudaMalloc((void**)&h_state_data[which_device], STATE_SIZE) );
//		}
//
//		if(ENABLE_WAIT) gpu_mgmt_mutexes[which_device].unlock();
//	}
	printf("Device memory allocated.\n");
}
489 | |||
/* Configure every GPU in this task's partition: blocking-sync scheduling,
 * stream creation, peer-to-peer access between partition members, and a
 * one-time read of the GPU clock rate (gpuCyclesPerSecond). All device
 * work is commented out in this fake-CUDA build; only the progress
 * messages are live. */
static void configure_gpus()
{
	printf("Configuring GPU\n");

//	// SUSPEND WHEN BLOCKED!!
//	cutilSafeCall( cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync) );
//
//	// establish a connection to each GPU.
//	for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
//	{
//		int which_device = GPU_PARTITION*GPU_PARTITION_SIZE + i;
//
//		if(ENABLE_WAIT) gpu_mgmt_mutexes[which_device].lock();
//
//		cutilSafeCall( cudaSetDevice(which_device) );
//		cutilSafeCall( cudaDeviceSetLimit(cudaLimitPrintfFifoSize, 0) );
//		cutilSafeCall( cudaDeviceSetLimit(cudaLimitMallocHeapSize, 0) );
//
//		cutilSafeCall( cudaStreamCreate(&streams[which_device]) );
//
//		// enable P2P migrations.
//		// we assume all GPUs are on the same I/O hub.
//		for(int j = 0; j < GPU_PARTITION_SIZE; ++j)
//		{
//			int other_device = GPU_PARTITION*GPU_PARTITION_SIZE + j;
//
//			if(which_device != other_device)
//			{
//				cutilSafeCall( cudaDeviceEnablePeerAccess(other_device, 0) );
//			}
//		}
//
//		if(i == 0)
//		{
//			struct cudaDeviceProp pi;
//			cudaGetDeviceProperties(&pi, i);
//			gpuCyclesPerSecond = pi.clockRate * 1000; /* khz -> hz */
//		}
//
//		if(ENABLE_WAIT) gpu_mgmt_mutexes[which_device].unlock();
//	}

	printf("GPUs have been configured.\n");
}
534 | |||
/* One-time GPU-side setup, in order: device configuration, host buffer
 * allocation, device buffer allocation, then litmus lock creation. */
static void init_cuda()
{
	configure_gpus();
	allocate_host_memory();
	allocate_device_memory();
	allocate_locks();
}
542 | |||
543 | static void exit_cuda() | ||
544 | { | ||
545 | for(int i = 0; i < GPU_PARTITION_SIZE; ++i) | ||
546 | { | ||
547 | int which_device = GPU_PARTITION*GPU_PARTITION_SIZE + i; | ||
548 | |||
549 | if(ENABLE_WAIT) gpu_mgmt_mutexes[which_device].lock(); | ||
550 | |||
551 | // cutilSafeCall( cudaSetDevice(which_device) ); | ||
552 | // cutilSafeCall( cudaDeviceReset() ); | ||
553 | |||
554 | if(ENABLE_WAIT) gpu_mgmt_mutexes[which_device].unlock(); | ||
555 | } | ||
556 | } | ||
557 | |||
558 | static void catchExit(void) | ||
559 | { | ||
560 | if(GPU_TASK) | ||
561 | { | ||
562 | // try to unlock everything. litmus will prevent bogus calls. | ||
563 | if(NUM_SIMULT_USERS > 1) | ||
564 | { | ||
565 | for(int i = 0; i < GPU_PARTITION_SIZE; ++i) | ||
566 | { | ||
567 | int which_device = GPU_PARTITION*GPU_PARTITION_SIZE + i; | ||
568 | |||
569 | litmus_unlock(EE_LOCKS[which_device]); | ||
570 | litmus_unlock(CE_SEND_LOCKS[which_device]); | ||
571 | if(NUM_SIMULT_USERS == 2) { | ||
572 | litmus_unlock(CE_RECV_LOCKS[which_device]); | ||
573 | } | ||
574 | } | ||
575 | } | ||
576 | |||
577 | if(CUR_DEVICE >= 0) { | ||
578 | unregister_nv_device(CUR_DEVICE); | ||
579 | } | ||
580 | |||
581 | litmus_unlock(KEXCLU_LOCK); | ||
582 | } | ||
583 | } | ||
584 | |||
/* Move this task's persistent state to GPU `destination` before running
 * there. Two strategies: stage through host memory (MIGRATE_VIA_SYSMEM)
 * or copy device-to-device. Skipped entirely when state is broadcast to
 * all GPUs after each job (BROADCAST_STATE) or there is no state.
 * NOTE(review): relies on globals CUR_DEVICE/LAST_DEVICE already naming
 * the new/previous GPU; the sysmem path writes to d_state_data[CUR_DEVICE]
 * while the P2P path writes to d_state_data[destination] -- presumably
 * CUR_DEVICE == destination at every call site (see gpu_loop_for). */
static void migrateToGPU(int destination)
{
	if(!BROADCAST_STATE && STATE_SIZE > 0)
	{
		if(MIGRATE_VIA_SYSMEM)
		{
			// Stage 1: pull state off the previous GPU into host memory.
			chunkMemcpy(h_state_data, d_state_data[LAST_DEVICE], STATE_SIZE,
						cudaMemcpyDeviceToHost, LAST_DEVICE, useEngineLocks());
		}
	}

//	cutilSafeCall( cudaSetDevice(destination) );

	if(!BROADCAST_STATE && STATE_SIZE > 0)
	{
		if(MIGRATE_VIA_SYSMEM)
		{
			// Stage 2: push the staged state onto the new GPU.
			chunkMemcpy(d_state_data[CUR_DEVICE], h_state_data, STATE_SIZE,
						cudaMemcpyHostToDevice, CUR_DEVICE, useEngineLocks());
		}
		else
		{
			// Direct device-to-device copy (peer-to-peer path); the extra
			// trailing argument names the peer device.
			chunkMemcpy(d_state_data[destination],
						d_state_data[LAST_DEVICE],
						STATE_SIZE,
						cudaMemcpyDeviceToDevice,
						CUR_DEVICE,
						useEngineLocks(),
						destination);
		}
	}
}
617 | |||
/* Replicate this task's state from GPU `from` to every other GPU in the
 * partition, either staged through host memory (MIGRATE_VIA_SYSMEM) or
 * via direct device-to-device copies. No-op when STATE_SIZE == 0.
 * NOTE(review): the sysmem path temporarily reassigns the global
 * CUR_DEVICE to each target device and restores it at the end --
 * presumably so chunkMemcpy's locking targets the right GPU; callers must
 * not depend on CUR_DEVICE mid-broadcast. */
static void broadcastState(int from)
{
	if(STATE_SIZE > 0)
	{
		// Caller must already be running on (and hold) GPU `from`.
		assert(CUR_DEVICE == from);

		if(MIGRATE_VIA_SYSMEM)
		{
			// Stage the source GPU's state into host memory once; each
			// target then copies from the host buffer.
			chunkMemcpy(h_state_data, d_state_data[from], STATE_SIZE,
						cudaMemcpyDeviceToHost, from, useEngineLocks());
		}

		for(int i = 0; i < GPU_PARTITION_SIZE; ++i)
		{
			int which_device = GPU_PARTITION*GPU_PARTITION_SIZE + i;
			if(which_device != from)
			{
				if(MIGRATE_VIA_SYSMEM)
				{
//					cutilSafeCall( cudaSetDevice(which_device) );
					CUR_DEVICE = which_device; // temporary
					chunkMemcpy(d_state_data[which_device], h_state_data, STATE_SIZE,
								cudaMemcpyHostToDevice, which_device, useEngineLocks());
				}
				else
				{
					// Direct device-to-device copy from the source GPU.
					chunkMemcpy(d_state_data[which_device],
								d_state_data[from],
								STATE_SIZE,
								cudaMemcpyDeviceToDevice,
								from,
								useEngineLocks(),
								which_device);
				}
			}
		}

		// Restore CUR_DEVICE after the temporary reassignments above.
		if(MIGRATE_VIA_SYSMEM && CUR_DEVICE != from)
		{
//			cutilSafeCall( cudaSetDevice(from) );
			CUR_DEVICE = from;
		}
	}
}
662 | |||
663 | //// Executes on graphics card. | ||
664 | //__global__ void docudaspin(unsigned int cycles) | ||
665 | //{ | ||
666 | // long long unsigned int elapsed = 0; | ||
667 | // long long int now = clock64(); | ||
668 | // long long int last; | ||
669 | // do | ||
670 | // { | ||
671 | // last = now; | ||
672 | // now = clock64(); | ||
673 | // elapsed += max(0ll, (long long int)(now - last)); // don't count iterations with clock roll-over | ||
674 | // }while(elapsed < cycles); | ||
675 | // | ||
676 | // return; | ||
677 | //} | ||
678 | |||
679 | |||
680 | |||
/* Simulate one GPU burst of `gpu_sec_time` seconds:
 *  1. acquire a GPU via the k-exclusion lock (the lock call returns the
 *     device index we were granted);
 *  2. migrate state if we landed on a different GPU than last job;
 *  3. copy input in, run the (stubbed) spin kernel under the execution-
 *     engine lock, copy output back;
 *  4. optionally broadcast state to the rest of the partition;
 *  5. release the GPU and remember it in LAST_DEVICE.
 * NOTE(review): `emergency_exit` is currently unused here.
 */
static void gpu_loop_for(double gpu_sec_time, double emergency_exit)
{
	// Launch parameters for docudaspin; unused while the kernel launch
	// below is commented out in this fake-CUDA build.
	unsigned int numcycles = (unsigned int)(gpuCyclesPerSecond * gpu_sec_time);
	int numblocks = 1;
	int blocksz = 1;

	// litmus_lock() on the k-exclusion lock returns the granted GPU index.
	CUR_DEVICE = litmus_lock(KEXCLU_LOCK);
	{
		if(CUR_DEVICE != LAST_DEVICE && LAST_DEVICE != -1)
		{
			migrateToGPU(CUR_DEVICE);
		}

		if(SEND_SIZE > 0)
		{
			// handles chunking and locking, as appropriate.
			chunkMemcpy(d_send_data[CUR_DEVICE], h_send_data, SEND_SIZE,
						cudaMemcpyHostToDevice, CUR_DEVICE, useEngineLocks());
		}

		// Execution-engine lock brackets the (stubbed) kernel launch.
		if(useEngineLocks()) litmus_lock(EE_LOCKS[CUR_DEVICE]);

//		docudaspin <<<numblocks,blocksz, 0, streams[CUR_DEVICE]>>> (numcycles);
//		cutilSafeCall( cudaStreamSynchronize(streams[CUR_DEVICE]) );

		if(useEngineLocks()) litmus_unlock(EE_LOCKS[CUR_DEVICE]);

		if(RECV_SIZE > 0)
		{
			chunkMemcpy(h_recv_data, d_recv_data[CUR_DEVICE], RECV_SIZE,
						cudaMemcpyDeviceToHost, CUR_DEVICE, useEngineLocks());
		}

		if(BROADCAST_STATE)
		{
			broadcastState(CUR_DEVICE);
		}
	}
	litmus_unlock(KEXCLU_LOCK);

	// Remember where we ran so the next job can migrate (or avoid it);
	// CUR_DEVICE == -1 signals "not holding a GPU" (see catchExit()).
	LAST_DEVICE = CUR_DEVICE;
	CUR_DEVICE = -1;
}
724 | |||
725 | |||
726 | static void debug_delay_loop(void) | ||
727 | { | ||
728 | double start, end, delay; | ||
729 | |||
730 | while (1) { | ||
731 | for (delay = 0.5; delay > 0.01; delay -= 0.01) { | ||
732 | start = wctime(); | ||
733 | loop_for(delay, 0); | ||
734 | end = wctime(); | ||
735 | printf("%6.4fs: looped for %10.8fs, delta=%11.8fs, error=%7.4f%%\n", | ||
736 | delay, | ||
737 | end - start, | ||
738 | end - start - delay, | ||
739 | 100 * (end - start - delay) / delay); | ||
740 | } | ||
741 | } | ||
742 | } | ||
743 | |||
744 | static int job(double exec_time, double gpu_sec_time, double program_end) | ||
745 | { | ||
746 | if (wctime() > program_end) | ||
747 | return 0; | ||
748 | else if (!GPU_TASK) | ||
749 | { | ||
750 | loop_for(exec_time, program_end + 1); | ||
751 | } | ||
752 | else | ||
753 | { | ||
754 | double cpu_bookend = (exec_time)/2.0; | ||
755 | |||
756 | loop_for(cpu_bookend, program_end + 1); | ||
757 | gpu_loop_for(gpu_sec_time, program_end + 1); | ||
758 | loop_for(cpu_bookend, program_end + 1); | ||
759 | } | ||
760 | return 1; | ||
761 | } | ||
762 | |||
763 | #define OPTSTR "p:ls:e:g:G:W:N:S:R:T:BMaLyC:rz:" | ||
764 | |||
/* Entry point for the fake-CUDA rtspin benchmark task.
 *
 * Parses scheduling and GPU parameters from the command line (see OPTSTR),
 * optionally wires up boost::interprocess shared-memory barriers so a set
 * of tasks release together, becomes a LITMUS^RT real-time task, runs
 * periodic jobs until `duration` wall-clock seconds elapse, then prints a
 * one-line CSV summary of response-time statistics gathered with blitz.
 *
 * Positional arguments: wcet_ms period_ms duration_s.
 */
int main(int argc, char** argv)
{
	// Release any held litmus locks even on abnormal exit.
	atexit(catchExit);

	int ret;
	lt_t wcet;
	lt_t period;
	double wcet_ms, period_ms;
	int migrate = 0;
	int cpu = 0;
	int opt;
	int test_loop = 0;
//	int column = 1;
	// NOTE(review): `file` is never assigned after this (the CSV option
	// parsing is commented out), so every `if (file)` branch below is
	// currently dead code kept for a future re-enable.
	const char *file = NULL;
	int want_enforcement = 0;
	double duration = 0, releaseTime = 0;
	double *exec_times = NULL;
	double scale = 1.0;
	uint64_t cur_job;
	uint64_t num_jobs;

	int create_shm = -1;
	int num_tasks = 0;

	double gpu_sec_ms = 0;

	while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
//		printf("opt = %c optarg = %s\n", opt, optarg);
		switch (opt) {
//		case 'w':
//			ENABLE_WAIT = 1;
//			break;
		case 'p':
			// partition: migrate to the given CPU
			cpu = atoi(optarg);
			migrate = 1;
			break;
		case 'l':
			// calibration mode: run debug_delay_loop() and exit
			test_loop = 1;
			break;
		case 's':
			// scale factor applied to execution times and buffer sizes
			scale = atof(optarg);
			break;
		case 'e':
			// per-job GPU execution time (ms)
			gpu_sec_ms = atof(optarg);
			break;
//		case 'x':
//			trans_sec_ms = atof(optarg);
//			break;
		case 'z':
			NUM_SIMULT_USERS = atoi(optarg);
			break;
		case 'g':
			// mark this as a GPU-using task with the given partition size
			GPU_TASK = 1;
			GPU_PARTITION_SIZE = atoi(optarg);
			break;
		case 'G':
			GPU_PARTITION = atoi(optarg);
			break;
		case 'S':
			// send/recv/state sizes are given in KB on the command line
			SEND_SIZE = (size_t)(atof(optarg)*1024);
			break;
		case 'R':
			RECV_SIZE = (size_t)(atof(optarg)*1024);
			break;
		case 'T':
			STATE_SIZE = (size_t)(atof(optarg)*1024);
			break;
		case 'B':
			BROADCAST_STATE = true;
			break;
		case 'M':
			MIGRATE_VIA_SYSMEM = true;
			break;
		case 'a':
			ENABLE_AFFINITY = true;
			break;
		case 'r':
			RELAX_FIFO_MAX_LEN = true;
			break;
		case 'L':
			USE_KFMLP = true;
			break;
		case 'y':
			USE_DYNAMIC_GROUP_LOCKS = true;
			break;
		case 'C':
			// chunk size in KB for chunked memcpys
			ENABLE_CHUNKING = true;
			CHUNK_SIZE = (size_t)(atof(optarg)*1024);
			break;
		case 'W':
			// this task creates the GPU barrier shm, sized for N users
			create_shm = atoi(optarg);
			break;
		case 'N':
			// total number of tasks sharing the release barrier
			num_tasks = atoi(optarg);
			break;
		case ':':
			usage("Argument missing.");
			break;
		case '?':
		default:
			usage("Bad argument.");
			break;
		}
	}

	if (test_loop) {
		debug_delay_loop();
		return 0;
	}

//	if (file) {
//		int num_jobs_tmp;
//		get_exec_times(file, column, &num_jobs_tmp, &exec_times);
//		num_jobs = num_jobs_tmp;
//
//		if (argc - optind < 2)
//			usage("Arguments missing.");
//
//		for (cur_job = 0; cur_job < num_jobs; ++cur_job) {
//			/* convert the execution time to seconds */
//			duration += exec_times[cur_job] * 0.001;
//		}
//	} else {
		/*
		 * if we're not reading from the CSV file, then we need
		 * three parameters
		 */
		if (argc - optind < 3)
			usage("Arguments missing.");
//	}

	wcet_ms = atof(argv[optind + 0]);
	period_ms = atof(argv[optind + 1]);

	wcet = wcet_ms * __NS_PER_MS;
	period = period_ms * __NS_PER_MS;
	if (wcet <= 0)
		usage("The worst-case execution time must be a "
				"positive number.");
	if (period <= 0)
		usage("The period must be a positive number.");
	if (!file && wcet > period) {
		usage("The worst-case execution time must not "
				"exceed the period.");
	}

	if (!file)
	{
		// number of jobs that fit in `duration` seconds, plus one pad slot
		duration = atof(argv[optind + 2]);
		num_jobs = ((double)duration*1e3)/period_ms;
		++num_jobs; // padding
	}
	else if (file && num_jobs > 1)
	{
		duration += period_ms * 0.001 * (num_jobs - 1);
	}

	if (migrate) {
		ret = be_migrate_to(cpu);
		if (ret < 0)
			bail_out("could not migrate to target partition");
	}

	// Release-barrier setup: one task (num_tasks > 0) creates the shared
	// segment; the rest poll until it exists, then attach.
	if(ENABLE_WAIT)
	{
		if(num_tasks > 0)
		{
			printf("%d creating release shared memory\n", getpid());
			shared_memory_object::remove("release_barrier_memory");
			release_segment_ptr = new managed_shared_memory(create_only, "release_barrier_memory", 4*1024);

			printf("%d creating release barrier for %d users\n", getpid(), num_tasks);
			release_barrier = release_segment_ptr->construct<barrier>("barrier release_barrier")(num_tasks);

			// shared slot for the commonly-agreed first release time (ns)
			init_release_time = release_segment_ptr->construct<uint64_t>("uint64_t instance")();
			*init_release_time = 0;
		}
		else
		{
			do
			{
				try
				{
					printf("%d opening release shared memory\n", getpid());
					segment_ptr = new managed_shared_memory(open_only, "release_barrier_memory");
				}
				catch(...)
				{
					printf("%d shared memory not ready.  sleeping\n", getpid());
					sleep(1);
				}
			}while(segment_ptr == NULL);

			release_barrier = segment_ptr->find<barrier>("barrier release_barrier").first;
			init_release_time = segment_ptr->find<uint64_t>("uint64_t instance").first;
		}
	}


	if(GPU_TASK)
	{
		// GPU-barrier setup mirrors the release barrier: the task given -W
		// creates the segment (with 8 per-device management mutexes); the
		// rest poll and attach.
		if(ENABLE_WAIT)
		{
			if(create_shm > -1)
			{
				printf("%d creating shared memory\n", getpid());
				shared_memory_object::remove("gpu_barrier_memory");
				segment_ptr = new managed_shared_memory(create_only, "gpu_barrier_memory", 4*1024);

				printf("%d creating a barrier for %d users\n", getpid(), create_shm);
				gpu_barrier = segment_ptr->construct<barrier>("barrier instance")(create_shm);
				printf("%d creating gpu mgmt mutexes for 8 devices\n", getpid());
				gpu_mgmt_mutexes = segment_ptr->construct<interprocess_mutex>("interprocess_mutex m")[8]();
			}
			else
			{
				do
				{
					try
					{
						printf("%d opening shared memory\n", getpid());
						segment_ptr = new managed_shared_memory(open_only, "gpu_barrier_memory");
					}
					catch(...)
					{
						printf("%d shared memory not ready.  sleeping\n", getpid());
						sleep(1);
					}
				}while(segment_ptr == NULL);

				gpu_barrier = segment_ptr->find<barrier>("barrier instance").first;
				gpu_mgmt_mutexes = segment_ptr->find<interprocess_mutex>("interprocess_mutex m").first;
			}
		}

		// scale data transmission too??
		SEND_SIZE *= scale;
		RECV_SIZE *= scale;
		STATE_SIZE *= scale;

		init_cuda();
	}

	ret = sporadic_task_ns(wcet, period, 0, cpu, RT_CLASS_SOFT,
				want_enforcement ? PRECISE_ENFORCEMENT
						: NO_ENFORCEMENT,
				migrate);
	if (ret < 0)
		bail_out("could not setup rt task params");

	init_litmus();

	ret = task_mode(LITMUS_RT_TASK);
	if (ret != 0)
		bail_out("could not become RT task");



	// Response-time log: one slot per expected job plus one overflow slot
	// that absorbs any extra iterations (see the min() on the index below).
	uint64_t jobCount = 0;
	blitz::Array<uint64_t, 1> responseTimeLog(num_jobs+1);

	struct timespec spec;
	uint64_t release;
	uint64_t finish;


	if (ENABLE_WAIT) {
		printf("Waiting for release.\n");
		ret = wait_for_ts_release();
		if (ret != 0)
			bail_out("wait_for_ts_release()");
	}
	else
	{
		sleep_next_period();
	}

	// Agree on a common initial release time across tasks: first task to
	// CAS its timestamp into the shared slot wins; everyone else adopts it.
	// NOTE(review): init_release_time is only set up in the ENABLE_WAIT
	// paths above -- presumably ENABLE_WAIT defaults to enabled, otherwise
	// this CAS dereferences an uninitialized/null pointer; confirm.
	clock_gettime(CLOCK_MONOTONIC, &spec);
	release = timespec_to_ns(spec);
	if (!__sync_bool_compare_and_swap(init_release_time, 0, release))
	{
		release = *init_release_time;
	}

	releaseTime = wctime();
	double failsafeEnd = releaseTime + duration;


	if (file) {
		/* use times read from the CSV file */
		for (cur_job = 0; cur_job < num_jobs; ++cur_job) {
			/* convert job's length to seconds */
			job(exec_times[cur_job] * 0.001 * scale,
				gpu_sec_ms * 0.001 * scale,
				failsafeEnd);
		}
	} else {
		/* convert to seconds and scale */
		int keepGoing;
		do
		{
			keepGoing = job(wcet_ms * 0.001 * scale, gpu_sec_ms * 0.001 * scale, failsafeEnd);


			// Record this job's response time (finish - modeled release).
			clock_gettime(CLOCK_MONOTONIC, &spec);
			finish = timespec_to_ns(spec);

			responseTimeLog(min(num_jobs,jobCount++)) = finish - release;

			// this is an estimated upper-bound on release time.  it may be off by several microseconds.
#ifdef RESET_RELEASE_ON_MISS
			release = (release + period < finish) ?
					finish :  /* missed deadline.  adopt next release as current time. */
					release + period;  /* some time in the future. */
#else
			release = release + period; // allow things to get progressively later.
#endif

			sleep_next_period();
			// Clamp the modeled release to "now" in case we woke early.
			clock_gettime(CLOCK_MONOTONIC, &spec);
			release = min(timespec_to_ns(spec), release);

		} while(keepGoing);
	}

	if(GPU_TASK && ENABLE_WAIT)
	{
		printf("%d waiting at barrier\n", getpid());
		gpu_barrier->wait();
	}

	ret = task_mode(BACKGROUND_TASK);
	if (ret != 0)
		bail_out("could not become regular task (huh?)");

	if (file)
		free(exec_times);

	if(GPU_TASK)
	{
		/*
		if(ENABLE_WAIT)
		{
			// wait for all GPU using tasks ext RT mode.
			printf("%d waiting at barrier\n", getpid());
			gpu_barrier->wait();
		}
		*/

		exit_cuda();

		if(ENABLE_WAIT)
		{
			/* wait before we clean up memory */
			printf("%d waiting for all to shutdown GPUs\n", getpid());
			gpu_barrier->wait();

			/*
			if(create_shm > -1)
			{
				printf("%d removing shared memory\n", getpid());
				shared_memory_object::remove("gpu_barrier_memory");
			}
			*/
		}
	}


	if (ENABLE_WAIT)
	{
		printf("%d waiting at exit barrier\n", getpid());
		release_barrier->wait();
	}


	// Final CSV summary over the jobs actually completed. USED() selects
	// the filled prefix of the log (excluding the pad slot).
	char gpu_using_str[] = "GPU\n";
	char cpu_only_str[] = "CPU\n";
#define USED(arr) (arr)(Range(fromStart,min(num_jobs-1,jobCount-1)))
	// period (ms), avg-rt, min-rt, max-rt, avg-slack, numMisses
	printf("DONE,%d,%d,%f,%f,%f,%lu,%lu,%f,%lu,%d,%d,%s",
		cpu,
		getpid(),
		period_ms,
		// average
		blitz::mean(USED(responseTimeLog)),
		// average pct of period
		100.0*(blitz::mean(USED(responseTimeLog))/period),
		// min
		blitz::min(USED(responseTimeLog)),
		// max
		blitz::max(USED(responseTimeLog)),
		// average slack
		blitz::mean((uint64_t)period - USED(responseTimeLog)),
		// num jobs
		min(num_jobs-1,jobCount-1),
		// num misses
		blitz::count(USED(responseTimeLog) > (uint64_t)period),
		// num misses w/ unbounded
		blitz::count(USED(responseTimeLog) > (uint64_t)(2*period)),
		// flag gpu-using tasks
		((GPU_TASK) ? gpu_using_str : cpu_only_str)
	);

	return 0;
}