aboutsummaryrefslogtreecommitdiffstats
path: root/gpu
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2013-02-28 10:27:11 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2013-02-28 10:27:11 -0500
commitf338b34ea0fb6136ea3895a07161ece030c4b998 (patch)
treea874ad94b1af1dc31254583932c777b8cd1df32c /gpu
parentaf6f6aa0477fc3887792a3646bfb98d469a695b7 (diff)
Work with wip-2012.3-gpu
Diffstat (limited to 'gpu')
-rw-r--r--gpu/dgl.c93
-rw-r--r--gpu/ikglptest.c48
-rw-r--r--gpu/nested.c22
-rw-r--r--gpu/rtspin_fake_cuda.cpp40
4 files changed, 136 insertions, 67 deletions
diff --git a/gpu/dgl.c b/gpu/dgl.c
index a045879..dc68ead 100644
--- a/gpu/dgl.c
+++ b/gpu/dgl.c
@@ -17,44 +17,55 @@
17/* Include the LITMUS^RT API.*/ 17/* Include the LITMUS^RT API.*/
18#include "litmus.h" 18#include "litmus.h"
19 19
20#define xfprintf( ... ) do { \
21if(!SILENT) { fprintf( __VA_ARGS__ ) ; } \
22} while (0)
23
24
20/* Catch errors. 25/* Catch errors.
21 */ 26 */
22#define CALL( exp ) do { \ 27#define CALL( exp ) do { \
23 int ret; \ 28 int ret; \
24 ret = exp; \ 29 ret = exp; \
25 if (ret != 0) \ 30 if (ret != 0) \
26 fprintf(stderr, "%s failed: %m\n", #exp);\ 31 xfprintf(stderr, "%s failed: %m\n", #exp);\
27 else \ 32 else \
28 fprintf(stderr, "%s ok.\n", #exp); \ 33 xfprintf(stderr, "%s ok.\n", #exp); \
29 } while (0) 34 } while (0)
30 35
31#define TH_CALL( exp ) do { \ 36#define TH_CALL( exp ) do { \
32 int ret; \ 37 int ret; \
33 ret = exp; \ 38 ret = exp; \
34 if (ret != 0) \ 39 if (ret != 0) \
35 fprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \ 40 xfprintf(stderr, "[%d] %s failed: %m\n", ctx->id, #exp); \
36 else \ 41 else \
37 fprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \ 42 xfprintf(stderr, "[%d] %s ok.\n", ctx->id, #exp); \
38 } while (0) 43 } while (0)
39 44
40#define TH_SAFE_CALL( exp ) do { \ 45#define TH_SAFE_CALL( exp ) do { \
41 int ret; \ 46 int ret; \
42 fprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \ 47 xfprintf(stderr, "[%d] calling %s...\n", ctx->id, #exp); \
43 ret = exp; \ 48 ret = exp; \
44 if (ret != 0) \ 49 if (ret != 0) \
45 fprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \ 50 xfprintf(stderr, "\t...[%d] %s failed: %m\n", ctx->id, #exp); \
46 else \ 51 else \
47 fprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \ 52 xfprintf(stderr, "\t...[%d] %s ok.\n", ctx->id, #exp); \
48 } while (0) 53 } while (0)
49 54
50 55
56
57
58
51/* these are only default values */ 59/* these are only default values */
52int NUM_THREADS=3; 60int NUM_THREADS=3;
53int NUM_SEMS=1; 61int NUM_SEMS=1;
54int NUM_REPLICAS=1; 62unsigned int NUM_REPLICAS=0;
55int NEST_DEPTH=1; 63int NEST_DEPTH=1;
56 64
65int SILENT = 0;
66
57int SLEEP_BETWEEN_JOBS = 1; 67int SLEEP_BETWEEN_JOBS = 1;
68int USE_PRIOQ = 0;
58 69
59#define MAX_SEMS 1000 70#define MAX_SEMS 1000
60#define MAX_NEST_DEPTH 10 71#define MAX_NEST_DEPTH 10
@@ -78,7 +89,7 @@ void* rt_thread(void* _ctx);
78int nested_job(struct thread_context* ctx, int *count, int *next); 89int nested_job(struct thread_context* ctx, int *count, int *next);
79int job(struct thread_context*); 90int job(struct thread_context*);
80 91
81#define OPTSTR "t:k:s:d:f" 92#define OPTSTR "t:k:s:d:fqX"
82 93
83int main(int argc, char** argv) 94int main(int argc, char** argv)
84{ 95{
@@ -108,6 +119,12 @@ int main(int argc, char** argv)
108 case 'f': 119 case 'f':
109 SLEEP_BETWEEN_JOBS = 0; 120 SLEEP_BETWEEN_JOBS = 0;
110 break; 121 break;
122 case 'q':
123 USE_PRIOQ = 1;
124 break;
125 case 'X':
126 SILENT = 1;
127 break;
111 default: 128 default:
112 fprintf(stderr, "Unknown option: %c\n", opt); 129 fprintf(stderr, "Unknown option: %c\n", opt);
113 exit(-1); 130 exit(-1);
@@ -150,26 +167,38 @@ void* rt_thread(void* _ctx)
150 167
151 /* Vary period a little bit. */ 168 /* Vary period a little bit. */
152 TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*ctx->id, 0, 0, LITMUS_LOWEST_PRIORITY, 169 TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*ctx->id, 0, 0, LITMUS_LOWEST_PRIORITY,
153 RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 0) ); 170 RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 1) );
154 171
155 ctx->ikglp = open_ikglp_sem(ctx->fd, 0, (void*)&NUM_REPLICAS); 172 if (NUM_REPLICAS) {
156 if(ctx->ikglp < 0) 173 ctx->ikglp = open_ikglp_sem(ctx->fd, 0, NUM_REPLICAS);
157 perror("open_ikglp_sem"); 174 if(ctx->ikglp < 0)
158 else 175 perror("open_ikglp_sem");
159 printf("ikglp od = %d\n", ctx->ikglp); 176 else
177 xfprintf(stdout, "ikglp od = %d\n", ctx->ikglp);
178 }
160 179
180
161 for (i = 0; i < NUM_SEMS; i++) { 181 for (i = 0; i < NUM_SEMS; i++) {
162 ctx->od[i] = open_rsm_sem(ctx->fd, i+1); 182 if(!USE_PRIOQ) {
163 if(ctx->od[i] < 0) 183 ctx->od[i] = open_fifo_sem(ctx->fd, i+1);
164 perror("open_rsm_sem"); 184 if(ctx->od[i] < 0)
165 else 185 perror("open_fifo_sem");
166 printf("rsm[%d] od = %d\n", i, ctx->od[i]); 186 else
187 xfprintf(stdout, "fifo[%d] od = %d\n", i, ctx->od[i]);
188 }
189 else {
190 ctx->od[i] = open_prioq_sem(ctx->fd, i+1);
191 if(ctx->od[i] < 0)
192 perror("open_prioq_sem");
193 else
194 xfprintf(stdout, "prioq[%d] od = %d\n", i, ctx->od[i]);
195 }
167 } 196 }
168 197
169 TH_CALL( task_mode(LITMUS_RT_TASK) ); 198 TH_CALL( task_mode(LITMUS_RT_TASK) );
170 199
171 200
172 printf("[%d] Waiting for TS release.\n ", ctx->id); 201 xfprintf(stdout, "[%d] Waiting for TS release.\n ", ctx->id);
173 wait_for_ts_release(); 202 wait_for_ts_release();
174 ctx->count = 0; 203 ctx->count = 0;
175 204
@@ -186,27 +215,25 @@ void* rt_thread(void* _ctx)
186 } 215 }
187 216
188 217
189 replica = litmus_lock(ctx->ikglp); 218 if(NUM_REPLICAS) {
190 printf("[%d] got ikglp replica %d.\n", ctx->id, replica); 219 replica = litmus_lock(ctx->ikglp);
191 fflush(stdout); 220 xfprintf(stdout, "[%d] got ikglp replica %d.\n", ctx->id, replica);
221 }
192 222
193 223
194 litmus_dgl_lock(dgl, dgl_size); 224 litmus_dgl_lock(dgl, dgl_size);
195 printf("[%d] acquired dgl.\n", ctx->id); 225 xfprintf(stdout, "[%d] acquired dgl.\n", ctx->id);
196 fflush(stdout);
197
198 226
199 do_exit = job(ctx); 227 do_exit = job(ctx);
200 228
201 229
202 printf("[%d] unlocking dgl.\n", ctx->id); 230 xfprintf(stdout, "[%d] unlocking dgl.\n", ctx->id);
203 fflush(stdout);
204 litmus_dgl_unlock(dgl, dgl_size); 231 litmus_dgl_unlock(dgl, dgl_size);
205 232
206 233 if(NUM_REPLICAS) {
207 printf("[%d]: freeing ikglp replica %d.\n", ctx->id, replica); 234 xfprintf(stdout, "[%d]: freeing ikglp replica %d.\n", ctx->id, replica);
208 fflush(stdout); 235 litmus_unlock(ctx->ikglp);
209 litmus_unlock(ctx->ikglp); 236 }
210 237
211 if(SLEEP_BETWEEN_JOBS && !do_exit) { 238 if(SLEEP_BETWEEN_JOBS && !do_exit) {
212 sleep_next_period(); 239 sleep_next_period();
diff --git a/gpu/ikglptest.c b/gpu/ikglptest.c
index 5f566d5..f802801 100644
--- a/gpu/ikglptest.c
+++ b/gpu/ikglptest.c
@@ -70,6 +70,7 @@ int RELAX_FIFO_MAX_LEN = 0;
70int USE_DYNAMIC_GROUP_LOCKS = 0; 70int USE_DYNAMIC_GROUP_LOCKS = 0;
71 71
72int SLEEP_BETWEEN_JOBS = 1; 72int SLEEP_BETWEEN_JOBS = 1;
73int USE_PRIOQ = 0;
73 74
74int gAuxRun = 1; 75int gAuxRun = 1;
75pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER; 76pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;
@@ -183,7 +184,7 @@ struct avg_info feedback(int _a, int _b)
183 184
184 185
185 186
186#define OPTSTR "t:k:o:z:s:d:lfaryA:" 187#define OPTSTR "t:k:o:z:s:d:lfaryA:q"
187 188
188int main(int argc, char** argv) 189int main(int argc, char** argv)
189{ 190{
@@ -238,6 +239,9 @@ int main(int argc, char** argv)
238 case 'r': 239 case 'r':
239 RELAX_FIFO_MAX_LEN = 1; 240 RELAX_FIFO_MAX_LEN = 1;
240 break; 241 break;
242 case 'q':
243 USE_PRIOQ = 1;
244 break;
241 default: 245 default:
242 fprintf(stderr, "Unknown option: %c\n", opt); 246 fprintf(stderr, "Unknown option: %c\n", opt);
243 exit(-1); 247 exit(-1);
@@ -458,14 +462,17 @@ void* rt_thread(void* _ctx)
458 } 462 }
459 else { 463 else {
460// ctx->kexclu = open_ikglp_sem(ctx->fd, 0, &NUM_GPUS); 464// ctx->kexclu = open_ikglp_sem(ctx->fd, 0, &NUM_GPUS);
461 ctx->kexclu = open_ikglp_gpu_sem(ctx->fd, 465 ctx->kexclu = open_gpusync_token_lock(ctx->fd,
462 0, /* name */ 466 0, /* name */
463 NUM_GPUS, 467 NUM_GPUS,
464 GPU_OFFSET, 468 GPU_OFFSET,
465 NUM_SIMULT_USERS, 469 NUM_SIMULT_USERS,
466 ENABLE_AFFINITY, 470 IKGLP_M_IN_FIFOS,
467 RELAX_FIFO_MAX_LEN 471 (!RELAX_FIFO_MAX_LEN) ?
468 ); 472 IKGLP_OPTIMAL_FIFO_LEN :
473 IKGLP_UNLIMITED_FIFO_LEN,
474 ENABLE_AFFINITY
475 );
469 } 476 }
470 if(ctx->kexclu < 0) 477 if(ctx->kexclu < 0)
471 perror("open_kexclu_sem"); 478 perror("open_kexclu_sem");
@@ -473,11 +480,20 @@ void* rt_thread(void* _ctx)
473 printf("kexclu od = %d\n", ctx->kexclu); 480 printf("kexclu od = %d\n", ctx->kexclu);
474 481
475 for (i = 0; i < NUM_SEMS; ++i) { 482 for (i = 0; i < NUM_SEMS; ++i) {
476 ctx->od[i] = open_rsm_sem(ctx->fd, i + ctx->kexclu + 2); 483 if(!USE_PRIOQ) {
477 if(ctx->od[i] < 0) 484 ctx->od[i] = open_fifo_sem(ctx->fd, i + ctx->kexclu + 2);
478 perror("open_rsm_sem"); 485 if(ctx->od[i] < 0)
479 else 486 perror("open_fifo_sem");
480 printf("rsm[%d] od = %d\n", i, ctx->od[i]); 487 else
488 printf("fifo[%d] od = %d\n", i, ctx->od[i]);
489 }
490 else {
491 ctx->od[i] = open_prioq_sem(ctx->fd, i + ctx->kexclu + 2);
492 if(ctx->od[i] < 0)
493 perror("open_prioq_sem");
494 else
495 printf("prioq[%d] od = %d\n", i, ctx->od[i]);
496 }
481 } 497 }
482 498
483 TH_CALL( task_mode(LITMUS_RT_TASK) ); 499 TH_CALL( task_mode(LITMUS_RT_TASK) );
@@ -486,10 +502,6 @@ void* rt_thread(void* _ctx)
486 wait_for_ts_release(); 502 wait_for_ts_release();
487 ctx->count = 0; 503 ctx->count = 0;
488 504
489// if (ctx->id == 0 && NUM_AUX_THREADS) {
490// CALL( enable_aux_rt_tasks() );
491// }
492
493 do { 505 do {
494 int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0))); 506 int first = (int)(NUM_SEMS * (rand_r(&(ctx->rand)) / (RAND_MAX + 1.0)));
495 int last = (first + NEST_DEPTH - 1 >= NUM_SEMS) ? NUM_SEMS - 1 : first + NEST_DEPTH - 1; 507 int last = (first + NEST_DEPTH - 1 >= NUM_SEMS) ? NUM_SEMS - 1 : first + NEST_DEPTH - 1;
diff --git a/gpu/nested.c b/gpu/nested.c
index 07e237b..8c39152 100644
--- a/gpu/nested.c
+++ b/gpu/nested.c
@@ -59,6 +59,8 @@ int NUM_SEMS=10;
59 59
60int SLEEP_BETWEEN_JOBS = 1; 60int SLEEP_BETWEEN_JOBS = 1;
61 61
62int USE_PRIOQ = 0;
63
62#define MAX_SEMS 1000 64#define MAX_SEMS 1000
63 65
64//#define NEST_DEPTH 5 66//#define NEST_DEPTH 5
@@ -80,7 +82,7 @@ void* rt_thread(void* _ctx);
80int nested_job(struct thread_context* ctx, int *count, int *next); 82int nested_job(struct thread_context* ctx, int *count, int *next);
81int job(struct thread_context*); 83int job(struct thread_context*);
82 84
83#define OPTSTR "t:s:d:f" 85#define OPTSTR "t:s:d:fq"
84 86
85int main(int argc, char** argv) 87int main(int argc, char** argv)
86{ 88{
@@ -105,6 +107,9 @@ int main(int argc, char** argv)
105 case 'f': 107 case 'f':
106 SLEEP_BETWEEN_JOBS = 0; 108 SLEEP_BETWEEN_JOBS = 0;
107 break; 109 break;
110 case 'q':
111 USE_PRIOQ = 1;
112 break;
108 default: 113 default:
109 fprintf(stderr, "Unknown option: %c\n", opt); 114 fprintf(stderr, "Unknown option: %c\n", opt);
110 exit(-1); 115 exit(-1);
@@ -148,12 +153,19 @@ void* rt_thread(void* _ctx)
148 153
149 TH_CALL( init_rt_thread() ); 154 TH_CALL( init_rt_thread() );
150 TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*ctx->id, 0, 0, 155 TH_CALL( sporadic_task_ns(EXEC_COST, PERIOD + 10*ctx->id, 0, 0,
151 LITMUS_LOWEST_PRIORITY, RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 0) ); 156 LITMUS_LOWEST_PRIORITY, RT_CLASS_SOFT, NO_ENFORCEMENT, NO_SIGNALS, 1) );
152 157
153 for (i = 0; i < NUM_SEMS; i++) { 158 for (i = 0; i < NUM_SEMS; i++) {
154 ctx->od[i] = open_rsm_sem(ctx->fd, i); 159 if (!USE_PRIOQ) {
155 if(ctx->od[i] < 0) 160 ctx->od[i] = open_fifo_sem(ctx->fd, i);
156 perror("open_rsm_sem"); 161 if(ctx->od[i] < 0)
162 perror("open_fifo_sem");
163 }
164 else {
165 ctx->od[i] = open_prioq_sem(ctx->fd, i);
166 if(ctx->od[i] < 0)
167 perror("open_prioq_sem");
168 }
157 //printf("[%d] ctx->od[%d]: %d\n", ctx->id, i, ctx->od[i]); 169 //printf("[%d] ctx->od[%d]: %d\n", ctx->id, i, ctx->od[i]);
158 } 170 }
159 171
diff --git a/gpu/rtspin_fake_cuda.cpp b/gpu/rtspin_fake_cuda.cpp
index 667c675..78e4f60 100644
--- a/gpu/rtspin_fake_cuda.cpp
+++ b/gpu/rtspin_fake_cuda.cpp
@@ -59,6 +59,7 @@ bool USE_DYNAMIC_GROUP_LOCKS = false;
59bool BROADCAST_STATE = false; 59bool BROADCAST_STATE = false;
60bool ENABLE_CHUNKING = false; 60bool ENABLE_CHUNKING = false;
61bool MIGRATE_VIA_SYSMEM = false; 61bool MIGRATE_VIA_SYSMEM = false;
62bool USE_PRIOQ = false;
62 63
63int GPU_PARTITION = 0; 64int GPU_PARTITION = 0;
64int GPU_PARTITION_SIZE = 0; 65int GPU_PARTITION_SIZE = 0;
@@ -378,37 +379,51 @@ static void allocate_locks()
378 ); 379 );
379 } 380 }
380 else { 381 else {
381 KEXCLU_LOCK = open_ikglp_gpu_sem(fd, 382 KEXCLU_LOCK = open_gpusync_token_lock(fd,
382 base_name, /* name */ 383 base_name, /* name */
383 GPU_PARTITION_SIZE, 384 GPU_PARTITION_SIZE,
384 GPU_PARTITION*GPU_PARTITION_SIZE, 385 GPU_PARTITION*GPU_PARTITION_SIZE,
385 NUM_SIMULT_USERS, 386 NUM_SIMULT_USERS,
386 ENABLE_AFFINITY, 387 IKGLP_M_IN_FIFOS,
387 RELAX_FIFO_MAX_LEN 388 (!RELAX_FIFO_MAX_LEN) ?
388 ); 389 IKGLP_OPTIMAL_FIFO_LEN :
390 IKGLP_UNLIMITED_FIFO_LEN,
391 ENABLE_AFFINITY
392 );
393// KEXCLU_LOCK = open_ikglp_gpu_sem(fd,
394// base_name, /* name */
395// GPU_PARTITION_SIZE,
396// GPU_PARTITION*GPU_PARTITION_SIZE,
397// NUM_SIMULT_USERS,
398// ENABLE_AFFINITY,
399// RELAX_FIFO_MAX_LEN
400// );
389 } 401 }
390 if(KEXCLU_LOCK < 0) 402 if(KEXCLU_LOCK < 0)
391 perror("open_kexclu_sem"); 403 perror("open_kexclu_sem");
392 404
393 if(NUM_SIMULT_USERS > 1) 405 if(NUM_SIMULT_USERS > 1)
394 { 406 {
407 open_sem_t opensem = (!USE_PRIOQ) ? open_fifo_sem : open_prioq_sem;
408 const char* opensem_label = (!USE_PRIOQ) ? "open_fifo_sem" : "open_prioq_sem";
409
395 // allocate the engine locks. 410 // allocate the engine locks.
396 for (int i = 0; i < MAX_GPUS; ++i) 411 for (int i = 0; i < MAX_GPUS; ++i)
397 { 412 {
398 EE_LOCKS[i] = open_rsm_sem(fd, (i+1)*10 + base_name); 413 EE_LOCKS[i] = opensem(fd, (i+1)*10 + base_name);
399 if(EE_LOCKS[i] < 0) 414 if(EE_LOCKS[i] < 0)
400 perror("open_rsm_sem"); 415 perror(opensem_label);
401 416
402 CE_SEND_LOCKS[i] = open_rsm_sem(fd, (i+1)*10 + base_name + 1); 417 CE_SEND_LOCKS[i] = opensem(fd, (i+1)*10 + base_name + 1);
403 if(CE_SEND_LOCKS[i] < 0) 418 if(CE_SEND_LOCKS[i] < 0)
404 perror("open_rsm_sem"); 419 perror(opensem_label);
405 420
406 if(NUM_SIMULT_USERS == 3) 421 if(NUM_SIMULT_USERS == 3)
407 { 422 {
408 // allocate a separate lock for the second copy engine 423 // allocate a separate lock for the second copy engine
409 CE_RECV_LOCKS[i] = open_rsm_sem(fd, (i+1)*10 + base_name + 2); 424 CE_RECV_LOCKS[i] = opensem(fd, (i+1)*10 + base_name + 2);
410 if(CE_RECV_LOCKS[i] < 0) 425 if(CE_RECV_LOCKS[i] < 0)
411 perror("open_rsm_sem"); 426 perror(opensem_label);
412 } 427 }
413 else 428 else
414 { 429 {
@@ -760,7 +775,7 @@ static int job(double exec_time, double gpu_sec_time, double program_end)
760 return 1; 775 return 1;
761} 776}
762 777
763#define OPTSTR "p:ls:e:g:G:W:N:S:R:T:BMaLyC:rz:" 778#define OPTSTR "p:ls:e:g:G:W:N:S:R:T:BMaLyC:rz:q"
764 779
765int main(int argc, char** argv) 780int main(int argc, char** argv)
766{ 781{
@@ -813,6 +828,9 @@ int main(int argc, char** argv)
813 case 'z': 828 case 'z':
814 NUM_SIMULT_USERS = atoi(optarg); 829 NUM_SIMULT_USERS = atoi(optarg);
815 break; 830 break;
831 case 'q':
832 USE_PRIOQ = true;
833 break;
816 case 'g': 834 case 'g':
817 GPU_TASK = 1; 835 GPU_TASK = 1;
818 GPU_PARTITION_SIZE = atoi(optarg); 836 GPU_PARTITION_SIZE = atoi(optarg);