author     Glenn Elliott <gelliott@cs.unc.edu>  2013-05-14 22:16:38 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>  2013-05-14 22:16:38 -0400
commit     62bed6ce5f20e44c80369d224812c012f5dd5ef1 (patch)
tree       381f493d2afc407a3a2414963f5ae47f7e4b0cfe
parent     95e840f68892d46289120d1042ee36f9eaf41de7 (diff)

log migrations when affinity is off

-rw-r--r--  gpu/gpuspin.cu    | 51
-rw-r--r--  include/litmus.h  | 11
2 files changed, 53 insertions, 9 deletions
diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu
index f361b86..304d937 100644
--- a/gpu/gpuspin.cu
+++ b/gpu/gpuspin.cu
@@ -26,6 +26,18 @@ using namespace ranlib;
 
 #define ms2s(ms) ((ms)*0.001)
 
+const unsigned int TOKEN_START = 100;
+const unsigned int TOKEN_END = 101;
+
+const unsigned int EE_START = 200;
+const unsigned int EE_END = 201;
+
+const unsigned int CE_SEND_START = 300;
+const unsigned int CE_SEND_END = 301;
+
+const unsigned int CE_RECV_START = 400;
+const unsigned int CE_RECV_END = 401;
+
 bool SILENT = true;
 inline int xprintf(const char *format, ...)
 {
@@ -382,6 +394,12 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count,
             //cudaStreamSynchronize(STREAMS[CUR_DEVICE]);
             cudaEventSynchronize(EVENTS[CUR_DEVICE]);
             ret = cudaGetLastError();
+
+            if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+                inject_action(CE_RECV_END);
+            if (kind == cudaMemcpyHostToDevice)
+                inject_action(CE_SEND_END);
+
             state->unlock();
             if(ret != cudaSuccess)
                 break;
@@ -395,8 +413,13 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count,
             }
         }
 
-        if(state && !state->locked)
+        if(state && !state->locked) {
             state->lock();
+            if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+                inject_action(CE_RECV_START);
+            if (kind == cudaMemcpyHostToDevice)
+                inject_action(CE_SEND_START);
+        }
 
         //ret = cudaMemcpy(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind);
         cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, STREAMS[CUR_DEVICE]);
@@ -429,11 +452,23 @@ static cudaError_t chunkMemcpy(void* a_dst, const void* a_src, size_t count,
     else {
         ce_lock_state state(device_a, kind, count, device_b, migration);
         state.lock();
+
+        if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+            inject_action(CE_RECV_START);
+        if (kind == cudaMemcpyHostToDevice)
+            inject_action(CE_SEND_START);
+
         ret = __chunkMemcpy(a_dst, a_src, count, kind, &state);
         cudaEventSynchronize(cur_event());
 //        cudaStreamSynchronize(cur_stream());
         if(ret == cudaSuccess)
             ret = cudaGetLastError();
+
+        if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+            inject_action(CE_RECV_END);
+        if (kind == cudaMemcpyHostToDevice)
+            inject_action(CE_SEND_END);
+
         state.unlock();
     }
     return ret;
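
Both chunkMemcpy() and __chunkMemcpy() now bracket each copy-engine hold with a
matching action pair chosen by transfer direction. A hedged distillation of
that selection logic -- the enum, the stub inject_action(), and traced_copy()
are stand-ins so the fragment compiles without CUDA:

    #include <stdio.h>

    enum copy_kind { HostToDevice, DeviceToHost, DeviceToDevice };

    /* Stand-in for liblitmus's __inject_action()-backed macro. */
    static void inject_action(unsigned int a) { printf("action %u\n", a); }

    static void traced_copy(enum copy_kind kind)
    {
        /* D2H and D2D copies exercise a receive engine; H2D a send engine,
         * mirroring the kind checks in the patch. */
        unsigned int start = (kind == HostToDevice) ? 300u /* CE_SEND_START */
                                                    : 400u /* CE_RECV_START */;
        inject_action(start);
        /* ... cudaMemcpyAsync() + cudaEventSynchronize() happen here ... */
        inject_action(start + 1); /* matching *_END code is START + 1 */
    }

    int main(void)
    {
        traced_copy(HostToDevice);
        traced_copy(DeviceToHost);
        return 0;
    }
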
@@ -1157,6 +1192,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
         goto out;
 
     next_gpu = litmus_lock(TOKEN_LOCK);
+    inject_action(TOKEN_START);
     {
         MigrateIfNeeded(next_gpu);
         unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels;
@@ -1170,6 +1206,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
             {
                 if(useEngineLocks() && !locked) {
                     litmus_lock(cur_ee());
+                    inject_action(EE_START);
                     locked = true;
                 }
 
@@ -1180,6 +1217,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
 //                    cudaStreamSynchronize(cur_stream());
                     cudaEventRecord(cur_event(), cur_stream());
                     cudaEventSynchronize(cur_event());
+                    inject_action(EE_END);
                     litmus_unlock(cur_ee());
                     locked = false;
                 }
@@ -1187,6 +1225,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
             if (locked) {
                 cudaEventRecord(cur_event(), cur_stream());
                 cudaEventSynchronize(cur_event());
+                inject_action(EE_END);
                 litmus_unlock(cur_ee());
                 locked = false;
             }
@@ -1198,6 +1237,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
         if (MIGRATE_VIA_SYSMEM)
             PullState();
     }
+    inject_action(TOKEN_END);
     litmus_unlock(TOKEN_LOCK);
 
     last_gpu() = cur_gpu();
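
Taken together, the gpu_loop_for() hunks emit strictly nested intervals: one
TOKEN_START/TOKEN_END pair around the whole token hold, with an EE_START/EE_END
pair per engine-lock hold inside it. A hedged outline of that nesting, with the
litmus_lock()/litmus_unlock() calls and real GPU work elided so the stub below
only echoes the action sequence:

    #include <stdio.h>

    static void inject_action(unsigned int a) { printf("action %u\n", a); }

    int main(void)
    {
        inject_action(100);         /* TOKEN_START: token lock acquired */
        for (int k = 0; k < 2; k++) {
            inject_action(200);     /* EE_START: engine lock held */
            /* kernel launch + cudaEventSynchronize() would run here */
            inject_action(201);     /* EE_END: engine lock about to drop */
        }
        inject_action(101);         /* TOKEN_END: token about to drop */
        return 0;
    }
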
@@ -1696,6 +1736,11 @@ void apply_args(struct Args* args)
     CHUNK_SIZE = args->chunk_size;
     MIGRATE_VIA_SYSMEM = args->use_sysmem_migration;
 
+    if (args->scheduler == LITMUS && !ENABLE_AFFINITY)
+        TRACE_MIGRATIONS = true;
+    else if (args->scheduler == LITMUS)
+        TRACE_MIGRATIONS = false;
+
     // roll back other globals to an initial state
     CUR_DEVICE = -1;
     LAST_DEVICE = -1;
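
The two new branches in apply_args() collapse to one rule: under the LITMUS
scheduler, TRACE_MIGRATIONS becomes !ENABLE_AFFINITY (presumably because the
affinity-aware path records migrations itself -- my reading, not stated by the
patch); under any other scheduler the flag is left untouched. A hedged
restatement, with purely illustrative scaffolding:

    #include <stdbool.h>
    #include <stdio.h>

    enum sched { LINUX_SCHED, LITMUS };

    int main(void)
    {
        bool trace_migrations = false;  /* prior value, whatever it was */
        enum sched scheduler = LITMUS;
        bool enable_affinity = false;

        /* Same effect as the patch's if/else-if pair. */
        if (scheduler == LITMUS)
            trace_migrations = !enable_affinity;

        printf("TRACE_MIGRATIONS = %d\n", trace_migrations); /* prints 1 */
        return 0;
    }
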
@@ -2089,8 +2134,10 @@ int init_daemon(struct Args* args, int num_total_users, bool is_daemon)
         pthread_mutex_unlock(daemon_mutex);
     }
 
-    if (!my_run_entry)
+    if (!my_run_entry) {
+        fprintf(stderr, "Could not find task <wcet, gpu_wcet, period>: <%f %f %f>\n", args->wcet_ms, args->gpu_wcet_ms, args->period_ms);
         return -1;
+    }
     return 0;
 }
 
diff --git a/include/litmus.h b/include/litmus.h
index a6c2b13..e785f92 100644
--- a/include/litmus.h
+++ b/include/litmus.h
@@ -270,19 +270,16 @@ int inject_completion(unsigned int job_no);
 int inject_gpu_migration(unsigned int to, unsigned int from);
 int __inject_action(unsigned int action);
 
-/*
+#if 1
 #define inject_action(COUNT) \
 do { \
-unsigned int temp = (COUNT); \
-printf("%s:%d:%d\n",__FUNCTION__,__LINE__,temp); \
-__inject_action(temp); \
+__inject_action(COUNT); \
 }while(0);
-*/
-
+#else
 #define inject_action(COUNT) \
 do { \
 }while(0);
-
+#endif
 
 /* Litmus signal handling */
 
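
The header change swaps the comment-toggled macro for an #if 1/#else/#endif
pair, so tracing is now enabled or compiled out by editing one preprocessor
condition instead of moving comment markers, and the old per-call printf of
function, line, and action is gone. A hedged, self-contained sketch of the same
compile-time toggle pattern -- the stub __inject_action() here mirrors the
header's declaration but is a demo stand-in, not liblitmus's syscall-backed
implementation:

    #include <stdio.h>

    /* Stand-in for liblitmus's __inject_action(). */
    static int __inject_action(unsigned int action)
    {
        return printf("inject %u\n", action);
    }

    #if 1  /* flip to 0 to compile all tracing out */
    #define inject_action(COUNT) \
        do { \
            __inject_action(COUNT); \
        } while (0)
    #else
    #define inject_action(COUNT) do { } while (0)
    #endif

    int main(void)
    {
        inject_action(100); /* expands to a __inject_action(100) call */
        return 0;
    }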