diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-05-14 22:16:38 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-05-14 22:16:38 -0400 |
commit | 62bed6ce5f20e44c80369d224812c012f5dd5ef1 (patch) | |
tree | 381f493d2afc407a3a2414963f5ae47f7e4b0cfe | |
parent | 95e840f68892d46289120d1042ee36f9eaf41de7 (diff) |
log migrations when affinity is off
-rw-r--r-- | gpu/gpuspin.cu | 51 | ||||
-rw-r--r-- | include/litmus.h | 11 |
2 files changed, 53 insertions, 9 deletions
diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu index f361b86..304d937 100644 --- a/gpu/gpuspin.cu +++ b/gpu/gpuspin.cu | |||
@@ -26,6 +26,18 @@ using namespace ranlib; | |||
26 | 26 | ||
27 | #define ms2s(ms) ((ms)*0.001) | 27 | #define ms2s(ms) ((ms)*0.001) |
28 | 28 | ||
29 | const unsigned int TOKEN_START = 100; | ||
30 | const unsigned int TOKEN_END = 101; | ||
31 | |||
32 | const unsigned int EE_START = 200; | ||
33 | const unsigned int EE_END = 201; | ||
34 | |||
35 | const unsigned int CE_SEND_START = 300; | ||
36 | const unsigned int CE_SEND_END = 301; | ||
37 | |||
38 | const unsigned int CE_RECV_START = 400; | ||
39 | const unsigned int CE_RECV_END = 401; | ||
40 | |||
29 | bool SILENT = true; | 41 | bool SILENT = true; |
30 | inline int xprintf(const char *format, ...) | 42 | inline int xprintf(const char *format, ...) |
31 | { | 43 | { |
@@ -382,6 +394,12 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count, | |||
382 | //cudaStreamSynchronize(STREAMS[CUR_DEVICE]); | 394 | //cudaStreamSynchronize(STREAMS[CUR_DEVICE]); |
383 | cudaEventSynchronize(EVENTS[CUR_DEVICE]); | 395 | cudaEventSynchronize(EVENTS[CUR_DEVICE]); |
384 | ret = cudaGetLastError(); | 396 | ret = cudaGetLastError(); |
397 | |||
398 | if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice) | ||
399 | inject_action(CE_RECV_END); | ||
400 | if (kind == cudaMemcpyHostToDevice) | ||
401 | inject_action(CE_SEND_END); | ||
402 | |||
385 | state->unlock(); | 403 | state->unlock(); |
386 | if(ret != cudaSuccess) | 404 | if(ret != cudaSuccess) |
387 | break; | 405 | break; |
@@ -395,8 +413,13 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count, | |||
395 | } | 413 | } |
396 | } | 414 | } |
397 | 415 | ||
398 | if(state && !state->locked) | 416 | if(state && !state->locked) { |
399 | state->lock(); | 417 | state->lock(); |
418 | if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice) | ||
419 | inject_action(CE_RECV_START); | ||
420 | if (kind == cudaMemcpyHostToDevice) | ||
421 | inject_action(CE_SEND_START); | ||
422 | } | ||
400 | 423 | ||
401 | //ret = cudaMemcpy(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind); | 424 | //ret = cudaMemcpy(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind); |
402 | cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, STREAMS[CUR_DEVICE]); | 425 | cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, STREAMS[CUR_DEVICE]); |
@@ -429,11 +452,23 @@ static cudaError_t chunkMemcpy(void* a_dst, const void* a_src, size_t count, | |||
429 | else { | 452 | else { |
430 | ce_lock_state state(device_a, kind, count, device_b, migration); | 453 | ce_lock_state state(device_a, kind, count, device_b, migration); |
431 | state.lock(); | 454 | state.lock(); |
455 | |||
456 | if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice) | ||
457 | inject_action(CE_RECV_START); | ||
458 | if (kind == cudaMemcpyHostToDevice) | ||
459 | inject_action(CE_SEND_START); | ||
460 | |||
432 | ret = __chunkMemcpy(a_dst, a_src, count, kind, &state); | 461 | ret = __chunkMemcpy(a_dst, a_src, count, kind, &state); |
433 | cudaEventSynchronize(cur_event()); | 462 | cudaEventSynchronize(cur_event()); |
434 | // cudaStreamSynchronize(cur_stream()); | 463 | // cudaStreamSynchronize(cur_stream()); |
435 | if(ret == cudaSuccess) | 464 | if(ret == cudaSuccess) |
436 | ret = cudaGetLastError(); | 465 | ret = cudaGetLastError(); |
466 | |||
467 | if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice) | ||
468 | inject_action(CE_RECV_END); | ||
469 | if (kind == cudaMemcpyHostToDevice) | ||
470 | inject_action(CE_SEND_END); | ||
471 | |||
437 | state.unlock(); | 472 | state.unlock(); |
438 | } | 473 | } |
439 | return ret; | 474 | return ret; |
@@ -1157,6 +1192,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e | |||
1157 | goto out; | 1192 | goto out; |
1158 | 1193 | ||
1159 | next_gpu = litmus_lock(TOKEN_LOCK); | 1194 | next_gpu = litmus_lock(TOKEN_LOCK); |
1195 | inject_action(TOKEN_START); | ||
1160 | { | 1196 | { |
1161 | MigrateIfNeeded(next_gpu); | 1197 | MigrateIfNeeded(next_gpu); |
1162 | unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels; | 1198 | unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels; |
@@ -1170,6 +1206,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e | |||
1170 | { | 1206 | { |
1171 | if(useEngineLocks() && !locked) { | 1207 | if(useEngineLocks() && !locked) { |
1172 | litmus_lock(cur_ee()); | 1208 | litmus_lock(cur_ee()); |
1209 | inject_action(EE_START); | ||
1173 | locked = true; | 1210 | locked = true; |
1174 | } | 1211 | } |
1175 | 1212 | ||
@@ -1180,6 +1217,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e | |||
1180 | // cudaStreamSynchronize(cur_stream()); | 1217 | // cudaStreamSynchronize(cur_stream()); |
1181 | cudaEventRecord(cur_event(), cur_stream()); | 1218 | cudaEventRecord(cur_event(), cur_stream()); |
1182 | cudaEventSynchronize(cur_event()); | 1219 | cudaEventSynchronize(cur_event()); |
1220 | inject_action(EE_END); | ||
1183 | litmus_unlock(cur_ee()); | 1221 | litmus_unlock(cur_ee()); |
1184 | locked = false; | 1222 | locked = false; |
1185 | } | 1223 | } |
@@ -1187,6 +1225,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e | |||
1187 | if (locked) { | 1225 | if (locked) { |
1188 | cudaEventRecord(cur_event(), cur_stream()); | 1226 | cudaEventRecord(cur_event(), cur_stream()); |
1189 | cudaEventSynchronize(cur_event()); | 1227 | cudaEventSynchronize(cur_event()); |
1228 | inject_action(EE_END); | ||
1190 | litmus_unlock(cur_ee()); | 1229 | litmus_unlock(cur_ee()); |
1191 | locked = false; | 1230 | locked = false; |
1192 | } | 1231 | } |
@@ -1198,6 +1237,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e | |||
1198 | if (MIGRATE_VIA_SYSMEM) | 1237 | if (MIGRATE_VIA_SYSMEM) |
1199 | PullState(); | 1238 | PullState(); |
1200 | } | 1239 | } |
1240 | inject_action(TOKEN_END); | ||
1201 | litmus_unlock(TOKEN_LOCK); | 1241 | litmus_unlock(TOKEN_LOCK); |
1202 | 1242 | ||
1203 | last_gpu() = cur_gpu(); | 1243 | last_gpu() = cur_gpu(); |
@@ -1696,6 +1736,11 @@ void apply_args(struct Args* args) | |||
1696 | CHUNK_SIZE = args->chunk_size; | 1736 | CHUNK_SIZE = args->chunk_size; |
1697 | MIGRATE_VIA_SYSMEM = args->use_sysmem_migration; | 1737 | MIGRATE_VIA_SYSMEM = args->use_sysmem_migration; |
1698 | 1738 | ||
1739 | if (args->scheduler == LITMUS && !ENABLE_AFFINITY) | ||
1740 | TRACE_MIGRATIONS = true; | ||
1741 | else if (args->scheduler == LITMUS) | ||
1742 | TRACE_MIGRATIONS = false; | ||
1743 | |||
1699 | // roll back other globals to an initial state | 1744 | // roll back other globals to an initial state |
1700 | CUR_DEVICE = -1; | 1745 | CUR_DEVICE = -1; |
1701 | LAST_DEVICE = -1; | 1746 | LAST_DEVICE = -1; |
@@ -2089,8 +2134,10 @@ int init_daemon(struct Args* args, int num_total_users, bool is_daemon) | |||
2089 | pthread_mutex_unlock(daemon_mutex); | 2134 | pthread_mutex_unlock(daemon_mutex); |
2090 | } | 2135 | } |
2091 | 2136 | ||
2092 | if (!my_run_entry) | 2137 | if (!my_run_entry) { |
2138 | fprintf(stderr, "Could not find task <wcet, gpu_wcet, period>: <%f %f %f>\n", args->wcet_ms, args->gpu_wcet_ms, args->period_ms); | ||
2093 | return -1; | 2139 | return -1; |
2140 | } | ||
2094 | return 0; | 2141 | return 0; |
2095 | } | 2142 | } |
2096 | 2143 | ||
diff --git a/include/litmus.h b/include/litmus.h index a6c2b13..e785f92 100644 --- a/include/litmus.h +++ b/include/litmus.h | |||
@@ -270,19 +270,16 @@ int inject_completion(unsigned int job_no); | |||
270 | int inject_gpu_migration(unsigned int to, unsigned int from); | 270 | int inject_gpu_migration(unsigned int to, unsigned int from); |
271 | int __inject_action(unsigned int action); | 271 | int __inject_action(unsigned int action); |
272 | 272 | ||
273 | /* | 273 | #if 1 |
274 | #define inject_action(COUNT) \ | 274 | #define inject_action(COUNT) \ |
275 | do { \ | 275 | do { \ |
276 | unsigned int temp = (COUNT); \ | 276 | __inject_action(COUNT); \ |
277 | printf("%s:%d:%d\n",__FUNCTION__,__LINE__,temp); \ | ||
278 | __inject_action(temp); \ | ||
279 | }while(0); | 277 | }while(0); |
280 | */ | 278 | #else |
281 | |||
282 | #define inject_action(COUNT) \ | 279 | #define inject_action(COUNT) \ |
283 | do { \ | 280 | do { \ |
284 | }while(0); | 281 | }while(0); |
285 | 282 | #endif | |
286 | 283 | ||
287 | /* Litmus signal handling */ | 284 | /* Litmus signal handling */ |
288 | 285 | ||