author     Glenn Elliott <gelliott@cs.unc.edu>   2013-05-19 22:32:58 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>   2013-05-19 22:32:58 -0400
commit     3f44de5fe6cdf76bc6a53dc985ee58dd4504eda8 (patch)
tree       1a249865f1a3ae2d0d30fc8b6a168fdfd26a5fd5
parent     992ce8df6eae19c6826018d62cb337fbc632de75 (diff)

Support signals and aberrant jobs (branch wip-2012.3-gpu-rtss13)

-rw-r--r--  gpu/gpuspin.cu | 147
1 file changed, 96 insertions(+), 51 deletions(-)
diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu
index 8a9b717..c42dea9 100644
--- a/gpu/gpuspin.cu
+++ b/gpu/gpuspin.cu
@@ -210,6 +210,10 @@ struct Args
     double scale;

     double duration;
+
+    bool is_aberrant;
+    double aberrant_prob;
+    double aberrant_factor;
 };


@@ -379,7 +383,7 @@ struct ce_lock_state
             yield = litmus_dgl_should_yield_lock(locks, num_locks);
         else
             for(int l = num_locks - 1; l >= 0; --l) // reverse order
-                yield = litmus_should_yield_lock(locks[l]);
+                yield |= litmus_should_yield_lock(locks[l]);
     }
     return (yield);
 }
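
Note on the hunk above: in the fine-grained (non-DGL) path, `yield =` overwrote the verdict from every earlier lock, so only the last lock visited could trigger a yield; `yield |=` latches a true result from any lock. A minimal standalone sketch of the difference, with a hypothetical should_yield_lock() standing in for litmus_should_yield_lock():

    // Minimal sketch; only the first lock visited (l == 2) is contended.
    #include <cassert>

    static bool should_yield_lock(int l) { return l == 2; }

    int main()
    {
        const int num_locks = 3;
        bool assign = false, accum = false;
        for (int l = num_locks - 1; l >= 0; --l) {  // reverse order, as in gpuspin
            assign = should_yield_lock(l);   // old behavior: overwrites prior verdicts
            accum |= should_yield_lock(l);   // fixed behavior: latches any true verdict
        }
        assert(!assign && accum);  // the old form has forgotten lock 2 by the time l == 0
        return 0;
    }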
@@ -424,7 +428,7 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count,
         // optimization - don't unlock if no one else needs the engine
         if (state->should_yield()) {
             gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
-            cudaEventSynchronize(EVENTS[CUR_DEVICE]);
+            cudaEventSynchronize(cur_event());
             ret = cudaGetLastError();
             if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
                 inject_action(CE_RECV_END);
@@ -455,8 +459,8 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count,

         //ret = cudaMemcpy(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind);
         gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
-        cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, STREAMS[CUR_DEVICE]);
-        cudaEventRecord(EVENTS[CUR_DEVICE], STREAMS[CUR_DEVICE]);
+        cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, cur_stream());
+        cudaEventRecord(cur_event(), cur_stream());
         gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);

         if(state)
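
Note on the two __chunkMemcpy hunks above: each CUDA call pair (async copy plus event record, or event synchronize) is bracketed by gpuspin_block/unblock_litmus_signals so that a budget signal's longjmp cannot fire in the middle of a non-reentrant CUDA runtime call. A generic sketch of that bracket, assuming the gpuspin helpers are thin wrappers over per-thread signal masking (SIGUSR1 stands in for LITMUS's SIG_BUDGET):

    #include <pthread.h>
    #include <signal.h>
    #include <stdio.h>

    static void blocked_region(void)
    {
        sigset_t mask, old;
        sigemptyset(&mask);
        sigaddset(&mask, SIGUSR1);

        pthread_sigmask(SIG_BLOCK, &mask, &old);   // delivery deferred...
        puts("non-reentrant work (e.g. cudaMemcpyAsync + cudaEventRecord)");
        pthread_sigmask(SIG_SETMASK, &old, NULL);  // ...and delivered here, if pending
    }

    int main(void) { blocked_region(); return 0; }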
@@ -1219,33 +1223,41 @@ __global__ void docudaspin(float* data, /*unsigned int* iterations,*/ unsigned i
     return;
 }

+
+int next_gpu = -1;
+static bool ee_locked = false;
+static bool early_exit = false;
+static bool have_token = false;
+
 static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double emergency_exit)
 {
-    int next_gpu;
-    bool ee_locked = false;
-    bool early_exit = false;
+//    int next_gpu;
+    next_gpu = -1;
+    ee_locked = false;
+    early_exit = false;
+    have_token = false;

     if (gpu_sec_time <= 0.0)
         goto out;
     if (emergency_exit && wctime() > emergency_exit)
         goto out;

-    gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
-    next_gpu = litmus_lock(TOKEN_LOCK);
-    inject_action(TOKEN_START);
-    gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
-
     LITMUS_TRY
     {
         gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
+        next_gpu = litmus_lock(TOKEN_LOCK);
+        inject_action(TOKEN_START);
+        have_token = true;
+        __sync_synchronize();
         MigrateIfNeeded(next_gpu);
         gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);

         unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels;

-        if(SEND_SIZE > 0)
+        if(SEND_SIZE > 0) {
             chunkMemcpy(this_gpu(d_state_data), h_send_data, SEND_SIZE,
-                    cudaMemcpyHostToDevice, CUR_DEVICE, useEngineLocks());
+                    cudaMemcpyHostToDevice, cur_gpu(), useEngineLocks());
+        }

         for(unsigned int i = 0; i < num_kernels; ++i)
         {
@@ -1255,50 +1267,59 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
                 litmus_lock(cur_ee());
                 inject_action(EE_START);
                 ee_locked = true;
+                __sync_synchronize();
             }
             /* one block per sm, one warp per block */
             docudaspin <<<cur_sms(), cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles);
-            if(useEngineLocks() && (!YIELD_LOCKS || (YIELD_LOCKS && litmus_should_yield_lock(cur_ee())))) {
-//                cudaStreamSynchronize(cur_stream());
+            if(useEngineLocks() &&
+               (i == num_kernels - 1 ||  /* last kernel */
+                !YIELD_LOCKS ||  /* always yield */
+                (YIELD_LOCKS && litmus_should_yield_lock(cur_ee()))  /* we should yield */
+               )
+              ) {
                 cudaEventRecord(cur_event(), cur_stream());
                 cudaEventSynchronize(cur_event());
                 inject_action(EE_END);
                 litmus_unlock(cur_ee());
                 ee_locked = false;
+                __sync_synchronize();
             }
-            gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
-        }
-
-        if (ee_locked) {
-            gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
-
-            cudaEventRecord(cur_event(), cur_stream());
-            cudaEventSynchronize(cur_event());
-            inject_action(EE_END);
-            litmus_unlock(cur_ee());

             gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
-            ee_locked = false;
         }

-        if(RECV_SIZE > 0)
+        if(RECV_SIZE > 0) {
             chunkMemcpy(h_recv_data, this_gpu(d_state_data), RECV_SIZE,
-                    cudaMemcpyDeviceToHost, CUR_DEVICE, useEngineLocks());
+                    cudaMemcpyDeviceToHost, cur_gpu(), useEngineLocks());
+        }

         if (MIGRATE_VIA_SYSMEM) {
             gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
             PullState();
             gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
         }
+
+        gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
+        inject_action(TOKEN_END);
+        litmus_unlock(TOKEN_LOCK);
+        last_gpu() = cur_gpu();
+        have_token = false;
+        __sync_synchronize();
+        gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
     }
     LITMUS_CATCH(SIG_BUDGET)
     {
+        if (have_token)
+        {
             cudaEventRecord(cur_event(), cur_stream());
             cudaEventSynchronize(cur_event());

             if (useEngineLocks()) {
-                /* unlock all engine locks. will fail safely if not held */
+                if (ee_locked) {
                 litmus_unlock(cur_ee());
+                }
+
+                /* we don't know which CEs might be locked... unlock them all. */
                 if (NUM_COPY_ENGINES == 1) {
                     litmus_unlock(cur_send());
                 }
@@ -1311,16 +1332,14 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
                     litmus_unlock(cur_recv());
                 }
             }
-            early_exit = true;
-        }
-        END_LITMUS_TRY

-        gpuspin_block_litmus_signals(ALL_LITMUS_SIG_MASKS);
-        inject_action(TOKEN_END);
-        litmus_unlock(TOKEN_LOCK);
-        gpuspin_unblock_litmus_signals(ALL_LITMUS_SIG_MASKS);
+            litmus_unlock(TOKEN_LOCK);
+            last_gpu() = cur_gpu();
+        }

-        last_gpu() = cur_gpu();
+        early_exit = true;
+    }
+    END_LITMUS_TRY

     if (early_exit)
         throw std::exception();
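
The gpu_loop_for hunks above move the TOKEN_LOCK acquisition inside LITMUS_TRY and promote next_gpu, ee_locked, early_exit, and have_token to file scope, with __sync_synchronize() fences after each update, so the SIG_BUDGET handler (LITMUS_CATCH) can see exactly which locks the interrupted job holds and release only those. A self-contained sketch of that held-lock bookkeeping, assuming LITMUS_TRY/LITMUS_CATCH are built on sigsetjmp/siglongjmp (all names here are hypothetical stand-ins):

    #include <setjmp.h>
    #include <stdio.h>

    static sigjmp_buf env;
    static volatile bool have_token = false;   // volatile: survives the longjmp

    static void lock(void)   { have_token = true;  __sync_synchronize(); }
    static void unlock(void) { have_token = false; __sync_synchronize(); }

    static void budget_exhausted(void) { siglongjmp(env, 1); }  // stands in for SIG_BUDGET

    int main(void)
    {
        if (sigsetjmp(env, 1) == 0) {   // LITMUS_TRY
            lock();                     // take the token *inside* the try block
            budget_exhausted();         // budget overrun mid-critical-section
            unlock();                   // not reached
        } else {                        // LITMUS_CATCH(SIG_BUDGET)
            if (have_token)             // release only what we actually hold
                unlock();
            puts("cleaned up after budget overrun");
        }
        return 0;
    }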
@@ -1361,7 +1380,6 @@ static void gpu_loop_for_linux(double gpu_sec_time, unsigned int num_kernels, do
         docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles);
         cudaEventRecord(cur_event(), cur_stream());
         cudaEventSynchronize(cur_event());
-//      cudaStreamSynchronize(cur_stream());
     }

     if(RECV_SIZE > 0)
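
gpu_loop_for_linux drops the long-commented-out cudaStreamSynchronize in favor of the record-then-synchronize idiom used throughout: record an event behind the kernel in the task's stream, then block on that event. With cudaDeviceScheduleSpin (set in do_normal below), that wait spins, matching gpuspin's busy-wait task model. A minimal nvcc-compilable sketch, with a trivial kernel standing in for docudaspin:

    #include <cstdio>

    __global__ void spin_kernel(void) { }

    int main(void)
    {
        cudaStream_t stream;
        cudaEvent_t  done;
        cudaStreamCreate(&stream);
        cudaEventCreate(&done);

        spin_kernel<<<1, 32, 0, stream>>>();
        cudaEventRecord(done, stream);   // mark the point just after the kernel
        cudaEventSynchronize(done);      // block until that point is reached

        printf("kernel finished: %s\n", cudaGetErrorString(cudaGetLastError()));
        cudaEventDestroy(done);
        cudaStreamDestroy(stream);
        return 0;
    }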
@@ -1846,6 +1864,8 @@ void apply_args(struct Args* args)
         TRACE_MIGRATIONS = true;
     else if (args->scheduler == LITMUS)
         TRACE_MIGRATIONS = false;
+    else if (args->scheduler != LITMUS)
+        TRACE_MIGRATIONS = true;

     WANT_SIGNALS = args->want_signals;

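
Note on the apply_args hunk above: the added `else if (args->scheduler != LITMUS)` is only reached after the preceding `else if (args->scheduler == LITMUS)` has failed, so its test is always true there; it behaves as a plain `else` that defaults TRACE_MIGRATIONS to true for non-LITMUS schedulers.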
@@ -1945,6 +1965,9 @@ int __do_normal(struct Args* args)
         job_no = 0;
     }

+
+    ignore_litmus_signals(SIG_BUDGET_MASK);
+
     init_rt_task_param(&param);
     param.exec_cost = budget;
     param.period = period;
@@ -1965,12 +1988,6 @@ int __do_normal(struct Args* args)
         goto out;
     }

-    if (args->want_signals)
-        /* bind default longjmp signal handler to SIG_BUDGET. */
-        activate_litmus_signals(SIG_BUDGET_MASK, longjmp_on_litmus_signal);
-    else
-        ignore_litmus_signals(SIG_BUDGET_MASK);
-
     if (args->gpu_using)
         allocate_locks(args->num_gpu_tasks, args->scheduler != LITMUS);

@@ -2035,11 +2052,22 @@ int __do_normal(struct Args* args)

     start = wctime();

+    if (args->want_signals) {
+        ignore_litmus_signals(SIG_BUDGET_MASK);  /* flush signals? */
+        activate_litmus_signals(SIG_BUDGET_MASK, longjmp_on_litmus_signal);
+    }
+
     if (!args->gpu_using) {
         bool keepgoing;
         do
         {
             double job_ms = wcet_dist_ms->random();
+            if (args->is_aberrant) {
+                double roll = drand48();
+                if (roll <= args->aberrant_prob)
+                    job_ms *= args->aberrant_factor;
+            }
+
             if (job_ms < 0.0)
                 job_ms = 0.0;
             keepgoing = cjobfn(ms2s(job_ms * args->scale), start + args->duration);
@@ -2050,6 +2078,13 @@ int __do_normal(struct Args* args)
         do
         {
             double job_ms = wcet_dist_ms->random();
+
+            if (args->is_aberrant) {
+                double roll = drand48();
+                if (roll <= args->aberrant_prob)
+                    job_ms *= args->aberrant_factor;
+            }
+
             if (job_ms < 0.0)
                 job_ms = 0.0;

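
Both job loops (CPU-only and GPU) now apply the same aberrant-job draw: with probability aberrant_prob, the sampled execution time is inflated by aberrant_factor, modeling occasional far-longer-than-normal jobs. A standalone sketch of the draw (the constants are hypothetical stand-ins for the Args fields):

    #include <cstdio>
    #include <cstdlib>

    int main(void)
    {
        const double aberrant_prob   = 0.01;  // ~1 job in 100
        const double aberrant_factor = 4.0;   // aberrant jobs run 4x longer

        srand48(12345);
        int aberrant = 0;
        for (int job = 0; job < 100000; ++job) {
            double job_ms = 10.0;             // stands in for wcet_dist_ms->random()
            if (drand48() <= aberrant_prob) {
                job_ms *= aberrant_factor;
                ++aberrant;
            }
            (void)job_ms;
        }
        printf("aberrant jobs: %d of 100000 (expect ~1000)\n", aberrant);
        return 0;
    }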
@@ -2063,9 +2098,7 @@ int __do_normal(struct Args* args)
         }while(keepgoing);
     }

-    if (args->want_signals)
-        ignore_litmus_signals(SIG_BUDGET_MASK);
-
+    ignore_litmus_signals(SIG_BUDGET_MASK);

     if (args->gpu_using && ENABLE_RT_AUX_THREADS) {
         if (args->scheduler == LITMUS) {
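
Taken together, the four signal-handling hunks above (at new lines 1965, 1988, 2052, and 2098) narrow the window in which SIG_BUDGET can longjmp: the signal is ignored throughout setup, the longjmp handler is armed only immediately before the job loop, and it is unconditionally ignored again after the loop. A generic POSIX sketch of that ignore/arm/work/ignore lifecycle (SIGUSR1 and plain sigaction stand in for SIG_BUDGET and the litmus helpers):

    #include <signal.h>
    #include <stdio.h>

    static void on_budget(int sig) { (void)sig; /* the longjmp lives here in gpuspin */ }

    int main(void)
    {
        signal(SIGUSR1, SIG_IGN);        // setup: drop anything delivered early

        struct sigaction sa = {};
        sa.sa_handler = on_budget;
        sigaction(SIGUSR1, &sa, NULL);   // arm only once the loop is about to run

        raise(SIGUSR1);                  // "job loop": handler may fire here
        puts("loop done");

        signal(SIGUSR1, SIG_IGN);        // teardown: late signals are harmless
        return 0;
    }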
@@ -2126,10 +2159,12 @@ int do_normal(struct Args* args)
     init_linux();

     if (args->gpu_using) {
+#if 0
         signal(SIGABRT, catch_exit);
         signal(SIGTERM, catch_exit);
         signal(SIGQUIT, catch_exit);
         signal(SIGSEGV, catch_exit);
+#endif

         cudaSetDeviceFlags(cudaDeviceScheduleSpin);
         init_cuda(args->num_gpu_tasks);
@@ -2419,7 +2454,7 @@ int do_daemon(struct Args* args)
 }

 #define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:d:"
-#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:VW:u"
+#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:VW:uU:O:"
 #define PROXY_OPTIONS "B:PA"

 // concat the option strings
@@ -2433,7 +2468,6 @@ int main(int argc, char** argv)
     eRunMode run_mode = NORMAL;

     int opt;
-
     progname = argv[0];

     while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
@@ -2447,6 +2481,14 @@ int main(int argc, char** argv)
         case 'A':
             run_mode = DAEMON;
             break;
+        case 'U':
+            myArgs.is_aberrant = true;
+            myArgs.aberrant_prob = (double)atoi(optarg);
+            break;
+        case 'O':
+            myArgs.is_aberrant = true;
+            myArgs.aberrant_factor = atof(optarg);
+            break;


         case 'w':
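
The new -U and -O cases pair with the `U:O:` additions to GPU_OPTIONS above. Note that -U is parsed with atoi, so fractional arguments are truncated before the conversion at the end of main. A hypothetical invocation might look like:

    gpuspin <existing options...> -U 10 -O 4.0

which, given the conversion in the final hunk, would request roughly one aberrant job every 10 seconds, each inflated 4x (assuming -U is indeed meant as a mean spacing in seconds).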
@@ -2640,6 +2682,9 @@ int main(int argc, char** argv)
         myArgs.duration = atof(argv[optind + 3]);
     }

+    double rate = (1000.0/myArgs.period_ms)*myArgs.aberrant_prob;
+    myArgs.aberrant_prob = 1.0 / rate;
+
     if (myArgs.num_tasks == 0 || myArgs.num_gpu_tasks == 0) {
         // safety w.r.t. shared mem.
         sleep(2);
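
On the final hunk's conversion: with the period in milliseconds, a task releases 1000/period_ms jobs per second, so rate = (1000/period_ms) * U and aberrant_prob = 1/rate = period_ms/(1000 * U). The expected number of aberrant jobs per second is then aberrant_prob * (1000/period_ms) = 1/U, i.e. on average one aberrant job every U seconds regardless of the period. (The conversion also runs when -U was never given, in which case aberrant_prob holds whatever the Args default is; is_aberrant being false keeps that harmless.) A worked check with hypothetical numbers:

    #include <cassert>
    #include <cmath>

    int main(void)
    {
        double period_ms = 15.0, U = 10.0;            // hypothetical: 15 ms period, -U 10
        double jobs_per_sec = 1000.0 / period_ms;     // ~66.7 jobs/s
        double rate = jobs_per_sec * U;               // ~666.7
        double prob = 1.0 / rate;                     // ~0.0015 per job
        assert(std::fabs(prob * jobs_per_sec - 1.0 / U) < 1e-12);  // 0.1 aberrant jobs/s
        return 0;
    }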