aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2013-04-16 15:08:44 -0400
committerGlenn Elliott <gelliott@cs.unc.edu>2013-04-16 15:08:44 -0400
commit76b0d79069973bd58cda6028c65a9edaa6d2ea73 (patch)
treeae1a121c81d23d95e9c7d5478f2a13ec33eb465c
parent346efb91841609a8c1f2ecf40efd692ced0565cb (diff)
updates for further litmus development
-rw-r--r--Makefile4
-rw-r--r--gpu/budget.cpp5
-rw-r--r--gpu/gpuspin.cu101
-rw-r--r--src/litmus.c2
4 files changed, 69 insertions, 43 deletions
diff --git a/Makefile b/Makefile
index 720a585..f50af0f 100644
--- a/Makefile
+++ b/Makefile
@@ -57,8 +57,8 @@ headers = -I${LIBLITMUS}/include -I${LIBLITMUS}/arch/${include-${ARCH}}/include
57 57
58# combine options 58# combine options
59CPPFLAGS = ${flags-api} ${flags-debug-cpp} ${flags-misc} ${flags-${ARCH}} -DARCH=${ARCH} ${headers} 59CPPFLAGS = ${flags-api} ${flags-debug-cpp} ${flags-misc} ${flags-${ARCH}} -DARCH=${ARCH} ${headers}
60#CUFLAGS = ${flags-api} ${flags-cu-debug} ${flags-cu-optim} ${flags-cu-nvcc} ${flags-cu-misc} -DARCH=${ARCH} ${headers} 60CUFLAGS = ${flags-api} ${flags-cu-debug} ${flags-cu-optim} ${flags-cu-nvcc} ${flags-cu-misc} -DARCH=${ARCH} ${headers}
61CUFLAGS = ${flags-api} ${flags-cu-optim} ${flags-cu-nvcc} ${flags-cu-misc} -DARCH=${ARCH} ${headers} 61#CUFLAGS = ${flags-api} ${flags-cu-optim} ${flags-cu-nvcc} ${flags-cu-misc} -DARCH=${ARCH} ${headers}
62CFLAGS = ${flags-debug} ${flags-misc} 62CFLAGS = ${flags-debug} ${flags-misc}
63LDFLAGS = ${flags-${ARCH}} 63LDFLAGS = ${flags-${ARCH}}
64 64
diff --git a/gpu/budget.cpp b/gpu/budget.cpp
index 8a2546a..eebb14e 100644
--- a/gpu/budget.cpp
+++ b/gpu/budget.cpp
@@ -165,7 +165,7 @@ int job(lt_t exec_ns, lt_t budget_ns)
165 return 1; 165 return 1;
166} 166}
167 167
168#define OPTSTR "SbosOvalwqixdn:r:" 168#define OPTSTR "SbosOvzalwqixdn:r:"
169 169
170int main(int argc, char** argv) 170int main(int argc, char** argv)
171{ 171{
@@ -215,6 +215,9 @@ int main(int argc, char** argv)
215 case 'v': 215 case 'v':
216 drain_policy = DRAIN_SOBLIV; 216 drain_policy = DRAIN_SOBLIV;
217 break; 217 break;
218 case 'z':
219 drain_policy = DRAIN_SIMPLE_IO;
220 break;
218 case 'l': 221 case 'l':
219 CXS_OVERRUN = 1; 222 CXS_OVERRUN = 1;
220 NAMESPACE = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR); 223 NAMESPACE = open("semaphores", O_RDONLY | O_CREAT, S_IRUSR | S_IWUSR);
diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu
index 414e074..b096c82 100644
--- a/gpu/gpuspin.cu
+++ b/gpu/gpuspin.cu
@@ -66,6 +66,7 @@ int CE_MIGR_SEND_LOCKS[NR_GPUS];
66int CE_MIGR_RECV_LOCKS[NR_GPUS]; 66int CE_MIGR_RECV_LOCKS[NR_GPUS];
67bool RESERVED_MIGR_COPY_ENGINE = false; // only checked if NUM_COPY_ENGINES == 2 67bool RESERVED_MIGR_COPY_ENGINE = false; // only checked if NUM_COPY_ENGINES == 2
68 68
69//bool ENABLE_RT_AUX_THREADS = false;
69bool ENABLE_RT_AUX_THREADS = true; 70bool ENABLE_RT_AUX_THREADS = true;
70 71
71enum eGpuSyncMode 72enum eGpuSyncMode
@@ -874,8 +875,10 @@ static void catch_exit(int catch_exit)
874 875
875 876
876 877
878#ifdef VANILLA_LINUX
877static float ms_sum; 879static float ms_sum;
878static int gpucount = 0; 880static int gpucount = 0;
881#endif
879 882
880__global__ void docudaspin(float* data, /*unsigned int* iterations,*/ unsigned int num_elem, unsigned int cycles) 883__global__ void docudaspin(float* data, /*unsigned int* iterations,*/ unsigned int num_elem, unsigned int cycles)
881{ 884{
@@ -915,29 +918,30 @@ __global__ void docudaspin(float* data, /*unsigned int* iterations,*/ unsigned i
915 return; 918 return;
916} 919}
917 920
918static void gpu_loop_for(double gpu_sec_time, double emergency_exit) 921static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double emergency_exit)
919{ 922{
920 int next_gpu; 923 int next_gpu;
921 924
922 if (emergency_exit && wctime() > emergency_exit) 925 if (emergency_exit && wctime() > emergency_exit)
923 goto out; 926 goto out;
924 927
925 next_gpu = litmus_lock(TOKEN_LOCK); 928 next_gpu = litmus_lock(TOKEN_LOCK);
926 { 929 {
927 MigrateIfNeeded(next_gpu); 930 MigrateIfNeeded(next_gpu);
928 931 unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels;
929 unsigned int numcycles = (unsigned int)(cur_hz() * gpu_sec_time); 932
930
931 if(SEND_SIZE > 0) 933 if(SEND_SIZE > 0)
932 chunkMemcpy(this_gpu(d_state_data), h_send_data, SEND_SIZE, 934 chunkMemcpy(this_gpu(d_state_data), h_send_data, SEND_SIZE,
933 cudaMemcpyHostToDevice, CUR_DEVICE, useEngineLocks()); 935 cudaMemcpyHostToDevice, CUR_DEVICE, useEngineLocks());
934 936
935 if(useEngineLocks()) litmus_lock(cur_ee()); 937 for(unsigned int i = 0; i < num_kernels; ++i)
936 /* one block per sm, one warp per block */ 938 {
937 docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles); 939 if(useEngineLocks()) litmus_lock(cur_ee());
938// docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], d_iteration_count[cur_gpu()], cur_elem_per_thread(), numcycles); 940 /* one block per sm, one warp per block */
939 cudaStreamSynchronize(cur_stream()); 941 docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles);
940 if(useEngineLocks()) litmus_unlock(cur_ee()); 942 cudaStreamSynchronize(cur_stream());
943 if(useEngineLocks()) litmus_unlock(cur_ee());
944 }
941 945
942 if(RECV_SIZE > 0) 946 if(RECV_SIZE > 0)
943 chunkMemcpy(h_recv_data, this_gpu(d_state_data), RECV_SIZE, 947 chunkMemcpy(h_recv_data, this_gpu(d_state_data), RECV_SIZE,
@@ -954,12 +958,13 @@ out:
954 return; 958 return;
955} 959}
956 960
957static void gpu_loop_for_linux(double gpu_sec_time, double emergency_exit) 961static void gpu_loop_for_linux(double gpu_sec_time, unsigned int num_kernels, double emergency_exit)
958{ 962{
959 static int GPU_OFFSET = GPU_PARTITION * GPU_PARTITION_SIZE; 963 static int GPU_OFFSET = GPU_PARTITION * GPU_PARTITION_SIZE;
960 static gpu_pool *pool = &GPU_LINUX_SEM_POOL[GPU_PARTITION]; 964 static gpu_pool *pool = &GPU_LINUX_SEM_POOL[GPU_PARTITION];
961 static pthread_mutex_t *mutex = &GPU_LINUX_MUTEX_POOL[GPU_PARTITION]; 965 static pthread_mutex_t *mutex = &GPU_LINUX_MUTEX_POOL[GPU_PARTITION];
962 966
967#ifdef VANILLA_LINUX
963 static bool once = false; 968 static bool once = false;
964 static cudaEvent_t start, end; 969 static cudaEvent_t start, end;
965 float ms; 970 float ms;
@@ -969,6 +974,7 @@ static void gpu_loop_for_linux(double gpu_sec_time, double emergency_exit)
969 cudaEventCreate(&start); 974 cudaEventCreate(&start);
970 cudaEventCreate(&end); 975 cudaEventCreate(&end);
971 } 976 }
977#endif
972 978
973 int next_gpu; 979 int next_gpu;
974 980
@@ -979,29 +985,33 @@ static void gpu_loop_for_linux(double gpu_sec_time, double emergency_exit)
979 { 985 {
980 MigrateIfNeeded(next_gpu); 986 MigrateIfNeeded(next_gpu);
981 987
982 unsigned int numcycles = (unsigned int)(cur_hz() * gpu_sec_time); 988 unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels;
983 989
984 if(SEND_SIZE > 0) 990 if(SEND_SIZE > 0)
985 chunkMemcpy(this_gpu(d_state_data), h_send_data, SEND_SIZE, 991 chunkMemcpy(this_gpu(d_state_data), h_send_data, SEND_SIZE,
986 cudaMemcpyHostToDevice, cur_gpu(), useEngineLocks()); 992 cudaMemcpyHostToDevice, cur_gpu(), useEngineLocks());
987 993
988 /* one block per sm, one warp per block */ 994 for(unsigned int i = 0; i < num_kernels; ++i)
989 cudaEventRecord(start, cur_stream()); 995 {
990 docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles); 996 /* one block per sm, one warp per block */
991// docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], d_iteration_count[cur_gpu()], cur_elem_per_thread(), numcycles); 997#ifdef VANILLA_LINUX
992 cudaEventRecord(end, cur_stream()); 998 cudaEventRecord(start, cur_stream());
993 cudaEventSynchronize(end); 999#endif
994 cudaStreamSynchronize(cur_stream()); 1000 docudaspin <<<cur_sms(),cur_warp_size(), 0, cur_stream()>>> (d_spin_data[cur_gpu()], cur_elem_per_thread(), numcycles);
1001#ifdef VANILLA_LINUX
1002 cudaEventRecord(end, cur_stream());
1003 cudaEventSynchronize(end);
1004#endif
1005 cudaStreamSynchronize(cur_stream());
995 1006
996// chunkMemcpy(this_gpu(h_iteration_count), this_gpu(d_iteration_count), sizeof(unsigned int), 1007#ifdef VANILLA_LINUX
997// cudaMemcpyDeviceToHost, cur_gpu(), useEngineLocks()); 1008 cudaEventElapsedTime(&ms, start, end);
998// 1009 ms_sum += ms;
999 cudaEventElapsedTime(&ms, start, end); 1010#endif
1000 ms_sum += ms; 1011 }
1012#ifdef VANILLA_LINUX
1001 ++gpucount; 1013 ++gpucount;
1002// printf("%f\n", ms); 1014#endif
1003// printf("%f: %u\n", ms, this_gpu(h_iteration_count)[0]);
1004
1005 1015
1006 if(RECV_SIZE > 0) 1016 if(RECV_SIZE > 0)
1007 chunkMemcpy(h_recv_data, this_gpu(d_state_data), RECV_SIZE, 1017 chunkMemcpy(h_recv_data, this_gpu(d_state_data), RECV_SIZE,
@@ -1166,7 +1176,7 @@ static void debug_delay_loop(void)
1166 } 1176 }
1167} 1177}
1168 1178
1169static int gpu_job(double exec_time, double gpu_exec_time, double program_end) 1179static int gpu_job(double exec_time, double gpu_exec_time, unsigned int num_kernels, double program_end)
1170{ 1180{
1171 double chunk1, chunk2; 1181 double chunk1, chunk2;
1172 1182
@@ -1178,7 +1188,7 @@ static int gpu_job(double exec_time, double gpu_exec_time, double program_end)
1178 chunk2 = exec_time - chunk1; 1188 chunk2 = exec_time - chunk1;
1179 1189
1180 loop_for(chunk1, program_end + 1); 1190 loop_for(chunk1, program_end + 1);
1181 gpu_loop_for(gpu_exec_time, program_end + 1); 1191 gpu_loop_for(gpu_exec_time, num_kernels, program_end + 1);
1182 loop_for(chunk2, program_end + 1); 1192 loop_for(chunk2, program_end + 1);
1183 1193
1184 sleep_next_period(); 1194 sleep_next_period();
@@ -1243,7 +1253,7 @@ static void init_linux()
1243 mlockall(MCL_CURRENT | MCL_FUTURE); 1253 mlockall(MCL_CURRENT | MCL_FUTURE);
1244} 1254}
1245 1255
1246static int gpu_job_linux(double exec_time, double gpu_exec_time, double program_end) 1256static int gpu_job_linux(double exec_time, double gpu_exec_time, unsigned int num_kernels, double program_end)
1247{ 1257{
1248 double chunk1, chunk2; 1258 double chunk1, chunk2;
1249 1259
@@ -1255,7 +1265,7 @@ static int gpu_job_linux(double exec_time, double gpu_exec_time, double program_
1255 chunk2 = exec_time - chunk1; 1265 chunk2 = exec_time - chunk1;
1256 1266
1257 loop_for(chunk1, program_end + 1); 1267 loop_for(chunk1, program_end + 1);
1258 gpu_loop_for_linux(gpu_exec_time, program_end + 1); 1268 gpu_loop_for_linux(gpu_exec_time, num_kernels, program_end + 1);
1259 loop_for(chunk2, program_end + 1); 1269 loop_for(chunk2, program_end + 1);
1260 1270
1261 sleep_next_period_linux(); 1271 sleep_next_period_linux();
@@ -1284,8 +1294,8 @@ enum eScheduler
1284 RT_LINUX 1294 RT_LINUX
1285}; 1295};
1286 1296
1287#define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:" 1297#define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:d"
1288#define GPU_OPTIONS "g:y:r:C:E:dG:xS:R:T:Z:aFm:b:MNI" 1298#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:"
1289 1299
1290// concat the option strings 1300// concat the option strings
1291#define OPTSTR CPU_OPTIONS GPU_OPTIONS 1301#define OPTSTR CPU_OPTIONS GPU_OPTIONS
@@ -1310,7 +1320,7 @@ int main(int argc, char** argv)
1310 double duration = 0, start = 0; 1320 double duration = 0, start = 0;
1311 double *exec_times = NULL; 1321 double *exec_times = NULL;
1312 double scale = 1.0; 1322 double scale = 1.0;
1313 task_class_t cls = RT_CLASS_HARD; 1323 task_class_t cls = RT_CLASS_SOFT;
1314 int cur_job = 0, num_jobs = 0; 1324 int cur_job = 0, num_jobs = 0;
1315 struct rt_task param; 1325 struct rt_task param;
1316 1326
@@ -1318,7 +1328,9 @@ int main(int argc, char** argv)
1318 lt_t budget; 1328 lt_t budget;
1319 1329
1320 int num_gpu_users = 0; 1330 int num_gpu_users = 0;
1321 1331 unsigned int num_kernels = 1;
1332
1333 budget_drain_policy_t drain = DRAIN_SIMPLE;
1322 1334
1323 eScheduler scheduler = LITMUS; 1335 eScheduler scheduler = LITMUS;
1324 1336
@@ -1365,7 +1377,7 @@ int main(int argc, char** argv)
1365 ENGINE_LOCK_TYPE = (eEngineLockTypes)atoi(optarg); 1377 ENGINE_LOCK_TYPE = (eEngineLockTypes)atoi(optarg);
1366 assert(ENGINE_LOCK_TYPE == FIFO || ENGINE_LOCK_TYPE == PRIOQ); 1378 assert(ENGINE_LOCK_TYPE == FIFO || ENGINE_LOCK_TYPE == PRIOQ);
1367 break; 1379 break;
1368 case 'd': 1380 case 'D':
1369 USE_DYNAMIC_GROUP_LOCKS = true; 1381 USE_DYNAMIC_GROUP_LOCKS = true;
1370 break; 1382 break;
1371 case 'G': 1383 case 'G':
@@ -1401,6 +1413,9 @@ int main(int argc, char** argv)
1401 num_gpu_users = atoi(optarg); 1413 num_gpu_users = atoi(optarg);
1402 assert(num_gpu_users > 0); 1414 assert(num_gpu_users > 0);
1403 break; 1415 break;
1416 case 'k':
1417 num_kernels = (unsigned int)atoi(optarg);
1418 break;
1404 case 'b': 1419 case 'b':
1405 budget_ms = atoi(optarg); 1420 budget_ms = atoi(optarg);
1406 break; 1421 break;
@@ -1424,6 +1439,9 @@ int main(int argc, char** argv)
1424 case 'i': 1439 case 'i':
1425 want_signals = 1; 1440 want_signals = 1;
1426 break; 1441 break;
1442 case 'd':
1443 drain = DRAIN_SOBLIV;
1444 break;
1427 case 'l': 1445 case 'l':
1428 test_loop = 1; 1446 test_loop = 1;
1429 break; 1447 break;
@@ -1475,7 +1493,7 @@ int main(int argc, char** argv)
1475 ENABLE_AFFINITY = false; 1493 ENABLE_AFFINITY = false;
1476 RELAX_FIFO_MAX_LEN = false; 1494 RELAX_FIFO_MAX_LEN = false;
1477 ENABLE_RT_AUX_THREADS = false; 1495 ENABLE_RT_AUX_THREADS = false;
1478 budget_ms = -1; 1496 budget_ms = -1.0;
1479 want_enforcement = 0; 1497 want_enforcement = 0;
1480 want_signals = 0; 1498 want_signals = 0;
1481 1499
@@ -1551,7 +1569,7 @@ int main(int argc, char** argv)
1551 if (GPU_USING && gpu_wcet_ms <= 0) 1569 if (GPU_USING && gpu_wcet_ms <= 0)
1552 usage("The worst-case gpu execution time must be a positive number."); 1570 usage("The worst-case gpu execution time must be a positive number.");
1553 1571
1554 if (budget_ms > 0) 1572 if (budget_ms > 0.0)
1555 budget = ms2ns(budget_ms); 1573 budget = ms2ns(budget_ms);
1556 else 1574 else
1557 budget = wcet; 1575 budget = wcet;
@@ -1586,6 +1604,7 @@ int main(int argc, char** argv)
1586 PRECISE_ENFORCEMENT : NO_ENFORCEMENT; 1604 PRECISE_ENFORCEMENT : NO_ENFORCEMENT;
1587 param.budget_signal_policy = (want_enforcement && want_signals) ? 1605 param.budget_signal_policy = (want_enforcement && want_signals) ?
1588 PRECISE_SIGNALS : NO_SIGNALS; 1606 PRECISE_SIGNALS : NO_SIGNALS;
1607 param.drain_policy = drain;
1589 param.release_policy = PERIODIC; 1608 param.release_policy = PERIODIC;
1590 1609
1591 if (migrate) 1610 if (migrate)
@@ -1665,6 +1684,7 @@ int main(int argc, char** argv)
1665 else { 1684 else {
1666 while (gpu_job(wcet_ms * 0.001 * scale, 1685 while (gpu_job(wcet_ms * 0.001 * scale,
1667 gpu_wcet_ms * 0.001 * scale, 1686 gpu_wcet_ms * 0.001 * scale,
1687 num_kernels,
1668 start + duration)); 1688 start + duration));
1669 } 1689 }
1670 } 1690 }
@@ -1676,6 +1696,7 @@ int main(int argc, char** argv)
1676 else { 1696 else {
1677 while (gpu_job_linux(wcet_ms * 0.001 * scale, 1697 while (gpu_job_linux(wcet_ms * 0.001 * scale,
1678 gpu_wcet_ms * 0.001 * scale, 1698 gpu_wcet_ms * 0.001 * scale,
1699 num_kernels,
1679 start + duration)); 1700 start + duration));
1680 } 1701 }
1681 } 1702 }
@@ -1710,7 +1731,7 @@ int main(int argc, char** argv)
1710 exit_cuda(); 1731 exit_cuda();
1711 1732
1712 1733
1713 printf("avg: %f\n", ms_sum/gpucount); 1734// printf("avg: %f\n", ms_sum/gpucount);
1714 } 1735 }
1715 1736
1716 if (file) 1737 if (file)
diff --git a/src/litmus.c b/src/litmus.c
index 213ac3f..70f7fb6 100644
--- a/src/litmus.c
+++ b/src/litmus.c
@@ -113,6 +113,8 @@ void init_rt_task_param(struct rt_task* tp)
113 tp->cls = RT_CLASS_SOFT; 113 tp->cls = RT_CLASS_SOFT;
114 tp->priority = LITMUS_LOWEST_PRIORITY; 114 tp->priority = LITMUS_LOWEST_PRIORITY;
115 tp->budget_policy = NO_ENFORCEMENT; 115 tp->budget_policy = NO_ENFORCEMENT;
116 tp->drain_policy = DRAIN_SIMPLE;
117 tp->budget_signal_policy = NO_SIGNALS;
116 tp->release_policy = SPORADIC; 118 tp->release_policy = SPORADIC;
117} 119}
118 120