author     Glenn Elliott <gelliott@cs.unc.edu>  2013-05-14 22:16:38 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>  2013-05-14 22:16:38 -0400
commit     62bed6ce5f20e44c80369d224812c012f5dd5ef1 (patch)
tree       381f493d2afc407a3a2414963f5ae47f7e4b0cfe
parent     95e840f68892d46289120d1042ee36f9eaf41de7 (diff)

log migrations when affinity is off

-rw-r--r--  gpu/gpuspin.cu    | 51
-rw-r--r--  include/litmus.h  | 11
2 files changed, 53 insertions, 9 deletions
diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu
index f361b86..304d937 100644
--- a/gpu/gpuspin.cu
+++ b/gpu/gpuspin.cu
@@ -26,6 +26,18 @@ using namespace ranlib;
 
 #define ms2s(ms) ((ms)*0.001)
 
+const unsigned int TOKEN_START = 100;
+const unsigned int TOKEN_END = 101;
+
+const unsigned int EE_START = 200;
+const unsigned int EE_END = 201;
+
+const unsigned int CE_SEND_START = 300;
+const unsigned int CE_SEND_END = 301;
+
+const unsigned int CE_RECV_START = 400;
+const unsigned int CE_RECV_END = 401;
+
 bool SILENT = true;
 inline int xprintf(const char *format, ...)
 {
@@ -382,6 +394,12 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count,
             //cudaStreamSynchronize(STREAMS[CUR_DEVICE]);
             cudaEventSynchronize(EVENTS[CUR_DEVICE]);
             ret = cudaGetLastError();
+
+            if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+                inject_action(CE_RECV_END);
+            if (kind == cudaMemcpyHostToDevice)
+                inject_action(CE_SEND_END);
+
             state->unlock();
             if(ret != cudaSuccess)
                 break;
@@ -395,8 +413,13 @@ static cudaError_t __chunkMemcpy(void* a_dst, const void* a_src, size_t count,
             }
         }
 
-        if(state && !state->locked)
+        if(state && !state->locked) {
             state->lock();
+            if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+                inject_action(CE_RECV_START);
+            if (kind == cudaMemcpyHostToDevice)
+                inject_action(CE_SEND_START);
+        }
 
         //ret = cudaMemcpy(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind);
         cudaMemcpyAsync(dst+i*chunk_size, src+i*chunk_size, bytesToCopy, kind, STREAMS[CUR_DEVICE]);
@@ -429,11 +452,23 @@ static cudaError_t chunkMemcpy(void* a_dst, const void* a_src, size_t count,
     else {
         ce_lock_state state(device_a, kind, count, device_b, migration);
         state.lock();
+
+        if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+            inject_action(CE_RECV_START);
+        if (kind == cudaMemcpyHostToDevice)
+            inject_action(CE_SEND_START);
+
         ret = __chunkMemcpy(a_dst, a_src, count, kind, &state);
         cudaEventSynchronize(cur_event());
 //        cudaStreamSynchronize(cur_stream());
         if(ret == cudaSuccess)
             ret = cudaGetLastError();
+
+        if (kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice)
+            inject_action(CE_RECV_END);
+        if (kind == cudaMemcpyHostToDevice)
+            inject_action(CE_SEND_END);
+
         state.unlock();
     }
     return ret;
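
Both chunkMemcpy() and __chunkMemcpy() now bracket each copy-engine hold with a
matching action pair chosen by transfer direction. A hedged distillation of
that selection logic -- the enum, the stub inject_action(), and traced_copy()
are stand-ins so the fragment compiles without CUDA:

    #include <stdio.h>

    enum copy_kind { HostToDevice, DeviceToHost, DeviceToDevice };

    /* Stand-in for liblitmus's __inject_action()-backed macro. */
    static void inject_action(unsigned int a) { printf("action %u\n", a); }

    static void traced_copy(enum copy_kind kind)
    {
        /* D2H and D2D copies exercise a receive engine; H2D a send engine,
         * mirroring the kind checks in the patch. */
        unsigned int start = (kind == HostToDevice) ? 300u /* CE_SEND_START */
                                                    : 400u /* CE_RECV_START */;
        inject_action(start);
        /* ... cudaMemcpyAsync() + cudaEventSynchronize() happen here ... */
        inject_action(start + 1); /* matching *_END code is START + 1 */
    }

    int main(void)
    {
        traced_copy(HostToDevice);
        traced_copy(DeviceToHost);
        return 0;
    }
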
@@ -1157,6 +1192,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
         goto out;
 
     next_gpu = litmus_lock(TOKEN_LOCK);
+    inject_action(TOKEN_START);
     {
         MigrateIfNeeded(next_gpu);
         unsigned int numcycles = ((unsigned int)(cur_hz() * gpu_sec_time))/num_kernels;
@@ -1170,6 +1206,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
             {
                 if(useEngineLocks() && !locked) {
                     litmus_lock(cur_ee());
+                    inject_action(EE_START);
                     locked = true;
                 }
 
@@ -1180,6 +1217,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
 //                    cudaStreamSynchronize(cur_stream());
                     cudaEventRecord(cur_event(), cur_stream());
                     cudaEventSynchronize(cur_event());
+                    inject_action(EE_END);
                     litmus_unlock(cur_ee());
                     locked = false;
                 }
@@ -1187,6 +1225,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
             if (locked) {
                 cudaEventRecord(cur_event(), cur_stream());
                 cudaEventSynchronize(cur_event());
+                inject_action(EE_END);
                 litmus_unlock(cur_ee());
                 locked = false;
             }
@@ -1198,6 +1237,7 @@ static void gpu_loop_for(double gpu_sec_time, unsigned int num_kernels, double e
         if (MIGRATE_VIA_SYSMEM)
             PullState();
     }
+    inject_action(TOKEN_END);
     litmus_unlock(TOKEN_LOCK);
 
     last_gpu() = cur_gpu();
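
Taken together, the gpu_loop_for() hunks emit strictly nested intervals: one
TOKEN_START/TOKEN_END pair around the whole token hold, with an EE_START/EE_END
pair per engine-lock hold inside it. A hedged outline of that nesting, with the
litmus_lock()/litmus_unlock() calls and real GPU work elided so the stub below
only echoes the action sequence:

    #include <stdio.h>

    static void inject_action(unsigned int a) { printf("action %u\n", a); }

    int main(void)
    {
        inject_action(100);         /* TOKEN_START: token lock acquired */
        for (int k = 0; k < 2; k++) {
            inject_action(200);     /* EE_START: engine lock held */
            /* kernel launch + cudaEventSynchronize() would run here */
            inject_action(201);     /* EE_END: engine lock about to drop */
        }
        inject_action(101);         /* TOKEN_END: token about to drop */
        return 0;
    }
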
@@ -1696,6 +1736,11 @@ void apply_args(struct Args* args)
     CHUNK_SIZE = args->chunk_size;
     MIGRATE_VIA_SYSMEM = args->use_sysmem_migration;
 
+    if (args->scheduler == LITMUS && !ENABLE_AFFINITY)
+        TRACE_MIGRATIONS = true;
+    else if (args->scheduler == LITMUS)
+        TRACE_MIGRATIONS = false;
+
     // roll back other globals to an initial state
     CUR_DEVICE = -1;
     LAST_DEVICE = -1;
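
The two new branches in apply_args() collapse to one rule: under the LITMUS
scheduler, TRACE_MIGRATIONS becomes !ENABLE_AFFINITY (presumably because the
affinity-aware path records migrations itself -- my reading, not stated by the
patch); under any other scheduler the flag is left untouched. A hedged
restatement, with purely illustrative scaffolding:

    #include <stdbool.h>
    #include <stdio.h>

    enum sched { LINUX_SCHED, LITMUS };

    int main(void)
    {
        bool trace_migrations = false;  /* prior value, whatever it was */
        enum sched scheduler = LITMUS;
        bool enable_affinity = false;

        /* Same effect as the patch's if/else-if pair. */
        if (scheduler == LITMUS)
            trace_migrations = !enable_affinity;

        printf("TRACE_MIGRATIONS = %d\n", trace_migrations); /* prints 1 */
        return 0;
    }
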
@@ -2089,8 +2134,10 @@ int init_daemon(struct Args* args, int num_total_users, bool is_daemon)
         pthread_mutex_unlock(daemon_mutex);
     }
 
-    if (!my_run_entry)
+    if (!my_run_entry) {
+        fprintf(stderr, "Could not find task <wcet, gpu_wcet, period>: <%f %f %f>\n", args->wcet_ms, args->gpu_wcet_ms, args->period_ms);
         return -1;
+    }
     return 0;
 }
 
diff --git a/include/litmus.h b/include/litmus.h
index a6c2b13..e785f92 100644
--- a/include/litmus.h
+++ b/include/litmus.h
@@ -270,19 +270,16 @@ int inject_completion(unsigned int job_no);
 int inject_gpu_migration(unsigned int to, unsigned int from);
 int __inject_action(unsigned int action);
 
-/*
+#if 1
 #define inject_action(COUNT) \
 do { \
-unsigned int temp = (COUNT); \
-printf("%s:%d:%d\n",__FUNCTION__,__LINE__,temp); \
-__inject_action(temp); \
+__inject_action(COUNT); \
 }while(0);
-*/
-
+#else
 #define inject_action(COUNT) \
 do { \
 }while(0);
-
+#endif
 
 /* Litmus signal handling */
 
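
The header change swaps the comment-toggled macro for an #if 1/#else/#endif
pair, so tracing is now enabled or compiled out by editing one preprocessor
condition instead of moving comment markers, and the old per-call printf of
function, line, and action is gone. A hedged, self-contained sketch of the same
compile-time toggle pattern -- the stub __inject_action() here mirrors the
header's declaration but is a demo stand-in, not liblitmus's syscall-backed
implementation:

    #include <stdio.h>

    /* Stand-in for liblitmus's __inject_action(). */
    static int __inject_action(unsigned int action)
    {
        return printf("inject %u\n", action);
    }

    #if 1  /* flip to 0 to compile all tracing out */
    #define inject_action(COUNT) \
        do { \
            __inject_action(COUNT); \
        } while (0)
    #else
    #define inject_action(COUNT) do { } while (0)
    #endif

    int main(void)
    {
        inject_action(100); /* expands to a __inject_action(100) call */
        return 0;
    }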