From e3935c7f68ce428e394eb53ea29ebef5509bcd7f Mon Sep 17 00:00:00 2001
From: Glenn Elliott <gelliott@cs.unc.edu>
Date: Wed, 24 Apr 2013 20:34:17 -0400
Subject: integrated numa support and fixed p2p migration

---
 Makefile        | 14 ++++++++++++++
 gpu/gpuspin.cu  | 53 +++++++++++++++++++++++++++++----------------------
 src/migration.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/Makefile b/Makefile
index f50af0f..b91dec5 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,11 @@ ARCH ?= ${host-arch}
 # LITMUS_KERNEL -- where to find the litmus kernel?
 LITMUS_KERNEL ?= ../litmus-rt
 
+# NUMA Support. Comment out to disable. Requires libnuma dev files.
+#
+# Enabling this option will ensure all memory resides on NUMA nodes
+# that overlap clusters/partitions specified by a call to be_migrate*().
+NUMA_SUPPORT = dummyval
 
 # ##############################################################################
 # Internal configuration.
@@ -62,8 +67,17 @@ CUFLAGS  = ${flags-api} ${flags-cu-debug} ${flags-cu-optim} ${flags-cu-nvcc} ${f
 CFLAGS   = ${flags-debug} ${flags-misc}
 LDFLAGS  = ${flags-${ARCH}}
 
+ifdef NUMA_SUPPORT
+CFLAGS += -DLITMUS_NUMA_SUPPORT
+CPPFLAGS += -DLITMUS_NUMA_SUPPORT
+CUFLAGS += -DLITMUS_NUMA_SUPPORT
+endif
+
 # how to link against liblitmus
 liblitmus-flags = -L${LIBLITMUS} -llitmus
+ifdef NUMA_SUPPORT
+liblitmus-flags += -lnuma
+endif
 
 # how to link cuda
 cuda-flags-i386 = -L/usr/local/cuda/lib
diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu
index b096c82..970d6f2 100644
--- a/gpu/gpuspin.cu
+++ b/gpu/gpuspin.cu
@@ -57,7 +57,7 @@ size_t CHUNK_SIZE = 0;
 
 int TOKEN_LOCK = -1;
 
-bool USE_ENGINE_LOCKS = true;
+bool USE_ENGINE_LOCKS = false;
 bool USE_DYNAMIC_GROUP_LOCKS = false;
 int EE_LOCKS[NR_GPUS];
 int CE_SEND_LOCKS[NR_GPUS];
@@ -692,12 +692,13 @@ static void init_cuda(int num_gpu_users)
 			{
 				if (i != j)
 				{
+					int other = GPU_PARTITION*GPU_PARTITION_SIZE + j;
 					int canAccess = 0;
-					cudaDeviceCanAccessPeer(&canAccess, i, j);
+					cudaDeviceCanAccessPeer(&canAccess, which, other);
 					if(canAccess)
 					{
-						cudaDeviceEnablePeerAccess(j, 0);
-						p2pMigration[i][j] = true;
+						cudaDeviceEnablePeerAccess(other, 0);
+						p2pMigration[which][other] = true;
 					}
 				}
 			}
@@ -1294,8 +1295,8 @@ enum eScheduler
 	RT_LINUX
 };
 
-#define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:d"
-#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:"
+#define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:d:"
+#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:V"
 
 // concat the option strings
 #define OPTSTR CPU_OPTIONS GPU_OPTIONS
@@ -1372,6 +1373,9 @@ int main(int argc, char** argv)
 			NUM_COPY_ENGINES = atoi(optarg);
 			assert(NUM_COPY_ENGINES == 1 || NUM_COPY_ENGINES == 2);
 			break;
+		case 'V':
+			RESERVED_MIGR_COPY_ENGINE = true;
+			break;
 		case 'E':
 			USE_ENGINE_LOCKS = true;
 			ENGINE_LOCK_TYPE = (eEngineLockTypes)atoi(optarg);
@@ -1440,7 +1444,9 @@ int main(int argc, char** argv)
 			want_signals = 1;
 			break;
 		case 'd':
-			drain = DRAIN_SOBLIV;
+			drain = (budget_drain_policy_t)atoi(optarg);
+			assert(drain >= DRAIN_SIMPLE && drain <= DRAIN_SOBLIV);
+			assert(drain != DRAIN_SAWARE); // unsupported
 			break;
 		case 'l':
 			test_loop = 1;
@@ -1623,18 +1629,6 @@ int main(int argc, char** argv)
 		activate_litmus_signals(SIG_BUDGET_MASK, longjmp_on_litmus_signal);
 	}
 
-	if (scheduler == LITMUS)
-	{
-		ret = task_mode(LITMUS_RT_TASK);
-		if (ret != 0)
-			bail_out("could not become RT task");
-	}
-	else
-	{
-		trace_name();
-		trace_param();
-	}
-
 //	if (protocol >= 0) {
 //		/* open reference to semaphore */
 //		lock_od = litmus_open_lock(protocol, resource_id, lock_namespace, &cluster);
@@ -1654,12 +1648,20 @@ int main(int argc, char** argv)
 		
 		init_cuda(num_gpu_users);
 		safetynet = true;
-		
-		if (ENABLE_RT_AUX_THREADS)
-			if (enable_aux_rt_tasks(AUX_CURRENT | AUX_FUTURE) != 0)
-				bail_out("enable_aux_rt_tasks() failed");
 	}
 	
+	if (scheduler == LITMUS)
+	{
+		ret = task_mode(LITMUS_RT_TASK);
+		if (ret != 0)
+			bail_out("could not become RT task");
+	}
+	else
+	{
+		trace_name();
+		trace_param();
+	}
+
 	if (wait) {
 		ret = wait_for_ts_release2(&releaseTime);
 		if (ret != 0)
@@ -1674,6 +1676,11 @@ int main(int argc, char** argv)
 		sleep_next_period_linux();
 	}
 
+	if (scheduler == LITMUS && GPU_USING && ENABLE_RT_AUX_THREADS) {
+		if (enable_aux_rt_tasks(AUX_CURRENT | AUX_FUTURE) != 0)
+			bail_out("enable_aux_rt_tasks() failed");
+	}
+
 	start = wctime();
 
 	if (scheduler == LITMUS)
diff --git a/src/migration.c b/src/migration.c
index 152d81b..7ac320e 100644
--- a/src/migration.c
+++ b/src/migration.c
@@ -4,8 +4,13 @@
 #include <sched.h> /* for cpu sets */
 #include <unistd.h>
 
+#ifdef LITMUS_NUMA_SUPPORT
+#include <numa.h>
+#endif
+
 #include "migration.h"
 
+
 extern ssize_t read_file(const char* fname, void* buf, size_t maxlen);
 
 int release_master()
@@ -54,6 +59,50 @@ int cluster_to_first_cpu(int cluster, int cluster_sz)
 	return first_cpu;
 }
 
+#ifdef LITMUS_NUMA_SUPPORT
+/* Restrict the task to the NUMA nodes that host the CPUs set in 'cpus' and
+ * migrate pages already allocated by 'tid' onto those nodes. Call this
+ * before setting up CPU affinity masks since that mask may be a subset of
+ * the numa nodes. 'sz' is the size of 'cpus' in BYTES (the same value that
+ * is passed to sched_setaffinity()). Returns 0 if NUMA is unavailable, else
+ * the result of numa_migrate_pages() (callers treat negative as failure). */
+static int setup_numa(pid_t tid, int sz, const cpu_set_t *cpus)
+{
+	struct bitmask *new_nodes, *old_nodes;
+	int nr_nodes, nr_cpus, node, i, ret;
+
+	if (numa_available() != 0)
+		return 0;
+
+	nr_cpus = sz * 8; /* sz counts bytes; each byte holds 8 CPU bits */
+	nr_nodes = numa_max_node()+1;
+	new_nodes = numa_bitmask_alloc(nr_nodes);
+	old_nodes = numa_bitmask_alloc(nr_nodes);
+	/* map the cpu mask to a numa mask */
+	for (i = 0; i < nr_cpus; ++i) {
+		if (!CPU_ISSET_S(i, sz, cpus))
+			continue;
+		node = numa_node_of_cpu(i);
+		if (node >= 0) /* skip CPUs libnuma cannot map to a node */
+			numa_bitmask_setbit(new_nodes, node);
+	}
+	/* compute the complement numa mask */
+	for (i = 0; i < nr_nodes; ++i) {
+		if (!numa_bitmask_isbitset(new_nodes, i))
+			numa_bitmask_setbit(old_nodes, i);
+	}
+
+	numa_set_strict(1);
+	numa_bind(new_nodes); /* sets CPU and memory policy */
+	ret = numa_migrate_pages(tid, old_nodes, new_nodes); /* move prior alloc'ed pages */
+	numa_bitmask_free(new_nodes);
+	numa_bitmask_free(old_nodes);
+	return ret;
+}
+#else
+#define setup_numa(x, y, z) 0
+#endif
+
 int be_migrate_thread_to_cpu(pid_t tid, int target_cpu)
 {
 	cpu_set_t *cpu_set;
@@ -82,7 +131,9 @@ int be_migrate_thread_to_cpu(pid_t tid, int target_cpu)
 	if (tid == 0)
 		tid = gettid();
 
-	ret = sched_setaffinity(tid, sz, cpu_set);
+	ret = (setup_numa(tid, sz, cpu_set) >= 0) ? 0 : -1;
+	if (!ret)
+		ret = sched_setaffinity(tid, sz, cpu_set);
 
 	CPU_FREE(cpu_set);
 
@@ -114,7 +165,7 @@ int __be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz,
 	}
 
 	master = (ignore_rm) ? -1 : release_master();
-	num_cpus = num_online_cpus();
+		num_cpus = num_online_cpus();
 
 	if (num_cpus == -1 || last_cpu >= num_cpus || first_cpu < 0)
 		return -1;
@@ -133,7 +184,9 @@ int __be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz,
 	if (tid == 0)
 		tid = gettid();
 
-	ret = sched_setaffinity(tid, sz, cpu_set);
+	ret = (setup_numa(tid, sz, cpu_set) >= 0) ? 0 : -1;
+	if (!ret)
+		ret = sched_setaffinity(tid, sz, cpu_set);
 
 	CPU_FREE(cpu_set);
 
-- 
cgit v1.2.2