integrated numa support and fixed p2p migration

author: Glenn Elliott <gelliott@cs.unc.edu> 2013-04-24 20:34:17 -0400
committer: Glenn Elliott <gelliott@cs.unc.edu> 2013-04-24 20:34:17 -0400
commit: e3935c7f68ce428e394eb53ea29ebef5509bcd7f (patch)
tree: 33e5cff0aae98c00ce777b18fbaed888171ad334
parent: 76b0d79069973bd58cda6028c65a9edaa6d2ea73 (diff)
3 files changed, 100 insertions, 26 deletions
diff --git a/Makefile b/Makefile
index f50af0f..b91dec5 100644
--- a/Makefile
+++ b/Makefile
@@ -14,6 +14,11 @@ ARCH ?= ${host-arch}
 # LITMUS_KERNEL -- where to find the litmus kernel?
 LITMUS_KERNEL ?= ../litmus-rt
+# NUMA Support. Comment out to disable. Requires libnuma dev files.
+#
+# Enabling this option will ensure all memory resides on NUMA nodes
+# that overlap clusters/partitions specified by a call to be_migrate*().
+NUMA_SUPPORT = dummyval
 # ##############################################################################
 # Internal configuration.
@@ -62,8 +67,17 @@ CUFLAGS  = ${flags-api} ${flags-cu-debug} ${flags-cu-optim} ${flags-cu-nvcc} ${f
 CFLAGS   = ${flags-debug} ${flags-misc}
 LDFLAGS  = ${flags-${ARCH}}
+ifdef NUMA_SUPPORT
+CFLAGS += -DLITMUS_NUMA_SUPPORT
+CPPFLAGS += -DLITMUS_NUMA_SUPPORT
+CUFLAGS += -DLITMUS_NUMA_SUPPORT
+endif
 # how to link against liblitmus
 liblitmus-flags = -L${LIBLITMUS} -llitmus
+ifdef NUMA_SUPPORT
+liblitmus-flags += -lnuma
+endif
 # how to link cuda
 cuda-flags-i386 = -L/usr/local/cuda/lib
diff --git a/gpu/gpuspin.cu b/gpu/gpuspin.cu
index b096c82..970d6f2 100644
--- a/gpu/gpuspin.cu
+++ b/gpu/gpuspin.cu
@@ -57,7 +57,7 @@ size_t CHUNK_SIZE = 0;
 int TOKEN_LOCK = -1;
-bool USE_ENGINE_LOCKS = true;
+bool USE_ENGINE_LOCKS = false;
 bool USE_DYNAMIC_GROUP_LOCKS = false;
 int EE_LOCKS[NR_GPUS];
 int CE_SEND_LOCKS[NR_GPUS];
@@ -692,12 +692,13 @@ static void init_cuda(int num_gpu_users)
                        {
                                if (i != j)
                                {
+                                        int other = GPU_PARTITION*GPU_PARTITION_SIZE + j;
                                        int canAccess = 0;
-                                        cudaDeviceCanAccessPeer(&canAccess, i, j);
+                                        cudaDeviceCanAccessPeer(&canAccess, which, other);
                                        if(canAccess)
                                        {
-                                                cudaDeviceEnablePeerAccess(j, 0);
+                                                cudaDeviceEnablePeerAccess(other, 0);
-                                                p2pMigration[i][j] = true;
+                                                p2pMigration[which][other] = true;
                                        }
                                }
                        }
@@ -1294,8 +1295,8 @@ enum eScheduler
        RT_LINUX
 };
-#define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:d"
+#define CPU_OPTIONS "p:z:c:wlveio:f:s:q:X:L:Q:d:"
-#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:"
+#define GPU_OPTIONS "g:y:r:C:E:DG:xS:R:T:Z:aFm:b:MNIk:V"
 // concat the option strings
 #define OPTSTR CPU_OPTIONS GPU_OPTIONS
@@ -1372,6 +1373,9 @@ int main(int argc, char** argv)
                        NUM_COPY_ENGINES = atoi(optarg);
                        assert(NUM_COPY_ENGINES == 1 || NUM_COPY_ENGINES == 2);
                        break;
+                case 'V':
+                        RESERVED_MIGR_COPY_ENGINE = true;
+                        break;
                case 'E':
                        USE_ENGINE_LOCKS = true;
                        ENGINE_LOCK_TYPE = (eEngineLockTypes)atoi(optarg);
@@ -1440,7 +1444,9 @@ int main(int argc, char** argv)
                        want_signals = 1;
                        break;
                case 'd':
-                        drain = DRAIN_SOBLIV;
+                        drain = (budget_drain_policy_t)atoi(optarg);
+                        assert(drain >= DRAIN_SIMPLE && drain <= DRAIN_SOBLIV);
+                        assert(drain != DRAIN_SAWARE); // unsupported
                        break;
                case 'l':
                        test_loop = 1;
@@ -1623,18 +1629,6 @@ int main(int argc, char** argv)
                activate_litmus_signals(SIG_BUDGET_MASK, longjmp_on_litmus_signal);
        }
-        if (scheduler == LITMUS)
-        {
-                ret = task_mode(LITMUS_RT_TASK);
-                if (ret != 0)
-                        bail_out("could not become RT task");
-        }
-        else
-        {
-                trace_name();
-                trace_param();
-        }
 //      if (protocol >= 0) {
 //              /* open reference to semaphore */
 //              lock_od = litmus_open_lock(protocol, resource_id, lock_namespace, &cluster);
@@ -1654,12 +1648,20 @@ int main(int argc, char** argv)
                
                init_cuda(num_gpu_users);
                safetynet = true;
-                
-                if (ENABLE_RT_AUX_THREADS)
-                        if (enable_aux_rt_tasks(AUX_CURRENT | AUX_FUTURE) != 0)
-                                bail_out("enable_aux_rt_tasks() failed");
        }
        
+        if (scheduler == LITMUS)
+        {
+                ret = task_mode(LITMUS_RT_TASK);
+                if (ret != 0)
+                        bail_out("could not become RT task");
+        }
+        else
+        {
+                trace_name();
+                trace_param();
+        }
        if (wait) {
                ret = wait_for_ts_release2(&releaseTime);
                if (ret != 0)
@@ -1674,6 +1676,11 @@ int main(int argc, char** argv)
                sleep_next_period_linux();
        }
+        if (scheduler == LITMUS && GPU_USING && ENABLE_RT_AUX_THREADS) {
+                if (enable_aux_rt_tasks(AUX_CURRENT | AUX_FUTURE) != 0)
+                        bail_out("enable_aux_rt_tasks() failed");
+        }
        start = wctime();
        if (scheduler == LITMUS)
diff --git a/src/migration.c b/src/migration.c
index 152d81b..7ac320e 100644
--- a/src/migration.c
+++ b/src/migration.c
@@ -4,8 +4,13 @@
 #include <sched.h> /* for cpu sets */
 #include <unistd.h>
+#ifdef LITMUS_NUMA_SUPPORT
+#include <numa.h>
+#endif
 #include "migration.h"
 extern ssize_t read_file(const char* fname, void* buf, size_t maxlen);
 int release_master()
@@ -54,6 +59,50 @@ int cluster_to_first_cpu(int cluster, int cluster_sz)
        return first_cpu;
 }
+#ifdef LITMUS_NUMA_SUPPORT
+/*
+ * Restrict the task to the NUMA nodes that host the CPUs set in 'cpus'.
+ *
+ * Call this before setting up CPU affinity masks since that mask may be
+ * a subset of the CPUs belonging to the selected NUMA nodes.
+ *
+ * tid:  task whose previously allocated pages are migrated
+ * sz:   size of 'cpus' in BYTES (the same value that is handed to
+ *       sched_setaffinity() and CPU_ISSET_S()), not a CPU count
+ * cpus: dynamically sized CPU set describing the target CPUs
+ *
+ * Returns 0 if NUMA is not available; otherwise the return value of
+ * numa_migrate_pages() (negative on failure).
+ *
+ * NOTE(review): numa_bind() applies to the calling task, while the page
+ * migration uses 'tid' -- confirm callers only pass their own tid (or 0).
+ */
+static int setup_numa(pid_t tid, int sz, const cpu_set_t *cpus)
+{
+        int nr_nodes;
+        int nr_cpu_bits;
+        int node;
+        struct bitmask* new_nodes;
+        struct bitmask* old_nodes;
+        int i;
+        int ret = 0;
+        if (numa_available() != 0)
+                goto out;
+        nr_nodes = numa_max_node()+1;
+        new_nodes = numa_bitmask_alloc(nr_nodes);
+        old_nodes = numa_bitmask_alloc(nr_nodes);
+        /* map the cpu mask to a numa mask */
+        /* 'sz' counts bytes, so the set holds sz * 8 CPU bits; scanning only
+         * 'sz' entries would miss every CPU with an index >= sz. */
+        nr_cpu_bits = sz * 8;
+        for (i = 0; i < nr_cpu_bits; ++i) {
+                if (!CPU_ISSET_S(i, sz, cpus))
+                        continue;
+                node = numa_node_of_cpu(i);
+                /* numa_node_of_cpu() reports -1 for an invalid cpu */
+                if (node >= 0 && node < nr_nodes)
+                        numa_bitmask_setbit(new_nodes, node);
+        }
+        /* compute the complement numa mask */
+        for (i = 0; i < nr_nodes; ++i) {
+                if (!numa_bitmask_isbitset(new_nodes, i)) {
+                        numa_bitmask_setbit(old_nodes, i);
+                }
+        }
+        numa_set_strict(1);
+        numa_bind(new_nodes); /* sets CPU and memory policy */
+        ret = numa_migrate_pages(tid, old_nodes, new_nodes); /* move over prior alloc'ed pages */
+        numa_bitmask_free(new_nodes);
+        numa_bitmask_free(old_nodes);
+out:
+        return ret;
+}
+#else
+#define setup_numa(x, y, z) 0
+#endif
 int be_migrate_thread_to_cpu(pid_t tid, int target_cpu)
 {
        cpu_set_t *cpu_set;
@@ -82,7 +131,9 @@ int be_migrate_thread_to_cpu(pid_t tid, int target_cpu)
        if (tid == 0)
                tid = gettid();
-        ret = sched_setaffinity(tid, sz, cpu_set);
+        ret = (setup_numa(tid, sz, cpu_set) >= 0) ? 0 : -1;
+        if (!ret)
+                ret = sched_setaffinity(tid, sz, cpu_set);
        CPU_FREE(cpu_set);
@@ -114,7 +165,7 @@ int __be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz,
        }
        master = (ignore_rm) ? -1 : release_master();
-        num_cpus = num_online_cpus();
+                num_cpus = num_online_cpus();
        if (num_cpus == -1 || last_cpu >= num_cpus || first_cpu < 0)
                return -1;
@@ -133,7 +184,9 @@ int __be_migrate_thread_to_cluster(pid_t tid, int cluster, int cluster_sz,
        if (tid == 0)
                tid = gettid();
-        ret = sched_setaffinity(tid, sz, cpu_set);
+        ret = (setup_numa(tid, sz, cpu_set) >= 0) ? 0 : -1;
+        if (!ret)
+                ret = sched_setaffinity(tid, sz, cpu_set);
        CPU_FREE(cpu_set);
author	Glenn Elliott <gelliott@cs.unc.edu>	2013-04-24 20:34:17 -0400
committer	Glenn Elliott <gelliott@cs.unc.edu>	2013-04-24 20:34:17 -0400
commit	e3935c7f68ce428e394eb53ea29ebef5509bcd7f (patch)
tree	33e5cff0aae98c00ce777b18fbaed888171ad334
parent	76b0d79069973bd58cda6028c65a9edaa6d2ea73 (diff)