 include/litmus/ce_domain.h |  23
 include/litmus/sched_mc.h  |  37
 litmus/ce_domain.c         |  89
 litmus/sched_mc.c          |  87
 litmus/sched_mc_ce.c       | 351
 5 files changed, 339 insertions(+), 248 deletions(-)
diff --git a/include/litmus/ce_domain.h b/include/litmus/ce_domain.h
new file mode 100644
index 000000000000..373f3f5f78d3
--- /dev/null
+++ b/include/litmus/ce_domain.h
@@ -0,0 +1,23 @@
+#ifndef _LITMUS_CE_DOMAIN_H
+#define _LITMUS_CE_DOMAIN_H
+
+/*
+ * Functions that the MC plugin needs to call through a domain pointer.
+ */
+void ce_requeue(domain_t*, struct task_struct*);
+struct task_struct* ce_peek_and_take_ready(domain_t*);
+int ce_higher_prio(struct task_struct*, struct task_struct*);
+
+typedef enum hrtimer_restart (*ce_timer_callback_t)(struct hrtimer*);
+
+void ce_domain_init(domain_t*,
+		raw_spinlock_t*,
+		requeue_t,
+		peek_ready_t,
+		take_ready_t,
+		preempt_needed_t,
+		task_prio_t,
+		struct ce_dom_data*,
+		const int,
+		ce_timer_callback_t);
+#endif
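
The new header exposes the cyclic-executive (CE) domain operations so the MC plugin can drive a level-A CE domain entirely through a domain_t and its function pointers. A minimal sketch of that indirection follows; the domain_t field names (peek_ready, take_ready) are assumptions inferred from how this patch uses them, not a verbatim copy of the definition elsewhere in the LITMUS^RT tree:

    /* Hypothetical caller: dispatch through the domain's function
     * pointers instead of naming ce_peek_and_take_ready() directly.
     * For a CE domain, both pointers are ce_peek_and_take_ready. */
    static struct task_struct *next_level_a_task(domain_t *dom)
    {
            struct task_struct *t = dom->peek_ready(dom);
            if (t)
                    t = dom->take_ready(dom);
            return t;
    }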
diff --git a/include/litmus/sched_mc.h b/include/litmus/sched_mc.h
index ad5d097b3d61..384e65e4151d 100644
--- a/include/litmus/sched_mc.h
+++ b/include/litmus/sched_mc.h
@@ -21,7 +21,10 @@ struct mc_job {
 };
 
 #ifdef __KERNEL__
-/* only used in the kernel (no user space) */
+/*
+ * These are used only in the kernel. Userspace programs like RTSpin won't see
+ * them.
+ */
 
 struct mc_data {
 	struct mc_task mc_task;
@@ -33,18 +36,8 @@ struct mc_data {
 #define is_ghost(t) (tsk_mc_data(t)->mc_job.is_ghost)
 
 /*
- * Cache the budget along with the struct PID for a task so that we don't need
- * to fetch its task_struct every time we check to see what should be
- * scheduled.
+ * The MC-CE scheduler uses this as domain data.
  */
-struct ce_dom_pid_entry {
-	struct pid *pid;
-	lt_t budget;
-	/* accumulated (summed) budgets, including this one */
-	lt_t acc_time;
-	int expected_job;
-};
-
 struct ce_dom_data {
 	int cpu;
 	struct task_struct *scheduled, *should_schedule;
@@ -52,9 +45,6 @@ struct ce_dom_data {
 	 * Each CPU needs a mapping of level A ID (integer) to struct pid so
 	 * that we can get its task struct.
 	 */
-	struct ce_dom_pid_entry pid_entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
-	int num_pid_entries;
-	lt_t cycle_time;
 	struct hrtimer_start_on_info timer_info;
 	struct hrtimer timer;
 };
@@ -90,6 +80,23 @@ typedef struct {
 	crit_entry_t* crit_entry;
 } domain_data_t;
 
+/*
+ * Functions that are used with the MC-CE plugin.
+ */
+long mc_ce_set_domains(const int, domain_data_t*[]);
+unsigned int mc_ce_get_expected_job(const int, const int);
+
+/*
+ * These functions are (lazily) inserted into the MC plugin code so that it
+ * manipulates the MC-CE state.
+ */
+long mc_ce_admit_task_common(struct task_struct*);
+void mc_ce_task_exit_common(struct task_struct*);
+void mc_ce_timer_callback_common(domain_t*, struct hrtimer*);
+void mc_ce_release_at_common(struct task_struct*, lt_t);
+long mc_ce_activate_plugin_common(void);
+long mc_ce_deactivate_plugin_common(void);
+
 #endif /* __KERNEL__ */
 
 #endif
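
The *_common entry points declared above give the full MC plugin and the standalone MC-CE plugin a single shared implementation of the level-A logic; each plugin wraps the shared call in its own locking and preemption handling, as mc_task_exit() and ce_timer_function() do later in this patch. A hedged sketch of the intended wrapping pattern (the wrapper name is illustrative only):

    /* Illustrative wrapper; the real call sites are in litmus/sched_mc.c. */
    static void some_plugin_task_exit(struct task_struct *t)
    {
            if (CRIT_LEVEL_A == tsk_mc_crit(t))
                    mc_ce_task_exit_common(t); /* shared MC-CE bookkeeping */
            /* plugin-specific teardown continues here */
    }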
diff --git a/litmus/ce_domain.c b/litmus/ce_domain.c
index 8797c05d9392..c00feaf45a5c 100644
--- a/litmus/ce_domain.c
+++ b/litmus/ce_domain.c
@@ -19,35 +19,25 @@ void ce_requeue(domain_t *dom, struct task_struct *ts)
 {
 	const struct ce_dom_data *ce_data = dom->data;
 	const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
-	const struct ce_dom_pid_entry *pid_entry =
-		&ce_data->pid_entries[idx];
-	const int just_finished = tsk_rt(ts)->job_params.job_no;
-	const int expected_job = pid_entry->expected_job;
+	const unsigned int just_finished = tsk_rt(ts)->job_params.job_no;
+	const unsigned int expected_job =
+		mc_ce_get_expected_job(ce_data->cpu, idx);
 	const int asleep = RT_F_SLEEP == get_rt_flags(ts);
 
-	TRACE_TASK(ts, "entered ce_requeue. asleep: %d just_finished: %4d "
-			"expected_job: %4d\n",
+	TRACE_TASK(ts, "entered ce_requeue. asleep: %d just_finished: %3u "
+			"expected_job: %3u\n",
 			asleep, just_finished, expected_job);
 
 	/* When coming from job completion, the task will be asleep. */
 	if (asleep && just_finished < expected_job) {
 		TRACE_TASK(ts, "appears behind\n");
 	} else if (asleep && expected_job < just_finished) {
-		TRACE_TASK(ts, "job %d completed in expected job %d which "
+		TRACE_TASK(ts, "job %u completed in expected job %u which "
				"seems too early\n", just_finished,
				expected_job);
 	}
 }
 
-void mc_ce_task_exit(struct task_struct*);
-/*
- * Called when a task exits the system.
- */
-void ce_task_exit(domain_t *dom, struct task_struct *ts)
-{
-	mc_ce_task_exit(ts);
-}
-
 /*
  * ce_take_ready and ce_peek_ready
  */
@@ -74,8 +64,7 @@ struct task_struct* ce_peek_and_take_ready(domain_t *dom)
 	(t) ? t->rt_param.job_params.job_no : 1, \
 	(t && get_task_domain(t)) ? get_task_domain(t)->name : ""
 
-int ce_higher_prio(struct task_struct *_a,
-		struct task_struct *_b)
+int ce_higher_prio(struct task_struct *_a, struct task_struct *_b)
 {
 	const struct task_struct *a = _a;
 	const domain_t *dom = get_task_domain(a);
@@ -87,52 +76,22 @@ int ce_higher_prio(struct task_struct *_a,
 	return (a == ce_data->should_schedule);
 }
 
-void __mc_ce_timer_callback(struct hrtimer *timer);
-domain_data_t *ce_domain_for(int);
-void mc_check_for_preempt(domain_t*);
-static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
-{
-	struct ce_dom_data *ce_data;
-	domain_data_t *dom_data;
-	unsigned long flags;
-
-	TRACE("timer callback\n");
-
-	ce_data = container_of(timer, struct ce_dom_data, timer);
-	dom_data = ce_domain_for(ce_data->cpu);
-	raw_spin_lock_irqsave(dom_data->domain.lock, flags);
-	__mc_ce_timer_callback(timer);
-	mc_check_for_preempt(&dom_data->domain);
-	raw_spin_unlock_irqrestore(dom_data->domain.lock, flags);
-	return HRTIMER_RESTART;
-}
-
-void mc_ce_release_at(struct task_struct*, lt_t);
-void ce_start(struct task_struct *ts, lt_t start)
-{
-	mc_ce_release_at(ts, start);
-}
-
-domain_data_t *ce_domain_for(int);
-long mc_ce_activate_plugin(void);
-long ce_activate_plugin(void)
-{
-	domain_data_t *dom_data;
-	struct ce_dom_data *ce_data;
-	int cpu;
-
-	/* first change the timer callback function */
-	for_each_online_cpu(cpu) {
-		dom_data = ce_domain_for(cpu);
-		ce_data = dom_data->domain.data;
-		ce_data->timer.function = ce_timer_function;
-	}
-	/* then run the regular CE activate plugin */
-	return mc_ce_activate_plugin();
-}
-
-long mc_ce_deactivate_plugin(void);
-long ce_deactivate_plugin(void)
+void ce_domain_init(domain_t *dom,
+		raw_spinlock_t *lock,
+		requeue_t requeue,
+		peek_ready_t peek_ready,
+		take_ready_t take_ready,
+		preempt_needed_t preempt_needed,
+		task_prio_t task_prio,
+		struct ce_dom_data *dom_data,
+		const int cpu,
+		ce_timer_callback_t ce_timer_callback)
 {
-	return mc_ce_deactivate_plugin();
+	domain_init(dom, lock, requeue, peek_ready, take_ready, preempt_needed,
+			task_prio);
+	dom->data = dom_data;
+	dom_data->cpu = cpu;
+	hrtimer_start_on_info_init(&dom_data->timer_info);
+	hrtimer_init(&dom_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	dom_data->timer.function = ce_timer_callback;
 }
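
With ce_domain_init() as the single constructor, the two plugins differ only in the arguments they pass, as the two call sites later in this patch show: the MC plugin supplies the real queue operations plus its own ce_timer_function, while the standalone MC-CE plugin passes NULL for the operations it never uses and its mc_ce_timer_callback:

    /* Both calls appear verbatim in the init paths below. */
    ce_domain_init(&dom_data->domain,
                    a_dom_lock, ce_requeue, ce_peek_and_take_ready,
                    ce_peek_and_take_ready, mc_preempt_needed,
                    ce_higher_prio, ce_data, cpu,
                    ce_timer_function);             /* MC plugin */

    ce_domain_init(dom, ce_lock, NULL, NULL, NULL, NULL, NULL,
                    &per_cpu(_mc_ce_dom_data, cpu), cpu,
                    mc_ce_timer_callback);          /* standalone MC-CE plugin */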
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
index 3b98a93511ab..11ba10a54f4d 100644
--- a/litmus/sched_mc.c
+++ b/litmus/sched_mc.c
@@ -507,7 +507,7 @@ static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
 {
 	unsigned long flags;
 	struct task_struct *tmp = NULL;
-	crit_entry_t *ce = container_of(timer, crit_entry_t, timer);;
+	crit_entry_t *ce = container_of(timer, crit_entry_t, timer);
 
 	local_irq_save(flags);
 	TRACE_CRIT_ENTRY(ce, "Ghost exhausted firing");
@@ -534,22 +534,38 @@ static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void __mc_ce_timer_callback(struct hrtimer *timer);
-domain_data_t *ce_domain_for(int);
 static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
 {
-	struct ce_dom_data *ce_data;
-	domain_data_t *dom_data;
+	struct ce_dom_data *ce_data =
+		container_of(timer, struct ce_dom_data, timer);
+	crit_entry_t *ce = &cpus[ce_data->cpu]->crit_entries[CRIT_LEVEL_A];
+	domain_t *dom = ce->domain;
+	struct task_struct *old_link = NULL;
 	unsigned long flags;
 
-	TRACE("timer callback\n");
+	TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
+
+	raw_spin_lock_irqsave(dom->lock, flags);
+
+	raw_spin_lock(&crit_cpu(ce)->lock);
+	if (ce->linked &&
+	    ce->linked == ce_data->should_schedule &&
+	    is_ghost(ce->linked))
+	{
+		update_ghost_time(ce->linked);
+		if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) {
+			old_link = ce->linked;
+			link_task_to_crit(ce, NULL);
+		}
+	}
+	raw_spin_unlock(&crit_cpu(ce)->lock);
+
+	if (NULL != old_link)
+		job_completion(old_link, 0);
 
-	ce_data = container_of(timer, struct ce_dom_data, timer);
-	dom_data = ce_domain_for(ce_data->cpu);
-	raw_spin_lock_irqsave(dom_data->domain.lock, flags);
-	__mc_ce_timer_callback(timer);
-	mc_check_for_preempt(&dom_data->domain);
-	raw_spin_unlock_irqrestore(dom_data->domain.lock, flags);
+	mc_ce_timer_callback_common(dom, timer);
+	mc_check_for_preempt(dom);
+	raw_spin_unlock_irqrestore(dom->lock, flags);
 	return HRTIMER_RESTART;
 }
 
@@ -658,12 +674,11 @@ static void mc_task_exit(struct task_struct *task)
 	}
 
 	if (CRIT_LEVEL_A == tsk_mc_crit(task))
-		ce_task_exit(get_task_domain(task), task);
+		mc_ce_task_exit_common(task);
 
 	local_irq_restore(flags);
 }
 
-long __mc_ce_admit_task(struct task_struct*);
 /**
  * mc_admit_task() - Return true if the task is valid.
  * Assumes there are no partitioned levels after level B.
@@ -685,7 +700,7 @@ static long mc_admit_task(struct task_struct* task)
 		goto out;
 	}
 	if (crit == CRIT_LEVEL_A) {
-		ret = __mc_ce_admit_task(task);
+		ret = mc_ce_admit_task_common(task);
 		if (ret)
 			goto out;
 	}
@@ -806,12 +821,30 @@ static struct task_struct* mc_schedule(struct task_struct * prev)
 
 static long mc_activate_plugin(void)
 {
+	domain_data_t *dom_data;
+	domain_t *dom;
+	domain_data_t *our_domains[NR_CPUS];
+	int cpu, n = 0;
+	long ret;
+
 #ifdef CONFIG_RELEASE_MASTER
 	interrupt_cpu = atomic_read(&release_master_cpu);
 	if (interrupt_cpu == NO_CPU)
 		interrupt_cpu = 0;
 #endif
-	return ce_activate_plugin();
+	for_each_online_cpu(cpu) {
+		BUG_ON(NR_CPUS <= n);
+		dom = cpus[cpu]->crit_entries[CRIT_LEVEL_A].domain;
+		dom_data = domain_data(dom);
+		our_domains[cpu] = dom_data;
+		n++;
+	}
+	ret = mc_ce_set_domains(n, our_domains);
+	if (ret)
+		goto out;
+	ret = mc_ce_activate_plugin_common();
+out:
+	return ret;
 }
 
 /*
@@ -823,14 +856,14 @@ void mc_release_at(struct task_struct *ts, lt_t start)
 {
 	/* hack so that we can have CE timers start at the right time */
 	if (CRIT_LEVEL_A == tsk_mc_crit(ts))
-		ce_start(ts, start);
+		mc_ce_release_at_common(ts, start);
 	else
 		release_at(ts, start);
 }
 
 long mc_deactivate_plugin(void)
 {
-	return ce_deactivate_plugin();
+	return mc_ce_deactivate_plugin_common();
 }
 
 /* **************************************************************************
@@ -843,7 +876,8 @@ long mc_deactivate_plugin(void)
 DEFINE_PER_CPU(cpu_entry_t, _mc_cpus);
 /* LVL-A */
 DEFINE_PER_CPU(domain_data_t, _mc_crit_a);
-DEFINE_PER_CPU(rt_domain_t, _mc_crit_a_rt);
+DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
+DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
 /* LVL-B */
 DEFINE_PER_CPU(domain_data_t, _mc_crit_b);
 DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
@@ -925,7 +959,8 @@ static int __init init_mc(void)
 	cpu_entry_t *entry;
 	rt_domain_t *rt;
 	domain_data_t *dom_data;
-	raw_spinlock_t *a_dom, *b_dom, *c_dom; /* For lock debugger */
+	raw_spinlock_t *a_dom_lock, *b_dom, *c_dom; /* For lock debugger */
+	struct ce_dom_data *ce_data;
 
 	for_each_online_cpu(cpu) {
 		entry = &per_cpu(_mc_cpus, cpu);
@@ -942,10 +977,16 @@ static int __init init_mc(void)
 #endif
 
 		/* CRIT_LEVEL_A */
-		dom_data = ce_domain_for(cpu);
-		ce_domain_init(/* TODO */);
+		dom_data = &per_cpu(_mc_crit_a, cpu);
+		ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
+		a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
+		raw_spin_lock_init(a_dom_lock);
+		ce_domain_init(&dom_data->domain,
+				a_dom_lock, ce_requeue, ce_peek_and_take_ready,
+				ce_peek_and_take_ready, mc_preempt_needed,
+				ce_higher_prio, ce_data, cpu,
+				ce_timer_function);
 		init_local_domain(entry, dom_data, CRIT_LEVEL_A);
-		a_dom = dom_data->domain.lock;
 		dom_data->domain.name = "LVL-A";
 
 		/* CRIT_LEVEL_B */
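
The rewritten ce_timer_function() above does more than drop the stale ce_domain_for() lookup: it now retires an exhausted ghost task before running the shared CE timer logic. Its locking discipline, summarized from the hunk above:

    /* Lock ordering in ce_timer_function (from the code above):
     *
     *   raw_spin_lock_irqsave(dom->lock, flags);    level-A domain lock
     *     raw_spin_lock(&crit_cpu(ce)->lock);       nested per-CPU lock
     *     ...unlink the exhausted ghost task...
     *     raw_spin_unlock(&crit_cpu(ce)->lock);
     *   job_completion(old_link, 0);                after the CPU lock drops
     *   mc_ce_timer_callback_common(dom, timer);    shared CE logic
     *   mc_check_for_preempt(dom);
     *   raw_spin_unlock_irqrestore(dom->lock, flags);
     */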
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
index 77acc67d05bd..8cac7ea58f66 100644
--- a/litmus/sched_mc_ce.c
+++ b/litmus/sched_mc_ce.c
@@ -27,25 +27,81 @@
 
 static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
 
-#define is_active_plugin() (litmus == &mc_ce_plugin)
-#define get_ce_data(dom_data_ref) (dom_data_ref->domain.data)
+#define using_linux_plugin() (litmus == &linux_sched_plugin)
+
+/* get a reference to domain_t for a CPU */
+#define get_domain_for(cpu) (&domains[cpu]->domain)
+
+#define get_pid_table(cpu) (&per_cpu(ce_pid_table, cpu))
+#define get_pid_entry(cpu, idx) (&(get_pid_table(cpu)->entries[idx]))
 
 static atomic_t start_time_set = ATOMIC_INIT(-1);
 static atomic64_t start_time = ATOMIC64_INIT(0);
 static struct proc_dir_entry *mc_ce_dir = NULL, *ce_file = NULL;
 
+/*
+ * Cache the budget along with the struct PID for a task so that we don't need
+ * to fetch its task_struct every time we check to see what should be
+ * scheduled.
+ */
+struct ce_pid_entry {
+	struct pid *pid;
+	lt_t budget;
+	/* accumulated (summed) budgets, including this one */
+	lt_t acc_time;
+	unsigned int expected_job;
+};
+
+struct ce_pid_table {
+	struct ce_pid_entry entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
+	int num_pid_entries;
+	lt_t cycle_time;
+};
+
+DEFINE_PER_CPU(struct ce_pid_table, ce_pid_table);
 
-DEFINE_PER_CPU(domain_data_t, mc_ce_doms);
+/*
+ * How we get the domain for a given CPU locally. Set with the
+ * mc_ce_set_domains function. Must be done before activating plugins. Be
+ * careful when using domains as a variable elsewhere in this file.
+ */
+static domain_data_t *domains[NR_CPUS] __cacheline_aligned_in_smp;
+
+/*
+ * The domains and other data used by the MC-CE plugin when it runs alone.
+ */
+DEFINE_PER_CPU(domain_data_t, _mc_ce_doms);
 DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
-DEFINE_PER_CPU(raw_spinlock_t, _dom_locks);
+DEFINE_PER_CPU(raw_spinlock_t, _mc_ce_dom_locks);
 
-/* Return the address of the domain_t for this CPU, used by the
- * mixed-criticality plugin. */
-domain_data_t *ce_domain_for(int cpu)
+long mc_ce_set_domains(const int n, domain_data_t *domains_in[])
 {
-	return &per_cpu(mc_ce_doms, cpu);
+	const int max = (NR_CPUS < n) ? NR_CPUS : n;
+	domain_data_t *new_dom = NULL;
+	int i, ret;
+	if (!using_linux_plugin()) {
+		printk(KERN_WARNING "can't set MC-CE domains when not using "
+				"Linux scheduler.\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < max; ++i) {
+		new_dom = domains_in[i];
+		domains[i] = new_dom;
+	}
+	ret = 0;
+out:
+	return ret;
 }
 
+unsigned int mc_ce_get_expected_job(const int cpu, const int idx)
+{
+	const struct ce_pid_table *pid_table = get_pid_table(cpu);
+	BUG_ON(0 > cpu);
+	BUG_ON(0 > idx);
+	BUG_ON(pid_table->num_pid_entries <= idx);
+	return pid_table->entries[idx].expected_job;
+}
 
 /*
  * Get the offset into the cycle taking the start time into account.
@@ -65,14 +121,12 @@ static inline lt_t get_cycle_offset(const lt_t when, const lt_t cycle_time)
  *
 * Do not call prepare_for_next_period on Level-A tasks!
 */
-static void mc_ce_job_completion(struct task_struct *ts)
+static void mc_ce_job_completion(domain_t *dom, struct task_struct *ts)
 {
-	const domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
-	const struct ce_dom_data *ce_data = get_ce_data(dom_data);
+	const int cpu = task_cpu(ts);
 	const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
-	const struct ce_dom_pid_entry *pid_entry =
-		&ce_data->pid_entries[idx];
-	int just_finished;
+	const struct ce_pid_entry *pid_entry = get_pid_entry(cpu, idx);
+	unsigned int just_finished;
 
 	TRACE_TASK(ts, "completed\n");
 
@@ -87,11 +141,11 @@ static void mc_ce_job_completion(struct task_struct *ts)
 	if (just_finished < pid_entry->expected_job) {
 		/* this job is already released because it's running behind */
 		set_rt_flags(ts, RT_F_RUNNING);
-		TRACE_TASK(ts, "appears behind: the expected job is %d but "
-				"job %d just completed\n",
+		TRACE_TASK(ts, "appears behind: the expected job is %u but "
+				"job %u just completed\n",
				pid_entry->expected_job, just_finished);
 	} else if (pid_entry->expected_job < just_finished) {
-		printk(KERN_CRIT "job %d completed in expected job %d which "
+		printk(KERN_CRIT "job %u completed in expected job %u which "
				"seems too early\n", just_finished,
				pid_entry->expected_job);
 		BUG();
@@ -109,30 +163,30 @@ static int mc_ce_schedule_at(const domain_t *dom, lt_t offset)
 static int mc_ce_schedule_at(const domain_t *dom, lt_t offset)
 {
 	const struct ce_dom_data *ce_data = dom->data;
-	const struct ce_dom_pid_entry *pid_entry = NULL;
-	int i;
+	struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
+	const struct ce_pid_entry *pid_entry = NULL;
+	int idx;
 
-	BUG_ON(ce_data->cycle_time < 1);
-	BUG_ON(ce_data->num_pid_entries < 1);
+	BUG_ON(pid_table->cycle_time < 1);
+	BUG_ON(pid_table->num_pid_entries < 1);
 
-	for (i = 0; i < ce_data->num_pid_entries; ++i) {
-		pid_entry = &ce_data->pid_entries[i];
+	for (idx = 0; idx < pid_table->num_pid_entries; ++idx) {
+		pid_entry = &pid_table->entries[idx];
 		if (offset < pid_entry->acc_time) {
 			/* found task to schedule in this window */
 			break;
 		}
 	}
 	/* can only happen if cycle_time is not right */
-	BUG_ON(pid_entry->acc_time > ce_data->cycle_time);
-	TRACE("schedule at returned task %d for CPU %d\n", i, ce_data->cpu);
-	return i;
+	BUG_ON(pid_entry->acc_time > pid_table->cycle_time);
+	TRACE("schedule at returning task %d for CPU %d\n", idx, ce_data->cpu);
+	return idx;
 }
 
 static struct task_struct *mc_ce_schedule(struct task_struct *prev)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
-	domain_t *dom = &dom_data->domain;
-	struct ce_dom_data *ce_data = get_ce_data(dom_data);
+	domain_t *dom = get_domain_for(smp_processor_id());
+	struct ce_dom_data *ce_data = dom->data;
 	struct task_struct *next = NULL;
 	int exists, sleep, should_sched_exists, should_sched_blocked,
 		should_sched_asleep;
@@ -150,7 +204,7 @@ static struct task_struct *mc_ce_schedule(struct task_struct *prev)
 	TRACE("exists: %d, sleep: %d\n", exists, sleep);
 
 	if (sleep)
-		mc_ce_job_completion(ce_data->scheduled);
+		mc_ce_job_completion(dom, ce_data->scheduled);
 
 	/* these checks must go after the call to mc_ce_job_completion in case
 	 * a late task needs to be scheduled again right away and its the only
@@ -181,8 +235,8 @@ static struct task_struct *mc_ce_schedule(struct task_struct *prev)
 
 static void mc_ce_finish_switch(struct task_struct *prev)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
-	struct ce_dom_data *ce_data = get_ce_data(dom_data);
+	domain_t *dom = get_domain_for(smp_processor_id());
+	struct ce_dom_data *ce_data = dom->data;
 
 	TRACE("finish switch\n");
 
@@ -197,14 +251,17 @@ static void mc_ce_finish_switch(struct task_struct *prev)
 * Here we look up the task's PID structure and save it in the proper slot on
 * the CPU this task will run on.
 */
-long __mc_ce_admit_task(struct task_struct *ts)
+long mc_ce_admit_task_common(struct task_struct *ts)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, get_partition(ts));
-	struct ce_dom_data *ce_data = get_ce_data(dom_data);
+	domain_t *dom = get_domain_for(get_partition(ts));
+	struct ce_dom_data *ce_data = dom->data;
 	struct mc_data *mcd = tsk_mc_data(ts);
 	struct pid *pid = NULL;
 	long retval = -EINVAL;
 	const int lvl_a_id = mcd->mc_task.lvl_a_id;
+	struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
+
+	BUG_ON(get_partition(ts) != ce_data->cpu);
 
 	/* check the task has migrated to the right CPU (like in sched_cedf) */
 	if (task_cpu(ts) != get_partition(ts)) {
@@ -228,26 +285,26 @@ long __mc_ce_admit_task(struct task_struct *ts)
 		goto out;
 	}
 
-	if (lvl_a_id >= ce_data->num_pid_entries) {
+	if (lvl_a_id >= pid_table->num_pid_entries) {
 		printk(KERN_INFO "litmus: level A id greater than expected "
				"number of tasks %d for %d cpu %d\n",
-				ce_data->num_pid_entries, ts->pid,
+				pid_table->num_pid_entries, ts->pid,
				get_partition(ts));
 		goto out_put_pid;
 	}
-	if (ce_data->pid_entries[lvl_a_id].pid) {
+	if (pid_table->entries[lvl_a_id].pid) {
 		printk(KERN_INFO "litmus: have saved pid info id: %d cpu: %d\n",
				lvl_a_id, get_partition(ts));
 		goto out_put_pid;
 	}
-	if (get_exec_cost(ts) >= ce_data->pid_entries[lvl_a_id].budget) {
+	if (get_exec_cost(ts) >= pid_table->entries[lvl_a_id].budget) {
 		printk(KERN_INFO "litmus: execution cost %llu is larger than "
				"the budget %llu\n",
				get_exec_cost(ts),
-				ce_data->pid_entries[lvl_a_id].budget);
+				pid_table->entries[lvl_a_id].budget);
 		goto out_put_pid;
 	}
-	ce_data->pid_entries[lvl_a_id].pid = pid;
+	pid_table->entries[lvl_a_id].pid = pid;
 	retval = 0;
 	/* don't call put_pid if we are successful */
 	goto out;
@@ -260,11 +317,10 @@ out:
 
 static long mc_ce_admit_task(struct task_struct *ts)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, get_partition(ts));
-	domain_t *dom = &dom_data->domain;
+	domain_t *dom = get_domain_for(get_partition(ts));
 	unsigned long flags, retval;
 	raw_spin_lock_irqsave(dom->lock, flags);
-	retval = __mc_ce_admit_task(ts);
+	retval = mc_ce_admit_task_common(ts);
 	raw_spin_unlock_irqrestore(dom->lock, flags);
 	return retval;
 }
@@ -276,27 +332,26 @@ static long mc_ce_admit_task(struct task_struct *ts)
 */
 static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, task_cpu(ts));
-	domain_t *dom = &dom_data->domain;
-	struct ce_dom_data *ce_data = get_ce_data(dom_data);
+	const int cpu = task_cpu(ts);
+	domain_t *dom = get_domain_for(cpu);
+	struct ce_dom_data *ce_data = dom->data;
+	struct ce_pid_table *pid_table = get_pid_table(cpu);
 	struct pid *pid_should_be_running;
-	struct ce_dom_pid_entry *pid_entry;
+	struct ce_pid_entry *pid_entry;
 	unsigned long flags;
 	int idx, should_be_running;
 	lt_t offset;
 
-	/* have to call mc_ce_schedule_at because the task only gets a PID
-	 * entry after calling admit_task */
-
 	raw_spin_lock_irqsave(dom->lock, flags);
-	pid_entry = &ce_data->pid_entries[tsk_mc_data(ts)->mc_task.lvl_a_id];
+	pid_entry = get_pid_entry(cpu, tsk_mc_data(ts)->mc_task.lvl_a_id);
 	/* initialize some task state */
 	set_rt_flags(ts, RT_F_RUNNING);
-	tsk_rt(ts)->job_params.job_no = 1;
 
-	offset = get_cycle_offset(litmus_clock(), ce_data->cycle_time);
+	/* have to call mc_ce_schedule_at because the task only gets a PID
	 * entry after calling admit_task */
+	offset = get_cycle_offset(litmus_clock(), pid_table->cycle_time);
 	idx = mc_ce_schedule_at(dom, offset);
-	pid_should_be_running = ce_data->pid_entries[idx].pid;
+	pid_should_be_running = get_pid_entry(cpu, idx)->pid;
 	rcu_read_lock();
 	should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
 	rcu_read_unlock();
@@ -323,9 +378,8 @@ static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
 */
 static void mc_ce_task_wake_up(struct task_struct *ts)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
-	domain_t *dom = &dom_data->domain;
-	struct ce_dom_data *ce_data = get_ce_data(dom_data);
+	domain_t *dom = get_domain_for(get_partition(ts));
+	struct ce_dom_data *ce_data = dom->data;
 	unsigned long flags;
 
 	TRACE_TASK(ts, "wake up\n");
@@ -349,26 +403,27 @@ static void mc_ce_task_block(struct task_struct *ts)
 /*
 * Called when a task switches from RT mode back to normal mode.
 */
-void mc_ce_task_exit(struct task_struct *ts)
+void mc_ce_task_exit_common(struct task_struct *ts)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, get_partition(ts));
-	domain_t *dom = &dom_data->domain;
-	struct ce_dom_data *ce_data = get_ce_data(dom_data);
+	domain_t *dom = get_domain_for(get_partition(ts));
+	struct ce_dom_data *ce_data = dom->data;
 	unsigned long flags;
 	struct pid *pid;
 	const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
+	struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
 
 	TRACE_TASK(ts, "exited\n");
 
 	BUG_ON(task_cpu(ts) != get_partition(ts));
 	BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
-	BUG_ON(lvl_a_id >= ce_data->num_pid_entries);
+	BUG_ON(lvl_a_id >= pid_table->num_pid_entries);
+	BUG_ON(ce_data->cpu != task_cpu(ts));
 
 	raw_spin_lock_irqsave(dom->lock, flags);
-	pid = ce_data->pid_entries[lvl_a_id].pid;
+	pid = pid_table->entries[lvl_a_id].pid;
 	BUG_ON(!pid);
 	put_pid(pid);
-	ce_data->pid_entries[lvl_a_id].pid = NULL;
+	pid_table->entries[lvl_a_id].pid = NULL;
 	if (ce_data->scheduled == ts)
 		ce_data->scheduled = NULL;
 	if (ce_data->should_schedule == ts)
@@ -380,29 +435,27 @@ void mc_ce_task_exit(struct task_struct *ts)
 * Timer stuff
 **********************************************************/
 
-void __mc_ce_timer_callback(struct hrtimer *timer)
+void mc_ce_timer_callback_common(domain_t *dom, struct hrtimer *timer)
 {
 	/* relative and absolute times for cycles */
 	lt_t now, offset_rel, cycle_start_abs, next_timer_abs;
 	struct task_struct *should_schedule;
-	struct ce_dom_pid_entry *pid_entry;
+	struct ce_pid_table *pid_table;
+	struct ce_pid_entry *pid_entry;
 	struct ce_dom_data *ce_data;
-	domain_data_t *dom_data;
-	domain_t *dom;
 	int idx, budget_overrun;
 
-	ce_data = container_of(timer, struct ce_dom_data, timer);
-	dom_data = &per_cpu(mc_ce_doms, ce_data->cpu);
-	dom = &dom_data->domain;
+	ce_data = dom->data;
+	pid_table = get_pid_table(ce_data->cpu);
 
 	/* Based off of the current time, figure out the offset into the cycle
	 * and the cycle's start time, and determine what should be scheduled.
	 */
 	now = litmus_clock();
-	offset_rel = get_cycle_offset(now, ce_data->cycle_time);
+	offset_rel = get_cycle_offset(now, pid_table->cycle_time);
 	cycle_start_abs = now - offset_rel;
 	idx = mc_ce_schedule_at(dom, offset_rel);
-	pid_entry = &ce_data->pid_entries[idx];
+	pid_entry = get_pid_entry(ce_data->cpu, idx);
 	/* set the timer to fire at the next cycle start */
 	next_timer_abs = cycle_start_abs + pid_entry->acc_time;
 	hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
@@ -426,8 +479,8 @@ void __mc_ce_timer_callback(struct hrtimer *timer)
 		budget_overrun = pid_entry->expected_job !=
			tsk_rt(should_schedule)->job_params.job_no;
 		if (budget_overrun)
-			TRACE_TASK(should_schedule, "timer expected job number: %d "
-					"but current job: %d\n",
+			TRACE_TASK(should_schedule, "timer expected job number: %u "
+					"but current job: %u\n",
					pid_entry->expected_job,
					tsk_rt(should_schedule)->job_params.job_no);
 	}
@@ -452,17 +505,15 @@ static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
 {
 	struct ce_dom_data *ce_data;
 	unsigned long flags;
-	domain_data_t *dom_data;
 	domain_t *dom;
 
 	ce_data = container_of(timer, struct ce_dom_data, timer);
-	dom_data = &per_cpu(mc_ce_doms, ce_data->cpu);
-	dom = &dom_data->domain;
+	dom = get_domain_for(ce_data->cpu);
 
 	TRACE("timer callback on CPU %d (before lock)\n", ce_data->cpu);
 
 	raw_spin_lock_irqsave(dom->lock, flags);
-	__mc_ce_timer_callback(timer);
+	mc_ce_timer_callback_common(dom, timer);
 
 	if (ce_data->scheduled != ce_data->should_schedule)
 		preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
@@ -478,14 +529,14 @@ static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
 static int cancel_all_timers(void)
 {
 	struct ce_dom_data *ce_data;
-	domain_data_t *dom_data;
-	int cpu, ret = 0, cancel_res;
+	domain_t *dom;
+	int cpu, cancel_res, ret = 0;
 
 	TRACE("cancel all timers\n");
 
 	for_each_online_cpu(cpu) {
-		dom_data = &per_cpu(mc_ce_doms, cpu);
-		ce_data = get_ce_data(dom_data);
+		dom = get_domain_for(cpu);
+		ce_data = dom->data;
 		ce_data->should_schedule = NULL;
 		cancel_res = hrtimer_cancel(&ce_data->timer);
 		atomic_set(&ce_data->timer_info.state,
@@ -502,20 +553,22 @@ static int cancel_all_timers(void)
 */
 static void arm_all_timers(void)
 {
+	domain_t *dom;
 	struct ce_dom_data *ce_data;
-	domain_data_t *dom_data;
+	struct ce_pid_table *pid_table;
 	int cpu, idx;
 	const lt_t start = atomic64_read(&start_time);
 
 	TRACE("arm all timers\n");
 
 	for_each_online_cpu(cpu) {
-		dom_data = &per_cpu(mc_ce_doms, cpu);
-		ce_data = get_ce_data(dom_data);
-		if (0 == ce_data->num_pid_entries)
+		dom = get_domain_for(cpu);
+		ce_data = dom->data;
+		pid_table = get_pid_table(cpu);
+		if (0 == pid_table->num_pid_entries)
			continue;
-		for (idx = 0; idx < ce_data->num_pid_entries; idx++) {
-			ce_data->pid_entries[idx].expected_job = 0;
+		for (idx = 0; idx < pid_table->num_pid_entries; idx++) {
+			pid_table->entries[idx].expected_job = 1;
 		}
 		TRACE("arming timer for CPU %d\n", cpu);
 		hrtimer_start_on(cpu, &ce_data->timer_info, &ce_data->timer,
@@ -528,7 +581,7 @@ static void arm_all_timers(void)
 * call this. We can re-set our notion of the CE period start to make
 * the schedule look pretty.
 */
-void mc_ce_release_at(struct task_struct *ts, lt_t start)
+void mc_ce_release_at_common(struct task_struct *ts, lt_t start)
 {
 	TRACE_TASK(ts, "release at\n");
 	if (atomic_inc_and_test(&start_time_set)) {
@@ -540,15 +593,15 @@ void mc_ce_release_at(struct task_struct *ts, lt_t start)
 	atomic_dec(&start_time_set);
 }
 
-long mc_ce_activate_plugin(void)
+long mc_ce_activate_plugin_common(void)
 {
 	struct ce_dom_data *ce_data;
-	domain_data_t *dom_data;
+	domain_t *dom;
 	int cpu;
 
 	for_each_online_cpu(cpu) {
-		dom_data = &per_cpu(mc_ce_doms, cpu);
-		ce_data = get_ce_data(dom_data);
+		dom = get_domain_for(cpu);
+		ce_data = dom->data;
 		ce_data->scheduled = NULL;
 		ce_data->should_schedule = NULL;
 	}
@@ -560,33 +613,54 @@ long mc_ce_activate_plugin(void)
 	return 0;
 }
 
+static long mc_ce_activate_plugin(void)
+{
+	domain_data_t *our_domains[NR_CPUS];
+	int cpu, n = 0;
+	long ret;
+
+	for_each_online_cpu(cpu) {
+		BUG_ON(NR_CPUS <= n);
+		our_domains[cpu] = &per_cpu(_mc_ce_doms, cpu);
+		n++;
+	}
+	ret = mc_ce_set_domains(n, our_domains);
+	if (ret)
+		goto out;
+	ret = mc_ce_activate_plugin_common();
+out:
+	return ret;
+}
+
 static void clear_pid_entries(void)
 {
+	struct ce_pid_table *pid_table = NULL;
 	int cpu, entry;
-	domain_data_t *dom_data;
-	struct ce_dom_data *ce_data;
 
 	for_each_online_cpu(cpu) {
-		dom_data = &per_cpu(mc_ce_doms, cpu);
-		ce_data = get_ce_data(dom_data);
-		ce_data->num_pid_entries = 0;
-		ce_data->cycle_time = 0;
+		pid_table = get_pid_table(cpu);
+		pid_table->num_pid_entries = 0;
+		pid_table->cycle_time = 0;
 		for (entry = 0; entry < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS;
				++entry) {
-			if (NULL != ce_data->pid_entries[entry].pid) {
-				put_pid(ce_data->pid_entries[entry].pid);
-				ce_data->pid_entries[entry].pid = NULL;
+			if (NULL != pid_table->entries[entry].pid) {
+				put_pid(pid_table->entries[entry].pid);
+				pid_table->entries[entry].pid = NULL;
 			}
-			ce_data->pid_entries[entry].budget = 0;
-			ce_data->pid_entries[entry].acc_time = 0;
-			ce_data->pid_entries[entry].expected_job = 0;
+			pid_table->entries[entry].budget = 0;
+			pid_table->entries[entry].acc_time = 0;
+			pid_table->entries[entry].expected_job = 1;
 		}
 	}
 }
 
-long mc_ce_deactivate_plugin(void)
+long mc_ce_deactivate_plugin_common(void)
 {
+	int cpu;
 	cancel_all_timers();
+	for_each_online_cpu(cpu) {
+		domains[cpu] = NULL;
+	}
 	return 0;
 }
 
@@ -596,43 +670,33 @@ static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
 	.admit_task = mc_ce_admit_task,
 	.task_new = mc_ce_task_new,
 	.complete_job = complete_job,
-	.release_at = mc_ce_release_at,
-	.task_exit = mc_ce_task_exit,
+	.release_at = mc_ce_release_at_common,
+	.task_exit = mc_ce_task_exit_common,
 	.schedule = mc_ce_schedule,
 	.finish_switch = mc_ce_finish_switch,
 	.task_wake_up = mc_ce_task_wake_up,
 	.task_block = mc_ce_task_block,
 	.activate_plugin = mc_ce_activate_plugin,
-	.deactivate_plugin = mc_ce_deactivate_plugin,
+	.deactivate_plugin = mc_ce_deactivate_plugin_common,
 };
 
-int mc_preempt_needed(domain_t*, struct task_struct*);
 static int setup_proc(void);
 static int __init init_sched_mc_ce(void)
 {
-	struct ce_dom_data *ce_data;
 	raw_spinlock_t *ce_lock;
 	domain_data_t *dom_data;
 	domain_t *dom;
 	int cpu, err;
 
 	for_each_online_cpu(cpu) {
-		ce_lock = &per_cpu(_dom_locks, cpu);
+		domains[cpu] = NULL;
+		ce_lock = &per_cpu(_mc_ce_dom_locks, cpu);
 		raw_spin_lock_init(ce_lock);
-		dom_data = &per_cpu(mc_ce_doms, cpu);
+		dom_data = &per_cpu(_mc_ce_doms, cpu);
 		dom = &dom_data->domain;
-		/* initialize the domain. the ce_ functions are for the MC
-		 * plugin */
-		/* move into ce_domain_init */
-		domain_init(dom, ce_lock, ce_requeue, ce_peek_and_take_ready,
-				ce_peek_and_take_ready, mc_preempt_needed,
-				ce_higher_prio);
-		dom->data = &per_cpu(_mc_ce_dom_data, cpu);
-		ce_data = get_ce_data(dom_data);
-		hrtimer_init(&ce_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-		hrtimer_start_on_info_init(&ce_data->timer_info);
-		ce_data->cpu = cpu;
-		ce_data->timer.function = mc_ce_timer_callback;
+		ce_domain_init(dom, ce_lock, NULL, NULL, NULL, NULL, NULL,
+				&per_cpu(_mc_ce_dom_data, cpu), cpu,
+				mc_ce_timer_callback);
 	}
 	clear_pid_entries();
 	err = setup_proc();
@@ -674,7 +738,7 @@ out:
 #define PID_SPACE 15
 #define TASK_INFO_BUF (PID_SPACE + TASK_COMM_LEN)
 static int write_pid_entry(char *page, const int count, const int cpu,
-		const int task, struct ce_dom_pid_entry *pid_entry)
+		const int task, struct ce_pid_entry *pid_entry)
 {
 	static char task_info[TASK_INFO_BUF];
 	struct task_struct *ts;
@@ -731,8 +795,7 @@ static int proc_read_ce_file(char *page, char **start, off_t off, int count,
		int *eof, void *data)
 {
 	int n = 0, err, cpu, t;
-	struct ce_dom_data *ce_data;
-	domain_data_t *dom_data;
+	struct ce_pid_table *pid_table;
 
 	if (off > 0) {
 		printk(KERN_INFO "litmus: MC-CE called read with off > 0\n");
@@ -740,11 +803,10 @@ static int proc_read_ce_file(char *page, char **start, off_t off, int count,
 	}
 
 	for_each_online_cpu(cpu) {
-		dom_data = &per_cpu(mc_ce_doms, cpu);
-		ce_data = get_ce_data(dom_data);
-		for (t = 0; t < ce_data->num_pid_entries; ++t) {
+		pid_table = get_pid_table(cpu);
+		for (t = 0; t < pid_table->num_pid_entries; ++t) {
			err = write_pid_entry(page + n, count - n,
-					cpu, t, &ce_data->pid_entries[t]);
+					cpu, t, get_pid_entry(cpu, t));
			if (err < 0) {
				n = -ENOSPC;
				goto out;
@@ -781,9 +843,8 @@ static int skip_comment(const char *buf, const unsigned long max)
 #define BUDGET_THRESHOLD 5000000ULL
 static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
 {
-	domain_data_t *dom_data = &per_cpu(mc_ce_doms, cpu);
-	struct ce_dom_data *ce_data = get_ce_data(dom_data);
-	struct ce_dom_pid_entry *new_entry;
+	struct ce_pid_table *pid_table = get_pid_table(cpu);
+	struct ce_pid_entry *new_entry = NULL;
 	int err = 0;
 
 	/* check the inputs */
@@ -801,20 +862,20 @@ static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
				"MC-CE task; that might be an issue.\n");
 	}
 	/* check that we have space for a new entry */
-	if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= ce_data->num_pid_entries) {
+	if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= pid_table->num_pid_entries) {
 		printk(KERN_INFO "litmus: too many MC-CE tasks for cpu "
				"%d\n", cpu);
 		err = -EINVAL;
 		goto out;
 	}
 	/* add the new entry */
-	new_entry = &ce_data->pid_entries[ce_data->num_pid_entries];
+	new_entry = get_pid_entry(cpu, pid_table->num_pid_entries);
 	BUG_ON(NULL != new_entry->pid);
 	new_entry->budget = budget;
-	new_entry->acc_time = ce_data->cycle_time + budget;
+	new_entry->acc_time = pid_table->cycle_time + budget;
 	/* update the domain entry */
-	ce_data->cycle_time += budget;
-	ce_data->num_pid_entries++;
+	pid_table->cycle_time += budget;
+	pid_table->num_pid_entries++;
 out:
 	return err;
 }
@@ -835,9 +896,9 @@ static int proc_write_ce_file(struct file *file, const char __user *buffer,
 	int cpu, task, cnt = 0, chars_read, converted, err;
 	lt_t budget;
 
-	if (is_active_plugin()) {
-		printk(KERN_INFO "litmus: can't edit MC-CE proc when plugin "
-				"active\n");
+	if (!using_linux_plugin()) {
+		printk(KERN_INFO "litmus: can only edit MC-CE proc under Linux "
+				"plugin\n");
 		cnt = -EINVAL;
 		goto out;
 	}
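
A worked example of the pid-table arithmetic that setup_pid_entry() builds and mc_ce_schedule_at() consumes: admitting two level-A slots with budgets of 2 ms and 3 ms yields entries[0].acc_time = 2 ms, entries[1].acc_time = 5 ms, and cycle_time = 5 ms. A timer firing at a cycle offset of 3.5 ms skips entry 0 (3.5 ms is not below 2 ms) and stops at entry 1 (3.5 ms < 5 ms), so slot 1 runs until the frame boundary at 5 ms. The numbers are illustrative, not taken from a real task set; a standalone sketch of the scan:

    /* Userspace illustration of the acc_time scan above; not kernel code. */
    #include <stdio.h>

    int main(void)
    {
            const unsigned long long acc_time[] = { 2000000ULL, 5000000ULL };
            const unsigned long long offset = 3500000ULL;   /* 3.5 ms in ns */
            int idx;

            for (idx = 0; idx < 2; ++idx)
                    if (offset < acc_time[idx])
                            break;  /* found the slot for this window */
            printf("offset 3.5 ms selects slot %d\n", idx); /* prints 1 */
            return 0;
    }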