author     Jonathan Herman <hermanjl@cs.unc.edu>  2011-09-27 20:15:32 -0400
committer  Jonathan Herman <hermanjl@cs.unc.edu>  2011-09-27 20:36:04 -0400
commit     23a00b911b968c6290251913ecc34171836b4d32 (patch)
tree       f6c8289054d2961902931e89bdc11ccc01bc3a73
parent     f21e1d0ef90c2e88ae6a563afc31ea601ed968c7 (diff)
parent     609c45f71b7a2405230fd2f8436837d6389ec599 (diff)
Merged with ce domains
-rw-r--r--  include/litmus/ce_domain.h     23
-rw-r--r--  include/litmus/event_group.h    2
-rw-r--r--  include/litmus/sched_mc.h      76
-rw-r--r--  litmus/ce_domain.c            105
-rw-r--r--  litmus/event_group.c           62
-rw-r--r--  litmus/rt_domain.c             16
-rw-r--r--  litmus/sched_mc.c             224
-rw-r--r--  litmus/sched_mc_ce.c          365
8 files changed, 533 insertions, 340 deletions
diff --git a/include/litmus/ce_domain.h b/include/litmus/ce_domain.h
new file mode 100644
index 000000000000..373f3f5f78d3
--- /dev/null
+++ b/include/litmus/ce_domain.h
@@ -0,0 +1,23 @@
+#ifndef _LITMUS_CE_DOMAIN_H
+#define _LITMUS_CE_DOMAIN_H
+
+/*
+ * Functions that the MC plugin needs to call through a domain pointer.
+ */
+void ce_requeue(domain_t*, struct task_struct*);
+struct task_struct* ce_peek_and_take_ready(domain_t*);
+int ce_higher_prio(struct task_struct*, struct task_struct*);
+
+typedef enum hrtimer_restart (*ce_timer_callback_t)(struct hrtimer*);
+
+void ce_domain_init(domain_t*,
+		raw_spinlock_t*,
+		requeue_t,
+		peek_ready_t,
+		take_ready_t,
+		preempt_needed_t,
+		task_prio_t,
+		struct ce_dom_data*,
+		const int,
+		ce_timer_callback_t);
+#endif
diff --git a/include/litmus/event_group.h b/include/litmus/event_group.h
index 37d5012d770e..a2e4d4507738 100644
--- a/include/litmus/event_group.h
+++ b/include/litmus/event_group.h
@@ -66,7 +66,7 @@ void add_event(struct event_group* group, struct rt_event* e, lt_t time);
 /**
  * cancel_event() - Remove event from the group.
  */
-void cancel_event(struct rt_event*);
+void cancel_event(struct event_group *group, struct rt_event*);
 
 /**
  * init_event() - Create an event.
diff --git a/include/litmus/sched_mc.h b/include/litmus/sched_mc.h
index 9ddf860c83a7..95cc367c8ade 100644
--- a/include/litmus/sched_mc.h
+++ b/include/litmus/sched_mc.h
@@ -21,7 +21,10 @@ struct mc_job {
 };
 
 #ifdef __KERNEL__
-/* only used in the kernel (no user space) */
+/*
+ * These are used only in the kernel. Userspace programs like RTSpin won't see
+ * them.
+ */
 
 struct mc_data {
 	struct mc_task mc_task;
@@ -36,22 +39,15 @@ struct mc_data {
 	(t) ? t->pid : 1, \
 	(t) ? t->rt_param.job_params.job_no : 1, \
 	(t && get_task_domain(t)) ? get_task_domain(t)->name : ""
+#define STRACE(fmt, args...) \
+	sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \
+				TRACE_ARGS, ## args)
 #define TRACE_MC_TASK(t, fmt, args...) \
-	TRACE(TS " " fmt "\n", TA(t), ##args)
+	STRACE(TS " " fmt "\n", TA(t), ##args)
 
 /*
- * Cache the budget along with the struct PID for a task so that we don't need
- * to fetch its task_struct every time we check to see what should be
- * scheduled.
+ * The MC-CE scheduler uses this as domain data.
  */
-struct ce_dom_pid_entry {
-	struct pid *pid;
-	lt_t budget;
-	/* accumulated (summed) budgets, including this one */
-	lt_t acc_time;
-	int expected_job;
-};
-
 struct ce_dom_data {
 	int cpu;
 	struct task_struct *scheduled, *should_schedule;
@@ -59,13 +55,61 @@ struct ce_dom_data {
 	 * Each CPU needs a mapping of level A ID (integer) to struct pid so
 	 * that we can get its task struct.
 	 */
-	struct ce_dom_pid_entry pid_entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
-	int num_pid_entries;
-	lt_t cycle_time;
 	struct hrtimer_start_on_info timer_info;
 	struct hrtimer timer;
 };
 
+/**
+ * struct crit_entry - State of a CPU within each criticality level system.
+ * @level	Criticality level of this entry
+ * @linked	Logically running task, ghost or regular
+ * @domain	Domain from which to draw tasks
+ * @usable	False if a higher criticality task is running
+ * @timer	For ghost task budget enforcement
+ * @node	Used to sort crit_entries by preemptability in global domains
+ */
+struct crit_entry {
+	enum crit_level level;
+	struct task_struct* linked;
+	struct domain* domain;
+	int usable;
+#ifdef CONFIG_MERGE_TIMERS
+	struct rt_event event;
+#else
+	struct hrtimer timer;
+#endif
+	struct bheap_node* node;
+};
+
+/**
+ * struct domain_data - Wrap domains with related CPU state
+ * @domain	A domain for a criticality level
+ * @heap	The preemptable heap of crit entries (for global domains)
+ * @crit_entry	The crit entry for this domain (for partitioned domains)
+ */
+struct domain_data {
+	struct domain domain;
+	struct bheap* heap;
+	struct crit_entry* crit_entry;
+};
+
+/*
+ * Functions that are used with the MC-CE plugin.
+ */
+long mc_ce_set_domains(const int, struct domain_data*[]);
+unsigned int mc_ce_get_expected_job(const int, const int);
+
+/*
+ * These functions are (lazily) inserted into the MC plugin code so that it
+ * manipulates the MC-CE state.
+ */
+long mc_ce_admit_task_common(struct task_struct*);
+void mc_ce_task_exit_common(struct task_struct*);
+void mc_ce_timer_callback_common(domain_t*, struct hrtimer*);
+void mc_ce_release_at_common(struct task_struct*, lt_t);
+long mc_ce_activate_plugin_common(void);
+long mc_ce_deactivate_plugin_common(void);
+
 #endif /* __KERNEL__ */
 
 #endif
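
Note on the declarations above: the mc_ce_*_common() helpers let the full MC plugin reuse the MC-CE level-A machinery instead of duplicating it. A minimal sketch of the intended dispatch, assuming a caller that already knows the task's criticality (the wrapper name is hypothetical; the real caller is mc_admit_task() in litmus/sched_mc.c further down this patch):

	/* Sketch only: route level-A tasks through the shared MC-CE admission check. */
	static long admit_with_ce_check(struct task_struct *t)
	{
		if (CRIT_LEVEL_A == tsk_mc_crit(t))
			return mc_ce_admit_task_common(t); /* shared MC-CE path */
		return 0; /* other levels: handled by the plugin's own checks */
	}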
diff --git a/litmus/ce_domain.c b/litmus/ce_domain.c
index 5b4fd1cb438f..ac6cc14d44f7 100644
--- a/litmus/ce_domain.c
+++ b/litmus/ce_domain.c
@@ -6,7 +6,9 @@
 #include <litmus/debug_trace.h>
 #include <litmus/rt_param.h>
 #include <litmus/domain.h>
+#include <litmus/event_group.h>
 #include <litmus/sched_mc.h>
+#include <litmus/ce_domain.h>
 
 /*
  * Called for:
@@ -18,50 +20,26 @@ void ce_requeue(domain_t *dom, struct task_struct *ts)
 {
 	const struct ce_dom_data *ce_data = dom->data;
 	const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
-	const struct ce_dom_pid_entry *pid_entry =
-		&ce_data->pid_entries[idx];
-	const int just_finished = tsk_rt(ts)->job_params.job_no;
-	const int expected_job = pid_entry->expected_job;
+	const unsigned int just_finished = tsk_rt(ts)->job_params.job_no;
+	const unsigned int expected_job =
+		mc_ce_get_expected_job(ce_data->cpu, idx);
 	const int asleep = RT_F_SLEEP == get_rt_flags(ts);
 
-	TRACE_TASK(ts, "entered ce_requeue. asleep: %d just_finished: %4d "
-			"expected_job: %4d\n",
+	TRACE_MC_TASK(ts, "entered ce_requeue. asleep: %d just_finished: %3u "
			"expected_job: %3u",
 			asleep, just_finished, expected_job);
 
 	/* When coming from job completion, the task will be asleep. */
 	if (asleep && just_finished < expected_job) {
-		/* this job is running behind, so don't put it to sleep */
-		set_rt_flags(ts, RT_F_RUNNING);
-		TRACE_TASK(ts, "appears behind, setting it to running again\n");
+		TRACE_MC_TASK(ts, "appears behind");
 	} else if (asleep && expected_job < just_finished) {
-		printk(KERN_CRIT "job %d completed in expected job %d which "
-				"seems too early\n", just_finished,
+		TRACE_MC_TASK(ts, "job %u completed in expected job %u which "
+				"seems too early", just_finished,
 				expected_job);
-		BUG();
 	}
 }
 
 /*
- * Called when a task exits the system.
- */
-void ce_exit(domain_t *dom, struct task_struct *ts)
-{
-	struct ce_dom_data *ce_data = dom->data;
-	const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
-	struct pid *pid;
-
-	BUG_ON(task_cpu(ts) != get_partition(ts));
-	BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
-	BUG_ON(lvl_a_id >= ce_data->num_pid_entries);
-	pid = ce_data->pid_entries[lvl_a_id].pid;
-	BUG_ON(!pid);
-	put_pid(pid);
-	ce_data->pid_entries[lvl_a_id].pid = NULL;
-	if (ce_data->should_schedule == ts)
-		ce_data->should_schedule = NULL;
-}
-
-/*
  * ce_take_ready and ce_peek_ready
  */
 struct task_struct* ce_peek_and_take_ready(domain_t *dom)
@@ -71,57 +49,36 @@ struct task_struct* ce_peek_and_take_ready(domain_t *dom)
 	const int exists = NULL != ce_data->should_schedule;
 	const int blocked = exists && !is_running(ce_data->should_schedule);
 
-	/* Return the task we should schedule if it is not blocked. If it is
-	 * asleep, return it anyway, because the MC-scheduler might as about
-	 * ghost jobs.
-	 */
+	/* Return the task we should schedule if it is not blocked or sleeping. */
 	if (exists && !blocked)
 		ret = ce_data->should_schedule;
 	return ret;
 }
 
-int ce_higher_prio(domain_t *dom, struct task_struct *_a,
-		struct task_struct *_b)
+int ce_higher_prio(struct task_struct *_a, struct task_struct *_b)
 {
 	const struct task_struct *a = _a;
-	struct ce_dom_data *ce_data = dom->data;
+	const domain_t *dom = get_task_domain(a);
+	const struct ce_dom_data *ce_data = dom->data;
 	return (a == ce_data->should_schedule);
 }
 
-void __mc_ce_timer_callback(struct hrtimer *timer);
-static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
-{
-	/* need to lock? */
-	__mc_ce_timer_callback(timer);
-	return HRTIMER_RESTART;
-}
-
-void mc_ce_release_at(struct task_struct*, lt_t);
-void ce_start(struct task_struct *ts, lt_t start)
-{
-	mc_ce_release_at(ts, start);
-}
-
-long mc_ce_activate_plugin(void);
-domain_t *ce_domain_for(int);
-long ce_activate_plugin(void)
-{
-	domain_t *dom;
-	struct ce_dom_data *ce_data;
-	int cpu;
-
-	/* first change the timer callback function */
-	for_each_online_cpu(cpu) {
-		dom = ce_domain_for(cpu);
-		ce_data = dom->data;
-		ce_data->timer.function = ce_timer_function;
-	}
-	/* then run the regular CE activate plugin */
-	return mc_ce_activate_plugin();
-}
-
-long mc_ce_deactivate_plugin(void);
-long ce_deactivate_plugin(void)
+void ce_domain_init(domain_t *dom,
+		raw_spinlock_t *lock,
+		requeue_t requeue,
+		peek_ready_t peek_ready,
+		take_ready_t take_ready,
+		preempt_needed_t preempt_needed,
+		task_prio_t task_prio,
+		struct ce_dom_data *dom_data,
+		const int cpu,
+		ce_timer_callback_t ce_timer_callback)
 {
-	return mc_ce_deactivate_plugin();
+	domain_init(dom, lock, requeue, peek_ready, take_ready, preempt_needed,
			task_prio);
+	dom->data = dom_data;
+	dom_data->cpu = cpu;
+	hrtimer_start_on_info_init(&dom_data->timer_info);
+	hrtimer_init(&dom_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	dom_data->timer.function = ce_timer_callback;
 }
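
For orientation, ce_domain_init() above is the single entry point the MC plugin uses to stand up a per-CPU level-A domain; the lock, ce_dom_data and timer callback are supplied by the caller. A condensed sketch of a caller, mirroring the wiring added to init_mc() in litmus/sched_mc.c below (variable names are illustrative):

	/* Sketch: wiring one CPU's level-A domain through ce_domain_init(). */
	raw_spin_lock_init(a_dom_lock);
	ce_domain_init(&dom_data->domain, a_dom_lock,
		       ce_requeue,             /* requeue */
		       ce_peek_and_take_ready, /* peek_ready */
		       ce_peek_and_take_ready, /* take_ready */
		       mc_preempt_needed,      /* preempt_needed */
		       ce_higher_prio,         /* task_prio */
		       ce_data, cpu, ce_timer_function);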
diff --git a/litmus/event_group.c b/litmus/event_group.c
index b4521ab370d1..276ba5dd242d 100644
--- a/litmus/event_group.c
+++ b/litmus/event_group.c
@@ -6,7 +6,9 @@
 #include <litmus/event_group.h>
 
 #if 1
-#define VTRACE TRACE
+#define VTRACE(fmt, args...) \
+sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \
+			TRACE_ARGS, ## args)
 #else
 #define VTRACE(fmt, args...)
 #endif
@@ -46,8 +48,8 @@ static enum hrtimer_restart on_timer(struct hrtimer *timer)
 	list_for_each_safe(pos, safe, &list) {
 		num++;
 		e = list_entry(pos, struct rt_event, list);
-		TRACE("Dequeueing event with prio %d from 0x%p\n",
-		      e->prio, el);
+		TRACE("Dequeueing event 0x%p with prio %d from 0x%p\n",
+		      e, e->prio, el);
 		list_del_init(pos);
 		e->function(e);
 	}
@@ -69,12 +71,14 @@ void insert_event(struct event_list *el, struct rt_event *e)
 			VTRACE("Inserting priority %d 0x%p before %d 0x%p "
 			       "in 0x%p, pos 0x%p\n", e->prio, &e->list,
 			       queued->prio, &queued->list, el, pos);
+			BUG_ON(!list_empty(&e->list));
 			list_add_tail(&e->list, pos);
 			return;
 		}
 	}
 	VTRACE("Inserting priority %d 0x%p at end of 0x%p, last 0x%p\n",
 	       e->prio, &el->list, el, last);
+	BUG_ON(!list_empty(&e->list));
 	list_add(&e->list, (last) ? last : pos);
 }
 
@@ -91,12 +95,14 @@ static struct event_list* get_event_list(struct event_group *group,
 	struct list_head* pos;
 	struct event_list *el = NULL, *tmp;
 	unsigned int slot = time2slot(fire);
+	int remaining = 300;
 
-	VTRACE("Getting list for %llu\n", fire);
+	VTRACE("Getting list for %llu, event 0x%p\n", fire, e);
 
 	/* Initialize pos for the case that the list is empty */
 	pos = group->event_queue[slot].next;
 	list_for_each(pos, &group->event_queue[slot]) {
+		BUG_ON(remaining-- < 0);
 		tmp = list_entry(pos, struct event_list, list);
 		if (lt_after_eq(fire, tmp->fire_time) &&
 		    lt_before(fire, tmp->fire_time + group->res)) {
@@ -121,10 +127,11 @@ static struct event_list* get_event_list(struct event_group *group,
 		tmp->fire_time = fire;
 		tmp->group = group;
 		/* Add to queue */
-		list_add(&tmp->list, pos->prev);
-		el = tmp;
 		VTRACE("Using list for priority %d and time %llu\n",
 		       e->prio, fire);
+		BUG_ON(!list_empty(&tmp->list));
+		list_add(&tmp->list, pos->prev);
+		el = tmp;
 	}
 	return el;
 }
@@ -135,8 +142,8 @@ static struct event_list* get_event_list(struct event_group *group,
 static void reinit_event_list(struct rt_event *e)
 {
 	struct event_list *el = e->event_list;
-	BUG_ON(hrtimer_cancel(&el->timer));
-	VTRACE("Reinitting 0x%p\n", el);
+	VTRACE("Reinitting 0x%p for event 0x%p\n", el, e);
+	BUG_ON(hrtimer_try_to_cancel(&el->timer) == 1);
 	INIT_LIST_HEAD(&el->events);
 	atomic_set(&el->info.state, HRTIMER_START_ON_INACTIVE);
 }
@@ -148,8 +155,8 @@ void add_event(struct event_group *group, struct rt_event *e, lt_t fire)
 {
 	struct event_list *el;
 
-	VTRACE("Adding event with priority %d for time %llu\n",
-	       e->prio, fire);
+	VTRACE("Adding event 0x%p with priority %d for time %llu\n",
+	       e, e->prio, fire);
 
 	raw_spin_lock(&group->queue_lock);
 	el = get_event_list(group, e, fire, 0);
@@ -167,7 +174,7 @@ void add_event(struct event_group *group, struct rt_event *e, lt_t fire)
 
 	/* Arm timer if we are the owner */
 	if (el == e->event_list) {
-		VTRACE("Arming timer for %llu\n", fire);
+		VTRACE("Arming timer on event 0x%p for %llu\n", e, fire);
 		if (group->cpu == smp_processor_id()) {
 			__hrtimer_start_range_ns(&el->timer,
 					ns_to_ktime(el->fire_time),
@@ -185,14 +192,37 @@ void add_event(struct event_group *group, struct rt_event *e, lt_t fire)
 /**
  * cancel_event() - Remove event from the group.
  */
-void cancel_event(struct rt_event *e)
+void cancel_event(struct event_group *group, struct rt_event *e)
 {
-	struct event_group *group;
+	struct list_head *swap = NULL;
+	struct rt_event *swappy;
+	struct event_list *tmp;
+
 	if (e->list.next != &e->list) {
-		group = e->event_list->group;
 		raw_spin_lock(&group->queue_lock);
-		VTRACE("Canceling event with priority %d\n", e->prio);
-		list_del_init(&e->list);
+		VTRACE("Canceling event 0x%p with priority %d\n", e, e->prio);
+
+		/* If somebody else is hooked up to our event list, swap
+		 * with their event list and leave our old event list
+		 * to execute.
+		 */
+		if (!list_empty(&e->list)) {
+			swap = (e->list.next == &e->event_list->events) ?
+				(e->list.prev == &e->event_list->events) ?
+				NULL : e->list.prev : e->list.next;
+			list_del_init(&e->list);
+		}
+		if (swap) {
+			swappy = list_entry(swap, struct rt_event, list);
+			tmp = swappy->event_list;
+			swappy->event_list = e->event_list;
+			e->event_list = tmp;
+			VTRACE("Swapping with event 0x%p", swappy);
+		}
+
+		hrtimer_try_to_cancel(&e->event_list->timer);
+		list_del_init(&e->event_list->list);
+
 		raw_spin_unlock(&group->queue_lock);
 	}
 }
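
The reworked cancel_event() above has to handle an rt_event whose event_list may still be shared with other queued events: rather than tearing the list down, it swaps event_list ownership with one of the remaining events so the armed timer and its queue keep a valid owner. The nested ternary that picks the swap partner, rewritten as a standalone sketch with a hypothetical helper name and the same struct layout assumed:

	/* Sketch: choose another queued event to swap event_lists with;
	 * NULL means e was the only entry, so no swap is needed. */
	static struct list_head *pick_swap_partner(struct rt_event *e)
	{
		if (e->list.next != &e->event_list->events)
			return e->list.next; /* an event follows us in the list */
		if (e->list.prev != &e->event_list->events)
			return e->list.prev; /* an event precedes us in the list */
		return NULL;
	}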
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 50a6abfd7676..db92e849f084 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -104,8 +104,8 @@ void release_heap_free(struct release_heap* rh)
 {
 	/* make sure timer is no longer in use */
 #ifdef CONFIG_MERGE_TIMERS
-	if (rh->dom)
-		cancel_event(&rh->event);
+	/* if (rh->dom) */
+	/* 	cancel_event(&rh->event); */
 #else
 	hrtimer_cancel(&rh->timer);
 #endif
@@ -166,7 +166,7 @@ static void reinit_release_heap(rt_domain_t *rt, struct task_struct* t)
 
 #ifdef CONFIG_MERGE_TIMERS
 	rh->event.prio = rt->prio;
-	cancel_event(&rh->event);
+	cancel_event(rt->event_group, &rh->event);
 #else
 	/* Make sure it is safe to use. The timer callback could still
 	 * be executing on another CPU; hrtimer_cancel() will wait
@@ -323,7 +323,7 @@ void rt_domain_init(rt_domain_t *rt,
  */
 void __add_ready(rt_domain_t* rt, struct task_struct *new)
 {
-	TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
+	VTRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
 	      new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
 	      get_release(new), litmus_clock());
 
@@ -348,7 +348,7 @@ void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
 void __add_release_on(rt_domain_t* rt, struct task_struct *task,
 		      int target_cpu)
 {
-	TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
+	VTRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
 		   get_release(task), target_cpu);
 	list_add(&tsk_rt(task)->list, &rt->tobe_released);
 	task->rt_param.domain = rt;
@@ -367,7 +367,7 @@ void __add_release_on(rt_domain_t* rt, struct task_struct *task,
  */
 void __add_release(rt_domain_t* rt, struct task_struct *task)
 {
-	TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
+	VTRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
 	list_add(&tsk_rt(task)->list, &rt->tobe_released);
 	task->rt_param.domain = rt;
 
@@ -398,12 +398,12 @@ static void pd_requeue(domain_t *dom, struct task_struct *task)
 
 	if (is_released(task, litmus_clock())) {
 		__add_ready(domain, task);
-		TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
+		VTRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
 		      task->comm, task->pid, get_exec_cost(task), get_rt_period(task),
 		      get_release(task), litmus_clock());
 	} else {
 		/* task has to wait for next release */
-		TRACE_TASK(task, "add release(), rel=%llu\n", get_release(task));
+		VTRACE_TASK(task, "add release(), rel=%llu\n", get_release(task));
 		add_release(domain, task);
 	}
 }
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
index 30898246ea38..7b74958d1f4f 100644
--- a/litmus/sched_mc.c
+++ b/litmus/sched_mc.c
@@ -25,28 +25,7 @@
 #include <litmus/event_group.h>
 
 #include <litmus/sched_mc.h>
-
-/**
- * crit_entry_t - State of a CPU within each criticality level system.
- * @level	Criticality level of this entry
- * @linked	Logically running task, ghost or regular
- * @domain	Domain from which to draw tasks
- * @usable	False if a higher criticality task is running
- * @timer	For ghost task budget enforcement
- * @node	Used to sort crit_entries by preemptability in global domains
- */
-struct crit_entry {
-	enum crit_level level;
-	struct task_struct* linked;
-	struct domain* domain;
-	int usable;
-#ifdef CONFIG_MERGE_TIMERS
-	struct rt_event event;
-#else
-	struct hrtimer timer;
-#endif
-	struct bheap_node* node;
-};
+#include <litmus/ce_domain.h>
 
 /**
  * struct cpu_entry - State of a CPU for the entire MC system
@@ -71,18 +50,6 @@ struct cpu_entry {
 #endif
 };
 
-/**
- * struct domain_data - Wrap domains with related CPU state
- * @domain	A domain for a criticality level
- * @heap	The preemptable heap of crit entries (for global domains)
- * @crit_entry	The crit entry for this domain (for partitioned domains)
- */
-struct domain_data {
-	struct domain domain;
-	struct bheap* heap;
-	struct crit_entry* crit_entry;
-};
-
 DEFINE_PER_CPU(struct cpu_entry, cpus);
 #ifdef CONFIG_RELEASE_MASTER
 static int interrupt_cpu;
@@ -101,9 +68,9 @@ static struct event_group* global_group;
 #define crit_cpu(ce) \
 	(container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
 #define TRACE_ENTRY(e, fmt, args...) \
-	TRACE("P%d, linked=" TS " " fmt "\n", e->cpu, TA(e->linked), ##args)
+	STRACE("P%d, linked=" TS " " fmt "\n", e->cpu, TA(e->linked), ##args)
 #define TRACE_CRIT_ENTRY(ce, fmt, args...) \
-	TRACE("%s P%d, linked=" TS " " fmt "\n", \
+	STRACE("%s P%d, linked=" TS " " fmt "\n", \
 	      (ce)->domain->name, crit_cpu(ce)->cpu, TA((ce)->linked), ##args)
 
 /*
@@ -162,7 +129,7 @@ static inline struct crit_entry* lowest_prio_cpu(struct domain *dom)
 static inline void cancel_ghost(struct crit_entry *ce)
 {
 #ifdef CONFIG_MERGE_TIMERS
-	cancel_event(&ce->event);
+	cancel_event(crit_cpu(ce)->event_group, &ce->event);
 #else
 	hrtimer_try_to_cancel(&ce->timer);
 #endif
@@ -174,9 +141,7 @@ static inline void cancel_ghost(struct crit_entry *ce)
 static inline void arm_ghost(struct crit_entry *ce, lt_t fire)
 {
 #ifdef CONFIG_MERGE_TIMERS
-	struct event_group* group = (is_global(ce->domain)) ?
-		global_group : crit_cpu(ce)->event_group;
-	add_event(group, &ce->event, fire);
+	add_event(crit_cpu(ce)->event_group, &ce->event, fire);
 #else
 	__hrtimer_start_range_ns(&ce->timer,
 			ns_to_ktime(when_to_fire),
@@ -243,10 +208,14 @@ static void link_task_to_crit(struct crit_entry *ce,
 	ce->linked = task;
 	if (task) {
 		task->rt_param.linked_on = crit_cpu(ce)->cpu;
-		if (is_ghost(task)) {
+		if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
+			/* There is a level-A timer that will force a
+			 * preemption, so we don't set this for level-A
+			 * tasks.
+			 */
 			/* Reset budget timer */
 			task->se.exec_start = litmus_clock();
-			when_to_fire = litmus_clock() +
+			when_to_fire = task->se.exec_start +
 				tsk_mc_data(task)->mc_job.ghost_budget;
 			arm_ghost(ce, when_to_fire);
 		}
@@ -261,6 +230,7 @@ static void link_task_to_crit(struct crit_entry *ce,
 }
 
 static void check_for_preempt(struct domain*);
+
 /**
  * job_arrival() - Called when a task re-enters the system.
  * Caller must hold no locks.
@@ -330,7 +300,7 @@ static void fix_global_levels(void)
 	struct list_head *pos, *safe;
 	struct task_struct *t;
 
-	TRACE("Fixing global levels\n");
+	STRACE("Fixing global levels");
 	for_each_online_cpu(c) {
 		e = &per_cpu(cpus, c);
 		raw_spin_lock(&e->redir_lock);
@@ -527,6 +497,7 @@ static void remove_from_all(struct task_struct* task)
  */
 static void job_completion(struct task_struct *task, int forced)
 {
+	lt_t now;
 	TRACE_MC_TASK(task, "Completed");
 	sched_trace_task_completion(task, forced);
 	BUG_ON(!task);
@@ -535,8 +506,11 @@ static void job_completion(struct task_struct *task, int forced)
 	set_rt_flags(task, RT_F_SLEEP);
 	remove_from_all(task);
 
+	now = litmus_clock();
+
 	/* If it's not a ghost job, do ghost job conversion */
 	if (!is_ghost(task)) {
+		TRACE_MC_TASK(task, "is not a ghost task");
 		tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task);
 		tsk_mc_data(task)->mc_job.is_ghost = 1;
 	}
@@ -546,6 +520,7 @@ static void job_completion(struct task_struct *task, int forced)
 	 * conversion. Revert back to a normal task and complete the period.
 	 */
 	if (tsk_mc_data(task)->mc_job.ghost_budget == 0) {
+		TRACE_MC_TASK(task, "has zero ghost budget");
 		tsk_mc_data(task)->mc_job.is_ghost = 0;
 		prepare_for_next_period(task);
 		if (is_released(task, litmus_clock()))
@@ -573,9 +548,9 @@ static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
 	unsigned long flags;
 	struct task_struct *tmp = NULL;
 
-
 	local_irq_save(flags);
-	TRACE_CRIT_ENTRY(ce, "Ghost exhausted firing");
+	TRACE("Ghost exhausted\n");
+	TRACE_CRIT_ENTRY(ce, "Firing here");
 
 	/* Due to race conditions, we cannot just set the linked
 	 * task's budget to 0 as it may no longer be the task
@@ -601,6 +576,52 @@ static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
 #endif
 }
 
+static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
+{
+	struct ce_dom_data *ce_data =
+		container_of(timer, struct ce_dom_data, timer);
+	struct crit_entry *ce = &per_cpu(cpus, ce_data->cpu).crit_entries[CRIT_LEVEL_A];
+	struct domain *dom = ce->domain;
+	struct task_struct *old_link = NULL;
+	unsigned long flags;
+
+	TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
+
+	local_irq_save(flags);
+
+	raw_spin_lock(dom->lock);
+
+	raw_spin_lock(&crit_cpu(ce)->lock);
+	if (ce->linked &&
+	    ce->linked == ce_data->should_schedule &&
+	    is_ghost(ce->linked))
+	{
+		old_link = ce->linked;
+		tsk_mc_data(ce->linked)->mc_job.ghost_budget = 0;
+		link_task_to_crit(ce, NULL);
+	}
+	raw_spin_unlock(&crit_cpu(ce)->lock);
+
+	mc_ce_timer_callback_common(dom, timer);
+
+	/* job completion will check for preemptions by means of calling job
+	 * arrival if the task is not blocked */
+	if (NULL != old_link) {
+		STRACE("old_link " TS " so will call job completion\n", TA(old_link));
+		raw_spin_unlock(dom->lock);
+		job_completion(old_link, 0);
+	} else {
+		STRACE("old_link was null, so will call check for preempt\n");
+		raw_spin_unlock(dom->lock);
+		check_for_preempt(dom);
+	}
+
+	local_irq_restore(flags);
+
+	return HRTIMER_RESTART;
+}
+
+
 /**
  * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
  */
@@ -611,7 +632,7 @@ static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
 	struct domain *dom = get_task_domain(first);
 
 	raw_spin_lock_irqsave(dom->lock, flags);
-	TRACE_MC_TASK(first, "Jobs released");
+	TRACE(TS "Jobs released\n", TA(first));
 	__merge_ready(rt, tasks);
 	check_for_preempt(dom);
 	raw_spin_unlock_irqrestore(dom->lock, flags);
@@ -664,7 +685,7 @@ static void mc_task_wake_up(struct task_struct *task)
 	lt_t now = litmus_clock();
 	local_irq_save(flags);
 
-	TRACE_MC_TASK(task, "Wakes up");
+	TRACE(TS " wakes up\n", TA(task));
 	if (is_tardy(task, now)) {
 		/* Task missed its last release */
 		release_at(task, now);
@@ -683,7 +704,7 @@ static void mc_task_block(struct task_struct *task)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	TRACE_MC_TASK(task, "Block at %llu", litmus_clock());
+	TRACE(TS " blocks\n", TA(task));
 	remove_from_all(task);
 	local_irq_restore(flags);
 }
@@ -696,7 +717,7 @@ static void mc_task_exit(struct task_struct *task)
 	unsigned long flags;
 	local_irq_save(flags);
 	BUG_ON(!is_realtime(task));
-	TRACE_MC_TASK(task, "RIP");
+	TRACE(TS " RIP\n", TA(task));
 
 	remove_from_all(task);
 	if (tsk_rt(task)->scheduled_on != NO_CPU) {
@@ -704,6 +725,9 @@ static void mc_task_exit(struct task_struct *task)
 		tsk_rt(task)->scheduled_on = NO_CPU;
 	}
 
+	if (CRIT_LEVEL_A == tsk_mc_crit(task))
+		mc_ce_task_exit_common(task);
+
 	local_irq_restore(flags);
 }
 
@@ -713,19 +737,30 @@ static void mc_task_exit(struct task_struct *task)
  */
 static long mc_admit_task(struct task_struct* task)
 {
+	const enum crit_level crit = tsk_mc_crit(task);
+	long ret;
 	if (!tsk_mc_data(task)) {
 		printk(KERN_WARNING "Tried to admit task with no criticality "
 			"level\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
-	if (tsk_mc_crit(task) < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
+	if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
 		printk(KERN_WARNING "Tried to admit partitioned task with no "
			"partition\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
+	}
+	if (crit == CRIT_LEVEL_A) {
+		ret = mc_ce_admit_task_common(task);
+		if (ret)
+			goto out;
 	}
 	printk(KERN_INFO "Admitted task with criticality level %d\n",
 		tsk_mc_crit(task));
-	return 0;
+	ret = 0;
+out:
+	return ret;
 }
 
 /**
@@ -761,11 +796,11 @@ static struct task_struct* mc_schedule(struct task_struct * prev)
 
 	if (exists) {
 		entry->scheduled->rt_param.scheduled_on = NO_CPU;
-		TRACE_MC_TASK(prev,
-			"blocks:%d out_of_time:%d sleep:%d preempt:%d "
-			"state:%d sig:%d global:%d",
+		TRACE(TS
+			" blocks:%d out_of_time:%d sleep:%d preempt:%d "
+			"state:%d sig:%d global:%d\n", TA(prev),
 			blocks, out_of_time, sleep, preempt,
 			prev->state, signal_pending(prev), global);
 	}
 	raw_spin_unlock(&entry->lock);
 
@@ -781,7 +816,7 @@ static struct task_struct* mc_schedule(struct task_struct * prev)
 	/* Any task which exhausts its budget or sleeps waiting for its next
 	 * period completes unless its execution has been forcibly stopped.
 	 */
-	if ((out_of_time || sleep) && !blocks && !preempt)
+	if ((out_of_time || sleep) && !blocks)/* && !preempt)*/
 		job_completion(entry->scheduled, !sleep);
 	/* Global scheduled tasks must wait for a deschedule before they
 	 * can rejoin the global state. Rejoin them here.
@@ -836,10 +871,29 @@ static struct task_struct* mc_schedule(struct task_struct * prev)
 	if (next)
 		TRACE_MC_TASK(next, "Scheduled at %llu", litmus_clock());
 	else if (exists && !next)
-		TRACE("Becomes idle at %llu\n", litmus_clock());
+		TRACE_ENTRY(entry, "Becomes idle at %llu", litmus_clock());
 	return next;
 }
 
+/*
+ * This is the plugin's release at function, called by the release task-set
+ * system call. Other places in the file use the generic LITMUS release_at(),
+ * which is not this.
+ */
+void mc_release_at(struct task_struct *ts, lt_t start)
+{
+	/* hack so that we can have CE timers start at the right time */
+	if (CRIT_LEVEL_A == tsk_mc_crit(ts))
+		mc_ce_release_at_common(ts, start);
+	else
+		release_at(ts, start);
+}
+
+long mc_deactivate_plugin(void)
+{
+	return mc_ce_deactivate_plugin_common();
+}
+
 /* **************************************************************************
  * Initialization
  * ************************************************************************** */
@@ -850,7 +904,8 @@ static struct task_struct* mc_schedule(struct task_struct * prev)
 
 /* LVL-A */
 DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
-DEFINE_PER_CPU(rt_domain_t, _mc_crit_a_rt);
+DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
+DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
 /* LVL-B */
 DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
 DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
@@ -870,12 +925,31 @@ DEFINE_PER_CPU(struct event_group, _mc_groups);
 
 static long mc_activate_plugin(void)
 {
+	struct domain_data *dom_data;
+	struct domain *dom;
+	struct domain_data *our_domains[NR_CPUS];
+	int cpu, n = 0;
+	long ret;
+
 #ifdef CONFIG_RELEASE_MASTER
 	interrupt_cpu = atomic_read(&release_master_cpu);
 	if (interrupt_cpu == NO_CPU)
 		interrupt_cpu = 0;
 #endif
-	return 0;
+
+	for_each_online_cpu(cpu) {
+		BUG_ON(NR_CPUS <= n);
+		dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
+		dom_data = domain_data(dom);
+		our_domains[cpu] = dom_data;
+		n++;
+	}
+	ret = mc_ce_set_domains(n, our_domains);
+	if (ret)
+		goto out;
+	ret = mc_ce_activate_plugin_common();
+out:
+	return ret;
 }
 
 static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
@@ -888,6 +962,8 @@ static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
 	.task_block = mc_task_block,
 	.admit_task = mc_admit_task,
 	.activate_plugin = mc_activate_plugin,
+	.release_at = mc_release_at,
+	.deactivate_plugin = mc_deactivate_plugin,
 };
 
 static void init_crit_entry(struct crit_entry *ce, enum crit_level level,
@@ -958,13 +1034,15 @@ static inline void init_edf_domain(struct domain *dom, rt_domain_t *rt,
 #endif
 }
 
+struct domain_data *ce_domain_for(int);
 static int __init init_mc(void)
 {
 	int cpu;
 	struct cpu_entry *entry;
 	struct domain_data *dom_data;
 	rt_domain_t *rt;
-	raw_spinlock_t *a_dom, *b_dom, *c_dom; /* For lock debugger */
+	raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
+	struct ce_dom_data *ce_data;
 
 	for_each_online_cpu(cpu) {
 		entry = &per_cpu(cpus, cpu);
@@ -992,11 +1070,15 @@ static int __init init_mc(void)
 
 		/* CRIT_LEVEL_A */
 		dom_data = &per_cpu(_mc_crit_a, cpu);
-		rt = &per_cpu(_mc_crit_a_rt, cpu);
+		ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
+		a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
+		raw_spin_lock_init(a_dom_lock);
+		ce_domain_init(&dom_data->domain,
+				a_dom_lock, ce_requeue, ce_peek_and_take_ready,
+				ce_peek_and_take_ready, mc_preempt_needed,
+				ce_higher_prio, ce_data, cpu,
+				ce_timer_function);
 		init_local_domain(entry, dom_data, CRIT_LEVEL_A);
-		init_edf_domain(&dom_data->domain, rt, cpu, CRIT_LEVEL_A);
-		a_dom = dom_data->domain.lock;
-		raw_spin_lock_init(a_dom);
 		dom_data->domain.name = "LVL-A";
 
 		/* CRIT_LEVEL_B */
@@ -1004,8 +1086,8 @@ static int __init init_mc(void)
 		rt = &per_cpu(_mc_crit_b_rt, cpu);
 		init_local_domain(entry, dom_data, CRIT_LEVEL_B);
 		init_edf_domain(&dom_data->domain, rt, cpu, CRIT_LEVEL_B);
-		b_dom = dom_data->domain.lock;
-		raw_spin_lock_init(b_dom);
+		b_dom_lock = dom_data->domain.lock;
+		raw_spin_lock_init(b_dom_lock);
 		dom_data->domain.name = "LVL-B";
 	}
 
@@ -1022,8 +1104,8 @@ static int __init init_mc(void)
 	init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
 			&_mc_heap_c, _mc_nodes_c);
 	init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, 0, CRIT_LEVEL_C);
-	c_dom = _mc_crit_c.domain.lock;
-	raw_spin_lock_init(c_dom);
+	c_dom_lock = _mc_crit_c.domain.lock;
+	raw_spin_lock_init(c_dom_lock);
 	_mc_crit_c.domain.name = "LVL-C";
 
 	return register_sched_plugin(&mc_plugin);
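
One detail worth flagging in ce_timer_function() above is its locking shape: the domain lock is held around both the per-CPU crit_entry update and mc_ce_timer_callback_common(), and is released before re-entering job_completion() or check_for_preempt(), which acquire locks of their own. Reduced to a sketch of the order of operations (tracing and irq handling omitted):

	raw_spin_lock(dom->lock);
	raw_spin_lock(&crit_cpu(ce)->lock);
	/* unlink the linked ghost task, if it is still the one that should run */
	raw_spin_unlock(&crit_cpu(ce)->lock);
	mc_ce_timer_callback_common(dom, timer);
	raw_spin_unlock(dom->lock);
	/* only then: job_completion(old_link, 0) or check_for_preempt(dom) */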
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
index dcb74f4ca67b..63b0470e1f52 100644
--- a/litmus/sched_mc_ce.c
+++ b/litmus/sched_mc_ce.c
@@ -23,27 +23,86 @@
23#include <litmus/sched_trace.h> 23#include <litmus/sched_trace.h>
24#include <litmus/jobs.h> 24#include <litmus/jobs.h>
25#include <litmus/sched_mc.h> 25#include <litmus/sched_mc.h>
26#include <litmus/ce_domain.h>
26 27
27static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp; 28static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
28 29
29#define is_active_plugin() (litmus == &mc_ce_plugin) 30#define using_linux_plugin() (litmus == &linux_sched_plugin)
31
32/* get a reference to struct domain for a CPU */
33#define get_domain_for(cpu) (&per_cpu(domains, cpu)->domain)
34
35#define get_pid_table(cpu) (&per_cpu(ce_pid_table, cpu))
36#define get_pid_entry(cpu, idx) (&(get_pid_table(cpu)->entries[idx]))
30 37
31static atomic_t start_time_set = ATOMIC_INIT(-1); 38static atomic_t start_time_set = ATOMIC_INIT(-1);
32static atomic64_t start_time = ATOMIC64_INIT(0); 39static atomic64_t start_time = ATOMIC64_INIT(0);
33static struct proc_dir_entry *mc_ce_dir = NULL, *ce_file = NULL; 40static struct proc_dir_entry *mc_ce_dir = NULL, *ce_file = NULL;
34 41
42/*
43 * Cache the budget along with the struct PID for a task so that we don't need
44 * to fetch its task_struct every time we check to see what should be
45 * scheduled.
46 */
47struct ce_pid_entry {
48 struct pid *pid;
49 lt_t budget;
50 /* accumulated (summed) budgets, including this one */
51 lt_t acc_time;
52 unsigned int expected_job;
53};
54
55struct ce_pid_table {
56 struct ce_pid_entry entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
57 int num_pid_entries;
58 lt_t cycle_time;
59};
60
61DEFINE_PER_CPU(struct ce_pid_table, ce_pid_table);
62
63/*
64 * How we get the domain for a given CPU locally. Set with the
65 * mc_ce_set_domains function. Must be done before activating plugins. Be
66 * careful when using domains as a variable elsewhere in this file.
67 */
68
69DEFINE_PER_CPU(struct domain_data*, domains);
35 70
36DEFINE_PER_CPU(domain_t, mc_ce_doms); 71/*
37DEFINE_PER_CPU(rt_domain_t, mc_ce_rts); 72 * The domains and other data used by the MC-CE plugin when it runs alone.
73 */
74DEFINE_PER_CPU(struct domain_data, _mc_ce_doms);
38DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data); 75DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
76DEFINE_PER_CPU(raw_spinlock_t, _mc_ce_dom_locks);
39 77
40/* Return the address of the domain_t for this CPU, used by the 78long mc_ce_set_domains(const int n, struct domain_data *domains_in[])
41 * mixed-criticality plugin. */
42domain_t *ce_domain_for(int cpu)
43{ 79{
44 return &per_cpu(mc_ce_doms, cpu); 80 const int max = (NR_CPUS < n) ? NR_CPUS : n;
81 struct domain_data *new_dom = NULL;
82 int i, ret;
83 if (!using_linux_plugin()) {
84 printk(KERN_WARNING "can't set MC-CE domains when not using "
85 "Linux scheduler.\n");
86 ret = -EINVAL;
87 goto out;
88 }
89 for (i = 0; i < max; ++i) {
90 new_dom = domains_in[i];
91 per_cpu(domains, i) = new_dom;
92 }
93 ret = 0;
94out:
95 return ret;
45} 96}
46 97
98unsigned int mc_ce_get_expected_job(const int cpu, const int idx)
99{
100 const struct ce_pid_table *pid_table = get_pid_table(cpu);
101 BUG_ON(0 > cpu);
102 BUG_ON(0 > idx);
103 BUG_ON(pid_table->num_pid_entries <= idx);
104 return pid_table->entries[idx].expected_job;
105}
47 106
48/* 107/*
49 * Get the offset into the cycle taking the start time into account. 108 * Get the offset into the cycle taking the start time into account.
@@ -63,16 +122,14 @@ static inline lt_t get_cycle_offset(const lt_t when, const lt_t cycle_time)
63 * 122 *
64 * Do not call prepare_for_next_period on Level-A tasks! 123 * Do not call prepare_for_next_period on Level-A tasks!
65 */ 124 */
66static void mc_ce_job_completion(struct task_struct *ts) 125static void mc_ce_job_completion(struct domain *dom, struct task_struct *ts)
67{ 126{
68 const domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id()); 127 const int cpu = task_cpu(ts);
69 const struct ce_dom_data *ce_data = dom->data;
70 const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id; 128 const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
71 const struct ce_dom_pid_entry *pid_entry = 129 const struct ce_pid_entry *pid_entry = get_pid_entry(cpu, idx);
72 &ce_data->pid_entries[idx]; 130 unsigned int just_finished;
73 int just_finished;
74 131
75 TRACE_TASK(ts, "completed\n"); 132 TRACE_TASK(ts, "Completed\n");
76 133
77 sched_trace_task_completion(ts, 0); 134 sched_trace_task_completion(ts, 0);
78 /* post-increment is important here */ 135 /* post-increment is important here */
@@ -85,11 +142,11 @@ static void mc_ce_job_completion(struct task_struct *ts)
85 if (just_finished < pid_entry->expected_job) { 142 if (just_finished < pid_entry->expected_job) {
86 /* this job is already released because it's running behind */ 143 /* this job is already released because it's running behind */
87 set_rt_flags(ts, RT_F_RUNNING); 144 set_rt_flags(ts, RT_F_RUNNING);
88 TRACE_TASK(ts, "appears behind: the expected job is %d but " 145 TRACE_TASK(ts, "appears behind: the expected job is %u but "
89 "job %d just completed\n", 146 "job %u just completed\n",
90 pid_entry->expected_job, just_finished); 147 pid_entry->expected_job, just_finished);
91 } else if (pid_entry->expected_job < just_finished) { 148 } else if (pid_entry->expected_job < just_finished) {
92 printk(KERN_CRIT "job %d completed in expected job %d which " 149 printk(KERN_CRIT "job %u completed in expected job %u which "
93 "seems too early\n", just_finished, 150 "seems too early\n", just_finished,
94 pid_entry->expected_job); 151 pid_entry->expected_job);
95 BUG(); 152 BUG();
@@ -104,31 +161,32 @@ static void mc_ce_job_completion(struct task_struct *ts)
104 * 161 *
105 * TODO Currently O(n) in the number of tasks on the CPU. Binary search? 162 * TODO Currently O(n) in the number of tasks on the CPU. Binary search?
106 */ 163 */
107static int mc_ce_schedule_at(const domain_t *dom, lt_t offset) 164static int mc_ce_schedule_at(const struct domain *dom, lt_t offset)
108{ 165{
109 const struct ce_dom_data *ce_data = dom->data; 166 const struct ce_dom_data *ce_data = dom->data;
110 const struct ce_dom_pid_entry *pid_entry = NULL; 167 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
111 int i; 168 const struct ce_pid_entry *pid_entry = NULL;
169 int idx;
112 170
113 BUG_ON(ce_data->cycle_time < 1); 171 BUG_ON(pid_table->cycle_time < 1);
114 BUG_ON(ce_data->num_pid_entries < 1); 172 BUG_ON(pid_table->num_pid_entries < 1);
115 173
116 for (i = 0; i < ce_data->num_pid_entries; ++i) { 174 for (idx = 0; idx < pid_table->num_pid_entries; ++idx) {
117 pid_entry = &ce_data->pid_entries[i]; 175 pid_entry = &pid_table->entries[idx];
118 if (offset < pid_entry->acc_time) { 176 if (offset < pid_entry->acc_time) {
119 /* found task to schedule in this window */ 177 /* found task to schedule in this window */
120 break; 178 break;
121 } 179 }
122 } 180 }
123 /* can only happen if cycle_time is not right */ 181 /* can only happen if cycle_time is not right */
124 BUG_ON(pid_entry->acc_time > ce_data->cycle_time); 182 BUG_ON(pid_entry->acc_time > pid_table->cycle_time);
125 TRACE("schedule at returned task %d for CPU %d\n", i, ce_data->cpu); 183 TRACE("schedule at returning task %d for CPU %d\n", idx, ce_data->cpu);
126 return i; 184 return idx;
127} 185}
128 186
129static struct task_struct *mc_ce_schedule(struct task_struct *prev) 187static struct task_struct *mc_ce_schedule(struct task_struct *prev)
130{ 188{
131 domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id()); 189 struct domain *dom = get_domain_for(smp_processor_id());
132 struct ce_dom_data *ce_data = dom->data; 190 struct ce_dom_data *ce_data = dom->data;
133 struct task_struct *next = NULL; 191 struct task_struct *next = NULL;
134 int exists, sleep, should_sched_exists, should_sched_blocked, 192 int exists, sleep, should_sched_exists, should_sched_blocked,
@@ -147,7 +205,7 @@ static struct task_struct *mc_ce_schedule(struct task_struct *prev)
147 TRACE("exists: %d, sleep: %d\n", exists, sleep); 205 TRACE("exists: %d, sleep: %d\n", exists, sleep);
148 206
149 if (sleep) 207 if (sleep)
150 mc_ce_job_completion(ce_data->scheduled); 208 mc_ce_job_completion(dom, ce_data->scheduled);
151 209
152 /* these checks must go after the call to mc_ce_job_completion in case 210 /* these checks must go after the call to mc_ce_job_completion in case
153 * a late task needs to be scheduled again right away and its the only 211 * a late task needs to be scheduled again right away and its the only
@@ -178,7 +236,7 @@ static struct task_struct *mc_ce_schedule(struct task_struct *prev)
178 236
179static void mc_ce_finish_switch(struct task_struct *prev) 237static void mc_ce_finish_switch(struct task_struct *prev)
180{ 238{
181 domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id()); 239 struct domain *dom = get_domain_for(smp_processor_id());
182 struct ce_dom_data *ce_data = dom->data; 240 struct ce_dom_data *ce_data = dom->data;
183 241
184 TRACE("finish switch\n"); 242 TRACE("finish switch\n");
@@ -190,41 +248,21 @@ static void mc_ce_finish_switch(struct task_struct *prev)
190} 248}
191 249
192/* 250/*
193 * Called for every local timer interrupt.
194 * Linux calls this with interrupts disabled, AFAIK.
195 */
196static void mc_ce_tick(struct task_struct *ts)
197{
198 domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
199 struct ce_dom_data *ce_data = dom->data;
200 struct task_struct *should_schedule;
201
202 if (is_realtime(ts) && CRIT_LEVEL_A == tsk_mc_crit(ts)) {
203 raw_spin_lock(dom->lock);
204 should_schedule = ce_data->should_schedule;
205 raw_spin_unlock(dom->lock);
206
207 if (!is_np(ts) && ts != should_schedule) {
208 litmus_reschedule_local();
209 } else if (is_user_np(ts)) {
210 request_exit_np(ts);
211 }
212 }
213}
214
215/*
216 * Admit task called to see if this task is permitted to enter the system. 251 * Admit task called to see if this task is permitted to enter the system.
217 * Here we look up the task's PID structure and save it in the proper slot on 252 * Here we look up the task's PID structure and save it in the proper slot on
218 * the CPU this task will run on. 253 * the CPU this task will run on.
219 */ 254 */
220static long __mc_ce_admit_task(struct task_struct *ts) 255long mc_ce_admit_task_common(struct task_struct *ts)
221{ 256{
222 domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts)); 257 struct domain *dom = get_domain_for(get_partition(ts));
223 struct ce_dom_data *ce_data = dom->data; 258 struct ce_dom_data *ce_data = dom->data;
224 struct mc_data *mcd = tsk_mc_data(ts); 259 struct mc_data *mcd = tsk_mc_data(ts);
225 struct pid *pid = NULL; 260 struct pid *pid = NULL;
226 long retval = -EINVAL; 261 long retval = -EINVAL;
227 const int lvl_a_id = mcd->mc_task.lvl_a_id; 262 const int lvl_a_id = mcd->mc_task.lvl_a_id;
263 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
264
265 BUG_ON(get_partition(ts) != ce_data->cpu);
228 266
229 /* check the task has migrated to the right CPU (like in sched_cedf) */ 267 /* check the task has migrated to the right CPU (like in sched_cedf) */
230 if (task_cpu(ts) != get_partition(ts)) { 268 if (task_cpu(ts) != get_partition(ts)) {
@@ -248,26 +286,26 @@ static long __mc_ce_admit_task(struct task_struct *ts)
248 goto out; 286 goto out;
249 } 287 }
250 288
251 if (lvl_a_id >= ce_data->num_pid_entries) { 289 if (lvl_a_id >= pid_table->num_pid_entries) {
252 printk(KERN_INFO "litmus: level A id greater than expected " 290 printk(KERN_INFO "litmus: level A id greater than expected "
253 "number of tasks %d for %d cpu %d\n", 291 "number of tasks %d for %d cpu %d\n",
254 ce_data->num_pid_entries, ts->pid, 292 pid_table->num_pid_entries, ts->pid,
255 get_partition(ts)); 293 get_partition(ts));
256 goto out_put_pid; 294 goto out_put_pid;
257 } 295 }
258 if (ce_data->pid_entries[lvl_a_id].pid) { 296 if (pid_table->entries[lvl_a_id].pid) {
259 printk(KERN_INFO "litmus: have saved pid info id: %d cpu: %d\n", 297 printk(KERN_INFO "litmus: have saved pid info id: %d cpu: %d\n",
260 lvl_a_id, get_partition(ts)); 298 lvl_a_id, get_partition(ts));
261 goto out_put_pid; 299 goto out_put_pid;
262 } 300 }
263 if (get_exec_cost(ts) >= ce_data->pid_entries[lvl_a_id].budget) { 301 if (get_exec_cost(ts) >= pid_table->entries[lvl_a_id].budget) {
264 printk(KERN_INFO "litmus: execution cost %llu is larger than " 302 printk(KERN_INFO "litmus: execution cost %llu is larger than "
265 "the budget %llu\n", 303 "the budget %llu\n",
266 get_exec_cost(ts), 304 get_exec_cost(ts),
267 ce_data->pid_entries[lvl_a_id].budget); 305 pid_table->entries[lvl_a_id].budget);
268 goto out_put_pid; 306 goto out_put_pid;
269 } 307 }
270 ce_data->pid_entries[lvl_a_id].pid = pid; 308 pid_table->entries[lvl_a_id].pid = pid;
271 retval = 0; 309 retval = 0;
272 /* don't call put_pid if we are successful */ 310 /* don't call put_pid if we are successful */
273 goto out; 311 goto out;
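
[note] The admission path above boils down to three checks before the slot is claimed: the level-A id must lie inside the configured table, the slot must still be free, and the task's execution cost must fit under the slot's budget. A minimal user-space sketch of the same checks, assuming a simplified table (plain ints and 64-bit values stand in for struct pid pointers and lt_t; MAX_TASKS stands in for CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS):

#include <stdint.h>
#include <stdio.h>

#define MAX_TASKS 32   /* stands in for CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS */

struct pid_entry { int pid; uint64_t budget; };
struct pid_table { int num_pid_entries; struct pid_entry entries[MAX_TASKS]; };

/* Mirror of the three admission checks: id in range, slot free, cost fits. */
static int admit(struct pid_table *t, int lvl_a_id, int pid, uint64_t exec_cost)
{
	if (lvl_a_id >= t->num_pid_entries)
		return -1;                   /* id beyond the configured table   */
	if (t->entries[lvl_a_id].pid)
		return -1;                   /* someone already claimed the slot */
	if (exec_cost >= t->entries[lvl_a_id].budget)
		return -1;                   /* cost must fit within the budget  */
	t->entries[lvl_a_id].pid = pid;      /* claim the slot                   */
	return 0;
}

int main(void)
{
	struct pid_table t = { .num_pid_entries = 2,
			       .entries = { { 0, 5000000 }, { 0, 3000000 } } };
	printf("%d\n", admit(&t, 0, 1234, 4000000)); /*  0: admitted   */
	printf("%d\n", admit(&t, 0, 1235, 1000000)); /* -1: slot taken */
	return 0;
}

Note that, as in the hunk above, a cost exactly equal to the budget is rejected (>=, not >).
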
@@ -280,10 +318,10 @@ out:
280 318
281static long mc_ce_admit_task(struct task_struct *ts) 319static long mc_ce_admit_task(struct task_struct *ts)
282{ 320{
283 domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts)); 321 struct domain *dom = get_domain_for(get_partition(ts));
284 unsigned long flags, retval; 322 unsigned long flags, retval;
285 raw_spin_lock_irqsave(dom->lock, flags); 323 raw_spin_lock_irqsave(dom->lock, flags);
286 retval = __mc_ce_admit_task(ts); 324 retval = mc_ce_admit_task_common(ts);
287 raw_spin_unlock_irqrestore(dom->lock, flags); 325 raw_spin_unlock_irqrestore(dom->lock, flags);
288 return retval; 326 return retval;
289} 327}
@@ -295,26 +333,26 @@ static long mc_ce_admit_task(struct task_struct *ts)
295 */ 333 */
296static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running) 334static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
297{ 335{
298 domain_t *dom = &per_cpu(mc_ce_doms, task_cpu(ts)); 336 const int cpu = task_cpu(ts);
337 struct domain *dom = get_domain_for(cpu);
299 struct ce_dom_data *ce_data = dom->data; 338 struct ce_dom_data *ce_data = dom->data;
339 struct ce_pid_table *pid_table = get_pid_table(cpu);
300 struct pid *pid_should_be_running; 340 struct pid *pid_should_be_running;
301 struct ce_dom_pid_entry *pid_entry; 341 struct ce_pid_entry *pid_entry;
302 unsigned long flags; 342 unsigned long flags;
303 int idx, should_be_running; 343 int idx, should_be_running;
304 lt_t offset; 344 lt_t offset;
305 345
306 /* have to call mc_ce_schedule_at because the task only gets a PID
307 * entry after calling admit_task */
308
309 raw_spin_lock_irqsave(dom->lock, flags); 346 raw_spin_lock_irqsave(dom->lock, flags);
310 pid_entry = &ce_data->pid_entries[tsk_mc_data(ts)->mc_task.lvl_a_id]; 347 pid_entry = get_pid_entry(cpu, tsk_mc_data(ts)->mc_task.lvl_a_id);
311 /* initialize some task state */ 348 /* initialize some task state */
312 set_rt_flags(ts, RT_F_RUNNING); 349 set_rt_flags(ts, RT_F_RUNNING);
313 tsk_rt(ts)->job_params.job_no = 0;
314 350
315 offset = get_cycle_offset(litmus_clock(), ce_data->cycle_time); 351 /* have to call mc_ce_schedule_at because the task only gets a PID
352 * entry after calling admit_task */
353 offset = get_cycle_offset(litmus_clock(), pid_table->cycle_time);
316 idx = mc_ce_schedule_at(dom, offset); 354 idx = mc_ce_schedule_at(dom, offset);
317 pid_should_be_running = ce_data->pid_entries[idx].pid; 355 pid_should_be_running = get_pid_entry(cpu, idx)->pid;
318 rcu_read_lock(); 356 rcu_read_lock();
319 should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID)); 357 should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
320 rcu_read_unlock(); 358 rcu_read_unlock();
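
[note] mc_ce_task_new() has to ask the table which slot the present instant falls into, since the task only received its pid-table slot at admission. A hedged sketch of that lookup, assuming get_cycle_offset() reduces the clock modulo the table's cycle_time and mc_ce_schedule_at() returns the first entry whose accumulated end time (acc_time, built up in setup_pid_entry() further down) exceeds the offset; the real helpers live elsewhere in this commit:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t;

struct pid_entry { lt_t budget; lt_t acc_time; };

/* Assumed behaviour of get_cycle_offset(): offset of `now` into the cycle. */
static lt_t cycle_offset(lt_t now, lt_t cycle_time)
{
	return now % cycle_time;
}

/* Assumed behaviour of mc_ce_schedule_at(): index of the slot covering
 * `offset`, i.e. the first entry whose accumulated end time lies beyond it. */
static int schedule_at(const struct pid_entry *e, int n, lt_t offset)
{
	int i;
	for (i = 0; i < n; i++)
		if (offset < e[i].acc_time)
			return i;
	return n - 1;
}

int main(void)
{
	/* Two slots: 2 ms then 3 ms, so acc_time = 2 ms, 5 ms; cycle_time = 5 ms. */
	struct pid_entry e[] = { { 2000000, 2000000 }, { 3000000, 5000000 } };
	lt_t now = 12500000;                       /* 12.5 ms after time zero */
	lt_t off = cycle_offset(now, 5000000);     /* 2.5 ms into the cycle   */
	printf("slot %d at offset %llu\n", schedule_at(e, 2, off),
	       (unsigned long long)off);           /* slot 1                  */
	return 0;
}
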
@@ -341,7 +379,7 @@ static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
341 */ 379 */
342static void mc_ce_task_wake_up(struct task_struct *ts) 380static void mc_ce_task_wake_up(struct task_struct *ts)
343{ 381{
344 domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id()); 382 struct domain *dom = get_domain_for(get_partition(ts));
345 struct ce_dom_data *ce_data = dom->data; 383 struct ce_dom_data *ce_data = dom->data;
346 unsigned long flags; 384 unsigned long flags;
347 385
@@ -366,25 +404,25 @@ static void mc_ce_task_block(struct task_struct *ts)
366/* 404/*
367 * Called when a task switches from RT mode back to normal mode. 405 * Called when a task switches from RT mode back to normal mode.
368 */ 406 */
369static void mc_ce_task_exit(struct task_struct *ts) 407void mc_ce_task_exit_common(struct task_struct *ts)
370{ 408{
371 domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts)); 409 struct domain *dom = get_domain_for(get_partition(ts));
372 struct ce_dom_data *ce_data = dom->data; 410 struct ce_dom_data *ce_data = dom->data;
373 unsigned long flags; 411 unsigned long flags;
374 struct pid *pid; 412 struct pid *pid;
375 const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id; 413 const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
376 414 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
377 TRACE_TASK(ts, "exited\n");
378 415
379 BUG_ON(task_cpu(ts) != get_partition(ts)); 416 BUG_ON(task_cpu(ts) != get_partition(ts));
380 BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts)); 417 BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
381 BUG_ON(lvl_a_id >= ce_data->num_pid_entries); 418 BUG_ON(lvl_a_id >= pid_table->num_pid_entries);
419 BUG_ON(ce_data->cpu != task_cpu(ts));
382 420
383 raw_spin_lock_irqsave(dom->lock, flags); 421 raw_spin_lock_irqsave(dom->lock, flags);
384 pid = ce_data->pid_entries[lvl_a_id].pid; 422 pid = pid_table->entries[lvl_a_id].pid;
385 BUG_ON(!pid); 423 BUG_ON(!pid);
386 put_pid(pid); 424 put_pid(pid);
387 ce_data->pid_entries[lvl_a_id].pid = NULL; 425 pid_table->entries[lvl_a_id].pid = NULL;
388 if (ce_data->scheduled == ts) 426 if (ce_data->scheduled == ts)
389 ce_data->scheduled = NULL; 427 ce_data->scheduled = NULL;
390 if (ce_data->should_schedule == ts) 428 if (ce_data->should_schedule == ts)
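
[note] The exit path drops the pid reference taken at admission, frees the slot, and clears any per-CPU pointer that still names the dying task. A small user-space model of that teardown, with a plain reference count standing in for get_pid()/put_pid():

#include <stddef.h>
#include <assert.h>

struct pid  { int refcount; };
struct slot { struct pid *pid; };
struct cpu_state {
	struct slot  entries[4];
	void        *scheduled;        /* task currently on the CPU     */
	void        *should_schedule;  /* task the CE table wants next  */
};

static void put_pid(struct pid *p) { p->refcount--; }

/* Mirror of the exit path: drop the reference, free the slot, and make
 * sure no stale scheduled/should_schedule pointer survives the task. */
static void task_exit(struct cpu_state *cs, int lvl_a_id, void *task)
{
	struct pid *pid = cs->entries[lvl_a_id].pid;
	assert(pid);                        /* BUG_ON(!pid) in the kernel */
	put_pid(pid);
	cs->entries[lvl_a_id].pid = NULL;
	if (cs->scheduled == task)
		cs->scheduled = NULL;
	if (cs->should_schedule == task)
		cs->should_schedule = NULL;
}

int main(void)
{
	struct pid p = { .refcount = 1 };
	struct cpu_state cs = { .entries = { { &p } } };
	int task;                           /* any address works as a task id */
	cs.scheduled = &task;
	task_exit(&cs, 0, &task);
	assert(p.refcount == 0 && !cs.entries[0].pid && !cs.scheduled);
	return 0;
}
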
@@ -396,32 +434,32 @@ static void mc_ce_task_exit(struct task_struct *ts)
396 * Timer stuff 434 * Timer stuff
397 **********************************************************/ 435 **********************************************************/
398 436
399void __mc_ce_timer_callback(struct hrtimer *timer) 437void mc_ce_timer_callback_common(struct domain *dom, struct hrtimer *timer)
400{ 438{
401 /* relative and absolute times for cycles */ 439 /* relative and absolute times for cycles */
402 lt_t now, offset_rel, cycle_start_abs, next_timer_abs; 440 lt_t now, offset_rel, cycle_start_abs, next_timer_abs;
403 struct task_struct *should_schedule; 441 struct task_struct *should_schedule;
404 struct ce_dom_pid_entry *pid_entry; 442 struct ce_pid_table *pid_table;
443 struct ce_pid_entry *pid_entry;
405 struct ce_dom_data *ce_data; 444 struct ce_dom_data *ce_data;
406 domain_t *dom;
407 int idx, budget_overrun; 445 int idx, budget_overrun;
408 446
409 ce_data = container_of(timer, struct ce_dom_data, timer); 447 ce_data = dom->data;
410 dom = container_of(((void*)ce_data), domain_t, data); 448 pid_table = get_pid_table(ce_data->cpu);
411 449
412 /* Based off of the current time, figure out the offset into the cycle 450 /* Based off of the current time, figure out the offset into the cycle
413 * and the cycle's start time, and determine what should be scheduled. 451 * and the cycle's start time, and determine what should be scheduled.
414 */ 452 */
415 now = litmus_clock(); 453 now = litmus_clock();
416 offset_rel = get_cycle_offset(now, ce_data->cycle_time); 454 offset_rel = get_cycle_offset(now, pid_table->cycle_time);
417 cycle_start_abs = now - offset_rel; 455 cycle_start_abs = now - offset_rel;
418 idx = mc_ce_schedule_at(dom, offset_rel); 456 idx = mc_ce_schedule_at(dom, offset_rel);
419 pid_entry = &ce_data->pid_entries[idx]; 457 pid_entry = get_pid_entry(ce_data->cpu, idx);
420 /* set the timer to fire at the next cycle start */ 458 /* set the timer to fire at the next cycle start */
421 next_timer_abs = cycle_start_abs + pid_entry->acc_time; 459 next_timer_abs = cycle_start_abs + pid_entry->acc_time;
422 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs)); 460 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
423 461
424 TRACE("timer: now: %llu offset_rel: %llu cycle_start_abs: %llu " 462 STRACE("timer: now: %llu offset_rel: %llu cycle_start_abs: %llu "
425 "next_timer_abs: %llu\n", now, offset_rel, 463 "next_timer_abs: %llu\n", now, offset_rel,
426 cycle_start_abs, next_timer_abs); 464 cycle_start_abs, next_timer_abs);
427 465
@@ -440,10 +478,11 @@ void __mc_ce_timer_callback(struct hrtimer *timer)
440 budget_overrun = pid_entry->expected_job != 478 budget_overrun = pid_entry->expected_job !=
441 tsk_rt(should_schedule)->job_params.job_no; 479 tsk_rt(should_schedule)->job_params.job_no;
442 if (budget_overrun) 480 if (budget_overrun)
443 TRACE_TASK(should_schedule, "timer expected job number: %d " 481 TRACE_MC_TASK(should_schedule,
444 "but current job: %d\n", 482 "timer expected job number: %u "
445 pid_entry->expected_job, 483 "but current job: %u",
446 tsk_rt(should_schedule)->job_params.job_no); 484 pid_entry->expected_job,
485 tsk_rt(should_schedule)->job_params.job_no);
447 } 486 }
448 487
449 if (ce_data->should_schedule) { 488 if (ce_data->should_schedule) {
@@ -466,15 +505,15 @@ static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
466{ 505{
467 struct ce_dom_data *ce_data; 506 struct ce_dom_data *ce_data;
468 unsigned long flags; 507 unsigned long flags;
469 domain_t *dom; 508 struct domain *dom;
470 509
471 ce_data = container_of(timer, struct ce_dom_data, timer); 510 ce_data = container_of(timer, struct ce_dom_data, timer);
472 dom = container_of(((void*)ce_data), domain_t, data); 511 dom = get_domain_for(ce_data->cpu);
473 512
474 TRACE("timer callback on CPU %d (before lock)\n", ce_data->cpu); 513 TRACE("timer callback on CPU %d (before lock)\n", ce_data->cpu);
475 514
476 raw_spin_lock_irqsave(dom->lock, flags); 515 raw_spin_lock_irqsave(dom->lock, flags);
477 __mc_ce_timer_callback(timer); 516 mc_ce_timer_callback_common(dom, timer);
478 517
479 if (ce_data->scheduled != ce_data->should_schedule) 518 if (ce_data->scheduled != ce_data->should_schedule)
480 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu); 519 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
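
[note] The per-CPU hrtimer handler is now only a locking wrapper: look up the domain by CPU, take its lock, run the shared callback, and preempt the CPU only if the task running there differs from the one the table picked. A compact sketch of that wrapper pattern, with a pthread mutex standing in for the raw spinlock and a printf standing in for preempt_if_preemptable():

#include <pthread.h>
#include <stdio.h>

struct cpu_state {
	pthread_mutex_t lock;
	void *scheduled;         /* task currently running on this CPU */
	void *should_schedule;   /* task the CE table selected         */
};

/* Stand-ins for the shared callback body and the IPI-based preemption. */
static void timer_callback_common(struct cpu_state *cs) { (void)cs; }
static void preempt(struct cpu_state *cs) { (void)cs; printf("preempt cpu\n"); }

/* Wrapper pattern: lock, run the shared work, preempt only on a mismatch. */
static void timer_callback(struct cpu_state *cs)
{
	pthread_mutex_lock(&cs->lock);
	timer_callback_common(cs);
	if (cs->scheduled != cs->should_schedule)
		preempt(cs);
	pthread_mutex_unlock(&cs->lock);
}

int main(void)
{
	struct cpu_state cs = { PTHREAD_MUTEX_INITIALIZER, (void *)1, (void *)2 };
	timer_callback(&cs);     /* mismatch -> "preempt cpu" */
	return 0;
}
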
@@ -490,13 +529,13 @@ static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
490static int cancel_all_timers(void) 529static int cancel_all_timers(void)
491{ 530{
492 struct ce_dom_data *ce_data; 531 struct ce_dom_data *ce_data;
493 domain_t *dom; 532 struct domain *dom;
494 int cpu, ret = 0, cancel_res; 533 int cpu, cancel_res, ret = 0;
495 534
496 TRACE("cancel all timers\n"); 535 TRACE("cancel all timers\n");
497 536
498 for_each_online_cpu(cpu) { 537 for_each_online_cpu(cpu) {
499 dom = &per_cpu(mc_ce_doms, cpu); 538 dom = get_domain_for(cpu);
500 ce_data = dom->data; 539 ce_data = dom->data;
501 ce_data->should_schedule = NULL; 540 ce_data->should_schedule = NULL;
502 cancel_res = hrtimer_cancel(&ce_data->timer); 541 cancel_res = hrtimer_cancel(&ce_data->timer);
@@ -514,20 +553,22 @@ static int cancel_all_timers(void)
514 */ 553 */
515static void arm_all_timers(void) 554static void arm_all_timers(void)
516{ 555{
556 struct domain *dom;
517 struct ce_dom_data *ce_data; 557 struct ce_dom_data *ce_data;
518 domain_t *dom; 558 struct ce_pid_table *pid_table;
519 int cpu, idx; 559 int cpu, idx;
520 const lt_t start = atomic64_read(&start_time); 560 const lt_t start = atomic64_read(&start_time);
521 561
522 TRACE("arm all timers\n"); 562 TRACE("arm all timers\n");
523 563
524 for_each_online_cpu(cpu) { 564 for_each_online_cpu(cpu) {
525 dom = &per_cpu(mc_ce_doms, cpu); 565 dom = get_domain_for(cpu);
526 ce_data = dom->data; 566 ce_data = dom->data;
527 if (0 == ce_data->num_pid_entries) 567 pid_table = get_pid_table(cpu);
568 if (0 == pid_table->num_pid_entries)
528 continue; 569 continue;
529 for (idx = 0; idx < ce_data->num_pid_entries; idx++) { 570 for (idx = 0; idx < pid_table->num_pid_entries; idx++) {
530 ce_data->pid_entries[idx].expected_job = -1; 571 pid_table->entries[idx].expected_job = 0;
531 } 572 }
532 TRACE("arming timer for CPU %d\n", cpu); 573 TRACE("arming timer for CPU %d\n", cpu);
533 hrtimer_start_on(cpu, &ce_data->timer_info, &ce_data->timer, 574 hrtimer_start_on(cpu, &ce_data->timer_info, &ce_data->timer,
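
[note] Before the timers are armed at the synchronized start time, every slot's expected_job is reset; note the reset value changes from -1 to 0 in this hunk, matching the switch to unsigned (%u) job numbers in the overrun trace above. A sketch of the reset-and-arm loop, with a plain function standing in for hrtimer_start_on():

#include <stdint.h>
#include <stdio.h>

#define NCPUS  2
#define NSLOTS 2

struct slot      { unsigned expected_job; };
struct cpu_table { int num; struct slot slots[NSLOTS]; };

/* Stand-in for hrtimer_start_on(): arm this CPU's cycle timer at `start`. */
static void arm_timer(int cpu, uint64_t start)
{
	printf("cpu %d: timer armed at %llu\n", cpu, (unsigned long long)start);
}

static void arm_all_timers(struct cpu_table *t, uint64_t start)
{
	int cpu, idx;
	for (cpu = 0; cpu < NCPUS; cpu++) {
		if (!t[cpu].num)
			continue;                /* CPU has no CE table: skip it */
		for (idx = 0; idx < t[cpu].num; idx++)
			t[cpu].slots[idx].expected_job = 0;  /* was -1 pre-patch */
		arm_timer(cpu, start);
	}
}

int main(void)
{
	struct cpu_table t[NCPUS] = { { 2 }, { 0 } };
	arm_all_timers(t, 1000000);          /* only CPU 0 gets a timer */
	return 0;
}
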
@@ -540,7 +581,7 @@ static void arm_all_timers(void)
540 * call this. We can re-set our notion of the CE period start to make 581 * call this. We can re-set our notion of the CE period start to make
541 * the schedule look pretty. 582 * the schedule look pretty.
542 */ 583 */
543void mc_ce_release_at(struct task_struct *ts, lt_t start) 584void mc_ce_release_at_common(struct task_struct *ts, lt_t start)
544{ 585{
545 TRACE_TASK(ts, "release at\n"); 586 TRACE_TASK(ts, "release at\n");
546 if (atomic_inc_and_test(&start_time_set)) { 587 if (atomic_inc_and_test(&start_time_set)) {
@@ -552,14 +593,14 @@ void mc_ce_release_at(struct task_struct *ts, lt_t start)
552 atomic_dec(&start_time_set); 593 atomic_dec(&start_time_set);
553} 594}
554 595
555long mc_ce_activate_plugin(void) 596long mc_ce_activate_plugin_common(void)
556{ 597{
557 struct ce_dom_data *ce_data; 598 struct ce_dom_data *ce_data;
558 domain_t *dom; 599 struct domain *dom;
559 int cpu; 600 int cpu;
560 601
561 for_each_online_cpu(cpu) { 602 for_each_online_cpu(cpu) {
562 dom = &per_cpu(mc_ce_doms, cpu); 603 dom = get_domain_for(cpu);
563 ce_data = dom->data; 604 ce_data = dom->data;
564 ce_data->scheduled = NULL; 605 ce_data->scheduled = NULL;
565 ce_data->should_schedule = NULL; 606 ce_data->should_schedule = NULL;
@@ -572,33 +613,54 @@ long mc_ce_activate_plugin(void)
572 return 0; 613 return 0;
573} 614}
574 615
616static long mc_ce_activate_plugin(void)
617{
618 struct domain_data *our_domains[NR_CPUS];
619 int cpu, n = 0;
620 long ret;
621
622 for_each_online_cpu(cpu) {
623 BUG_ON(NR_CPUS <= n);
624 our_domains[cpu] = &per_cpu(_mc_ce_doms, cpu);
625 n++;
626 }
627 ret = mc_ce_set_domains(n, our_domains);
628 if (ret)
629 goto out;
630 ret = mc_ce_activate_plugin_common();
631out:
632 return ret;
633}
634
575static void clear_pid_entries(void) 635static void clear_pid_entries(void)
576{ 636{
637 struct ce_pid_table *pid_table = NULL;
577 int cpu, entry; 638 int cpu, entry;
578 domain_t *dom;
579 struct ce_dom_data *ce_data;
580 639
581 for_each_online_cpu(cpu) { 640 for_each_online_cpu(cpu) {
582 dom = &per_cpu(mc_ce_doms, cpu); 641 pid_table = get_pid_table(cpu);
583 ce_data = dom->data; 642 pid_table->num_pid_entries = 0;
584 ce_data->num_pid_entries = 0; 643 pid_table->cycle_time = 0;
585 ce_data->cycle_time = 0;
586 for (entry = 0; entry < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS; 644 for (entry = 0; entry < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS;
587 ++entry) { 645 ++entry) {
588 if (NULL != ce_data->pid_entries[entry].pid) { 646 if (NULL != pid_table->entries[entry].pid) {
589 put_pid(ce_data->pid_entries[entry].pid); 647 put_pid(pid_table->entries[entry].pid);
590 ce_data->pid_entries[entry].pid = NULL; 648 pid_table->entries[entry].pid = NULL;
591 } 649 }
592 ce_data->pid_entries[entry].budget = 0; 650 pid_table->entries[entry].budget = 0;
593 ce_data->pid_entries[entry].acc_time = 0; 651 pid_table->entries[entry].acc_time = 0;
594 ce_data->pid_entries[entry].expected_job = -1; 652 pid_table->entries[entry].expected_job = 0;
595 } 653 }
596 } 654 }
597} 655}
598 656
599long mc_ce_deactivate_plugin(void) 657long mc_ce_deactivate_plugin_common(void)
600{ 658{
659 int cpu;
601 cancel_all_timers(); 660 cancel_all_timers();
661 for_each_online_cpu(cpu) {
662 per_cpu(domains, cpu) = NULL;
663 }
602 return 0; 664 return 0;
603} 665}
604 666
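
[note] The new activation path collects this plugin's per-CPU domain_data objects and passes them to mc_ce_set_domains(); judging from the deactivation side, which nulls per_cpu(domains, cpu), that helper records each domain in a per-CPU pointer that get_domain_for() reads back. A hedged user-space model of that registration/teardown pairing (set_domains() and the array below are assumptions about the helper defined elsewhere in this commit):

#include <stddef.h>
#include <assert.h>

#define NR_CPUS 4

struct domain_data { int cpu; };

/* Per-CPU registry the shared helpers look domains up in
 * (models per_cpu(domains, cpu) / get_domain_for()). */
static struct domain_data *domains[NR_CPUS];

static int set_domains(int n, struct domain_data **doms)
{
	int cpu;
	for (cpu = 0; cpu < n; cpu++)
		domains[cpu] = doms[cpu];
	return 0;
}

static struct domain_data *get_domain_for(int cpu) { return domains[cpu]; }

static void deactivate(int n)
{
	int cpu;
	for (cpu = 0; cpu < n; cpu++)
		domains[cpu] = NULL;   /* mirrors mc_ce_deactivate_plugin_common */
}

int main(void)
{
	static struct domain_data d[2] = { { 0 }, { 1 } };
	struct domain_data *ours[NR_CPUS] = { &d[0], &d[1] };

	set_domains(2, ours);
	assert(get_domain_for(1) == &d[1]);
	deactivate(2);
	assert(get_domain_for(1) == NULL);
	return 0;
}
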
@@ -608,35 +670,33 @@ static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
608 .admit_task = mc_ce_admit_task, 670 .admit_task = mc_ce_admit_task,
609 .task_new = mc_ce_task_new, 671 .task_new = mc_ce_task_new,
610 .complete_job = complete_job, 672 .complete_job = complete_job,
611 .release_at = mc_ce_release_at, 673 .release_at = mc_ce_release_at_common,
612 .task_exit = mc_ce_task_exit, 674 .task_exit = mc_ce_task_exit_common,
613 .schedule = mc_ce_schedule, 675 .schedule = mc_ce_schedule,
614 .finish_switch = mc_ce_finish_switch, 676 .finish_switch = mc_ce_finish_switch,
615 .tick = mc_ce_tick,
616 .task_wake_up = mc_ce_task_wake_up, 677 .task_wake_up = mc_ce_task_wake_up,
617 .task_block = mc_ce_task_block, 678 .task_block = mc_ce_task_block,
618 .activate_plugin = mc_ce_activate_plugin, 679 .activate_plugin = mc_ce_activate_plugin,
619 .deactivate_plugin = mc_ce_deactivate_plugin, 680 .deactivate_plugin = mc_ce_deactivate_plugin_common,
620}; 681};
621 682
622static int setup_proc(void); 683static int setup_proc(void);
623static int __init init_sched_mc_ce(void) 684static int __init init_sched_mc_ce(void)
624{ 685{
625 struct ce_dom_data *ce_data; 686 raw_spinlock_t *ce_lock;
626 domain_t *dom; 687 struct domain_data *dom_data;
627 rt_domain_t *rt; 688 struct domain *dom;
628 int cpu, err; 689 int cpu, err;
629 690
630 for_each_online_cpu(cpu) { 691 for_each_online_cpu(cpu) {
631 dom = &per_cpu(mc_ce_doms, cpu); 692 per_cpu(domains, cpu) = NULL;
632 rt = &per_cpu(mc_ce_rts, cpu); 693 ce_lock = &per_cpu(_mc_ce_dom_locks, cpu);
633 pd_domain_init(dom, rt, NULL, NULL, NULL, NULL, NULL); 694 raw_spin_lock_init(ce_lock);
634 dom->data = &per_cpu(_mc_ce_dom_data, cpu); 695 dom_data = &per_cpu(_mc_ce_doms, cpu);
635 ce_data = dom->data; 696 dom = &dom_data->domain;
636 hrtimer_init(&ce_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 697 ce_domain_init(dom, ce_lock, NULL, NULL, NULL, NULL, NULL,
637 hrtimer_start_on_info_init(&ce_data->timer_info); 698 &per_cpu(_mc_ce_dom_data, cpu), cpu,
638 ce_data->cpu = cpu; 699 mc_ce_timer_callback);
639 ce_data->timer.function = mc_ce_timer_callback;
640 } 700 }
641 clear_pid_entries(); 701 clear_pid_entries();
642 err = setup_proc(); 702 err = setup_proc();
@@ -678,7 +738,7 @@ out:
678#define PID_SPACE 15 738#define PID_SPACE 15
679#define TASK_INFO_BUF (PID_SPACE + TASK_COMM_LEN) 739#define TASK_INFO_BUF (PID_SPACE + TASK_COMM_LEN)
680static int write_pid_entry(char *page, const int count, const int cpu, 740static int write_pid_entry(char *page, const int count, const int cpu,
681 const int task, struct ce_dom_pid_entry *pid_entry) 741 const int task, struct ce_pid_entry *pid_entry)
682{ 742{
683 static char task_info[TASK_INFO_BUF]; 743 static char task_info[TASK_INFO_BUF];
684 struct task_struct *ts; 744 struct task_struct *ts;
@@ -735,8 +795,7 @@ static int proc_read_ce_file(char *page, char **start, off_t off, int count,
735 int *eof, void *data) 795 int *eof, void *data)
736{ 796{
737 int n = 0, err, cpu, t; 797 int n = 0, err, cpu, t;
738 struct ce_dom_data *ce_data; 798 struct ce_pid_table *pid_table;
739 domain_t *dom;
740 799
741 if (off > 0) { 800 if (off > 0) {
742 printk(KERN_INFO "litmus: MC-CE called read with off > 0\n"); 801 printk(KERN_INFO "litmus: MC-CE called read with off > 0\n");
@@ -744,11 +803,10 @@ static int proc_read_ce_file(char *page, char **start, off_t off, int count,
744 } 803 }
745 804
746 for_each_online_cpu(cpu) { 805 for_each_online_cpu(cpu) {
747 dom = &per_cpu(mc_ce_doms, cpu); 806 pid_table = get_pid_table(cpu);
748 ce_data = dom->data; 807 for (t = 0; t < pid_table->num_pid_entries; ++t) {
749 for (t = 0; t < ce_data->num_pid_entries; ++t) {
750 err = write_pid_entry(page + n, count - n, 808 err = write_pid_entry(page + n, count - n,
751 cpu, t, &ce_data->pid_entries[t]); 809 cpu, t, get_pid_entry(cpu, t));
752 if (err < 0) { 810 if (err < 0) {
753 n = -ENOSPC; 811 n = -ENOSPC;
754 goto out; 812 goto out;
@@ -785,9 +843,8 @@ static int skip_comment(const char *buf, const unsigned long max)
785#define BUDGET_THRESHOLD 5000000ULL 843#define BUDGET_THRESHOLD 5000000ULL
786static int setup_pid_entry(const int cpu, const int task, const lt_t budget) 844static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
787{ 845{
788 domain_t *dom = &per_cpu(mc_ce_doms, cpu); 846 struct ce_pid_table *pid_table = get_pid_table(cpu);
789 struct ce_dom_data *ce_data = dom->data; 847 struct ce_pid_entry *new_entry = NULL;
790 struct ce_dom_pid_entry *new_entry;
791 int err = 0; 848 int err = 0;
792 849
793 /* check the inputs */ 850 /* check the inputs */
@@ -805,20 +862,20 @@ static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
805 "MC-CE task; that might be an issue.\n"); 862 "MC-CE task; that might be an issue.\n");
806 } 863 }
807 /* check that we have space for a new entry */ 864 /* check that we have space for a new entry */
808 if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= ce_data->num_pid_entries) { 865 if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= pid_table->num_pid_entries) {
809 printk(KERN_INFO "litmus: too many MC-CE tasks for cpu " 866 printk(KERN_INFO "litmus: too many MC-CE tasks for cpu "
810 "%d\n", cpu); 867 "%d\n", cpu);
811 err = -EINVAL; 868 err = -EINVAL;
812 goto out; 869 goto out;
813 } 870 }
814 /* add the new entry */ 871 /* add the new entry */
815 new_entry = &ce_data->pid_entries[ce_data->num_pid_entries]; 872 new_entry = get_pid_entry(cpu, pid_table->num_pid_entries);
816 BUG_ON(NULL != new_entry->pid); 873 BUG_ON(NULL != new_entry->pid);
817 new_entry->budget = budget; 874 new_entry->budget = budget;
818 new_entry->acc_time = ce_data->cycle_time + budget; 875 new_entry->acc_time = pid_table->cycle_time + budget;
819 /* update the domain entry */ 876 /* update the domain entry */
820 ce_data->cycle_time += budget; 877 pid_table->cycle_time += budget;
821 ce_data->num_pid_entries++; 878 pid_table->num_pid_entries++;
822out: 879out:
823 return err; 880 return err;
824} 881}
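
[note] Appending an entry extends the table in place: the new slot's acc_time becomes the old cycle_time plus its own budget, and cycle_time then grows by the budget, so afterwards cycle_time is the sum of all budgets and each acc_time marks a slot's end offset. A worked example with three hypothetical slots of 2 ms, 3 ms and 1 ms:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t;

struct pid_entry { lt_t budget; lt_t acc_time; };
struct pid_table { int num; lt_t cycle_time; struct pid_entry e[8]; };

/* Mirror of setup_pid_entry()'s bookkeeping (input checks omitted). */
static void add_entry(struct pid_table *t, lt_t budget)
{
	struct pid_entry *n = &t->e[t->num];
	n->budget   = budget;
	n->acc_time = t->cycle_time + budget;   /* end offset of this slot */
	t->cycle_time += budget;
	t->num++;
}

int main(void)
{
	struct pid_table t = { 0 };
	lt_t budgets[] = { 2000000, 3000000, 1000000 };
	int i;

	for (i = 0; i < 3; i++)
		add_entry(&t, budgets[i]);
	for (i = 0; i < t.num; i++)
		printf("slot %d: budget %llu acc_time %llu\n", i,
		       (unsigned long long)t.e[i].budget,
		       (unsigned long long)t.e[i].acc_time);
	printf("cycle_time %llu\n", (unsigned long long)t.cycle_time);
	/* acc_time: 2 ms, 5 ms, 6 ms; cycle_time: 6 ms */
	return 0;
}

This is the same acc_time that the timer callback above adds to the cycle start to find its next expiry.
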
@@ -839,9 +896,9 @@ static int proc_write_ce_file(struct file *file, const char __user *buffer,
839 int cpu, task, cnt = 0, chars_read, converted, err; 896 int cpu, task, cnt = 0, chars_read, converted, err;
840 lt_t budget; 897 lt_t budget;
841 898
842 if (is_active_plugin()) { 899 if (!using_linux_plugin()) {
843 printk(KERN_INFO "litmus: can't edit MC-CE proc when plugin " 900 printk(KERN_INFO "litmus: can only edit MC-CE proc under Linux "
844 "active\n"); 901 "plugin\n");
845 cnt = -EINVAL; 902 cnt = -EINVAL;
846 goto out; 903 goto out;
847 } 904 }
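
[note] The guard on the proc write path changes meaning here: previously edits were refused while this plugin was active, whereas now they are accepted only while the default Linux plugin is loaded, presumably so the table cannot change underneath a running MC or MC-CE scheduler. A tiny sketch of the new guard, with using_linux_plugin() modeled as a string compare against the active plugin's name (an assumption; the real helper is defined earlier in this file):

#include <string.h>
#include <stdio.h>

static const char *active_plugin = "Linux";

/* Assumed shape of using_linux_plugin(): true only while the default
 * Linux plugin is the active scheduler. */
static int using_linux_plugin(void)
{
	return strcmp(active_plugin, "Linux") == 0;
}

static int proc_write(void)
{
	if (!using_linux_plugin()) {
		fprintf(stderr, "can only edit MC-CE table under the Linux plugin\n");
		return -1;
	}
	/* ... parse "cpu task budget" lines and call setup_pid_entry() ... */
	return 0;
}

int main(void)
{
	printf("%d\n", proc_write());   /*  0: allowed while Linux is active */
	active_plugin = "MC-CE";
	printf("%d\n", proc_write());   /* -1: refused once MC-CE runs       */
	return 0;
}
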