path: root/litmus/sched_mc.c
author    Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-22 15:30:43 -0500
committer Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-22 15:30:43 -0500
commit    7806057274c493d53a214232d4df6f96aadc7547 (patch)
tree      b3aa02ee438a0c5f592e125f4657a2f1e5c225f5 /litmus/sched_mc.c
parent    a5d1599facc1b934e0b8d68e360dadd66c1df730 (diff)
Merge with branch wip-mc.
Diffstat (limited to 'litmus/sched_mc.c')
-rw-r--r--    litmus/sched_mc.c    1803
1 file changed, 1803 insertions, 0 deletions
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
new file mode 100644
index 00000000000..64de4ef9c46
--- /dev/null
+++ b/litmus/sched_mc.c
@@ -0,0 +1,1803 @@
1/*
2 * litmus/sched_mc.c
3 * Implementation of the Mixed Criticality scheduling algorithm.
4 *
5 * (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010)
6 * TODO: optimize reschedule
7 */
8#include <linux/spinlock.h>
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/hrtimer.h>
12#include <linux/slab.h>
13#include <linux/module.h>
14#include <linux/poison.h>
15#include <linux/pid.h>
16
17#include <litmus/litmus.h>
18#include <litmus/trace.h>
19#include <litmus/jobs.h>
20#include <litmus/sched_plugin.h>
21#include <litmus/edf_common.h>
22#include <litmus/sched_trace.h>
23#include <litmus/domain.h>
24#include <litmus/bheap.h>
25#include <litmus/event_group.h>
26#include <litmus/budget.h>
27#include <litmus/server.h>
28#include <litmus/sched_mc.h>
29#include <litmus/ce_domain.h>
30#include <litmus/dgl.h>
31#include <litmus/color.h>
32#include <litmus/way_tracker.h>
33
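/*
 * Per-CPU scheduling signals, consumed by process_signals() from
 * mc_schedule():
 *   update  - the CPU's DGL group-lock state changed; see cpu_update()
 *   preempt - a partitioned domain on this CPU has a higher-priority ready
 *             task; see check_partitioned_preempt()
 */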
34struct mc_signal {
 35 unsigned int update:1;
 36 unsigned int preempt:1;
37};
38
39struct cpu_entry {
40 int cpu;
41 struct crit_entry crit_entries[NUM_CRIT_LEVELS];
42
43 struct task_struct* scheduled;
44 struct task_struct* will_schedule;
45 struct task_struct* linked;
46
47 struct mc_signal signal;
48
49 raw_spinlock_t lock;
50 raw_spinlock_t signal_lock;
51
52#ifdef CONFIG_PLUGIN_MC_REDIRECT
53 struct list_head redir;
54 raw_spinlock_t redir_lock;
55#endif
56#ifdef CONFIG_MERGE_TIMERS
57 struct event_group *event_group;
58#endif
59};
60
61static struct dgl group_lock;
62static raw_spinlock_t dgl_lock;
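/*
 * Lock ordering used in this file: when both are needed, a domain lock is
 * taken before a cpu_entry lock (see pick_next_task()), and dgl_lock nests
 * inside the cpu_entry lock; signal_lock is a leaf lock for mc_signal.
 */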
63
64DEFINE_PER_CPU(struct cpu_entry, cpus);
65static int interrupt_cpu;
66#define has_resources(t, c) (tsk_rt(t)->req == group_lock.acquired[c])
67
68#define domain_data(dom) (container_of(dom, struct domain_data, domain))
69#define is_global(dom) (domain_data(dom)->heap)
70#define is_global_task(t) (is_global(get_task_domain(t)))
71#define can_requeue(t) \
72 ((t)->rt_param.linked_on == NO_CPU && /* Not linked anywhere */ \
73 !is_queued(t) && /* Not gonna be linked */ \
74 (!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU))
75#define entry_level(e) \
76 (((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
77#define get_crit_entry_for(cpu, level) (&per_cpu(cpus, cpu).crit_entries[level])
78#define crit_cpu(ce) \
79 (container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
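/*
 * crit_cpu(ce) relies on a crit_entry's index in its cpu_entry's
 * crit_entries[] array being equal to its criticality level: subtracting
 * ce->level from ce yields crit_entries[0], and container_of() then recovers
 * the enclosing cpu_entry.
 */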
80
81static void clear_signal(struct mc_signal *signal)
82{
83 signal->update = signal->preempt = 0;
84}
85
86/*
87 * Put in requests for resources needed by @t.
88 */
89static int acquire_resources(struct task_struct *t)
90{
91 int cpu, acquired;
92 struct server *task_server;
93 struct cpu_entry *entry;
94
95 if (!lock_cache)
96 return 1;
97
98 BUG_ON(tsk_rt(t)->linked_on == NO_CPU);
99
100
101 raw_spin_lock(&dgl_lock);
102
103 cpu = tsk_rt(t)->linked_on;
104 task_server = &tsk_rt(t)->server;
105
 106 if (!cache_preempt && is_kernel_np(t)) {
 107 TRACE_MC_TASK(t, "Already contending for resources\n");
 108 acquired = has_resources(t, cpu);
 109 goto out; /* do not return with dgl_lock held */
 110 }
111 if (!has_resources(t, cpu)) {
112 sched_trace_task_block(t);
113 server_state_change(task_server, SS_BLOCKED, 0);
114 TRACE_MC_TASK(t, "Blocked at %llu\n", litmus_clock());
115
116 add_group_req(&group_lock, tsk_rt(t)->req, cpu);
117 if (!cache_preempt)
118 make_np(t);
119 }
120
121 acquired = has_resources(t, cpu);
122
123 if (acquired) {
124 entry = &per_cpu(cpus, cpu);
125 entry->signal.update = 0;
126 }
 127out:
 128 raw_spin_unlock(&dgl_lock);
 129
 130 return acquired;
131}
132
133static void release_resources(struct task_struct *t)
134{
135 struct server *task_server = &tsk_rt(t)->server;
136
137 if (!lock_cache)
138 return;
139
140 raw_spin_lock(&dgl_lock);
141
142 server_state_change(task_server, SS_REMOVED, 0);
143
144 if (cache_preempt || is_kernel_np(t)) {
145 TRACE_MC_TASK(t, "Releasing resources\n");
146
147 remove_group_req(&group_lock, tsk_rt(t)->req);
148 take_np(t);
149 } else if (!cache_preempt) {
150 TRACE_MC_TASK(t, "No resources to release!\n");
151 }
152
153 raw_spin_unlock(&dgl_lock);
154
155}
156
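/*
 * dumb_acquire()/dumb_release() - trivial resource hooks for domains that do
 * not take the cache group lock. init_mc() installs them for the level-A and
 * level-C domains; level B uses acquire_resources()/release_resources().
 */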
157static int dumb_acquire(struct task_struct *t)
158{
159 struct server *server = &tsk_rt(t)->server;
160 server_state_change(server, SS_ACTIVE, 0);
161 return 1;
162}
163
164static void dumb_release(struct task_struct *t)
165{
166 struct server *server = &tsk_rt(t)->server;
167 server_state_change(server, SS_REMOVED, 0);
168}
169
170#define fully_removed(s) ((s)->state == SS_REMOVED && !(s)->in_transit)
171
172/*
173 * Sort CPUs within a global domain's heap.
174 */
175static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
176{
177 struct domain *domain;
178 struct crit_entry *first, *second;
179 struct task_struct *first_link, *second_link;
180
181 first = a->value;
182 second = b->value;
183 first_link = first->server.linked;
184 second_link = second->server.linked;
185
186 if (fully_removed(&first->server) || fully_removed(&second->server)){
187 /* Removed entries go at the back of the heap */
188 return fully_removed(&second->server) &&
189 !fully_removed(&first->server);
190 } else if (!first_link || !second_link) {
191 /* Entry with nothing scheduled is lowest priority (front) */
192 return second_link && !first_link;
193 } else {
194 /* Sort by deadlines of tasks (later deadlines first) */
195 domain = get_task_domain(first_link);
196 return domain->higher_prio(second_link, first_link);
197 }
198}
199
200/*
201 * Return true if the domain has a higher priority ready task. The @curr
202 * task must belong to the domain.
203 */
204static int mc_preempt_needed(struct domain *dom, struct task_struct* curr)
205{
206 struct task_struct *next = dom->peek_ready(dom);
207 if (!next || !curr) {
208 return next && !curr;
209 } else {
210 BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
211 return !is_np(curr) &&
212 get_task_domain(next)->higher_prio(next, curr);
213 }
214}
215
216/*
217 * Update crit entry position in a global heap. Caller must hold
218 * @ce's domain lock.
219 */
220static void update_crit_position(struct crit_entry *ce)
221{
222 struct bheap *heap;
223 if (is_global(ce->domain)) {
224 heap = domain_data(ce->domain)->heap;
225 BUG_ON(!heap);
226 BUG_ON(!bheap_node_in_heap(ce->node));
227 bheap_delete(cpu_lower_prio, heap, ce->node);
228 bheap_insert(cpu_lower_prio, heap, ce->node);
229 }
230}
231
232/*
233 * Update crit entry position in a global heap if it has been marked
234 * for update. Caller must hold @ce's domain lock.
235 */
236static void fix_crit_position(struct crit_entry *ce)
237{
238 struct server *server = &ce->server;
239 if (is_global(ce->domain) && server->in_transit) {
240 server_state_change(server, server->state, 0);
241 update_crit_position(ce);
242 }
243}
244
245/*
 246 * Return the next CPU which should be preempted, or NULL if the domain has no
247 * preemptable CPUs. Caller must hold the @dom lock.
248 */
249static struct crit_entry* lowest_prio_cpu(struct domain *dom)
250{
251 struct bheap *heap = domain_data(dom)->heap;
252 struct bheap_node* hn;
253 struct crit_entry *ce, *res = NULL;
254
255 do {
256 hn = bheap_peek(cpu_lower_prio, heap);
257 ce = (hn) ? hn->value : NULL;
258 if (ce) {
259 if (ce->server.in_transit)
260 fix_crit_position(ce);
261 else if (ce->server.state == SS_ACTIVE)
262 res = ce;
263 else if (ce->server.state == SS_REMOVED)
264 ce = NULL;
265 }
266 } while (ce && !res);
267
268 return res;
269}
270
271/*
272 * Time accounting for ghost tasks.
273 * Must be called before a decision is made involving the task's budget.
274 */
275static void update_server_time(struct task_struct *p)
276{
277 u64 clock = litmus_clock();
278 u64 delta = clock - p->rt_param.last_exec_time;
279 if (unlikely ((s64)delta < 0)) {
280 delta = 0;
281 }
282 if (budget_remaining(p) <= delta) {
283 tsk_rt(p)->job_params.exec_time = get_exec_cost(p);
284 } else {
285 tsk_rt(p)->job_params.exec_time += delta;
286 }
287 p->rt_param.last_exec_time = clock;
288}
289
290/*
291 * Arm ghost timer. Will merge timers if the option is specified.
292 */
293static void start_crit(struct crit_entry *ce)
294{
295 lt_t fire;
296 struct task_struct *task;
297 struct server *task_server;
298
299 BUG_ON(ce->server.state != SS_ACTIVE);
300
301 task = ce->server.linked;
302 task_server = &tsk_rt(task)->server;
303
304 if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
305 /* There is a level-A timer that will force a
306 * preemption, so we don't set this for level-A
307 * tasks. Otherwise reset the budget timer
308 */
309 fire = litmus_clock() + budget_remaining(task);
310
311#ifdef CONFIG_MERGE_TIMERS
312 add_event(crit_cpu(ce)->event_group, &ce->event, fire);
313#else
314 __hrtimer_start_range_ns(&ce->timer,
315 ns_to_ktime(fire),
316 0 /* delta */,
317 HRTIMER_MODE_ABS_PINNED,
318 0 /* no wakeup */);
319#endif
320 }
321
322 server_state_change(task_server, SS_ACTIVE, 0);
323}
324
325static void stop_crit(struct crit_entry *ce)
326{
327 struct server *task_server = &tsk_rt(ce->server.linked)->server;
328
329 if (is_ghost(ce->server.linked)) {
330 if (!budget_exhausted(ce->server.linked)) {
331 /* Job isn't finished, so do accounting */
332 update_server_time(ce->server.linked);
333 }
334#ifdef CONFIG_MERGE_TIMERS
335 cancel_event(&ce->event);
336#else
337 hrtimer_try_to_cancel(&ce->timer);
338#endif
339 }
340
341 if (task_server->state != SS_BLOCKED) {
342 server_state_change(task_server, SS_REMOVED, 0);
343 }
344}
345
346/**
347 * link_task_to_crit() - Logically run a task at a criticality level.
348 * Caller must hold @ce's CPU lock.
349 */
350static void link_task_to_crit(struct crit_entry *ce,
351 struct task_struct *task)
352{
353 struct server *ce_server = &ce->server;
354
355 TRACE_CRIT_ENTRY(ce, "Linking " TS "\n", TA(task));
356 BUG_ON(task && ce_server->state != SS_ACTIVE);
357 BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
358 BUG_ON(task && is_global(ce->domain) &&
359 !bheap_node_in_heap(ce->node));
360
361 /* Unlink last task */
362 if (ce->server.linked) {
363 ce->domain->release_resources(ce->server.linked);
364 if (ce_server->state == SS_BLOCKED) {
365 server_state_change(ce_server, SS_ACTIVE, 0);
366 }
367
368 TRACE_MC_TASK(ce->server.linked, "Unlinking\n");
369
370 stop_crit(ce);
371 tsk_rt(ce->server.linked)->server.parent = 0;
372 tsk_rt(ce->server.linked)->server.cpu = NO_CPU;
373 ce->server.linked->rt_param.linked_on = NO_CPU;
374 }
375
376 /* Actually link task */
377 ce->server.linked = task;
378 if (task) {
379 /* Block if task cannot acquire resources */
380 task->rt_param.linked_on = crit_cpu(ce)->cpu;
381 tsk_rt(task)->server.parent = ce_sid(ce);
382 tsk_rt(ce->server.linked)->server.cpu = crit_cpu(ce)->cpu;
383
384 if (ce->domain->acquire_resources(task)) {
385 start_crit(ce);
386 } else {
387 server_state_change(ce_server, SS_BLOCKED, 0);
388 }
389 }
390}
391
392static void check_for_preempt(struct domain*);
393
394/**
395 * job_arrival() - Called when a task re-enters the system.
396 * Caller must hold no locks.
397 */
398static void job_arrival(struct task_struct *task)
399{
 400 struct domain *dom;
 401
 402 BUG_ON(!task);
 403 TRACE_MC_TASK(task, "Job arriving\n");
 404 dom = get_task_domain(task);
405 raw_spin_lock(dom->lock);
406 if (can_requeue(task)) {
407 BUG_ON(task->rt_param.linked_on != NO_CPU);
408 dom->requeue(dom, task);
409 check_for_preempt(dom);
410 } else {
411 /* If a global task is scheduled on one cpu, it CANNOT
412 * be requeued into a global domain. Another cpu might
413 * dequeue the global task before it is descheduled,
414 * causing the system to crash when the task is scheduled
415 * in two places simultaneously.
416 */
417 TRACE_MC_TASK(task, "Delayed arrival of scheduled task, "
418 "linked: %d, sched: %d, queued: %d\n",
419 tsk_rt(task)->linked_on, tsk_rt(task)->scheduled_on,
420 is_queued(task));
421 }
422 raw_spin_unlock(dom->lock);
423}
424
425/**
 426 * low_prio_arrival() - If CONFIG_PLUGIN_MC_REDIRECT is enabled, redirect
 427 * lower-priority job_arrival work to the interrupt_cpu.
428 */
429static void low_prio_arrival(struct task_struct *task)
430{
431 struct cpu_entry *entry;
432
 433 /* A racing CPU may have already requeued or linked this task */
434 if (!can_requeue(task)) return;
435
436#ifdef CONFIG_PLUGIN_MC_REDIRECT
437 if (!is_global_task(task))
438 goto arrive;
439 if (smp_processor_id() != interrupt_cpu) {
440 entry = &__get_cpu_var(cpus);
441 raw_spin_lock(&entry->redir_lock);
442 TRACE_MC_TASK(task, "Adding to redirect queue\n");
443 list_add(&tsk_rt(task)->list, &entry->redir);
444 raw_spin_unlock(&entry->redir_lock);
445 litmus_reschedule(interrupt_cpu);
446 } else
447#endif
448 {
449arrive:
450 TRACE_MC_TASK(task, "On interrupt master, requeueing task\n");
451 job_arrival(task);
452 }
453}
454
455#ifdef CONFIG_PLUGIN_MC_REDIRECT
456/**
457 * fix_global_levels() - Execute redirected job arrivals on this cpu.
458 */
459static void fix_global_levels(void)
460{
461 int c;
462 struct cpu_entry *e;
463 struct list_head *pos, *safe;
464 struct task_struct *t;
465
466 STRACE("Fixing global levels\n");
467 for_each_online_cpu(c) {
468 e = &per_cpu(cpus, c);
469 raw_spin_lock(&e->redir_lock);
470 list_for_each_safe(pos, safe, &e->redir) {
471 t = list_entry(pos, struct task_struct, rt_param.list);
472 BUG_ON(!t);
473 TRACE_MC_TASK(t, "Dequeued redirected job\n");
474 list_del_init(pos);
475 job_arrival(t);
476 }
477 raw_spin_unlock(&e->redir_lock);
478 }
479}
480#endif
481
482/**
483 * link_task_to_cpu() - Logically run a task on a CPU.
484 * The task must first have been linked to one of the CPU's crit_entries.
485 * Caller must hold the entry lock.
486 */
487static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
488{
489 int i = entry_level(entry);
490 struct crit_entry *ce;
491 struct server *server;
492
493 TRACE_MC_TASK(task, "Linking to P%d\n", entry->cpu);
494 BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
495 BUG_ON(task && is_ghost(task));
496
497 if (entry->linked) {
498 server = &tsk_rt(entry->linked)->server;
499 sched_trace_server_switch_away(server->sid, *server->job,
500 entry->linked->pid,
501 get_user_job(entry->linked),
502 entry->cpu);
503 }
504
505 if (task) {
506 server = &tsk_rt(task)->server;
507 sched_trace_server_switch_to(server->sid, *server->job,
508 task->pid,
509 get_user_job(task),
510 entry->cpu);
511 }
512
513 entry->linked = task;
514
515 /* Higher criticality crit entries are now usable */
516 for (; i < entry_level(entry) + 1; i++) {
517 ce = &entry->crit_entries[i];
518 server = &ce->server;
519
520 if (server->state == SS_REMOVED) {
521 TRACE_CRIT_ENTRY(ce, "Moving up to active\n");
522 server_state_change(server, SS_ACTIVE, 1);
523 }
524 }
525}
526
527static void preempt_cpu(struct cpu_entry *entry, struct task_struct *t)
528{
529 link_task_to_cpu(entry, t);
530 litmus_reschedule(entry->cpu);
531}
532
533/**
534 * preempt_crit() - Preempt a logically running task with a higher priority one.
 535 * @dom: Domain from which to draw higher priority task
 536 * @ce: CPU criticality level to preempt
 537 * Return: Preempted task
538 *
539 * Caller must hold the lock for @dom and @ce's CPU lock.
540 */
541static struct task_struct* preempt_crit(struct domain *dom, struct crit_entry *ce)
542{
543 struct task_struct *task = dom->take_ready(dom);
544 struct cpu_entry *entry = crit_cpu(ce);
545 struct task_struct *old = ce->server.linked;
546
547 BUG_ON(!task);
548 TRACE_CRIT_ENTRY(ce, "Preempted by " TS "\n", TA(task));
549
550 /* Per-domain preemption */
551 link_task_to_crit(ce, task);
552 /* if (old && can_requeue(old)) { */
553 /* dom->requeue(dom, old); */
554 /* } */
555 update_crit_position(ce);
556
557 /* Preempt actual execution if this is a running task.
558 * We know that our task is higher priority than what is currently
559 * running on this CPU as otherwise the crit_entry would have
560 * been disabled and a preemption could not have occurred
561 */
562 if (!is_ghost(task) && SS_BLOCKED != ce->server.state) {
563 preempt_cpu(entry, task);
564 } else if (old && old == entry->linked) {
565 /* Preempted running task with ghost job. Nothing should run */
566 preempt_cpu(entry, NULL);
567 }
568
569 return old;
570}
571
572/**
573 * update_crit_levels() - Update criticality entries for the new cpu state.
574 * This should be called after a new task has been linked to @entry.
575 * The caller must hold the @entry->lock, but this method will release it.
576 */
577static void update_crit_levels(struct cpu_entry *entry)
578{
579 int i, global_preempted;
580 struct server *server;
581 struct crit_entry *ce;
582 struct task_struct *readmit[NUM_CRIT_LEVELS];
583 enum crit_level level = entry_level(entry);
584
585 /* Remove lower priority tasks from the entry */
586 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
587 ce = &entry->crit_entries[i];
588 server = &ce->server;
589
590 global_preempted = ce->server.linked &&
591 /* This task is running on a cpu */
592 ce->server.linked->rt_param.scheduled_on == entry->cpu &&
593 /* But it was preempted */
594 ce->server.linked != entry->linked &&
595 /* And it is an eligible global task */
596 !is_ghost(ce->server.linked) && is_global(ce->domain);
597
598 /* Do not readmit global tasks which are preempted! These can't
599 * ever be re-admitted until they are descheduled for reasons
600 * explained in job_arrival.
601 */
602 readmit[i] = (!global_preempted) ? ce->server.linked : NULL;
603
604 if (server->state != SS_REMOVED) {
605 if (ce->server.linked) {
606 link_task_to_crit(ce, NULL);
607 }
608 TRACE_CRIT_ENTRY(ce, "Removing lower crit\n");
609 server_state_change(server, SS_REMOVED, 1);
610
611 }
612 }
613 /* Need to unlock so we can access domains */
614 raw_spin_unlock(&entry->lock);
615
616 /* Re-admit tasks to the system */
617 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
618 ce = &entry->crit_entries[i];
619 if (readmit[i]) {
620 low_prio_arrival(readmit[i]);
621 }
622 }
623}
624
625/*
 626 * Assumes a single, lowest-priority global criticality level. This avoids
627 * unnecessary calls to update_crit_levels.
628 */
629static void check_global_preempt(struct domain *dom)
630{
631 int recheck;
632 struct crit_entry *ce;
633 struct cpu_entry *entry;
634 struct task_struct *preempted;
635
636 recheck = 1;
637
638 /* Loop until we find a non-preemptable CPU */
639 while (recheck && (ce = lowest_prio_cpu(dom))) {
640 entry = crit_cpu(ce);
641 recheck = 1;
642 preempted = NULL;
643
644 /* Cache next task */
645 dom->peek_ready(dom);
646
647 raw_spin_lock(&entry->lock);
648
649 if (ce->server.in_transit) {
650 /* CPU disabled while locking! */
651 fix_crit_position(ce);
652 } else if (mc_preempt_needed(dom, ce->server.linked)) {
653 /* Success! Check for more preemptions */
654 preempted = preempt_crit(dom, ce);
655 } else {
656 /* Failure! */
657 recheck = 0;
658 }
659
660 raw_spin_unlock(&entry->lock);
661
662 /* Only add preempted task after lock has been released */
663 if (preempted && can_requeue(preempted)) {
664 dom->requeue(dom, preempted);
665 }
666 }
667}
668
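/*
 * Rather than linking the higher-priority task directly, a partitioned
 * preemption is deferred to the owning CPU: the preempt signal is set and
 * that CPU is rescheduled so that the switch happens in its own
 * mc_schedule() path (see process_signals()).
 */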
669static void check_partitioned_preempt(struct domain *dom)
670{
671 struct cpu_entry *entry;
672 struct crit_entry *ce;
673
674 ce = domain_data(dom)->crit_entry;
675 entry = crit_cpu(ce);
676
677 if (ce->server.state == SS_REMOVED ||
678 !mc_preempt_needed(dom, ce->server.linked)) {
679 return;
680 }
681
682 entry->signal.preempt = 1;
683 litmus_reschedule(entry->cpu);
684}
685
686/**
687 * check_for_preempt() - Causes a preemption if higher-priority tasks are ready.
688 * Caller must hold domain lock.
689 */
690static void check_for_preempt(struct domain *dom)
691{
692 struct crit_entry *ce;
693 struct cpu_entry *entry;
694
695 if (is_global(dom)) {
696 check_global_preempt(dom);
697 } else {
698 ce = domain_data(dom)->crit_entry;
699 entry = crit_cpu(ce);
700
701 /* Cache next task */
702 dom->peek_ready(dom);
703
704 raw_spin_lock(&entry->lock);
705 check_partitioned_preempt(dom);
706 raw_spin_unlock(&entry->lock);
707 }
708}
709
710/**
711 * remove_from_all() - Logically remove a task from all structures.
712 * Caller must hold no locks.
713 */
714static void remove_from_all(struct task_struct* task)
715{
716 int update = 0;
717 struct cpu_entry *entry;
718 struct crit_entry *ce;
 719 struct domain *dom;
 720
 721 BUG_ON(!task);
 722 TRACE_MC_TASK(task, "Removing from everything\n");
 723 dom = get_task_domain(task);
724 raw_spin_lock(dom->lock);
725
726 /* Remove the task from any CPU state */
727 if (task->rt_param.linked_on != NO_CPU) {
728 TRACE_MC_TASK(task, "Linked to something\n");
729 entry = &per_cpu(cpus, task->rt_param.linked_on);
730 raw_spin_lock(&entry->lock);
731
732 /* Unlink only if task is still linked post lock */
733 ce = &entry->crit_entries[tsk_mc_crit(task)];
734 if (task->rt_param.linked_on != NO_CPU) {
735 BUG_ON(ce->server.linked != task);
736 if (entry->linked == task) {
737 update = 1;
738 link_task_to_cpu(entry, NULL);
739 }
740 link_task_to_crit(ce, NULL);
741 update_crit_position(ce);
742 } else {
743 TRACE_MC_TASK(task, "Unlinked before we got lock!\n");
744 }
745 raw_spin_unlock(&entry->lock);
746 } else {
747 TRACE_MC_TASK(task, "Not linked to anything\n");
748 }
749
750 /* Ensure the task isn't returned by its domain */
751 dom->remove(dom, task);
752
753 raw_spin_unlock(dom->lock);
754}
755
756/**
757 * job_completion() - Update task state and re-enter it into the system.
758 * Converts tasks which have completed their execution early into ghost jobs.
759 * Caller must hold no locks.
760 */
761static void job_completion(struct task_struct *task, int forced)
762{
763 int release_server;
764 struct cpu_entry *entry;
765 struct crit_entry *ce;
766
767 TRACE_MC_TASK(task, "Completed\n");
768
769 if (!forced) {
770 /* Userspace signaled job completion */
771 sched_trace_task_completion(current, 0);
772 mb();
773 setup_user_release(current, get_user_deadline(current));
774 }
775
776#ifndef CONFIG_PLUGIN_MC_LINUX_SLACK_STEALING
 777 /* Release lowest-criticality tasks' servers with their userspace tasks,
778 * preventing them from turning into idle ghost tasks
779 */
780 if (tsk_mc_crit(task) == NUM_CRIT_LEVELS - 1)
781 release_server = 1;
782 else
783#endif
784 release_server = budget_exhausted(task);
785
786 if (release_server || forced) {
787 if (release_server)
788 sched_trace_server_completion(-task->pid,
789 get_rt_job(task));
790 /* Only unlink (and release resources) if the current server job
791 * must stop logically running
792 */
793 remove_from_all(task);
794 }
795
796 if (lt_before(get_user_release(task), litmus_clock()) ||
797 (release_server && tsk_rt(task)->completed)){
798 TRACE_TASK(task, "Executable task going back to running\n");
799 tsk_rt(task)->completed = 0;
800 }
801
802 if (release_server || forced) {
803 /* TODO: Level A does this independently and should not */
804 if (release_server && CRIT_LEVEL_A != tsk_mc_crit(task)) {
805 prepare_for_next_period(task);
806 }
807
808 TRACE_TASK(task, "Is released: %d, now: %llu, rel: %llu\n",
809 is_released(task, litmus_clock()), litmus_clock(),
810 get_release(task));
811
812 /* Requeue non-blocking tasks */
813 if (is_running(task)) {
814 job_arrival(task);
815 }
816 } else if (is_ghost(task)) {
817 entry = &per_cpu(cpus, tsk_rt(task)->linked_on);
818 ce = &entry->crit_entries[tsk_mc_crit(task)];
819
820 raw_spin_lock(&entry->lock);
821 if (ce->server.linked == task) {
822 /* The task went ghost while it was linked to a CPU */
823 link_task_to_cpu(entry, NULL);
824 stop_crit(ce);
825 if (ce->server.state == SS_ACTIVE)
826 start_crit(ce);
827 }
828 raw_spin_unlock(&entry->lock);
829 }
830}
831
832/**
833 * mc_ghost_exhausted() - Complete logically running ghost task.
834 */
835#ifdef CONFIG_MERGE_TIMERS
836static void mc_ghost_exhausted(struct rt_event *e)
837{
838 struct crit_entry *ce = container_of(e, struct crit_entry, event);
839#else
840static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
841{
842 struct crit_entry *ce = container_of(timer, struct crit_entry, timer);
843
844#endif
845 struct task_struct *tmp = NULL;
846 struct cpu_entry *entry = crit_cpu(ce);
847 TRACE("Firing here at %llu\n", litmus_clock());
848 TRACE_CRIT_ENTRY(ce, "For this\n");
849
850 raw_spin_lock(&entry->lock);
851
852 if (is_ghost(ce->server.linked)) {
853 update_server_time(ce->server.linked);
854 if (budget_exhausted(ce->server.linked)) {
855 tmp = ce->server.linked;
856 }
857 } else {
858 litmus_reschedule(crit_cpu(ce)->cpu);
859 }
860
861 raw_spin_unlock(&entry->lock);
862
863 if (tmp)
864 job_completion(tmp, 1);
865
866#ifndef CONFIG_MERGE_TIMERS
867 return HRTIMER_NORESTART;
868#endif
869}
870
871/*
872 * The MC-CE common timer callback code for merged and non-merged timers.
873 * Returns the next time the timer should fire.
874 */
875static lt_t __ce_timer_function(struct ce_dom_data *ce_data)
876{
877 struct crit_entry *ce = get_crit_entry_for(ce_data->cpu, CRIT_LEVEL_A);
878 struct domain *dom = ce->domain;
879 struct task_struct *old_link = NULL;
880 lt_t next_timer_abs;
881
882 TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
883
884 raw_spin_lock(dom->lock);
885
886 raw_spin_lock(&crit_cpu(ce)->lock);
887 if (ce->server.linked &&
888 ce->server.linked == ce_data->should_schedule)
889 {
890 old_link = ce->server.linked;
891 link_task_to_crit(ce, NULL);
892 mc_ce_job_completion(dom, old_link);
893 }
894 raw_spin_unlock(&crit_cpu(ce)->lock);
895
896 next_timer_abs = mc_ce_timer_callback_common(dom);
897
 898 /* Job completion will check for preemptions by calling job_arrival()
 899 * if the task is not blocked */
900 if (NULL != old_link) {
901 STRACE("old_link " TS " so will call job completion\n", TA(old_link));
902 raw_spin_unlock(dom->lock);
903 job_completion(old_link, 1);
904 } else {
905 STRACE("old_link was null, so will call check for preempt\n");
906 check_for_preempt(dom);
907 raw_spin_unlock(dom->lock);
908 }
909 return next_timer_abs;
910}
911
912#ifdef CONFIG_MERGE_TIMERS
913static void ce_timer_function(struct rt_event *e)
914{
915 struct ce_dom_data *ce_data =
916 container_of(e, struct ce_dom_data, event);
917 unsigned long flags;
918 lt_t next_timer_abs;
919
920 TS_LVLA_RELEASE_START;
921
922 local_irq_save(flags);
923 next_timer_abs = __ce_timer_function(ce_data);
924 add_event(per_cpu(cpus, ce_data->cpu).event_group, e, next_timer_abs);
925 local_irq_restore(flags);
926
927 TS_LVLA_RELEASE_END;
928}
929#else /* else to CONFIG_MERGE_TIMERS */
930static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
931{
932 struct ce_dom_data *ce_data =
933 container_of(timer, struct ce_dom_data, timer);
934 unsigned long flags;
935 lt_t next_timer_abs;
936
937 TS_LVLA_RELEASE_START;
938
939 local_irq_save(flags);
940 next_timer_abs = __ce_timer_function(ce_data);
941 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
942 local_irq_restore(flags);
943
944 TS_LVLA_RELEASE_END;
945
946 return HRTIMER_RESTART;
947}
948#endif /* CONFIG_MERGE_TIMERS */
949
950/**
951 * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
952 */
953static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
954{
955 unsigned long flags;
956 struct task_struct *first = bheap_peek(rt->order, tasks)->value;
957 struct domain *dom = get_task_domain(first);
958
959 raw_spin_lock_irqsave(dom->lock, flags);
960 TRACE(TS "Jobs released\n", TA(first));
961 __merge_ready(rt, tasks);
962 check_for_preempt(dom);
963 raw_spin_unlock_irqrestore(dom->lock, flags);
964}
965
966/**
 967 * mc_task_new() - Set up a new mixed-criticality task.
968 * Assumes that there are no partitioned domains after level B.
969 */
970static void mc_task_new(struct task_struct *t, int on_rq, int running)
971{
972 unsigned long flags;
973 int i;
974 struct cpu_entry* entry;
975 enum crit_level level = tsk_mc_crit(t);
976 struct dgl_group_req *req;
977 struct control_page *cp = tsk_rt(t)->ctrl_page;
978 struct color_ctrl_page *ccp = &tsk_rt(t)->color_ctrl_page;
979
980 local_irq_save(flags);
981 TRACE("New mixed criticality task %d\n", t->pid);
982
983 if (level == CRIT_LEVEL_A)
984 get_rt_relative_deadline(t) = get_exec_cost(t);
985
986 /* Assign domain */
987 if (level < CRIT_LEVEL_C)
988 entry = &per_cpu(cpus, get_partition(t));
989 else
990 entry = &per_cpu(cpus, task_cpu(t));
991 t->rt_param._domain = entry->crit_entries[level].domain;
992
993 tsk_rt(t)->flush = 0;
994 tsk_rt(t)->load = 0;
995
996 /* Userspace and kernelspace view of task state may differ.
997 * Model kernel state as a budget enforced container
998 */
999 sched_trace_container_param(t->pid, t->comm);
1000 sched_trace_server_param(-t->pid, t->pid,
1001 get_exec_cost(t), get_rt_period(t));
1002 server_init(&tsk_rt(t)->server, -t->pid,
1003 &tsk_rt(t)->job_params.job_no,
1004 NO_CPU);
1005 tsk_rt(t)->task_params.budget_policy = PRECISE_ENFORCEMENT;
1006
1007 BUG_ON(!tsk_rt(t)->server.job);
1008
1009 /* Apply chunking */
1010 if (level == CRIT_LEVEL_B && color_chunk &&
1011 lt_after(get_exec_cost(t), color_chunk)) {
1012 tsk_rt(t)->orig_cost = get_exec_cost(t);
1013 }
1014
1015 /* Setup color request */
1016 req = kmalloc(sizeof(*req), GFP_ATOMIC);
1017 req->task = t;
1018 tsk_rt(t)->req = req;
1019 if (cp && ccp) {
1020 TRACE_MC_TASK(t, "Initializing group request\n");
1021 cp->colors_updated = 0;
1022 dgl_group_req_init(&group_lock, req);
1023 for (i = 0; ccp->pages[i]; ++i)
1024 set_req(&group_lock, req, ccp->colors[i], ccp->pages[i]);
1025 } else {
1026 BUG_ON(CRIT_LEVEL_B == tsk_mc_crit(t));
1027 }
1028
1029 /* Setup job params */
1030 release_at(t, litmus_clock());
1031 if (running) {
1032 BUG_ON(entry->scheduled);
1033 TRACE_MC_TASK(t, "Was already running\n");
1034 entry->scheduled = t;
1035 tsk_rt(t)->scheduled_on = entry->cpu;
1036 tsk_rt(t)->last_exec_time = litmus_clock();
1037 } else {
1038 t->rt_param.scheduled_on = NO_CPU;
1039 }
1040 t->rt_param.linked_on = NO_CPU;
1041
1042 job_arrival(t);
1043
1044 local_irq_restore(flags);
1045}
1046
1047/**
 1048 * mc_task_wake_up() - Add a task back into its domain and check for preemptions.
1049 */
1050static void mc_task_wake_up(struct task_struct *task)
1051{
1052 unsigned long flags;
1053 lt_t now = litmus_clock();
1054 local_irq_save(flags);
1055
1056 TRACE(TS " wakes up\n", TA(task));
1057 if (is_tardy(task, now)) {
1058 /* Task missed its last release */
1059 release_at(task, now);
1060 sched_trace_task_release(task);
1061 }
1062
1063 if (budget_exhausted(task))
1064 /* Rare, but possible, race condition */
1065 job_completion(task, 1);
1066 else
1067 job_arrival(task);
1068
1069 local_irq_restore(flags);
1070}
1071
1072/**
 1074 * mc_task_block() - Remove the task from all state so it cannot be run anywhere.
1074 */
1075static void mc_task_block(struct task_struct *task)
1076{
1077 unsigned long flags;
1078 local_irq_save(flags);
1079 TRACE(TS " blocks\n", TA(task));
1080 remove_from_all(task);
1081 local_irq_restore(flags);
1082}
1083
1084/**
1085 * mc_task_exit() - Remove task from the system.
1086 */
1087static void mc_task_exit(struct task_struct *task)
1088{
1089 unsigned long flags;
1090 local_irq_save(flags);
1091 BUG_ON(!is_realtime(task));
1092 TRACE(TS " RIP\n", TA(task));
1093
1094 if (tsk_mc_crit(task) == CRIT_LEVEL_B && lock_cache) {
1095 color_sched_out_task(task);
1096 }
1097
1098 remove_from_all(task);
1099 if (tsk_rt(task)->scheduled_on != NO_CPU) {
1100 per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL;
1101 tsk_rt(task)->scheduled_on = NO_CPU;
1102 }
1103
 1104 /* TODO: restore. This was getting triggered by race conditions even when
1105 * no level-A task was executing */
1106 /* if (CRIT_LEVEL_A == tsk_mc_crit(task)) */
1107 /* mc_ce_task_exit_common(task); */
1108
1109 local_irq_restore(flags);
1110}
1111
1112/**
 1113 * mc_admit_task() - Return 0 if the task is valid, an error code otherwise.
1114 * Assumes there are no partitioned levels after level B.
1115 */
1116static long mc_admit_task(struct task_struct* task)
1117{
1118 const enum crit_level crit = tsk_mc_crit(task);
1119 long ret;
1120 if (!tsk_mc_data(task)) {
1121 printk(KERN_WARNING "Tried to admit task with no criticality "
1122 "level\n");
1123 ret = -EINVAL;
1124 goto out;
1125 }
1126 if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
1127 printk(KERN_WARNING "Tried to admit partitioned task with no "
1128 "partition\n");
1129 ret = -EINVAL;
1130 goto out;
1131 }
1132 /* if (crit < CRIT_LEVEL_C && get_partition(task) == interrupt_cpu) { */
1133 /* printk(KERN_WARNING "Tried to admit partitioned task on " */
1134 /* "the interrupt master\n"); */
1135 /* ret = -EINVAL; */
1136 /* goto out; */
1137 /* } */
1138 if (crit == CRIT_LEVEL_A) {
1139 ret = mc_ce_admit_task_common(task);
1140 if (ret)
1141 goto out;
1142 }
1143 printk(KERN_INFO "Admitted task with criticality level %d\n",
1144 tsk_mc_crit(task));
1145 ret = 0;
1146out:
1147 return ret;
1148}
1149
 1150/*
 1151 * Fill entry->linked, scanning criticality levels from highest to lowest.
 1152 * Caller must hold the entry lock; it is dropped and re-acquired inside.
 1153 */
1153void pick_next_task(struct cpu_entry *entry)
1154{
1155 int i;
1156 struct crit_entry *ce;
1157 struct domain *dom;
1158 struct task_struct *dtask, *ready_task;
1159 struct server *server;
1160
1161 STRACE("Picking next task\n");
1162
1163 for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
1164 ce = &entry->crit_entries[i];
1165 dom = ce->domain;
1166 server = &ce->server;
1167
1168 /* Swap locks. We cannot acquire a domain lock while
1169 * holding an entry lock or deadlocks will happen
1170 */
1171 raw_spin_unlock(&entry->lock);
1172 raw_spin_lock(dom->lock);
1173
1174 /* Do domain stuff before grabbing CPU locks */
1175 dtask = dom->peek_ready(dom);
1176 fix_crit_position(ce);
1177
1178 raw_spin_lock(&entry->lock);
1179
1180 ready_task = NULL;
1181 if (!entry->linked && server->state == SS_ACTIVE) {
1182 if (ce->server.linked) {
1183 ready_task = ce->server.linked;
1184 } else if (dtask) {
1185 /* Need a new task */
1186 dom->take_ready(dom);
1187 ready_task = dtask;
1188
1189 link_task_to_crit(ce, dtask);
1190 update_crit_position(ce);
1191 }
1192 }
1193 if (ready_task && !is_ghost(ready_task) &&
1194 server->state == SS_ACTIVE) {
1195 link_task_to_cpu(entry, ready_task);
1196 raw_spin_unlock(dom->lock);
1197 update_crit_levels(entry);
1198 raw_spin_lock(&entry->lock);
1199 continue;
1200 }
1201 raw_spin_unlock(dom->lock);
1202 }
1203}
1204
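/*
 * Complete a deferred DGL state change. The DGL callbacks cpu_acquired() and
 * cpu_preempted() avoid touching CPU locks: they mark the task's server as
 * in transit, set the update signal via cpu_update() and trigger a
 * reschedule; the resulting server/CPU state transition is finished here,
 * from mc_schedule().
 */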
1205static void process_update_signal(struct cpu_entry *entry)
1206{
1207 int locked;
1208 struct crit_entry *ce;
1209 struct server *crit_server, *task_server;
1210 struct task_struct *linked;
1211
1212 STRACE("Reading update signal\n");
1213
1214 ce = &entry->crit_entries[CRIT_LEVEL_B];
1215
1216 /* Complete task state transitions */
1217 crit_server = &ce->server;
1218 if (!crit_server->linked) {
1219 return;
1220 }
1221
1222 linked = crit_server->linked;
1223 task_server = &tsk_rt(linked)->server;
1224 if (!task_server->in_transit) {
1225 return;
1226 }
1227
1228 raw_spin_lock(&dgl_lock);
1229
1230 /* Update and save lock state */
1231 update_group_req(&group_lock, tsk_rt(linked)->req);
1232 locked = has_resources(linked, entry->cpu);
1233
1234 raw_spin_unlock(&dgl_lock);
1235
1236 if (locked && crit_server->state != SS_ACTIVE) {
1237 TRACE_MC_TASK(linked, "Activated\n");
1238
1239 server_state_change(crit_server, SS_ACTIVE, 0);
1240 start_crit(ce);
1241 server_state_change(task_server, SS_ACTIVE, 0);
1242
1243 if (!is_ghost(linked)) {
1244 link_task_to_cpu(entry, linked);
1245 update_crit_levels(entry);
1246 raw_spin_lock(&entry->lock);
1247 }
1248 } else if (!locked && crit_server->state != SS_BLOCKED) {
1249 TRACE_MC_TASK(linked, "Blocked\n");
1250
1251 if (entry->linked == linked) {
1252 link_task_to_cpu(entry, NULL);
1253 }
1254
1255 server_state_change(task_server, SS_BLOCKED, 0);
1256 stop_crit(ce);
1257 server_state_change(crit_server, SS_BLOCKED, 0);
1258 }
1259
1260
1261}
1262
1263static void process_signals(struct cpu_entry *entry)
1264{
1265 struct domain *dom;
1266 struct crit_entry *ce;
1267 struct mc_signal signal;
1268 struct task_struct *preempted;
1269
1270 ce = &entry->crit_entries[CRIT_LEVEL_B];
1271 dom = ce->domain;
1272
1273 /* Load signals */
1274 raw_spin_lock(&entry->signal_lock);
1275 signal = entry->signal;
1276 clear_signal(&entry->signal);
1277 raw_spin_unlock(&entry->signal_lock);
1278
1279 if (signal.preempt) {
1280 raw_spin_lock(dom->lock);
1281 /* A higher-priority task may exist */
1282 STRACE("Reading preempt signal\n");
1283 dom->peek_ready(dom);
1284
1285 raw_spin_lock(&entry->lock);
1286
1287 if (ce->server.state == SS_ACTIVE &&
1288 mc_preempt_needed(ce->domain, ce->server.linked)) {
1289 preempted = preempt_crit(ce->domain, ce);
1290 raw_spin_unlock(dom->lock);
1291
1292 /* Can't requeue while we hold the entry lock, but
1293 * can't release that lock until state of lower-crit
1294 * servers is updated
1295 */
1296 if (!is_ghost(ce->server.linked)) {
1297 update_crit_levels(entry);
1298 } else {
1299 raw_spin_unlock(&entry->lock);
1300 }
1301
1302 if (preempted) {
1303 raw_spin_lock(dom->lock);
1304 dom->requeue(dom, preempted);
1305 raw_spin_unlock(dom->lock);
1306 }
1307
1308 raw_spin_lock(&entry->lock);
1309 } else {
1310 raw_spin_unlock(dom->lock);
1311 }
1312 } else {
1313 raw_spin_lock(&entry->lock);
1314 }
1315
1316 if (signal.update) {
1317 process_update_signal(entry);
1318 }
1319}
1320
1321/**
1322 * mc_schedule() - Return next task which should be scheduled.
1323 */
1324static struct task_struct* mc_schedule(struct task_struct* prev)
1325{
1326 lt_t start, exec;
1327 int out_of_time, sleep, preempt, exists, blocks, global, lower, work;
1328 struct cpu_entry* entry = &__get_cpu_var(cpus);
1329 struct task_struct *next = NULL;
1330
1331 /* Litmus gave up because it couldn't access the stack of the CPU
 1332 * that will_schedule was migrating from. Requeue it.
 1333 * This really only happens in VMs.
1334 */
1335 if (entry->will_schedule && entry->will_schedule != prev) {
1336 entry->will_schedule->rt_param.scheduled_on = NO_CPU;
1337 low_prio_arrival(entry->will_schedule);
1338 }
1339
1340 if (prev && tsk_rt(prev)->last_exec_time) {
1341 exec = litmus_clock() - tsk_rt(prev)->last_exec_time;
1342 tsk_rt(prev)->user_job.exec_time += exec;
1343 }
1344
1345 if (prev && tsk_mc_crit(prev) == CRIT_LEVEL_B &&
1346 is_realtime(prev) && get_rt_job(prev) > 1 && lock_cache) {
1347 start = litmus_clock();
1348 work = color_sched_out_task(prev);
1349 tsk_rt(prev)->flush = litmus_clock() - start;
1350 ++tsk_rt(prev)->flush_work;
1351 }
1352
1353 TS_LVLA_SCHED_START;
1354 TS_LVLB_SCHED_START;
1355 TS_LVLC_SCHED_START;
1356
1357 raw_spin_lock(&entry->lock);
1358
1359 BUG_ON(entry->scheduled && entry->scheduled != prev);
1360 BUG_ON(entry->scheduled && !is_realtime(prev));
1361 BUG_ON(prev && is_realtime(prev) && !entry->scheduled);
1362
1363 if (entry->scheduled != NULL) {
1364 entry->scheduled->rt_param.scheduled_on = NO_CPU;
1365 update_server_time(entry->scheduled);
1366 }
1367
1368 /* Determine state */
1369 exists = entry->scheduled != NULL;
1370 blocks = exists && !is_running(entry->scheduled);
1371 out_of_time = exists && budget_exhausted(entry->scheduled);
1372 sleep = exists && tsk_rt(entry->scheduled)->completed;
1373 global = exists && is_global_task(entry->scheduled);
1374 preempt = entry->scheduled != entry->linked;
1375 lower = exists && preempt && entry->linked &&
1376 tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);
1377
1378 TRACE(TS " block:%d oot:%d sleep:%d preempt:%d, now: %llu\n",
1379 TA(prev), blocks, out_of_time, sleep, preempt, litmus_clock());
1380
1381 raw_spin_unlock(&entry->lock);
1382
1383#ifdef CONFIG_PLUGIN_MC_REDIRECT
1384 if (smp_processor_id() == interrupt_cpu)
1385 fix_global_levels();
1386#endif
1387
1388 /* If a task blocks we have no choice but to reschedule */
1389 if (blocks)
1390 remove_from_all(entry->scheduled);
1391 /* Any task which exhausts its budget or sleeps waiting for its next
1392 * period completes unless its execution has been forcibly stopped
1393 */
1394 else if (out_of_time || sleep)/* && !preempt)*/
1395 job_completion(entry->scheduled, !sleep || preempt);
1396 /* Global scheduled tasks must wait for a deschedule before they
1397 * can rejoin the global state. Rejoin them here
1398 */
1399 else if (global && preempt) {
1400 if (lower)
1401 low_prio_arrival(entry->scheduled);
1402 else
1403 job_arrival(entry->scheduled);
1404 }
1405
1406 /* TODO: move this down somehow */
1407 sched_state_task_picked();
1408
1409 process_signals(entry);
1410
1411 /* Pick next task if none is linked */
1412 if (!entry->linked)
1413 pick_next_task(entry);
1414
1415 /* Schedule next task */
1416 next = entry->linked;
1417 if (next) {
1418 next->rt_param.scheduled_on = entry->cpu;
1419 }
1420 entry->will_schedule = next;
1421
1422 raw_spin_unlock(&entry->lock);
1423
1424 if (next) {
1425 switch (tsk_mc_crit(next)) {
1426 case CRIT_LEVEL_A: TS_LVLA_SCHED_END(next); break;
1427 case CRIT_LEVEL_B: TS_LVLB_SCHED_END(next); break;
1428 case CRIT_LEVEL_C: TS_LVLC_SCHED_END(next); break;
1429 }
1430 }
1431
1432 if (next && tsk_mc_crit(next) == CRIT_LEVEL_B && lock_cache && get_rt_job(next) > 1) {
1433 start = litmus_clock();
1434 work = color_sched_in_task(next);
1435 tsk_rt(next)->load = litmus_clock() - start;
1436 tsk_rt(next)->load_work = work;
1437 }
1438
1439 if (next) {
1440 tsk_rt(next)->last_exec_time = litmus_clock();
1441 TRACE_MC_TASK(next, "Picked this task\n");
1442 } else {
1443 STRACE("CPU %d idles at %llu\n", entry->cpu, litmus_clock());
1444 }
1445
1446 return next;
1447}
1448
1449void mc_finish_switch(struct task_struct *prev)
1450{
1451 struct cpu_entry* entry = &__get_cpu_var(cpus);
1452 entry->scheduled = is_realtime(current) ? current : NULL;
 1453 TRACE_TASK(prev, "Switched away; now scheduled: " TS "\n",
1454 TA(entry->scheduled));
1455}
1456
1457long mc_deactivate_plugin(void)
1458{
1459 return mc_ce_deactivate_plugin_common();
1460}
1461
1462static unsigned long long deadline_prio(struct dgl *dgl, struct dgl_group_req *greq)
1463{
1464 return get_deadline(greq->task);
1465}
1466
1467static void cpu_update(struct dgl_group_req *greq)
1468{
1469 struct cpu_entry *entry = &per_cpu(cpus, greq->cpu);
1470
1471 raw_spin_lock(&entry->signal_lock);
1472 entry->signal.update = 1;
1473 raw_spin_unlock(&entry->signal_lock);
1474
1475 litmus_reschedule(greq->cpu);
1476}
1477
1478/*
1479 * Setup and send signal to CPU for resource acquisition. To avoid touching
1480 * CPU locks, all CPU state modifications are delayed until the signal is
1481 * processed.
1482 */
1483static void cpu_acquired(struct dgl_group_req *greq)
1484{
1485 struct server *server = &tsk_rt(greq->task)->server;
1486
1487 TRACE_MC_TASK(greq->task, "Acquired CPU %d\n", greq->cpu);
1488
1489 sched_trace_task_resume(greq->task);
1490 server_state_change(server, SS_ACTIVE, 1);
1491
1492 cpu_update(greq);
1493}
1494
1495static void cpu_preempted(struct dgl_group_req *greq)
1496{
1497 struct server *server = &tsk_rt(greq->task)->server;
1498
1499 TRACE_MC_TASK(greq->task, "Dropping CPU %d\n", greq->cpu);
1500
1501 sched_trace_task_block(greq->task);
1502 server_state_change(server, SS_BLOCKED, 1);
1503
1504 cpu_update(greq);
1505}
1506
1507/* **************************************************************************
1508 * Initialization
1509 * ************************************************************************** */
1510
1511/* Initialize values here so that they are allocated with the module
1512 * and destroyed when the module is unloaded.
1513 */
1514
1515/* LVL-A */
1516DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
1517DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
1518DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
1519/* LVL-B */
1520DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
1521DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
1522/* LVL-C */
1523static struct domain_data _mc_crit_c;
1524static rt_domain_t _mc_crit_c_rt;
1525struct bheap _mc_heap_c;
1526struct bheap_node _mc_nodes_c[NR_CPUS];
1527
1528static long mc_activate_plugin(void)
1529{
1530 struct domain_data *dom_data;
1531 struct domain *dom;
1532 struct domain_data *our_domains[NR_CPUS];
1533 rt_domain_t *rt_dom;
1534 int cpu, n = 0;
1535 long ret;
1536
1537 reset_way_tracker();
1538
1539 interrupt_cpu = atomic_read(&release_master_cpu);
1540
1541 for_each_online_cpu(cpu) {
1542 rt_dom = &per_cpu(_mc_crit_b_rt, cpu);
1543 /* rt_dom->release_master = cpu; */
1544 }
1545
1546 if (cache_preempt && !lock_cache) {
1547 printk(KERN_ERR "LITMUS-MC: specified cache preemption without "
1548 "enabling the locking protocol (lock_cache)\n");
1549 ret = -EINVAL;
1550 goto out;
1551 }
1552
1553 dgl_init(&group_lock, color_cache_info.nr_colors,
1554 color_cache_info.ways);
1555 if (cache_preempt) {
1556 group_lock.assign_priority = deadline_prio;
1557 group_lock.cpu_preempted = cpu_preempted;
1558 }
1559 group_lock.cpu_acquired = cpu_acquired;
1560
1561 for_each_online_cpu(cpu) {
1562 BUG_ON(NR_CPUS <= n);
1563 dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
1564 dom_data = domain_data(dom);
1565 our_domains[cpu] = dom_data;
1566#if defined(CONFIG_MERGE_TIMERS) && defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1567 per_cpu(cpus, cpu).event_group =
1568 get_event_group_for(interrupt_cpu);
1569#elif defined(CONFIG_MERGE_TIMERS) && !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1570 per_cpu(cpus, cpu).event_group = get_event_group_for(cpu);
1571#endif
1572 n++;
1573 }
1574 ret = mc_ce_set_domains(n, our_domains);
1575 if (ret)
1576 goto out;
1577 ret = mc_ce_activate_plugin_common();
1578out:
1579 return ret;
1580}
1581
1582static void mc_release_ts(lt_t time)
1583{
1584 int cpu, cont_id = -1;
1585 char name[TASK_COMM_LEN];
1586 enum crit_level level;
1587 struct cpu_entry *entry;
1588 struct crit_entry *ce;
1589
1590 level = CRIT_LEVEL_A;
1591 strcpy(name, "LVL-A");
1592 for_each_online_cpu(cpu) {
1593 /* if (cpu == interrupt_cpu) */
1594 /* continue; */
1595 entry = &per_cpu(cpus, cpu);
1596 sched_trace_container_param(++cont_id, (const char*)&name);
1597 ce = &entry->crit_entries[level];
1598 sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
1599 server_state_change(&ce->server, SS_ACTIVE, 0);
1600 }
1601
1602 level = CRIT_LEVEL_B;
1603 strcpy(name, "LVL-B");
1604 for_each_online_cpu(cpu) {
1605 /* if (cpu == interrupt_cpu) */
1606 /* continue; */
1607 entry = &per_cpu(cpus, cpu);
1608 sched_trace_container_param(++cont_id, (const char*)&name);
1609 ce = &entry->crit_entries[level];
1610 sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
1611 server_state_change(&ce->server, SS_ACTIVE, 0);
1612 }
1613
1614 level = CRIT_LEVEL_C;
1615 strcpy(name, "LVL-C");
1616 sched_trace_container_param(++cont_id, (const char*)&name);
1617 for_each_online_cpu(cpu) {
1618 entry = &per_cpu(cpus, cpu);
1619 ce = &entry->crit_entries[level];
1620 sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
1621 server_state_change(&ce->server, SS_ACTIVE, 0);
1622 }
1623
1624 mc_ce_release_at_common(NULL, time);
1625}
1626
1627static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
1628 .plugin_name = "MC",
1629 .task_new = mc_task_new,
1630 .complete_job = complete_job,
1631 .task_exit = mc_task_exit,
1632 .schedule = mc_schedule,
1633 .task_wake_up = mc_task_wake_up,
1634 .task_block = mc_task_block,
1635 .admit_task = mc_admit_task,
1636 .activate_plugin = mc_activate_plugin,
1637 .release_at = release_at,
1638 .deactivate_plugin = mc_deactivate_plugin,
1639 .finish_switch = mc_finish_switch,
1640 .release_ts = mc_release_ts,
1641};
1642
1643static void init_crit_entry(struct cpu_entry *entry,
1644 struct crit_entry *ce, enum crit_level level,
1645 struct domain_data *dom_data,
1646 struct bheap_node *node)
1647{
1648
1649 ce->level = level;
1650 ce->server.linked = NULL;
1651 ce->node = node;
1652 ce->domain = &dom_data->domain;
1653 server_init(&ce->server, ce_sid(ce), 0, entry->cpu);
1654 ce->server.parent = -entry->cpu - 1;
1655#ifdef CONFIG_MERGE_TIMERS
1656 init_event(&ce->event, level, mc_ghost_exhausted,
1657 event_list_alloc(GFP_ATOMIC));
1658#else
1659 hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1660 ce->timer.function = mc_ghost_exhausted;
1661#endif
1662
1663}
1664
1665static void init_local_domain(struct cpu_entry *entry, struct domain_data *dom_data,
1666 enum crit_level level)
1667{
1668 dom_data->heap = NULL;
1669 dom_data->crit_entry = &entry->crit_entries[level];
1670 init_crit_entry(entry, dom_data->crit_entry, level, dom_data, NULL);
1671}
1672
1673static void init_global_domain(struct domain_data *dom_data, enum crit_level level,
1674 struct bheap *heap, struct bheap_node *nodes)
1675{
1676 int cpu;
1677 struct cpu_entry *entry;
1678 struct crit_entry *ce;
1679 struct bheap_node *node;
1680
1681 dom_data->crit_entry = NULL;
1682 dom_data->heap = heap;
1683 bheap_init(heap);
1684
1685 for_each_online_cpu(cpu) {
1686 entry = &per_cpu(cpus, cpu);
1687 node = &nodes[cpu];
1688 ce = &entry->crit_entries[level];
1689 init_crit_entry(entry, ce, level, dom_data, node);
1690 bheap_node_init(&ce->node, ce);
1691 bheap_insert(cpu_lower_prio, heap, node);
1692 }
1693}
1694
1695static void init_edf_domain(struct domain *dom, rt_domain_t *rt,
1696 enum crit_level prio, int is_partitioned, int cpu)
1697{
1698 pd_domain_init(dom, rt, edf_ready_order, NULL,
1699 mc_release_jobs, edf_higher_prio);
1700 rt->level = prio;
1701#if defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1702 /* All timers are on one CPU and release-master is using the event
1703 * merging interface as well. */
1704 BUG_ON(NO_CPU == interrupt_cpu);
1705 rt->event_group = get_event_group_for(interrupt_cpu);
1706 rt->prio = prio;
1707#elif defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
1708 /* Using release master, but not merging timers. */
1709 /* rt->release_master = interrupt_cpu; */
1710#elif !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1711 /* Merge the timers, but don't move them to the release master. */
1712 if (is_partitioned) {
1713 rt->event_group = get_event_group_for(cpu);
1714 } else {
1715 /* Global timers will be added to the event groups that code is
1716 * executing on when add_event() is called.
1717 */
1718 rt->event_group = NULL;
1719 }
1720 rt->prio = prio;
1721#endif
1722}
1723
1724static char* domain_name(const char *name, int cpu)
1725{
1726 char *buf = kmalloc(LITMUS_LOCKDEP_NAME_MAX_LEN * sizeof(char), GFP_ATOMIC);
1727 snprintf(buf, LITMUS_LOCKDEP_NAME_MAX_LEN, "%s%d", name, cpu);
1728 return buf;
1729}
1730
1731struct domain_data *ce_domain_for(int);
1732static int __init init_mc(void)
1733{
1734 int cpu;
1735 rt_domain_t *rt;
1736 raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
1737 struct cpu_entry *entry;
1738 struct domain_data *dom_data;
1739 struct ce_dom_data *ce_data;
1740
1741 for_each_online_cpu(cpu) {
1742 entry = &per_cpu(cpus, cpu);
1743
1744 /* CPU */
1745 entry->cpu = cpu;
1746 entry->scheduled = NULL;
1747 entry->linked = NULL;
1748
1749 raw_spin_lock_init(&entry->lock);
1750 raw_spin_lock_init(&entry->signal_lock);
1751 clear_signal(&entry->signal);
1752
1753#ifdef CONFIG_PLUGIN_MC_REDIRECT
1754 raw_spin_lock_init(&entry->redir_lock);
1755 INIT_LIST_HEAD(&entry->redir);
1756#endif
1757
1758 /* CRIT_LEVEL_A */
1759 dom_data = &per_cpu(_mc_crit_a, cpu);
1760 ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
1761 a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
1762 dom_data->domain.acquire_resources = dumb_acquire;
1763 dom_data->domain.release_resources = dumb_release;
1764 raw_spin_lock_init(a_dom_lock);
1765 ce_domain_init(&dom_data->domain,
1766 a_dom_lock, ce_requeue, ce_peek_and_take_ready,
1767 ce_peek_and_take_ready, ce_higher_prio, ce_data, cpu,
1768 ce_timer_function);
1769 init_local_domain(entry, dom_data, CRIT_LEVEL_A);
1770 dom_data->domain.name = domain_name("LVL-A", cpu);
1771
1772 /* CRIT_LEVEL_B */
1773 dom_data = &per_cpu(_mc_crit_b, cpu);
1774 rt = &per_cpu(_mc_crit_b_rt, cpu);
1775 init_local_domain(entry, dom_data, CRIT_LEVEL_B);
1776 init_edf_domain(&dom_data->domain, rt, CRIT_LEVEL_B, 1, cpu);
1777 dom_data->domain.acquire_resources = acquire_resources;
1778 dom_data->domain.release_resources = release_resources;
1779 b_dom_lock = dom_data->domain.lock;
1780 raw_spin_lock_init(b_dom_lock);
1781
1782 dom_data->domain.name = domain_name("LVL-B", cpu);
1783 }
1784
1785 /* CRIT_LEVEL_C */
1786 init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
1787 &_mc_heap_c, _mc_nodes_c);
1788 init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, CRIT_LEVEL_C,
1789 0, NO_CPU);
1790 _mc_crit_c.domain.acquire_resources = dumb_acquire;
1791 _mc_crit_c.domain.release_resources = dumb_release;
1792 c_dom_lock = _mc_crit_c.domain.lock;
1793 raw_spin_lock_init(c_dom_lock);
1794 _mc_crit_c.domain.name = "LVL-C";
1795
1796
1797 /* GROUP LOCK */
1798 raw_spin_lock_init(&dgl_lock);
1799
1800 return register_sched_plugin(&mc_plugin);
1801}
1802
1803module_init(init_mc);