-rw-r--r--Documentation/vm/page-types.c71
-rw-r--r--arch/sparc/Kconfig9
-rw-r--r--arch/sparc/include/asm/spinlock_64.h92
-rw-r--r--arch/sparc/include/asm/spinlock_types.h5
-rw-r--r--arch/sparc/include/asm/unistd.h6
-rw-r--r--arch/sparc/kernel/systbls_32.S3
-rw-r--r--arch/sparc/kernel/systbls_64.S8
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c16
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--include/linux/hrtimer.h2
-rw-r--r--include/litmus/budget.h43
-rw-r--r--include/litmus/ce_domain.h27
-rw-r--r--include/litmus/color.h51
-rw-r--r--include/litmus/dgl.h65
-rw-r--r--include/litmus/domain.h50
-rw-r--r--include/litmus/event_group.h91
-rw-r--r--include/litmus/fifo_common.h25
-rw-r--r--include/litmus/litmus.h47
-rw-r--r--include/litmus/locking.h1
-rw-r--r--include/litmus/preempt.h4
-rw-r--r--include/litmus/rm_common.h25
-rw-r--r--include/litmus/rt_domain.h30
-rw-r--r--include/litmus/rt_param.h56
-rw-r--r--include/litmus/rt_server.h31
-rw-r--r--include/litmus/sched_mc.h134
-rw-r--r--include/litmus/sched_plugin.h7
-rw-r--r--include/litmus/sched_trace.h20
-rw-r--r--include/litmus/trace.h27
-rw-r--r--include/litmus/unistd_32.h3
-rw-r--r--include/litmus/unistd_64.h4
-rw-r--r--include/trace/events/litmus.h92
-rw-r--r--kernel/hrtimer.c32
-rw-r--r--kernel/sched.c6
-rw-r--r--litmus/Kconfig79
-rw-r--r--litmus/Makefile35
-rw-r--r--litmus/bheap.c3
-rw-r--r--litmus/budget.c55
-rw-r--r--litmus/ce_domain.c102
-rw-r--r--litmus/color.c357
-rw-r--r--litmus/color_dev.c351
-rw-r--r--litmus/color_proc.c220
-rw-r--r--litmus/dgl.c300
-rw-r--r--litmus/domain.c21
-rw-r--r--litmus/event_group.c334
-rw-r--r--litmus/fifo_common.c58
-rw-r--r--litmus/ftdev.c10
-rw-r--r--litmus/jobs.c15
-rw-r--r--litmus/litmus.c142
-rw-r--r--litmus/locking.c8
-rw-r--r--litmus/preempt.c3
-rw-r--r--litmus/rm_common.c91
-rw-r--r--litmus/rt_domain.c246
-rw-r--r--litmus/rt_server.c23
-rw-r--r--litmus/sched_color.c889
-rw-r--r--litmus/sched_gsn_edf.c6
-rw-r--r--litmus/sched_mc.c1373
-rw-r--r--litmus/sched_mc_ce.c1052
-rw-r--r--litmus/sched_plugin.c6
-rw-r--r--litmus/sched_psn_edf.c3
-rw-r--r--litmus/sched_task_trace.c13
-rw-r--r--litmus/sync.c3
-rw-r--r--litmus/trace.c51
-rw-r--r--mm/memory.c35
63 files changed, 6686 insertions, 282 deletions
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index 7445caa26d05..fe230def50d6 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -36,6 +36,8 @@
36#include <sys/statfs.h> 36#include <sys/statfs.h>
37#include "../../include/linux/magic.h" 37#include "../../include/linux/magic.h"
38 38
39#define COLOR_MASK 0x1fULL
40
39 41
40#ifndef MAX_PATH 42#ifndef MAX_PATH
41# define MAX_PATH 256 43# define MAX_PATH 256
@@ -181,13 +183,13 @@ static pid_t opt_pid; /* process to walk */
181 183
182#define MAX_ADDR_RANGES 1024 184#define MAX_ADDR_RANGES 1024
183static int nr_addr_ranges; 185static int nr_addr_ranges;
184static unsigned long opt_offset[MAX_ADDR_RANGES]; 186static unsigned long long opt_offset[MAX_ADDR_RANGES];
185static unsigned long opt_size[MAX_ADDR_RANGES]; 187static unsigned long long opt_size[MAX_ADDR_RANGES];
186 188
187#define MAX_VMAS 10240 189#define MAX_VMAS 10240
188static int nr_vmas; 190static int nr_vmas;
189static unsigned long pg_start[MAX_VMAS]; 191static unsigned long long pg_start[MAX_VMAS];
190static unsigned long pg_end[MAX_VMAS]; 192static unsigned long long pg_end[MAX_VMAS];
191 193
192#define MAX_BIT_FILTERS 64 194#define MAX_BIT_FILTERS 64
193static int nr_bit_filters; 195static int nr_bit_filters;
@@ -259,21 +261,26 @@ static int checked_open(const char *pathname, int flags)
259 return fd; 261 return fd;
260} 262}
261 263
264#define _LARGEFILE64_SOURCE
265
262/* 266/*
263 * pagemap/kpageflags routines 267 * pagemap/kpageflags routines
264 */ 268 */
265 269
266static unsigned long do_u64_read(int fd, char *name, 270static unsigned long do_u64_read(int fd, char *name,
267 uint64_t *buf, 271 uint64_t *buf,
268 unsigned long index, 272 unsigned long long index,
269 unsigned long count) 273 unsigned long count)
270{ 274{
271 long bytes; 275 long bytes;
276 long long lseek_ret;
272 277
273 if (index > ULONG_MAX / 8) 278 if (index > ULLONG_MAX / 8)
274 fatal("index overflow: %lu\n", index); 279 fatal("index overflow: %llu\n", index);
275 280
276 if (lseek(fd, index * 8, SEEK_SET) < 0) { 281
282 lseek_ret = lseek64(fd, index * 8, SEEK_SET);
283 if (lseek_ret < 0) {
277 perror(name); 284 perror(name);
278 exit(EXIT_FAILURE); 285 exit(EXIT_FAILURE);
279 } 286 }
@@ -290,14 +297,14 @@ static unsigned long do_u64_read(int fd, char *name,
290} 297}
291 298
292static unsigned long kpageflags_read(uint64_t *buf, 299static unsigned long kpageflags_read(uint64_t *buf,
293 unsigned long index, 300 unsigned long long index,
294 unsigned long pages) 301 unsigned long pages)
295{ 302{
296 return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); 303 return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
297} 304}
298 305
299static unsigned long pagemap_read(uint64_t *buf, 306static unsigned long pagemap_read(uint64_t *buf,
300 unsigned long index, 307 unsigned long long index,
301 unsigned long pages) 308 unsigned long pages)
302{ 309{
303 return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); 310 return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
@@ -364,7 +371,7 @@ static char *page_flag_longname(uint64_t flags)
364 */ 371 */
365 372
366static void show_page_range(unsigned long voffset, 373static void show_page_range(unsigned long voffset,
367 unsigned long offset, uint64_t flags) 374 unsigned long long offset, uint64_t flags)
368{ 375{
369 static uint64_t flags0; 376 static uint64_t flags0;
370 static unsigned long voff; 377 static unsigned long voff;
@@ -391,11 +398,11 @@ static void show_page_range(unsigned long voffset,
391} 398}
392 399
393static void show_page(unsigned long voffset, 400static void show_page(unsigned long voffset,
394 unsigned long offset, uint64_t flags) 401 unsigned long long offset, uint64_t flags)
395{ 402{
396 if (opt_pid) 403 if (opt_pid)
397 printf("%lx\t", voffset); 404 printf("%lx\t", voffset);
398 printf("%lx\t%s\n", offset, page_flag_name(flags)); 405 printf("%llx (%llu)\t%s\n", offset, offset & COLOR_MASK, page_flag_name(flags));
399} 406}
400 407
401static void show_summary(void) 408static void show_summary(void)
@@ -640,7 +647,7 @@ static int hash_slot(uint64_t flags)
640} 647}
641 648
642static void add_page(unsigned long voffset, 649static void add_page(unsigned long voffset,
643 unsigned long offset, uint64_t flags) 650 unsigned long long offset, uint64_t flags)
644{ 651{
645 flags = kpageflags_flags(flags); 652 flags = kpageflags_flags(flags);
646 653
@@ -663,7 +670,7 @@ static void add_page(unsigned long voffset,
663 670
664#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ 671#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */
665static void walk_pfn(unsigned long voffset, 672static void walk_pfn(unsigned long voffset,
666 unsigned long index, 673 unsigned long long index,
667 unsigned long count) 674 unsigned long count)
668{ 675{
669 uint64_t buf[KPAGEFLAGS_BATCH]; 676 uint64_t buf[KPAGEFLAGS_BATCH];
@@ -686,10 +693,10 @@ static void walk_pfn(unsigned long voffset,
686} 693}
687 694
688#define PAGEMAP_BATCH (64 << 10) 695#define PAGEMAP_BATCH (64 << 10)
689static void walk_vma(unsigned long index, unsigned long count) 696static void walk_vma(unsigned long long index, unsigned long count)
690{ 697{
691 uint64_t buf[PAGEMAP_BATCH]; 698 uint64_t buf[PAGEMAP_BATCH];
692 unsigned long batch; 699 unsigned long long batch;
693 unsigned long pages; 700 unsigned long pages;
694 unsigned long pfn; 701 unsigned long pfn;
695 unsigned long i; 702 unsigned long i;
@@ -711,10 +718,10 @@ static void walk_vma(unsigned long index, unsigned long count)
711 } 718 }
712} 719}
713 720
714static void walk_task(unsigned long index, unsigned long count) 721static void walk_task(unsigned long long index, unsigned long long count)
715{ 722{
716 const unsigned long end = index + count; 723 const unsigned long long end = index + count;
717 unsigned long start; 724 unsigned long long start;
718 int i = 0; 725 int i = 0;
719 726
720 while (index < end) { 727 while (index < end) {
@@ -725,21 +732,21 @@ static void walk_task(unsigned long index, unsigned long count)
725 if (pg_start[i] >= end) 732 if (pg_start[i] >= end)
726 return; 733 return;
727 734
728 start = max_t(unsigned long, pg_start[i], index); 735 start = max_t(unsigned long long, pg_start[i], index);
729 index = min_t(unsigned long, pg_end[i], end); 736 index = min_t(unsigned long long, pg_end[i], end);
730 737
731 assert(start < index); 738 assert(start < index);
732 walk_vma(start, index - start); 739 walk_vma(start, index - start);
733 } 740 }
734} 741}
735 742
736static void add_addr_range(unsigned long offset, unsigned long size) 743static void add_addr_range(unsigned long long offset, unsigned long long size)
737{ 744{
738 if (nr_addr_ranges >= MAX_ADDR_RANGES) 745 if (nr_addr_ranges >= MAX_ADDR_RANGES)
739 fatal("too many addr ranges\n"); 746 fatal("too many addr ranges\n");
740 747
741 opt_offset[nr_addr_ranges] = offset; 748 opt_offset[nr_addr_ranges] = offset;
742 opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); 749 opt_size[nr_addr_ranges] = min_t(unsigned long long, size, ULLONG_MAX-offset);
743 nr_addr_ranges++; 750 nr_addr_ranges++;
744} 751}
745 752
@@ -750,7 +757,7 @@ static void walk_addr_ranges(void)
750 kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); 757 kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
751 758
752 if (!nr_addr_ranges) 759 if (!nr_addr_ranges)
753 add_addr_range(0, ULONG_MAX); 760 add_addr_range(0, ULLONG_MAX);
754 761
755 for (i = 0; i < nr_addr_ranges; i++) 762 for (i = 0; i < nr_addr_ranges; i++)
756 if (!opt_pid) 763 if (!opt_pid)
@@ -857,15 +864,15 @@ static void parse_pid(const char *str)
857 } 864 }
858 865
859 while (fgets(buf, sizeof(buf), file) != NULL) { 866 while (fgets(buf, sizeof(buf), file) != NULL) {
860 unsigned long vm_start; 867 unsigned long long vm_start;
861 unsigned long vm_end; 868 unsigned long long vm_end;
862 unsigned long long pgoff; 869 unsigned long long pgoff;
863 int major, minor; 870 int major, minor;
864 char r, w, x, s; 871 char r, w, x, s;
865 unsigned long ino; 872 unsigned long ino;
866 int n; 873 int n;
867 874
868 n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", 875 n = sscanf(buf, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
869 &vm_start, 876 &vm_start,
870 &vm_end, 877 &vm_end,
871 &r, &w, &x, &s, 878 &r, &w, &x, &s,
@@ -892,8 +899,8 @@ static void parse_file(const char *name)
892 899
893static void parse_addr_range(const char *optarg) 900static void parse_addr_range(const char *optarg)
894{ 901{
895 unsigned long offset; 902 unsigned long long offset;
896 unsigned long size; 903 unsigned long long size;
897 char *p; 904 char *p;
898 905
899 p = strchr(optarg, ','); 906 p = strchr(optarg, ',');
@@ -906,12 +913,12 @@ static void parse_addr_range(const char *optarg)
906 } else if (p) { 913 } else if (p) {
907 offset = parse_number(optarg); 914 offset = parse_number(optarg);
908 if (p[1] == '\0') 915 if (p[1] == '\0')
909 size = ULONG_MAX; 916 size = ULLONG_MAX;
910 else { 917 else {
911 size = parse_number(p + 1); 918 size = parse_number(p + 1);
912 if (*p == ',') { 919 if (*p == ',') {
913 if (size < offset) 920 if (size < offset)
914 fatal("invalid range: %lu,%lu\n", 921 fatal("invalid range: %llu,%llu\n",
915 offset, size); 922 offset, size);
916 size -= offset; 923 size -= offset;
917 } 924 }
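The page-types.c changes widen offsets to 64 bits and make show_page() print each page's color, derived by masking the PFN with COLOR_MASK. A minimal standalone sketch of that calculation (the 0x1f mask implies a 32-color configuration; the value comes from the hunk above, not from the kernel coloring code):

#include <stdint.h>
#include <stdio.h>

#define COLOR_MASK 0x1fULL	/* low 5 PFN bits -> 32 page colors */

/* Same arithmetic show_page() now prints as "offset & COLOR_MASK". */
static unsigned long long page_color(unsigned long long pfn)
{
	return pfn & COLOR_MASK;
}

int main(void)
{
	unsigned long long pfn = 0x12345ULL;
	printf("pfn %llx -> color %llu\n", pfn, page_color(pfn));
	return 0;
}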
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 253986bd6bb6..6bc40d9e3007 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -606,3 +606,12 @@ source "security/Kconfig"
606source "crypto/Kconfig" 606source "crypto/Kconfig"
607 607
608source "lib/Kconfig" 608source "lib/Kconfig"
609
610config ARCH_HAS_FEATHER_TRACE
611 def_bool n
612
613# Probably add these later
614config ARCH_HAS_SEND_PULL_TIMERS
615 def_bool n
616
617source "litmus/Kconfig"
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 073936a8b275..0415e1867e92 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -13,82 +13,72 @@
13 * and rebuild your kernel. 13 * and rebuild your kernel.
14 */ 14 */
15 15
16/* Because we play games to save cycles in the non-contention case, we 16#define arch_spin_is_locked(lp) ((lp)->tail != (lp)->head)
17 * need to be extra careful about branch targets into the "spinning"
18 * code. They live in their own section, but the newer V9 branches
19 * have a shorter range than the traditional 32-bit sparc branch
20 * variants. The rule is that the branches that go into and out of
21 * the spinner sections must be pre-V9 branches.
22 */
23
24#define arch_spin_is_locked(lp) ((lp)->lock != 0)
25 17
26#define arch_spin_unlock_wait(lp) \ 18#define arch_spin_unlock_wait(lp) \
27 do { rmb(); \ 19 do { rmb(); \
28 } while((lp)->lock) 20 } while((lp)->tail != (lp)->head)
29 21
30static inline void arch_spin_lock(arch_spinlock_t *lock) 22static inline void arch_spin_lock(arch_spinlock_t *lock)
31{ 23{
32 unsigned long tmp; 24 int ticket, tmp;
33 25
34 __asm__ __volatile__( 26 __asm__ __volatile__(
35"1: ldstub [%1], %0\n" 27"1: lduw [%2], %0 \n" /* read ticket */
36" brnz,pn %0, 2f\n" 28" add %0, 1, %1 \n"
37" nop\n" 29" cas [%2], %0, %1 \n"
38" .subsection 2\n" 30" cmp %0, %1 \n"
39"2: ldub [%1], %0\n" 31" be,a,pt %%icc, 2f \n"
40" brnz,pt %0, 2b\n" 32" nop \n"
41" nop\n" 33" ba 1b\n"
42" ba,a,pt %%xcc, 1b\n" 34" nop \n"
43" .previous" 35"2: lduw [%3], %1 \n"
44 : "=&r" (tmp) 36" cmp %0, %1 \n"
45 : "r" (lock) 37" be,a,pt %%icc, 3f \n"
38" nop \n"
39" ba 2b\n"
40"3: nop"
41 : "=&r" (ticket), "=&r" (tmp)
42 : "r" (&lock->tail), "r" (&lock->head)
46 : "memory"); 43 : "memory");
47} 44}
48 45
49static inline int arch_spin_trylock(arch_spinlock_t *lock) 46static inline int arch_spin_trylock(arch_spinlock_t *lock)
50{ 47{
51 unsigned long result; 48 int tail, head;
52
53 __asm__ __volatile__( 49 __asm__ __volatile__(
54" ldstub [%1], %0\n" 50" lduw [%2], %0 \n" /* read tail */
55 : "=r" (result) 51" lduw [%3], %1 \n" /* read head */
56 : "r" (lock) 52" cmp %0, %1 \n"
53" bne,a,pn %%icc, 1f \n"
54" nop \n"
55" inc %1 \n"
56" cas [%2], %0, %1 \n" /* try to inc ticket */
57"1: "
58 : "=&r" (tail), "=&r" (head)
59 : "r" (&lock->tail), "r" (&lock->head)
57 : "memory"); 60 : "memory");
58 61
59 return (result == 0UL); 62 return (tail == head);
60} 63}
61 64
62static inline void arch_spin_unlock(arch_spinlock_t *lock) 65static inline void arch_spin_unlock(arch_spinlock_t *lock)
63{ 66{
67 int tmp;
64 __asm__ __volatile__( 68 __asm__ __volatile__(
65" stb %%g0, [%0]" 69" lduw [%1], %0 \n"
66 : /* No outputs */ 70" inc %0 \n"
67 : "r" (lock) 71" st %0, [%1] \n"
72 : "=&r" (tmp)
73 : "r" (&lock->head)
68 : "memory"); 74 : "memory");
69} 75}
70 76
71static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) 77/* We don't handle this yet, but it looks like not re-enabling the interrupts
72{ 78 * works fine, too. For example, lockdep also does it like this.
73 unsigned long tmp1, tmp2; 79 */
80#define arch_spin_lock_flags(l, f) arch_spin_lock(l)
74 81
75 __asm__ __volatile__(
76"1: ldstub [%2], %0\n"
77" brnz,pn %0, 2f\n"
78" nop\n"
79" .subsection 2\n"
80"2: rdpr %%pil, %1\n"
81" wrpr %3, %%pil\n"
82"3: ldub [%2], %0\n"
83" brnz,pt %0, 3b\n"
84" nop\n"
85" ba,pt %%xcc, 1b\n"
86" wrpr %1, %%pil\n"
87" .previous"
88 : "=&r" (tmp1), "=&r" (tmp2)
89 : "r"(lock), "r"(flags)
90 : "memory");
91}
92 82
93/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ 83/* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
94 84
diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h
index 9c454fdeaad8..49b89fe2ccfc 100644
--- a/arch/sparc/include/asm/spinlock_types.h
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -6,10 +6,11 @@
6#endif 6#endif
7 7
8typedef struct { 8typedef struct {
9 volatile unsigned char lock; 9 volatile int tail;
10 volatile int head;
10} arch_spinlock_t; 11} arch_spinlock_t;
11 12
12#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } 13#define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 }
13 14
14typedef struct { 15typedef struct {
15 volatile unsigned int lock; 16 volatile unsigned int lock;
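Together, these two sparc64 hunks replace the old ldstub test-and-set byte lock with a ticket lock built from separate tail/head counters. A C sketch of the logic the new assembly implements (illustration only, using GCC atomic builtins instead of inline asm; the kernel's lock path uses a cas retry loop, which is equivalent to the fetch-and-add below):

typedef struct {
	volatile int tail;	/* next ticket to hand out */
	volatile int head;	/* ticket currently being served */
} ticket_lock_t;

static void ticket_lock(ticket_lock_t *lock)
{
	/* take a ticket by atomically advancing tail */
	int ticket = __sync_fetch_and_add(&lock->tail, 1);

	/* spin until our ticket is served (cpu_relax() in real code) */
	while (lock->head != ticket)
		;
}

static int ticket_trylock(ticket_lock_t *lock)
{
	int head = lock->head;
	int tail = lock->tail;

	if (tail != head)
		return 0;	/* lock is held */
	/* lock looked free: try to take the next ticket atomically */
	return __sync_bool_compare_and_swap(&lock->tail, tail, tail + 1);
}

static void ticket_unlock(ticket_lock_t *lock)
{
	/* a real implementation needs a release barrier before this */
	lock->head++;	/* hand the lock to the next ticket */
}

Unlike the ldstub lock, waiters are served in FIFO order, which bounds the lock-acquisition delay of any single CPU.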
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 6260d5deeabc..54c43b6bc1d2 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -407,7 +407,11 @@
407#define __NR_sendmmsg 336 407#define __NR_sendmmsg 336
408#define __NR_setns 337 408#define __NR_setns 337
409 409
410#define NR_syscalls 338 410#define __NR_LITMUS 338
411
412#include "litmus/unistd_32.h"
413
414#define NR_syscalls 338 + NR_litmus_syscalls
411 415
412#ifdef __32bit_syscall_numbers__ 416#ifdef __32bit_syscall_numbers__
413/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, 417/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 6e492d59f6b1..941f04216a6c 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -85,3 +85,6 @@ sys_call_table:
85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init 85/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
86/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime 86/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
87/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns 87/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns
88/*338*/ sys_set_rt_task_param, sys_get_rt_task_param
89/*340*/ sys_complete_job, sys_od_open, sys_od_close, sys_litmus_lock, sys_litmus_unlock
90/*345*/ sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts, sys_null_call
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index f566518483b5..8543ae0db2d4 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -85,7 +85,9 @@ sys_call_table32:
85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv 85/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init 86 .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
87/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime 87/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
88 .word sys_syncfs, compat_sys_sendmmsg, sys_setns 88 .word sys_syncfs, sys_sendmmsg, sys_setns, sys_set_rt_task_param, sys_get_rt_task_param
89/*340*/ .word sys_complete_job, sys_od_open, sys_od_close, sys_litmus_lock, sys_litmus_unlock
90 .word sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts, sys_null_call
89 91
90#endif /* CONFIG_COMPAT */ 92#endif /* CONFIG_COMPAT */
91 93
@@ -162,4 +164,6 @@ sys_call_table:
162/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv 164/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
163 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init 165 .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
164/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime 166/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
165 .word sys_syncfs, sys_sendmmsg, sys_setns 167 .word sys_syncfs, sys_sendmmsg, sys_setns, sys_set_rt_task_param, sys_get_rt_task_param
168/*340*/ .word sys_complete_job, sys_od_open, sys_od_close, sys_litmus_lock, sys_litmus_unlock
169 .word sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts, sys_null_call
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0bf12644aa73..95d5636720a6 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1215,6 +1215,21 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1215 .notifier_call = cacheinfo_cpu_callback, 1215 .notifier_call = cacheinfo_cpu_callback,
1216}; 1216};
1217 1217
1218#include <litmus/color.h>
1219
1220static void set_color_vars(void)
1221{
1222 struct _cpuid4_info *leaf = CPUID4_INFO_IDX(
1223 smp_processor_id(), num_cache_leaves - 1);
1224 color_cache_info.size = leaf->size;
1225 color_cache_info.line_size =
1226 (unsigned long)leaf->ebx.split.coherency_line_size + 1;
1227 color_cache_info.ways =
1228 (unsigned long)leaf->ebx.split.ways_of_associativity + 1;
1229 color_cache_info.sets =
1230 (unsigned long)leaf->ecx.split.number_of_sets + 1;
1231}
1232
1218static int __cpuinit cache_sysfs_init(void) 1233static int __cpuinit cache_sysfs_init(void)
1219{ 1234{
1220 int i; 1235 int i;
@@ -1231,6 +1246,7 @@ static int __cpuinit cache_sysfs_init(void)
1231 return err; 1246 return err;
1232 } 1247 }
1233 register_hotcpu_notifier(&cacheinfo_cpu_notifier); 1248 register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1249 set_color_vars();
1234 return 0; 1250 return 0;
1235} 1251}
1236 1252
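set_color_vars() snapshots the last-level cache geometry (size, line size, ways, sets) for litmus/color.c. This hunk does not show how nr_colors is filled in; under the usual page-coloring arithmetic it would follow from the set count and line size, as in this sketch:

/* Assumed derivation (the real computation lives in litmus/color.c and is
 * not part of this hunk): a page color is the group of cache sets a page
 * maps to, so
 *
 *   nr_colors = (sets * line_size) / PAGE_SIZE
 *
 * Example: a 2 MB, 16-way L2 with 64-byte lines has
 *   sets      = 2097152 / (16 * 64) = 2048
 *   nr_colors = (2048 * 64) / 4096  = 32
 * matching the 32 colors implied by COLOR_MASK 0x1f in page-types.c.
 */
static unsigned long nr_colors(unsigned long sets, unsigned long line_size,
			       unsigned long page_size)
{
	return (sets * line_size) / page_size;
}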
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d0126222b394..a40c15970421 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -358,3 +358,4 @@ ENTRY(sys_call_table)
358 .long sys_wait_for_ts_release 358 .long sys_wait_for_ts_release
359 .long sys_release_ts /* +10 */ 359 .long sys_release_ts /* +10 */
360 .long sys_null_call 360 .long sys_null_call
361 .long sys_set_rt_task_mc_param
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index d91bba539ca8..dbb3fcd28928 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -385,6 +385,8 @@ extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info);
385extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info, 385extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
386 struct hrtimer *timer, ktime_t time, 386 struct hrtimer *timer, ktime_t time,
387 const enum hrtimer_mode mode); 387 const enum hrtimer_mode mode);
388extern int hrtimer_pull_cancel(int cpu, struct hrtimer *timer,
389 struct hrtimer_start_on_info *info);
388#endif 390#endif
389 391
390extern int hrtimer_cancel(struct hrtimer *timer); 392extern int hrtimer_cancel(struct hrtimer *timer);
diff --git a/include/litmus/budget.h b/include/litmus/budget.h
index 33344ee8d5f9..d1c73f5cf73e 100644
--- a/include/litmus/budget.h
+++ b/include/litmus/budget.h
@@ -1,10 +1,49 @@
1#ifndef _LITMUS_BUDGET_H_ 1#ifndef _LITMUS_BUDGET_H_
2#define _LITMUS_BUDGET_H_ 2#define _LITMUS_BUDGET_H_
3 3
4/* Update the per-processor enforcement timer (arm/reproram/cancel) for 4struct enforcement_timer {
5 * the next task. */ 5 struct hrtimer timer;
6 int armed;
7};
8
9/**
10 * update_enforcement_timer() - Update per-processor enforcement timer for
11 * the next scheduled task.
12 *
13 * If @t is not NULL and has a precisely enforced budget, the timer will be
14 * armed to trigger a reschedule when the budget is exhausted. Otherwise,
15 * the timer will be cancelled.
16*/
6void update_enforcement_timer(struct task_struct* t); 17void update_enforcement_timer(struct task_struct* t);
7 18
19void init_enforcement_timer(struct enforcement_timer *et);
20
21void arm_enforcement_timer(struct enforcement_timer* et, struct task_struct* t);
22
23void cancel_enforcement_timer(struct enforcement_timer* et);
24
25/* True if a task's server has progressed farther than the task
26 * itself. This happens when budget enforcement has caused a task to be
27 * booted off until the next period.
28 */
29#define behind_server(t)\
30 (lt_before((t)->rt_param.job_params.real_release, get_release(t)))
31
32/**
33 * server_release() - Prepare the task server parameters for the next period.
34 * The server for @t is what is actually executed from the schedulers
35 * perspective.
36 */
37void server_release(struct task_struct *t);
38
39/**
40 * task_release() - Prepare actual task parameters for the next period.
41 * The actual task parameters for @t, real_deadline and real_release, are
42 * the deadline and release from the tasks perspective. We only record these
43 * so that we can write them to feather trace.
44 */
45void task_release(struct task_struct *t);
46
8inline static int budget_exhausted(struct task_struct* t) 47inline static int budget_exhausted(struct task_struct* t)
9{ 48{
10 return get_exec_time(t) >= get_exec_cost(t); 49 return get_exec_time(t) >= get_exec_cost(t);
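The header now exposes the enforcement_timer directly so plugins can arm and cancel it themselves. A sketch of what arming presumably looks like (an assumption based on these declarations only; the actual code is in litmus/budget.c and is not shown here; budget_precisely_enforced(), budget_remaining() and litmus_clock() are existing LITMUS^RT helpers):

#include <linux/hrtimer.h>
#include <litmus/litmus.h>
#include <litmus/budget.h>

static void arm_enforcement_timer_sketch(struct enforcement_timer *et,
					 struct task_struct *t)
{
	lt_t when_to_fire;

	/* only arm for tasks whose budget is precisely enforced */
	if (budget_precisely_enforced(t) && !budget_exhausted(t)) {
		when_to_fire = litmus_clock() + budget_remaining(t);
		hrtimer_start(&et->timer, ns_to_ktime(when_to_fire),
			      HRTIMER_MODE_ABS_PINNED);
		et->armed = 1;
	}
}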
diff --git a/include/litmus/ce_domain.h b/include/litmus/ce_domain.h
new file mode 100644
index 000000000000..5d5fdf7d1efc
--- /dev/null
+++ b/include/litmus/ce_domain.h
@@ -0,0 +1,27 @@
1#ifndef _LITMUS_CE_DOMAIN_H
2#define _LITMUS_CE_DOMAIN_H
3
4/*
5 * Functions that the MC plugin needs to call through a domain pointer.
6 */
7void ce_requeue(domain_t*, struct task_struct*);
8struct task_struct* ce_peek_and_take_ready(domain_t*);
9int ce_higher_prio(struct task_struct*, struct task_struct*);
10
11#ifdef CONFIG_MERGE_TIMERS
12typedef void (*ce_timer_callback_t)(struct rt_event*);
13#else
14typedef enum hrtimer_restart (*ce_timer_callback_t)(struct hrtimer*);
15#endif
16
17void ce_domain_init(domain_t*,
18 raw_spinlock_t*,
19 requeue_t,
20 peek_ready_t,
21 take_ready_t,
22 preempt_needed_t,
23 task_prio_t,
24 struct ce_dom_data*,
25 const int,
26 ce_timer_callback_t);
27#endif
diff --git a/include/litmus/color.h b/include/litmus/color.h
new file mode 100644
index 000000000000..eefb6c6dddf5
--- /dev/null
+++ b/include/litmus/color.h
@@ -0,0 +1,51 @@
1#ifndef LITMUS_COLOR_H
2#define LITMUS_COLOR_H
3
4#ifdef __KERNEL__
5
6#define ONE_COLOR_LEN 11
7#define ONE_COLOR_FMT "%4lu: %4d\n"
8
9struct color_cache_info {
10 unsigned long size;
11 unsigned long line_size;
12 unsigned long ways;
13 unsigned long sets;
14 unsigned long nr_colors;
15};
16
17/* defined in litmus/color.c */
18extern struct color_cache_info color_cache_info;
19extern unsigned long color_chunk;
20
21struct page* get_colored_page(unsigned long);
22void add_page_to_color_list(struct page*);
23void add_page_to_alloced_list(struct page*, struct vm_area_struct*);
24void reclaim_pages(struct vm_area_struct*);
25
26int color_server_params(int cpu, unsigned long *wcet, unsigned long *period);
27
28int color_add_pages_handler(struct ctl_table *, int, void __user *,
29 size_t *, loff_t *);
30int color_nr_pages_handler(struct ctl_table *, int, void __user *,
31 size_t *, loff_t *);
32int color_reclaim_pages_handler(struct ctl_table *, int, void __user *,
33 size_t *, loff_t *);
34
35#ifdef CONFIG_LOCKDEP
36#define LITMUS_LOCKDEP_NAME_MAX_LEN 50
37#define LOCKDEP_DYNAMIC_ALLOC(lock, key, name_buf, fmt, args...) \
38 do { \
39 snprintf(name_buf, LITMUS_LOCKDEP_NAME_MAX_LEN, \
40 fmt, ## args); \
41 lockdep_set_class_and_name(lock, key, name_buf); \
42 } while (0)
43#else
44#define LITMUS_LOCKDEP_NAME_MAX_LEN 0
45#define LOCKDEP_DYNAMIC_ALLOC(lock, key, name_buf, fmt, args) \
46 do { (void)(key); } while (0)
47#endif
48
49#endif
50
51#endif
diff --git a/include/litmus/dgl.h b/include/litmus/dgl.h
new file mode 100644
index 000000000000..acd58f80b58b
--- /dev/null
+++ b/include/litmus/dgl.h
@@ -0,0 +1,65 @@
1#ifndef __DGL_H_
2#define __DGL_H_
3
4#include <litmus/color.h>
5#include <linux/list.h>
6
7/*
8 * A request for @replica amount of a single resource.
9 */
10struct dgl_req {
11 unsigned short replicas;
12 struct list_head list;
13 struct dgl_group_req *greq;
14};
15
16/*
17 * Simultaneous @requests for multiple resources.
18 */
19struct dgl_group_req {
20 int cpu;
21 unsigned long *requested;
22 unsigned long *waiting;
23
24 struct dgl_req *requests;
25
26 unsigned long long ts;
27};
28
29/*
30 * A single resource.
31 */
32struct dgl_resource {
33 unsigned long free_replicas;
34 struct list_head waiting;
35};
36
37/*
38 * A group of resources.
39 */
40struct dgl {
41 struct dgl_resource *resources;
42 struct dgl_group_req* *acquired;
43
44 char requests;
45 char running;
46 unsigned long long ts;
47
48 unsigned long num_resources;
49 unsigned long num_replicas;
50};
51
52void dgl_init(struct dgl *dgl, unsigned long num_resources,
53 unsigned long num_replicas);
54void dgl_free(struct dgl *dgl);
55
56void dgl_group_req_init(struct dgl *dgl, struct dgl_group_req *greq);
57void dgl_group_req_free(struct dgl_group_req *greq);
58
59void set_req(struct dgl *dgl, struct dgl_group_req *greq,
60 int resource, int replicas);
61
62void add_group_req(struct dgl *dgl, struct dgl_group_req *greq, int cpu);
63void remove_group_req(struct dgl *dgl, struct dgl_group_req *greq);
64
65#endif
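A usage sketch for the dynamic group lock (DGL) API declared above. The sizes and resource indices are made up, and whether add_group_req() blocks or merely enqueues the request when replicas are unavailable is not visible from the header, so the critical-section comment is an assumption:

#include <litmus/dgl.h>

static struct dgl sys_dgl;
static struct dgl_group_req my_req;

static void dgl_example(int cpu)
{
	/* e.g. 16 resources (cache colors) with 4 replicas (ways) each */
	dgl_init(&sys_dgl, 16, 4);

	dgl_group_req_init(&sys_dgl, &my_req);

	/* this request needs 2 replicas of resource 3 and 1 of resource 7 */
	set_req(&sys_dgl, &my_req, 3, 2);
	set_req(&sys_dgl, &my_req, 7, 1);

	/* acquire (or wait for) all requested replicas as one group */
	add_group_req(&sys_dgl, &my_req, cpu);

	/* ... critical section using the acquired replicas ... */

	remove_group_req(&sys_dgl, &my_req);
	dgl_group_req_free(&my_req);
	dgl_free(&sys_dgl);
}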
diff --git a/include/litmus/domain.h b/include/litmus/domain.h
new file mode 100644
index 000000000000..d16ed1872a52
--- /dev/null
+++ b/include/litmus/domain.h
@@ -0,0 +1,50 @@
1/**
2 * --Todo--
3 * Naming: this should become rt_domain while the old rt_domain should be
4 * changed to sd_domain (sporadic) or pd_domain (periodic).
5 * task_new: need to add and use this method
6 */
7#ifndef _LITMUS_DOMAIN_H_
8#define _LITMUS_DOMAIN_H_
9
10struct domain;
11
12typedef void (*requeue_t)(struct domain*, struct task_struct*);
13typedef void (*remove_t)(struct domain*, struct task_struct*);
14typedef struct task_struct* (*peek_ready_t)(struct domain*);
15typedef struct task_struct* (*take_ready_t)(struct domain*);
16typedef int (*preempt_needed_t)(struct domain*, struct task_struct*);
17typedef int (*task_prio_t)(struct task_struct*, struct task_struct*);
18
19typedef struct domain {
20 raw_spinlock_t* lock; /* for coarse serialization */
21 struct list_head list; /* list membership */
22 void* data; /* implementation-specific data */
23 char* name; /* for debugging */
24
25 /* add a task to the domain */
26 requeue_t requeue;
27 /* prevent a task from being returned by the domain */
28 remove_t remove;
29 /* return next ready task */
30 peek_ready_t peek_ready;
31 /* remove and return next ready task */
32 take_ready_t take_ready;
33 /* return true if the domain has a task which should preempt the
34 * task given
35 */
36 preempt_needed_t preempt_needed;
37 /* for tasks within this domain, returns true if the first has
38 * has a higher priority than the second
39 */
40 task_prio_t higher_prio;
41} domain_t;
42
43void domain_init(domain_t *dom,
44 raw_spinlock_t *lock,
45 requeue_t requeue,
46 peek_ready_t peek_ready,
47 take_ready_t take_ready,
48 preempt_needed_t preempt_needed,
49 task_prio_t priority);
50#endif
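domain_t is essentially a vtable over a ready queue. A sketch of how a plugin might drive it through the function pointers above (illustration only; real plugin code also uses remove() and peek_ready() and tracks linking/preemption state):

#include <linux/spinlock.h>
#include <litmus/domain.h>

static struct task_struct *pick_next(domain_t *dom,
				     struct task_struct *prev)
{
	struct task_struct *next = prev;

	raw_spin_lock(dom->lock);
	if (!prev || dom->preempt_needed(dom, prev)) {
		next = dom->take_ready(dom);	/* pop highest-priority task */
		if (prev)
			dom->requeue(dom, prev);/* put preempted task back */
	}
	raw_spin_unlock(dom->lock);
	return next;
}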
diff --git a/include/litmus/event_group.h b/include/litmus/event_group.h
new file mode 100644
index 000000000000..b0654e0ec5e6
--- /dev/null
+++ b/include/litmus/event_group.h
@@ -0,0 +1,91 @@
1#ifndef _LINUX_EVENT_QUEUE_H_
2#define _LINUX_EVENT_QUEUE_H_
3
4#define EVENT_QUEUE_SLOTS 127 /* prime */
5
6#define NUM_EVENT_PRIORITIES 4 /* num crit levels really */
7
8struct rt_event;
9typedef void (*fire_event_t)(struct rt_event *e);
10
11struct event_group {
12 lt_t res;
13 int cpu;
14 struct list_head event_queue[EVENT_QUEUE_SLOTS];
15 raw_spinlock_t queue_lock;
16};
17
18/**
19 * A group of actions to fire at a given time
20 */
21struct event_list {
22 /* Use multiple list heads so that inserts are O(1) */
23 struct list_head events[NUM_EVENT_PRIORITIES];
24
25 /* For timer firing */
26 lt_t fire_time;
27 struct hrtimer timer;
28 struct hrtimer_start_on_info info;
29
30 struct list_head queue_node; /* For event_queue */
31 struct event_group* group; /* For callback */
32};
33
34/**
35 * A single action to fire at a time
36 */
37struct rt_event {
38 /* Function to call on event expiration */
39 fire_event_t function;
40 /* Priority of this event (lower is better) */
41 int prio;
42
43 /* For membership in the event_list */
44 struct list_head events_node;
45 /* To avoid runtime allocation. This is NOT necessarily
46 * the event_list containing this event. This is just a
47 * pre-allocated event list which can be used for merging
48 * events.
49 */
50 struct event_list* event_list;
51 /* Pointer set by add_event() so that we can cancel this event
52 * without knowing what group it is in (don't touch it).
53 */
54 struct event_group* _event_group;
55};
56
57/**
58 * add_event() - Add timer to event group.
59 * @group Group with which to merge event. If NULL, use the event
60 * group of whatever CPU currently executing on.
61 * @e Event to be fired at a specific time
62 * @time Time to fire event
63 */
64void add_event(struct event_group* group, struct rt_event* e, lt_t time);
65
66/**
67 * cancel_event() - Remove event from the group.
68 */
69void cancel_event(struct rt_event*);
70
71/**
72 * init_event() - Create an event.
73 * @e Event to create
74 * @prio Priority of the event (lower is better)
75 * @function Function to fire when event expires
76 * @el Pre-allocated event list for timer merging
77 */
78void init_event(struct rt_event* e, int prio, fire_event_t function,
79 struct event_list *el);
80
81struct event_list* event_list_alloc(int);
82void event_list_free(struct event_list *el);
83
84/**
85 * get_event_group_for() - Get the event group for a CPU.
86 * @cpu The CPU to get the event group for. Use NO_CPU to get the
87 * event group of the CPU that the call is executing on.
88 */
89struct event_group *get_event_group_for(const int cpu);
90
91#endif
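A usage sketch for the event-group API: events added to the same CPU's group within its res window share a single hrtimer. The callback, priority value and GFP flag below are placeholders; litmus_reschedule_local() and NO_CPU are existing LITMUS^RT symbols:

#include <litmus/event_group.h>
#include <litmus/preempt.h>

static struct rt_event budget_event;

/* Runs from the shared hrtimer when the merged event fires. */
static void on_budget_exhausted(struct rt_event *e)
{
	litmus_reschedule_local();
}

static void example_setup(void)
{
	/* priority 1 and GFP_ATOMIC are placeholder choices */
	init_event(&budget_event, 1, on_budget_exhausted,
		   event_list_alloc(GFP_ATOMIC));
}

static void example_arm(lt_t when)
{
	/* NO_CPU merges into the local CPU's event group */
	add_event(get_event_group_for(NO_CPU), &budget_event, when);
}

static void example_cancel(void)
{
	cancel_event(&budget_event);
}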
diff --git a/include/litmus/fifo_common.h b/include/litmus/fifo_common.h
new file mode 100644
index 000000000000..4756f77bd511
--- /dev/null
+++ b/include/litmus/fifo_common.h
@@ -0,0 +1,25 @@
1/*
2 * FIFO common data structures and utility functions shared by all
3 * FIFO-based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __FIFO_COMMON_H__
11#define __FIFO_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int fifo_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int fifo_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 3e78b9c61580..c3b91fe8115c 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -44,6 +44,8 @@ void litmus_exit_task(struct task_struct *tsk);
44 44
45#define tsk_rt(t) (&(t)->rt_param) 45#define tsk_rt(t) (&(t)->rt_param)
46 46
47#define get_server_job(t) (tsk_rt(t)->job_params.fake_job_no)
48
47/* Realtime utility macros */ 49/* Realtime utility macros */
48#define get_rt_flags(t) (tsk_rt(t)->flags) 50#define get_rt_flags(t) (tsk_rt(t)->flags)
49#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) 51#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
@@ -55,6 +57,7 @@ void litmus_exit_task(struct task_struct *tsk);
55#define get_rt_period(t) (tsk_rt(t)->task_params.period) 57#define get_rt_period(t) (tsk_rt(t)->task_params.period)
56#define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline) 58#define get_rt_relative_deadline(t) (tsk_rt(t)->task_params.relative_deadline)
57#define get_rt_phase(t) (tsk_rt(t)->task_params.phase) 59#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
60#define get_rt_job(t) (tsk_rt(t)->job_params.job_no)
58#define get_partition(t) (tsk_rt(t)->task_params.cpu) 61#define get_partition(t) (tsk_rt(t)->task_params.cpu)
59#define get_priority(t) (tsk_rt(t)->task_params.priority) 62#define get_priority(t) (tsk_rt(t)->task_params.priority)
60#define get_class(t) (tsk_rt(t)->task_params.cls) 63#define get_class(t) (tsk_rt(t)->task_params.cls)
@@ -64,6 +67,14 @@ void litmus_exit_task(struct task_struct *tsk);
64#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) 67#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
65#define get_deadline(t) (tsk_rt(t)->job_params.deadline) 68#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
66#define get_release(t) (tsk_rt(t)->job_params.release) 69#define get_release(t) (tsk_rt(t)->job_params.release)
70#define get_class(t) (tsk_rt(t)->task_params.cls)
71
72#define get_task_domain(t) (tsk_rt(t)->_domain)
73#define is_server(t) (tsk_rt(t)->is_server)
74#define get_task_server(task) (tsk_rt(task)->server)
75
76#define is_priority_boosted(t) (tsk_rt(t)->priority_boosted)
77#define get_boost_start(t) (tsk_rt(t)->boost_start_time)
67#define get_lateness(t) (tsk_rt(t)->job_params.lateness) 78#define get_lateness(t) (tsk_rt(t)->job_params.lateness)
68 79
69#define is_hrt(t) \ 80#define is_hrt(t) \
@@ -116,6 +127,16 @@ void srp_ceiling_block(void);
116 127
117#define bheap2task(hn) ((struct task_struct*) hn->value) 128#define bheap2task(hn) ((struct task_struct*) hn->value)
118 129
130static inline struct control_page* get_control_page(struct task_struct *t)
131{
132 return tsk_rt(t)->ctrl_page;
133}
134
135static inline int has_control_page(struct task_struct* t)
136{
137 return tsk_rt(t)->ctrl_page != NULL;
138}
139
119#ifdef CONFIG_NP_SECTION 140#ifdef CONFIG_NP_SECTION
120 141
121static inline int is_kernel_np(struct task_struct *t) 142static inline int is_kernel_np(struct task_struct *t)
@@ -142,7 +163,7 @@ static inline void request_exit_np(struct task_struct *t)
142 163
143static inline void make_np(struct task_struct *t) 164static inline void make_np(struct task_struct *t)
144{ 165{
145 tsk_rt(t)->kernel_np++; 166 tsk_rt(t)->kernel_np = 1;
146} 167}
147 168
148/* Caller should check if preemption is necessary when 169/* Caller should check if preemption is necessary when
@@ -150,7 +171,7 @@ static inline void make_np(struct task_struct *t)
150 */ 171 */
151static inline int take_np(struct task_struct *t) 172static inline int take_np(struct task_struct *t)
152{ 173{
153 return --tsk_rt(t)->kernel_np; 174 return tsk_rt(t)->kernel_np = 0;
154} 175}
155 176
156/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */ 177/* returns 0 if remote CPU needs an IPI to preempt, 1 if no IPI is required */
@@ -183,6 +204,20 @@ static inline int request_exit_np_atomic(struct task_struct *t)
183 204
184#else 205#else
185 206
207
208static inline void make_np(struct task_struct *t)
209{
210
211}
212
213/* Caller should check if preemption is necessary when
214 * the function return 0.
215 */
216static inline int take_np(struct task_struct *t)
217{
218 return 0;
219}
220
186static inline int is_kernel_np(struct task_struct* t) 221static inline int is_kernel_np(struct task_struct* t)
187{ 222{
188 return 0; 223 return 0;
@@ -218,10 +253,6 @@ static inline int is_np(struct task_struct *t)
218 int kernel, user; 253 int kernel, user;
219 kernel = is_kernel_np(t); 254 kernel = is_kernel_np(t);
220 user = is_user_np(t); 255 user = is_user_np(t);
221 if (kernel || user)
222 TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
223
224 kernel, user);
225 return kernel || user; 256 return kernel || user;
226#else 257#else
227 return unlikely(is_kernel_np(t) || is_user_np(t)); 258 return unlikely(is_kernel_np(t) || is_user_np(t));
@@ -260,4 +291,8 @@ static inline quanta_t time2quanta(lt_t time, enum round round)
260/* By how much is cpu staggered behind CPU 0? */ 291/* By how much is cpu staggered behind CPU 0? */
261u64 cpu_stagger_offset(int cpu); 292u64 cpu_stagger_offset(int cpu);
262 293
294#define TS_SYSCALL_IN_START \
295 if (has_control_page(current)) \
296 __TS_SYSCALL_IN_START(&get_control_page(current)->ts_syscall_start);
297
263#endif 298#endif
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
index 4d7b870cb443..41991d5af01b 100644
--- a/include/litmus/locking.h
+++ b/include/litmus/locking.h
@@ -9,6 +9,7 @@ struct litmus_lock_ops;
9struct litmus_lock { 9struct litmus_lock {
10 struct litmus_lock_ops *ops; 10 struct litmus_lock_ops *ops;
11 int type; 11 int type;
12 int id;
12}; 13};
13 14
14struct litmus_lock_ops { 15struct litmus_lock_ops {
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
index 380b886d78ff..5b69e26fc57d 100644
--- a/include/litmus/preempt.h
+++ b/include/litmus/preempt.h
@@ -8,11 +8,13 @@
8 8
9#include <litmus/debug_trace.h> 9#include <litmus/debug_trace.h>
10 10
11
11extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); 12extern DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12 13
13#ifdef CONFIG_PREEMPT_STATE_TRACE 14#ifdef CONFIG_PREEMPT_STATE_TRACE
14const char* sched_state_name(int s); 15const char* sched_state_name(int s);
15#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args) 16//#define TRACE_STATE(fmt, args...) TRACE("SCHED_STATE " fmt, args)
17#define TRACE_STATE(fmt, args...) /* ignore */
16#else 18#else
17#define TRACE_STATE(fmt, args...) /* ignore */ 19#define TRACE_STATE(fmt, args...) /* ignore */
18#endif 20#endif
diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
new file mode 100644
index 000000000000..3e03d9b5d140
--- /dev/null
+++ b/include/litmus/rm_common.h
@@ -0,0 +1,25 @@
1/*
2 * RM common data structures and utility functions shared by all RM
3 * based scheduler plugins
4 */
5
6/* CLEANUP: Add comments and make it less messy.
7 *
8 */
9
10#ifndef __UNC_RM_COMMON_H__
11#define __UNC_RM_COMMON_H__
12
13#include <litmus/rt_domain.h>
14
15void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
16 release_jobs_t release);
17
18int rm_higher_prio(struct task_struct* first,
19 struct task_struct* second);
20
21int rm_ready_order(struct bheap_node* a, struct bheap_node* b);
22
23int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t);
24
25#endif
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
index ac249292e866..03826228dbd9 100644
--- a/include/litmus/rt_domain.h
+++ b/include/litmus/rt_domain.h
@@ -6,6 +6,9 @@
6#define __UNC_RT_DOMAIN_H__ 6#define __UNC_RT_DOMAIN_H__
7 7
8#include <litmus/bheap.h> 8#include <litmus/bheap.h>
9#include <litmus/domain.h>
10#include <litmus/event_group.h>
11#include <litmus/sched_mc.h>
9 12
10#define RELEASE_QUEUE_SLOTS 127 /* prime */ 13#define RELEASE_QUEUE_SLOTS 127 /* prime */
11 14
@@ -29,7 +32,11 @@ typedef struct _rt_domain {
29 raw_spinlock_t release_lock; 32 raw_spinlock_t release_lock;
30 struct release_queue release_queue; 33 struct release_queue release_queue;
31 34
32#ifdef CONFIG_RELEASE_MASTER 35#if defined(CONFIG_MERGE_TIMERS)
36 struct event_group* event_group;
37 int prio;
38#endif
39#if defined(CONFIG_RELEASE_MASTER)
33 int release_master; 40 int release_master;
34#endif 41#endif
35 42
@@ -45,6 +52,8 @@ typedef struct _rt_domain {
45 52
46 /* how are tasks ordered in the ready queue? */ 53 /* how are tasks ordered in the ready queue? */
47 bheap_prio_t order; 54 bheap_prio_t order;
55
56 enum crit_level level;
48} rt_domain_t; 57} rt_domain_t;
49 58
50struct release_heap { 59struct release_heap {
@@ -53,13 +62,18 @@ struct release_heap {
53 lt_t release_time; 62 lt_t release_time;
54 /* all tasks to be released at release_time */ 63 /* all tasks to be released at release_time */
55 struct bheap heap; 64 struct bheap heap;
65
66#ifdef CONFIG_MERGE_TIMERS
67 /* used to merge timer calls */
68 struct rt_event event;
69#else
56 /* used to trigger the release */ 70 /* used to trigger the release */
57 struct hrtimer timer; 71 struct hrtimer timer;
58
59#ifdef CONFIG_RELEASE_MASTER 72#ifdef CONFIG_RELEASE_MASTER
60 /* used to delegate releases */ 73 /* used to delegate releases */
61 struct hrtimer_start_on_info info; 74 struct hrtimer_start_on_info info;
62#endif 75#endif
76#endif
63 /* required for the timer callback */ 77 /* required for the timer callback */
64 rt_domain_t* dom; 78 rt_domain_t* dom;
65}; 79};
@@ -76,12 +90,22 @@ static inline struct task_struct* __next_ready(rt_domain_t* rt)
76 90
77void rt_domain_init(rt_domain_t *rt, bheap_prio_t order, 91void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
78 check_resched_needed_t check, 92 check_resched_needed_t check,
79 release_jobs_t relase); 93 release_jobs_t release);
94
95void pd_domain_init(domain_t *dom,
96 rt_domain_t *rt,
97 bheap_prio_t order,
98 check_resched_needed_t check,
99 release_jobs_t release,
100 preempt_needed_t preempt_needed,
101 task_prio_t priority);
80 102
81void __add_ready(rt_domain_t* rt, struct task_struct *new); 103void __add_ready(rt_domain_t* rt, struct task_struct *new);
82void __merge_ready(rt_domain_t* rt, struct bheap *tasks); 104void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
83void __add_release(rt_domain_t* rt, struct task_struct *task); 105void __add_release(rt_domain_t* rt, struct task_struct *task);
84 106
107struct release_heap* release_heap_alloc(int gfp_flags);
108
85static inline struct task_struct* __take_ready(rt_domain_t* rt) 109static inline struct task_struct* __take_ready(rt_domain_t* rt)
86{ 110{
87 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue); 111 struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 20268190757f..a8c82eed5562 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -91,7 +91,31 @@ union np_flag {
91struct control_page { 91struct control_page {
92 volatile union np_flag sched; 92 volatile union np_flag sched;
93 93
94 /* to be extended */ 94 /* locking overhead tracing: time stamp prior to system call */
95 uint64_t ts_syscall_start; /* Feather-Trace cycles */
96
97 int colors_updated:8;
98};
99
100#ifndef __KERNEL__
101/*
102 * XXX This is a terrible hack so liblitmus can use the PAGE_SIZE macro.
103 * We should fix liblitmus to set up the page size at runtime.
104 */
105#define CACHE_LINE_SIZE 64
106#if (ARCH == x86_64)
107#define PAGE_SIZE 4096
108#elif (ARCH == sparc64)
109#define PAGE_SIZE 8192
110#endif
111#endif /* ifndef __KERNEL__ */
112
113typedef uint8_t color_t;
114#define COLORS_PER_CONTROL_PAGE (PAGE_SIZE / (2 * sizeof(color_t)))
115struct color_ctrl_page {
116 color_t colors[COLORS_PER_CONTROL_PAGE];
117 /* must be same type to guarantee equal array sizes */
118 color_t pages[COLORS_PER_CONTROL_PAGE];
95}; 119};
96 120
97/* don't export internal data structures to user space (liblitmus) */ 121/* don't export internal data structures to user space (liblitmus) */
@@ -100,6 +124,9 @@ struct control_page {
100struct _rt_domain; 124struct _rt_domain;
101struct bheap_node; 125struct bheap_node;
102struct release_heap; 126struct release_heap;
127struct domain;
128struct rt_server;
129struct dgl_group_req;
103 130
104struct rt_job { 131struct rt_job {
105 /* Time instant the the job was or will be released. */ 132 /* Time instant the the job was or will be released. */
@@ -107,6 +134,10 @@ struct rt_job {
107 /* What is the current deadline? */ 134 /* What is the current deadline? */
108 lt_t deadline; 135 lt_t deadline;
109 136
137 lt_t real_release;
138 lt_t real_deadline;
139 unsigned int fake_job_no;
140
110 /* How much service has this job received so far? */ 141 /* How much service has this job received so far? */
111 lt_t exec_time; 142 lt_t exec_time;
112 143
@@ -127,6 +158,9 @@ struct rt_job {
127}; 158};
128 159
129struct pfair_param; 160struct pfair_param;
161#ifdef CONFIG_PLUGIN_MC
162struct mc_data;
163#endif
130 164
131/* RT task parameters for scheduling extensions 165/* RT task parameters for scheduling extensions
132 * These parameters are inherited during clone and therefore must 166 * These parameters are inherited during clone and therefore must
@@ -142,6 +176,8 @@ struct rt_param {
142 /* is the task present? (true if it can be scheduled) */ 176 /* is the task present? (true if it can be scheduled) */
143 unsigned int present:1; 177 unsigned int present:1;
144 178
179 unsigned int is_server:1;
180
145#ifdef CONFIG_LITMUS_LOCKING 181#ifdef CONFIG_LITMUS_LOCKING
146 /* Is the task being priority-boosted by a locking protocol? */ 182 /* Is the task being priority-boosted by a locking protocol? */
147 unsigned int priority_boosted:1; 183 unsigned int priority_boosted:1;
@@ -149,6 +185,17 @@ struct rt_param {
149 lt_t boost_start_time; 185 lt_t boost_start_time;
150#endif 186#endif
151 187
188#ifdef CONFIG_PLUGIN_MC
189 /* mixed criticality specific data */
190 struct mc_data *mc_data;
191#endif
192#ifdef CONFIG_MERGE_TIMERS
193 struct rt_event *event;
194#endif
195
196 struct rt_server *server;
197
198
152 /* user controlled parameters */ 199 /* user controlled parameters */
153 struct rt_task task_params; 200 struct rt_task task_params;
154 201
@@ -203,6 +250,9 @@ struct rt_param {
203 int old_policy; 250 int old_policy;
204 int old_prio; 251 int old_prio;
205 252
253 /* TODO: rename */
254 struct domain *_domain;
255
206 /* ready queue for this task */ 256 /* ready queue for this task */
207 struct _rt_domain* domain; 257 struct _rt_domain* domain;
208 258
@@ -229,8 +279,12 @@ struct rt_param {
229 lt_t total_tardy; 279 lt_t total_tardy;
230 lt_t max_tardy; 280 lt_t max_tardy;
231 unsigned int missed; 281 unsigned int missed;
282
232 lt_t max_exec_time; 283 lt_t max_exec_time;
233 lt_t tot_exec_time; 284 lt_t tot_exec_time;
285 lt_t last_exec_time;
286 struct color_ctrl_page *color_ctrl_page;
287 struct dgl_group_req *req;
234}; 288};
235 289
236/* Possible RT flags */ 290/* Possible RT flags */
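Worked sizing for the new color_ctrl_page, following directly from the definitions above:

/* With color_t = uint8_t and a 4 KB page (the x86-64 case above):
 *
 *   COLORS_PER_CONTROL_PAGE = PAGE_SIZE / (2 * sizeof(color_t))
 *                           = 4096 / 2 = 2048
 *
 * so one color_ctrl_page holds 2048 (color, page-count) pairs, and the two
 * arrays together fill the page exactly: 2 * 2048 * 1 byte = 4096 bytes.
 * On sparc64 (8 KB pages) the same formula gives 4096 entries per array.
 */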
diff --git a/include/litmus/rt_server.h b/include/litmus/rt_server.h
new file mode 100644
index 000000000000..0e2feb6c6b0e
--- /dev/null
+++ b/include/litmus/rt_server.h
@@ -0,0 +1,31 @@
1#ifndef __RT_SERVER_H
2#define __RT_SERVER_H
3
4#include <linux/sched.h>
5#include <litmus/litmus.h>
6#include <litmus/rt_domain.h>
7
8struct rt_server;
9
10typedef int (*need_preempt_t)(rt_domain_t *rt, struct task_struct *t);
11typedef void (*server_update_t)(struct rt_server *srv);
12
13struct rt_server {
14 int sid;
15 int cpu;
16 struct task_struct* linked;
17 rt_domain_t* domain;
18 int running;
19
20 /* Does this server have a higher-priority task? */
21 need_preempt_t need_preempt;
22 /* System state has changed, so should server */
23 server_update_t update;
24};
25
26void init_rt_server(struct rt_server *server,
27 int sid, int cpu, rt_domain_t *domain,
28 need_preempt_t need_preempt,
29 server_update_t update);
30
31#endif
diff --git a/include/litmus/sched_mc.h b/include/litmus/sched_mc.h
new file mode 100644
index 000000000000..1d491ce6a31a
--- /dev/null
+++ b/include/litmus/sched_mc.h
@@ -0,0 +1,134 @@
1#ifndef _LINUX_SCHED_MC_H_
2#define _LINUX_SCHED_MC_H_
3
4/* criticality levels */
5enum crit_level {
6 /* probably don't need to assign these (paranoid) */
7 CRIT_LEVEL_A = 0,
8 CRIT_LEVEL_B = 1,
9 CRIT_LEVEL_C = 2,
10 NUM_CRIT_LEVELS = 3,
11};
12
13struct mc_task {
14 enum crit_level crit;
15 int lvl_a_id;
16 int lvl_a_eligible;
17};
18
19struct mc_job {
20 int is_ghost:1;
21 lt_t ghost_budget;
22};
23
24#ifdef __KERNEL__
25/*
26 * These are used only in the kernel. Userspace programs like RTSpin won't see
27 * them.
28 */
29struct mc_data {
30 struct mc_task mc_task;
31 struct mc_job mc_job;
32};
33
34#define tsk_mc_data(t) (tsk_rt(t)->mc_data)
35#define tsk_mc_crit(t) (tsk_mc_data(t) ? tsk_mc_data(t)->mc_task.crit : CRIT_LEVEL_C)
36#define is_ghost(t) (tsk_mc_data(t)->mc_job.is_ghost)
37
38#define TS "(%s/%d:%d:%s)"
39#define TA(t) (t) ? tsk_mc_data(t) ? is_ghost(t) ? "ghost" : t->comm \
40 : t->comm : "NULL", \
41 (t) ? t->pid : 1, \
42 (t) ? t->rt_param.job_params.job_no : 1, \
43 (t && get_task_domain(t)) ? get_task_domain(t)->name : ""
44#define STRACE(fmt, args...) \
45 sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \
46 TRACE_ARGS, ## args)
47#define TRACE_MC_TASK(t, fmt, args...) \
48 STRACE(TS " " fmt, TA(t), ##args)
49
50/*
51 * The MC-CE scheduler uses this as domain data.
52 */
53struct ce_dom_data {
54 int cpu;
55 struct task_struct *scheduled, *should_schedule;
56#ifdef CONFIG_MERGE_TIMERS
57 struct rt_event event;
58#else
59 struct hrtimer_start_on_info timer_info;
60 struct hrtimer timer;
61#endif
62};
63
64/**
65 * enum crit_state - Logically add / remove CPUs from criticality levels.
66 *
67 * Global crit levels need to use a two step process to remove CPUs so
68 * that the CPUs can be removed without holding domain locks.
69 *
70 * @CS_ACTIVE The criticality entry can run a task
71 * @CS_ACTIVATE The criticality entry can run a task, but hasn't had its
72 * position updated in a global heap. Set with ONLY CPU lock.
73 * @CS_REMOVE The criticality entry is logically removed, but hasn't had its
74 * position adjusted in a global heap. This should be set when
75 * ONLY the CPU state is locked.
76 * @CS_REMOVED The criticality entry has been removed from the crit level
77 */
78enum crit_state { CS_ACTIVE, CS_ACTIVATE, CS_REMOVE, CS_REMOVED };
79
80/**
81 * struct crit_entry - State of a CPU within each criticality level system.
82 * @level Criticality level of this entry
83 * @linked Logically running task, ghost or regular
84 * @domain Domain from which to draw tasks
85 * @usable False if a higher criticality task is running
86 * @event For ghost task budget enforcement (merge timers)
87 * @timer For ghost task budget enforcement (not merge timers)
88 * @node Used to sort crit_entries by preemptability in global domains
89 */
90struct crit_entry {
91 enum crit_level level;
92 struct task_struct* linked;
93 struct domain* domain;
94 enum crit_state state;
95#ifdef CONFIG_MERGE_TIMERS
96 struct rt_event event;
97#else
98 struct hrtimer timer;
99#endif
100 struct bheap_node* node;
101};
102
103/**
104 * struct domain_data - Wrap domains with related CPU state
105 * @domain A domain for a criticality level
106 * @heap The preemptable heap of crit entries (for global domains)
107 * @crit_entry The crit entry for this domain (for partitioned domains)
108 */
109struct domain_data {
110 struct domain domain;
111 struct bheap* heap;
112 struct crit_entry* crit_entry;
113};
114
115/*
116 * Functions that are used with the MC-CE plugin.
117 */
118long mc_ce_set_domains(const int, struct domain_data*[]);
119unsigned int mc_ce_get_expected_job(const int, const int);
120
121/*
122 * These functions are (lazily) inserted into the MC plugin code so that it
123 * manipulates the MC-CE state.
124 */
125long mc_ce_admit_task_common(struct task_struct*);
126void mc_ce_task_exit_common(struct task_struct*);
127lt_t mc_ce_timer_callback_common(domain_t*);
128void mc_ce_release_at_common(struct task_struct*, lt_t);
129long mc_ce_activate_plugin_common(void);
130long mc_ce_deactivate_plugin_common(void);
131
132#endif /* __KERNEL__ */
133
134#endif
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 6e7cabdddae8..0f529fa78b4d 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -11,6 +11,8 @@
11#include <litmus/locking.h> 11#include <litmus/locking.h>
12#endif 12#endif
13 13
14struct litmus_lock;
15
14/************************ setup/tear down ********************/ 16/************************ setup/tear down ********************/
15 17
16typedef long (*activate_plugin_t) (void); 18typedef long (*activate_plugin_t) (void);
@@ -67,6 +69,9 @@ typedef long (*admit_task_t)(struct task_struct* tsk);
67 69
68typedef void (*release_at_t)(struct task_struct *t, lt_t start); 70typedef void (*release_at_t)(struct task_struct *t, lt_t start);
69 71
72/* TODO remove me */
73typedef void (*release_ts_t)(lt_t time);
74
70struct sched_plugin { 75struct sched_plugin {
71 struct list_head list; 76 struct list_head list;
72 /* basic info */ 77 /* basic info */
@@ -93,6 +98,8 @@ struct sched_plugin {
93 task_block_t task_block; 98 task_block_t task_block;
94 task_exit_t task_exit; 99 task_exit_t task_exit;
95 100
101 release_ts_t release_ts;
102
96#ifdef CONFIG_LITMUS_LOCKING 103#ifdef CONFIG_LITMUS_LOCKING
97 /* locking protocols */ 104 /* locking protocols */
98 allocate_lock_t allocate_lock; 105 allocate_lock_t allocate_lock;
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 2f992789affb..0e050ac3748c 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -24,7 +24,8 @@ struct st_param_data { /* regular params */
24 u32 phase; 24 u32 phase;
25 u8 partition; 25 u8 partition;
26 u8 class; 26 u8 class;
27 u8 __unused[2]; 27 u8 level;
28 u8 __unused[1];
28}; 29};
29 30
30struct st_release_data { /* A job is was/is going to be released. */ 31struct st_release_data { /* A job is was/is going to be released. */
@@ -71,8 +72,8 @@ struct st_resume_data { /* A task resumes. */
71 72
72struct st_action_data { 73struct st_action_data {
73 u64 when; 74 u64 when;
74 u8 action; 75 u32 action;
75 u8 __unused[7]; 76 u8 __unused[4];
76}; 77};
77 78
78struct st_sys_release_data { 79struct st_sys_release_data {
@@ -195,8 +196,9 @@ feather_callback void do_sched_trace_task_tardy(unsigned long id,
195#define trace_litmus_switch_to(t) 196#define trace_litmus_switch_to(t)
196#define trace_litmus_switch_away(prev) 197#define trace_litmus_switch_away(prev)
197#define trace_litmus_task_completion(t, forced) 198#define trace_litmus_task_completion(t, forced)
198#define trace_litmus_task_block(t) 199
199#define trace_litmus_task_resume(t) 200#define trace_litmus_task_block(t, i)
201#define trace_litmus_task_resume(t, i)
200#define trace_litmus_sys_release(start) 202#define trace_litmus_sys_release(start)
201#define trace_litmus_task_exit(t) 203#define trace_litmus_task_exit(t)
202#define trace_litmus_task_tardy(t) 204#define trace_litmus_task_tardy(t)
@@ -325,14 +327,14 @@ feather_callback void do_sched_trace_task_tardy(unsigned long id,
325 trace_litmus_server_param(sid, cid, wcet, period); \ 327 trace_litmus_server_param(sid, cid, wcet, period); \
326 } while(0) 328 } while(0)
327 329
328#define sched_trace_server_switch_to(sid, job, tid) \ 330#define sched_trace_server_switch_to(sid, job, tid, tjob) \
329 do { \ 331 do { \
330 trace_litmus_server_switch_to(sid, job, tid); \ 332 trace_litmus_server_switch_to(sid, job, tid, tjob); \
331 } while(0) 333 } while(0)
332 334
333#define sched_trace_server_switch_away(sid, job, tid) \ 335#define sched_trace_server_switch_away(sid, job, tid, tjob) \
334 do { \ 336 do { \
335 trace_litmus_server_switch_away(sid, job, tid); \ 337 trace_litmus_server_switch_away(sid, job, tid, tjob); \
336 } while (0) 338 } while (0)
337 339
338#define sched_trace_server_release(sid, job, rel, dead) \ 340#define sched_trace_server_release(sid, job, rel, dead) \
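
With the extra tjob argument, the server trace now records both the server's own job number and the job number of the task it executes. A sketch of a call site, reusing the negative-pid server-id convention that budget.c (below) uses for sched_trace_server_release; tsk is whichever task the server just picked:

/* server -tsk->pid, in its get_server_job(tsk)-th job, starts
 * running job job_no of task tsk->pid */
sched_trace_server_switch_to(-tsk->pid, get_server_job(tsk),
			     tsk->pid, tsk_rt(tsk)->job_params.job_no);

/* ...and reports the same four values when it is descheduled */
sched_trace_server_switch_away(-tsk->pid, get_server_job(tsk),
			       tsk->pid, tsk_rt(tsk)->job_params.job_no);
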
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index e809376d6487..d868144f6928 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -12,7 +12,10 @@
12enum task_type_marker { 12enum task_type_marker {
13 TSK_BE, 13 TSK_BE,
14 TSK_RT, 14 TSK_RT,
15 TSK_UNKNOWN 15 TSK_UNKNOWN,
16 TSK_LVLA,
17 TSK_LVLB,
18 TSK_LVLC
16}; 19};
17 20
18struct timestamp { 21struct timestamp {
@@ -68,8 +71,6 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
68 * always the next number after the start time event id. 71 * always the next number after the start time event id.
69 */ 72 */
70 73
71
72
73#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only 74#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
74 * care 75 * care
75 * about 76 * about
@@ -87,6 +88,26 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
87#define TS_TICK_START(t) TTIMESTAMP(110, t) 88#define TS_TICK_START(t) TTIMESTAMP(110, t)
88#define TS_TICK_END(t) TTIMESTAMP(111, t) 89#define TS_TICK_END(t) TTIMESTAMP(111, t)
89 90
91#define TS_LVLA_RELEASE_START DTIMESTAMP(112, TSK_RT)
92#define TS_LVLA_RELEASE_END DTIMESTAMP(113, TSK_RT)
93
94#define TS_LVLA_SCHED_START DTIMESTAMP(114, TSK_UNKNOWN)
95#define TS_LVLA_SCHED_END_ID 115
96#define TS_LVLA_SCHED_END(t) TTIMESTAMP(TS_LVLA_SCHED_END_ID, t)
97
98#define TS_LVLB_RELEASE_START DTIMESTAMP(116, TSK_RT)
99#define TS_LVLB_RELEASE_END DTIMESTAMP(117, TSK_RT)
100
101#define TS_LVLB_SCHED_START DTIMESTAMP(118, TSK_UNKNOWN)
102#define TS_LVLB_SCHED_END_ID 119
103#define TS_LVLB_SCHED_END(t) TTIMESTAMP(TS_LVLB_SCHED_END_ID, t)
104
105#define TS_LVLC_RELEASE_START DTIMESTAMP(120, TSK_RT)
106#define TS_LVLC_RELEASE_END DTIMESTAMP(121, TSK_RT)
107
108#define TS_LVLC_SCHED_START DTIMESTAMP(122, TSK_UNKNOWN)
109#define TS_LVLC_SCHED_END_ID 123
110#define TS_LVLC_SCHED_END(t) TTIMESTAMP(TS_LVLC_SCHED_END_ID, t)
90 111
91#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ 112#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
92#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ 113#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
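
Each new probe pair keeps the existing convention that the END event id is the START id plus one, so the feather-trace tooling can pair the samples. Bracketing a plugin's level-A release path would look roughly like this sketch (the surrounding handler is hypothetical):

/* somewhere inside the plugin's level-A release handler */
TS_LVLA_RELEASE_START;
/* move the released level-A jobs onto the ready queue */
TS_LVLA_RELEASE_END;
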
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
index 94264c27d9ac..71be3cd8d469 100644
--- a/include/litmus/unistd_32.h
+++ b/include/litmus/unistd_32.h
@@ -17,5 +17,6 @@
17#define __NR_wait_for_ts_release __LSC(9) 17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10) 18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11) 19#define __NR_null_call __LSC(11)
20#define __NR_set_rt_task_mc_param __LSC(12)
20 21
21#define NR_litmus_syscalls 12 22#define NR_litmus_syscalls 13
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index d5ced0d2642c..95cb74495104 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -29,5 +29,7 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
29__SYSCALL(__NR_release_ts, sys_release_ts) 29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11) 30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call) 31__SYSCALL(__NR_null_call, sys_null_call)
32#define __NR_set_rt_task_mc_param __LSC(12)
33__SYSCALL(__NR_set_rt_task_mc_param, sys_set_rt_task_mc_param)
32 34
33#define NR_litmus_syscalls 12 35#define NR_litmus_syscalls 13
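
From userspace the new call is reachable with syscall(2) using the number defined above; liblitmus would normally provide the wrapper. The argument list and parameter struct below are assumptions for illustration only, loosely modeled on the lvl_a_id field the MC-CE code uses; the real definition lives in rt_param.h, which this hunk does not show:

#include <unistd.h>
#include <sys/syscall.h>
/* __NR_set_rt_task_mc_param comes from litmus/unistd_*.h above */

struct mc_task_param {          /* hypothetical layout */
	int crit;               /* criticality level of the task */
	int lvl_a_id;           /* slot in the per-CPU level-A table */
};

static int set_rt_task_mc_param(pid_t pid, struct mc_task_param *param)
{
	/* argument order is a guess; see sys_set_rt_task_mc_param */
	return syscall(__NR_set_rt_task_mc_param, pid, param);
}
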
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
index c3a92f8ec6ef..474aa129c233 100644
--- a/include/trace/events/litmus.h
+++ b/include/trace/events/litmus.h
@@ -11,6 +11,7 @@
11 11
12#include <litmus/litmus.h> 12#include <litmus/litmus.h>
13#include <litmus/rt_param.h> 13#include <litmus/rt_param.h>
14
14TRACE_EVENT(litmus_task_param, 15TRACE_EVENT(litmus_task_param,
15 16
16 TP_PROTO(struct task_struct *t), 17 TP_PROTO(struct task_struct *t),
@@ -80,20 +81,17 @@ TRACE_EVENT(litmus_switch_to,
80 TP_STRUCT__entry( 81 TP_STRUCT__entry(
81 __field( pid_t, pid ) 82 __field( pid_t, pid )
82 __field( unsigned int, job ) 83 __field( unsigned int, job )
83 __field( unsigned long long, when )
84 __field( unsigned long long, exec_time ) 84 __field( unsigned long long, exec_time )
85 ), 85 ),
86 86
87 TP_fast_assign( 87 TP_fast_assign(
88 __entry->pid = is_realtime(t) ? t->pid : 0; 88 __entry->pid = is_realtime(t) ? t->pid : 0;
89 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; 89 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
90 __entry->when = litmus_clock();
91 __entry->exec_time = get_exec_time(t); 90 __entry->exec_time = get_exec_time(t);
92 ), 91 ),
93 92
94 TP_printk("switch_to(job(%u, %u)): %Lu (exec: %Lu)\n", 93 TP_printk("switch_to(job(%u, %u)): (exec: %Lu)\n",
95 __entry->pid, __entry->job, 94 __entry->pid, __entry->job, __entry->exec_time)
96 __entry->when, __entry->exec_time)
97); 95);
98 96
99/* 97/*
@@ -108,20 +106,17 @@ TRACE_EVENT(litmus_switch_away,
108 TP_STRUCT__entry( 106 TP_STRUCT__entry(
109 __field( pid_t, pid ) 107 __field( pid_t, pid )
110 __field( unsigned int, job ) 108 __field( unsigned int, job )
111 __field( unsigned long long, when )
112 __field( unsigned long long, exec_time ) 109 __field( unsigned long long, exec_time )
113 ), 110 ),
114 111
115 TP_fast_assign( 112 TP_fast_assign(
116 __entry->pid = is_realtime(t) ? t->pid : 0; 113 __entry->pid = is_realtime(t) ? t->pid : 0;
117 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0; 114 __entry->job = is_realtime(t) ? t->rt_param.job_params.job_no : 0;
118 __entry->when = litmus_clock();
119 __entry->exec_time = get_exec_time(t); 115 __entry->exec_time = get_exec_time(t);
120 ), 116 ),
121 117
122 TP_printk("switch_away(job(%u, %u)): %Lu (exec: %Lu)\n", 118 TP_printk("switch_away(job(%u, %u)): (exec: %Lu)\n",
123 __entry->pid, __entry->job, 119 __entry->pid, __entry->job, __entry->exec_time)
124 __entry->when, __entry->exec_time)
125); 120);
126 121
127/* 122/*
@@ -136,20 +131,17 @@ TRACE_EVENT(litmus_task_completion,
136 TP_STRUCT__entry( 131 TP_STRUCT__entry(
137 __field( pid_t, pid ) 132 __field( pid_t, pid )
138 __field( unsigned int, job ) 133 __field( unsigned int, job )
139 __field( unsigned long long, when )
140 __field( unsigned long, forced ) 134 __field( unsigned long, forced )
141 ), 135 ),
142 136
143 TP_fast_assign( 137 TP_fast_assign(
144 __entry->pid = t ? t->pid : 0; 138 __entry->pid = t ? t->pid : 0;
145 __entry->job = t ? t->rt_param.job_params.job_no : 0; 139 __entry->job = t ? t->rt_param.job_params.job_no : 0;
146 __entry->when = litmus_clock();
147 __entry->forced = forced; 140 __entry->forced = forced;
148 ), 141 ),
149 142
150 TP_printk("completed(job(%u, %u)): %Lu (forced: %lu)\n", 143 TP_printk("completed(job(%u, %u)): (forced: %lu)\n",
151 __entry->pid, __entry->job, 144 __entry->pid, __entry->job, __entry->forced)
152 __entry->when, __entry->forced)
153); 145);
154 146
155/* 147/*
@@ -164,17 +156,14 @@ TRACE_EVENT(litmus_task_block,
164 TP_STRUCT__entry( 156 TP_STRUCT__entry(
165 __field( pid_t, pid ) 157 __field( pid_t, pid )
166 __field( int, lid ) 158 __field( int, lid )
167 __field( unsigned long long, when )
168 ), 159 ),
169 160
170 TP_fast_assign( 161 TP_fast_assign(
171 __entry->pid = t ? t->pid : 0; 162 __entry->pid = t ? t->pid : 0;
172 __entry->lid = lid; 163 __entry->lid = lid;
173 __entry->when = litmus_clock();
174 ), 164 ),
175 165
176 TP_printk("(%u) blocks on %d: %Lu\n", __entry->pid, 166 TP_printk("(%u) blocks on %d\n", __entry->pid, __entry->lid)
177 __entry->lid, __entry->when)
178); 167);
179 168
180/* 169/*
@@ -189,17 +178,14 @@ TRACE_EVENT(litmus_resource_acquire,
189 TP_STRUCT__entry( 178 TP_STRUCT__entry(
190 __field( pid_t, pid ) 179 __field( pid_t, pid )
191 __field( int, lid ) 180 __field( int, lid )
192 __field( unsigned long long, when )
193 ), 181 ),
194 182
195 TP_fast_assign( 183 TP_fast_assign(
196 __entry->pid = t ? t->pid : 0; 184 __entry->pid = t ? t->pid : 0;
197 __entry->lid = lid; 185 __entry->lid = lid;
198 __entry->when = litmus_clock();
199 ), 186 ),
200 187
201 TP_printk("(%u) acquires %d: %Lu\n", __entry->pid, 188 TP_printk("(%u) acquires %d\n", __entry->pid, __entry->lid)
202 __entry->lid, __entry->when)
203); 189);
204 190
205TRACE_EVENT(litmus_resource_release, 191TRACE_EVENT(litmus_resource_release,
@@ -211,17 +197,39 @@ TRACE_EVENT(litmus_resource_release,
211 TP_STRUCT__entry( 197 TP_STRUCT__entry(
212 __field( pid_t, pid ) 198 __field( pid_t, pid )
213 __field( int, lid ) 199 __field( int, lid )
214 __field( unsigned long long, when )
215 ), 200 ),
216 201
217 TP_fast_assign( 202 TP_fast_assign(
218 __entry->pid = t ? t->pid : 0; 203 __entry->pid = t ? t->pid : 0;
219 __entry->lid = lid; 204 __entry->lid = lid;
220 __entry->when = litmus_clock();
221 ), 205 ),
222 206
223 TP_printk("(%u) releases %d: %Lu\n", __entry->pid, 207 TP_printk("(%u) releases %d\n", __entry->pid,
224 __entry->lid, __entry->when) 208 __entry->lid)
209);
210
211TRACE_EVENT(litmus_priority_donate,
212
213 TP_PROTO(struct task_struct *t, struct task_struct *donor, int lid),
214
215 TP_ARGS(t, donor, lid),
216
217 TP_STRUCT__entry(
218 __field( pid_t, t_pid )
219 __field( pid_t, d_pid )
220 __field( unsigned long long, prio)
221 __field( int, lid )
222 ),
223
224 TP_fast_assign(
225 __entry->t_pid = t ? t->pid : 0;
226 __entry->d_pid = donor ? donor->pid : 0;
227 __entry->prio = get_deadline(donor);
228 __entry->lid = lid;
229 ),
230
231	TP_printk("(%u) inherits %llu from (%u) on %d\n", __entry->t_pid,
232		__entry->prio, __entry->d_pid, __entry->lid)
225); 233);
226 234
227/* 235/*
@@ -237,19 +245,16 @@ TRACE_EVENT(litmus_task_resume,
237 __field( pid_t, pid ) 245 __field( pid_t, pid )
238 __field( int, lid ) 246 __field( int, lid )
239 __field( unsigned int, job ) 247 __field( unsigned int, job )
240 __field( unsigned long long, when )
241 ), 248 ),
242 249
243 TP_fast_assign( 250 TP_fast_assign(
244 __entry->pid = t ? t->pid : 0; 251 __entry->pid = t ? t->pid : 0;
245 __entry->job = t ? t->rt_param.job_params.job_no : 0; 252 __entry->job = t ? t->rt_param.job_params.job_no : 0;
246 __entry->when = litmus_clock();
247 __entry->lid = lid; 253 __entry->lid = lid;
248 ), 254 ),
249 255
250 TP_printk("resume(job(%u, %u)) on %d: %Lu\n", 256 TP_printk("resume(job(%u, %u)) on %d\n",
251 __entry->pid, __entry->job, 257 __entry->pid, __entry->job, __entry->lid)
252 __entry->lid, __entry->when)
253); 258);
254 259
255/* 260/*
@@ -263,15 +268,13 @@ TRACE_EVENT(litmus_sys_release,
263 268
264 TP_STRUCT__entry( 269 TP_STRUCT__entry(
265 __field( unsigned long long, rel ) 270 __field( unsigned long long, rel )
266 __field( unsigned long long, when )
267 ), 271 ),
268 272
269 TP_fast_assign( 273 TP_fast_assign(
270 __entry->rel = *start; 274 __entry->rel = *start;
271 __entry->when = litmus_clock();
272 ), 275 ),
273 276
274 TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when) 277 TP_printk("SynRelease(%Lu)\n", __entry->rel)
275); 278);
276 279
277/* 280/*
@@ -344,43 +347,50 @@ TRACE_EVENT(litmus_server_param,
344 347
345TRACE_EVENT(litmus_server_switch_to, 348TRACE_EVENT(litmus_server_switch_to,
346 349
347 TP_PROTO(int sid, unsigned int job, int tid), 350 TP_PROTO(int sid, unsigned int job, int tid, unsigned int tjob),
348 351
349 TP_ARGS(sid, job, tid), 352 TP_ARGS(sid, job, tid, tjob),
350 353
351 TP_STRUCT__entry( 354 TP_STRUCT__entry(
352 __field( int, sid) 355 __field( int, sid)
353 __field( unsigned int, job) 356 __field( unsigned int, job)
354 __field( int, tid) 357 __field( int, tid)
358 __field( unsigned int, tjob)
355 ), 359 ),
356 360
357 TP_fast_assign( 361 TP_fast_assign(
358 __entry->sid = sid; 362 __entry->sid = sid;
359 __entry->tid = tid; 363 __entry->tid = tid;
360 __entry->job = job; 364 __entry->job = job;
365 __entry->tjob = tjob;
361 ), 366 ),
362 367
363 TP_printk("switch_to(server(%d, %u)): %d\n", __entry->sid, __entry->job, __entry->tid) 368 TP_printk("switch_to(server(%d, %u)): (%d, %d)\n",
369 __entry->sid, __entry->job, __entry->tid, __entry->tjob)
364); 370);
365 371
366TRACE_EVENT(litmus_server_switch_away, 372TRACE_EVENT(litmus_server_switch_away,
367 373
368 TP_PROTO(int sid, unsigned int job, int tid), 374 TP_PROTO(int sid, unsigned int job, int tid, unsigned int tjob),
369 375
370 TP_ARGS(sid, job, tid), 376 TP_ARGS(sid, job, tid, tjob),
371 377
372 TP_STRUCT__entry( 378 TP_STRUCT__entry(
373 __field( int, sid) 379 __field( int, sid)
374 __field( unsigned int, job) 380 __field( unsigned int, job)
375 __field( int, tid) 381 __field( int, tid)
382 __field( unsigned int, tjob)
376 ), 383 ),
377 384
378 TP_fast_assign( 385 TP_fast_assign(
379 __entry->sid = sid; 386 __entry->sid = sid;
380 __entry->tid = tid; 387 __entry->tid = tid;
388 __entry->job = job;
389 __entry->tjob = tjob;
381 ), 390 ),
382 391
383 TP_printk("switch_away(server(%d, %u)): %d\n", __entry->sid, __entry->job, __entry->tid) 392 TP_printk("switch_away(server(%d, %u)): (%d, %d)\n",
393 __entry->sid, __entry->job, __entry->tid, __entry->tjob)
384); 394);
385 395
386TRACE_EVENT(litmus_server_release, 396TRACE_EVENT(litmus_server_release,
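
The new litmus_priority_donate event gives locking code a single place to record inheritance. A call site would look roughly like the line below, where owner currently holds the lock, donor lends its deadline, and lock_id is whatever identifier the protocol assigns (all three names are placeholders):

/* owner now runs with donor's deadline because of lock lock_id */
trace_litmus_priority_donate(owner, donor, lock_id);
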
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 11e896903828..6cf73d371203 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1037,6 +1037,7 @@ void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
1037{ 1037{
1038 memset(info, 0, sizeof(struct hrtimer_start_on_info)); 1038 memset(info, 0, sizeof(struct hrtimer_start_on_info));
1039 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE); 1039 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
1040 INIT_LIST_HEAD(&info->list);
1040} 1041}
1041 1042
1042/** 1043/**
@@ -1055,12 +1056,32 @@ void hrtimer_pull(void)
1055 list_for_each_safe(pos, safe, &list) { 1056 list_for_each_safe(pos, safe, &list) {
1056 info = list_entry(pos, struct hrtimer_start_on_info, list); 1057 info = list_entry(pos, struct hrtimer_start_on_info, list);
1057 TRACE("pulled timer 0x%x\n", info->timer); 1058 TRACE("pulled timer 0x%x\n", info->timer);
1058 list_del(pos); 1059 list_del_init(pos);
1059 hrtimer_start(info->timer, info->time, info->mode); 1060 if (!info->timer) continue;
1061 if (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE)
1062 hrtimer_start(info->timer, info->time, info->mode);
1063 if (atomic_read(&info->state) == HRTIMER_START_ON_INACTIVE)
1064 hrtimer_cancel(info->timer);
1060 } 1065 }
1061} 1066}
1062 1067
1063/** 1068/**
1069 * hrtimer_pull_cancel - Cancel a remote timer pull
1070 */
1071int hrtimer_pull_cancel(int cpu, struct hrtimer *timer,
1072 struct hrtimer_start_on_info *info)
1073{
1074 struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
1075
1076 raw_spin_lock(&base->lock);
1077 list_del_init(&info->list);
1078 raw_spin_unlock(&base->lock);
1079
1080 atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
1081 return hrtimer_try_to_cancel(timer);
1082}
1083
1084/**
1064 * hrtimer_start_on - trigger timer arming on remote cpu 1085 * hrtimer_start_on - trigger timer arming on remote cpu
1065 * @cpu: remote cpu 1086 * @cpu: remote cpu
1066 * @info: save timer information for enqueuing on remote cpu 1087 * @info: save timer information for enqueuing on remote cpu
@@ -1069,8 +1090,8 @@ void hrtimer_pull(void)
1069 * @mode: timer mode 1090 * @mode: timer mode
1070 */ 1091 */
1071int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info, 1092int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
1072 struct hrtimer *timer, ktime_t time, 1093 struct hrtimer *timer, ktime_t time,
1073 const enum hrtimer_mode mode) 1094 const enum hrtimer_mode mode)
1074{ 1095{
1075 unsigned long flags; 1096 unsigned long flags;
1076 struct hrtimer_cpu_base* base; 1097 struct hrtimer_cpu_base* base;
@@ -1102,7 +1123,8 @@ int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
1102 __hrtimer_start_range_ns(info->timer, info->time, 1123 __hrtimer_start_range_ns(info->timer, info->time,
1103 0, info->mode, 0); 1124 0, info->mode, 0);
1104 } else { 1125 } else {
1105 TRACE("hrtimer_start_on: pulling to remote CPU\n"); 1126 TRACE("hrtimer_start_on: pulling 0x%x to remote CPU\n",
1127 info->timer);
1106 base = &per_cpu(hrtimer_bases, cpu); 1128 base = &per_cpu(hrtimer_bases, cpu);
1107 raw_spin_lock_irqsave(&base->lock, flags); 1129 raw_spin_lock_irqsave(&base->lock, flags);
1108 was_empty = list_empty(&base->to_pull); 1130 was_empty = list_empty(&base->to_pull);
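
hrtimer_pull_cancel() closes the gap where a timer queued on a remote CPU's to_pull list becomes unnecessary before the pull happens: it unlinks the info entry under the remote base lock and then tries to cancel the timer itself. A minimal usage sketch, assuming the caller owns timer/info and cpu is the CPU previously passed to hrtimer_start_on(); my_timer_fired() is a hypothetical callback:

static int arm_then_cancel(int cpu, struct hrtimer *timer,
			   struct hrtimer_start_on_info *info, lt_t when)
{
	/* arm `timer` on remote CPU `cpu` to fire at absolute time `when` */
	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	timer->function = my_timer_fired;
	hrtimer_start_on_info_init(info);
	hrtimer_start_on(cpu, info, timer, ns_to_ktime(when), HRTIMER_MODE_ABS);

	/* later: the event is no longer wanted, so drop any pending pull
	 * request and cancel the timer if it was already armed */
	return hrtimer_pull_cancel(cpu, timer, info);
}
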
diff --git a/kernel/sched.c b/kernel/sched.c
index 65aba7ec564d..2739b3339ffb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4338,6 +4338,9 @@ need_resched:
4338 */ 4338 */
4339litmus_need_resched_nonpreemptible: 4339litmus_need_resched_nonpreemptible:
4340 TS_SCHED_START; 4340 TS_SCHED_START;
4341 TS_LVLA_SCHED_START;
4342 TS_LVLB_SCHED_START;
4343 TS_LVLC_SCHED_START;
4341 sched_trace_task_switch_away(prev); 4344 sched_trace_task_switch_away(prev);
4342 4345
4343 schedule_debug(prev); 4346 schedule_debug(prev);
@@ -4396,6 +4399,9 @@ litmus_need_resched_nonpreemptible:
4396 rq->curr = next; 4399 rq->curr = next;
4397 ++*switch_count; 4400 ++*switch_count;
4398 4401
4402 TS_LVLA_SCHED_END(next);
4403 TS_LVLB_SCHED_END(next);
4404 TS_LVLC_SCHED_END(next);
4399 TS_SCHED_END(next); 4405 TS_SCHED_END(next);
4400 TS_CXS_START(next); 4406 TS_CXS_START(next);
4401 context_switch(rq, prev, next); /* unlocks the rq */ 4407 context_switch(rq, prev, next); /* unlocks the rq */
diff --git a/litmus/Kconfig b/litmus/Kconfig
index f2dbfb396883..91bf81ea9fae 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -12,6 +12,19 @@ config PLUGIN_CEDF
12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF 12 On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13 makes little sense since there aren't any shared caches. 13 makes little sense since there aren't any shared caches.
14 14
15config PLUGIN_COLOR
16 bool "Scheduling with Colors"
17 default y
18 help
19	  Include the scheduling-with-colors scheduler plugin.
20
21config PLUGIN_COLOR_UNCACHABLE
22 bool "Colored memory is not cachable"
23 depends on PLUGIN_COLOR && X86_PAT
24 default n
25 help
26 Any memory allocated to the color plugin is not CPU cached.
27
15config PLUGIN_PFAIR 28config PLUGIN_PFAIR
16 bool "PFAIR" 29 bool "PFAIR"
17 depends on HIGH_RES_TIMERS && !NO_HZ 30 depends on HIGH_RES_TIMERS && !NO_HZ
@@ -23,16 +36,67 @@ config PLUGIN_PFAIR
23 36
24 If unsure, say Yes. 37 If unsure, say Yes.
25 38
39config MERGE_TIMERS
40 bool "Timer-merging Support"
41 depends on HIGH_RES_TIMERS
42 default y
43 help
44 Include support for merging timers.
45
46config MERGE_TIMERS_WINDOW
47 int "Timer-merging Window (in nanoseconds)"
48 depends on MERGE_TIMERS
49 default 1000
50 help
51	  Window within which separate timers may be merged.
52
26config RELEASE_MASTER 53config RELEASE_MASTER
27 bool "Release-master Support" 54 bool "Release-master Support"
28 depends on ARCH_HAS_SEND_PULL_TIMERS 55 depends on ARCH_HAS_SEND_PULL_TIMERS
29 default n 56 default n
30 help 57 help
31 Allow one processor to act as a dedicated interrupt processor 58 In GSN-EDF, allow one processor to act as a dedicated interrupt
32 that services all timer interrupts, but that does not schedule 59 processor that services all timer interrupts, but that does not schedule
33 real-time tasks. See RTSS'09 paper for details 60 real-time tasks. See RTSS'09 paper for details
34 (http://www.cs.unc.edu/~anderson/papers.html). 61 (http://www.cs.unc.edu/~anderson/papers.html).
35 Currently only supported by GSN-EDF. 62
63menu "Mixed Criticality"
64
65config PLUGIN_MC
66 bool "Mixed Criticality Scheduler"
67 depends on X86 && SYSFS
68 default y
69 help
70 Include the mixed criticality scheduler. This plugin depends
71 on the global release-master processor for its _REDIRECT and
72 _RELEASE_MASTER options.
73
74 If unsure, say Yes.
75
76config PLUGIN_MC_LEVEL_A_MAX_TASKS
77 int "Maximum level A tasks"
78 depends on PLUGIN_MC
79 range 1 128
80 default 32
81 help
82	  The maximum number of level-A tasks allowed per CPU.
83
84config PLUGIN_MC_RELEASE_MASTER
85 bool "Release-master support for MC"
86 depends on PLUGIN_MC && RELEASE_MASTER
87 default y
88 help
89 Send all timer interrupts to the system-wide release-master CPU.
90
91config PLUGIN_MC_REDIRECT
92 bool "Redirect Work to Release-master"
93 depends on PLUGIN_MC && RELEASE_MASTER
94 default y
95 help
96	  Allow processors to send work involving global state to the
97	  release-master CPU in order to avoid excess overhead during
98	  partitioned scheduling decisions.
99endmenu
36 100
37endmenu 101endmenu
38 102
@@ -51,7 +115,6 @@ config NP_SECTION
51 115
52config LITMUS_LOCKING 116config LITMUS_LOCKING
53 bool "Support for real-time locking protocols" 117 bool "Support for real-time locking protocols"
54 depends on NP_SECTION
55 default n 118 default n
56 help 119 help
57 Enable LITMUS^RT's deterministic multiprocessor real-time 120 Enable LITMUS^RT's deterministic multiprocessor real-time
@@ -167,7 +230,7 @@ config SCHED_TASK_TRACE
167config SCHED_TASK_TRACE_SHIFT 230config SCHED_TASK_TRACE_SHIFT
168 int "Buffer size for sched_trace_xxx() events" 231 int "Buffer size for sched_trace_xxx() events"
169 depends on SCHED_TASK_TRACE 232 depends on SCHED_TASK_TRACE
170 range 8 13 233 range 8 20
171 default 9 234 default 9
172 help 235 help
173 236
@@ -226,7 +289,7 @@ config SCHED_DEBUG_TRACE
226config SCHED_DEBUG_TRACE_SHIFT 289config SCHED_DEBUG_TRACE_SHIFT
227 int "Buffer size for TRACE() buffer" 290 int "Buffer size for TRACE() buffer"
228 depends on SCHED_DEBUG_TRACE 291 depends on SCHED_DEBUG_TRACE
229 range 14 22 292 range 14 24
230 default 18 293 default 18
231 help 294 help
232 295
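
MERGE_TIMERS_WINDOW (above) is the slack, in nanoseconds, within which separate timers may share one firing. Purely to illustrate what the option controls, and not how litmus/event_group.c actually implements it, two events can be merged when their fire times fall into the same window-sized bucket:

/* illustration only: CONFIG_MERGE_TIMERS_WINDOW comes from Kconfig */
static inline lt_t merge_bucket(lt_t fire_time)
{
	return fire_time / CONFIG_MERGE_TIMERS_WINDOW;
}

static inline int can_merge(lt_t a, lt_t b)
{
	return merge_bucket(a) == merge_bucket(b);
}
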
diff --git a/litmus/Makefile b/litmus/Makefile
index d26ca7076b62..76a07e8531c6 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -3,30 +3,41 @@
3# 3#
4 4
5obj-y = sched_plugin.o litmus.o \ 5obj-y = sched_plugin.o litmus.o \
6 preempt.o \ 6 bheap.o \
7 litmus_proc.o \ 7 binheap.o \
8 budget.o \ 8 budget.o \
9 clustered.o \ 9 clustered.o \
10 jobs.o \ 10 color.o \
11 sync.o \ 11 color_dev.o \
12 rt_domain.o \ 12 color_proc.o \
13 ctrldev.o \
14 dgl.o \
15 domain.o \
13 edf_common.o \ 16 edf_common.o \
14 fp_common.o \
15 fdso.o \ 17 fdso.o \
18 fifo_common.o \
19 fp_common.o \
20 jobs.o \
21 litmus_proc.o \
16 locking.o \ 22 locking.o \
17 srp.o \ 23 preempt.o \
18 bheap.o \ 24 rm_common.o \
19 binheap.o \ 25 rt_domain.o \
20 ctrldev.o \ 26 rt_server.o \
21 sched_gsn_edf.o \ 27 sched_gsn_edf.o \
28 sched_pfp.o \
22 sched_psn_edf.o \ 29 sched_psn_edf.o \
23 sched_pfp.o 30 srp.o \
31 sync.o
24 32
25obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 33obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
26obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 34obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
35obj-$(CONFIG_PLUGIN_COLOR) += sched_color.o
27obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o 36obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
28 37obj-$(CONFIG_PLUGIN_MC) += sched_mc.o sched_mc_ce.o ce_domain.o
38obj-$(CONFIG_MERGE_TIMERS) += event_group.o
29obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 39obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 40obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 41obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 42obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
43
diff --git a/litmus/bheap.c b/litmus/bheap.c
index 528af97f18a6..42122d86be4c 100644
--- a/litmus/bheap.c
+++ b/litmus/bheap.c
@@ -248,13 +248,14 @@ int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
248void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, 248void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
249 struct bheap_node* node) 249 struct bheap_node* node)
250{ 250{
251 struct bheap_node *parent, *prev, *pos; 251 struct bheap_node *parent, *prev, *pos, *old;
252 struct bheap_node** tmp_ref; 252 struct bheap_node** tmp_ref;
253 void* tmp; 253 void* tmp;
254 254
255 if (heap->min != node) { 255 if (heap->min != node) {
256 /* bubble up */ 256 /* bubble up */
257 parent = node->parent; 257 parent = node->parent;
258 old = node;
258 while (parent) { 259 while (parent) {
259 /* swap parent and node */ 260 /* swap parent and node */
260 tmp = parent->value; 261 tmp = parent->value;
diff --git a/litmus/budget.c b/litmus/budget.c
index f7712be29adb..f7505b0f86e5 100644
--- a/litmus/budget.c
+++ b/litmus/budget.c
@@ -4,19 +4,12 @@
4 4
5#include <litmus/litmus.h> 5#include <litmus/litmus.h>
6#include <litmus/preempt.h> 6#include <litmus/preempt.h>
7
8#include <litmus/budget.h> 7#include <litmus/budget.h>
9 8#include <litmus/sched_trace.h>
10struct enforcement_timer {
11 /* The enforcement timer is used to accurately police
12 * slice budgets. */
13 struct hrtimer timer;
14 int armed;
15};
16 9
17DEFINE_PER_CPU(struct enforcement_timer, budget_timer); 10DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
18 11
19static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) 12enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
20{ 13{
21 struct enforcement_timer* et = container_of(timer, 14 struct enforcement_timer* et = container_of(timer,
22 struct enforcement_timer, 15 struct enforcement_timer,
@@ -34,7 +27,7 @@ static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
34} 27}
35 28
36/* assumes called with IRQs off */ 29/* assumes called with IRQs off */
37static void cancel_enforcement_timer(struct enforcement_timer* et) 30void cancel_enforcement_timer(struct enforcement_timer* et)
38{ 31{
39 int ret; 32 int ret;
40 33
@@ -56,11 +49,10 @@ static void cancel_enforcement_timer(struct enforcement_timer* et)
56} 49}
57 50
58/* assumes called with IRQs off */ 51/* assumes called with IRQs off */
59static void arm_enforcement_timer(struct enforcement_timer* et, 52void arm_enforcement_timer(struct enforcement_timer* et,
60 struct task_struct* t) 53 struct task_struct* t)
61{ 54{
62 lt_t when_to_fire; 55 lt_t when_to_fire;
63 TRACE_TASK(t, "arming enforcement timer.\n");
64 56
65 /* Calling this when there is no budget left for the task 57 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */ 58 * makes no sense, unless the task is non-preemptive. */
@@ -69,8 +61,11 @@ static void arm_enforcement_timer(struct enforcement_timer* et,
69 /* __hrtimer_start_range_ns() cancels the timer 61 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */ 62 * anyway, so we don't have to check whether it is still armed */
71 63
72 if (likely(!is_np(t))) { 64 if (likely(!is_user_np(t))) {
73 when_to_fire = litmus_clock() + budget_remaining(t); 65 when_to_fire = litmus_clock() + budget_remaining(t);
66 TRACE_TASK(t, "arming enforcement timer for %llu.\n",
67 when_to_fire);
68
74 __hrtimer_start_range_ns(&et->timer, 69 __hrtimer_start_range_ns(&et->timer,
75 ns_to_ktime(when_to_fire), 70 ns_to_ktime(when_to_fire),
76 0 /* delta */, 71 0 /* delta */,
@@ -96,6 +91,11 @@ void update_enforcement_timer(struct task_struct* t)
96 } 91 }
97} 92}
98 93
94void init_enforcement_timer(struct enforcement_timer *et)
95{
96 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
97 et->timer.function = on_enforcement_timeout;
98}
99 99
100static int __init init_budget_enforcement(void) 100static int __init init_budget_enforcement(void)
101{ 101{
@@ -104,10 +104,33 @@ static int __init init_budget_enforcement(void)
104 104
105 for (cpu = 0; cpu < NR_CPUS; cpu++) { 105 for (cpu = 0; cpu < NR_CPUS; cpu++) {
106 et = &per_cpu(budget_timer, cpu); 106 et = &per_cpu(budget_timer, cpu);
107 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 107 init_enforcement_timer(et);
108 et->timer.function = on_enforcement_timeout;
109 } 108 }
110 return 0; 109 return 0;
111} 110}
112 111
112void task_release(struct task_struct *t)
113{
114 /* Also wrong */
115 t->rt_param.job_params.real_release = t->rt_param.job_params.real_deadline;
116 t->rt_param.job_params.real_deadline += get_rt_period(t);
117 t->rt_param.job_params.job_no++;
118 sched_trace_task_release(t);
119}
120
121void server_release(struct task_struct *t)
122{
123 t->rt_param.job_params.exec_time = 0;
124 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
125 t->rt_param.job_params.deadline += get_rt_period(t);
126 t->rt_param.job_params.fake_job_no++;
127
128 /* don't confuse linux */
129 t->rt.time_slice = 1;
130
131 sched_trace_server_release(-t->pid, get_server_job(t),
132 t->rt_param.job_params.release,
133 t->rt_param.job_params.deadline);
134}
135
113module_init(init_budget_enforcement); 136module_init(init_budget_enforcement);
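
Since init_enforcement_timer(), arm_enforcement_timer() and cancel_enforcement_timer() are now exported (and arming keys on is_user_np() instead of is_np()), a plugin can embed an enforcement timer of its own rather than relying solely on the per-CPU budget_timer. A sketch under that assumption, using the usual budget_enforced()/budget_exhausted() helpers from litmus.h; struct my_cpu_state and the my_*() hooks are hypothetical plugin-side names:

struct my_cpu_state {
	struct enforcement_timer budget;
	/* other per-CPU plugin state */
};

static void my_cpu_init(struct my_cpu_state *state)
{
	init_enforcement_timer(&state->budget);
}

/* called with IRQs off when a budget-enforced task is scheduled in */
static void my_schedule_in(struct my_cpu_state *state, struct task_struct *t)
{
	if (budget_enforced(t) && !budget_exhausted(t))
		arm_enforcement_timer(&state->budget, t);
}

/* called with IRQs off when the task is scheduled out again */
static void my_schedule_out(struct my_cpu_state *state)
{
	cancel_enforcement_timer(&state->budget);
}
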
diff --git a/litmus/ce_domain.c b/litmus/ce_domain.c
new file mode 100644
index 000000000000..b2c5d4e935a5
--- /dev/null
+++ b/litmus/ce_domain.c
@@ -0,0 +1,102 @@
1#include <linux/pid.h>
2#include <linux/sched.h>
3#include <linux/hrtimer.h>
4#include <linux/slab.h>
5
6#include <litmus/litmus.h>
7#include <litmus/debug_trace.h>
8#include <litmus/rt_param.h>
9#include <litmus/domain.h>
10#include <litmus/event_group.h>
11#include <litmus/sched_mc.h>
12#include <litmus/ce_domain.h>
13
14/*
15 * Called for:
16 * task_new
17 * job_completion
18 * wake_up
19 */
20void ce_requeue(domain_t *dom, struct task_struct *ts)
21{
22 const struct ce_dom_data *ce_data = dom->data;
23 const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
24 const unsigned int just_finished = tsk_rt(ts)->job_params.job_no;
25 const unsigned int expected_job =
26 mc_ce_get_expected_job(ce_data->cpu, idx);
27 const int asleep = RT_F_SLEEP == get_rt_flags(ts);
28
29 TRACE_MC_TASK(ts, "entered ce_requeue. asleep: %d just_finished: %3u "
30 "expected_job: %3u\n",
31 asleep, just_finished, expected_job);
32
33 tsk_mc_data(ts)->mc_task.lvl_a_eligible = 1;
34
35 /* When coming from job completion, the task will be asleep. */
36 if (asleep && just_finished < expected_job) {
37 TRACE_MC_TASK(ts, "appears behind\n");
38 } else if (asleep && expected_job < just_finished) {
39		TRACE_MC_TASK(ts, "job %u completed but the expected job is %u, "
40				"which seems too early\n", just_finished,
41				expected_job);
42 }
43}
44
45/*
46 *
47 */
48void ce_remove(domain_t *dom, struct task_struct *ts)
49{
50 tsk_mc_data(ts)->mc_task.lvl_a_eligible = 0;
51}
52
53/*
54 * ce_take_ready and ce_peek_ready
55 */
56struct task_struct* ce_peek_and_take_ready(domain_t *dom)
57{
58 const struct ce_dom_data *ce_data = dom->data;
59 struct task_struct *ret = NULL, *sched = ce_data->should_schedule;
60 const int exists = NULL != sched;
61 const int blocked = exists && !is_running(sched);
62 const int elig = exists && tsk_mc_data(sched) &&
63 tsk_mc_data(sched)->mc_task.lvl_a_eligible;
64
65	/* Return the task we should schedule if it is eligible and not blocked. */
66 if (exists && !blocked && elig)
67 ret = sched;
68 return ret;
69}
70
71int ce_higher_prio(struct task_struct *a, struct task_struct *b)
72{
73 const domain_t *dom = get_task_domain(a);
74 const struct ce_dom_data *ce_data = dom->data;
75 return (a != b && a == ce_data->should_schedule);
76}
77
78void ce_domain_init(domain_t *dom,
79 raw_spinlock_t *lock,
80 requeue_t requeue,
81 peek_ready_t peek_ready,
82 take_ready_t take_ready,
83 preempt_needed_t preempt_needed,
84 task_prio_t task_prio,
85 struct ce_dom_data *dom_data,
86 const int cpu,
87 ce_timer_callback_t ce_timer_callback)
88{
89 domain_init(dom, lock, requeue, peek_ready, take_ready, preempt_needed,
90 task_prio);
91 dom->data = dom_data;
92 dom->remove = ce_remove;
93 dom_data->cpu = cpu;
94#ifdef CONFIG_MERGE_TIMERS
95 init_event(&dom_data->event, CRIT_LEVEL_A, ce_timer_callback,
96 event_list_alloc(GFP_ATOMIC));
97#else
98 hrtimer_start_on_info_init(&dom_data->timer_info);
99 hrtimer_init(&dom_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
100 dom_data->timer.function = ce_timer_callback;
101#endif
102}
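
ce_domain_init() wires the CE callbacks into a generic domain and then sets up either an rt_event or a per-domain hrtimer for the cyclic-executive schedule. A hedged sketch of how the MC plugin might call it for one CPU; mc_preempt_needed() and mc_ce_timer_callback() are placeholder names, while the ce_*() callbacks are the ones defined above:

/* assuming: domain_t dom; raw_spinlock_t dom_lock;
 *           struct ce_dom_data *ce_data; int cpu; */
ce_domain_init(&dom, &dom_lock,
	       ce_requeue,                /* requeue */
	       ce_peek_and_take_ready,    /* peek_ready */
	       ce_peek_and_take_ready,    /* take_ready */
	       mc_preempt_needed,         /* placeholder preemption check */
	       ce_higher_prio,            /* task_prio */
	       ce_data, cpu,
	       mc_ce_timer_callback);     /* placeholder timer callback */
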
diff --git a/litmus/color.c b/litmus/color.c
new file mode 100644
index 000000000000..ecc191137137
--- /dev/null
+++ b/litmus/color.c
@@ -0,0 +1,357 @@
1#include <linux/spinlock.h>
2
3#include <linux/module.h>
4#include <linux/mm.h>
5#include <linux/slab.h>
6#include <linux/sysctl.h>
7#include <linux/lockdep.h>
8#include <linux/sched.h> /* required by litmus.h */
9#include <asm/io.h> /* page_to_phys on SPARC */
10
11#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
12#include <asm/cacheflush.h> /* set_memory_uc */
13#endif
14
15#include <litmus/color.h>
16#include <litmus/litmus.h> /* for in_list(...) */
17
18#define PAGES_PER_COLOR 3072
19
20/*
21 * This is used only to "trick" lockdep into permitting dynamically allocated
22 * locks of different classes that are initialized on the same line.
23 */
24#define LOCKDEP_MAX_NR_COLORS 512
25static struct lock_class_key color_lock_keys[LOCKDEP_MAX_NR_COLORS];
26
27struct color_group {
28 spinlock_t lock;
29 char _lock_name[LITMUS_LOCKDEP_NAME_MAX_LEN];
30 struct list_head list;
31 atomic_t nr_pages;
32};
33
34static unsigned long color_mask;
35static struct color_group *color_groups;
36
37
38/* non-static: extern'ed in various files */
39struct color_cache_info color_cache_info;
40int color_sysctl_add_pages_data;
41
42static inline unsigned long page_color(struct page *page)
43{
44 return ((page_to_phys(page) & color_mask) >> PAGE_SHIFT);
45}
46
47/*
48 * The page's count should be one and it should not be on any LRU list.
49 */
50void add_page_to_color_list(struct page *page)
51{
52 const unsigned long color = page_color(page);
53 struct color_group *cgroup = &color_groups[color];
54 BUG_ON(in_list(&page->lru) || PageLRU(page));
55 BUG_ON(page_count(page) > 1);
56 spin_lock(&cgroup->lock);
57 list_add_tail(&page->lru, &cgroup->list);
58 atomic_inc(&cgroup->nr_pages);
59 SetPageLRU(page);
60 spin_unlock(&cgroup->lock);
61}
62
63/*
64 * Increases the page's count to two.
65 */
66struct page* get_colored_page(unsigned long color)
67{
68 struct color_group *cgroup;
69 struct page *page = NULL;
70
71 if (color >= color_cache_info.nr_colors)
72 goto out;
73
74 cgroup = &color_groups[color];
75 spin_lock(&cgroup->lock);
76 if (unlikely(!atomic_read(&cgroup->nr_pages))) {
77 TRACE_CUR("No free %lu colored pages.\n", color);
78 printk(KERN_WARNING "no free %lu colored pages.\n", color);
79 goto out_unlock;
80 }
81 page = list_first_entry(&cgroup->list, struct page, lru);
82 BUG_ON(page_count(page) > 1);
83 get_page(page);
84 list_del(&page->lru);
85 atomic_dec(&cgroup->nr_pages);
86 ClearPageLRU(page);
87out_unlock:
88 spin_unlock(&cgroup->lock);
89out:
90 return page;
91}
92
93static unsigned long smallest_nr_pages(void)
94{
95 unsigned long i, min_pages = -1;
96 struct color_group *cgroup;
97 for (i = 0; i < color_cache_info.nr_colors; ++i) {
98 cgroup = &color_groups[i];
99 if (atomic_read(&cgroup->nr_pages) < min_pages)
100 min_pages = atomic_read(&cgroup->nr_pages);
101 }
102 return min_pages;
103}
104
105static int do_add_pages(void)
106{
107 struct page *page, *page_tmp;
108 LIST_HEAD(free_later);
109 unsigned long color;
110 int ret = 0;
111
112 while (smallest_nr_pages() < PAGES_PER_COLOR) {
113#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
114 unsigned long vaddr;
115#endif
116
117#if defined(CONFIG_X86)
118 page = alloc_page(GFP_HIGHUSER | __GFP_ZERO |
119 __GFP_MOVABLE);
120#elif defined(CONFIG_SPARC) /* X86 */
121 page = alloc_page(GFP_HIGHUSER | __GFP_MOVABLE);
122#else
123#error What architecture are you using?
124#endif
125 if (unlikely(!page)) {
126 printk(KERN_WARNING "Could not allocate pages.\n");
127 ret = -ENOMEM;
128 goto out;
129 }
130 color = page_color(page);
131 if (atomic_read(&color_groups[color].nr_pages) < PAGES_PER_COLOR) {
132 SetPageReserved(page);
133#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
134 vaddr = (unsigned long) pfn_to_kaddr(page_to_pfn(page));
135 if (set_memory_uc(vaddr, 1)) {
136 printk(KERN_ALERT "Could not set_memory_uc\n");
137 BUG();
138 }
139#endif
140 add_page_to_color_list(page);
141 } else
142 list_add_tail(&page->lru, &free_later);
143 }
144 list_for_each_entry_safe(page, page_tmp, &free_later, lru) {
145 list_del(&page->lru);
146 __free_page(page);
147 }
148out:
149 return ret;
150}
151
152static struct alloced_pages {
153 spinlock_t lock;
154 struct list_head list;
155} alloced_pages;
156
157struct alloced_page {
158 struct page *page;
159 struct vm_area_struct *vma;
160 struct list_head list;
161};
162
163static struct alloced_page * new_alloced_page(struct page *page,
164 struct vm_area_struct *vma)
165{
166 struct alloced_page *ap = kmalloc(sizeof(*ap), GFP_KERNEL);
167 INIT_LIST_HEAD(&ap->list);
168 ap->page = page;
169 ap->vma = vma;
170 return ap;
171}
172
173/*
174 * The page's count should be two or more. It should not be on any LRU list.
175 */
176void add_page_to_alloced_list(struct page *page, struct vm_area_struct *vma)
177{
178 struct alloced_page *ap;
179
180 BUG_ON(page_count(page) < 2);
181 ap = new_alloced_page(page, vma);
182 spin_lock(&alloced_pages.lock);
183 list_add_tail(&ap->list, &alloced_pages.list);
184 spin_unlock(&alloced_pages.lock);
185}
186
187/*
188 * Reclaim pages.
189 */
190void reclaim_pages(struct vm_area_struct *vma)
191{
192 struct alloced_page *ap, *ap_tmp;
193 unsigned long nr_reclaimed = 0;
194 spin_lock(&alloced_pages.lock);
195 list_for_each_entry_safe(ap, ap_tmp, &alloced_pages.list, list) {
196 if (vma == ap->vma) {
197 list_del(&ap->list);
198 put_page(ap->page);
199 add_page_to_color_list(ap->page);
200 nr_reclaimed++;
201 TRACE_CUR("reclaiming page (pa:0x%10llx, pfn:%8lu, "
202 "color:%3lu)\n", page_to_phys(ap->page),
203 page_to_pfn(ap->page), page_color(ap->page));
204 kfree(ap);
205 }
206 }
207 spin_unlock(&alloced_pages.lock);
208 TRACE_CUR("Reclaimed %lu pages.\n", nr_reclaimed);
209}
210
211/***********************************************************
212 * Proc
213***********************************************************/
214
215int color_add_pages_handler(struct ctl_table *table, int write, void __user *buffer,
216 size_t *lenp, loff_t *ppos)
217{
218 int ret = 0;
219 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
220 if (ret)
221 goto out;
222 if (write && color_sysctl_add_pages_data)
223 ret = do_add_pages();
224out:
225 return ret;
226}
227
228
229int color_nr_pages_handler(struct ctl_table *table, int write, void __user *buffer,
230 size_t *lenp, loff_t *ppos)
231{
232 struct color_group *cgroup;
233 char *buf;
234 unsigned long i;
235 int used = 0, ret = 0;
236
237 if (write) {
238 ret = -EPERM;
239 goto out;
240 }
241 for (i = 0; i < color_cache_info.nr_colors; ++i) {
242 cgroup = &color_groups[i];
243 buf = ((char*)table->data) + used;
244 used += snprintf(buf, table->maxlen - used, ONE_COLOR_FMT,
245 i, atomic_read(&cgroup->nr_pages));
246 }
247 ret = proc_dostring(table, write, buffer, lenp, ppos);
248out:
249 return ret;
250}
251
252/***********************************************************
253 * Initialization
254***********************************************************/
255
256#if defined(CONFIG_X86)
257/* slowest possible way to find a log, but we only do this once on boot */
258static unsigned int __init slow_log(unsigned int v)
259{
260 unsigned int r = 0;
261 while (v >>= 1)
262 r++;
263 return r;
264}
265
266static int __init init_mask(void)
267{
268 unsigned int line_size_log = slow_log(color_cache_info.line_size);
269 int err = 0;
270
271 BUG_ON(color_cache_info.size <= 1048576 ||
272 color_cache_info.ways < 15 ||
273 color_cache_info.line_size != 64);
274
275 printk("Cache size: %lu line-size: %lu ways: %lu sets: %lu\n",
276 color_cache_info.size, color_cache_info.line_size,
277 color_cache_info.ways, color_cache_info.sets);
278 if (!color_cache_info.size) {
279 printk(KERN_WARNING "No cache information found.\n");
280 err = -EINVAL;
281 goto out;
282 }
283
284
285 BUG_ON(color_cache_info.size / color_cache_info.line_size /
286 color_cache_info.ways != color_cache_info.sets);
287 BUG_ON(PAGE_SIZE >= (color_cache_info.sets << line_size_log));
288 color_mask = ((color_cache_info.sets << line_size_log) - 1) ^
289 (PAGE_SIZE - 1);
290 color_cache_info.nr_colors = (color_mask >> PAGE_SHIFT) + 1;
291out:
292 return err;
293}
294#elif defined(CONFIG_SPARC) /* X86 */
295static int __init init_mask(void)
296{
297 /*
298 * Static assuming we are using Flare (our Niagara machine).
299 * This machine has weirdness with cache banks, and I don't want
300 * to waste time trying to auto-detect this.
301 */
302 color_mask = 0x3e000UL; /* bits 17:13 */
303 color_cache_info.size = 3 * 1024 * 1024; /* 3 MB */
304 color_cache_info.line_size = 64;
305 color_cache_info.ways = 12;
306 color_cache_info.sets = 1024 * 4;
307 color_cache_info.nr_colors = (1 << hweight_long(color_mask));
308 return 0;
309}
310#endif /* SPARC/X86 */
311
312
313
314static int __init init_color_groups(void)
315{
316 struct color_group *cgroup;
317 unsigned long i;
318 int err = 0;
319
320 color_groups = kmalloc(color_cache_info.nr_colors *
321 sizeof(struct color_group), GFP_KERNEL);
322 if (!color_groups) {
323 printk(KERN_WARNING "Could not allocate color groups.\n");
324 err = -ENOMEM;
325 goto out;
326 }
327
328 for (i = 0; i < color_cache_info.nr_colors; ++i) {
329 cgroup = &color_groups[i];
330 atomic_set(&cgroup->nr_pages, 0);
331 INIT_LIST_HEAD(&cgroup->list);
332 spin_lock_init(&cgroup->lock);
333 LOCKDEP_DYNAMIC_ALLOC(&cgroup->lock, &color_lock_keys[i],
334 cgroup->_lock_name, "color%lu", i);
335 }
336out:
337 return err;
338}
339
340static int __init init_color(void)
341{
342 int err = 0;
343 printk("Initializing LITMUS^RT cache coloring.\n");
344
345 INIT_LIST_HEAD(&alloced_pages.list);
346 spin_lock_init(&alloced_pages.lock);
347
348 err = init_mask();
349 printk("PAGE_SIZE: %lu Color mask: 0x%lx Total colors: %lu\n",
350 PAGE_SIZE, color_mask, color_cache_info.nr_colors);
351
352 BUG_ON(LOCKDEP_MAX_NR_COLORS < color_cache_info.nr_colors);
353 err = init_color_groups();
354 return err;
355}
356
357module_init(init_color);
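
page_color() just extracts the cache-set index bits that lie above the page offset. As a worked example of the x86 path in init_mask(), take a hypothetical 2 MiB, 16-way cache with 64 B lines and 4 KiB pages (numbers chosen only to satisfy the BUG_ON checks above):

/* sets       = 2 MiB / 64 B / 16 ways    = 2048
 * color_mask = ((2048 << 6) - 1) ^ 0xfff = 0x1f000
 * nr_colors  = (0x1f000 >> 12) + 1       = 32
 * so a page's color is bits 16:12 of its physical address: */
static inline unsigned long example_color(unsigned long long phys)
{
	return (phys & 0x1f000ULL) >> 12;
}
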
diff --git a/litmus/color_dev.c b/litmus/color_dev.c
new file mode 100644
index 000000000000..51760328418e
--- /dev/null
+++ b/litmus/color_dev.c
@@ -0,0 +1,351 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/spinlock.h>
6#include <linux/module.h>
7#include <linux/highmem.h>
8#include <asm/io.h> /* page_to_phys on SPARC */
9
10#include <litmus/litmus.h>
11#include <litmus/color.h>
12
13#define ALLOC_NAME "litmus/color_alloc"
14#define CTRL_NAME "litmus/color_ctrl"
15
16extern unsigned long nr_colors;
17
18/***********************************************************
19 * Control device
20***********************************************************/
21
22static void litmus_color_ctrl_vm_close(struct vm_area_struct *vma)
23{
24 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__,
25 vma->vm_flags, pgprot_val(vma->vm_page_prot));
26
27 TRACE_CUR(CTRL_NAME ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
28 (void*) vma->vm_start, (void*) vma->vm_end, vma,
29 vma->vm_private_data);
30}
31
32static int litmus_color_ctrl_vm_fault(struct vm_area_struct *vma,
33 struct vm_fault *vmf)
34{
35 /* This function should never be called, since
36 * all pages should have been mapped by mmap()
37 * already. */
38 TRACE_CUR("%s flags=0x%lx\n", __FUNCTION__, vma->vm_flags);
39 printk(KERN_WARNING "fault: %s flags=0x%lx\n", __FUNCTION__,
40 vma->vm_flags);
41
42 /* nope, you only get one page */
43 return VM_FAULT_SIGBUS;
44}
45
46static struct vm_operations_struct litmus_color_ctrl_vm_ops = {
47 .close = litmus_color_ctrl_vm_close,
48 .fault = litmus_color_ctrl_vm_fault,
49};
50
51static int mmap_common_checks(struct vm_area_struct *vma)
52{
53 /* you can only map the "first" page */
54 if (vma->vm_pgoff != 0)
55 return -EINVAL;
56
57#if 0
58 /* you can't share it with anyone */
59 /* well, maybe you can... */
60 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
61 return -EINVAL;
62#endif
63
64 return 0;
65}
66
67static int alloc_color_ctrl_page(void)
68{
69 struct task_struct *t;
70 int err = 0;
71
72 t = current;
73 /* only allocate if the task doesn't have one yet */
74 if (!tsk_rt(t)->color_ctrl_page) {
75 tsk_rt(t)->color_ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
76 if (!tsk_rt(t)->color_ctrl_page)
77 err = -ENOMEM;
78 /* will get de-allocated in task teardown */
79 TRACE_TASK(t, "%s color_ctrl_page = %p\n", __FUNCTION__,
80 tsk_rt(t)->color_ctrl_page);
81 }
82 return err;
83}
84
85static int map_color_ctrl_page(struct vm_area_struct *vma)
86{
87 int err;
88 unsigned long pfn;
89 struct task_struct *t = current;
90 struct page *color_ctrl = virt_to_page(tsk_rt(t)->color_ctrl_page);
91
92 t = current;
93 /* Increase ref count. Is decreased when vma is destroyed. */
94 get_page(color_ctrl);
95 pfn = page_to_pfn(color_ctrl);
96
97 TRACE_CUR(CTRL_NAME
98 ": mapping %p (pfn:%lx, %lx) to 0x%lx (flags:%lx prot:%lx)\n",
99 tsk_rt(t)->color_ctrl_page, pfn, page_to_pfn(color_ctrl),
100 vma->vm_start, vma->vm_flags, pgprot_val(vma->vm_page_prot));
101
102 /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise
103 * userspace actually gets a copy-on-write page. */
104 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
105
106 if (err)
107 TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err);
108
109 return err;
110}
111
112static int litmus_color_ctrl_mmap(struct file *filp, struct vm_area_struct *vma)
113{
114 int err = 0;
115
116 /* you can only get one page */
117 if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
118		TRACE_CUR(CTRL_NAME ": must allocate exactly one page\n");
119 err = -EINVAL;
120 goto out;
121 }
122
123 err = mmap_common_checks(vma);
124 if (err) {
125 TRACE_CUR(CTRL_NAME ": failed common mmap checks.\n");
126 goto out;
127 }
128
129 vma->vm_ops = &litmus_color_ctrl_vm_ops;
130 /* this mapping should not be kept across forks,
131 * and cannot be expanded */
132 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
133
134 err = alloc_color_ctrl_page();
135 if (!err)
136 err = map_color_ctrl_page(vma);
137
138 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__, vma->vm_flags,
139 pgprot_val(vma->vm_page_prot));
140out:
141 return err;
142}
143
144
145/***********************************************************
146 * Allocation device
147***********************************************************/
148
149#define vma_nr_pages(vma) \
150 ({unsigned long v = ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); v;})
151
152static int do_map_colored_pages(struct vm_area_struct *vma)
153{
154 const unsigned long nr_pages = vma_nr_pages(vma);
155 struct color_ctrl_page *color_ctrl = tsk_rt(current)->color_ctrl_page;
156 unsigned long nr_mapped;
157 int i, err = 0;
158
159 TRACE_CUR(ALLOC_NAME ": allocating %lu pages (flags:%lx prot:%lx)\n",
160 nr_pages, vma->vm_flags, pgprot_val(vma->vm_page_prot));
161
162#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
163 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
164#endif
165
166 for (i = 0, nr_mapped = 0; nr_mapped < nr_pages; ++i) {
167 const unsigned long color_no = color_ctrl->colors[i];
168 unsigned int page_no = 0;
169
170 for (; page_no < color_ctrl->pages[i]; ++page_no, ++nr_mapped) {
171 const unsigned long addr = vma->vm_start +
172 (nr_mapped << PAGE_SHIFT);
173 struct page *page = get_colored_page(color_no);
174#ifdef CONFIG_PLUGIN_COLOR_UNCACHABLE
175 const pgprot_t ins_prot = pgprot_noncached(PAGE_SHARED);
176#else
177 const pgprot_t ins_prot = PAGE_SHARED;
178#endif
179
180 if (!page) {
181 TRACE_CUR(ALLOC_NAME ": Could not get page with"
182 " color %lu.\n", color_no);
183 /* TODO unmap mapped pages */
184 err = -ENOMEM;
185 goto out;
186 }
187
188#ifdef CONFIG_SPARC
189 clear_user_highpage(page, addr);
190#endif
191
192 TRACE_CUR(ALLOC_NAME ": insert page (pa:0x%10llx, "
193 "pfn:%8lu, color:%3lu, prot:%lx) at 0x%lx "
194 "vma:(flags:%16lx prot:%16lx)\n",
195 page_to_phys(page),
196 page_to_pfn(page), color_no,
197 pgprot_val(ins_prot), addr,
198 vma->vm_flags,
199 pgprot_val(vma->vm_page_prot));
200
201 err = remap_pfn_range(vma, addr, page_to_pfn(page),
202 PAGE_SIZE, ins_prot);
203 if (err) {
204 TRACE_CUR(ALLOC_NAME ": remap_pfn_range() fail "
205 "(%d)\n", err);
206 /* TODO unmap mapped pages */
207 err = -EINVAL;
208 goto out;
209 }
210 add_page_to_alloced_list(page, vma);
211 }
212
213 if (!page_no) {
214 TRACE_CUR(ALLOC_NAME ": 0 pages given for color %lu\n",
215 color_no);
216 err = -EINVAL;
217 goto out;
218 }
219 }
220 out:
221 return err;
222}
223
224static int map_colored_pages(struct vm_area_struct *vma)
225{
226 int err = 0;
227
228 if (!tsk_rt(current)->color_ctrl_page) {
229 TRACE_CUR("Process has no color control page.\n");
230 err = -EINVAL;
231 goto out;
232 }
233
234 if (COLORS_PER_CONTROL_PAGE < vma_nr_pages(vma)) {
235		TRACE_CUR("Max pages per request is %lu but %lu were requested.\n",
236 COLORS_PER_CONTROL_PAGE, vma_nr_pages(vma));
237 err = -EINVAL;
238 goto out;
239 }
240 err = do_map_colored_pages(vma);
241out:
242 return err;
243}
244
245static void litmus_color_alloc_vm_close(struct vm_area_struct *vma)
246{
247 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__,
248 vma->vm_flags, pgprot_val(vma->vm_page_prot));
249
250 TRACE_CUR(ALLOC_NAME ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
251 (void*) vma->vm_start, (void*) vma->vm_end, vma,
252 vma->vm_private_data);
253 reclaim_pages(vma);
254}
255
256static int litmus_color_alloc_vm_fault(struct vm_area_struct *vma,
257 struct vm_fault *vmf)
258{
259 /* This function should never be called, since
260 * all pages should have been mapped by mmap()
261 * already. */
262 TRACE_CUR("%s flags=0x%lx\n", __FUNCTION__, vma->vm_flags);
263 printk(KERN_WARNING "fault: %s flags=0x%lx\n", __FUNCTION__,
264 vma->vm_flags);
265
266 /* nope, you only get one page */
267 return VM_FAULT_SIGBUS;
268}
269
270static struct vm_operations_struct litmus_color_alloc_vm_ops = {
271 .close = litmus_color_alloc_vm_close,
272 .fault = litmus_color_alloc_vm_fault,
273};
274
275static int litmus_color_alloc_mmap(struct file *filp, struct vm_area_struct *vma)
276{
277 int err = 0;
278
279 /* you may only request integer multiple of PAGE_SIZE */
280 if (offset_in_page(vma->vm_end - vma->vm_start)) {
281 err = -EINVAL;
282 goto out;
283 }
284
285 err = mmap_common_checks(vma);
286 if (err)
287 goto out;
288
289 vma->vm_ops = &litmus_color_alloc_vm_ops;
290 /* this mapping should not be kept across forks,
291 * and cannot be expanded */
292 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
293
294 err = map_colored_pages(vma);
295
296 TRACE_CUR("%s flags=0x%lx prot=0x%lx\n", __FUNCTION__, vma->vm_flags,
297 pgprot_val(vma->vm_page_prot));
298out:
299 return err;
300}
301
302/***********************************************************
303 * Initialization
304***********************************************************/
305
306static struct file_operations litmus_color_ctrl_fops = {
307 .owner = THIS_MODULE,
308 .mmap = litmus_color_ctrl_mmap,
309};
310
311static struct miscdevice litmus_color_ctrl_dev = {
312 .name = CTRL_NAME,
313 .minor = MISC_DYNAMIC_MINOR,
314 .fops = &litmus_color_ctrl_fops,
315};
316
317static struct file_operations litmus_color_alloc_fops = {
318 .owner = THIS_MODULE,
319 .mmap = litmus_color_alloc_mmap,
320};
321
322static struct miscdevice litmus_color_alloc_dev = {
323 .name = ALLOC_NAME,
324 .minor = MISC_DYNAMIC_MINOR,
325 .fops = &litmus_color_alloc_fops,
326};
327
328static int __init init_dev(const char* name, struct miscdevice *dev)
329{
330 int err;
331 err = misc_register(dev);
332 if (err)
333 printk(KERN_WARNING "Could not allocate %s device (%d).\n",
334 name, err);
335 return err;
336}
337
338static int __init init_color_devices(void)
339{
340 int err;
341
342 printk("Allocating LITMUS^RT color devices.\n");
343 err = init_dev(ALLOC_NAME, &litmus_color_alloc_dev);
344 if (err)
345 goto out;
346 err = init_dev(CTRL_NAME, &litmus_color_ctrl_dev);
347out:
348 return err;
349}
350
351module_init(init_color_devices);
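
From userspace the two devices work as a pair: a task mmap()s one page of the control device, fills in the colors[]/pages[] request arrays, and then mmap()s the allocation device to receive the colored pages. A sketch under the assumptions that the misc devices show up as /dev/litmus/color_ctrl and /dev/litmus/color_alloc and that struct color_ctrl_page exposes the colors[] and pages[] arrays used by do_map_colored_pages() above (its exact definition is not shown in this hunk; error handling omitted):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <litmus/color.h>   /* struct color_ctrl_page (assumed location) */

/* request `nr` pages of cache color `color` and map them contiguously */
static void *alloc_colored(unsigned long color, unsigned long nr)
{
	long psize = sysconf(_SC_PAGESIZE);
	int ctrl  = open("/dev/litmus/color_ctrl",  O_RDWR);
	int alloc = open("/dev/litmus/color_alloc", O_RDWR);
	struct color_ctrl_page *ctl;
	void *mem;

	ctl = mmap(NULL, psize, PROT_READ | PROT_WRITE, MAP_SHARED, ctrl, 0);
	ctl->colors[0] = color;   /* field names from do_map_colored_pages() */
	ctl->pages[0]  = nr;

	mem = mmap(NULL, nr * psize, PROT_READ | PROT_WRITE,
		   MAP_SHARED, alloc, 0);
	close(alloc);
	close(ctrl);
	return mem;
}
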
diff --git a/litmus/color_proc.c b/litmus/color_proc.c
new file mode 100644
index 000000000000..d770123c5f02
--- /dev/null
+++ b/litmus/color_proc.c
@@ -0,0 +1,220 @@
1#include <linux/module.h>
2#include <linux/sysctl.h>
3#include <linux/slab.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/color.h>
7
8extern int color_sysctl_add_pages_data; /* litmus/color.c */
9
10static int zero = 0;
11static int one = 1;
12/* used as names for server proc entries */
13static char *period_str = "period";
14static char *wcet_str = "wcet";
15
16/* servers have a WCET and period */
17#define NR_SERVER_PARAMS 2
18#define CPU_NAME_LEN 3
19struct color_cpu_server {
20 char name[CPU_NAME_LEN];
21 unsigned long wcet;
22 unsigned long period;
23 /* the + 1 is for the sentinel element */
24 struct ctl_table table[NR_SERVER_PARAMS + 1];
25};
26static struct color_cpu_server color_cpu_servers[NR_CPUS];
27
28/* the + 1 is for the sentinel element */
29static struct ctl_table color_cpu_tables[NR_CPUS + 1];
30
31unsigned long color_chunk;
32
33#define INFO_BUFFER_SIZE 100
34static char info_buffer[100];
35
36#define NR_PAGES_INDEX 0 /* location of nr_pages in the table below */
37static struct ctl_table color_table[] =
38{
39 {
40 /* you MUST update NR_PAGES_INDEX if you move this entry */
41 .procname = "nr_pages",
42 .mode = 0444,
43 .proc_handler = color_nr_pages_handler,
44 .data = NULL, /* dynamically set later */
45 .maxlen = 0, /* also set later */
46 },
47 {
48 .procname = "servers",
49 .mode = 0555,
50 .child = color_cpu_tables,
51 },
52 {
53 .procname = "add_pages",
54 .data = &color_sysctl_add_pages_data,
55 .maxlen = sizeof(int),
56 .mode = 0644,
57 .proc_handler = color_add_pages_handler,
58 .extra1 = &zero,
59 .extra2 = &one,
60 },
61 {
62 .procname = "cache_info",
63 .mode = 0444,
64 .proc_handler = proc_dostring,
65 .data = info_buffer,
66 .maxlen = INFO_BUFFER_SIZE,
67 },
68 {
69 .procname = "chunk_size",
70 .mode = 0666,
71 .proc_handler = proc_doulongvec_minmax,
72 .data = &color_chunk,
73 .maxlen = sizeof(color_chunk),
74 },
75 { }
76};
77
78static struct ctl_table litmus_table[] =
79{
80 {
81 .procname = "color",
82 .mode = 0555,
83 .child = color_table,
84 },
85 { }
86};
87
88static struct ctl_table litmus_dir_table[] = {
89 {
90 .procname = "litmus",
91 .mode = 0555,
92 .child = litmus_table,
93 },
94 { }
95};
96
97int color_server_params(int cpu, unsigned long *wcet, unsigned long *period)
98{
99 struct color_cpu_server *svr;
100
101 if (cpu >= num_online_cpus()) {
102 printk(KERN_WARNING "Cannot access illegal CPU: %d\n", cpu);
103 return -EFAULT;
104 }
105
106 svr = &color_cpu_servers[cpu];
107 if (svr->wcet == 0 || svr->period == 0) {
108 printk(KERN_WARNING "Server %d is uninitialized!\n", cpu);
109 return -EPERM;
110 }
111
112 *wcet = svr->wcet;
113 *period = svr->period;
114
115 TRACE("For %d: %lu, %lu\n", cpu, svr->wcet, svr->period);
116
117 return 0;
118}
119
120/* must be called AFTER nr_colors is set */
121static int __init init_sysctl_nr_colors(void)
122{
123 int ret = 0, maxlen = ONE_COLOR_LEN * color_cache_info.nr_colors;
124 color_table[NR_PAGES_INDEX].data = kmalloc(maxlen, GFP_KERNEL);
125 if (!color_table[NR_PAGES_INDEX].data) {
126 printk(KERN_WARNING "Could not allocate nr_pages buffer.\n");
127 ret = -ENOMEM;
128 goto out;
129 }
130 color_table[NR_PAGES_INDEX].maxlen = maxlen;
131out:
132 return ret;
133}
134
135static void __init init_server_entry(struct ctl_table *entry,
136 unsigned long *parameter,
137 char *name)
138{
139 entry->procname = name;
140 entry->mode = 0666;
141 entry->proc_handler = proc_doulongvec_minmax;
142 entry->data = parameter;
143 entry->maxlen = sizeof(*parameter);
144}
145
146static int __init init_cpu_entry(struct ctl_table *cpu_table,
147 struct color_cpu_server *svr, int cpu)
148{
149 struct ctl_table *entry = svr->table;
150
151 printk(KERN_INFO "Creating server sysctl entries for CPU %d\n", cpu);
152
153 init_server_entry(entry, &svr->wcet, wcet_str);
154 entry++;
155 init_server_entry(entry, &svr->period, period_str);
156
157 /* minus one for the null byte */
158 snprintf(svr->name, CPU_NAME_LEN - 1, "%d", cpu);
159 cpu_table->procname = svr->name;
160 cpu_table->mode = 0555;
161 cpu_table->child = svr->table;
162
163 return 0;
164}
165
166static int __init init_server_entries(void)
167{
168 int cpu, err = 0;
169 struct ctl_table *cpu_table;
170 struct color_cpu_server *svr;
171
172 for_each_online_cpu(cpu) {
173 cpu_table = &color_cpu_tables[cpu];
174 svr = &color_cpu_servers[cpu];
175 err = init_cpu_entry(cpu_table, svr, cpu);
176 if (err)
177 goto out;
178 }
179out:
180 return err;
181}
182
183
184static struct ctl_table_header *litmus_sysctls;
185
186static int __init litmus_sysctl_init(void)
187{
188 int ret = 0;
189
190 printk(KERN_INFO "Registering LITMUS^RT proc sysctl.\n");
191 litmus_sysctls = register_sysctl_table(litmus_dir_table);
192 if (!litmus_sysctls) {
193 printk(KERN_WARNING "Could not register LITMUS^RT sysctl.\n");
194 ret = -EFAULT;
195 goto out;
196 }
197 ret = init_sysctl_nr_colors();
198 if (ret)
199 goto out;
200
201 ret = init_server_entries();
202 if (ret)
203 goto out;
204
205 snprintf(info_buffer, INFO_BUFFER_SIZE,
206 "Cache size\t: %lu B\n"
207 "Line size\t: %lu B\n"
208 "Page size\t: %lu B\n"
209 "Ways\t\t: %lu\n"
210 "Sets\t\t: %lu\n"
211 "Colors\t\t: %lu",
212 color_cache_info.size, color_cache_info.line_size, PAGE_SIZE,
213 color_cache_info.ways, color_cache_info.sets,
214 color_cache_info.nr_colors);
215
216out:
217 return ret;
218}
219
220module_init(litmus_sysctl_init);
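A minimal consumer sketch (illustrative only, not part of the patch): how a plugin could pull the per-CPU server parameters configured through /proc/sys/litmus/color/servers/<cpu>/; the function name and printk text are placeholders.

static void example_dump_color_servers(void)
{
	unsigned long wcet, period;
	int cpu;

	for_each_online_cpu(cpu) {
		/* returns -EPERM until both wcet and period have been set */
		if (color_server_params(cpu, &wcet, &period))
			continue;
		printk(KERN_INFO "cpu %d: wcet %lu, period %lu\n",
		       cpu, wcet, period);
	}
}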
diff --git a/litmus/dgl.c b/litmus/dgl.c
new file mode 100644
index 000000000000..dd2a42cc9ca6
--- /dev/null
+++ b/litmus/dgl.c
@@ -0,0 +1,300 @@
1#include <linux/sched.h>
2#include <linux/slab.h>
3
4#include <litmus/litmus.h>
5#include <litmus/dgl.h>
6#include <litmus/sched_trace.h>
7
8#define MASK_SIZE (sizeof(unsigned long) * 8)
9
10/* Return number of MASK_SIZE fields needed to store a mask in d */
11#define WP(num, word) (num / word + (num % word != 0))
12#define MASK_WORDS(d) WP(d->num_resources, MASK_SIZE)
13
14/* Word, bit -> resource id */
15#define ri(w, b) (w * MASK_SIZE + b)
16
17 /* For loop, where @i iterates over each set bit in @bit_arr */
18#define for_each_resource(bit_arr, d, w, b, i) \
19 for(w = 0; w < MASK_WORDS(d); ++w) \
20 for(b = find_first_bit(&bit_arr[w],MASK_SIZE), i = ri(w, b); \
21 b < MASK_SIZE; \
22 b = find_next_bit(&bit_arr[w],MASK_SIZE,b+1), i = ri(w, b))
23
24/* Return resource id in dgl @d for resource @r */
25#define resource_id(d, r) ((((void*)r) - (void*)((d)->resources))/ sizeof(*r))
26
27/* Return request group of req @r for resource @i */
28#define req_group(r, i) (container_of(((void*)r) - sizeof(*r)*(i), \
29 struct dgl_group_req, requests))
30
31/* Resource id -> word, bit */
32static inline void mask_idx(int resource, int *word, int *bit)
33{
34 *word = resource / MASK_SIZE;
35 *bit = resource % MASK_SIZE;
36}
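/* Worked example (illustration only, not part of the patch): with 64-bit
 * longs MASK_SIZE is 64, so a dgl with 100 resources needs
 * MASK_WORDS(d) = WP(100, 64) = 100/64 + (100 % 64 != 0) = 2 words,
 * and resource 70 maps to word 1, bit 6 (ri(1, 6) = 1 * 64 + 6 = 70).
 */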
37
38
39static void print_waiting(struct dgl *dgl, struct dgl_resource *resource)
40{
41 struct dgl_req *pos;
42 struct dgl_group_req *greq;
43 unsigned long long last = 0;
44
45 TRACE("List for rid %d\n", resource_id(dgl, resource));
46 list_for_each_entry(pos, &resource->waiting, list) {
47 greq = pos->greq;
48 TRACE(" 0x%p with timestamp %llu\n", greq, greq->ts);
49 BUG_ON(greq->ts < last);
50 last = greq->ts;
51 }
52}
53
54void dgl_init(struct dgl *dgl, unsigned long num_resources,
55 unsigned long num_replicas)
56{
57 int i;
58 struct dgl_resource *resource;
59
60 dgl->num_replicas = num_replicas;
61 dgl->num_resources = num_resources;
62
63 dgl->resources = kmalloc(sizeof(*dgl->resources) * num_resources,
64 GFP_ATOMIC);
65 dgl->acquired = kmalloc(sizeof(*dgl->acquired) * num_online_cpus(),
66 GFP_ATOMIC);
67
68 for (i = 0; i < num_online_cpus(); ++i)
69 dgl->acquired[i] = NULL;
70
71 for (i = 0; i < num_resources; i++) {
72 resource = &dgl->resources[i];
73
74 INIT_LIST_HEAD(&resource->waiting);
75 resource->free_replicas = dgl->num_replicas;
76 }
77
78 dgl->requests = 0;
79 dgl->running = 0;
80 dgl->ts = 0;
81}
82
83void dgl_free(struct dgl *dgl)
84{
85 kfree(dgl->resources);
86 kfree(dgl->acquired);
87}
88
89void dgl_group_req_init(struct dgl *dgl, struct dgl_group_req *greq)
90{
91 int i;
92
93 greq->requested = kmalloc(sizeof(*greq->requested) * MASK_WORDS(dgl),
94 GFP_ATOMIC);
95 greq->waiting = kmalloc(sizeof(*greq->waiting) * MASK_WORDS(dgl),
96 GFP_ATOMIC);
97 greq->requests = kmalloc(sizeof(*greq->requests) * dgl->num_resources,
98 GFP_ATOMIC);
99
100 BUG_ON(!greq->requested);
101 BUG_ON(!greq->waiting);
102 BUG_ON(!greq->requests);
103
104 greq->cpu = NO_CPU;
105 for (i = 0; i < MASK_WORDS(dgl); ++i) {
106 greq->requested[i] = 0;
107 greq->waiting[i] = 0;
108 }
109}
110
111void dgl_group_req_free(struct dgl_group_req *greq)
112{
113 kfree(greq->requested);
114 kfree(greq->waiting);
115 kfree(greq->requests);
116}
117
118/**
119 * set_req - create request for @replicas of @resource.
120 */
121void set_req(struct dgl *dgl, struct dgl_group_req *greq,
122 int resource, int replicas)
123{
124 int word, bit;
125 struct dgl_req *req;
126
127 if (replicas > dgl->num_replicas)
128 replicas = dgl->num_replicas;
129
130 mask_idx(resource, &word, &bit);
131 __set_bit(bit, &greq->requested[word]);
132
133 TRACE("0x%p requesting %d of %d\n", greq, replicas, resource);
134
135 req = &greq->requests[resource];
136 req->greq = greq;
137 INIT_LIST_HEAD(&req->list);
138 req->replicas = replicas;
139}
140
141/*
142 * Attempt to fulfill request @req for @resource.
143 * Return 1 if successful. If the matching group request has acquired all of
144 * its needed resources, this will then set that req as dgl->acquired[cpu].
145 */
146static unsigned long try_acquire(struct dgl *dgl, struct dgl_resource *resource,
147 struct dgl_req *req)
148{
149 int word, bit, rid, head, empty, room;
150 unsigned long waiting;
151 struct dgl_group_req *greq;
152
153 rid = resource_id(dgl, resource);
154 greq = req->greq;
155
156 TRACE("0x%p greq\n", greq);
157
158 head = resource->waiting.next == &req->list;
159 empty = list_empty(&resource->waiting);
160 room = resource->free_replicas >= req->replicas;
161
162 if (! (room && (head || empty)) ) {
163 TRACE("0x%p cannot acquire %d replicas, %d free "
164 "(room=%d head=%d empty=%d)\n", greq, req->replicas,
165 resource->free_replicas, room, head, empty);
166 return 0;
167 }
168
169 resource->free_replicas -= req->replicas;
170
171 TRACE("0x%p acquired %d replicas of rid %d\n",
172 greq, req->replicas, rid);
173
174 mask_idx(rid, &word, &bit);
175
176
177 TRACE("waiting mask at 0x%p, word value 0x%lx at 0x%p\n",
178 greq->waiting, greq->waiting[word], &greq->waiting[word]);
179
180 clear_bit(bit, &greq->waiting[word]);
181
182 waiting = 0;
183 for (word = 0; word < MASK_WORDS(dgl); word++) {
184 waiting |= greq->waiting[word];
185 if (waiting)
186 break;
187 }
188
189 if (!waiting) {
190 TRACE("0x%p acquired all resources\n", greq);
191 BUG_ON(dgl->acquired[greq->cpu]);
192 dgl->acquired[greq->cpu] = greq;
193 litmus_reschedule(greq->cpu);
194 dgl->running++;
195 }
196
197 return 1;
198}
199
200/**
201 * add_group_req - initiate group request.
202 */
203void add_group_req(struct dgl *dgl, struct dgl_group_req *greq, int cpu)
204{
205 int b, w, i, succ, all_succ = 1;
206 struct dgl_req *req;
207 struct dgl_resource *resource;
208
209 greq->cpu = cpu;
210 greq->ts = dgl->ts++;
211
212 TRACE("0x%p group request added for CPU %d\n", greq, cpu);
213 BUG_ON(dgl->acquired[cpu] == greq);
214
215 ++dgl->requests;
216
217 for_each_resource(greq->requested, dgl, w, b, i) {
218 __set_bit(b, &greq->waiting[w]);
219 }
220
221 for_each_resource(greq->requested, dgl, w, b, i) {
222 req = &greq->requests[i];
223 resource = &dgl->resources[i];
224
225 succ = try_acquire(dgl, resource, req);
226 all_succ &= succ;
227
228 if (!succ) {
229 TRACE("0x%p waiting on rid %d\n", greq, i);
230 list_add_tail(&req->list, &resource->waiting);
231 }
232 }
233
234 /* Grant empty requests */
235 if (all_succ && !dgl->acquired[cpu]) {
236 TRACE("0x%p empty group request acquired cpu %d\n", greq, cpu);
237 dgl->acquired[cpu] = greq;
238 ++dgl->running;
239 }
240
241 BUG_ON(dgl->requests && !dgl->running);
242}
243
244/**
245 * remove_group_req - abandon group request.
246 *
247 * This will also progress the waiting queues of resources acquired by @greq.
248 */
249void remove_group_req(struct dgl *dgl, struct dgl_group_req *greq)
250{
251 int b, w, i;
252 struct dgl_req *req, *next;
253 struct dgl_resource *resource;
254
255 TRACE("0x%p removing group request for CPU %d\n", greq, greq->cpu);
256
257 --dgl->requests;
258
259 if (dgl->acquired[greq->cpu] == greq) {
260 TRACE("0x%p no longer acquired on CPU %d\n", greq, greq->cpu);
261 dgl->acquired[greq->cpu] = NULL;
262 --dgl->running;
263 }
264
265 for_each_resource(greq->requested, dgl, w, b, i) {
266 req = &greq->requests[i];
267 resource = &dgl->resources[i];
268
269 if (!list_empty(&req->list)) {
270 /* Waiting on resource */
271 clear_bit(b, &greq->waiting[w]);
272 list_del_init(&req->list);
273 TRACE("Quitting 0x%p from rid %d\n",
274 req, i);
275 } else {
276 /* Have resource */
277 resource->free_replicas += req->replicas;
278 BUG_ON(resource->free_replicas > dgl->num_replicas);
279 TRACE("0x%p releasing %d of %d replicas, rid %d\n",
280 greq, req->replicas, resource->free_replicas, i);
281
282 if (!list_empty(&resource->waiting)) {
283 /* Give it to the next guy */
284 next = list_first_entry(&resource->waiting,
285 struct dgl_req,
286 list);
287
288 BUG_ON(next->greq->ts < greq->ts);
289
290 if (try_acquire(dgl, resource, next)) {
291 list_del_init(&next->list);
292 print_waiting(dgl, resource);
293
294 }
295 }
296 }
297 }
298
299 BUG_ON(dgl->requests && !dgl->running);
300}
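A usage sketch for the API above (illustrative only, not part of the patch); it assumes four resources with two replicas each and glosses over the locking that real callers, such as the scheduler plugins, provide.

static struct dgl example_dgl;
static struct dgl_group_req example_greq;

static void example_dgl_usage(int cpu)
{
	dgl_init(&example_dgl, 4, 2);
	dgl_group_req_init(&example_dgl, &example_greq);

	/* ask for 1 replica of resource 0 and 2 replicas of resource 3 */
	set_req(&example_dgl, &example_greq, 0, 1);
	set_req(&example_dgl, &example_greq, 3, 2);

	/* issue the group request; once every requested replica is held,
	 * example_dgl.acquired[cpu] points at example_greq
	 */
	add_group_req(&example_dgl, &example_greq, cpu);

	/* ... critical section, entered once acquired ... */

	remove_group_req(&example_dgl, &example_greq);
	dgl_group_req_free(&example_greq);
	dgl_free(&example_dgl);
}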
diff --git a/litmus/domain.c b/litmus/domain.c
new file mode 100644
index 000000000000..4dc3649a0389
--- /dev/null
+++ b/litmus/domain.c
@@ -0,0 +1,21 @@
1#include <linux/list.h>
2#include <linux/spinlock_types.h>
3
4#include <litmus/domain.h>
5
6void domain_init(domain_t *dom,
7 raw_spinlock_t *lock,
8 requeue_t requeue,
9 peek_ready_t peek_ready,
10 take_ready_t take_ready,
11 preempt_needed_t preempt_needed,
12 task_prio_t priority)
13{
14 INIT_LIST_HEAD(&dom->list);
15 dom->lock = lock;
16 dom->requeue = requeue;
17 dom->peek_ready = peek_ready;
18 dom->take_ready = take_ready;
19 dom->preempt_needed = preempt_needed;
20 dom->higher_prio = priority;
21}
diff --git a/litmus/event_group.c b/litmus/event_group.c
new file mode 100644
index 000000000000..478698a6d17a
--- /dev/null
+++ b/litmus/event_group.c
@@ -0,0 +1,334 @@
1#include <linux/slab.h>
2#include <linux/sched.h>
3#include <linux/module.h>
4
5#include <litmus/litmus.h>
6#include <litmus/trace.h>
7#include <litmus/sched_trace.h>
8#include <litmus/event_group.h>
9
10#if 1
11#define VTRACE(fmt, args...) \
12sched_trace_log_message("%d P%d [%s@%s:%d]: " fmt, \
13 TRACE_ARGS, ## args)
14#else
15#define VTRACE(fmt, args...)
16#endif
17
18/*
19 * Return event_queue slot for the given time.
20 */
21static unsigned int time2slot(lt_t time)
22{
23 return (unsigned int) time2quanta(time, FLOOR) % EVENT_QUEUE_SLOTS;
24}
25
26/*
27 * Executes events from an event_list in priority order.
28 * Events can requeue themselves when they are called.
29 */
30static enum hrtimer_restart on_timer(struct hrtimer *timer)
31{
32 int prio, num;
33 unsigned long flags;
34 struct event_list *el;
35 struct rt_event *e;
36 struct list_head *pos, events[NUM_EVENT_PRIORITIES];
37 raw_spinlock_t *queue_lock;
38
39 el = container_of(timer, struct event_list, timer);
40 queue_lock = &el->group->queue_lock;
41
42 raw_spin_lock_irqsave(queue_lock, flags);
43
44 /* Remove event_list from hashtable so that no more events
45 * are added to it.
46 */
47 VTRACE("Removing event list 0x%x\n", el);
48 list_del_init(&el->queue_node);
49
50 /* Copy over events so that the event_list can re-used when the lock
51 * is released.
52 */
53 VTRACE("Emptying event list 0x%x\n", el);
54 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++) {
55 list_replace_init(&el->events[prio], &events[prio]);
56 }
57
58 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++) {
59 /* Fire events. Complicated loop is used so that events
60 * in the list can be canceled (removed) while other events are
61 * executing.
62 */
63 for (pos = events[prio].next, num = 0;
64 prefetch(pos->next), events[prio].next != &events[prio];
65 pos = events[prio].next, num++) {
66
67 e = list_entry(pos, struct rt_event, events_node);
68 list_del_init(pos);
69 raw_spin_unlock_irqrestore(queue_lock, flags);
70
71 VTRACE("Dequeueing event 0x%x with prio %d from 0x%x\n",
72 e, e->prio, el);
73 e->function(e);
74
75 raw_spin_lock_irqsave(queue_lock, flags);
76 }
77 }
78 raw_spin_unlock_irqrestore(queue_lock, flags);
79
80 VTRACE("Exhausted %d events from list 0x%x\n", num, el);
81
82 return HRTIMER_NORESTART;
83}
84
85/*
86 * Return event_list for the given event and time. If no event_list
87 * is being used yet and use_event_heap is 1, will create the list
88 * and return it. Otherwise it will return NULL.
89 */
90static struct event_list* get_event_list(struct event_group *group,
91 struct rt_event *e,
92 lt_t fire,
93 int use_event_list)
94{
95 struct list_head* pos;
96 struct event_list *el = NULL, *tmp;
97 unsigned int slot = time2slot(fire);
98 int remaining = 300;
99
100 VTRACE("Getting list for time %llu, event 0x%x\n", fire, e);
101
102 /* Initialize pos for the case that the list is empty */
103 pos = group->event_queue[slot].next;
104 list_for_each(pos, &group->event_queue[slot]) {
105 BUG_ON(remaining-- < 0);
106 tmp = list_entry(pos, struct event_list, queue_node);
107 if (lt_after_eq(fire, tmp->fire_time) &&
108 lt_before(fire, tmp->fire_time + group->res)) {
109 VTRACE("Found match 0x%x at time %llu\n",
110 tmp, tmp->fire_time);
111 el = tmp;
112 break;
113 } else if (lt_before(fire, tmp->fire_time)) {
114 /* We need to insert a new node since this entry
115 * is already in the future
116 */
117 VTRACE("Time %llu was before %llu\n",
118 fire, tmp->fire_time);
119 break;
120 } else {
121 VTRACE("Time %llu was after %llu\n",
122 fire, tmp->fire_time + group->res);
123 }
124 }
125 if (!el && use_event_list) {
126 /* Use pre-allocated list */
127 tmp = e->event_list;
128 tmp->fire_time = fire;
129 tmp->group = group;
130 /* Add to queue */
131 VTRACE("Using list 0x%x for priority %d and time %llu\n",
132 tmp, e->prio, fire);
133 BUG_ON(!list_empty(&tmp->queue_node));
134 list_add(&tmp->queue_node, pos->prev);
135 el = tmp;
136 }
137 return el;
138}
139
140/*
141 * Prepare a release list for a new set of events.
142 */
143static void reinit_event_list(struct event_group *group, struct rt_event *e)
144{
145 int prio, t_ret;
146 struct event_list *el = e->event_list;
147
148 VTRACE("Reinitting list 0x%x for event 0x%x\n", el, e);
149
150 /* Cancel timer */
151 t_ret = hrtimer_pull_cancel(group->cpu, &el->timer, &el->info);
152 BUG_ON(t_ret == 1);
153 if (t_ret == -1) {
154 /* The on_timer callback is running for this list */
155 VTRACE("Timer is running concurrently!\n");
156 }
157 /* Clear event lists */
158 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++)
159 INIT_LIST_HEAD(&el->events[prio]);
160}
161
162/**
163 * add_event() - Add timer to event group.
164 */
165void add_event(struct event_group *group, struct rt_event *e, lt_t fire)
166{
167 struct event_list *el;
168 int in_use;
169
170 VTRACE("Adding event 0x%x with priority %d for time %llu\n",
171 e, e->prio, fire);
172
173 /* A NULL group means use the group of the currently executing CPU */
174 if (NULL == group)
175 group = get_event_group_for(NO_CPU);
176 /* Saving the group is important for cancellations */
177 e->_event_group = group;
178
179 raw_spin_lock(&group->queue_lock);
180 el = get_event_list(group, e, fire, 0);
181 if (!el) {
182 /* Use our own, but drop lock first */
183 raw_spin_unlock(&group->queue_lock);
184 reinit_event_list(group, e);
185 raw_spin_lock(&group->queue_lock);
186 el = get_event_list(group, e, fire, 1);
187 }
188
189 /* Add event to the list for its priority */
190 VTRACE("Inserting event 0x%x into event_list 0x%x\n", e, el);
191 list_add(&e->events_node, &el->events[e->prio]);
192 raw_spin_unlock(&group->queue_lock);
193
194 /* Arm timer if we are the owner */
195 if (el == e->event_list) {
196 VTRACE("Arming timer on event 0x%x for %llu\n", e, fire);
197 in_use = hrtimer_start_on(group->cpu, &el->info,
198 &el->timer, ns_to_ktime(el->fire_time),
199 HRTIMER_MODE_ABS_PINNED);
200 BUG_ON(in_use);
201 } else {
202 VTRACE("Not my timer @%llu\n", fire);
203 }
204}
205
206/**
207 * cancel_event() - Remove event from the group.
208 */
209void cancel_event(struct rt_event *e)
210{
211 int prio, cancel;
212 struct rt_event *swap, *entry;
213 struct event_list *tmp;
214 struct event_group *group;
215 struct list_head *list, *pos;
216
217 VTRACE("Canceling event 0x%x with priority %d\n", e, e->prio);
218 group = e->_event_group;
219 if (!group) return;
220
221 raw_spin_lock(&group->queue_lock);
222
223 /* Relies on the fact that an event_list's owner is ALWAYS present
224 * as one of the event_list's events.
225 */
226 for (prio = 0, cancel = 0, swap = NULL;
227 prio < NUM_EVENT_PRIORITIES && !swap;
228 prio++) {
229
230 list = &e->event_list->events[prio];
231 cancel |= !list_empty(list);
232
233 /* Find any element which is not the event_list's owner */
234 list_for_each(pos, list) {
235 entry = list_entry(pos, struct rt_event, events_node);
236 if (entry != e) {
237 swap = entry;
238 break;
239 }
240 }
241 }
242
243 if (swap) {
244 /* Give the other guy ownership of the event_list */
245 VTRACE("Swapping list 0x%x with event 0x%x event list 0x%x\n",
246 e->event_list, swap, swap->event_list);
247 tmp = swap->event_list;
248 swap->event_list = e->event_list;
249 BUG_ON(!tmp);
250 e->event_list = tmp;
251 } else if (cancel) {
252 /* Cancel the event_list we own */
253 hrtimer_pull_cancel(group->cpu,
254 &e->event_list->timer,
255 &e->event_list->info);
256 list_del_init(&e->event_list->queue_node);
257 }
258 /* Remove ourselves from any list we may be a part of */
259 list_del_init(&e->events_node);
260 e->_event_group = NULL;
261
262 raw_spin_unlock(&group->queue_lock);
263}
264
265struct kmem_cache *event_list_cache;
266
267struct event_list* event_list_alloc(int gfp_flags)
268{
269 int prio;
270 struct event_list *el = kmem_cache_alloc(event_list_cache, gfp_flags);
271 if (el) {
272 hrtimer_init(&el->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
273 INIT_LIST_HEAD(&el->queue_node);
274 el->timer.function = on_timer;
275 hrtimer_start_on_info_init(&el->info);
276 for (prio = 0; prio < NUM_EVENT_PRIORITIES; prio++)
277 INIT_LIST_HEAD(&el->events[prio]);
278 } else {
279 VTRACE("Failed to allocate event list!\n");
280 printk(KERN_CRIT "Failed to allocate event list.\n");
281 BUG();
282 }
283 return el;
284}
285
286void init_event(struct rt_event *e, int prio, fire_event_t function,
287 struct event_list *el)
288{
289 e->prio = prio;
290 e->function = function;
291 e->event_list = el;
292 e->_event_group = NULL;
293 INIT_LIST_HEAD(&e->events_node);
294}
295
296/**
297 * init_event_group() - Prepare group for events.
298 * @group Group to prepare
299 * @res Timer resolution. Two events within @res of each other are merged
300 * @cpu Cpu on which to fire timers
301 */
302static void init_event_group(struct event_group *group, lt_t res, int cpu)
303{
304 int i;
305 VTRACE("Creating group with resolution %llu on CPU %d\n", res, cpu);
306 group->res = res;
307 group->cpu = cpu;
308 for (i = 0; i < EVENT_QUEUE_SLOTS; i++)
309 INIT_LIST_HEAD(&group->event_queue[i]);
310 raw_spin_lock_init(&group->queue_lock);
311}
312
313
314DEFINE_PER_CPU(struct event_group, _event_groups);
315
316struct event_group *get_event_group_for(const int cpu)
317{
318 return &per_cpu(_event_groups,
319 (NO_CPU == cpu) ? smp_processor_id() : cpu);
320}
321
322static int __init _init_event_groups(void)
323{
324 int cpu;
325 printk("Initializing LITMUS^RT event groups.\n");
326
327 for_each_online_cpu(cpu) {
328 init_event_group(get_event_group_for(cpu),
329 CONFIG_MERGE_TIMERS_WINDOW, cpu);
330 }
331 return 0;
332}
333
334module_init(_init_event_groups);
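A sketch of the event-group API defined above (illustrative only, not part of the patch); the callback and priority are placeholders, and the IRQ/locking context that real callers run in is glossed over.

static void example_fire(struct rt_event *e)
{
	TRACE("event 0x%p fired\n", e);
}

static void example_event_usage(void)
{
	static struct rt_event ev;

	/* priority 0, backed by a freshly allocated event_list */
	init_event(&ev, 0, example_fire, event_list_alloc(GFP_ATOMIC));

	/* NULL group: use the event group of the local CPU;
	 * fire roughly 1 ms from now
	 */
	add_event(NULL, &ev, litmus_clock() + 1000000);

	/* drop the event again if it should no longer fire */
	cancel_event(&ev);
}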
diff --git a/litmus/fifo_common.c b/litmus/fifo_common.c
new file mode 100644
index 000000000000..84ae98e42ae4
--- /dev/null
+++ b/litmus/fifo_common.c
@@ -0,0 +1,58 @@
1/*
2 * litmus/fifo_common.c
3 *
4 * Common functions for FIFO based schedulers.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fifo_common.h>
16
17int fifo_higher_prio(struct task_struct* first,
18 struct task_struct* second)
19{
20 /* There is no point in comparing a task to itself. */
21 if (first && first == second) {
22 TRACE_TASK(first,
23 "WARNING: pointless fifo priority comparison.\n");
24 BUG_ON(1);
25 return 0;
26 }
27
28 if (!first || !second)
29 return first && !second;
30
31 /* Tiebreak by PID */
32 return (get_release(first) == get_release(second) &&
33 first->pid > second->pid) ||
34 (get_release(first) < get_release(second));
35
36
37}
38
39int fifo_ready_order(struct bheap_node* a, struct bheap_node* b)
40{
41 return fifo_higher_prio(bheap2task(a), bheap2task(b));
42}
43
44void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
45 release_jobs_t release)
46{
47 rt_domain_init(rt, fifo_ready_order, resched, release);
48}
49
50int fifo_preemption_needed(rt_domain_t* rt, struct task_struct *t)
51{
52 if (!__jobs_pending(rt))
53 return 0;
54 if (!t)
55 return 1;
56
57 return !is_realtime(t) || fifo_higher_prio(__next_ready(rt), t);
58}
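A wiring sketch (illustrative only, not part of the patch) for the FIFO domain helpers above. It assumes the check_resched/release_jobs callback signatures used elsewhere in this patch (rt->check_resched(rt) and release_jobs(rt, heap)); the callback bodies are placeholders.

static rt_domain_t example_fifo;

static int example_check_resched(rt_domain_t *dom)
{
	/* a real plugin would compare against its scheduled task first */
	litmus_reschedule(smp_processor_id());
	return 0;
}

static void example_release_jobs(rt_domain_t *dom, struct bheap *released)
{
	__merge_ready(dom, released);
}

static void example_fifo_setup(void)
{
	fifo_domain_init(&example_fifo, example_check_resched,
			 example_release_jobs);
}

static int example_should_preempt(struct task_struct *scheduled)
{
	/* caller must hold example_fifo.ready_lock with IRQs off */
	return fifo_preemption_needed(&example_fifo, scheduled);
}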
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index 06fcf4cf77dc..999290fc8302 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -230,13 +230,20 @@ static ssize_t ftdev_read(struct file *filp,
230 * here with copied data because that data would get 230 * here with copied data because that data would get
231 * lost if the task is interrupted (e.g., killed). 231 * lost if the task is interrupted (e.g., killed).
232 */ 232 */
233 mutex_unlock(&ftdm->lock);
233 set_current_state(TASK_INTERRUPTIBLE); 234 set_current_state(TASK_INTERRUPTIBLE);
235
234 schedule_timeout(50); 236 schedule_timeout(50);
237
235 if (signal_pending(current)) { 238 if (signal_pending(current)) {
236 if (err == 0) 239 if (err == 0)
237 /* nothing read yet, signal problem */ 240 /* nothing read yet, signal problem */
238 err = -ERESTARTSYS; 241 err = -ERESTARTSYS;
239 break; 242 goto out;
243 }
244 if (mutex_lock_interruptible(&ftdm->lock)) {
245 err = -ERESTARTSYS;
246 goto out;
240 } 247 }
241 } else if (copied < 0) { 248 } else if (copied < 0) {
242 /* page fault */ 249 /* page fault */
@@ -303,7 +310,6 @@ struct file_operations ftdev_fops = {
303 .owner = THIS_MODULE, 310 .owner = THIS_MODULE,
304 .open = ftdev_open, 311 .open = ftdev_open,
305 .release = ftdev_release, 312 .release = ftdev_release,
306 .write = ftdev_write,
307 .read = ftdev_read, 313 .read = ftdev_read,
308 .unlocked_ioctl = ftdev_ioctl, 314 .unlocked_ioctl = ftdev_ioctl,
309}; 315};
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 8a0c889e2cb8..bd3175baefae 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -11,13 +11,17 @@ static inline void setup_release(struct task_struct *t, lt_t release)
11{ 11{
12 /* prepare next release */ 12 /* prepare next release */
13 tsk_rt(t)->job_params.release = release; 13 tsk_rt(t)->job_params.release = release;
14 tsk_rt(t)->job_params.deadline += release + get_rt_period(t); 14 tsk_rt(t)->job_params.deadline = release + get_rt_relative_deadline(t);
15 tsk_rt(t)->job_params.exec_time = 0; 15 tsk_rt(t)->job_params.exec_time = 0;
16
16 /* update job sequence number */ 17 /* update job sequence number */
17 tsk_rt(t)->job_params.job_no++; 18 tsk_rt(t)->job_params.job_no++;
18 19
19 /* don't confuse Linux */ 20 /* don't confuse Linux */
20 t->rt.time_slice = 1; 21 t->rt.time_slice = 1;
22
23 TRACE_TASK(t, "Releasing at %llu, deadline: %llu, period: %llu, now: %llu\n",
24 release, get_deadline(t), get_rt_period(t), litmus_clock());
21} 25}
22 26
23void prepare_for_next_period(struct task_struct *t) 27void prepare_for_next_period(struct task_struct *t)
@@ -41,9 +45,8 @@ void release_at(struct task_struct *t, lt_t start)
41 set_rt_flags(t, RT_F_RUNNING); 45 set_rt_flags(t, RT_F_RUNNING);
42} 46}
43 47
44
45/* 48/*
46 * Deactivate current task until the beginning of the next period. 49 * Deactivate current task until the beginning of the next period.
47 */ 50 */
48long complete_job(void) 51long complete_job(void)
49{ 52{
@@ -51,11 +54,14 @@ long complete_job(void)
51 lt_t now = litmus_clock(); 54 lt_t now = litmus_clock();
52 lt_t exec_time = tsk_rt(current)->job_params.exec_time; 55 lt_t exec_time = tsk_rt(current)->job_params.exec_time;
53 56
57 /* Task statistic summaries */
54 tsk_rt(current)->tot_exec_time += exec_time; 58 tsk_rt(current)->tot_exec_time += exec_time;
55 if (lt_before(tsk_rt(current)->max_exec_time, exec_time)) 59 if (lt_before(tsk_rt(current)->max_exec_time, exec_time))
56 tsk_rt(current)->max_exec_time = exec_time; 60 tsk_rt(current)->max_exec_time = exec_time;
57 61
58 if (is_tardy(current, now)) { 62 if (is_tardy(current, now)) {
63 TRACE_TASK(current, "is tardy, now: %llu, deadline: %llu\n",
64 now, get_deadline(current));
59 amount = now - get_deadline(current); 65 amount = now - get_deadline(current);
60 if (lt_after(amount, tsk_rt(current)->max_tardy)) 66 if (lt_after(amount, tsk_rt(current)->max_tardy))
61 tsk_rt(current)->max_tardy = amount; 67 tsk_rt(current)->max_tardy = amount;
@@ -63,8 +69,9 @@ long complete_job(void)
63 ++tsk_rt(current)->missed; 69 ++tsk_rt(current)->missed;
64 } 70 }
65 71
66 /* Mark that we do not excute anymore */ 72 /* Mark that we do not execute anymore */
67 set_rt_flags(current, RT_F_SLEEP); 73 set_rt_flags(current, RT_F_SLEEP);
74
68 /* call schedule, this will return when a new job arrives 75 /* call schedule, this will return when a new job arrives
69 * it also takes care of preparing for the next release 76 * it also takes care of preparing for the next release
70 */ 77 */
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 5e8221da35e9..cb41548d3e2d 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -13,10 +13,19 @@
13#include <litmus/litmus.h> 13#include <litmus/litmus.h>
14#include <litmus/bheap.h> 14#include <litmus/bheap.h>
15#include <litmus/trace.h> 15#include <litmus/trace.h>
16#include <litmus/event_group.h>
16#include <litmus/rt_domain.h> 17#include <litmus/rt_domain.h>
17#include <litmus/litmus_proc.h> 18#include <litmus/litmus_proc.h>
18#include <litmus/sched_trace.h> 19#include <litmus/sched_trace.h>
19 20
21#ifdef CONFIG_PLUGIN_MC
22#include <linux/pid.h>
23#include <linux/hrtimer.h>
24#include <litmus/sched_mc.h>
25#else
26struct mc_task;
27#endif
28
20#ifdef CONFIG_SCHED_CPU_AFFINITY 29#ifdef CONFIG_SCHED_CPU_AFFINITY
21#include <litmus/affinity.h> 30#include <litmus/affinity.h>
22#endif 31#endif
@@ -35,8 +44,16 @@ atomic_t __log_seq_no = ATOMIC_INIT(0);
35atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); 44atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
36#endif 45#endif
37 46
38static struct kmem_cache * bheap_node_cache; 47static struct kmem_cache *bheap_node_cache;
39extern struct kmem_cache * release_heap_cache; 48extern struct kmem_cache *release_heap_cache;
49
50#ifdef CONFIG_MERGE_TIMERS
51extern struct kmem_cache *event_list_cache;
52#endif
53
54#ifdef CONFIG_PLUGIN_MC
55static struct kmem_cache *mc_data_cache;
56#endif
40 57
41struct bheap_node* bheap_node_alloc(int gfp_flags) 58struct bheap_node* bheap_node_alloc(int gfp_flags)
42{ 59{
@@ -290,17 +307,92 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
290 return ret; 307 return ret;
291} 308}
292 309
310#ifdef CONFIG_PLUGIN_MC
311asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param)
312{
313 struct mc_task mc;
314 struct mc_data *mc_data;
315 struct task_struct *target;
316 int retval = -EINVAL;
317
318 printk("Setting up mixed-criticality task parameters for process %d.\n",
319 pid);
320
321 if (pid < 0 || param == 0) {
322 goto out;
323 }
324 if (copy_from_user(&mc, param, sizeof(mc))) {
325 retval = -EFAULT;
326 goto out;
327 }
328
329 /* Task search and manipulation must be protected */
330 read_lock_irq(&tasklist_lock);
331 if (!(target = find_task_by_vpid(pid))) {
332 retval = -ESRCH;
333 goto out_unlock;
334 }
335
336 if (is_realtime(target)) {
337 /* The task is already a real-time task.
338 * We cannot allow parameter changes at this point.
339 */
340 retval = -EBUSY;
341 goto out_unlock;
342 }
343
344 /* check parameters passed in are valid */
345 if (mc.crit < CRIT_LEVEL_A || mc.crit >= NUM_CRIT_LEVELS) {
346 printk(KERN_WARNING "litmus: real-time task %d rejected because "
347 "of invalid criticality level\n", pid);
348 goto out_unlock;
349 }
350 if (CRIT_LEVEL_A == mc.crit &&
351 (mc.lvl_a_id < 0 ||
352 mc.lvl_a_id >= CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS)) {
353 printk(KERN_WARNING "litmus: real-time task %d rejected because "
354 "of invalid level A id\n", pid);
355 goto out_unlock;
356 }
357
358 mc_data = tsk_rt(target)->mc_data;
359 if (!mc_data) {
360 mc_data = kmem_cache_alloc(mc_data_cache, GFP_ATOMIC);
361 if (!mc_data) {
362 retval = -ENOMEM;
363 goto out_unlock;
364 }
365 tsk_rt(target)->mc_data = mc_data;
366 }
367 mc_data->mc_task = mc;
368
369 retval = 0;
370out_unlock:
371 read_unlock_irq(&tasklist_lock);
372out:
373 return retval;
374}
375#else
376asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param)
377{
378 /* don't allow this syscall if the plugin is not enabled */
379 return -EINVAL;
380}
381#endif
382
293/* p is a real-time task. Re-init its state as a best-effort task. */ 383/* p is a real-time task. Re-init its state as a best-effort task. */
294static void reinit_litmus_state(struct task_struct* p, int restore) 384static void reinit_litmus_state(struct task_struct* p, int restore)
295{ 385{
296 struct rt_task user_config = {}; 386 struct rt_task user_config = {};
297 void* ctrl_page = NULL; 387 void* ctrl_page = NULL;
388 void* color_ctrl_page = NULL;
298 389
299 if (restore) { 390 if (restore) {
300 /* Safe user-space provided configuration data. 391 /* Safe user-space provided configuration data.
301 * and allocated page. */ 392 * and allocated page. */
302 user_config = p->rt_param.task_params; 393 user_config = p->rt_param.task_params;
303 ctrl_page = p->rt_param.ctrl_page; 394 ctrl_page = p->rt_param.ctrl_page;
395 color_ctrl_page = p->rt_param.color_ctrl_page;
304 } 396 }
305 397
306 /* We probably should not be inheriting any task's priority 398 /* We probably should not be inheriting any task's priority
@@ -313,8 +405,9 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
313 405
314 /* Restore preserved fields. */ 406 /* Restore preserved fields. */
315 if (restore) { 407 if (restore) {
316 p->rt_param.task_params = user_config; 408 p->rt_param.task_params = user_config;
317 p->rt_param.ctrl_page = ctrl_page; 409 p->rt_param.ctrl_page = ctrl_page;
410 p->rt_param.color_ctrl_page = color_ctrl_page;
318 } 411 }
319} 412}
320 413
@@ -460,9 +553,11 @@ void litmus_fork(struct task_struct* p)
460 reinit_litmus_state(p, 0); 553 reinit_litmus_state(p, 0);
461 /* Don't let the child be a real-time task. */ 554 /* Don't let the child be a real-time task. */
462 p->sched_reset_on_fork = 1; 555 p->sched_reset_on_fork = 1;
463 } else 556 } else {
464 /* non-rt tasks might have ctrl_page set */ 557 /* non-rt tasks might have ctrl_page set */
465 tsk_rt(p)->ctrl_page = NULL; 558 tsk_rt(p)->ctrl_page = NULL;
559 tsk_rt(p)->color_ctrl_page = NULL;
560 }
466 561
467 /* od tables are never inherited across a fork */ 562 /* od tables are never inherited across a fork */
468 p->od_table = NULL; 563 p->od_table = NULL;
@@ -482,6 +577,10 @@ void litmus_exec(void)
482 free_page((unsigned long) tsk_rt(p)->ctrl_page); 577 free_page((unsigned long) tsk_rt(p)->ctrl_page);
483 tsk_rt(p)->ctrl_page = NULL; 578 tsk_rt(p)->ctrl_page = NULL;
484 } 579 }
580 if (tsk_rt(p)->color_ctrl_page) {
581 free_page((unsigned long) tsk_rt(p)->color_ctrl_page);
582 tsk_rt(p)->color_ctrl_page = NULL;
583 }
485 } 584 }
486} 585}
487 586
@@ -499,6 +598,21 @@ void exit_litmus(struct task_struct *dead_tsk)
499 tsk_rt(dead_tsk)->ctrl_page); 598 tsk_rt(dead_tsk)->ctrl_page);
500 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); 599 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
501 } 600 }
601 if (tsk_rt(dead_tsk)->color_ctrl_page) {
602 TRACE_TASK(dead_tsk,
603 "freeing color_ctrl_page %p\n",
604 tsk_rt(dead_tsk)->color_ctrl_page);
605 free_page((unsigned long) tsk_rt(dead_tsk)->color_ctrl_page);
606 }
607
608#ifdef CONFIG_PLUGIN_MC
609 /* The MC-setup syscall might succeed and allocate mc_data, but the
610 * task may not exit in real-time mode, and that memory will leak.
611 * Check and free it here.
612 */
613 if (tsk_rt(dead_tsk)->mc_data)
614 kmem_cache_free(mc_data_cache, tsk_rt(dead_tsk)->mc_data);
615#endif
502 616
503 /* main cleanup only for RT tasks */ 617 /* main cleanup only for RT tasks */
504 if (is_realtime(dead_tsk)) 618 if (is_realtime(dead_tsk))
@@ -542,8 +656,14 @@ static int __init _init_litmus(void)
542 656
543 register_sched_plugin(&linux_sched_plugin); 657 register_sched_plugin(&linux_sched_plugin);
544 658
545 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); 659 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
546 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); 660 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
661#ifdef CONFIG_MERGE_TIMERS
662 event_list_cache = KMEM_CACHE(event_list, SLAB_PANIC);
663#endif
664#ifdef CONFIG_PLUGIN_MC
665 mc_data_cache = KMEM_CACHE(mc_data, SLAB_PANIC);
666#endif
547 667
548#ifdef CONFIG_MAGIC_SYSRQ 668#ifdef CONFIG_MAGIC_SYSRQ
549 /* offer some debugging help */ 669 /* offer some debugging help */
@@ -567,6 +687,12 @@ static void _exit_litmus(void)
567 exit_litmus_proc(); 687 exit_litmus_proc();
568 kmem_cache_destroy(bheap_node_cache); 688 kmem_cache_destroy(bheap_node_cache);
569 kmem_cache_destroy(release_heap_cache); 689 kmem_cache_destroy(release_heap_cache);
690#ifdef CONFIG_MERGE_TIMERS
691 kmem_cache_destroy(event_list_cache);
692#endif
693#ifdef CONFIG_PLUGIN_MC
694 kmem_cache_destroy(mc_data_cache);
695#endif
570} 696}
571 697
572module_init(_init_litmus); 698module_init(_init_litmus);
diff --git a/litmus/locking.c b/litmus/locking.c
index ca5a073a989e..1d32dcd8e726 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -1,3 +1,5 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
1#include <litmus/fdso.h> 3#include <litmus/fdso.h>
2 4
3#ifdef CONFIG_LITMUS_LOCKING 5#ifdef CONFIG_LITMUS_LOCKING
@@ -29,14 +31,18 @@ static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
29 return (struct litmus_lock*) entry->obj->obj; 31 return (struct litmus_lock*) entry->obj->obj;
30} 32}
31 33
34atomic_t lock_id = ATOMIC_INIT(0);
35
32static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) 36static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
33{ 37{
34 struct litmus_lock* lock; 38 struct litmus_lock* lock;
35 int err; 39 int err;
36 40
37 err = litmus->allocate_lock(&lock, type, arg); 41 err = litmus->allocate_lock(&lock, type, arg);
38 if (err == 0) 42 if (err == 0) {
43 lock->id = atomic_add_return(1, &lock_id);
39 *obj_ref = lock; 44 *obj_ref = lock;
45 }
40 return err; 46 return err;
41} 47}
42 48
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 92c5d1b26a13..3606cd7ffae7 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -6,6 +6,8 @@
6/* The rescheduling state of each processor. 6/* The rescheduling state of each processor.
7 */ 7 */
8DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); 8DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
9#define TRACE_TASK(t, fmt, args...)
10#define TRACE(fmt, args...)
9 11
10void sched_state_will_schedule(struct task_struct* tsk) 12void sched_state_will_schedule(struct task_struct* tsk)
11{ 13{
@@ -30,6 +32,7 @@ void sched_state_will_schedule(struct task_struct* tsk)
30 /* /\* Litmus tasks should never be subject to a remote */ 32 /* /\* Litmus tasks should never be subject to a remote */
31 /* * set_tsk_need_resched(). *\/ */ 33 /* * set_tsk_need_resched(). *\/ */
32 /* BUG_ON(is_realtime(tsk)); */ 34 /* BUG_ON(is_realtime(tsk)); */
35
33#ifdef CONFIG_PREEMPT_STATE_TRACE 36#ifdef CONFIG_PREEMPT_STATE_TRACE
34 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 37 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
35 __builtin_return_address(0)); 38 __builtin_return_address(0));
diff --git a/litmus/rm_common.c b/litmus/rm_common.c
new file mode 100644
index 000000000000..f608a084d3b8
--- /dev/null
+++ b/litmus/rm_common.c
@@ -0,0 +1,91 @@
1/*
2 * litmus/rm_common.c
3 *
4 * Common functions for RM based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/rm_common.h>
16
17/* rm_higher_prio - returns true if first has a higher RM priority
18 * than second. Period ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int rm_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (first && first == second) {
30 TRACE_TASK(first,
31 "WARNING: pointless rm priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 return !is_realtime(second_task) ||
41
42 /* is the period of the first task shorter?
43 * Then it has higher priority.
44 */
45 lt_before(get_rt_period(first_task), get_rt_period(second_task)) ||
46
47 /* Do we have a period tie?
48 * Then break by PID.
49 */
50 (get_rt_period(first_task) == get_rt_period(second_task) &&
51 (first_task->pid < second_task->pid ||
52
53 /* If the PIDs are the same then the task with the inherited
54 * priority wins.
55 */
56 (first_task->pid == second_task->pid &&
57 !second->rt_param.inh_task)));
58}
59
60int rm_ready_order(struct bheap_node* a, struct bheap_node* b)
61{
62 return rm_higher_prio(bheap2task(a), bheap2task(b));
63}
64
65void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
66 release_jobs_t release)
67{
68 rt_domain_init(rt, rm_ready_order, resched, release);
69}
70
71/* need_to_preempt - check whether the task t needs to be preempted
72 * call only with irqs disabled and with ready_lock acquired
73 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
74 */
75int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
76{
77 /* we need the read lock for rm_ready_queue */
78 /* no need to preempt if there is nothing pending */
79 if (!__jobs_pending(rt))
80 return 0;
81 /* we need to reschedule if t doesn't exist */
82 if (!t)
83 return 1;
84
85 /* NOTE: We cannot check for non-preemptibility since we
86 * don't know what address space we're currently in.
87 */
88
89 /* make sure to get non-rt stuff out of the way */
90 return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t);
91}
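/* Illustrative example, not part of the patch: rate-monotonic priority is
 * fixed by period, so for tasks A (period 10 ms) and B (period 25 ms)
 *
 *	rm_higher_prio(A, B) == 1	(10 ms < 25 ms, releases are irrelevant)
 *	rm_higher_prio(B, A) == 0
 *
 * and two tasks with equal periods are ordered by PID, lower PID first.
 */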
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index d0b796611bea..c63bd0303916 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -12,17 +12,15 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13 13
14#include <litmus/litmus.h> 14#include <litmus/litmus.h>
15#include <litmus/event_group.h>
15#include <litmus/sched_plugin.h> 16#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h> 17#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h> 18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h> 19#include <litmus/trace.h>
21
22#include <litmus/bheap.h> 20#include <litmus/bheap.h>
23 21
24/* Uncomment when debugging timer races... */ 22/* Uncomment when debugging timer races... */
25#if 0 23#if 1
26#define VTRACE_TASK TRACE_TASK 24#define VTRACE_TASK TRACE_TASK
27#define VTRACE TRACE 25#define VTRACE TRACE
28#else 26#else
@@ -51,36 +49,51 @@ static unsigned int time2slot(lt_t time)
51 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; 49 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
52} 50}
53 51
54static enum hrtimer_restart on_release_timer(struct hrtimer *timer) 52static void do_release(struct release_heap *rh)
55{ 53{
56 unsigned long flags; 54 unsigned long flags;
57 struct release_heap* rh; 55
58 rh = container_of(timer, struct release_heap, timer); 56 if (CRIT_LEVEL_B == rh->dom->level) {
57 TS_LVLB_RELEASE_START;
58 } else {
59 TS_LVLC_RELEASE_START;
60 }
59 61
60 TS_RELEASE_LATENCY(rh->release_time); 62 TS_RELEASE_LATENCY(rh->release_time);
61 63
62 VTRACE("on_release_timer(0x%p) starts.\n", timer); 64 VTRACE("on_release_timer starts.\n");
63 65
64 TS_RELEASE_START; 66 TS_RELEASE_START;
65 67
66
67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags); 68 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); 69 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
69 /* remove from release queue */ 70 /* remove from release queue */
70 list_del(&rh->list); 71 list_del_init(&rh->list);
71 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags); 72 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
72 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); 73 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
73 74
74 /* call release callback */ 75 /* call release callback */
75 rh->dom->release_jobs(rh->dom, &rh->heap); 76 rh->dom->release_jobs(rh->dom, &rh->heap);
76 /* WARNING: rh can be referenced from other CPUs from now on. */
77
78 TS_RELEASE_END;
79 77
80 VTRACE("on_release_timer(0x%p) ends.\n", timer); 78 if (CRIT_LEVEL_B == rh->dom->level) {
79 TS_LVLB_RELEASE_END;
80 } else {
81 TS_LVLC_RELEASE_END;
82 }
83}
81 84
82 return HRTIMER_NORESTART; 85#ifdef CONFIG_MERGE_TIMERS
86static void on_release(struct rt_event *e)
87{
88 do_release(container_of(e, struct release_heap, event));
83} 89}
90#else
91static enum hrtimer_restart on_release(struct hrtimer *timer)
92{
93 do_release(container_of(timer, struct release_heap, timer));
94 return HRTIMER_NORESTART;
95}
96#endif
84 97
85/* allocated in litmus.c */ 98/* allocated in litmus.c */
86struct kmem_cache * release_heap_cache; 99struct kmem_cache * release_heap_cache;
@@ -88,19 +101,35 @@ struct kmem_cache * release_heap_cache;
88struct release_heap* release_heap_alloc(int gfp_flags) 101struct release_heap* release_heap_alloc(int gfp_flags)
89{ 102{
90 struct release_heap* rh; 103 struct release_heap* rh;
91 rh= kmem_cache_alloc(release_heap_cache, gfp_flags); 104 rh = kmem_cache_alloc(release_heap_cache, gfp_flags);
92 if (rh) { 105 if (rh) {
106#ifdef CONFIG_MERGE_TIMERS
107 init_event(&rh->event, 0, on_release,
108 event_list_alloc(GFP_ATOMIC));
109#else
93 /* initialize timer */ 110 /* initialize timer */
94 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 111 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
95 rh->timer.function = on_release_timer; 112 rh->timer.function = on_release;
113#endif
96 } 114 }
97 return rh; 115 return rh;
98} 116}
99 117
118#ifdef CONFIG_MERGE_TIMERS
119extern struct kmem_cache *event_list_cache;
120#endif
121
100void release_heap_free(struct release_heap* rh) 122void release_heap_free(struct release_heap* rh)
101{ 123{
102 /* make sure timer is no longer in use */ 124 /* make sure timer is no longer in use */
125#ifdef CONFIG_MERGE_TIMERS
126 if (rh->dom) {
127 cancel_event(&rh->event);
128 kmem_cache_free(event_list_cache, rh->event.event_list);
129 }
130#else
103 hrtimer_cancel(&rh->timer); 131 hrtimer_cancel(&rh->timer);
132#endif
104 kmem_cache_free(release_heap_cache, rh); 133 kmem_cache_free(release_heap_cache, rh);
105} 134}
106 135
@@ -149,13 +178,17 @@ static struct release_heap* get_release_heap(rt_domain_t *rt,
149 return heap; 178 return heap;
150} 179}
151 180
152static void reinit_release_heap(struct task_struct* t) 181static void reinit_release_heap(rt_domain_t *rt, struct task_struct* t)
153{ 182{
154 struct release_heap* rh; 183 struct release_heap* rh;
155 184
156 /* use pre-allocated release heap */ 185 /* use pre-allocated release heap */
157 rh = tsk_rt(t)->rel_heap; 186 rh = tsk_rt(t)->rel_heap;
158 187
188#ifdef CONFIG_MERGE_TIMERS
189 rh->event.prio = rt->prio;
190 cancel_event(&rh->event);
191#else
159 /* Make sure it is safe to use. The timer callback could still 192 /* Make sure it is safe to use. The timer callback could still
160 * be executing on another CPU; hrtimer_cancel() will wait 193 * be executing on another CPU; hrtimer_cancel() will wait
161 * until the timer callback has completed. However, under no 194 * until the timer callback has completed. However, under no
@@ -167,13 +200,50 @@ static void reinit_release_heap(struct task_struct* t)
167 */ 200 */
168 BUG_ON(hrtimer_cancel(&rh->timer)); 201 BUG_ON(hrtimer_cancel(&rh->timer));
169 202
203#ifdef CONFIG_RELEASE_MASTER
204 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
205#endif
206#endif
170 /* initialize */ 207 /* initialize */
171 bheap_init(&rh->heap); 208 bheap_init(&rh->heap);
209
210}
211
172#ifdef CONFIG_RELEASE_MASTER 212#ifdef CONFIG_RELEASE_MASTER
173 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); 213static void arm_release_timer_on(struct release_heap *rh, int target_cpu)
214#else
215static void arm_release_timer(struct release_heap *rh)
216#endif
217{
218#ifdef CONFIG_MERGE_TIMERS
219 add_event(rh->dom->event_group, &rh->event, rh->release_time);
220#else
221 VTRACE("arming timer 0x%p\n", &rh->timer);
222 /* we cannot arm the timer using hrtimer_start()
223 * as it may deadlock on rq->lock
224 * PINNED mode is ok on both local and remote CPU
225 */
226
227#ifdef CONFIG_RELEASE_MASTER
228 if (rh->dom->release_master == NO_CPU && target_cpu == NO_CPU)
229#endif
230 __hrtimer_start_range_ns(&rh->timer,
231 ns_to_ktime(rh->release_time),
232 0, HRTIMER_MODE_ABS_PINNED, 0);
233#ifdef CONFIG_RELEASE_MASTER
234 else
235 hrtimer_start_on(/* target_cpu overrides release master */
236 (target_cpu != NO_CPU ?
237 target_cpu : rh->dom->release_master),
238 &rh->info, &rh->timer,
239 ns_to_ktime(rh->release_time),
240 HRTIMER_MODE_ABS_PINNED);
241#endif
174#endif 242#endif
175} 243}
176/* arm_release_timer() - start local release timer or trigger 244
245
246/* setup_release() - start local release timer or trigger
177 * remote timer (pull timer) 247 * remote timer (pull timer)
178 * 248 *
179 * Called by add_release() with: 249 * Called by add_release() with:
@@ -181,10 +251,10 @@ static void reinit_release_heap(struct task_struct* t)
181 * - IRQ disabled 251 * - IRQ disabled
182 */ 252 */
183#ifdef CONFIG_RELEASE_MASTER 253#ifdef CONFIG_RELEASE_MASTER
184#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU) 254#define setup_release(t) setup_release_on((t), NO_CPU)
185static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu) 255static void setup_release_on(rt_domain_t *_rt , int target_cpu)
186#else 256#else
187static void arm_release_timer(rt_domain_t *_rt) 257static void setup_release(rt_domain_t *_rt)
188#endif 258#endif
189{ 259{
190 rt_domain_t *rt = _rt; 260 rt_domain_t *rt = _rt;
@@ -193,14 +263,14 @@ static void arm_release_timer(rt_domain_t *_rt)
193 struct task_struct* t; 263 struct task_struct* t;
194 struct release_heap* rh; 264 struct release_heap* rh;
195 265
196 VTRACE("arm_release_timer() at %llu\n", litmus_clock()); 266 VTRACE("setup_release() at %llu\n", litmus_clock());
197 list_replace_init(&rt->tobe_released, &list); 267 list_replace_init(&rt->tobe_released, &list);
198 268
199 list_for_each_safe(pos, safe, &list) { 269 list_for_each_safe(pos, safe, &list) {
200 /* pick task of work list */ 270 /* pick task of work list */
201 t = list_entry(pos, struct task_struct, rt_param.list); 271 t = list_entry(pos, struct task_struct, rt_param.list);
202 sched_trace_task_release(t); 272 sched_trace_task_release(t);
203 list_del(pos); 273 list_del_init(pos);
204 274
205 /* put into release heap while holding release_lock */ 275 /* put into release heap while holding release_lock */
206 raw_spin_lock(&rt->release_lock); 276 raw_spin_lock(&rt->release_lock);
@@ -213,7 +283,7 @@ static void arm_release_timer(rt_domain_t *_rt)
213 VTRACE_TASK(t, "Dropped release_lock 0x%p\n", 283 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
214 &rt->release_lock); 284 &rt->release_lock);
215 285
216 reinit_release_heap(t); 286 reinit_release_heap(rt, t);
217 VTRACE_TASK(t, "release_heap ready\n"); 287 VTRACE_TASK(t, "release_heap ready\n");
218 288
219 raw_spin_lock(&rt->release_lock); 289 raw_spin_lock(&rt->release_lock);
@@ -223,7 +293,7 @@ static void arm_release_timer(rt_domain_t *_rt)
223 rh = get_release_heap(rt, t, 1); 293 rh = get_release_heap(rt, t, 1);
224 } 294 }
225 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); 295 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
226 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n"); 296 VTRACE_TASK(t, "setup_release(): added to release heap\n");
227 297
228 raw_spin_unlock(&rt->release_lock); 298 raw_spin_unlock(&rt->release_lock);
229 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); 299 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
@@ -233,39 +303,19 @@ static void arm_release_timer(rt_domain_t *_rt)
233 * this release_heap anyway). 303 * this release_heap anyway).
234 */ 304 */
235 if (rh == tsk_rt(t)->rel_heap) { 305 if (rh == tsk_rt(t)->rel_heap) {
236 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
237 /* we cannot arm the timer using hrtimer_start()
238 * as it may deadlock on rq->lock
239 *
240 * PINNED mode is ok on both local and remote CPU
241 */
242#ifdef CONFIG_RELEASE_MASTER 306#ifdef CONFIG_RELEASE_MASTER
243 if (rt->release_master == NO_CPU && 307 arm_release_timer_on(rh, target_cpu);
244 target_cpu == NO_CPU) 308#else
245#endif 309 arm_release_timer(rh);
246 __hrtimer_start_range_ns(&rh->timer,
247 ns_to_ktime(rh->release_time),
248 0, HRTIMER_MODE_ABS_PINNED, 0);
249#ifdef CONFIG_RELEASE_MASTER
250 else
251 hrtimer_start_on(
252 /* target_cpu overrides release master */
253 (target_cpu != NO_CPU ?
254 target_cpu : rt->release_master),
255 &rh->info, &rh->timer,
256 ns_to_ktime(rh->release_time),
257 HRTIMER_MODE_ABS_PINNED);
258#endif 310#endif
259 } else 311 }
260 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
261 } 312 }
262} 313}
263 314
264void rt_domain_init(rt_domain_t *rt, 315void rt_domain_init(rt_domain_t *rt,
265 bheap_prio_t order, 316 bheap_prio_t order,
266 check_resched_needed_t check, 317 check_resched_needed_t check,
267 release_jobs_t release 318 release_jobs_t release)
268 )
269{ 319{
270 int i; 320 int i;
271 321
@@ -277,7 +327,7 @@ void rt_domain_init(rt_domain_t *rt,
277 if (!order) 327 if (!order)
278 order = dummy_order; 328 order = dummy_order;
279 329
280#ifdef CONFIG_RELEASE_MASTER 330#if defined(CONFIG_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
281 rt->release_master = NO_CPU; 331 rt->release_master = NO_CPU;
282#endif 332#endif
283 333
@@ -300,14 +350,13 @@ void rt_domain_init(rt_domain_t *rt,
300 */ 350 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new) 351void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{ 352{
303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " 353 VTRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
304 "to ready queue at %llu\n", 354 new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
305 new->comm, new->pid, 355 get_release(new), litmus_clock());
306 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
307 get_release(new), litmus_clock());
308 356
309 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); 357 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
310 358
359 new->rt_param.domain = rt;
311 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); 360 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
312 rt->check_resched(rt); 361 rt->check_resched(rt);
313} 362}
@@ -326,7 +375,7 @@ void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
326void __add_release_on(rt_domain_t* rt, struct task_struct *task, 375void __add_release_on(rt_domain_t* rt, struct task_struct *task,
327 int target_cpu) 376 int target_cpu)
328{ 377{
329 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n", 378 VTRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
330 get_release(task), target_cpu); 379 get_release(task), target_cpu);
331 list_add(&tsk_rt(task)->list, &rt->tobe_released); 380 list_add(&tsk_rt(task)->list, &rt->tobe_released);
332 task->rt_param.domain = rt; 381 task->rt_param.domain = rt;
@@ -334,7 +383,7 @@ void __add_release_on(rt_domain_t* rt, struct task_struct *task,
334 /* start release timer */ 383 /* start release timer */
335 TS_SCHED2_START(task); 384 TS_SCHED2_START(task);
336 385
337 arm_release_timer_on(rt, target_cpu); 386 setup_release_on(rt, target_cpu);
338 387
339 TS_SCHED2_END(task); 388 TS_SCHED2_END(task);
340} 389}
@@ -345,15 +394,88 @@ void __add_release_on(rt_domain_t* rt, struct task_struct *task,
345 */ 394 */
346void __add_release(rt_domain_t* rt, struct task_struct *task) 395void __add_release(rt_domain_t* rt, struct task_struct *task)
347{ 396{
348 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); 397 VTRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
349 list_add(&tsk_rt(task)->list, &rt->tobe_released); 398 list_add(&tsk_rt(task)->list, &rt->tobe_released);
350 task->rt_param.domain = rt; 399 task->rt_param.domain = rt;
351 400
352 /* start release timer */ 401 /* start release timer */
353 TS_SCHED2_START(task); 402 TS_SCHED2_START(task);
354 403
355 arm_release_timer(rt); 404 setup_release(rt);
356 405
357 TS_SCHED2_END(task); 406 TS_SCHED2_END(task);
358} 407}
359 408
409/******************************************************************************
410 * domain_t wrapper
411 ******************************************************************************/
412
413/* pd_requeue - calls underlying rt_domain add methods.
414 * If the task is not yet released, it is inserted into the rt_domain
415 * ready queue. Otherwise, it is queued for release.
416 *
417 * Assumes the caller already holds dom->lock.
418 */
419static void pd_requeue(domain_t *dom, struct task_struct *task)
420{
421 rt_domain_t *domain = (rt_domain_t*)dom->data;
422
423 TRACE_TASK(task, "Requeueing\n");
424 BUG_ON(!task || !is_realtime(task));
425 BUG_ON(is_queued(task));
426 BUG_ON(get_task_domain(task) != dom);
427
428 if (is_released(task, litmus_clock())) {
429 __add_ready(domain, task);
430 VTRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
431 task->comm, task->pid, get_exec_cost(task), get_rt_period(task),
432 get_release(task), litmus_clock());
433 } else {
434 /* task has to wait for next release */
435 VTRACE_TASK(task, "add release(), rel=%llu\n", get_release(task));
436 add_release(domain, task);
437 }
438
439}
440
441/* pd_take_ready - removes and returns the next ready task from the rt_domain
442 *
443 * Assumes the caller already holds dom->lock.
444 */
445static struct task_struct* pd_take_ready(domain_t *dom)
446{
447 return __take_ready((rt_domain_t*)dom->data);
448 }
449
450/* pd_peek_ready - returns the head of the rt_domain ready queue
451 *
452 * Assumes the caller already holds dom->lock.
453 */
454static struct task_struct* pd_peek_ready(domain_t *dom)
455{
456 return __next_ready((rt_domain_t*)dom->data);
457}
458
459static void pd_remove(domain_t *dom, struct task_struct *task)
460{
461 if (is_queued(task))
462 remove((rt_domain_t*)dom->data, task);
463}
464
465/* pd_domain_init - create a generic domain wrapper for an rt_domain
466 */
467void pd_domain_init(domain_t *dom,
468 rt_domain_t *domain,
469 bheap_prio_t order,
470 check_resched_needed_t check,
471 release_jobs_t release,
472 preempt_needed_t preempt_needed,
473 task_prio_t priority)
474{
475 rt_domain_init(domain, order, check, release);
476 domain_init(dom, &domain->ready_lock,
477 pd_requeue, pd_peek_ready, pd_take_ready,
478 preempt_needed, priority);
479 dom->remove = pd_remove;
480 dom->data = domain;
481}
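
For orientation, here is a minimal sketch (not part of this patch) of how a plugin might wrap a per-CPU rt_domain with the new pd_domain_init() interface. The my_* names are illustrative only, and it is assumed here that the EDF helpers from <litmus/edf_common.h> are compatible with the preempt_needed_t and task_prio_t typedefs declared in the new domain.h:

/* Illustrative sketch only -- not part of the patch. */
#include <litmus/rt_domain.h>
#include <litmus/domain.h>
#include <litmus/edf_common.h>

static rt_domain_t my_rt_dom;   /* underlying release/ready queues        */
static domain_t    my_dom;      /* generic wrapper handed to the plugin   */

static int my_check_resched(rt_domain_t *rt)
{
	/* A real plugin would compare the new ready head against the
	 * currently linked task and trigger a reschedule if needed. */
	return 0;
}

static void my_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
	__merge_ready(rt, tasks);  /* locking omitted in this sketch */
}

static void my_domain_setup(void)
{
	pd_domain_init(&my_dom, &my_rt_dom,
		       edf_ready_order,        /* bheap_prio_t ordering        */
		       my_check_resched,       /* check_resched_needed_t       */
		       my_release_jobs,        /* release_jobs_t               */
		       edf_preemption_needed,  /* preempt_needed_t (assumed)   */
		       edf_higher_prio);       /* task_prio_t (assumed)        */
}
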
diff --git a/litmus/rt_server.c b/litmus/rt_server.c
new file mode 100644
index 000000000000..74d7c7b0f81a
--- /dev/null
+++ b/litmus/rt_server.c
@@ -0,0 +1,23 @@
1#include <litmus/rt_server.h>
2
3static void default_server_update(struct rt_server *srv)
4{
5}
6
7void init_rt_server(struct rt_server *server,
8 int sid, int cpu, rt_domain_t *domain,
9 need_preempt_t need_preempt,
10 server_update_t update)
11{
12	BUG_ON(!need_preempt);
13
14
15 server->need_preempt = need_preempt;
16 server->update = (update) ? update : default_server_update;
17
18 server->sid = sid;
19 server->cpu = cpu;
20 server->linked = NULL;
21 server->domain = domain;
22 server->running = 0;
23}
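
Usage, condensed from init_color() in sched_color.c below (cpu and entry come from the per-CPU initialization loop there): each processor gets a CPU server backed by its partitioned RM domain plus a FIFO server backed by the shared best-effort domain.

struct cpu_entry *entry = remote_entry(cpu);

/* FIFO (best-effort) server: ids above the CPU-server id space,
 * scheduled out of the shared fifo_domain, updated by fifo_update(). */
init_rt_server(&entry->fifo_server.server,
	       cpu + num_online_cpus() + 1, cpu,
	       &fifo_domain,
	       fifo_preemption_needed, fifo_update);

/* CPU server: one per processor, backed by the partitioned RM domain;
 * passing NULL installs the default no-op update callback. */
init_rt_server(&entry->server, cpu + 1, cpu,
	       &entry->rm_domain, rm_preemption_needed, NULL);
entry->server.running = 1;
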
diff --git a/litmus/sched_color.c b/litmus/sched_color.c
new file mode 100644
index 000000000000..66ce40fd1b57
--- /dev/null
+++ b/litmus/sched_color.c
@@ -0,0 +1,889 @@
1#include <linux/percpu.h>
2#include <linux/sched.h>
3#include <linux/list.h>
4#include <linux/spinlock.h>
5#include <linux/module.h>
6#include <linux/slab.h>
7
8#include <litmus/litmus.h>
9#include <litmus/jobs.h>
10#include <litmus/preempt.h>
11#include <litmus/sched_plugin.h>
12#include <litmus/rm_common.h>
13#include <litmus/sched_trace.h>
14#include <litmus/color.h>
15#include <litmus/fifo_common.h>
16#include <litmus/budget.h>
17#include <litmus/rt_server.h>
18#include <litmus/dgl.h>
19
20/**
21 * @rt_server Common server functionality.
22 * @task Task used to schedule server.
23 * @timer Budget enforcement for @task
24 * @start_time If set, time at which server began running.
25 */
26struct fifo_server {
27 struct rt_server server;
28 struct task_struct* task;
29 struct enforcement_timer timer;
30 lt_t start_time;
31};
32
33/**
34 * @server Common server functionality.
35 * @rm_domain PRM domain.
36 * @scheduled Task physically running on CPU.
37 * @fifo_server Server partitioned to this CPU.
38 */
39struct cpu_entry {
40 struct rt_server server;
41 rt_domain_t rm_domain;
42 struct task_struct* scheduled;
43 struct fifo_server fifo_server;
44 struct hrtimer chunk_timer;
45};
46
47DEFINE_PER_CPU(struct cpu_entry, color_cpus);
48
49static rt_domain_t fifo_domain;
50static raw_spinlock_t fifo_lock;
51
52static struct dgl group_lock;
53static raw_spinlock_t dgl_lock;
54
55#define local_entry (&__get_cpu_var(color_cpus))
56#define remote_entry(cpu) (&per_cpu(color_cpus, cpu))
57#define task_entry(task) remote_entry(get_partition(task))
58#define task_fserver(task) (&task_entry(task)->fifo_server.server)
59#define entry_lock(entry) (&(entry)->rm_domain.ready_lock)
60
61
62#define task_dom(entry, task) (is_be(task) ? &fifo_domain : &entry->rm_domain)
63#define task_lock(entry, task) (is_be(task) ? &fifo_lock : entry_lock(entry))
64#define is_fifo_server(s) ((s)->sid > num_online_cpus())
65#define lock_if(lock, cond) do { if (cond) raw_spin_lock(lock);} while(0)
66#define unlock_if(lock, cond) do { if (cond) raw_spin_unlock(lock);} while(0)
67
68#ifdef CONFIG_NP_SECTION
69#define has_resources(t, c) (tsk_rt(t)->req == group_lock.acquired[c])
70#else
71#define has_resources(t, c) (1)
72#endif
73
74/*
75 * Requeue onto domain's release or ready queue based on task state.
76 */
77static void requeue(rt_domain_t *dom, struct task_struct* t)
78{
79 if (is_server(t) && !tsk_rt(t)->present)
80 /* Remove stopped server from the system */
81 return;
82
83 TRACE_TASK(t, "Requeueing\n");
84 if (is_queued(t)) {
85 TRACE_TASK(t, "Already queued!\n");
86 return;
87 }
88
89 set_rt_flags(t, RT_F_RUNNING);
90 if (is_released(t, litmus_clock()))
91 __add_ready(dom, t);
92 else
93 add_release(dom, t);
94}
95
96enum hrtimer_restart chunk_fire(struct hrtimer *timer)
97{
98 unsigned long flags;
99 local_irq_save(flags);
100 TRACE("Chunk timer fired.\n");
101 litmus_reschedule_local();
102 local_irq_restore(flags);
103 return HRTIMER_NORESTART;
104}
105
106void chunk_arm(struct cpu_entry *entry)
107{
108 unsigned long fire;
109 if (color_chunk) {
110 fire = litmus_clock() + color_chunk;
111 TRACE("Arming chunk timer for %llu\n", fire);
112 __hrtimer_start_range_ns(&entry->chunk_timer,
113 ns_to_ktime(fire), 0,
114 HRTIMER_MODE_ABS_PINNED, 0);
115 }
116}
117
118void chunk_cancel(struct cpu_entry *entry)
119{
120 TRACE("Cancelling chunk timer\n");
121 hrtimer_try_to_cancel(&entry->chunk_timer);
122}
123
124/*
125 * Relinquish resources held by @t (or its children).
126 */
127static void release_resources(struct task_struct *t)
128{
129 struct task_struct *sched;
130#ifdef CONFIG_NP_SECTION
131
132 TRACE_TASK(t, "Releasing resources\n");
133
134 if (is_server(t)) {
135 sched = task_fserver(t)->linked;
136 if (sched)
137 release_resources(sched);
138 } else if (is_kernel_np(t))
139 remove_group_req(&group_lock, tsk_rt(t)->req);
140 take_np(t);
141#endif
142}
143
144/*
145 * Put in requests for resources needed by @t. If @t is a server, this will
146 * set @t's np flag to reflect resources held by @t's children.
147 */
148static void acquire_resources(struct task_struct *t)
149{
150 int cpu;
151 struct rt_server *server;
152 struct task_struct *sched;
153
154#ifdef CONFIG_NP_SECTION
155 /* Can't acquire resources if t is not running */
156 BUG_ON(!get_task_server(t));
157
158 if (is_kernel_np(t)) {
159 TRACE_TASK(t, "Already contending for resources\n");
160 return;
161 }
162 cpu = get_task_server(t)->cpu;
163
164 if (is_server(t)) {
165 server = task_fserver(t);
166 sched = server->linked;
167
168 /* Happens when server is booted off on completion or
169 * has just completed executing a task.
170 */
171 if (sched && !is_kernel_np(sched))
172 acquire_resources(sched);
173
174 /* Become np if there is a running task */
175 if (sched && has_resources(sched, cpu)) {
176 TRACE_TASK(t, "Running task with resource\n");
177 make_np(t);
178 } else {
179 TRACE_TASK(t, "Running no resources\n");
180 take_np(t);
181 }
182 } else {
183 TRACE_TASK(t, "Acquiring resources\n");
184 if (!has_resources(t, cpu))
185 add_group_req(&group_lock, tsk_rt(t)->req, cpu);
186 make_np(t);
187 }
188#endif
189}
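
The group-lock (DGL) calls used above and in color_task_new()/color_task_exit() follow a simple request lifecycle. The sketch below is pieced together from those call sites only (dgl.h is not shown in this hunk), so the color, nr_pages, and cpu arguments are illustrative:

/* Illustrative DGL request lifecycle, inferred from the call sites here. */
struct dgl_group_req *req = kmalloc(sizeof(*req), GFP_ATOMIC);

dgl_group_req_init(&group_lock, req);         /* color_task_new: empty request */
set_req(&group_lock, req, color, nr_pages);   /* one entry per color in use    */

raw_spin_lock(&dgl_lock);
add_group_req(&group_lock, req, cpu);         /* contend (acquire_resources)   */
raw_spin_unlock(&dgl_lock);

/* ... the task runs only while has_resources(t, cpu) holds ... */

raw_spin_lock(&dgl_lock);
remove_group_req(&group_lock, req);           /* drop (release_resources)      */
raw_spin_unlock(&dgl_lock);

dgl_group_req_free(req);                      /* color_task_exit               */
kfree(req);
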
190
191/*
192 * Stop logically running the currently linked task.
193 */
194static void unlink(struct rt_server *server)
195{
196 BUG_ON(!server->linked);
197
198 if (is_server(server->linked))
199 task_fserver(server->linked)->running = 0;
200
201
202 sched_trace_server_switch_away(server->sid, 0,
203 server->linked->pid,
204 get_rt_job(server->linked));
205 TRACE_TASK(server->linked, "No longer run by server %d\n", server->sid);
206
207 raw_spin_lock(&dgl_lock);
208 release_resources(server->linked);
209 raw_spin_unlock(&dgl_lock);
210
211 get_task_server(server->linked) = NULL;
212 server->linked = NULL;
213}
214
215static struct task_struct* schedule_server(struct rt_server *server);
216
217/*
218 * Logically run @task.
219 */
220static void link(struct rt_server *server, struct task_struct *task)
221{
222 struct rt_server *tserv;
223
224 BUG_ON(server->linked);
225 BUG_ON(!server->running);
226 BUG_ON(is_kernel_np(task));
227
228 TRACE_TASK(task, "Run by server %d\n", server->sid);
229
230 if (is_server(task)) {
231 tserv = task_fserver(task);
232 tserv->running = 1;
233 schedule_server(tserv);
234 }
235
236 server->linked = task;
237 get_task_server(task) = server;
238
239 sched_trace_server_switch_to(server->sid, 0,
240 task->pid, get_rt_job(task));
241}
242
243/*
244 * Triggers preemption on first FIFO server which is running NULL.
245 */
246static void check_for_fifo_preempt(void)
247{
248 int ret = 0, cpu;
249 struct cpu_entry *entry;
250 struct rt_server *cpu_server, *fifo_server;
251
252 TRACE("Checking for FIFO preempt\n");
253
254 for_each_online_cpu(cpu) {
255 entry = remote_entry(cpu);
256 cpu_server = &entry->server;
257 fifo_server = &entry->fifo_server.server;
258
259 raw_spin_lock(entry_lock(entry));
260 raw_spin_lock(&fifo_lock);
261
262 if (cpu_server->linked && is_server(cpu_server->linked) &&
263 !fifo_server->linked) {
264 litmus_reschedule(cpu);
265 ret = 1;
266 }
267
268 raw_spin_unlock(&fifo_lock);
269 raw_spin_unlock(entry_lock(entry));
270
271 if (ret)
272 break;
273 }
274}
275
276/*
277 * Rejoin a task into the system.
278 */
279static void job_arrival(struct task_struct *t)
280{
281 rt_domain_t *dom = task_dom(task_entry(t), t);
282
283 lock_if(&fifo_lock, is_be(t));
284 requeue(dom, t);
285 unlock_if(&fifo_lock, is_be(t));
286}
287
288/*
289 * Complete job for task linked to @server.
290 */
291static void job_completion(struct rt_server *server)
292{
293 struct task_struct *t = server->linked;
294 lt_t et, now = litmus_clock();
295
296 TRACE_TASK(t, "Job completed\n");
297 if (is_server(t))
298 sched_trace_server_completion(t->pid, get_rt_job(t));
299 else
300 sched_trace_task_completion(t, 0);
301
302 if (1 < get_rt_job(t)) {
303 /* our releases happen at the second job */
304 et = get_exec_time(t);
305 if (et > tsk_rt(t)->max_exec_time)
306 tsk_rt(t)->max_exec_time = et;
307 }
308
309 if (is_tardy(t, now)) {
310 lt_t miss = now - get_deadline(t);
311 ++tsk_rt(t)->missed;
312 tsk_rt(t)->total_tardy += miss;
313 if (lt_before(tsk_rt(t)->max_tardy, miss)) {
314 tsk_rt(t)->max_tardy = miss;
315 }
316 }
317
318 unlink(server);
319 set_rt_flags(t, RT_F_SLEEP);
320 prepare_for_next_period(t);
321
322 if (is_server(t))
323 sched_trace_server_release(t->pid, get_rt_job(t),
324 get_release(t), get_deadline(t));
325 else
326 sched_trace_task_release(t);
327
328 if (is_running(t))
329 job_arrival(t);
330}
331
332/*
333 * Update @server state to reflect task's state.
334 */
335static void update_task(struct rt_server *server)
336{
337 int oot, sleep, block, np, chunked;
338 struct task_struct *t = server->linked;
339 lt_t last = tsk_rt(t)->last_exec_time;
340
341 block = !is_running(t);
342 oot = budget_enforced(t) && budget_exhausted(t);
343 np = is_kernel_np(t);
344 sleep = get_rt_flags(t) == RT_F_SLEEP;
345
346 chunked = color_chunk && last && (lt_after(litmus_clock() - last, color_chunk));
347
348 TRACE_TASK(t, "Updating task, block: %d, oot: %d, np: %d, sleep: %d, chunk: %d\n",
349 block, oot, np, sleep, chunked);
350
351 if (block)
352 unlink(server);
353 else if (oot || sleep)
354 job_completion(server);
355 else if (chunked) {
356 unlink(server);
357 job_arrival(t);
358 }
359}
360
361/*
362 * Link next task for @server.
363 */
364static struct task_struct* schedule_server(struct rt_server *server)
365{
366 struct task_struct *next;
367 struct rt_server *lserver;
368
369 TRACE("Scheduling server %d\n", server->sid);
370
371 if (server->linked) {
372 if (is_server(server->linked)) {
373 lserver = task_fserver(server->linked);
374 lserver->update(lserver);
375 }
376 update_task(server);
377 }
378
379 next = server->linked;
380 lock_if(&fifo_lock, is_fifo_server(server));
381 if ((!next || !is_np(next)) &&
382 server->need_preempt(server->domain, next)) {
383 if (next) {
384 TRACE_TASK(next, "Preempted\n");
385 unlink(server);
386 requeue(server->domain, next);
387 }
388 next = __take_ready(server->domain);
389 link(server, next);
390 }
391 unlock_if(&fifo_lock, is_fifo_server(server));
392
393 return next;
394}
395
396/*
397 * Update server state, including picking next running task and incrementing
398 * server execution time.
399 */
400static void fifo_update(struct rt_server *server)
401{
402 lt_t delta;
403 struct fifo_server *fserver;
404
405 fserver = container_of(server, struct fifo_server, server);
406 TRACE_TASK(fserver->task, "Updating FIFO server\n");
407
408 if (!server->linked || has_resources(server->linked, server->cpu)) {
409 /* Running here means linked to a parent server */
410 /* BUG_ON(!server->running); */
411
412 /* Stop executing */
413 if (fserver->start_time) {
414 delta = litmus_clock() - fserver->start_time;
415 tsk_rt(fserver->task)->job_params.exec_time += delta;
416 fserver->start_time = 0;
417 cancel_enforcement_timer(&fserver->timer);
418 } else {
419 /* Server is linked, but not executing */
420 /* BUG_ON(fserver->timer.armed); */
421 }
422
423 /* Calculate next task */
424 schedule_server(&fserver->server);
425
426 /* Reserve needed resources */
427 raw_spin_lock(&dgl_lock);
428 acquire_resources(fserver->task);
429 raw_spin_unlock(&dgl_lock);
430 }
431}
432
433/*
434 * Triggers preemption on rm-scheduled "linked" field only.
435 */
436static void color_rm_release(rt_domain_t *rm, struct bheap *tasks)
437{
438 unsigned long flags;
439 struct cpu_entry *entry;
440
441 TRACE_TASK(bheap2task(bheap_peek(rm->order, tasks)),
442 "Released set of RM tasks\n");
443
444 entry = container_of(rm, struct cpu_entry, rm_domain);
445 raw_spin_lock_irqsave(entry_lock(entry), flags);
446
447 __merge_ready(rm, tasks);
448
449 if (rm_preemption_needed(rm, entry->server.linked) &&
450 (!entry->server.linked || !is_kernel_np(entry->server.linked))) {
451 litmus_reschedule(entry->server.cpu);
452 }
453
454 raw_spin_unlock_irqrestore(entry_lock(entry), flags);
455}
456
457static void color_fifo_release(rt_domain_t *dom, struct bheap *tasks)
458{
459 unsigned long flags;
460
461 TRACE_TASK(bheap2task(bheap_peek(dom->order, tasks)),
462 "Released set of FIFO tasks\n");
463 local_irq_save(flags);
464
465 raw_spin_lock(&fifo_lock);
466 __merge_ready(dom, tasks);
467 raw_spin_unlock(&fifo_lock);
468
469 check_for_fifo_preempt();
470
471 local_irq_restore(flags);
472}
473
474#define cpu_empty(entry, run) \
475 (!(run) || (is_server(run) && !(entry)->fifo_server.server.linked))
476
477static struct task_struct* color_schedule(struct task_struct *prev)
478{
479 unsigned long flags;
480 int server_running;
481 struct cpu_entry *entry = local_entry;
482 struct task_struct *next, *plink = entry->server.linked;
483
484 TRACE("Reschedule on %d at %llu\n", entry->server.cpu, litmus_clock());
485 BUG_ON(entry->scheduled && entry->scheduled != prev);
486 BUG_ON(entry->scheduled && !is_realtime(prev));
487
488 raw_spin_lock_irqsave(entry_lock(entry), flags);
489
490 if (entry->scheduled && cpu_empty(entry, plink) && is_running(prev)) {
491 TRACE_TASK(prev, "Snuck in on new!\n");
492 job_arrival(entry->scheduled);
493 }
494
495 /* Pick next top-level task */
496 next = schedule_server(&entry->server);
497 /* Schedule hierarchically */
498 server_running = next && is_server(next);
499 if (server_running)
500 next = task_fserver(next)->linked;
501
502 /* Selected tasks must contend for group lock */
503 if (next) {
504 raw_spin_lock(&dgl_lock);
505 acquire_resources(next);
506 if (has_resources(next, entry->server.cpu)) {
507 TRACE_TASK(next, "Has group lock\n");
508 sched_trace_task_resume_on(next, 1);
509 } else {
510 TRACE_TASK(next, "Does not have lock, 0x%p does\n",
511 group_lock.acquired[entry->server.cpu]);
512 if (next != prev)
513 sched_trace_task_block_on(next, 1);
514 next = NULL;
515 server_running = 0;
516 }
517 raw_spin_unlock(&dgl_lock);
518 }
519
520 /* Server is blocked if its running task is blocked. Note that if the
521 * server has no running task, the server will now execute NULL.
522 */
523 if (server_running) {
524 TRACE_TASK(entry->server.linked, "Server running\n");
525 arm_enforcement_timer(&entry->fifo_server.timer,
526 entry->fifo_server.task);
527 entry->fifo_server.start_time = litmus_clock();
528 }
529
530 if (prev) {
531 tsk_rt(prev)->scheduled_on = NO_CPU;
532 tsk_rt(prev)->last_exec_time = 0;
533 chunk_cancel(entry);
534 }
535 if (next) {
536 tsk_rt(next)->scheduled_on = entry->server.cpu;
537 tsk_rt(next)->last_exec_time = litmus_clock();
538 chunk_arm(entry);
539 }
540
541 entry->scheduled = next;
542 sched_state_task_picked();
543
544 raw_spin_unlock_irqrestore(entry_lock(entry), flags);
545
546 return entry->scheduled;
547}
548
549static void color_task_new(struct task_struct *t, int on_rq, int running)
550{
551 int i;
552 unsigned long flags;
553 struct cpu_entry *entry;
554 struct dgl_group_req *req;
555 struct control_page *cp = tsk_rt(t)->ctrl_page;
556 struct color_ctrl_page *ccp = tsk_rt(t)->color_ctrl_page;
557
558
559 TRACE_TASK(t, "New colored task\n");
560 entry = (is_be(t)) ? local_entry : task_entry(t);
561
562 raw_spin_lock_irqsave(entry_lock(entry), flags);
563
564 req = kmalloc(sizeof(*req), GFP_ATOMIC);
565 tsk_rt(t)->req = req;
566 tsk_rt(t)->tot_exec_time = 0;
567 tsk_rt(t)->max_exec_time = 0;
568 tsk_rt(t)->max_tardy = 0;
569 tsk_rt(t)->missed = 0;
570 tsk_rt(t)->total_tardy = 0;
571 tsk_rt(t)->ctrl_page->colors_updated = 1;
572 tsk_rt(t)->last_exec_time = 0;
573
574 release_at(t, litmus_clock());
575
576 /* Fill request */
577 if (cp && ccp && cp->colors_updated) {
578 TRACE_TASK(t, "Initializing group request\n");
579 cp->colors_updated = 0;
580 dgl_group_req_init(&group_lock, req);
581 for (i = 0; ccp->pages[i]; ++i)
582 set_req(&group_lock, req, ccp->colors[i], ccp->pages[i]);
583 } else {
584 TRACE("Oh noz: %p %p %d\n", cp, ccp, ((cp) ? cp->colors_updated : -1));
585 }
586
587 if (running) {
588 /* No need to lock with irqs disabled */
589 TRACE_TASK(t, "Already scheduled on %d\n", entry->server.cpu);
590 BUG_ON(entry->scheduled);
591 entry->scheduled = t;
592 tsk_rt(t)->scheduled_on = entry->server.cpu;
593 } else {
594 job_arrival(t);
595 }
596
597 raw_spin_unlock(entry_lock(entry));
598
599 if (is_be(t))
600 check_for_fifo_preempt();
601 else
602 litmus_reschedule_local();
603
604 local_irq_restore(flags);
605}
606
607static void color_task_wake_up(struct task_struct *task)
608{
609 unsigned long flags;
610 struct cpu_entry* entry = local_entry;
611 int sched;
612 lt_t now = litmus_clock();
613
614 TRACE_TASK(task, "Wake up at %llu\n", now);
615
616 raw_spin_lock_irqsave(entry_lock(entry), flags);
617
618 /* Abuse sporadic model */
619 if (is_tardy(task, now)) {
620 release_at(task, now);
621 sched_trace_task_release(task);
622 }
623
624 sched = (entry->scheduled == task);
625
626 if (!sched)
627 job_arrival(task);
628 else
629 TRACE_TASK(task, "Is already scheduled on %d!\n",
630			entry->server.cpu);
631
632 raw_spin_unlock(entry_lock(entry));
633 if (is_be(task))
634 check_for_fifo_preempt();
635 else
636 litmus_reschedule_local();
637
638
639 local_irq_restore(flags);
640}
641
642static void color_task_block(struct task_struct *t)
643{
644 TRACE_TASK(t, "Block at %llu, state=%d\n", litmus_clock(), t->state);
645 BUG_ON(!is_realtime(t));
646 BUG_ON(is_queued(t));
647}
648
649static void color_task_exit(struct task_struct *t)
650{
651 unsigned long flags;
652 struct cpu_entry *entry = task_entry(t);
653 raw_spinlock_t *lock = task_lock(entry, t);
654
655 TRACE_TASK(t, "RIP, now reschedule\n");
656
657 local_irq_save(flags);
658
659 sched_trace_task_exit(t);
660 sched_trace_task_tardy(t);
661
662 /* Remove from scheduler consideration */
663 if (is_queued(t)) {
664 raw_spin_lock(lock);
665 remove(task_dom(entry, t), t);
666 raw_spin_unlock(lock);
667 }
668
669 /* Stop parent server */
670 if (get_task_server(t))
671 unlink(get_task_server(t));
672
673 /* Unschedule running task */
674 if (tsk_rt(t)->scheduled_on != NO_CPU) {
675 entry = remote_entry(tsk_rt(t)->scheduled_on);
676
677 raw_spin_lock(entry_lock(entry));
678
679 tsk_rt(t)->scheduled_on = NO_CPU;
680 entry->scheduled = NULL;
681 litmus_reschedule(entry->server.cpu);
682
683 raw_spin_unlock(entry_lock(entry));
684 }
685
686 /* Remove dgl request from system */
687 raw_spin_lock(&dgl_lock);
688 release_resources(t);
689 raw_spin_unlock(&dgl_lock);
690
691 dgl_group_req_free(tsk_rt(t)->req);
692 kfree(tsk_rt(t)->req);
693
694 local_irq_restore(flags);
695}
696
697/*
698 * Non-be tasks must have migrated to the right CPU.
699 */
700static long color_admit_task(struct task_struct* t)
701{
702	int ret = (is_be(t) || task_cpu(t) == get_partition(t)) ? 0 : -EINVAL;
703	if (ret) {
704 printk(KERN_WARNING "Task failed to migrate to CPU %d\n",
705 get_partition(t));
706 }
707 return ret;
708}
709
710/*
711 * Load server parameters.
712 */
713static long color_activate_plugin(void)
714{
715 int cpu, ret = 0;
716 struct rt_task tp;
717 struct task_struct *server_task;
718 struct cpu_entry *entry;
719
720 color_chunk = 0;
721
722 for_each_online_cpu(cpu) {
723 entry = remote_entry(cpu);
724 server_task = entry->fifo_server.task;
725
726 raw_spin_lock(entry_lock(entry));
727
728 ret = color_server_params(cpu, ((unsigned long*)&tp.exec_cost),
729 ((unsigned long*)&tp.period));
730 if (ret) {
731 printk(KERN_WARNING "Uninitialized server for CPU %d\n",
732 entry->server.cpu);
733 goto loop_end;
734 }
735
736 /* Fill rt parameters */
737 tp.phase = 0;
738 tp.cpu = cpu;
739 tp.cls = RT_CLASS_SOFT;
740 tp.budget_policy = PRECISE_ENFORCEMENT;
741 tsk_rt(server_task)->task_params = tp;
742 tsk_rt(server_task)->present = 1;
743
744 entry->scheduled = NULL;
745
746 TRACE_TASK(server_task, "Created server with wcet: %llu, "
747 "period: %llu\n", tp.exec_cost, tp.period);
748
749 loop_end:
750 raw_spin_unlock(entry_lock(entry));
751 }
752
753 return ret;
754}
755
756/*
757 * Mark servers as unused, making future calls to requeue fail.
758 */
759static long color_deactivate_plugin(void)
760{
761 int cpu;
762 struct cpu_entry *entry;
763
764 for_each_online_cpu(cpu) {
765 entry = remote_entry(cpu);
766 if (entry->fifo_server.task) {
767 tsk_rt(entry->fifo_server.task)->present = 0;
768 }
769 }
770 return 0;
771}
772
773/*
774 * Dump container and server parameters for tracing.
775 */
776static void color_release_ts(lt_t time)
777{
778 int cpu, fifo_cid;
779 char fifo_name[TASK_COMM_LEN], cpu_name[TASK_COMM_LEN];
780 struct cpu_entry *entry;
781 struct task_struct *stask;
782
783 strcpy(cpu_name, "CPU");
784 strcpy(fifo_name, "BE");
785
786 fifo_cid = num_online_cpus();
787 trace_litmus_container_param(fifo_cid, fifo_name);
788
789 for_each_online_cpu(cpu) {
790 entry = remote_entry(cpu);
791 trace_litmus_container_param(cpu, cpu_name);
792 trace_litmus_server_param(entry->server.sid, cpu, 0, 0);
793 stask = entry->fifo_server.task;
794 trace_litmus_server_param(stask->pid, fifo_cid,
795 get_exec_cost(stask),
796 get_rt_period(stask));
797
798 /* Make runnable */
799 release_at(stask, time);
800 entry->fifo_server.start_time = 0;
801
802 cancel_enforcement_timer(&entry->fifo_server.timer);
803
804 if (!is_queued(stask))
805 requeue(&entry->rm_domain, stask);
806 }
807}
808
809static struct sched_plugin color_plugin __cacheline_aligned_in_smp = {
810 .plugin_name = "COLOR",
811 .task_new = color_task_new,
812 .complete_job = complete_job,
813 .task_exit = color_task_exit,
814 .schedule = color_schedule,
815 .task_wake_up = color_task_wake_up,
816 .task_block = color_task_block,
817 .admit_task = color_admit_task,
818
819 .release_ts = color_release_ts,
820
821 .activate_plugin = color_activate_plugin,
822 .deactivate_plugin = color_deactivate_plugin,
823};
824
825static int __init init_color(void)
826{
827 int cpu;
828 struct cpu_entry *entry;
829 struct task_struct *server_task;
830 struct fifo_server *fifo_server;
831 struct rt_server *cpu_server;
832
833 for_each_online_cpu(cpu) {
834 entry = remote_entry(cpu);
835 rm_domain_init(&entry->rm_domain, NULL, color_rm_release);
836
837 entry->scheduled = NULL;
838
839 /* Create FIFO server */
840 fifo_server = &entry->fifo_server;
841 init_rt_server(&fifo_server->server,
842 cpu + num_online_cpus() + 1,
843 cpu,
844 &fifo_domain,
845 fifo_preemption_needed, fifo_update);
846
847
848 /* Create task struct for FIFO server */
849 server_task = kmalloc(sizeof(struct task_struct), GFP_ATOMIC);
850 memset(server_task, 0, sizeof(*server_task));
851 server_task->policy = SCHED_LITMUS;
852 strcpy(server_task->comm, "server");
853 server_task->pid = fifo_server->server.sid;
854 fifo_server->task = server_task;
855
856 /* Create rt_params for FIFO server */
857 tsk_rt(server_task)->heap_node = bheap_node_alloc(GFP_ATOMIC);
858 tsk_rt(server_task)->rel_heap = release_heap_alloc(GFP_ATOMIC);
859 bheap_node_init(&tsk_rt(server_task)->heap_node, server_task);
860 tsk_rt(server_task)->is_server = 1;
861
862 /* Create CPU server */
863 cpu_server = &entry->server;
864 init_rt_server(cpu_server, cpu + 1, cpu,
865 &entry->rm_domain, rm_preemption_needed, NULL);
866 cpu_server->running = 1;
867
868 init_enforcement_timer(&fifo_server->timer);
869 hrtimer_init(&entry->chunk_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
870 entry->chunk_timer.function = chunk_fire;
871 }
872
873 fifo_domain_init(&fifo_domain, NULL, color_fifo_release);
874 raw_spin_lock_init(&fifo_lock);
875
876 dgl_init(&group_lock, color_cache_info.nr_colors,
877 color_cache_info.ways);
878 raw_spin_lock_init(&dgl_lock);
879
880 return register_sched_plugin(&color_plugin);
881}
882
883static void exit_color(void)
884{
885 dgl_free(&group_lock);
886}
887
888module_init(init_color);
889module_exit(exit_color);
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index c3344b9d288f..4f93d16b4d52 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -796,8 +796,10 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
796 /* check if we need to activate priority inheritance */ 796 /* check if we need to activate priority inheritance */
797 if (edf_higher_prio(t, sem->hp_waiter)) { 797 if (edf_higher_prio(t, sem->hp_waiter)) {
798 sem->hp_waiter = t; 798 sem->hp_waiter = t;
799 if (edf_higher_prio(t, sem->owner)) 799 if (edf_higher_prio(t, sem->owner)) {
800 set_priority_inheritance(sem->owner, sem->hp_waiter); 800 set_priority_inheritance(sem->owner, sem->hp_waiter);
801
802 }
801 } 803 }
802 804
803 TS_LOCK_SUSPEND; 805 TS_LOCK_SUSPEND;
@@ -805,6 +807,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
805 /* release lock before sleeping */ 807 /* release lock before sleeping */
806 spin_unlock_irqrestore(&sem->wait.lock, flags); 808 spin_unlock_irqrestore(&sem->wait.lock, flags);
807 809
810 sched_trace_task_block(t);
811
808 /* We depend on the FIFO order. Thus, we don't need to recheck 812 /* We depend on the FIFO order. Thus, we don't need to recheck
809 * when we wake up; we are guaranteed to have the lock since 813 * when we wake up; we are guaranteed to have the lock since
810 * there is only one wake up per release. 814 * there is only one wake up per release.
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
new file mode 100644
index 000000000000..2f4eb82b2220
--- /dev/null
+++ b/litmus/sched_mc.c
@@ -0,0 +1,1373 @@
1/**
2 * litmus/sched_mc.c
3 *
4 * Implementation of the Mixed Criticality scheduling algorithm.
5 *
6 * (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010)
7 *
8 * First and foremost: the relative time spent in the different parts of
9 * release and scheduling overhead needs to be measured and graphed.
10 *
11 * Domain locks should be more fine-grained. There is no reason to hold the
12 * ready-queue lock when adding a task to the release-queue.
13 *
14 * The levels should be converted to linked-lists so that they are more
15 * adaptable and need not be identical on all processors.
16 *
17 * The interaction between remove_from_all and other concurrent operations
18 * should be re-examined. If a job_completion and a preemption happen
19 * simultaneously, a task could be requeued, removed, then requeued again.
20 *
21 * Level-C tasks should be able to swap CPUs a-la GSN-EDF. They should also
22 * try and swap with the last CPU they were on. This could be complicated for
23 * ghost tasks.
24 *
25 * Locking for timer-merging could be infinitely more fine-grained. A second
26 * hash could select a lock to use based on queue slot. This approach might
27 * also help with add_release in rt_domains.
28 *
29 * It should be possible to reserve a CPU for ftdumping.
30 *
31 * The real_deadline business seems sloppy.
32 *
33 * The amount of data in the header file should be cut down. The use of the
34 * header file in general needs to be re-examined.
35 *
36 * The plugin needs to be modified so that it doesn't freeze when it is
37 * deactivated in a VM.
38 *
39 * The locking in check_for_preempt is not fine-grained enough.
40 *
41 * The size of the structures could be smaller. Debugging info might be
42 * excessive as things currently stand.
43 *
44 * The macro can_requeue has been expanded too much. Anything beyond
45 * scheduled_on is a hack!
46 *
47 * Domain names (rt_domain) are still clumsy.
48 *
49 * Should BE be moved into the kernel? This will require benchmarking.
50 */
51
52#include <linux/spinlock.h>
53#include <linux/percpu.h>
54#include <linux/sched.h>
55#include <linux/hrtimer.h>
56#include <linux/slab.h>
57#include <linux/module.h>
58#include <linux/poison.h>
59#include <linux/pid.h>
60
61#include <litmus/litmus.h>
62#include <litmus/trace.h>
63#include <litmus/jobs.h>
64#include <litmus/sched_plugin.h>
65#include <litmus/edf_common.h>
66#include <litmus/sched_trace.h>
67#include <litmus/domain.h>
68#include <litmus/bheap.h>
69#include <litmus/event_group.h>
70#include <litmus/budget.h>
71
72#include <litmus/sched_mc.h>
73#include <litmus/ce_domain.h>
74
75/**
76 * struct cpu_entry - State of a CPU for the entire MC system
77 * @cpu CPU id
78 * @scheduled Task that is physically running
79 * @linked Task that should be running / is logically running
80 * @lock For serialization
81 * @crit_entries Array of CPU state per criticality level
82 * @redir List of redirected work for this CPU.
83 * @redir_lock Lock for @redir.
84 * @event_group Event group for timer merging.
85 */
86struct cpu_entry {
87 int cpu;
88 struct task_struct* scheduled;
89 struct task_struct* will_schedule;
90 struct task_struct* linked;
91 raw_spinlock_t lock;
92 struct crit_entry crit_entries[NUM_CRIT_LEVELS];
93#ifdef CONFIG_PLUGIN_MC_REDIRECT
94 struct list_head redir;
95 raw_spinlock_t redir_lock;
96#endif
97#ifdef CONFIG_MERGE_TIMERS
98 struct event_group *event_group;
99#endif
100};
101
102DEFINE_PER_CPU(struct cpu_entry, cpus);
103#ifdef CONFIG_RELEASE_MASTER
104static int interrupt_cpu;
105#endif
106
107#define domain_data(dom) (container_of(dom, struct domain_data, domain))
108#define is_global(dom) (domain_data(dom)->heap)
109#define is_global_task(t) (is_global(get_task_domain(t)))
110#define can_use(ce) \
111	((ce)->state == CS_ACTIVE || (ce)->state == CS_ACTIVATE)
112#define can_requeue(t) \
113 ((t)->rt_param.linked_on == NO_CPU && /* Not linked anywhere */ \
114 !is_queued(t) && /* Not gonna be linked */ \
115 (!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU))
116#define entry_level(e) \
117 (((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
118#define crit_cpu(ce) \
119 (container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
120#define get_crit_entry_for(cpu, level) (&per_cpu(cpus, cpu).crit_entries[level])
121#define TRACE_ENTRY(e, fmt, args...) \
122 STRACE("P%d, linked=" TS " " fmt, e->cpu, TA(e->linked), ##args)
123#define TRACE_CRIT_ENTRY(ce, fmt, args...) \
124 STRACE("%s P%d, linked=" TS " " fmt, \
125 (ce)->domain->name, crit_cpu(ce)->cpu, TA((ce)->linked), ##args)
126
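
Of the helpers above, crit_cpu() is the only non-obvious one: it recovers the owning cpu_entry purely from pointer arithmetic on the crit_entries array. The same trick in a standalone userspace sketch (stand-in types, not kernel code):

#include <stddef.h>

struct item  { int level; };                      /* stand-in for crit_entry */
struct owner { int cpu; struct item items[4]; };  /* stand-in for cpu_entry  */

/* Given &owner.items[level], step back 'level' slots to items[0], then
 * subtract the array's offset -- exactly what crit_cpu() does with
 * container_of() on (ce - ce->level). */
static struct owner *owner_of(struct item *it)
{
	struct item *first = it - it->level;
	return (struct owner *)((char *)first - offsetof(struct owner, items));
}
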
127static int sid(struct crit_entry *ce)
128{
129 int level = ce->level * num_online_cpus() + crit_cpu(ce)->cpu + 1;
130	BUG_ON(level <= 0);
131 return -level;
132}
133
134/*
135 * Sort CPUs within a global domain's heap.
136 */
137static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
138{
139 struct domain *domain;
140 struct crit_entry *first, *second;
141 struct task_struct *first_link, *second_link;
142
143 first = a->value;
144 second = b->value;
145 first_link = first->linked;
146 second_link = second->linked;
147
148 if (first->state == CS_REMOVED || second->state == CS_REMOVED) {
149 /* Removed entries go at the back of the heap */
150 return first->state != CS_REMOVED &&
151 second->state != CS_REMOVED;
152 } else if (!first_link || !second_link) {
153 /* Entry with nothing scheduled is lowest priority */
154 return second_link && !first_link;
155 } else {
156 /* Sort by deadlines of tasks */
157 domain = get_task_domain(first_link);
158 return domain->higher_prio(second_link, first_link);
159 }
160}
161
162/*
163 * Return true if the domain has a higher priority ready task. The @curr
164 * task must belong to the domain.
165 */
166static int mc_preempt_needed(struct domain *dom, struct task_struct* curr)
167{
168 struct task_struct *next = dom->peek_ready(dom);
169 if (!next || !curr) {
170 return next && !curr;
171 } else {
172 BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
173 return get_task_domain(next)->higher_prio(next, curr);
174 }
175}
176
177/*
178 * Update crit entry position in a global heap. Caller must hold
179 * @ce's domain lock.
180 */
181static inline void update_crit_position(struct crit_entry *ce)
182{
183 struct bheap *heap;
184 if (is_global(ce->domain)) {
185 heap = domain_data(ce->domain)->heap;
186 BUG_ON(!heap);
187 BUG_ON(!bheap_node_in_heap(ce->node));
188 bheap_delete(cpu_lower_prio, heap, ce->node);
189 bheap_insert(cpu_lower_prio, heap, ce->node);
190 }
191}
192
193/*
194 * Update crit entry position in a global heap if it has been marked
195 * for update. Caller must hold @ce's domain lock.
196 */
197static void fix_crit_position(struct crit_entry *ce)
198{
199 if (is_global(ce->domain)) {
200 if (CS_ACTIVATE == ce->state) {
201 ce->state = CS_ACTIVE;
202 update_crit_position(ce);
203 } else if (CS_REMOVE == ce->state) {
204 ce->state = CS_REMOVED;
205 update_crit_position(ce);
206 }
207 }
208}
209
210/*
211 * Return next CPU which should preempted or NULL if the domain has no
212 * preemptable CPUs. Caller must hold the @dom lock.
213 */
214static inline struct crit_entry* lowest_prio_cpu(struct domain *dom)
215{
216 struct bheap *heap = domain_data(dom)->heap;
217 struct bheap_node* hn;
218 struct crit_entry *ce, *res = NULL;
219 do {
220 hn = bheap_peek(cpu_lower_prio, heap);
221 ce = (hn) ? hn->value : NULL;
222 if (ce) {
223 if (ce->state == CS_ACTIVE)
224 res = ce;
225 else if (ce->state == CS_REMOVED)
226 ce = NULL;
227 else
228 fix_crit_position(ce);
229 }
230 } while (ce && !res);
231 return res;
232}
233
234/*
235 * Cancel ghost timer.
236 */
237static inline void cancel_ghost(struct crit_entry *ce)
238{
239#ifdef CONFIG_MERGE_TIMERS
240 cancel_event(&ce->event);
241#else
242 hrtimer_try_to_cancel(&ce->timer);
243#endif
244}
245
246/*
247 * Arm ghost timer. Will merge timers if the option is specified.
248 */
249static inline void arm_ghost(struct crit_entry *ce, lt_t fire)
250{
251#ifdef CONFIG_MERGE_TIMERS
252 add_event(crit_cpu(ce)->event_group, &ce->event, fire);
253#else
254 __hrtimer_start_range_ns(&ce->timer,
255 ns_to_ktime(fire),
256 0 /* delta */,
257 HRTIMER_MODE_ABS_PINNED,
258 0 /* no wakeup */);
259#endif
260}
261
262/*
263 * Time accounting for ghost tasks.
264 * Must be called before a decision is made involving the task's budget.
265 */
266static void update_ghost_time(struct task_struct *p)
267{
268 u64 clock = litmus_clock();
269 u64 delta = clock - p->se.exec_start;
270 BUG_ON(!is_ghost(p));
271 if (unlikely ((s64)delta < 0)) {
272 delta = 0;
273 TRACE_MC_TASK(p, "WARNING: negative time delta\n");
274 }
275 if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) {
276 TRACE_MC_TASK(p, "Ghost job could have ended\n");
277 tsk_mc_data(p)->mc_job.ghost_budget = 0;
278 p->se.exec_start = clock;
279 } else {
280 TRACE_MC_TASK(p, "Ghost job updated, but didn't finish\n");
281 tsk_mc_data(p)->mc_job.ghost_budget -= delta;
282 p->se.exec_start = clock;
283 }
284}
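
In effect, the accounting above charges the elapsed wall-clock time against the remaining ghost budget, clamps at zero, and restarts the measurement window. The same arithmetic as a standalone sketch (plain C, not kernel code):

typedef unsigned long long lt_t;

/* Sketch of the update performed by update_ghost_time(). */
static lt_t ghost_charge(lt_t budget, lt_t *exec_start, lt_t now)
{
	/* Guard against a clock reading behind exec_start, as above. */
	lt_t delta = (now > *exec_start) ? now - *exec_start : 0;

	*exec_start = now;
	return (budget <= delta) ? 0 : budget - delta;
}
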
285
286/**
287 * link_task_to_crit() - Logically run a task at a criticality level.
288 * Caller must hold @ce's CPU lock.
289 */
290static void link_task_to_crit(struct crit_entry *ce,
291 struct task_struct *task)
292{
293 lt_t when_to_fire;
294
295 TRACE_CRIT_ENTRY(ce, "Linking " TS "\n", TA(task));
296 BUG_ON(!can_use(ce) && task);
297 BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
298 BUG_ON(task && is_global(ce->domain) &&
299 !bheap_node_in_heap(ce->node));
300
301 /* Unlink last task */
302 if (ce->linked) {
303 TRACE_MC_TASK(ce->linked, "Unlinking\n");
304 ce->linked->rt_param.linked_on = NO_CPU;
305 if (is_ghost(ce->linked)) {
306 cancel_ghost(ce);
307 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget > 0) {
308 /* Job isn't finished, so do accounting */
309 update_ghost_time(ce->linked);
310 }
311 }
312 sched_trace_server_switch_away(sid(ce), 0, ce->linked->pid,
313 tsk_rt(ce->linked)->job_params.job_no);
314 }
315
316 /* Actually link task */
317 ce->linked = task;
318 if (task) {
319 task->rt_param.linked_on = crit_cpu(ce)->cpu;
320 if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
321 /* There is a level-A timer that will force a
322 * preemption, so we don't set this for level-A
323 * tasks. Otherwise reset the budget timer.
324 */
325 task->se.exec_start = litmus_clock();
326 when_to_fire = task->se.exec_start +
327 tsk_mc_data(task)->mc_job.ghost_budget;
328 arm_ghost(ce, when_to_fire);
329
330 sched_trace_server_switch_to(sid(ce), 0, 0, 0);
331 } else {
332 sched_trace_server_switch_to(sid(ce), 0, task->pid,
333 get_rt_job(ce->linked));
334 }
335 }
336}
337
338static void check_for_preempt(struct domain*);
339
340/**
341 * job_arrival() - Called when a task re-enters the system.
342 * Caller must hold no locks.
343 */
344static void job_arrival(struct task_struct *task)
345{
346 struct domain *dom = get_task_domain(task);
347
348 TRACE_MC_TASK(task, "Job arriving\n");
349 BUG_ON(!task);
350
351 raw_spin_lock(dom->lock);
352 if (can_requeue(task)) {
353 BUG_ON(task->rt_param.linked_on != NO_CPU);
354 dom->requeue(dom, task);
355 check_for_preempt(dom);
356 } else {
357 /* If a global task is scheduled on one cpu, it CANNOT
358 * be requeued into a global domain. Another cpu might
359 * dequeue the global task before it is descheduled,
360 * causing the system to crash when the task is scheduled
361 * in two places simultaneously.
362 */
363 TRACE_MC_TASK(task, "Delayed arrival of scheduled task\n");
364 }
365 raw_spin_unlock(dom->lock);
366}
367
368/**
369 * low_prio_arrival() - If CONFIG_PLUGIN_MC_REDIRECT is enabled, will
370 * redirect a lower priority job_arrival work to the interrupt_cpu.
371 */
372static void low_prio_arrival(struct task_struct *task)
373{
374 struct cpu_entry *entry;
375
376 /* Race conditions! */
377 if (!can_requeue(task)) return;
378
379#ifdef CONFIG_PLUGIN_MC_REDIRECT
380 if (!is_global_task(task))
381 goto arrive;
382 if (smp_processor_id() != interrupt_cpu) {
383 entry = &__get_cpu_var(cpus);
384 raw_spin_lock(&entry->redir_lock);
385 TRACE_MC_TASK(task, "Adding to redirect queue\n");
386 list_add(&tsk_rt(task)->list, &entry->redir);
387 raw_spin_unlock(&entry->redir_lock);
388 litmus_reschedule(interrupt_cpu);
389 } else
390#endif
391 {
392arrive:
393 job_arrival(task);
394 }
395}
396
397#ifdef CONFIG_PLUGIN_MC_REDIRECT
398/**
399 * fix_global_levels() - Execute redirected job arrivals on this cpu.
400 */
401static void fix_global_levels(void)
402{
403 int c;
404 struct cpu_entry *e;
405 struct list_head *pos, *safe;
406 struct task_struct *t;
407
408 STRACE("Fixing global levels\n");
409 for_each_online_cpu(c) {
410 e = &per_cpu(cpus, c);
411 raw_spin_lock(&e->redir_lock);
412 list_for_each_safe(pos, safe, &e->redir) {
413 t = list_entry(pos, struct task_struct, rt_param.list);
414 BUG_ON(!t);
415 TRACE_MC_TASK(t, "Dequeued redirected job\n");
416 list_del_init(pos);
417 job_arrival(t);
418 }
419 raw_spin_unlock(&e->redir_lock);
420 }
421}
422#endif
423
424/**
425 * link_task_to_cpu() - Logically run a task on a CPU.
426 * The task must first have been linked to one of the CPU's crit_entries.
427 * Caller must hold the entry lock.
428 */
429static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
430{
431 int i = entry_level(entry);
432 struct crit_entry *ce;
433 TRACE_MC_TASK(task, "Linking to P%d\n", entry->cpu);
434 BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
435 BUG_ON(task && is_ghost(task));
436
437 if (entry->linked) {
438 sched_trace_server_switch_away(-entry->linked->pid,
439 get_server_job(entry->linked),
440 entry->linked->pid,
441 get_rt_job(entry->linked));
442 }
443
444 if (task){
445 set_rt_flags(task, RT_F_RUNNING);
446 sched_trace_server_switch_to(-task->pid,
447 get_server_job(task),
448 task->pid,
449 get_rt_job(task));
450 }
451 entry->linked = task;
452
453	/* Crit entries between the old and new level are now usable */
454 for (; i < entry_level(entry) + 1; i++) {
455 ce = &entry->crit_entries[i];
456 if (!can_use(ce)) {
457 ce->state = CS_ACTIVATE;
458 }
459 }
460}
461
462/**
463 * preempt() - Preempt a logically running task with a higher priority one.
464 * @dom Domain from which to draw higher priority task
465 * @ce CPU criticality level to preempt
466 *
467 * Caller must hold the lock for @dom and @ce's CPU lock.
468 */
469static void preempt(struct domain *dom, struct crit_entry *ce)
470{
471 struct task_struct *task = dom->take_ready(dom);
472 struct cpu_entry *entry = crit_cpu(ce);
473 struct task_struct *old = ce->linked;
474
475 BUG_ON(!task);
476 TRACE_CRIT_ENTRY(ce, "Preempted by " TS "\n", TA(task));
477
478 /* Per-domain preemption */
479 link_task_to_crit(ce, task);
480 if (old && can_requeue(old)) {
481 dom->requeue(dom, old);
482 }
483 update_crit_position(ce);
484
485 /* Preempt actual execution if this is a running task */
486 if (!is_ghost(task)) {
487 link_task_to_cpu(entry, task);
488 preempt_if_preemptable(entry->scheduled, entry->cpu);
489 } else if (old && old == entry->linked) {
490 /* Preempted a running task with a ghost job. Null needs to be
491 * running.
492 */
493 link_task_to_cpu(entry, NULL);
494 preempt_if_preemptable(entry->scheduled, entry->cpu);
495 }
496}
497
498/**
499 * update_crit_levels() - Update criticality entries for the new cpu state.
500 * This should be called after a new task has been linked to @entry.
501 * The caller must hold the @entry->lock, but this method will release it.
502 */
503static void update_crit_levels(struct cpu_entry *entry)
504{
505 int i, global_preempted;
506 struct crit_entry *ce;
507 struct task_struct *readmit[NUM_CRIT_LEVELS];
508 enum crit_level level = entry_level(entry);
509
510 /* Remove lower priority tasks from the entry */
511 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
512 ce = &entry->crit_entries[i];
513
514 global_preempted = ce->linked &&
515 /* This task is running on a cpu */
516 ce->linked->rt_param.scheduled_on == entry->cpu &&
517 /* But it was preempted */
518 ce->linked != entry->linked &&
519 /* And it is an eligible global task */
520 !is_ghost(ce->linked) && is_global(ce->domain);
521
522 /* Do not readmit global tasks which are preempted! These can't
523 * ever be re-admitted until they are descheduled for reasons
524 * explained in job_arrival.
525 */
526 readmit[i] = (!global_preempted) ? ce->linked : NULL;
527
528 ce->state = CS_REMOVE;
529 if (ce->linked)
530 link_task_to_crit(ce, NULL);
531 }
532 /* Need to unlock so we can access domains */
533 raw_spin_unlock(&entry->lock);
534
535 /* Re-admit tasks to the system */
536 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
537 ce = &entry->crit_entries[i];
538 if (readmit[i]) {
539 low_prio_arrival(readmit[i]);
540 }
541 }
542}
543
544/**
545 * check_for_preempt() - Causes a preemption if higher-priority tasks are ready.
546 * Caller must hold domain lock.
547 * Assumes (a large, nasty assumption) that there is exactly one global
548 * criticality level, and that it is the last one in each list, so it never calls update_crit_levels().
549 */
550static void check_for_preempt(struct domain *dom)
551{
552 int recheck = 1;
553 struct cpu_entry *entry;
554 struct crit_entry *ce;
555
556 if (is_global(dom)) {
557 /* Loop until we find a non-preemptable CPU */
558 while ((ce = lowest_prio_cpu(dom)) && recheck) {
559 entry = crit_cpu(ce);
560 recheck = 1;
561
562 /* Cache next task */
563 dom->peek_ready(dom);
564
565 raw_spin_lock(&entry->lock);
566 if (!can_use(ce))
567 /* CPU disabled while locking! */
568 fix_crit_position(ce);
569 else if (dom->preempt_needed(dom, ce->linked))
570 /* Success! Check for more preemptions */
571 preempt(dom, ce);
572 else {
573 /* Failure! */
574 recheck = 0;
575 TRACE_CRIT_ENTRY(ce, "Stopped global check\n");
576 }
577 raw_spin_unlock(&entry->lock);
578 }
579 } else /* Partitioned */ {
580 ce = domain_data(dom)->crit_entry;
581 entry = crit_cpu(ce);
582
583 /* Cache next task */
584 dom->peek_ready(dom);
585
586 raw_spin_lock(&entry->lock);
587 if (can_use(ce) && dom->preempt_needed(dom, ce->linked)) {
588 preempt(dom, ce);
589 update_crit_levels(entry);
590 } else {
591 raw_spin_unlock(&entry->lock);
592 }
593 }
594}
595
596/**
597 * remove_from_all() - Logically remove a task from all structures.
598 * Caller must hold no locks.
599 */
600static void remove_from_all(struct task_struct* task)
601{
602 int update = 0;
603 struct cpu_entry *entry;
604 struct crit_entry *ce;
605 struct domain *dom = get_task_domain(task);
606
607 TRACE_MC_TASK(task, "Removing from everything\n");
608 BUG_ON(!task);
609
610 raw_spin_lock(dom->lock);
611
612 /* Remove the task from any CPU state */
613 if (task->rt_param.linked_on != NO_CPU) {
614 entry = &per_cpu(cpus, task->rt_param.linked_on);
615 raw_spin_lock(&entry->lock);
616
617 /* Unlink only if task is still linked post lock */
618 ce = &entry->crit_entries[tsk_mc_crit(task)];
619 if (task->rt_param.linked_on != NO_CPU) {
620 BUG_ON(ce->linked != task);
621 link_task_to_crit(ce, NULL);
622 update_crit_position(ce);
623 if (!is_ghost(task) && entry->linked == task) {
624 update = 1;
625 link_task_to_cpu(entry, NULL);
626 }
627 } else {
628 TRACE_MC_TASK(task, "Unlinked before we got lock!\n");
629 }
630 if (update)
631 update_crit_levels(entry);
632 else
633 raw_spin_unlock(&entry->lock);
634 } else {
635 TRACE_MC_TASK(task, "Not linked to anything\n");
636 }
637
638 /* Ensure the task isn't returned by its domain */
639 dom->remove(dom, task);
640
641 raw_spin_unlock(dom->lock);
642}
643
644/**
645 * job_completion() - Update task state and re-enter it into the system.
646 * Converts tasks which have completed their execution early into ghost jobs.
647 * Caller must hold no locks.
648 */
649static void job_completion(struct task_struct *task, int forced)
650{
651 int behind;
652 TRACE_MC_TASK(task, "Completed\n");
653
654 /* Logically stop the task execution */
655 set_rt_flags(task, RT_F_SLEEP);
656 remove_from_all(task);
657
658 /* Level-A tasks cannot ever get behind */
659 behind = tsk_mc_crit(task) != CRIT_LEVEL_A && behind_server(task);
660
661 if (!forced && !is_ghost(task)) {
662 /* Task voluntarily ceased execution. Move on to next period */
663 task_release(task);
664 sched_trace_task_completion(task, forced);
665
666 /* Convert to ghost job */
667 tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task);
668 tsk_mc_data(task)->mc_job.is_ghost = 1;
669 }
670
671 /* If the task has no ghost budget, convert back from ghost.
672 * If the task is behind, undo ghost conversion so that it
673 * can catch up.
674 */
675 if (behind || tsk_mc_data(task)->mc_job.ghost_budget == 0) {
676 TRACE_MC_TASK(task, "Not a ghost task\n");
677 tsk_mc_data(task)->mc_job.is_ghost = 0;
678 tsk_mc_data(task)->mc_job.ghost_budget = 0;
679 }
680
681 /* If server has run out of budget, wait until next release */
682 if (budget_exhausted(task)) {
683 sched_trace_server_completion(-task->pid,
684 get_server_job(task));
685 server_release(task);
686 }
687
688 /* Requeue non-blocking tasks */
689 if (is_running(task))
690 job_arrival(task);
691}
692
693/**
694 * mc_ghost_exhausted() - Complete logically running ghost task.
695 */
696#ifdef CONFIG_MERGE_TIMERS
697static void mc_ghost_exhausted(struct rt_event *e)
698{
699 struct crit_entry *ce = container_of(e, struct crit_entry, event);
700#else
701static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
702{
703 struct crit_entry *ce = container_of(timer, struct crit_entry, timer);
704#endif
705
706 unsigned long flags;
707 struct task_struct *tmp = NULL;
708
709 local_irq_save(flags);
710 TRACE("Ghost exhausted\n");
711 TRACE_CRIT_ENTRY(ce, "Firing here\n");
712
713 /* Due to race conditions, we cannot just set the linked
714 * task's budget to 0 as it may no longer be the task
715 * for which this timer was armed. Instead, update the running
716 * task time and see if this causes exhaustion.
717 */
718 raw_spin_lock(&crit_cpu(ce)->lock);
719 if (ce->linked && is_ghost(ce->linked)) {
720 update_ghost_time(ce->linked);
721 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) {
722 tmp = ce->linked;
723 }
724 }
725 raw_spin_unlock(&crit_cpu(ce)->lock);
726
727 if (tmp)
728 job_completion(tmp, 0);
729
730 local_irq_restore(flags);
731#ifndef CONFIG_MERGE_TIMERS
732 return HRTIMER_NORESTART;
733#endif
734}
735
736/*
737 * The MC-CE common timer callback code for merged and non-merged timers.
738 * Returns the next time the timer should fire.
739 */
740static lt_t __ce_timer_function(struct ce_dom_data *ce_data)
741{
742 struct crit_entry *ce = get_crit_entry_for(ce_data->cpu, CRIT_LEVEL_A);
743 struct domain *dom = ce->domain;
744 struct task_struct *old_link = NULL;
745 lt_t next_timer_abs;
746
747 TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
748
749 raw_spin_lock(dom->lock);
750
751 raw_spin_lock(&crit_cpu(ce)->lock);
752 if (ce->linked &&
753 ce->linked == ce_data->should_schedule &&
754 is_ghost(ce->linked))
755 {
756 old_link = ce->linked;
757 tsk_mc_data(ce->linked)->mc_job.ghost_budget = 0;
758 link_task_to_crit(ce, NULL);
759 }
760 raw_spin_unlock(&crit_cpu(ce)->lock);
761
762 next_timer_abs = mc_ce_timer_callback_common(dom);
763
764 /* Job completion will check for preemptions by means of calling job
765 * arrival if the task is not blocked */
766 if (NULL != old_link) {
767 STRACE("old_link " TS " so will call job completion\n", TA(old_link));
768 raw_spin_unlock(dom->lock);
769 job_completion(old_link, 0);
770 } else {
771 STRACE("old_link was null, so will call check for preempt\n");
772 raw_spin_unlock(dom->lock);
773 check_for_preempt(dom);
774 }
775 return next_timer_abs;
776}
777
778#ifdef CONFIG_MERGE_TIMERS
779static void ce_timer_function(struct rt_event *e)
780{
781 struct ce_dom_data *ce_data =
782 container_of(e, struct ce_dom_data, event);
783 unsigned long flags;
784 lt_t next_timer_abs;
785
786 TS_LVLA_RELEASE_START;
787
788 local_irq_save(flags);
789 next_timer_abs = __ce_timer_function(ce_data);
790 add_event(per_cpu(cpus, ce_data->cpu).event_group, e, next_timer_abs);
791 local_irq_restore(flags);
792
793 TS_LVLA_RELEASE_END;
794}
795#else /* else to CONFIG_MERGE_TIMERS */
796static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
797{
798 struct ce_dom_data *ce_data =
799 container_of(timer, struct ce_dom_data, timer);
800 unsigned long flags;
801 lt_t next_timer_abs;
802
803 TS_LVLA_RELEASE_START;
804
805 local_irq_save(flags);
806 next_timer_abs = __ce_timer_function(ce_data);
807 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
808 local_irq_restore(flags);
809
810 TS_LVLA_RELEASE_END;
811
812 return HRTIMER_RESTART;
813}
814#endif /* CONFIG_MERGE_TIMERS */
815
816
817/**
818 * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
819 */
820static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
821{
822 unsigned long flags;
823 struct task_struct *first = bheap_peek(rt->order, tasks)->value;
824 struct domain *dom = get_task_domain(first);
825
826 raw_spin_lock_irqsave(dom->lock, flags);
827 TRACE(TS "Jobs released\n", TA(first));
828 __merge_ready(rt, tasks);
829 check_for_preempt(dom);
830 raw_spin_unlock_irqrestore(dom->lock, flags);
831}
832
833/**
834 * mc_task_new() - Set up a new mixed-criticality task.
835 * Assumes that there are no partitioned domains after level B.
836 */
837static void mc_task_new(struct task_struct *t, int on_rq, int running)
838{
839 unsigned long flags;
840 struct cpu_entry* entry;
841 enum crit_level level = tsk_mc_crit(t);
842 char name[TASK_COMM_LEN];
843 strcpy(name, "rtspin");
844
845 local_irq_save(flags);
846 TRACE("New mixed criticality task %d\n", t->pid);
847
848 /* Assign domain */
849 if (level < CRIT_LEVEL_C)
850 entry = &per_cpu(cpus, get_partition(t));
851 else
852 entry = &per_cpu(cpus, task_cpu(t));
853 t->rt_param._domain = entry->crit_entries[level].domain;
854
855 sched_trace_container_param(t->pid, name);
856 sched_trace_server_param(-t->pid, t->pid,
857 get_exec_cost(t), get_rt_period(t));
858
859 /* Setup job params */
860 release_at(t, litmus_clock());
861 tsk_mc_data(t)->mc_job.ghost_budget = 0;
862 tsk_mc_data(t)->mc_job.is_ghost = 0;
863 if (running) {
864 BUG_ON(entry->scheduled);
865 entry->scheduled = t;
866 tsk_rt(t)->scheduled_on = entry->cpu;
867 } else {
868 t->rt_param.scheduled_on = NO_CPU;
869 }
870 t->rt_param.linked_on = NO_CPU;
871
872
873 job_arrival(t);
874
875 local_irq_restore(flags);
876}
877
878/**
879 * mc_task_wake_up() - Add the task back into its domain and check for preemptions.
880 */
881static void mc_task_wake_up(struct task_struct *task)
882{
883 unsigned long flags;
884 lt_t now = litmus_clock();
885 local_irq_save(flags);
886
887 TRACE(TS " wakes up\n", TA(task));
888 if (is_tardy(task, now)) {
889 /* Task missed its last release */
890 release_at(task, now);
891 sched_trace_task_release(task);
892 }
893 if (!is_ghost(task))
894 job_arrival(task);
895
896 local_irq_restore(flags);
897}
898
899/**
900 * mc_task_block() - Remove task from state to prevent it being run anywhere.
901 */
902static void mc_task_block(struct task_struct *task)
903{
904 unsigned long flags;
905 local_irq_save(flags);
906 TRACE(TS " blocks\n", TA(task));
907 remove_from_all(task);
908 local_irq_restore(flags);
909}
910
911/**
912 * mc_task_exit() - Remove task from the system.
913 */
914static void mc_task_exit(struct task_struct *task)
915{
916 unsigned long flags;
917 local_irq_save(flags);
918 BUG_ON(!is_realtime(task));
919 TRACE(TS " RIP\n", TA(task));
920
921 remove_from_all(task);
922 if (tsk_rt(task)->scheduled_on != NO_CPU) {
923 per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL;
924 tsk_rt(task)->scheduled_on = NO_CPU;
925 }
926
927 if (CRIT_LEVEL_A == tsk_mc_crit(task))
928 mc_ce_task_exit_common(task);
929
930 local_irq_restore(flags);
931}
932
933/**
934 * mc_admit_task() - Return true if the task is valid.
935 * Assumes there are no partitioned levels after level B.
936 */
937static long mc_admit_task(struct task_struct* task)
938{
939 const enum crit_level crit = tsk_mc_crit(task);
940 long ret;
941 if (!tsk_mc_data(task)) {
942 printk(KERN_WARNING "Tried to admit task with no criticality "
943 "level\n");
944 ret = -EINVAL;
945 goto out;
946 }
947 if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
948 printk(KERN_WARNING "Tried to admit partitioned task with no "
949 "partition\n");
950 ret = -EINVAL;
951 goto out;
952 }
953 if (crit == CRIT_LEVEL_A) {
954 ret = mc_ce_admit_task_common(task);
955 if (ret)
956 goto out;
957 }
958 printk(KERN_INFO "Admitted task with criticality level %d\n",
959 tsk_mc_crit(task));
960 ret = 0;
961out:
962 return ret;
963}
964
965/**
966 * mc_schedule() - Return next task which should be scheduled.
967 */
968static struct task_struct* mc_schedule(struct task_struct* prev)
969{
970 unsigned long flags;
971 struct domain *dom;
972 struct crit_entry *ce;
973 struct cpu_entry* entry = &__get_cpu_var(cpus);
974 int i, out_of_time, sleep, preempt, exists, blocks, global, lower;
975 struct task_struct *dtask = NULL, *ready_task = NULL, *next = NULL;
976
977 local_irq_save(flags);
978
979 /* Litmus gave up because it couldn't access the stack of the CPU
980 * on which will_schedule was migrating from. Requeue it.
981 * This really only happens in VMs.
982 */
983 if (entry->will_schedule && entry->will_schedule != prev) {
984 entry->will_schedule->rt_param.scheduled_on = NO_CPU;
985 low_prio_arrival(entry->will_schedule);
986 }
987
988 raw_spin_lock(&entry->lock);
989
990 /* Sanity checking */
991 BUG_ON(entry->scheduled && entry->scheduled != prev);
992 BUG_ON(entry->scheduled && !is_realtime(prev));
993 BUG_ON(is_realtime(prev) && !entry->scheduled);
994
995 /* Determine state */
996 exists = entry->scheduled != NULL;
997 blocks = exists && !is_running(entry->scheduled);
998 out_of_time = exists && budget_enforced(entry->scheduled) &&
999 budget_exhausted(entry->scheduled);
1000 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
1001 global = exists && is_global_task(entry->scheduled);
1002 preempt = entry->scheduled != entry->linked;
1003 lower = exists && preempt && entry->linked &&
1004 tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);
1005
1006 TRACE(TS " blocks:%d out_of_time:%d sleep:%d preempt:%d\n",
1007 TA(prev), blocks, out_of_time, sleep, preempt);
1008
1009 if (exists)
1010 prev->rt_param.scheduled_on = NO_CPU;
1011
1012 raw_spin_unlock(&entry->lock);
1013
1014
1015#ifdef CONFIG_PLUGIN_MC_REDIRECT
1016 if (smp_processor_id() == interrupt_cpu)
1017 fix_global_levels();
1018#endif
1019
1020 /* If a task blocks we have no choice but to reschedule */
1021 if (blocks)
1022 remove_from_all(entry->scheduled);
1023 /* Any task which exhausts its budget or sleeps waiting for its next
1024 * period completes unless its execution has been forcibly stopped.
1025 */
1026 if ((out_of_time || sleep) && !blocks)/* && !preempt)*/
1027 job_completion(entry->scheduled, !sleep);
1028 /* Global scheduled tasks must wait for a deschedule before they
1029 * can rejoin the global state. Rejoin them here.
1030 */
1031 else if (global && preempt && !blocks) {
1032 if (lower)
1033 low_prio_arrival(entry->scheduled);
1034 else
1035 job_arrival(entry->scheduled);
1036 }
1037
1038 /* Pick next task if none is linked */
1039 raw_spin_lock(&entry->lock);
1040 for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
1041 ce = &entry->crit_entries[i];
1042 dom = ce->domain;
1043
1044 /* Swap locks. We cannot acquire a domain lock while
1045 * holding an entry lock or deadlocks will happen.
1046 */
1047 raw_spin_unlock(&entry->lock);
1048 raw_spin_lock(dom->lock);
1049
1050 /* Do domain stuff before grabbing CPU locks */
1051 dtask = dom->peek_ready(dom);
1052 fix_crit_position(ce);
1053
1054 raw_spin_lock(&entry->lock);
1055
1056 if (!entry->linked && !ce->linked && dtask && can_use(ce)) {
1057 dom->take_ready(dom);
1058 link_task_to_crit(ce, dtask);
1059 update_crit_position(ce);
1060 ready_task = (is_ghost(dtask)) ? NULL : dtask;
1061
1062 /* Task found! */
1063 if (ready_task) {
1064 link_task_to_cpu(entry, ready_task);
1065 raw_spin_unlock(dom->lock);
1066 update_crit_levels(entry);
1067 raw_spin_lock(&entry->lock);
1068 continue;
1069 }
1070 }
1071 raw_spin_unlock(dom->lock);
1072 }
1073
1074 /* Schedule next task */
1075 next = entry->linked;
1076 if (entry->linked)
1077 entry->linked->rt_param.scheduled_on = entry->cpu;
1078 entry->will_schedule = entry->linked;
1079 sched_state_task_picked();
1080
1081 raw_spin_unlock(&entry->lock);
1082 local_irq_restore(flags);
1083 if (next) {
1084 TRACE_MC_TASK(next, "Picked this task\n");
1085 } else if (exists && !next)
1086 TRACE_ENTRY(entry, "Becomes idle at %llu\n", litmus_clock());
1087 return next;
1088}
1089
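The scheduling loop above drops the per-CPU entry lock before taking a domain lock and only then re-acquires it, so the effective lock order is always domain-before-entry. A stand-alone POSIX-mutex sketch of that swap (purely illustrative; the names are invented and none of this is part of the patch):

#include <pthread.h>

/* invented stand-ins for a per-CPU entry lock and one domain lock */
static pthread_mutex_t entry_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t domain_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Caller holds entry_lock. To look at the domain we must not acquire
 * domain_lock on top of it, so drop entry_lock, take domain_lock, then
 * re-take entry_lock -- the same swap mc_schedule() performs per level.
 * On return both locks are held; the caller releases domain_lock once it
 * is done with the domain.
 */
static void lock_domain_then_entry(void)
{
        pthread_mutex_unlock(&entry_lock);
        pthread_mutex_lock(&domain_lock);
        /* "domain stuff" (e.g. peeking at the ready queue) would go here */
        pthread_mutex_lock(&entry_lock);
}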
1090void mc_finish_switch(struct task_struct *prev)
1091{
1092 struct cpu_entry* entry = &__get_cpu_var(cpus);
1093 entry->scheduled = is_realtime(current) ? current : NULL;
1094	TRACE_TASK(prev, "Switched away; now scheduled " TS "\n",
1095 TA(entry->scheduled));
1096}
1097
1098/*
1099 * This is the plugin's release_at callback, invoked by the task-set release
1100 * system call. Other places in this file use the generic LITMUS release_at(),
1101 * which is a different function.
1102 */
1103void mc_release_at(struct task_struct *ts, lt_t start)
1104{
1105 /* hack so that we can have CE timers start at the right time */
1106 if (CRIT_LEVEL_A == tsk_mc_crit(ts))
1107 mc_ce_release_at_common(ts, start);
1108 else
1109 release_at(ts, start);
1110}
1111
1112long mc_deactivate_plugin(void)
1113{
1114 return mc_ce_deactivate_plugin_common();
1115}
1116
1117/* **************************************************************************
1118 * Initialization
1119 * ************************************************************************** */
1120
1121/* Initialize values here so that they are allocated with the module
1122 * and destroyed when the module is unloaded.
1123 */
1124
1125/* LVL-A */
1126DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
1127DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
1128DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
1129/* LVL-B */
1130DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
1131DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
1132/* LVL-C */
1133static struct domain_data _mc_crit_c;
1134static rt_domain_t _mc_crit_c_rt;
1135struct bheap _mc_heap_c;
1136struct bheap_node _mc_nodes_c[NR_CPUS];
1137
1138static long mc_activate_plugin(void)
1139{
1140 struct domain_data *dom_data;
1141 struct domain *dom;
1142 struct domain_data *our_domains[NR_CPUS];
1143 int cpu, n = 0;
1144 long ret;
1145
1146#ifdef CONFIG_RELEASE_MASTER
1147 interrupt_cpu = atomic_read(&release_master_cpu);
1148#if defined(CONFIG_PLUGIN_MC_REDIRECT) || defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1149 if (NO_CPU == interrupt_cpu) {
1150 printk(KERN_ERR "LITMUS-MC: need a release master\n");
1151 ret = -EINVAL;
1152 goto out;
1153 }
1154#endif
1155#endif
1156
1157 for_each_online_cpu(cpu) {
1158 BUG_ON(NR_CPUS <= n);
1159 dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
1160 dom_data = domain_data(dom);
1161 our_domains[cpu] = dom_data;
1162#if defined(CONFIG_MERGE_TIMERS) && defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1163 per_cpu(cpus, cpu).event_group =
1164 get_event_group_for(interrupt_cpu);
1165#elif defined(CONFIG_MERGE_TIMERS) && !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1166 per_cpu(cpus, cpu).event_group = get_event_group_for(cpu);
1167#endif
1168 n++;
1169 }
1170 ret = mc_ce_set_domains(n, our_domains);
1171 if (ret)
1172 goto out;
1173 ret = mc_ce_activate_plugin_common();
1174out:
1175 return ret;
1176}
1177
1178
1179static void mc_release_ts(lt_t time)
1180{
1181 int i, cpu, base_id = 0, cont_id = -1;
1182 char name[TASK_COMM_LEN];
1183 enum crit_level level;
1184 struct cpu_entry *entry;
1185 struct crit_entry *ce;
1186
1187 level = CRIT_LEVEL_A;
1188 strcpy(name, "LVL-A");
1189 for_each_online_cpu(cpu) {
1190 entry = &per_cpu(cpus, cpu);
1191 trace_litmus_container_param(++cont_id, (const char*)&name);
1192 ce = &entry->crit_entries[level];
1193 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1194 }
1195
1196 level = CRIT_LEVEL_B;
1197 strcpy(name, "LVL-B");
1198 for_each_online_cpu(cpu) {
1199 entry = &per_cpu(cpus, cpu);
1200 trace_litmus_container_param(++cont_id, (const char*)&name);
1201 ce = &entry->crit_entries[level];
1202 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1203 }
1204
1205 level = CRIT_LEVEL_C;
1206 strcpy(name, "LVL-C");
1207 trace_litmus_container_param(++cont_id, (const char*)&name);
1208 for_each_online_cpu(cpu) {
1209 entry = &per_cpu(cpus, cpu);
1210 ce = &entry->crit_entries[level];
1211 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1212 }
1213
1214
1215
1216}
1217
1218static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
1219 .plugin_name = "MC",
1220 .task_new = mc_task_new,
1221 .complete_job = complete_job,
1222 .task_exit = mc_task_exit,
1223 .schedule = mc_schedule,
1224 .task_wake_up = mc_task_wake_up,
1225 .task_block = mc_task_block,
1226 .admit_task = mc_admit_task,
1227 .activate_plugin = mc_activate_plugin,
1228 .release_at = mc_release_at,
1229 .deactivate_plugin = mc_deactivate_plugin,
1230 .finish_switch = mc_finish_switch,
1231 .release_ts = mc_release_ts,
1232};
1233
1234static void init_crit_entry(struct crit_entry *ce, enum crit_level level,
1235 struct domain_data *dom_data,
1236 struct bheap_node *node)
1237{
1238 ce->level = level;
1239 ce->linked = NULL;
1240 ce->node = node;
1241 ce->domain = &dom_data->domain;
1242 ce->state = CS_ACTIVE;
1243#ifdef CONFIG_MERGE_TIMERS
1244 init_event(&ce->event, level, mc_ghost_exhausted,
1245 event_list_alloc(GFP_ATOMIC));
1246#else
1247 hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1248 ce->timer.function = mc_ghost_exhausted;
1249#endif
1250
1251}
1252
1253static void init_local_domain(struct cpu_entry *entry, struct domain_data *dom_data,
1254 enum crit_level level)
1255{
1256 dom_data->heap = NULL;
1257 dom_data->crit_entry = &entry->crit_entries[level];
1258 init_crit_entry(dom_data->crit_entry, level, dom_data, NULL);
1259}
1260
1261static void init_global_domain(struct domain_data *dom_data, enum crit_level level,
1262 struct bheap *heap, struct bheap_node *nodes)
1263{
1264 int cpu;
1265 struct cpu_entry *entry;
1266 struct crit_entry *ce;
1267 struct bheap_node *node;
1268
1269 dom_data->crit_entry = NULL;
1270 dom_data->heap = heap;
1271 bheap_init(heap);
1272
1273 for_each_online_cpu(cpu) {
1274 entry = &per_cpu(cpus, cpu);
1275 node = &nodes[cpu];
1276 ce = &entry->crit_entries[level];
1277 init_crit_entry(ce, level, dom_data, node);
1278 bheap_node_init(&ce->node, ce);
1279 bheap_insert(cpu_lower_prio, heap, node);
1280 }
1281}
1282
1283static inline void init_edf_domain(struct domain *dom, rt_domain_t *rt,
1284 enum crit_level prio, int is_partitioned, int cpu)
1285{
1286 pd_domain_init(dom, rt, edf_ready_order, NULL,
1287 mc_release_jobs, mc_preempt_needed,
1288 edf_higher_prio);
1289 rt->level = prio;
1290#if defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1291 /* All timers are on one CPU and release-master is using the event
1292 * merging interface as well. */
1293 BUG_ON(NO_CPU == interrupt_cpu);
1294 rt->event_group = get_event_group_for(interrupt_cpu);
1295 rt->prio = prio;
1296#elif defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
1297 /* Using release master, but not merging timers. */
1298 rt->release_master = interrupt_cpu;
1299#elif !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1300 /* Merge the timers, but don't move them to the release master. */
1301 if (is_partitioned) {
1302 rt->event_group = get_event_group_for(cpu);
1303 } else {
1304		/* Global timers will be added to the event group of whichever
1305		 * CPU is executing when add_event() is called.
1306 */
1307 rt->event_group = NULL;
1308 }
1309 rt->prio = prio;
1310#endif
1311}
1312
1313struct domain_data *ce_domain_for(int);
1314static int __init init_mc(void)
1315{
1316 int cpu;
1317 struct cpu_entry *entry;
1318 struct domain_data *dom_data;
1319 rt_domain_t *rt;
1320 raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
1321 struct ce_dom_data *ce_data;
1322
1323 for_each_online_cpu(cpu) {
1324 entry = &per_cpu(cpus, cpu);
1325
1326 /* CPU */
1327 entry->cpu = cpu;
1328 entry->scheduled = NULL;
1329 entry->linked = NULL;
1330
1331 raw_spin_lock_init(&entry->lock);
1332
1333#ifdef CONFIG_PLUGIN_MC_REDIRECT
1334 raw_spin_lock_init(&entry->redir_lock);
1335 INIT_LIST_HEAD(&entry->redir);
1336#endif
1337
1338 /* CRIT_LEVEL_A */
1339 dom_data = &per_cpu(_mc_crit_a, cpu);
1340 ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
1341 a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
1342 raw_spin_lock_init(a_dom_lock);
1343 ce_domain_init(&dom_data->domain,
1344 a_dom_lock, ce_requeue, ce_peek_and_take_ready,
1345 ce_peek_and_take_ready, mc_preempt_needed,
1346 ce_higher_prio, ce_data, cpu,
1347 ce_timer_function);
1348 init_local_domain(entry, dom_data, CRIT_LEVEL_A);
1349 dom_data->domain.name = "LVL-A";
1350
1351 /* CRIT_LEVEL_B */
1352 dom_data = &per_cpu(_mc_crit_b, cpu);
1353 rt = &per_cpu(_mc_crit_b_rt, cpu);
1354 init_local_domain(entry, dom_data, CRIT_LEVEL_B);
1355 init_edf_domain(&dom_data->domain, rt, CRIT_LEVEL_B, 1, cpu);
1356 b_dom_lock = dom_data->domain.lock;
1357 raw_spin_lock_init(b_dom_lock);
1358 dom_data->domain.name = "LVL-B";
1359 }
1360
1361 /* CRIT_LEVEL_C */
1362 init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
1363 &_mc_heap_c, _mc_nodes_c);
1364 init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, CRIT_LEVEL_C,
1365 0, NO_CPU);
1366 c_dom_lock = _mc_crit_c.domain.lock;
1367 raw_spin_lock_init(c_dom_lock);
1368 _mc_crit_c.domain.name = "LVL-C";
1369
1370 return register_sched_plugin(&mc_plugin);
1371}
1372
1373module_init(init_mc);
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
new file mode 100644
index 000000000000..702b46da93d5
--- /dev/null
+++ b/litmus/sched_mc_ce.c
@@ -0,0 +1,1052 @@
1/**
2 * litmus/sched_mc_ce.c
3 *
4 * The Cyclic Executive (CE) scheduler used by the mixed criticality scheduling
5 * algorithm.
6 */
7
8#include <asm/atomic.h>
9#include <asm/uaccess.h>
10
11#include <linux/module.h>
12#include <linux/percpu.h>
13#include <linux/hrtimer.h>
14#include <linux/pid.h>
15#include <linux/sched.h>
16#include <linux/proc_fs.h>
17
18#include <litmus/litmus.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/rt_domain.h>
21#include <litmus/rt_param.h>
22#include <litmus/litmus_proc.h>
23#include <litmus/sched_trace.h>
24#include <litmus/jobs.h>
25#include <litmus/sched_mc.h>
26#include <litmus/ce_domain.h>
27
28static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
29
30#define using_linux_plugin() (litmus == &linux_sched_plugin)
31
32/* get a reference to struct domain for a CPU */
33#define get_domain_for(cpu) (&per_cpu(domains, cpu)->domain)
34
35#define get_pid_table(cpu) (&per_cpu(ce_pid_table, cpu))
36#define get_pid_entry(cpu, idx) (&(get_pid_table(cpu)->entries[idx]))
37
38static atomic_t start_time_set = ATOMIC_INIT(-1);
39static atomic64_t start_time = ATOMIC64_INIT(0);
40static struct proc_dir_entry *mc_ce_dir = NULL, *ce_file = NULL;
41
42/*
43 * Cache the budget along with the struct PID for a task so that we don't need
44 * to fetch its task_struct every time we check to see what should be
45 * scheduled.
46 */
47struct ce_pid_entry {
48 struct pid *pid;
49 lt_t budget;
50 /* accumulated (summed) budgets, including this one */
51 lt_t acc_time;
52 unsigned int expected_job;
53};
54
55/*
56 * Each CPU needs a mapping of level A ID (integer) to struct pid so that we
57 * can get its task struct.
58 */
59struct ce_pid_table {
60 struct ce_pid_entry entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
61 int num_pid_entries;
62 lt_t cycle_time;
63};
64
65DEFINE_PER_CPU(struct ce_pid_table, ce_pid_table);
66
67/*
 68 * Per-CPU lookup of the domain for a given CPU. Set with the
 69 * mc_ce_set_domains function; this must be done before activating plugins. Be
 70 * careful when using 'domains' as a variable name elsewhere in this file.
71 */
72
73DEFINE_PER_CPU(struct domain_data*, domains);
74
75/*
76 * The domains and other data used by the MC-CE plugin when it runs alone.
77 */
78DEFINE_PER_CPU(struct domain_data, _mc_ce_doms);
79DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
80DEFINE_PER_CPU(raw_spinlock_t, _mc_ce_dom_locks);
81
82#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
83static int interrupt_cpu;
84#endif
85
86long mc_ce_set_domains(const int n, struct domain_data *domains_in[])
87{
88 const int max = (NR_CPUS < n) ? NR_CPUS : n;
89 struct domain_data *new_dom = NULL;
90 int i, ret;
91 if (!using_linux_plugin()) {
92 printk(KERN_WARNING "can't set MC-CE domains when not using "
93 "Linux scheduler.\n");
94 ret = -EINVAL;
95 goto out;
96 }
97 for (i = 0; i < max; ++i) {
98 new_dom = domains_in[i];
99 per_cpu(domains, i) = new_dom;
100 }
101 ret = 0;
102out:
103 return ret;
104}
105
106unsigned int mc_ce_get_expected_job(const int cpu, const int idx)
107{
108 const struct ce_pid_table *pid_table = get_pid_table(cpu);
109 BUG_ON(0 > cpu);
110 BUG_ON(0 > idx);
111 BUG_ON(pid_table->num_pid_entries <= idx);
112 return pid_table->entries[idx].expected_job;
113}
114
115/*
116 * Get the offset into the cycle taking the start time into account.
117 */
118static inline lt_t get_cycle_offset(const lt_t when, const lt_t cycle_time)
119{
120 long long st = atomic64_read(&start_time);
121 lt_t offset = (when - st) % cycle_time;
122 TRACE("when: %llu cycle_time: %llu start_time: %lld offset %llu\n",
123 when, cycle_time, st, offset);
124 return offset;
125}
126
127/*
128 * The user-space job-completion call sets the RT_F_SLEEP flag and then
129 * calls schedule(). This function is used when schedule() puts such a task to sleep.
130 *
131 * Do not call prepare_for_next_period on Level-A tasks!
132 */
133static void mc_ce_job_completion(struct domain *dom, struct task_struct *ts)
134{
135 const int cpu = task_cpu(ts);
136 const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
137 const struct ce_pid_entry *pid_entry = get_pid_entry(cpu, idx);
138 unsigned int just_finished;
139
140 TRACE_TASK(ts, "Completed\n");
141
142 /* sched_trace_task_completion(ts, 0); */
143 /* post-increment is important here */
144 just_finished = (tsk_rt(ts)->job_params.job_no)++;
145
146 /* Job completes in expected window: everything is normal.
147 * Job completes in an earlier window: BUG(), that's wrong.
148 * Job completes in a later window: The job is behind.
149 */
150 if (just_finished < pid_entry->expected_job) {
151 /* this job is already released because it's running behind */
152 set_rt_flags(ts, RT_F_RUNNING);
153 TRACE_TASK(ts, "appears behind: the expected job is %u but "
154 "job %u just completed\n",
155 pid_entry->expected_job, just_finished);
156 } else if (pid_entry->expected_job < just_finished) {
157		printk(KERN_CRIT "job %u completed but the expected job is %u; "
158				"this is too early\n", just_finished,
159 pid_entry->expected_job);
160 BUG();
161 }
162}
163
164
165/*
166 * Return the index into the PID entries table of what to schedule next.
167 * Don't call if the table is empty. Assumes the caller has the domain lock.
168 * The offset parameter is the offset into the cycle.
169 *
170 * TODO Currently O(n) in the number of tasks on the CPU. Binary search?
171 */
172static int mc_ce_schedule_at(const struct domain *dom, lt_t offset)
173{
174 const struct ce_dom_data *ce_data = dom->data;
175 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
176 const struct ce_pid_entry *pid_entry = NULL;
177 int idx;
178
179 BUG_ON(pid_table->cycle_time < 1);
180 BUG_ON(pid_table->num_pid_entries < 1);
181
182 for (idx = 0; idx < pid_table->num_pid_entries; ++idx) {
183 pid_entry = &pid_table->entries[idx];
184 if (offset < pid_entry->acc_time) {
185 /* found task to schedule in this window */
186 break;
187 }
188 }
189 /* can only happen if cycle_time is not right */
190 BUG_ON(pid_entry->acc_time > pid_table->cycle_time);
191 TRACE("schedule at returning task %d for CPU %d\n", idx, ce_data->cpu);
192 return idx;
193}
194
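As the TODO above suggests, the linear scan can be replaced by a binary search, because acc_time is a non-decreasing prefix sum of the slot budgets. A stand-alone sketch of that lookup (illustrative only; the entry layout is simplified and this is not part of the patch):

#include <stdint.h>

typedef uint64_t lt_t;

/* simplified stand-in for struct ce_pid_entry: only the accumulated time */
struct slot { lt_t acc_time; };

/*
 * Return the index of the first slot whose acc_time is strictly greater
 * than 'offset' -- the same slot the linear scan in mc_ce_schedule_at()
 * picks. Assumes n >= 1 and offset < slots[n - 1].acc_time.
 */
static int schedule_at_bsearch(const struct slot *slots, int n, lt_t offset)
{
        int lo = 0, hi = n - 1;

        while (lo < hi) {
                int mid = lo + (hi - lo) / 2;
                if (offset < slots[mid].acc_time)
                        hi = mid;       /* answer is at mid or earlier */
                else
                        lo = mid + 1;   /* answer is after mid */
        }
        return lo;
}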
195static struct task_struct *mc_ce_schedule(struct task_struct *prev)
196{
197 struct domain *dom = get_domain_for(smp_processor_id());
198 struct ce_dom_data *ce_data = dom->data;
199 struct task_struct *next = NULL;
200 int exists, sleep, should_sched_exists, should_sched_blocked,
201 should_sched_asleep;
202
203 raw_spin_lock(dom->lock);
204
205 /* sanity checking */
206 BUG_ON(ce_data->scheduled && ce_data->scheduled != prev);
207 BUG_ON(ce_data->scheduled && !is_realtime(prev));
208 BUG_ON(is_realtime(prev) && !ce_data->scheduled);
209
210 exists = NULL != ce_data->scheduled;
211 sleep = exists && RT_F_SLEEP == get_rt_flags(ce_data->scheduled);
212
213 TRACE("exists: %d, sleep: %d\n", exists, sleep);
214
215 if (sleep)
216 mc_ce_job_completion(dom, ce_data->scheduled);
217
218 /* these checks must go after the call to mc_ce_job_completion in case
219	 * a late task needs to be scheduled again right away and it's the only
220 * task on a core
221 */
222 should_sched_exists = NULL != ce_data->should_schedule;
223 should_sched_blocked = should_sched_exists &&
224 !is_running(ce_data->should_schedule);
225 should_sched_asleep = should_sched_exists &&
226 RT_F_SLEEP == get_rt_flags(ce_data->should_schedule);
227
228 TRACE("should_sched_exists: %d, should_sched_blocked: %d, "
229 "should_sched_asleep: %d\n", should_sched_exists,
230 should_sched_blocked, should_sched_asleep);
231
232 if (should_sched_exists && !should_sched_blocked &&
233 !should_sched_asleep) {
234 /*
235 * schedule the task that should be executing in the cyclic
236 * schedule if it is not blocked and not sleeping
237 */
238 next = ce_data->should_schedule;
239 }
240 sched_state_task_picked();
241 raw_spin_unlock(dom->lock);
242 return next;
243}
244
245static void mc_ce_finish_switch(struct task_struct *prev)
246{
247 struct domain *dom = get_domain_for(smp_processor_id());
248 struct ce_dom_data *ce_data = dom->data;
249
250 TRACE("finish switch\n");
251
252 if (is_realtime(current) && CRIT_LEVEL_A == tsk_mc_crit(current))
253 ce_data->scheduled = current;
254 else
255 ce_data->scheduled = NULL;
256}
257
258/*
259 * The admit-task callback checks whether this task is permitted to enter the system.
260 * Here we look up the task's PID structure and save it in the proper slot on
261 * the CPU this task will run on.
262 */
263long mc_ce_admit_task_common(struct task_struct *ts)
264{
265 struct domain *dom = get_domain_for(get_partition(ts));
266 struct ce_dom_data *ce_data = dom->data;
267 struct mc_data *mcd = tsk_mc_data(ts);
268 struct pid *pid = NULL;
269 long retval = -EINVAL;
270 const int lvl_a_id = mcd->mc_task.lvl_a_id;
271 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
272
273 BUG_ON(get_partition(ts) != ce_data->cpu);
274
275 /* check the task has migrated to the right CPU (like in sched_cedf) */
276 if (task_cpu(ts) != get_partition(ts)) {
277		printk(KERN_INFO "litmus: %d admitted on CPU %d but wants %d\n",
278 ts->pid, task_cpu(ts), get_partition(ts));
279 goto out;
280 }
281
282 /* only level A tasks can be CE */
283 if (!mcd || CRIT_LEVEL_A != tsk_mc_crit(ts)) {
284 printk(KERN_INFO "litmus: non-MC or non level A task %d\n",
285 ts->pid);
286 goto out;
287 }
288
289 /* try and get the task's PID structure */
290 pid = get_task_pid(ts, PIDTYPE_PID);
291 if (IS_ERR_OR_NULL(pid)) {
292 printk(KERN_INFO "litmus: couldn't get pid struct for %d\n",
293 ts->pid);
294 goto out;
295 }
296
297 if (lvl_a_id >= pid_table->num_pid_entries) {
298		printk(KERN_INFO "litmus: level A id exceeds the expected "
299				"number of tasks %d for task %d on cpu %d\n",
300 pid_table->num_pid_entries, ts->pid,
301 get_partition(ts));
302 goto out_put_pid;
303 }
304 if (pid_table->entries[lvl_a_id].pid) {
305		printk(KERN_INFO "litmus: already have pid info saved for id: %d cpu: %d\n",
306 lvl_a_id, get_partition(ts));
307 goto out_put_pid;
308 }
309 if (get_exec_cost(ts) >= pid_table->entries[lvl_a_id].budget) {
310 printk(KERN_INFO "litmus: execution cost %llu is larger than "
311 "the budget %llu\n",
312 get_exec_cost(ts),
313 pid_table->entries[lvl_a_id].budget);
314 goto out_put_pid;
315 }
316 pid_table->entries[lvl_a_id].pid = pid;
317 retval = 0;
318 /* don't call put_pid if we are successful */
319 goto out;
320
321out_put_pid:
322 put_pid(pid);
323out:
324 return retval;
325}
326
327static long mc_ce_admit_task(struct task_struct *ts)
328{
329 struct domain *dom = get_domain_for(get_partition(ts));
330 unsigned long flags, retval;
331 raw_spin_lock_irqsave(dom->lock, flags);
332 retval = mc_ce_admit_task_common(ts);
333 raw_spin_unlock_irqrestore(dom->lock, flags);
334 return retval;
335}
336
337/*
338 * Called to set up a new real-time task (after the admit_task callback).
339 * At this point the task's struct PID is already hooked up on the destination
340 * CPU. The task may already be running.
341 */
342static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
343{
344 const int cpu = task_cpu(ts);
345 struct domain *dom = get_domain_for(cpu);
346 struct ce_dom_data *ce_data = dom->data;
347 struct ce_pid_table *pid_table = get_pid_table(cpu);
348 struct pid *pid_should_be_running;
349 struct ce_pid_entry *pid_entry;
350 unsigned long flags;
351 int idx, should_be_running;
352 lt_t offset;
353
354 raw_spin_lock_irqsave(dom->lock, flags);
355 pid_entry = get_pid_entry(cpu, tsk_mc_data(ts)->mc_task.lvl_a_id);
356 /* initialize some task state */
357 set_rt_flags(ts, RT_F_RUNNING);
358
359 /* have to call mc_ce_schedule_at because the task only gets a PID
360 * entry after calling admit_task */
361 offset = get_cycle_offset(litmus_clock(), pid_table->cycle_time);
362 idx = mc_ce_schedule_at(dom, offset);
363 pid_should_be_running = get_pid_entry(cpu, idx)->pid;
364 rcu_read_lock();
365 should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
366 rcu_read_unlock();
367 if (running) {
368 /* admit task checks that the task is not on the wrong CPU */
369 BUG_ON(task_cpu(ts) != get_partition(ts));
370 BUG_ON(ce_data->scheduled);
371 ce_data->scheduled = ts;
372
373 if (should_be_running)
374 ce_data->should_schedule = ts;
375 else
376 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
377 } else if (!running && should_be_running) {
378 ce_data->should_schedule = ts;
379 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
380 }
381 raw_spin_unlock_irqrestore(dom->lock, flags);
382}
383
384/*
385 * Called to re-introduce a task after blocking.
386 * Can potentially be called multiple times.
387 */
388static void mc_ce_task_wake_up(struct task_struct *ts)
389{
390 struct domain *dom = get_domain_for(get_partition(ts));
391 struct ce_dom_data *ce_data = dom->data;
392 unsigned long flags;
393
394 TRACE_TASK(ts, "wake up\n");
395
396 raw_spin_lock_irqsave(dom->lock, flags);
397 if (ts == ce_data->should_schedule && ts != ce_data->scheduled)
398 preempt_if_preemptable(ts, ce_data->cpu);
399 raw_spin_unlock_irqrestore(dom->lock, flags);
400}
401
402/*
403 * Called to notify the plugin of a blocking real-time task. Only called for
404 * real-time tasks and before schedule is called.
405 */
406static void mc_ce_task_block(struct task_struct *ts)
407{
408 /* nothing to do because it will be taken care of in schedule */
409 TRACE_TASK(ts, "blocked\n");
410}
411
412/*
413 * Called when a task switches from RT mode back to normal mode.
414 */
415void mc_ce_task_exit_common(struct task_struct *ts)
416{
417 struct domain *dom = get_domain_for(get_partition(ts));
418 struct ce_dom_data *ce_data = dom->data;
419 unsigned long flags;
420 struct pid *pid;
421 const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
422 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
423
424 BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
425 BUG_ON(lvl_a_id >= pid_table->num_pid_entries);
426
427 raw_spin_lock_irqsave(dom->lock, flags);
428 pid = pid_table->entries[lvl_a_id].pid;
429 BUG_ON(!pid);
430 put_pid(pid);
431 pid_table->entries[lvl_a_id].pid = NULL;
432 if (ce_data->scheduled == ts)
433 ce_data->scheduled = NULL;
434 if (ce_data->should_schedule == ts)
435 ce_data->should_schedule = NULL;
436 raw_spin_unlock_irqrestore(dom->lock, flags);
437}
438
439/***********************************************************
440 * Timer stuff
441 **********************************************************/
442
443/*
444 * Returns the next absolute time that the timer should fire.
445 */
446lt_t mc_ce_timer_callback_common(struct domain *dom)
447{
448 /* relative and absolute times for cycles */
449 lt_t now, offset_rel, cycle_start_abs, next_timer_abs;
450 struct task_struct *should_schedule;
451 struct ce_pid_table *pid_table;
452 struct ce_pid_entry *pid_entry;
453 struct ce_dom_data *ce_data;
454 int idx, budget_overrun;
455
456 ce_data = dom->data;
457 pid_table = get_pid_table(ce_data->cpu);
458
459	/* Based on the current time, figure out the offset into the cycle
460 * and the cycle's start time, and determine what should be scheduled.
461 */
462 now = litmus_clock();
463 offset_rel = get_cycle_offset(now, pid_table->cycle_time);
464 cycle_start_abs = now - offset_rel;
465 idx = mc_ce_schedule_at(dom, offset_rel);
466 pid_entry = get_pid_entry(ce_data->cpu, idx);
467 next_timer_abs = cycle_start_abs + pid_entry->acc_time;
468
469 STRACE("timer: now: %llu offset_rel: %llu cycle_start_abs: %llu "
470 "next_timer_abs: %llu\n", now, offset_rel,
471 cycle_start_abs, next_timer_abs);
472
473 /* get the task_struct (pid_task can accept a NULL) */
474 rcu_read_lock();
475 should_schedule = pid_task(pid_entry->pid, PIDTYPE_PID);
476 rcu_read_unlock();
477 ce_data->should_schedule = should_schedule;
478
479 if (should_schedule && 0 == atomic_read(&start_time_set)) {
480 /*
481 * If jobs are not overrunning their budgets, then this
482 * should not happen.
483 */
484 pid_entry->expected_job++;
485 budget_overrun = pid_entry->expected_job !=
486 tsk_rt(should_schedule)->job_params.job_no;
487 if (budget_overrun)
488 TRACE_MC_TASK(should_schedule,
489 "timer expected job number: %u "
490 "but current job: %u\n",
491 pid_entry->expected_job,
492 tsk_rt(should_schedule)->job_params.job_no);
493 }
494
495 if (ce_data->should_schedule) {
496 tsk_rt(should_schedule)->job_params.deadline =
497 cycle_start_abs + pid_entry->acc_time;
498 tsk_rt(should_schedule)->job_params.release =
499 tsk_rt(should_schedule)->job_params.deadline -
500 pid_entry->budget;
501 tsk_rt(should_schedule)->job_params.exec_time = 0;
502 /* sched_trace_task_release(should_schedule); */
503 set_rt_flags(ce_data->should_schedule, RT_F_RUNNING);
504 }
505 return next_timer_abs;
506}
507
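To make the timer arithmetic above concrete, here is a self-contained walk-through with hypothetical numbers: two level-A slots of 10 ms and 20 ms give a 30 ms cycle, and the callback runs 75 ms after start_time (not part of the patch, just an illustration):

#include <stdio.h>
#include <stdint.h>

typedef uint64_t lt_t;

int main(void)
{
        /* hypothetical table: acc_time is the prefix sum of the budgets */
        lt_t acc_time[] = { 10000000ULL, 30000000ULL }; /* 10 ms, 10+20 ms */
        lt_t cycle_time = 30000000ULL;                  /* 30 ms cycle */
        lt_t start = 0, now = 75000000ULL;              /* 75 ms after start */

        lt_t offset_rel = (now - start) % cycle_time;   /* 15 ms into the cycle */
        lt_t cycle_start_abs = now - offset_rel;        /* this cycle began at 60 ms */

        int idx = 0;
        while (offset_rel >= acc_time[idx])             /* same pick as mc_ce_schedule_at() */
                idx++;
        lt_t next_timer_abs = cycle_start_abs + acc_time[idx];

        /* prints: idx=1 next=90000000 (the second slot ends at 90 ms) */
        printf("idx=%d next=%llu\n", idx, (unsigned long long)next_timer_abs);
        return 0;
}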
508/*
509 * What to do when a timer fires. The timer should only be armed if the number
510 * of PID entries is positive.
511 */
512#ifdef CONFIG_MERGE_TIMERS
513static void mc_ce_timer_callback(struct rt_event *e)
514#else
515static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
516#endif
517{
518 struct ce_dom_data *ce_data;
519 unsigned long flags;
520 struct domain *dom;
521 lt_t next_timer_abs;
522#ifdef CONFIG_MERGE_TIMERS
523 struct event_group *event_group;
524 ce_data = container_of(e, struct ce_dom_data, event);
525	/* use the same CPU the callback is executing on by passing NO_CPU */
526 event_group = get_event_group_for(NO_CPU);
527#else /* CONFIG_MERGE_TIMERS */
528 ce_data = container_of(timer, struct ce_dom_data, timer);
529#endif
530 dom = get_domain_for(ce_data->cpu);
531
532 TRACE("timer callback on CPU %d (before lock)\n", ce_data->cpu);
533
534 raw_spin_lock_irqsave(dom->lock, flags);
535 next_timer_abs = mc_ce_timer_callback_common(dom);
536
537 /* setup an event or timer for the next release in the CE schedule */
538#ifdef CONFIG_MERGE_TIMERS
539 add_event(event_group, e, next_timer_abs);
540#else
541 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
542#endif
543
544 if (ce_data->scheduled != ce_data->should_schedule)
545 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
546
547 raw_spin_unlock_irqrestore(dom->lock, flags);
548
549#ifndef CONFIG_MERGE_TIMERS
550 return HRTIMER_RESTART;
551#endif
552}
553
554/*
555 * Cancel timers on all CPUs. Returns 1 if any were active.
556 */
557static int cancel_all_timers(void)
558{
559 struct ce_dom_data *ce_data;
560 struct domain *dom;
561 int cpu, ret = 0;
562#ifndef CONFIG_MERGE_TIMERS
563 int cancel_res;
564#endif
565
566 TRACE("cancel all timers\n");
567
568 for_each_online_cpu(cpu) {
569 dom = get_domain_for(cpu);
570 ce_data = dom->data;
571 ce_data->should_schedule = NULL;
572#ifdef CONFIG_MERGE_TIMERS
573 cancel_event(&ce_data->event);
574#else
575 cancel_res = hrtimer_cancel(&ce_data->timer);
576 atomic_set(&ce_data->timer_info.state,
577 HRTIMER_START_ON_INACTIVE);
578 ret = ret || cancel_res;
579#endif
580 }
581 return ret;
582}
583
584/*
585 * Arm all timers so that they start at the new value of start time.
586 * Any CPU without CE PID entries won't have a timer armed.
587 * All timers should be canceled before calling this.
588 */
589static void arm_all_timers(void)
590{
591 struct domain *dom;
592 struct ce_dom_data *ce_data;
593 struct ce_pid_table *pid_table;
594 int cpu, idx, cpu_for_timer;
595 const lt_t start = atomic64_read(&start_time);
596
597 TRACE("arm all timers\n");
598
599 for_each_online_cpu(cpu) {
600 dom = get_domain_for(cpu);
601 ce_data = dom->data;
602 pid_table = get_pid_table(cpu);
603 if (0 == pid_table->num_pid_entries)
604 continue;
605 for (idx = 0; idx < pid_table->num_pid_entries; idx++) {
606 pid_table->entries[idx].expected_job = 0;
607 }
608#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
609 cpu_for_timer = interrupt_cpu;
610#else
611 cpu_for_timer = cpu;
612#endif
613
614#ifdef CONFIG_MERGE_TIMERS
615 add_event(get_event_group_for(cpu_for_timer),
616 &ce_data->event, start);
617#else
618 hrtimer_start_on(cpu_for_timer, &ce_data->timer_info,
619 &ce_data->timer, ns_to_ktime(start),
620 HRTIMER_MODE_ABS_PINNED);
621#endif
622 }
623}
624
625/*
626 * There are no real releases in the CE, but the task release syscall will
627 * call this. We can re-set our notion of the CE period start to make
628 * the schedule look pretty.
629 */
630void mc_ce_release_at_common(struct task_struct *ts, lt_t start)
631{
632 TRACE_TASK(ts, "release at\n");
633 if (atomic_inc_and_test(&start_time_set)) {
634 /* in this case, we won the race */
635 cancel_all_timers();
636 atomic64_set(&start_time, start);
637 arm_all_timers();
638 } else
639 atomic_dec(&start_time_set);
640}
641
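The atomic_inc_and_test() above works because start_time_set is initialized to -1: the first caller increments it to 0 and wins, while every later caller sees a positive value and undoes its increment. A user-space sketch of the same latch using C11 atomics (illustrative only, not part of the patch):

#include <stdatomic.h>
#include <stdbool.h>

/* -1 means "release time not set yet", mirroring start_time_set above */
static atomic_int start_time_set = ATOMIC_VAR_INIT(-1);

/* Returns true for exactly one caller until the latch is reset to -1. */
static bool claim_release(void)
{
        /* atomic_inc_and_test(): increment, then test the result for zero */
        if (atomic_fetch_add(&start_time_set, 1) + 1 == 0)
                return true;                    /* won the race: -1 -> 0 */
        atomic_fetch_sub(&start_time_set, 1);   /* lost: undo the increment */
        return false;
}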
642long mc_ce_activate_plugin_common(void)
643{
644 struct ce_dom_data *ce_data;
645 struct domain *dom;
646 long ret;
647 int cpu;
648
649#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
650 interrupt_cpu = atomic_read(&release_master_cpu);
651 if (NO_CPU == interrupt_cpu) {
652 printk(KERN_ERR "LITMUS: MC-CE needs a release master\n");
653 ret = -EINVAL;
654 goto out;
655 }
656#endif
657
658 for_each_online_cpu(cpu) {
659 dom = get_domain_for(cpu);
660 ce_data = dom->data;
661 ce_data->scheduled = NULL;
662 ce_data->should_schedule = NULL;
663 }
664
665 atomic_set(&start_time_set, -1);
666 atomic64_set(&start_time, litmus_clock());
667 /* may not want to arm timers on activation, just after release */
668 arm_all_timers();
669 ret = 0;
670out:
671 return ret;
672}
673
674static long mc_ce_activate_plugin(void)
675{
676 struct domain_data *our_domains[NR_CPUS];
677 int cpu, n = 0;
678 long ret;
679
680 for_each_online_cpu(cpu) {
681 BUG_ON(NR_CPUS <= n);
682 our_domains[cpu] = &per_cpu(_mc_ce_doms, cpu);
683 n++;
684 }
685 ret = mc_ce_set_domains(n, our_domains);
686 if (ret)
687 goto out;
688 ret = mc_ce_activate_plugin_common();
689out:
690 return ret;
691}
692
693static void clear_pid_entries(void)
694{
695 struct ce_pid_table *pid_table = NULL;
696 int cpu, entry;
697
698 for_each_online_cpu(cpu) {
699 pid_table = get_pid_table(cpu);
700 pid_table->num_pid_entries = 0;
701 pid_table->cycle_time = 0;
702 for (entry = 0; entry < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS;
703 ++entry) {
704 if (NULL != pid_table->entries[entry].pid) {
705 put_pid(pid_table->entries[entry].pid);
706 pid_table->entries[entry].pid = NULL;
707 }
708 pid_table->entries[entry].budget = 0;
709 pid_table->entries[entry].acc_time = 0;
710 pid_table->entries[entry].expected_job = 0;
711 }
712 }
713}
714
715long mc_ce_deactivate_plugin_common(void)
716{
717 int cpu;
718 cancel_all_timers();
719 for_each_online_cpu(cpu) {
720 per_cpu(domains, cpu) = NULL;
721 }
722 return 0;
723}
724
725/* Plugin object */
726static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
727 .plugin_name = "MC-CE",
728 .admit_task = mc_ce_admit_task,
729 .task_new = mc_ce_task_new,
730 .complete_job = complete_job,
731 .release_at = mc_ce_release_at_common,
732 .task_exit = mc_ce_task_exit_common,
733 .schedule = mc_ce_schedule,
734 .finish_switch = mc_ce_finish_switch,
735 .task_wake_up = mc_ce_task_wake_up,
736 .task_block = mc_ce_task_block,
737 .activate_plugin = mc_ce_activate_plugin,
738 .deactivate_plugin = mc_ce_deactivate_plugin_common,
739};
740
741static int setup_proc(void);
742static int __init init_sched_mc_ce(void)
743{
744 raw_spinlock_t *ce_lock;
745 struct domain_data *dom_data;
746 struct domain *dom;
747 int cpu, err;
748
749 for_each_online_cpu(cpu) {
750 per_cpu(domains, cpu) = NULL;
751 ce_lock = &per_cpu(_mc_ce_dom_locks, cpu);
752 raw_spin_lock_init(ce_lock);
753 dom_data = &per_cpu(_mc_ce_doms, cpu);
754 dom = &dom_data->domain;
755 ce_domain_init(dom, ce_lock, NULL, NULL, NULL, NULL, NULL,
756 &per_cpu(_mc_ce_dom_data, cpu), cpu,
757 mc_ce_timer_callback);
758 }
759 clear_pid_entries();
760 err = setup_proc();
761 if (!err)
762 err = register_sched_plugin(&mc_ce_plugin);
763 return err;
764}
765
766#define BUF_SIZE PAGE_SIZE
767static int write_into_proc(char *proc_buf, const int proc_size, char *fmt, ...)
768{
769 static char buf[BUF_SIZE];
770 int n;
771 va_list args;
772
773 /* When writing to procfs, we don't care about the trailing null that
774	 * is not included in the count returned by vsnprintf.
775 */
776 va_start(args, fmt);
777 n = vsnprintf(buf, BUF_SIZE, fmt, args);
778 va_end(args);
779 if (BUF_SIZE <= n || proc_size <= n) {
780 /* too big for formatting buffer or proc (less null byte) */
781 n = -EINVAL;
782 goto out;
783 }
784 memcpy(proc_buf, buf, n);
785out:
786 return n;
787}
788#undef BUF_SIZE
789
790/*
791 * Writes a PID entry to the procfs.
792 *
793 * @page buffer to write into.
794 * @count bytes available in the buffer
795 */
796#define PID_SPACE 15
797#define TASK_INFO_BUF (PID_SPACE + TASK_COMM_LEN)
798static int write_pid_entry(char *page, const int count, const int cpu,
799 const int task, struct ce_pid_entry *pid_entry)
800{
801 static char task_info[TASK_INFO_BUF];
802 struct task_struct *ts;
803 int n = 0, err, ti_n;
804 char *ti_b;
805
806 if (pid_entry->pid) {
807 rcu_read_lock();
808 ts = pid_task(pid_entry->pid, PIDTYPE_PID);
809 rcu_read_unlock();
810
811 /* get some information about the task */
812 if (ts) {
813 ti_b = task_info;
814 ti_n = snprintf(ti_b, PID_SPACE, "%d", ts->pid);
815 if (PID_SPACE <= ti_n)
816 ti_n = PID_SPACE - 1;
817 ti_b += ti_n;
818 *ti_b = ' '; /* nuke the null byte */
819 ti_b++;
820 get_task_comm(ti_b, ts);
821 } else {
822 strncpy(task_info, "pid_task() failed :(",
823 TASK_INFO_BUF);
824 }
825
826 } else
827 strncpy(task_info, "no", TASK_INFO_BUF);
828 task_info[TASK_INFO_BUF - 1] = '\0'; /* just to be sure */
829
830 err = write_into_proc(page + n, count - n, "# task: %s\n", task_info);
831 if (err < 0) {
832 n = -ENOSPC;
833 goto out;
834 }
835 n += err;
836 err = write_into_proc(page + n, count - n, "%d, %d, %llu\n",
837 cpu, task, pid_entry->budget);
838 if (err < 0) {
839 n = -ENOSPC;
840 goto out;
841 }
842 n += err;
843out:
844 return n;
845}
846#undef PID_SPACE
847#undef TASK_INFO_BUF
848
849/*
850 * Called when user space reads from the proc file.
851 */
852static int proc_read_ce_file(char *page, char **start, off_t off, int count,
853 int *eof, void *data)
854{
855 int n = 0, err, cpu, t;
856 struct ce_pid_table *pid_table;
857
858 if (off > 0) {
859 printk(KERN_INFO "litmus: MC-CE called read with off > 0\n");
860 goto out;
861 }
862
863 for_each_online_cpu(cpu) {
864 pid_table = get_pid_table(cpu);
865 for (t = 0; t < pid_table->num_pid_entries; ++t) {
866 err = write_pid_entry(page + n, count - n,
867 cpu, t, get_pid_entry(cpu, t));
868 if (err < 0) {
869 n = -ENOSPC;
870 goto out;
871 }
872 n += err;
873 }
874 }
875out:
876 *eof = 1;
877 return n;
878}
879
880/*
881 * Skip a commented line.
882 */
883static int skip_comment(const char *buf, const unsigned long max)
884{
885 unsigned long i = 0;
886 const char *c = buf;
887 if (0 == max || !c || *c != '#')
888 return 0;
889 ++c; ++i;
890 for (; i < max; ++i) {
891 if (*c == '\n') {
892 ++c; ++i;
893 break;
894 }
895 ++c;
896 }
897 return i;
898}
899
900/* a budget of 5 milliseconds is probably reasonable */
901#define BUDGET_THRESHOLD 5000000ULL
902static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
903{
904 struct ce_pid_table *pid_table = get_pid_table(cpu);
905 struct ce_pid_entry *new_entry = NULL;
906 int err = 0;
907
908 /* check the inputs */
909 if (cpu < 0 || NR_CPUS <= cpu || task < 0 ||
910 CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= task ||
911 budget < 1) {
912 printk(KERN_INFO "litmus: bad cpu, task ID, or budget sent to "
913 "MC-CE proc\n");
914 err = -EINVAL;
915 goto out;
916 }
917 /* check for small budgets */
918 if (BUDGET_THRESHOLD > budget) {
919 printk(KERN_CRIT "litmus: you gave a small budget for an "
920 "MC-CE task; that might be an issue.\n");
921 }
922 /* check that we have space for a new entry */
923 if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= pid_table->num_pid_entries) {
924 printk(KERN_INFO "litmus: too many MC-CE tasks for cpu "
925 "%d\n", cpu);
926 err = -EINVAL;
927 goto out;
928 }
929 /* add the new entry */
930 new_entry = get_pid_entry(cpu, pid_table->num_pid_entries);
931 BUG_ON(NULL != new_entry->pid);
932 new_entry->budget = budget;
933 new_entry->acc_time = pid_table->cycle_time + budget;
934 /* update the domain entry */
935 pid_table->cycle_time += budget;
936 pid_table->num_pid_entries++;
937out:
938 return err;
939}
940#undef BUDGET_THRESHOLD
941
942/*
943 * Called when user space writes to the proc file.
944 *
945 * Error checking is quite minimal. Format is:
946 * <cpu>, <level-A task ID>, <budget in ns>
947 */
948#define PROCFS_MAX_SIZE PAGE_SIZE
949static int proc_write_ce_file(struct file *file, const char __user *buffer,
950 unsigned long count, void *data)
951{
952 static char kbuf[PROCFS_MAX_SIZE];
953 char *c = kbuf, *c_skipped;
954 int cpu, task, cnt = 0, chars_read, converted, err;
955 lt_t budget;
956
957 if (!using_linux_plugin()) {
958 printk(KERN_INFO "litmus: can only edit MC-CE proc under Linux "
959 "plugin\n");
960 cnt = -EINVAL;
961 goto out;
962 }
963
964 if (count > PROCFS_MAX_SIZE) {
965 printk(KERN_INFO "litmus: MC-CE procfs got too many bytes "
966 "from user-space.\n");
967 cnt = -EINVAL;
968 goto out;
969 }
970
971 if (copy_from_user(kbuf, buffer, count)) {
972 printk(KERN_INFO "litmus: couldn't copy from user %s\n",
973 __FUNCTION__);
974 cnt = -EFAULT;
975 goto out;
976 }
977 clear_pid_entries();
978 while (cnt < count) {
979 c_skipped = skip_spaces(c);
980 if (c_skipped != c) {
981 chars_read = c_skipped - c;
982 cnt += chars_read;
983 c += chars_read;
984 continue;
985 }
986 if (*c == '#') {
987 chars_read = skip_comment(c, count - cnt);
988 cnt += chars_read;
989 c += chars_read;
990 continue;
991 }
992 converted = sscanf(c, "%d, %d, %llu%n", &cpu, &task, &budget,
993 &chars_read);
994 if (3 != converted) {
995 printk(KERN_INFO "litmus: MC-CE procfs expected three "
996 "arguments, but got %d.\n", converted);
997 cnt = -EINVAL;
998 goto out;
999 }
1000 cnt += chars_read;
1001 c += chars_read;
1002 err = setup_pid_entry(cpu, task, budget);
1003 if (err) {
1004 cnt = -EINVAL;
1005 goto out;
1006 }
1007 }
1008out:
1009 return cnt;
1010}
1011#undef PROCFS_MAX_SIZE
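For completeness, a user-space sketch of loading this table. The proc path is a guess (it depends on where make_plugin_proc_dir() places the MC-CE directory), the two entries are hypothetical, and writes are only accepted while the Linux plugin is active:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        /* Format per the comment above: <cpu>, <level-A task ID>, <budget in ns>.
         * Two hypothetical slots on CPU 0 (10 ms and 20 ms -> 30 ms cycle).
         * proc_write_ce_file() clears the table on every write, so the whole
         * table must arrive in a single write().
         */
        const char table[] =
                "# level-A table for CPU 0\n"
                "0, 0, 10000000\n"
                "0, 1, 20000000\n";
        /* assumed path; adjust to wherever the MC-CE plugin dir is mounted */
        int fd = open("/proc/litmus/plugins/MC-CE/ce_file", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, table, strlen(table)) != (ssize_t)strlen(table)) {
                perror("write");
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}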
1012
1013#define CE_FILE_PROC_NAME "ce_file"
1014static void tear_down_proc(void)
1015{
1016 if (ce_file)
1017 remove_proc_entry(CE_FILE_PROC_NAME, mc_ce_dir);
1018 if (mc_ce_dir)
1019 remove_plugin_proc_dir(&mc_ce_plugin);
1020}
1021
1022static int setup_proc(void)
1023{
1024 int err;
1025 err = make_plugin_proc_dir(&mc_ce_plugin, &mc_ce_dir);
1026 if (err) {
1027 printk(KERN_ERR "could not create MC-CE procfs dir.\n");
1028 goto out;
1029 }
1030 ce_file = create_proc_entry(CE_FILE_PROC_NAME, 0644, mc_ce_dir);
1031 if (!ce_file) {
1032 printk(KERN_ERR "could not create MC-CE procfs file.\n");
1033 err = -EIO;
1034 goto out_remove_proc;
1035 }
1036 ce_file->read_proc = proc_read_ce_file;
1037 ce_file->write_proc = proc_write_ce_file;
1038 goto out;
1039out_remove_proc:
1040 tear_down_proc();
1041out:
1042 return err;
1043}
1044#undef CE_FILE_PROC_NAME
1045
1046static void clean_sched_mc_ce(void)
1047{
1048 tear_down_proc();
1049}
1050
1051module_init(init_sched_mc_ce);
1052module_exit(clean_sched_mc_ce);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 00a1900d6457..123c7516fb76 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -95,6 +95,10 @@ static void litmus_dummy_task_exit(struct task_struct *task)
95{ 95{
96} 96}
97 97
98static void litmus_dummy_release_ts(lt_t time)
99{
100}
101
98static long litmus_dummy_complete_job(void) 102static long litmus_dummy_complete_job(void)
99{ 103{
100 return -ENOSYS; 104 return -ENOSYS;
@@ -136,6 +140,7 @@ struct sched_plugin linux_sched_plugin = {
136 .finish_switch = litmus_dummy_finish_switch, 140 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin, 141 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin, 142 .deactivate_plugin = litmus_dummy_deactivate_plugin,
143 .release_ts = litmus_dummy_release_ts,
139#ifdef CONFIG_LITMUS_LOCKING 144#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock, 145 .allocate_lock = litmus_dummy_allocate_lock,
141#endif 146#endif
@@ -174,6 +179,7 @@ int register_sched_plugin(struct sched_plugin* plugin)
174 CHECK(complete_job); 179 CHECK(complete_job);
175 CHECK(activate_plugin); 180 CHECK(activate_plugin);
176 CHECK(deactivate_plugin); 181 CHECK(deactivate_plugin);
182 CHECK(release_ts);
177#ifdef CONFIG_LITMUS_LOCKING 183#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock); 184 CHECK(allocate_lock);
179#endif 185#endif
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index 8933e15605ae..4e117be9546b 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -290,6 +290,9 @@ static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
290 TRACE_TASK(t, "psn edf: task new, cpu = %d\n", 290 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
291 t->rt_param.task_params.cpu); 291 t->rt_param.task_params.cpu);
292 292
293 trace_litmus_server_param(0 - t->pid, -1 - get_partition(t),
294 get_exec_time(t), get_rt_period(t));
295
293 /* setup job parameters */ 296 /* setup job parameters */
294 release_at(t, litmus_clock()); 297 release_at(t, litmus_clock());
295 298
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 7194d2fe6c6f..67b01c1dd51b 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -7,14 +7,18 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/math64.h>
10 11
11#include <litmus/ftdev.h> 12#include <litmus/ftdev.h>
12#include <litmus/litmus.h> 13#include <litmus/litmus.h>
13
14#include <litmus/sched_trace.h> 14#include <litmus/sched_trace.h>
15#include <litmus/feather_trace.h> 15#include <litmus/feather_trace.h>
16#include <litmus/ftdev.h> 16#include <litmus/ftdev.h>
17#include <litmus/rt_domain.h>
18#include <litmus/domain.h>
19#include <litmus/event_group.h>
17 20
21#include <litmus/sched_mc.h>
18 22
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 23#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
20 24
@@ -132,6 +136,7 @@ feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long
132 rec->data.param.phase = get_rt_phase(t); 136 rec->data.param.phase = get_rt_phase(t);
133 rec->data.param.partition = get_partition(t); 137 rec->data.param.partition = get_partition(t);
134 rec->data.param.class = get_class(t); 138 rec->data.param.class = get_class(t);
139 rec->data.param.level = (tsk_mc_data(t) ? tsk_mc_crit(t) : -1);
135 put_record(rec); 140 put_record(rec);
136 } 141 }
137} 142}
@@ -141,8 +146,8 @@ feather_callback void do_sched_trace_task_release(unsigned long id, unsigned lon
141 struct task_struct *t = (struct task_struct*) _task; 146 struct task_struct *t = (struct task_struct*) _task;
142 struct st_event_record* rec = get_record(ST_RELEASE, t); 147 struct st_event_record* rec = get_record(ST_RELEASE, t);
143 if (rec) { 148 if (rec) {
144 rec->data.release.release = get_release(t); 149 rec->data.release.release = tsk_rt(t)->job_params.real_release;
145 rec->data.release.deadline = get_deadline(t); 150 rec->data.release.deadline = tsk_rt(t)->job_params.real_deadline;
146 put_record(rec); 151 put_record(rec);
147 } 152 }
148} 153}
@@ -231,7 +236,7 @@ feather_callback void do_sched_trace_task_exit(unsigned long id,
231{ 236{
232 struct task_struct *t = (struct task_struct*) _task; 237 struct task_struct *t = (struct task_struct*) _task;
233 const lt_t max_exec_time = tsk_rt(t)->max_exec_time; 238 const lt_t max_exec_time = tsk_rt(t)->max_exec_time;
234 const lt_t avg_exec_time = tsk_rt(t)->tot_exec_time / (get_job_no(t) - 1); 239 const lt_t avg_exec_time = div64_u64(tsk_rt(t)->tot_exec_time, (get_job_no(t) - 1));
235 240
236 struct st_event_record *rec = get_record(ST_TASK_EXIT, t); 241 struct st_event_record *rec = get_record(ST_TASK_EXIT, t);
237 if (rec) { 242 if (rec) {
diff --git a/litmus/sync.c b/litmus/sync.c
index bf75fde5450b..f3c9262f7022 100644
--- a/litmus/sync.c
+++ b/litmus/sync.c
@@ -73,6 +73,9 @@ static long do_release_ts(lt_t start)
73 73
74 complete_n(&ts_release, task_count); 74 complete_n(&ts_release, task_count);
75 75
76 /* TODO: remove this hack */
77 litmus->release_ts(start);
78
76 return task_count; 79 return task_count;
77} 80}
78 81
diff --git a/litmus/trace.c b/litmus/trace.c
index 3c35c527e805..4722ffa443c6 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -6,6 +6,10 @@
6#include <litmus/litmus.h> 6#include <litmus/litmus.h>
7#include <litmus/trace.h> 7#include <litmus/trace.h>
8 8
9#include <litmus/domain.h>
10#include <litmus/event_group.h>
11#include <litmus/sched_mc.h>
12
9/******************************************************************************/ 13/******************************************************************************/
10/* Allocation */ 14/* Allocation */
11/******************************************************************************/ 15/******************************************************************************/
@@ -83,6 +87,36 @@ static inline void __save_timestamp(unsigned long event,
83 __save_timestamp_cpu(event, type, raw_smp_processor_id()); 87 __save_timestamp_cpu(event, type, raw_smp_processor_id());
84} 88}
85 89
90/* hack: fake timestamp to user-reported time, and record parts of the PID */
91feather_callback void save_timestamp_time(unsigned long event, unsigned long ptr)
92{
93 uint64_t* time = (uint64_t*) ptr;
94 unsigned int seq_no;
95 struct timestamp *ts;
96 seq_no = fetch_and_inc((int *) &ts_seq_no);
97 if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
98 ts->event = event;
99 ts->timestamp = *time;
100 ts->seq_no = seq_no;
101 /* type takes lowest byte of PID */
102 ts->task_type = (uint8_t) current->pid;
103 /* cpu takes second-lowest byte of PID*/
104 ts->cpu = (uint8_t) (current->pid >> 8);
105
106 ft_buffer_finish_write(trace_ts_buf, ts);
107 }
108}
109
110feather_callback void save_timestamp_pid(unsigned long event)
111{
112 /* Abuse existing fields to partially export PID. */
113 __save_timestamp_cpu(event,
114 /* type takes lowest byte of PID */
115 (uint8_t) current->pid,
116 /* cpu takes second-lowest byte of PID*/
117 (uint8_t) (current->pid >> 8));
118}
119
86feather_callback void save_timestamp(unsigned long event) 120feather_callback void save_timestamp(unsigned long event)
87{ 121{
88 __save_timestamp(event, TSK_UNKNOWN); 122 __save_timestamp(event, TSK_UNKNOWN);
@@ -97,8 +131,21 @@ feather_callback void save_timestamp_def(unsigned long event,
97feather_callback void save_timestamp_task(unsigned long event, 131feather_callback void save_timestamp_task(unsigned long event,
98 unsigned long t_ptr) 132 unsigned long t_ptr)
99{ 133{
100 int rt = is_realtime((struct task_struct *) t_ptr); 134 struct task_struct *ts = (struct task_struct*) t_ptr;
101 __save_timestamp(event, rt ? TSK_RT : TSK_BE); 135 int rt = is_realtime(ts);
136 uint8_t type = rt ? TSK_RT : TSK_BE;
137
138 if (TS_LVLA_SCHED_END_ID == event) {
139 if (rt && CRIT_LEVEL_A == tsk_mc_crit(ts))
140 type = TSK_LVLA;
141 } else if (TS_LVLB_SCHED_END_ID == event) {
142 if (rt && CRIT_LEVEL_B == tsk_mc_crit(ts))
143 type = TSK_LVLB;
144 } else if (TS_LVLC_SCHED_END_ID == event) {
145 if (rt && CRIT_LEVEL_C == tsk_mc_crit(ts))
146 type = TSK_LVLC;
147 }
148 __save_timestamp(event, type);
102} 149}
103 150
104feather_callback void save_timestamp_cpu(unsigned long event, 151feather_callback void save_timestamp_cpu(unsigned long event,
diff --git a/mm/memory.c b/mm/memory.c
index 9b8a01d941cb..36e889cca247 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3016,6 +3016,35 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
3016 return 0; 3016 return 0;
3017} 3017}
3018 3018
3019static inline void print_page_fault_info(struct mm_struct *mm,
3020 unsigned long address, struct page *page)
3021{
3022 struct task_struct *ts = current;
3023 char ts_name[TASK_COMM_LEN];
3024 char mismatch[4];
3025#if 0
3026 struct file *file = mm->exe_file;
3027 const unsigned char *name_null = "NULL";
3028 const unsigned char *name =
3029 (file) ?file->f_dentry->d_name.name : name_null;
3030#endif
3031
3032 get_task_comm(ts_name, ts);
3033 if (likely(ts->mm == mm))
3034 mismatch[0] = '\0';
3035 else
3036 snprintf(mismatch, 4, "(M)");
3037#if 0
3038 printk("%s: cur: %15s name: %15s user_addr: 0x%12lx pfn: %12lu "
3039 "addr: 0x%13llx\n",
3040 __FUNCTION__, ts_name, name, address,
3041 page_to_pfn(page), page_to_phys(page));
3042#endif
3043 printk("%s: %3s %15s laddr: 0x%12lx pfn: %19lu paddr: 0x%13llx\n",
3044 __FUNCTION__, mismatch, ts_name, address >> PAGE_SHIFT,
3045 page_to_pfn(page), page_to_phys(page) >> PAGE_SHIFT);
3046}
3047
3019/* 3048/*
3020 * We enter with non-exclusive mmap_sem (to exclude vma changes, 3049 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3021 * but allow concurrent faults), and pte mapped but not yet locked. 3050 * but allow concurrent faults), and pte mapped but not yet locked.
@@ -3066,6 +3095,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
3066 3095
3067 inc_mm_counter_fast(mm, MM_ANONPAGES); 3096 inc_mm_counter_fast(mm, MM_ANONPAGES);
3068 page_add_new_anon_rmap(page, vma, address); 3097 page_add_new_anon_rmap(page, vma, address);
3098#if 0
3099 print_page_fault_info(mm, address, page);
3100#endif
3069setpte: 3101setpte:
3070 set_pte_at(mm, address, page_table, entry); 3102 set_pte_at(mm, address, page_table, entry);
3071 3103
@@ -3075,6 +3107,9 @@ unlock:
3075 pte_unmap_unlock(page_table, ptl); 3107 pte_unmap_unlock(page_table, ptl);
3076 return 0; 3108 return 0;
3077release: 3109release:
3110#if 0
3111 printk("%s: release label\n", __FUNCTION__);
3112#endif
3078 mem_cgroup_uncharge_page(page); 3113 mem_cgroup_uncharge_page(page);
3079 page_cache_release(page); 3114 page_cache_release(page);
3080 goto unlock; 3115 goto unlock;