From 8e048c798adaabef530a1526f7ce8c6c3cd3475e Mon Sep 17 00:00:00 2001 From: Bjoern Brandenburg Date: Sun, 9 Aug 2015 13:18:48 +0200 Subject: Add LITMUS^RT core implementation This patch adds the core of LITMUS^RT: - library functionality (heaps, rt_domain, prioritization, etc.) - budget enforcement logic - job management - system call backends - virtual devices (control page, etc.) - scheduler plugin API (and dummy plugin) This code compiles, but is not yet integrated with the rest of Linux. --- litmus/Kconfig | 193 ++++++++++++++ litmus/Makefile | 18 ++ litmus/bheap.c | 316 +++++++++++++++++++++++ litmus/binheap.c | 387 ++++++++++++++++++++++++++++ litmus/budget.c | 116 +++++++++ litmus/clustered.c | 119 +++++++++ litmus/ctrldev.c | 160 ++++++++++++ litmus/edf_common.c | 200 +++++++++++++++ litmus/fdso.c | 308 +++++++++++++++++++++++ litmus/fp_common.c | 119 +++++++++ litmus/jobs.c | 82 ++++++ litmus/litmus.c | 681 ++++++++++++++++++++++++++++++++++++++++++++++++++ litmus/litmus_proc.c | 573 ++++++++++++++++++++++++++++++++++++++++++ litmus/locking.c | 188 ++++++++++++++ litmus/preempt.c | 141 +++++++++++ litmus/rt_domain.c | 353 ++++++++++++++++++++++++++ litmus/sched_plugin.c | 238 ++++++++++++++++++ litmus/srp.c | 308 +++++++++++++++++++++++ litmus/sync.c | 152 +++++++++++ litmus/trace.c | 11 + litmus/uncachedev.c | 102 ++++++++ 21 files changed, 4765 insertions(+) create mode 100644 litmus/bheap.c create mode 100644 litmus/binheap.c create mode 100644 litmus/budget.c create mode 100644 litmus/clustered.c create mode 100644 litmus/ctrldev.c create mode 100644 litmus/edf_common.c create mode 100644 litmus/fdso.c create mode 100644 litmus/fp_common.c create mode 100644 litmus/jobs.c create mode 100644 litmus/litmus.c create mode 100644 litmus/litmus_proc.c create mode 100644 litmus/locking.c create mode 100644 litmus/preempt.c create mode 100644 litmus/rt_domain.c create mode 100644 litmus/sched_plugin.c create mode 100644 litmus/srp.c create mode 100644 litmus/sync.c create mode 100644 litmus/uncachedev.c (limited to 'litmus') diff --git a/litmus/Kconfig b/litmus/Kconfig index 5408ef6b159b..fdf31f3dd6c2 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -1,5 +1,184 @@ menu "LITMUS^RT" +menu "Scheduling" + +config RELEASE_MASTER + bool "Release-master Support" + depends on ARCH_HAS_SEND_PULL_TIMERS && SMP + default n + help + Allow one processor to act as a dedicated interrupt processor + that services all timer interrupts, but that does not schedule + real-time tasks. See RTSS'09 paper for details + (http://www.cs.unc.edu/~anderson/papers.html). + +config PREFER_LOCAL_LINKING + bool "Link newly arrived tasks locally if possible" + depends on SMP + default y + help + In linking-based schedulers such as GSN-EDF, if an idle CPU processes + a job arrival (i.e., when a job resumed or was released), it can + either link the task to itself and schedule it immediately (to avoid + unnecessary scheduling latency) or it can try to link it to the CPU + where it executed previously (to maximize cache affinity, at the + expense of increased latency due to the need to send an IPI). + + In lightly loaded systems, this option can significantly reduce + scheduling latencies. In heavily loaded systems (where CPUs are + rarely idle), it will likely make hardly a difference. + + If unsure, say yes. 
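A minimal sketch of the decision described in PREFER_LOCAL_LINKING above, assuming a linking-based global plugin; all helper names (cpu_is_idle(), link_task_to_cpu(), this_cpu_entry(), task_prev_cpu_entry(), preempt_cpu()) are placeholders and not part of this patch:

    /* Sketch: an idle CPU reacting to a job arrival. */
    static void handle_job_arrival(struct task_struct *t)
    {
    #ifdef CONFIG_PREFER_LOCAL_LINKING
            if (cpu_is_idle(smp_processor_id())) {
                    /* link locally: schedule immediately, no IPI needed */
                    link_task_to_cpu(t, this_cpu_entry());
                    return;
            }
    #endif
            /* otherwise favor cache affinity: link to the last-used CPU
             * and send an IPI so that it reschedules */
            link_task_to_cpu(t, task_prev_cpu_entry(t));
            preempt_cpu(task_prev_cpu_entry(t));
    }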
+ +config LITMUS_QUANTUM_LENGTH_US + int "quantum length (in us)" + default 1000 + range 500 10000 + help + Determine the desired quantum length, in microseconds, which + is used to determine the granularity of scheduling in + quantum-driven plugins (primarily PFAIR). This parameter does not + affect event-driven plugins (such as the EDF-based plugins and P-FP). + Default: 1000us = 1ms. + +config BUG_ON_MIGRATION_DEADLOCK + bool "Panic on suspected migration deadlock" + default y + help + This is a debugging option. The LITMUS^RT migration support code for + global scheduling contains a simple heuristic to detect when the + system deadlocks due to circular stack dependencies. + + For example, such a deadlock exists if CPU 0 waits for task A's stack + to become available while using task B's stack, and CPU 1 waits for + task B's stack to become available while using task A's stack. Such + a situation can arise in (buggy) global scheduling plugins. + + With this option enabled, such a scenario with result in a BUG(). + You can turn off this option when debugging on real hardware (e.g., + to rescue traces, etc. that would be hard to get after a panic). + + Only turn this off if you really know what you are doing. If this + BUG() triggers, the scheduler is broken and turning off this option + won't fix it. + + +endmenu + +menu "Real-Time Synchronization" + +config NP_SECTION + bool "Non-preemptive section support" + default y + help + Allow tasks to become non-preemptable. + Note that plugins still need to explicitly support non-preemptivity. + Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support. + + This is required to support locking protocols such as the FMLP. + If disabled, all tasks will be considered preemptable at all times. + +config LITMUS_LOCKING + bool "Support for real-time locking protocols" + depends on NP_SECTION + default y + help + Enable LITMUS^RT's multiprocessor real-time locking protocols with + predicable maximum blocking times. + + Say Yes if you want to include locking protocols such as the FMLP and + Baker's SRP. + +endmenu + +menu "Performance Enhancements" + +config SCHED_CPU_AFFINITY + bool "Local Migration Affinity" + depends on X86 && SYSFS + default y + help + Rescheduled tasks prefer CPUs near to their previously used CPU. + This may improve cache performance through possible preservation of + cache affinity, at the expense of (slightly) more involved scheduling + logic. + + Warning: May make bugs harder to find since tasks may migrate less often. + + NOTES: + * Feature is not utilized by PFair/PD^2. + + Say Yes if unsure. + +config ALLOW_EARLY_RELEASE + bool "Allow Early Releasing" + default y + help + Allow tasks to release jobs early (while still maintaining job + precedence constraints). Only supported by EDF schedulers. Early + releasing must be explicitly requested by real-time tasks via + the task_params passed to sys_set_task_rt_param(). + + Early releasing can improve job response times while maintaining + real-time correctness. However, it can easily peg your CPUs + since tasks never suspend to wait for their next job. As such, early + releasing is really only useful in the context of implementing + bandwidth servers, interrupt handling threads, or short-lived + computations. + + Beware that early releasing may affect real-time analysis + if using locking protocols or I/O. + + Say Yes if unsure. 
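A minimal user-space sketch of the bandwidth-server use case mentioned above; wait_for_event(), handle_event(), and complete_job() are placeholder wrappers, with complete_job() standing in for whatever invokes the sys_complete_job() backend added in litmus/litmus.c below:

    /* Sketch: an event-handling server that relies on early releasing.
     * Each loop iteration is one job; with early releasing requested in
     * task_params, the next job is released immediately rather than at
     * the next period boundary, so pending events are served back to back. */
    static void *event_server(void *arg)
    {
            for (;;) {
                    handle_event(wait_for_event());
                    complete_job();
            }
            return NULL;
    }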
+ +choice + prompt "EDF Tie-Break Behavior" + default EDF_TIE_BREAK_LATENESS_NORM + help + Allows the configuration of tie-breaking behavior when the deadlines + of two EDF-scheduled tasks are equal. + + config EDF_TIE_BREAK_LATENESS + bool "Lateness-based Tie Break" + help + Break ties between two jobs, A and B, based upon the lateness of their + prior jobs. The job with the greatest lateness has priority. Note that + lateness has a negative value if the prior job finished before its + deadline. + + config EDF_TIE_BREAK_LATENESS_NORM + bool "Normalized Lateness-based Tie Break" + help + Break ties between two jobs, A and B, based upon the lateness, normalized + by relative deadline, of their prior jobs. The job with the greatest + normalized lateness has priority. Note that lateness has a negative value + if the prior job finished before its deadline. + + Normalized lateness tie-breaks are likely desireable over non-normalized + tie-breaks if the execution times and/or relative deadlines of tasks in a + task set vary greatly. + + config EDF_TIE_BREAK_HASH + bool "Hash-based Tie Breaks" + help + Break ties between two jobs, A and B, with equal deadlines by using a + uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job + A has ~50% of winning a given tie-break. + + config EDF_PID_TIE_BREAK + bool "PID-based Tie Breaks" + help + Break ties based upon OS-assigned thread IDs. Use this option if + required by algorithm's real-time analysis or per-task response-time + jitter must be minimized. + + NOTES: + * This tie-breaking method was default in Litmus 2012.2 and before. + +endchoice + +endmenu + menu "Tracing" config FEATHER_TRACE @@ -154,6 +333,20 @@ config SCHED_DEBUG_TRACE_CALLER If unsure, say No. +config PREEMPT_STATE_TRACE + bool "Trace preemption state machine transitions" + depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL + default n + help + With this option enabled, each CPU will log when it transitions + states in the preemption state machine. This state machine is + used to determine how to react to IPIs (avoid races with in-flight IPIs). + + Warning: this creates a lot of information in the debug trace. Only + recommended when you are debugging preemption-related races. + + If unsure, say No. 
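The EDF tie-break choices above (in the "Performance Enhancements" menu) all reduce to a comparison of equal-deadline jobs. The default, normalized lateness, is sketched below in condensed form; it mirrors edf_higher_prio() in litmus/edf_common.c later in this patch, where fp_t, _frac(), _gt(), and _eq() are the fixed-point helpers used.

    /* Sketch: breaking a deadline tie between jobs a and b under
     * EDF_TIE_BREAK_LATENESS_NORM (condensed from edf_higher_prio()). */
    static int lateness_norm_tie_break(struct task_struct *a, struct task_struct *b)
    {
            fp_t a_norm = _frac(get_lateness(a), get_rt_relative_deadline(a));
            fp_t b_norm = _frac(get_lateness(b), get_rt_relative_deadline(b));
            if (_gt(a_norm, b_norm))
                    return 1;                /* a's previous job was "more late" */
            if (_eq(a_norm, b_norm))
                    return a->pid < b->pid;  /* still tied: fall back to PID */
            return 0;
    }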
+ endmenu endmenu diff --git a/litmus/Makefile b/litmus/Makefile index 6318f1c6fac8..c85abc7389c5 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -2,6 +2,24 @@ # Makefile for LITMUS^RT # +obj-y = sched_plugin.o litmus.o \ + preempt.o \ + litmus_proc.o \ + budget.o \ + clustered.o \ + jobs.o \ + sync.o \ + rt_domain.o \ + edf_common.o \ + fp_common.o \ + fdso.o \ + locking.o \ + srp.o \ + bheap.o \ + binheap.o \ + ctrldev.o \ + uncachedev.o + obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o diff --git a/litmus/bheap.c b/litmus/bheap.c new file mode 100644 index 000000000000..2707e0122b6d --- /dev/null +++ b/litmus/bheap.c @@ -0,0 +1,316 @@ +#include +#include +#include + +void bheap_init(struct bheap* heap) +{ + heap->head = NULL; + heap->min = NULL; +} + +void bheap_node_init(struct bheap_node** _h, void* value) +{ + struct bheap_node* h = *_h; + h->parent = NULL; + h->next = NULL; + h->child = NULL; + h->degree = NOT_IN_HEAP; + h->value = value; + h->ref = _h; +} + + +/* make child a subtree of root */ +static void __bheap_link(struct bheap_node* root, + struct bheap_node* child) +{ + child->parent = root; + child->next = root->child; + root->child = child; + root->degree++; +} + +/* merge root lists */ +static struct bheap_node* __bheap_merge(struct bheap_node* a, + struct bheap_node* b) +{ + struct bheap_node* head = NULL; + struct bheap_node** pos = &head; + + while (a && b) { + if (a->degree < b->degree) { + *pos = a; + a = a->next; + } else { + *pos = b; + b = b->next; + } + pos = &(*pos)->next; + } + if (a) + *pos = a; + else + *pos = b; + return head; +} + +/* reverse a linked list of nodes. also clears parent pointer */ +static struct bheap_node* __bheap_reverse(struct bheap_node* h) +{ + struct bheap_node* tail = NULL; + struct bheap_node* next; + + if (!h) + return h; + + h->parent = NULL; + while (h->next) { + next = h->next; + h->next = tail; + tail = h; + h = next; + h->parent = NULL; + } + h->next = tail; + return h; +} + +static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node** prev, struct bheap_node** node) +{ + struct bheap_node *_prev, *cur; + *prev = NULL; + + if (!heap->head) { + *node = NULL; + return; + } + + *node = heap->head; + _prev = heap->head; + cur = heap->head->next; + while (cur) { + if (higher_prio(cur, *node)) { + *node = cur; + *prev = _prev; + } + _prev = cur; + cur = cur->next; + } +} + +static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node* h2) +{ + struct bheap_node* h1; + struct bheap_node *prev, *x, *next; + if (!h2) + return; + h1 = heap->head; + if (!h1) { + heap->head = h2; + return; + } + h1 = __bheap_merge(h1, h2); + prev = NULL; + x = h1; + next = x->next; + while (next) { + if (x->degree != next->degree || + (next->next && next->next->degree == x->degree)) { + /* nothing to do, advance */ + prev = x; + x = next; + } else if (higher_prio(x, next)) { + /* x becomes the root of next */ + x->next = next->next; + __bheap_link(x, next); + } else { + /* next becomes the root of x */ + if (prev) + prev->next = next; + else + h1 = next; + __bheap_link(next, x); + x = next; + } + next = x->next; + } + heap->head = h1; +} + +static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio, + struct bheap* heap) +{ + struct bheap_node *prev, *node; + __bheap_min(higher_prio, heap, &prev, &node); + if (!node) + return NULL; + if (prev) + prev->next = node->next; + else 
+ heap->head = node->next; + __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); + return node; +} + +/* insert (and reinitialize) a node into the heap */ +void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node* node) +{ + struct bheap_node *min; + node->child = NULL; + node->parent = NULL; + node->next = NULL; + node->degree = 0; + if (heap->min && higher_prio(node, heap->min)) { + /* swap min cache */ + min = heap->min; + min->child = NULL; + min->parent = NULL; + min->next = NULL; + min->degree = 0; + __bheap_union(higher_prio, heap, min); + heap->min = node; + } else + __bheap_union(higher_prio, heap, node); +} + +void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) +{ + struct bheap_node* min; + if (heap->min) { + min = heap->min; + heap->min = NULL; + bheap_insert(higher_prio, heap, min); + } +} + +/* merge addition into target */ +void bheap_union(bheap_prio_t higher_prio, + struct bheap* target, struct bheap* addition) +{ + /* first insert any cached minima, if necessary */ + bheap_uncache_min(higher_prio, target); + bheap_uncache_min(higher_prio, addition); + __bheap_union(higher_prio, target, addition->head); + /* this is a destructive merge */ + addition->head = NULL; +} + +struct bheap_node* bheap_peek(bheap_prio_t higher_prio, + struct bheap* heap) +{ + if (!heap->min) + heap->min = __bheap_extract_min(higher_prio, heap); + return heap->min; +} + +struct bheap_node* bheap_take(bheap_prio_t higher_prio, + struct bheap* heap) +{ + struct bheap_node *node; + if (!heap->min) + heap->min = __bheap_extract_min(higher_prio, heap); + node = heap->min; + heap->min = NULL; + if (node) + node->degree = NOT_IN_HEAP; + return node; +} + +int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node) +{ + struct bheap_node *parent; + struct bheap_node** tmp_ref; + void* tmp; + + /* bubble up */ + parent = node->parent; + while (parent && higher_prio(node, parent)) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + + return parent != NULL; +} + +void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node* node) +{ + struct bheap_node *parent, *prev, *pos; + struct bheap_node** tmp_ref; + void* tmp; + + if (heap->min != node) { + /* bubble up */ + parent = node->parent; + while (parent) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + /* now delete: + * first find prev */ + prev = NULL; + pos = heap->head; + while (pos != node) { + BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */ + prev = pos; + pos = pos->next; + } + /* we have prev, now remove node */ + if (prev) + prev->next = node->next; + else + heap->head = node->next; + __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); + } else + heap->min = NULL; + node->degree = NOT_IN_HEAP; +} + +/* allocate a heap node for value and insert into the heap */ +int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, + void* value, int gfp_flags) +{ + struct bheap_node* hn = bheap_node_alloc(gfp_flags); + 
if (likely(hn)) { + bheap_node_init(&hn, value); + bheap_insert(higher_prio, heap, hn); + } + return hn != NULL; +} + +void* bheap_take_del(bheap_prio_t higher_prio, + struct bheap* heap) +{ + struct bheap_node* hn = bheap_take(higher_prio, heap); + void* ret = NULL; + if (hn) { + ret = hn->value; + bheap_node_free(hn); + } + return ret; +} diff --git a/litmus/binheap.c b/litmus/binheap.c new file mode 100644 index 000000000000..d3ab34b92096 --- /dev/null +++ b/litmus/binheap.c @@ -0,0 +1,387 @@ +#include + +/* Returns true of the root ancestor of node is the root of the given heap. */ +int binheap_is_in_this_heap(struct binheap_node *node, + struct binheap* heap) +{ + if(!binheap_is_in_heap(node)) { + return 0; + } + + while(node->parent != NULL) { + node = node->parent; + } + + return (node == heap->root); +} + + +/* Update the node reference pointers. Same logic as Litmus binomial heap. */ +static void __update_ref(struct binheap_node *parent, + struct binheap_node *child) +{ + *(parent->ref_ptr) = child; + *(child->ref_ptr) = parent; + + swap(parent->ref_ptr, child->ref_ptr); +} + + +/* Swaps data between two nodes. */ +static void __binheap_swap(struct binheap_node *parent, + struct binheap_node *child) +{ + swap(parent->data, child->data); + __update_ref(parent, child); +} + + +/* Swaps memory and data between two nodes. Actual nodes swap instead of + * just data. Needed when we delete nodes from the heap. + */ +static void __binheap_swap_safe(struct binheap *handle, + struct binheap_node *a, + struct binheap_node *b) +{ + swap(a->data, b->data); + __update_ref(a, b); + + if((a->parent != NULL) && (a->parent == b->parent)) { + /* special case: shared parent */ + swap(a->parent->left, a->parent->right); + } + else { + /* Update pointers to swap parents. */ + + if(a->parent) { + if(a == a->parent->left) { + a->parent->left = b; + } + else { + a->parent->right = b; + } + } + + if(b->parent) { + if(b == b->parent->left) { + b->parent->left = a; + } + else { + b->parent->right = a; + } + } + + swap(a->parent, b->parent); + } + + /* swap children */ + + if(a->left) { + a->left->parent = b; + + if(a->right) { + a->right->parent = b; + } + } + + if(b->left) { + b->left->parent = a; + + if(b->right) { + b->right->parent = a; + } + } + + swap(a->left, b->left); + swap(a->right, b->right); + + + /* update next/last/root pointers */ + + if(a == handle->next) { + handle->next = b; + } + else if(b == handle->next) { + handle->next = a; + } + + if(a == handle->last) { + handle->last = b; + } + else if(b == handle->last) { + handle->last = a; + } + + if(a == handle->root) { + handle->root = b; + } + else if(b == handle->root) { + handle->root = a; + } +} + + +/** + * Update the pointer to the last node in the complete binary tree. + * Called internally after the root node has been deleted. + */ +static void __binheap_update_last(struct binheap *handle) +{ + struct binheap_node *temp = handle->last; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->left)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->left; + } + + /* now travel right as far as possible. */ + while(temp->right != NULL) { + temp = temp->right; + } + + /* take one step to the left if we're not at the bottom-most level. */ + if(temp->left != NULL) { + temp = temp->left; + } + + handle->last = temp; +} + + +/** + * Update the pointer to the node that will take the next inserted node. 
+ * Called internally after a node has been inserted. + */ +static void __binheap_update_next(struct binheap *handle) +{ + struct binheap_node *temp = handle->next; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->right)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->right; + } + + /* now travel left as far as possible. */ + while(temp->left != NULL) { + temp = temp->left; + } + + handle->next = temp; +} + + + +/* bubble node up towards root */ +static void __binheap_bubble_up(struct binheap *handle, + struct binheap_node *node) +{ + /* let BINHEAP_POISON data bubble to the top */ + + while((node->parent != NULL) && + ((node->data == BINHEAP_POISON) || + handle->compare(node, node->parent))) { + __binheap_swap(node->parent, node); + node = node->parent; + } +} + + +/* bubble node down, swapping with min-child */ +static void __binheap_bubble_down(struct binheap *handle) +{ + struct binheap_node *node = handle->root; + + while(node->left != NULL) { + if(node->right && handle->compare(node->right, node->left)) { + if(handle->compare(node->right, node)) { + __binheap_swap(node, node->right); + node = node->right; + } + else { + break; + } + } + else { + if(handle->compare(node->left, node)) { + __binheap_swap(node, node->left); + node = node->left; + } + else { + break; + } + } + } +} + + +void __binheap_add(struct binheap_node *new_node, + struct binheap *handle, + void *data) +{ + new_node->data = data; + new_node->ref = new_node; + new_node->ref_ptr = &(new_node->ref); + + if(!binheap_empty(handle)) { + /* insert left side first */ + if(handle->next->left == NULL) { + handle->next->left = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_bubble_up(handle, new_node); + } + else { + /* left occupied. insert right. */ + handle->next->right = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_update_next(handle); + __binheap_bubble_up(handle, new_node); + } + } + else { + /* first node in heap */ + + new_node->parent = NULL; + new_node->left = NULL; + new_node->right = NULL; + + handle->root = new_node; + handle->next = new_node; + handle->last = new_node; + } +} + + +/** + * Removes the root node from the heap. The node is removed after coalescing + * the binheap_node with its original data pointer at the root of the tree. + * + * The 'last' node in the tree is then swapped up to the root and bubbled + * down. + */ +void __binheap_delete_root(struct binheap *handle, + struct binheap_node *container) +{ + struct binheap_node *root = handle->root; + + if(root != container) { + /* coalesce */ + __binheap_swap_safe(handle, root, container); + root = container; + } + + if(handle->last != root) { + /* swap 'last' node up to root and bubble it down. 
*/ + + struct binheap_node *to_move = handle->last; + + if(to_move->parent != root) { + handle->next = to_move->parent; + + if(handle->next->right == to_move) { + /* disconnect from parent */ + to_move->parent->right = NULL; + handle->last = handle->next->left; + } + else { + /* find new 'last' before we disconnect */ + __binheap_update_last(handle); + + /* disconnect from parent */ + to_move->parent->left = NULL; + } + } + else { + /* 'last' is direct child of root */ + + handle->next = to_move; + + if(to_move == to_move->parent->right) { + to_move->parent->right = NULL; + handle->last = to_move->parent->left; + } + else { + to_move->parent->left = NULL; + handle->last = to_move; + } + } + to_move->parent = NULL; + + /* reconnect as root. We can't just swap data ptrs since root node + * may be freed after this function returns. + */ + to_move->left = root->left; + to_move->right = root->right; + if(to_move->left != NULL) { + to_move->left->parent = to_move; + } + if(to_move->right != NULL) { + to_move->right->parent = to_move; + } + + handle->root = to_move; + + /* bubble down */ + __binheap_bubble_down(handle); + } + else { + /* removing last node in tree */ + handle->root = NULL; + handle->next = NULL; + handle->last = NULL; + } + + /* mark as removed */ + container->parent = BINHEAP_POISON; +} + + +/** + * Delete an arbitrary node. Bubble node to delete up to the root, + * and then delete to root. + */ +void __binheap_delete(struct binheap_node *node_to_delete, + struct binheap *handle) +{ + struct binheap_node *target = node_to_delete->ref; + void *temp_data = target->data; + + /* temporarily set data to null to allow node to bubble up to the top. */ + target->data = BINHEAP_POISON; + + __binheap_bubble_up(handle, target); + __binheap_delete_root(handle, node_to_delete); + + node_to_delete->data = temp_data; /* restore node data pointer */ +} + + +/** + * Bubble up a node whose pointer has decreased in value. + */ +void __binheap_decrease(struct binheap_node *orig_node, + struct binheap *handle) +{ + struct binheap_node *target = orig_node->ref; + + __binheap_bubble_up(handle, target); +} diff --git a/litmus/budget.c b/litmus/budget.c new file mode 100644 index 000000000000..47bf78a19f87 --- /dev/null +++ b/litmus/budget.c @@ -0,0 +1,116 @@ +#include +#include +#include + +#include +#include + +#include + +struct enforcement_timer { + /* The enforcement timer is used to accurately police + * slice budgets. */ + struct hrtimer timer; + int armed; +}; + +DEFINE_PER_CPU(struct enforcement_timer, budget_timer); + +static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) +{ + struct enforcement_timer* et = container_of(timer, + struct enforcement_timer, + timer); + unsigned long flags; + + local_irq_save(flags); + TRACE("enforcement timer fired.\n"); + et->armed = 0; + /* activate scheduler */ + litmus_reschedule_local(); + local_irq_restore(flags); + + return HRTIMER_NORESTART; +} + +/* assumes called with IRQs off */ +static void cancel_enforcement_timer(struct enforcement_timer* et) +{ + int ret; + + TRACE("cancelling enforcement timer.\n"); + + /* Since interrupts are disabled and et->armed is only + * modified locally, we do not need any locks. + */ + + if (et->armed) { + ret = hrtimer_try_to_cancel(&et->timer); + /* Should never be inactive. */ + BUG_ON(ret == 0); + /* Should never be running concurrently. 
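+		 * (The callback cannot be executing here: the timer is armed
+		 * with HRTIMER_MODE_ABS_PINNED on this CPU and IRQs are off,
+		 * so hrtimer_try_to_cancel() never observes a running callback.)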
*/ + BUG_ON(ret == -1); + + et->armed = 0; + } +} + +/* assumes called with IRQs off */ +static void arm_enforcement_timer(struct enforcement_timer* et, + struct task_struct* t) +{ + lt_t when_to_fire; + TRACE_TASK(t, "arming enforcement timer.\n"); + + WARN_ONCE(!hrtimer_is_hres_active(&et->timer), + KERN_ERR "WARNING: no high resolution timers available!?\n"); + + /* Calling this when there is no budget left for the task + * makes no sense, unless the task is non-preemptive. */ + BUG_ON(budget_exhausted(t) && (!is_np(t))); + + /* __hrtimer_start_range_ns() cancels the timer + * anyway, so we don't have to check whether it is still armed */ + + if (likely(!is_np(t))) { + when_to_fire = litmus_clock() + budget_remaining(t); + __hrtimer_start_range_ns(&et->timer, + ns_to_ktime(when_to_fire), + 0 /* delta */, + HRTIMER_MODE_ABS_PINNED, + 0 /* no wakeup */); + et->armed = 1; + } +} + + +/* expects to be called with IRQs off */ +void update_enforcement_timer(struct task_struct* t) +{ + struct enforcement_timer* et = this_cpu_ptr(&budget_timer); + + if (t && budget_precisely_enforced(t)) { + /* Make sure we call into the scheduler when this budget + * expires. */ + arm_enforcement_timer(et, t); + } else if (et->armed) { + /* Make sure we don't cause unnecessary interrupts. */ + cancel_enforcement_timer(et); + } +} + + +static int __init init_budget_enforcement(void) +{ + int cpu; + struct enforcement_timer* et; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + et = &per_cpu(budget_timer, cpu); + hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + et->timer.function = on_enforcement_timeout; + } + return 0; +} + +module_init(init_budget_enforcement); diff --git a/litmus/clustered.c b/litmus/clustered.c new file mode 100644 index 000000000000..de2aca2a271c --- /dev/null +++ b/litmus/clustered.c @@ -0,0 +1,119 @@ +#include +#include +#include +#include + +#include +#include + +int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index) +{ + struct cpu_cacheinfo* info = get_cpu_cacheinfo(cpu); + struct cacheinfo *ci; + + if (!info || index >= info->num_leaves) { + TRACE("no shared-cache CPUs: info=%d index=%u\n", + info != NULL, index); + return 1; + } + + if (!info->info_list) { + TRACE("no shared-cache CPUs: no info_list (cpu\n"); + } + ci = info->info_list + index; + + cpumask_copy(mask, &ci->shared_cpu_map); + + TRACE("get_shared: P%u@L%u -> %d siblings\n ", cpu, index, cpumask_weight(mask)); + + return 0; +} + +int get_cluster_size(enum cache_level level) +{ + cpumask_var_t mask; + int ok; + int num_cpus; + + if (level == GLOBAL_CLUSTER) + return num_online_cpus(); + else { + if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) + return -ENOMEM; + /* assumes CPU 0 is representative of all CPUs */ + ok = get_shared_cpu_map(mask, 0, level); + /* ok == 0 means we got the map; otherwise it's an invalid cache level */ + if (ok == 0) + num_cpus = cpumask_weight(mask); + free_cpumask_var(mask); + + if (ok == 0) + return num_cpus; + else + return -EINVAL; + } +} + +int assign_cpus_to_clusters(enum cache_level level, + struct scheduling_cluster* clusters[], + unsigned int num_clusters, + struct cluster_cpu* cpus[], + unsigned int num_cpus) +{ + cpumask_var_t mask; + unsigned int i, free_cluster = 0, low_cpu; + int err = 0; + + if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) + return -ENOMEM; + + /* clear cluster pointers */ + for (i = 0; i < num_cpus; i++) { + cpus[i]->id = i; + cpus[i]->cluster = NULL; + } + + /* initialize clusters */ + for (i = 0; i < num_clusters; i++) { + 
clusters[i]->id = i; + INIT_LIST_HEAD(&clusters[i]->cpus); + } + + /* Assign each CPU. Two assumtions are made: + * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask). + * 2) All cpus that belong to some cluster are online. + */ + for_each_online_cpu(i) { + /* get lowest-id CPU in cluster */ + if (level != GLOBAL_CLUSTER) { + err = get_shared_cpu_map(mask, cpus[i]->id, level); + if (err != 0) { + /* ugh... wrong cache level? Either caller screwed up + * or the CPU topology is weird. */ + printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n", + level, err); + err = -EINVAL; + goto out; + } + low_cpu = cpumask_first(mask); + } else + low_cpu = 0; + if (low_cpu == i) { + /* caller must provide an appropriate number of clusters */ + BUG_ON(free_cluster >= num_clusters); + + /* create new cluster */ + cpus[i]->cluster = clusters[free_cluster++]; + } else { + /* low_cpu points to the right cluster + * Assumption: low_cpu is actually online and was processed earlier. */ + cpus[i]->cluster = cpus[low_cpu]->cluster; + } + /* enqueue in cpus list */ + list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); + printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id); + } +out: + free_cpumask_var(mask); + return err; +} diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c new file mode 100644 index 000000000000..877f2786b4c8 --- /dev/null +++ b/litmus/ctrldev.c @@ -0,0 +1,160 @@ +#include +#include +#include +#include +#include + +#include + +/* only one page for now, but we might want to add a RO version at some point */ + +#define CTRL_NAME "litmus/ctrl" + +/* allocate t->rt_param.ctrl_page*/ +static int alloc_ctrl_page(struct task_struct *t) +{ + int err = 0; + + /* only allocate if the task doesn't have one yet */ + if (!tsk_rt(t)->ctrl_page) { + tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL); + if (!tsk_rt(t)->ctrl_page) + err = -ENOMEM; + /* will get de-allocated in task teardown */ + TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__, + tsk_rt(t)->ctrl_page); + } + return err; +} + +static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) +{ + int err; + + struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); + + TRACE_CUR(CTRL_NAME + ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n", + tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start, + vma->vm_page_prot); + + /* Map it into the vma. */ + err = vm_insert_page(vma, vma->vm_start, ctrl); + + if (err) + TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err); + + return err; +} + +static void litmus_ctrl_vm_close(struct vm_area_struct* vma) +{ + TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__, + vma->vm_flags, vma->vm_page_prot); + + TRACE_CUR(CTRL_NAME + ": %p:%p vma:%p vma->vm_private_data:%p closed.\n", + (void*) vma->vm_start, (void*) vma->vm_end, vma, + vma->vm_private_data); +} + +static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, + struct vm_fault* vmf) +{ + TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__, + vma->vm_flags, vmf->pgoff); + + /* This function should never be called, since all pages should have + * been mapped by mmap() already. 
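+	 * (map_ctrl_page() installs the page eagerly via vm_insert_page(),
+	 * so a fault on this VMA indicates a bug, not lazy population.)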
*/ + WARN_ONCE(1, "Page faults should be impossible in the control page\n"); + + return VM_FAULT_SIGBUS; +} + +static struct vm_operations_struct litmus_ctrl_vm_ops = { + .close = litmus_ctrl_vm_close, + .fault = litmus_ctrl_vm_fault, +}; + +static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma) +{ + int err = 0; + + /* first make sure mapper knows what he's doing */ + + /* you can only get one page */ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + /* you can only map the "first" page */ + if (vma->vm_pgoff != 0) + return -EINVAL; + + /* you can't share it with anyone */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return -EINVAL; + + vma->vm_ops = &litmus_ctrl_vm_ops; + /* This mapping should not be kept across forks, + * cannot be expanded, and is not a "normal" page. */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ | VM_WRITE; + + /* We don't want the first write access to trigger a "minor" page fault + * to mark the page as dirty. This is transient, private memory, we + * don't care if it was touched or not. PAGE_SHARED means RW access, but + * not execute, and avoids copy-on-write behavior. + * See protection_map in mmap.c. */ + vma->vm_page_prot = PAGE_SHARED; + + err = alloc_ctrl_page(current); + if (!err) + err = map_ctrl_page(current, vma); + + TRACE_CUR("%s flags=0x%x prot=0x%lx\n", + __FUNCTION__, vma->vm_flags, vma->vm_page_prot); + + return err; +} + +static struct file_operations litmus_ctrl_fops = { + .owner = THIS_MODULE, + .mmap = litmus_ctrl_mmap, +}; + +static struct miscdevice litmus_ctrl_dev = { + .name = CTRL_NAME, + .minor = MISC_DYNAMIC_MINOR, + .fops = &litmus_ctrl_fops, +}; + +static int __init init_litmus_ctrl_dev(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); + + BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t)); + + BUILD_BUG_ON(offsetof(struct control_page, sched.raw) + != LITMUS_CP_OFFSET_SCHED); + BUILD_BUG_ON(offsetof(struct control_page, irq_count) + != LITMUS_CP_OFFSET_IRQ_COUNT); + BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start) + != LITMUS_CP_OFFSET_TS_SC_START); + BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start) + != LITMUS_CP_OFFSET_IRQ_SC_START); + + printk("Initializing LITMUS^RT control device.\n"); + err = misc_register(&litmus_ctrl_dev); + if (err) + printk("Could not allocate %s device (%d).\n", CTRL_NAME, err); + return err; +} + +static void __exit exit_litmus_ctrl_dev(void) +{ + misc_deregister(&litmus_ctrl_dev); +} + +module_init(init_litmus_ctrl_dev); +module_exit(exit_litmus_ctrl_dev); diff --git a/litmus/edf_common.c b/litmus/edf_common.c new file mode 100644 index 000000000000..5aca2934a7b5 --- /dev/null +++ b/litmus/edf_common.c @@ -0,0 +1,200 @@ +/* + * kernel/edf_common.c + * + * Common functions for EDF based scheduler. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM +#include +#endif + +#ifdef CONFIG_EDF_TIE_BREAK_HASH +#include +static inline long edf_hash(struct task_struct *t) +{ + /* pid is 32 bits, so normally we would shove that into the + * upper 32-bits and and put the job number in the bottom + * and hash the 64-bit number with hash_64(). Sadly, + * in testing, hash_64() doesn't distribute keys were the + * upper bits are close together (as would be the case with + * pids) and job numbers are equal (as would be the case with + * synchronous task sets with all relative deadlines equal). 
+ * + * A 2006 Linux patch proposed the following solution + * (but for some reason it wasn't accepted...). + * + * At least this workaround works for 32-bit systems as well. + */ + return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); +} +#endif + + +/* edf_higher_prio - returns true if first has a higher EDF priority + * than second. Deadline ties are broken by PID. + * + * both first and second may be NULL + */ +int edf_higher_prio(struct task_struct* first, + struct task_struct* second) +{ + struct task_struct *first_task = first; + struct task_struct *second_task = second; + + /* There is no point in comparing a task to itself. */ + if (first && first == second) { + TRACE_TASK(first, + "WARNING: pointless edf priority comparison.\n"); + return 0; + } + + + /* check for NULL tasks */ + if (!first || !second) + return first && !second; + +#ifdef CONFIG_LITMUS_LOCKING + + /* Check for inherited priorities. Change task + * used for comparison in such a case. + */ + if (unlikely(first->rt_param.inh_task)) + first_task = first->rt_param.inh_task; + if (unlikely(second->rt_param.inh_task)) + second_task = second->rt_param.inh_task; + + /* Check for priority boosting. Tie-break by start of boosting. + */ + if (unlikely(is_priority_boosted(first_task))) { + /* first_task is boosted, how about second_task? */ + if (!is_priority_boosted(second_task) || + lt_before(get_boost_start(first_task), + get_boost_start(second_task))) + return 1; + else + return 0; + } else if (unlikely(is_priority_boosted(second_task))) + /* second_task is boosted, first is not*/ + return 0; + +#endif + + if (earlier_deadline(first_task, second_task)) { + return 1; + } + else if (get_deadline(first_task) == get_deadline(second_task)) { + /* Need to tie break. All methods must set pid_break to 0/1 if + * first_task does not have priority over second_task. + */ + int pid_break; + + +#if defined(CONFIG_EDF_TIE_BREAK_LATENESS) + /* Tie break by lateness. Jobs with greater lateness get + * priority. This should spread tardiness across all tasks, + * especially in task sets where all tasks have the same + * period and relative deadlines. + */ + if (get_lateness(first_task) > get_lateness(second_task)) { + return 1; + } + pid_break = (get_lateness(first_task) == get_lateness(second_task)); + + +#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM) + /* Tie break by lateness, normalized by relative deadline. Jobs with + * greater normalized lateness get priority. + * + * Note: Considered using the algebraically equivalent + * lateness(first)*relative_deadline(second) > + lateness(second)*relative_deadline(first) + * to avoid fixed-point math, but values are prone to overflow if inputs + * are on the order of several seconds, even in 64-bit. + */ + fp_t fnorm = _frac(get_lateness(first_task), + get_rt_relative_deadline(first_task)); + fp_t snorm = _frac(get_lateness(second_task), + get_rt_relative_deadline(second_task)); + if (_gt(fnorm, snorm)) { + return 1; + } + pid_break = _eq(fnorm, snorm); + + +#elif defined(CONFIG_EDF_TIE_BREAK_HASH) + /* Tie break by comparing hashs of (pid, job#) tuple. There should be + * a 50% chance that first_task has a higher priority than second_task. 
+ */ + long fhash = edf_hash(first_task); + long shash = edf_hash(second_task); + if (fhash < shash) { + return 1; + } + pid_break = (fhash == shash); +#else + + + /* CONFIG_EDF_PID_TIE_BREAK */ + pid_break = 1; // fall through to tie-break by pid; +#endif + + /* Tie break by pid */ + if(pid_break) { + if (first_task->pid < second_task->pid) { + return 1; + } + else if (first_task->pid == second_task->pid) { + /* If the PIDs are the same then the task with the + * inherited priority wins. + */ + if (!second->rt_param.inh_task) { + return 1; + } + } + } + } + return 0; /* fall-through. prio(second_task) > prio(first_task) */ +} + +int edf_ready_order(struct bheap_node* a, struct bheap_node* b) +{ + return edf_higher_prio(bheap2task(a), bheap2task(b)); +} + +void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release) +{ + rt_domain_init(rt, edf_ready_order, resched, release); +} + +/* need_to_preempt - check whether the task t needs to be preempted + * call only with irqs disabled and with ready_lock acquired + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! + */ +int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) +{ + /* we need the read lock for edf_ready_queue */ + /* no need to preempt if there is nothing pending */ + if (!__jobs_pending(rt)) + return 0; + /* we need to reschedule if t doesn't exist */ + if (!t) + return 1; + + /* NOTE: We cannot check for non-preemptibility since we + * don't know what address space we're currently in. + */ + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t); +} diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 000000000000..0ff54e41839c --- /dev/null +++ b/litmus/fdso.c @@ -0,0 +1,308 @@ +/* fdso.c - file descriptor attached shared objects + * + * (c) 2007 B. Brandenburg, LITMUS^RT project + * + * Notes: + * - objects descriptor (OD) tables are not cloned during a fork. + * - objects are created on-demand, and freed after the last reference + * is dropped. + * - for now, object types are hard coded. + * - As long as we have live objects, we keep a reference to the inode. 
+ */ + +#include +#include +#include +#include +#include + +#include + +extern struct fdso_ops generic_lock_ops; + +static const struct fdso_ops* fdso_ops[] = { + &generic_lock_ops, /* FMLP_SEM */ + &generic_lock_ops, /* SRP_SEM */ + &generic_lock_ops, /* MPCP_SEM */ + &generic_lock_ops, /* MPCP_VS_SEM */ + &generic_lock_ops, /* DPCP_SEM */ + &generic_lock_ops, /* PCP_SEM */ + &generic_lock_ops, /* DFLP_SEM */ +}; + +static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) +{ + BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1); + + if (fdso_ops[type]->create) + return fdso_ops[type]->create(obj_ref, type, config); + else + return -EINVAL; +} + +static void fdso_destroy(obj_type_t type, void* obj) +{ + fdso_ops[type]->destroy(type, obj); +} + +static int fdso_open(struct od_table_entry* entry, void* __user config) +{ + if (fdso_ops[entry->obj->type]->open) + return fdso_ops[entry->obj->type]->open(entry, config); + else + return 0; +} + +static int fdso_close(struct od_table_entry* entry) +{ + if (fdso_ops[entry->obj->type]->close) + return fdso_ops[entry->obj->type]->close(entry); + else + return 0; +} + +/* inode must be locked already */ +static int alloc_inode_obj(struct inode_obj_id** obj_ref, + struct inode* inode, + obj_type_t type, + unsigned int id, + void* __user config) +{ + struct inode_obj_id* obj; + void* raw_obj; + int err; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) { + return -ENOMEM; + } + + err = fdso_create(&raw_obj, type, config); + if (err != 0) { + kfree(obj); + return err; + } + + INIT_LIST_HEAD(&obj->list); + atomic_set(&obj->count, 1); + obj->type = type; + obj->id = id; + obj->obj = raw_obj; + obj->inode = inode; + + list_add(&obj->list, &inode->i_obj_list); + atomic_inc(&inode->i_count); + + printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id); + + *obj_ref = obj; + return 0; +} + +/* inode must be locked already */ +static struct inode_obj_id* get_inode_obj(struct inode* inode, + obj_type_t type, + unsigned int id) +{ + struct list_head* pos; + struct inode_obj_id* obj = NULL; + + list_for_each(pos, &inode->i_obj_list) { + obj = list_entry(pos, struct inode_obj_id, list); + if (obj->id == id && obj->type == type) { + atomic_inc(&obj->count); + return obj; + } + } + printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); + return NULL; +} + + +static void put_inode_obj(struct inode_obj_id* obj) +{ + struct inode* inode; + int let_go = 0; + + inode = obj->inode; + if (atomic_dec_and_test(&obj->count)) { + + mutex_lock(&inode->i_obj_mutex); + /* no new references can be obtained */ + if (!atomic_read(&obj->count)) { + list_del(&obj->list); + fdso_destroy(obj->type, obj->obj); + kfree(obj); + let_go = 1; + } + mutex_unlock(&inode->i_obj_mutex); + if (let_go) + iput(inode); + } +} + +static struct od_table_entry* get_od_entry(struct task_struct* t) +{ + struct od_table_entry* table; + int i; + + + table = t->od_table; + if (!table) { + table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, + GFP_KERNEL); + t->od_table = table; + } + + for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) + if (!table[i].used) { + table[i].used = 1; + return table + i; + } + return NULL; +} + +static int put_od_entry(struct od_table_entry* od) +{ + put_inode_obj(od->obj); + od->used = 0; + return 0; +} + +static long close_od_entry(struct od_table_entry *od) +{ + long ret; + + /* Give the class a chance to reject the close. 
*/ + ret = fdso_close(od); + if (ret == 0) + ret = put_od_entry(od); + + return ret; +} + +void exit_od_table(struct task_struct* t) +{ + int i; + + if (t->od_table) { + for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) + if (t->od_table[i].used) + close_od_entry(t->od_table + i); + kfree(t->od_table); + t->od_table = NULL; + } +} + +static int do_sys_od_open(struct file* file, obj_type_t type, int id, + void* __user config) +{ + int idx = 0, err = 0; + struct inode* inode; + struct inode_obj_id* obj = NULL; + struct od_table_entry* entry; + + inode = file_inode(file); + + entry = get_od_entry(current); + if (!entry) + return -ENOMEM; + + mutex_lock(&inode->i_obj_mutex); + obj = get_inode_obj(inode, type, id); + if (!obj) + err = alloc_inode_obj(&obj, inode, type, id, config); + if (err != 0) { + obj = NULL; + idx = err; + entry->used = 0; + } else { + entry->obj = obj; + entry->class = fdso_ops[type]; + idx = entry - current->od_table; + } + + mutex_unlock(&inode->i_obj_mutex); + + /* open only if creation succeeded */ + if (!err) + err = fdso_open(entry, config); + if (err < 0) { + /* The class rejected the open call. + * We need to clean up and tell user space. + */ + if (obj) + put_od_entry(entry); + idx = err; + } + + return idx; +} + + +struct od_table_entry* get_entry_for_od(int od) +{ + struct task_struct *t = current; + + if (!t->od_table) + return NULL; + if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) + return NULL; + if (!t->od_table[od].used) + return NULL; + return t->od_table + od; +} + + +asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config) +{ + int ret = 0; + struct file* file; + + /* + 1) get file from fd, get inode from file + 2) lock inode + 3) try to lookup object + 4) if not present create and enqueue object, inc inode refcnt + 5) increment refcnt of object + 6) alloc od_table_entry, setup ptrs + 7) unlock inode + 8) return offset in od_table as OD + */ + + if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) { + ret = -EINVAL; + goto out; + } + + file = fget(fd); + if (!file) { + ret = -EBADF; + goto out; + } + + ret = do_sys_od_open(file, type, obj_id, config); + + fput(file); + +out: + return ret; +} + + +asmlinkage long sys_od_close(int od) +{ + int ret = -EINVAL; + struct task_struct *t = current; + + if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) + return ret; + + if (!t->od_table || !t->od_table[od].used) + return ret; + + + ret = close_od_entry(t->od_table + od); + + return ret; +} diff --git a/litmus/fp_common.c b/litmus/fp_common.c new file mode 100644 index 000000000000..964a4729deff --- /dev/null +++ b/litmus/fp_common.c @@ -0,0 +1,119 @@ +/* + * litmus/fp_common.c + * + * Common functions for fixed-priority scheduler. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +/* fp_higher_prio - returns true if first has a higher static priority + * than second. Ties are broken by PID. + * + * both first and second may be NULL + */ +int fp_higher_prio(struct task_struct* first, + struct task_struct* second) +{ + struct task_struct *first_task = first; + struct task_struct *second_task = second; + + /* There is no point in comparing a task to itself. */ + if (unlikely(first && first == second)) { + TRACE_TASK(first, + "WARNING: pointless FP priority comparison.\n"); + return 0; + } + + + /* check for NULL tasks */ + if (!first || !second) + return first && !second; + + if (!is_realtime(second_task)) + return 1; + +#ifdef CONFIG_LITMUS_LOCKING + + /* Check for inherited priorities. 
Change task + * used for comparison in such a case. + */ + if (unlikely(first->rt_param.inh_task)) + first_task = first->rt_param.inh_task; + if (unlikely(second->rt_param.inh_task)) + second_task = second->rt_param.inh_task; + + /* Check for priority boosting. Tie-break by start of boosting. + */ + if (unlikely(is_priority_boosted(first_task))) { + /* first_task is boosted, how about second_task? */ + if (is_priority_boosted(second_task)) + /* break by priority point */ + return lt_before(get_boost_start(first_task), + get_boost_start(second_task)); + else + /* priority boosting wins. */ + return 1; + } else if (unlikely(is_priority_boosted(second_task))) + /* second_task is boosted, first is not*/ + return 0; + +#endif + + /* Comparisons to itself are not expected; priority inheritance + * should also not cause this to happen. */ + BUG_ON(first_task == second_task); + + if (get_priority(first_task) < get_priority(second_task)) + return 1; + else if (get_priority(first_task) == get_priority(second_task)) + /* Break by PID. */ + return first_task->pid < second_task->pid; + else + return 0; +} + +int fp_ready_order(struct bheap_node* a, struct bheap_node* b) +{ + return fp_higher_prio(bheap2task(a), bheap2task(b)); +} + +void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release) +{ + rt_domain_init(rt, fp_ready_order, resched, release); +} + +/* need_to_preempt - check whether the task t needs to be preempted + */ +int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t) +{ + struct task_struct *pending; + + pending = fp_prio_peek(q); + + if (!pending) + return 0; + if (!t) + return 1; + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || fp_higher_prio(pending, t); +} + +void fp_prio_queue_init(struct fp_prio_queue* q) +{ + int i; + + for (i = 0; i < FP_PRIO_BIT_WORDS; i++) + q->bitmask[i] = 0; + for (i = 0; i < LITMUS_MAX_PRIORITY; i++) + bheap_init(&q->queue[i]); +} diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 000000000000..0dd36b9343d6 --- /dev/null +++ b/litmus/jobs.c @@ -0,0 +1,82 @@ +/* litmus/jobs.c - common job control code + */ + +#include + +#include +#include +#include +#include + +static inline void setup_release(struct task_struct *t, lt_t release) +{ + /* prepare next release */ + t->rt_param.job_params.release = release; + t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); + t->rt_param.job_params.exec_time = 0; + + /* update job sequence number */ + t->rt_param.job_params.job_no++; +} + +void prepare_for_next_period(struct task_struct *t) +{ + BUG_ON(!t); + + /* Record lateness before we set up the next job's + * release and deadline. Lateness may be negative. 
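+	 * (A negative value means the previous job completed before its
+	 * deadline; this field feeds the lateness-based EDF tie-breaks in
+	 * edf_common.c.)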
+ */ + t->rt_param.job_params.lateness = + (long long)litmus_clock() - + (long long)t->rt_param.job_params.deadline; + + if (tsk_rt(t)->sporadic_release) { + TRACE_TASK(t, "sporadic release at %llu\n", + tsk_rt(t)->sporadic_release_time); + /* sporadic release */ + setup_release(t, tsk_rt(t)->sporadic_release_time); + tsk_rt(t)->sporadic_release = 0; + } else { + /* periodic release => add period */ + setup_release(t, get_release(t) + get_rt_period(t)); + } +} + +void release_at(struct task_struct *t, lt_t start) +{ + BUG_ON(!t); + setup_release(t, start); + tsk_rt(t)->completed = 0; +} + +long default_wait_for_release_at(lt_t release_time) +{ + struct task_struct *t = current; + unsigned long flags; + + local_irq_save(flags); + tsk_rt(t)->sporadic_release_time = release_time; + smp_wmb(); + tsk_rt(t)->sporadic_release = 1; + local_irq_restore(flags); + + return litmus->complete_job(); +} + + +/* + * Deactivate current task until the beginning of the next period. + */ +long complete_job(void) +{ + preempt_disable(); + TRACE_CUR("job completion indicated at %llu\n", litmus_clock()); + /* Mark that we do not excute anymore */ + tsk_rt(current)->completed = 1; + /* call schedule, this will return when a new job arrives + * it also takes care of preparing for the next release + */ + litmus_reschedule_local(); + preempt_enable(); + return 0; +} diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 000000000000..703360c68609 --- /dev/null +++ b/litmus/litmus.c @@ -0,0 +1,681 @@ +/* + * litmus.c -- Implementation of the LITMUS syscalls, + * the LITMUS intialization code, + * and the procfs interface.. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +/* Number of RT tasks that exist in the system */ +atomic_t rt_task_count = ATOMIC_INIT(0); + +#ifdef CONFIG_RELEASE_MASTER +/* current master CPU for handling timer IRQs */ +atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); +#endif + +static struct kmem_cache * bheap_node_cache; +extern struct kmem_cache * release_heap_cache; + +struct bheap_node* bheap_node_alloc(int gfp_flags) +{ + return kmem_cache_alloc(bheap_node_cache, gfp_flags); +} + +void bheap_node_free(struct bheap_node* hn) +{ + kmem_cache_free(bheap_node_cache, hn); +} + +struct release_heap* release_heap_alloc(int gfp_flags); +void release_heap_free(struct release_heap* rh); + +/** + * Get the quantum alignment as a cmdline option. + * Default is staggered quanta, as this results in lower overheads. + */ +static bool aligned_quanta = 0; +module_param(aligned_quanta, bool, 0644); + +u64 cpu_stagger_offset(int cpu) +{ + u64 offset = 0; + + if (!aligned_quanta) { + offset = LITMUS_QUANTUM_LENGTH_NS; + do_div(offset, num_possible_cpus()); + offset *= cpu; + } + return offset; +} + +/* + * sys_set_task_rt_param + * @pid: Pid of the task which scheduling parameters must be changed + * @param: New real-time extension parameters such as the execution cost and + * period + * Syscall for manipulating with task rt extension params + * Returns EFAULT if param is NULL. + * ESRCH if pid is not corrsponding + * to a valid task. + * EINVAL if either period or execution cost is <=0 + * EPERM if pid is a real-time task + * 0 if success + * + * Only non-real-time tasks may be configured with this system call + * to avoid races with the scheduler. 
In practice, this means that a + * task's parameters must be set _before_ calling sys_prepare_rt_task() + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + struct rt_task tp; + struct task_struct *target; + int retval = -EINVAL; + + printk("Setting up rt task parameters for process %d.\n", pid); + + if (pid < 0 || param == 0) { + goto out; + } + if (copy_from_user(&tp, param, sizeof(tp))) { + retval = -EFAULT; + goto out; + } + + /* Task search and manipulation must be protected */ + read_lock_irq(&tasklist_lock); + rcu_read_lock(); + if (!(target = find_task_by_vpid(pid))) { + retval = -ESRCH; + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + + if (is_realtime(target)) { + /* The task is already a real-time task. + * We cannot not allow parameter changes at this point. + */ + retval = -EBUSY; + goto out_unlock; + } + + /* set relative deadline to be implicit if left unspecified */ + if (tp.relative_deadline == 0) + tp.relative_deadline = tp.period; + + if (tp.exec_cost <= 0) + goto out_unlock; + if (tp.period <= 0) + goto out_unlock; + if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/ + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because task density > 1.0\n", pid); + goto out_unlock; + } + if (tp.cls != RT_CLASS_HARD && + tp.cls != RT_CLASS_SOFT && + tp.cls != RT_CLASS_BEST_EFFORT) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because its class is invalid\n", pid); + goto out_unlock; + } + if (tp.budget_policy != NO_ENFORCEMENT && + tp.budget_policy != QUANTUM_ENFORCEMENT && + tp.budget_policy != PRECISE_ENFORCEMENT) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because unsupported budget enforcement policy " + "specified (%d)\n", + pid, tp.budget_policy); + goto out_unlock; + } + + target->rt_param.task_params = tp; + + retval = 0; + out_unlock: + read_unlock_irq(&tasklist_lock); + out: + return retval; +} + +/* + * Getter of task's RT params + * returns EINVAL if param or pid is NULL + * returns ESRCH if pid does not correspond to a valid task + * returns EFAULT if copying of parameters has failed. + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + int retval = -EINVAL; + struct task_struct *source; + struct rt_task lp; + if (param == 0 || pid < 0) + goto out; + read_lock(&tasklist_lock); + if (!(source = find_task_by_vpid(pid))) { + retval = -ESRCH; + goto out_unlock; + } + lp = source->rt_param.task_params; + read_unlock(&tasklist_lock); + /* Do copying outside the lock */ + retval = + copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; + return retval; + out_unlock: + read_unlock(&tasklist_lock); + out: + return retval; + +} + +/* + * This is the crucial function for periodic task implementation, + * It checks if a task is periodic, checks if such kind of sleep + * is permitted and calls plugin-specific sleep, which puts the + * task into a wait array. 
+ * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_complete_job(void) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + /* The plugin has to put the task into an + * appropriate queue and call schedule + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is an "improved" version of sys_complete_job that + * addresses the problem of unintentionally missing a job after + * an overrun. + * + * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_wait_for_job_release(unsigned int job) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + + retval = 0; + + /* first wait until we have "reached" the desired job + * + * This implementation has at least two problems: + * + * 1) It doesn't gracefully handle the wrap around of + * job_no. Since LITMUS is a prototype, this is not much + * of a problem right now. + * + * 2) It is theoretically racy if a job release occurs + * between checking job_no and calling sleep_next_period(). + * A proper solution would requiring adding another callback + * in the plugin structure and testing the condition with + * interrupts disabled. + * + * FIXME: At least problem 2 should be taken care of eventually. + */ + while (!retval && job > current->rt_param.job_params.job_no) + /* If the last job overran then job <= job_no and we + * don't send the task to sleep. + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is a helper syscall to query the current job sequence number. + * + * returns 0 on successful query + * returns EPERM if task is not a real-time task. + * returns EFAULT if &job is not a valid pointer. + */ +asmlinkage long sys_query_job_no(unsigned int __user *job) +{ + int retval = -EPERM; + if (is_realtime(current)) + retval = put_user(current->rt_param.job_params.job_no, job); + + return retval; +} + +/* sys_null_call() is only used for determining raw system call + * overheads (kernel entry, kernel exit). It has no useful side effects. + * If ts is non-NULL, then the current Feather-Trace time is recorded. + */ +asmlinkage long sys_null_call(cycles_t __user *ts) +{ + long ret = 0; + cycles_t now; + + if (ts) { + now = get_cycles(); + ret = put_user(now, ts); + } + + return ret; +} + +/* p is a real-time task. Re-init its state as a best-effort task. */ +static void reinit_litmus_state(struct task_struct* p, int restore) +{ + struct rt_task user_config = {}; + void* ctrl_page = NULL; + + if (restore) { + /* Safe user-space provided configuration data. + * and allocated page. */ + user_config = p->rt_param.task_params; + ctrl_page = p->rt_param.ctrl_page; + } + + /* We probably should not be inheriting any task's priority + * at this point in time. + */ + WARN_ON(p->rt_param.inh_task); + + /* Cleanup everything else. */ + memset(&p->rt_param, 0, sizeof(p->rt_param)); + + /* Restore preserved fields. 
*/ + if (restore) { + p->rt_param.task_params = user_config; + p->rt_param.ctrl_page = ctrl_page; + } +} + +long litmus_admit_task(struct task_struct* tsk) +{ + long retval = 0; + + BUG_ON(is_realtime(tsk)); + + tsk_rt(tsk)->heap_node = NULL; + tsk_rt(tsk)->rel_heap = NULL; + + if (get_rt_relative_deadline(tsk) == 0 || + get_exec_cost(tsk) > + min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { + TRACE_TASK(tsk, + "litmus admit: invalid task parameters " + "(e = %lu, p = %lu, d = %lu)\n", + get_exec_cost(tsk), get_rt_period(tsk), + get_rt_relative_deadline(tsk)); + retval = -EINVAL; + goto out; + } + + INIT_LIST_HEAD(&tsk_rt(tsk)->list); + + /* allocate heap node for this task */ + tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); + tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); + + if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { + printk(KERN_WARNING "litmus: no more heap node memory!?\n"); + + retval = -ENOMEM; + goto out; + } else { + bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); + } + + preempt_disable(); + + retval = litmus->admit_task(tsk); + + if (!retval) { + sched_trace_task_name(tsk); + sched_trace_task_param(tsk); + atomic_inc(&rt_task_count); + } + + preempt_enable(); + +out: + if (retval) { + if (tsk_rt(tsk)->heap_node) + bheap_node_free(tsk_rt(tsk)->heap_node); + if (tsk_rt(tsk)->rel_heap) + release_heap_free(tsk_rt(tsk)->rel_heap); + } + return retval; +} + +void litmus_clear_state(struct task_struct* tsk) +{ + BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); + bheap_node_free(tsk_rt(tsk)->heap_node); + release_heap_free(tsk_rt(tsk)->rel_heap); + + atomic_dec(&rt_task_count); + reinit_litmus_state(tsk, 1); +} + +/* called from sched_setscheduler() */ +void litmus_exit_task(struct task_struct* tsk) +{ + if (is_realtime(tsk)) { + sched_trace_task_completion(tsk, 1); + + litmus->task_exit(tsk); + } +} + +static DECLARE_RWSEM(plugin_switch_mutex); + +void litmus_plugin_switch_disable(void) +{ + down_read(&plugin_switch_mutex); +} + +void litmus_plugin_switch_enable(void) +{ + up_read(&plugin_switch_mutex); +} + +static int __do_plugin_switch(struct sched_plugin* plugin) +{ + int ret; + + + /* don't switch if there are active real-time tasks */ + if (atomic_read(&rt_task_count) == 0) { + TRACE("deactivating plugin %s\n", litmus->plugin_name); + ret = litmus->deactivate_plugin(); + if (0 != ret) + goto out; + + TRACE("activating plugin %s\n", plugin->plugin_name); + ret = plugin->activate_plugin(); + if (0 != ret) { + printk(KERN_INFO "Can't activate %s (%d).\n", + plugin->plugin_name, ret); + plugin = &linux_sched_plugin; + } + + printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); + litmus = plugin; + } else + ret = -EBUSY; +out: + TRACE("do_plugin_switch() => %d\n", ret); + return ret; +} + +static atomic_t ready_to_switch; + +static int do_plugin_switch(void *_plugin) +{ + unsigned long flags; + int ret = 0; + + local_save_flags(flags); + local_irq_disable(); + hard_irq_disable(); + + if (atomic_dec_and_test(&ready_to_switch)) + { + ret = __do_plugin_switch((struct sched_plugin*) _plugin); + atomic_set(&ready_to_switch, INT_MAX); + } + + do { + cpu_relax(); + } while (atomic_read(&ready_to_switch) != INT_MAX); + + local_irq_restore(flags); + return ret; +} + +/* Switching a plugin in use is tricky. + * We must watch out that no real-time tasks exists + * (and that none is created in parallel) and that the plugin is not + * currently in use on any processor (in theory). 
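
In practice the switch is requested from userspace by writing the plugin's name to /proc/litmus/active_plugin, which is served by litmus_active_proc_write() in litmus_proc.c further below; a minimal sketch:

/* Sketch: ask the kernel to switch the active scheduler plugin. Succeeds
 * only while no real-time tasks exist (-EBUSY otherwise, see above). */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int select_plugin(const char *name)
{
        int fd = open("/proc/litmus/active_plugin", O_WRONLY);
        ssize_t written;

        if (fd < 0)
                return -1;
        written = write(fd, name, strlen(name));
        close(fd);
        return written < 0 ? -1 : 0;
}

For example, select_plugin("Linux") falls back to the default Linux plugin registered by this file's init code.
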
+ */ +int switch_sched_plugin(struct sched_plugin* plugin) +{ + int err; + struct domain_proc_info* domain_info; + + BUG_ON(!plugin); + + if (atomic_read(&rt_task_count) == 0) { + down_write(&plugin_switch_mutex); + + deactivate_domain_proc(); + + get_online_cpus(); + atomic_set(&ready_to_switch, num_online_cpus()); + err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin); + put_online_cpus(); + + if (!litmus->get_domain_proc_info(&domain_info)) + activate_domain_proc(domain_info); + + up_write(&plugin_switch_mutex); + return err; + } else + return -EBUSY; +} + +/* Called upon fork. + * p is the newly forked task. + */ +void litmus_fork(struct task_struct* p) +{ + if (is_realtime(p)) { + /* clean out any litmus related state, don't preserve anything */ + reinit_litmus_state(p, 0); + /* Don't let the child be a real-time task. */ + p->sched_reset_on_fork = 1; + } else + /* non-rt tasks might have ctrl_page set */ + tsk_rt(p)->ctrl_page = NULL; + + /* od tables are never inherited across a fork */ + p->od_table = NULL; +} + +/* Called upon execve(). + * current is doing the exec. + * Don't let address space specific stuff leak. + */ +void litmus_exec(void) +{ + struct task_struct* p = current; + + if (is_realtime(p)) { + WARN_ON(p->rt_param.inh_task); + if (tsk_rt(p)->ctrl_page) { + free_page((unsigned long) tsk_rt(p)->ctrl_page); + tsk_rt(p)->ctrl_page = NULL; + } + } +} + +/* Called when dead_tsk is being deallocated + */ +void exit_litmus(struct task_struct *dead_tsk) +{ + /* We also allow non-RT tasks to + * allocate control pages to allow + * measurements with non-RT tasks. + * So check if we need to free the page + * in any case. + */ + if (tsk_rt(dead_tsk)->ctrl_page) { + TRACE_TASK(dead_tsk, + "freeing ctrl_page %p\n", + tsk_rt(dead_tsk)->ctrl_page); + free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); + } + + /* Tasks should not be real-time tasks any longer at this point. */ + BUG_ON(is_realtime(dead_tsk)); +} + +void litmus_do_exit(struct task_struct *exiting_tsk) +{ + /* This task called do_exit(), but is still a real-time task. To avoid + * complications later, we force it to be a non-real-time task now. */ + + struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; + + TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n"); + sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, ¶m); +} + +void litmus_dealloc(struct task_struct *tsk) +{ + /* tsk is no longer a real-time task */ + TRACE_TASK(tsk, "Deallocating real-time task data\n"); + litmus->task_cleanup(tsk); + litmus_clear_state(tsk); +} + +/* move current non-RT task to a specific CPU */ +int litmus_be_migrate_to(int cpu) +{ + struct cpumask single_cpu_aff; + + cpumask_clear(&single_cpu_aff); + cpumask_set_cpu(cpu, &single_cpu_aff); + return sched_setaffinity(current->pid, &single_cpu_aff); +} + +#ifdef CONFIG_MAGIC_SYSRQ +int sys_kill(int pid, int sig); + +static void sysrq_handle_kill_rt_tasks(int key) +{ + struct task_struct *t; + read_lock(&tasklist_lock); + for_each_process(t) { + if (is_realtime(t)) { + sys_kill(t->pid, SIGKILL); + } + } + read_unlock(&tasklist_lock); +} + +static struct sysrq_key_op sysrq_kill_rt_tasks_op = { + .handler = sysrq_handle_kill_rt_tasks, + .help_msg = "quit-rt-tasks(X)", + .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", +}; +#endif + +extern struct sched_plugin linux_sched_plugin; + +static int litmus_shutdown_nb(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + /* Attempt to switch back to regular Linux scheduling. 
+ * Forces the active plugin to clean up. + */ + if (litmus != &linux_sched_plugin) { + int ret = switch_sched_plugin(&linux_sched_plugin); + if (ret) { + printk("Auto-shutdown of active Litmus plugin failed.\n"); + } + } + return NOTIFY_DONE; +} + +static struct notifier_block shutdown_notifier = { + .notifier_call = litmus_shutdown_nb, +}; + +static int __init _init_litmus(void) +{ + /* Common initializers, + * mode change lock is used to enforce single mode change + * operation. + */ + printk("Starting LITMUS^RT kernel\n"); + + register_sched_plugin(&linux_sched_plugin); + + bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); + release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); + +#ifdef CONFIG_MAGIC_SYSRQ + /* offer some debugging help */ + if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) + printk("Registered kill rt tasks magic sysrq.\n"); + else + printk("Could not register kill rt tasks magic sysrq.\n"); +#endif + + init_litmus_proc(); + + register_reboot_notifier(&shutdown_notifier); + + return 0; +} + +static void _exit_litmus(void) +{ + unregister_reboot_notifier(&shutdown_notifier); + + exit_litmus_proc(); + kmem_cache_destroy(bheap_node_cache); + kmem_cache_destroy(release_heap_cache); +} + +module_init(_init_litmus); +module_exit(_exit_litmus); diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c new file mode 100644 index 000000000000..2ef1669eff17 --- /dev/null +++ b/litmus/litmus_proc.c @@ -0,0 +1,573 @@ +/* + * litmus_proc.c -- Implementation of the /proc/litmus directory tree. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +/* in litmus/litmus.c */ +extern atomic_t rt_task_count; + +static struct proc_dir_entry *litmus_dir = NULL, + *curr_file = NULL, + *stat_file = NULL, + *plugs_dir = NULL, +#ifdef CONFIG_RELEASE_MASTER + *release_master_file = NULL, +#endif + *plugs_file = NULL, + *domains_dir = NULL, + *cpus_dir = NULL; + + +/* in litmus/sync.c */ +int count_tasks_waiting_for_release(void); + +static int litmus_stats_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, + "real-time tasks = %d\n" + "ready for release = %d\n", + atomic_read(&rt_task_count), + count_tasks_waiting_for_release()); + return 0; +} + +static int litmus_stats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_stats_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_stats_proc_fops = { + .open = litmus_stats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +static int litmus_loaded_proc_show(struct seq_file *m, void *v) +{ + print_sched_plugins(m); + return 0; +} + +static int litmus_loaded_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_loaded_proc_fops = { + .open = litmus_loaded_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + + + +/* in litmus/litmus.c */ +int switch_sched_plugin(struct sched_plugin*); + +static ssize_t litmus_active_proc_write(struct file *file, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + char name[65]; + struct sched_plugin* found; + ssize_t ret = -EINVAL; + int err; + + + ret = copy_and_chomp(name, sizeof(name), buffer, count); + if (ret < 0) + return ret; + + found = find_sched_plugin(name); + + if (found) { + err = switch_sched_plugin(found); + if (err) { + printk(KERN_INFO "Could not switch plugin: %d\n", err); + ret = 
err; + } + } else { + printk(KERN_INFO "Plugin '%s' is unknown.\n", name); + ret = -ESRCH; + } + + return ret; +} + +static int litmus_active_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s\n", litmus->plugin_name); + return 0; +} + +static int litmus_active_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_active_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_active_proc_fops = { + .open = litmus_active_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = litmus_active_proc_write, +}; + + +#ifdef CONFIG_RELEASE_MASTER +static ssize_t litmus_release_master_proc_write( + struct file *file, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + int cpu, err, online = 0; + char msg[64]; + ssize_t len; + + len = copy_and_chomp(msg, sizeof(msg), buffer, count); + + if (len < 0) + return len; + + if (strcmp(msg, "NO_CPU") == 0) + atomic_set(&release_master_cpu, NO_CPU); + else { + err = sscanf(msg, "%d", &cpu); + if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { + atomic_set(&release_master_cpu, cpu); + } else { + TRACE("invalid release master: '%s' " + "(err:%d cpu:%d online:%d)\n", + msg, err, cpu, online); + len = -EINVAL; + } + } + return len; +} + +static int litmus_release_master_proc_show(struct seq_file *m, void *v) +{ + int master; + master = atomic_read(&release_master_cpu); + if (master == NO_CPU) + seq_printf(m, "NO_CPU\n"); + else + seq_printf(m, "%d\n", master); + return 0; +} + +static int litmus_release_master_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_release_master_proc_fops = { + .open = litmus_release_master_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = litmus_release_master_proc_write, +}; +#endif + +int __init init_litmus_proc(void) +{ + litmus_dir = proc_mkdir("litmus", NULL); + if (!litmus_dir) { + printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); + return -ENOMEM; + } + + curr_file = proc_create("active_plugin", 0644, litmus_dir, + &litmus_active_proc_fops); + + if (!curr_file) { + printk(KERN_ERR "Could not allocate active_plugin " + "procfs entry.\n"); + return -ENOMEM; + } + +#ifdef CONFIG_RELEASE_MASTER + release_master_file = proc_create("release_master", 0644, litmus_dir, + &litmus_release_master_proc_fops); + if (!release_master_file) { + printk(KERN_ERR "Could not allocate release_master " + "procfs entry.\n"); + return -ENOMEM; + } +#endif + + stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops); + + plugs_dir = proc_mkdir("plugins", litmus_dir); + if (!plugs_dir){ + printk(KERN_ERR "Could not allocate plugins directory " + "procfs entry.\n"); + return -ENOMEM; + } + + plugs_file = proc_create("loaded", 0444, plugs_dir, + &litmus_loaded_proc_fops); + + domains_dir = proc_mkdir("domains", litmus_dir); + if (!domains_dir) { + printk(KERN_ERR "Could not allocate domains directory " + "procfs entry.\n"); + return -ENOMEM; + } + + cpus_dir = proc_mkdir("cpus", litmus_dir); + if (!cpus_dir) { + printk(KERN_ERR "Could not allocate cpus directory " + "procfs entry.\n"); + return -ENOMEM; + } + + return 0; +} + +void exit_litmus_proc(void) +{ + if (cpus_dir || domains_dir) { + deactivate_domain_proc(); + if (cpus_dir) + remove_proc_entry("cpus", litmus_dir); + if (domains_dir) + remove_proc_entry("domains", 
litmus_dir); + } + if (plugs_file) + remove_proc_entry("loaded", plugs_dir); + if (plugs_dir) + remove_proc_entry("plugins", litmus_dir); + if (stat_file) + remove_proc_entry("stats", litmus_dir); + if (curr_file) + remove_proc_entry("active_plugin", litmus_dir); +#ifdef CONFIG_RELEASE_MASTER + if (release_master_file) + remove_proc_entry("release_master", litmus_dir); +#endif + if (litmus_dir) + remove_proc_entry("litmus", NULL); +} + +long make_plugin_proc_dir(struct sched_plugin* plugin, + struct proc_dir_entry** pde_in) +{ + struct proc_dir_entry *pde_new = NULL; + long rv; + + if (!plugin || !plugin->plugin_name){ + printk(KERN_ERR "Invalid plugin struct passed to %s.\n", + __func__); + rv = -EINVAL; + goto out_no_pde; + } + + if (!plugs_dir){ + printk(KERN_ERR "Could not make plugin sub-directory, because " + "/proc/litmus/plugins does not exist.\n"); + rv = -ENOENT; + goto out_no_pde; + } + + pde_new = proc_mkdir(plugin->plugin_name, plugs_dir); + if (!pde_new){ + printk(KERN_ERR "Could not make plugin sub-directory: " + "out of memory?.\n"); + rv = -ENOMEM; + goto out_no_pde; + } + + rv = 0; + *pde_in = pde_new; + goto out_ok; + +out_no_pde: + *pde_in = NULL; +out_ok: + return rv; +} + +void remove_plugin_proc_dir(struct sched_plugin* plugin) +{ + if (!plugin || !plugin->plugin_name){ + printk(KERN_ERR "Invalid plugin struct passed to %s.\n", + __func__); + return; + } + remove_proc_entry(plugin->plugin_name, plugs_dir); +} + + + +/* misc. I/O helper functions */ + +int copy_and_chomp(char *kbuf, unsigned long ksize, + __user const char* ubuf, unsigned long ulength) +{ + /* caller must provide buffer space */ + BUG_ON(!ksize); + + ksize--; /* leave space for null byte */ + + if (ksize > ulength) + ksize = ulength; + + if(copy_from_user(kbuf, ubuf, ksize)) + return -EFAULT; + + kbuf[ksize] = '\0'; + + /* chomp kbuf */ + if (ksize > 0 && kbuf[ksize - 1] == '\n') + kbuf[ksize - 1] = '\0'; + + return ksize; +} + +/* helper functions for clustered plugins */ +static const char* cache_level_names[] = { + "ALL", + "L1", + "L2", + "L3", +}; + +int parse_cache_level(const char *cache_name, enum cache_level *level) +{ + int err = -EINVAL; + int i; + /* do a quick and dirty comparison to find the cluster size */ + for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++) + if (!strcmp(cache_name, cache_level_names[i])) { + *level = (enum cache_level) i; + err = 0; + break; + } + return err; +} + +const char* cache_level_name(enum cache_level level) +{ + int idx = level; + + if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER) + return cache_level_names[idx]; + else + return "INVALID"; +} + + + + +/* proc file interface to configure the cluster size */ + +static ssize_t litmus_cluster_proc_write(struct file *file, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file)); + ssize_t len; + char cache_name[8]; + + len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count); + + if (len > 0 && parse_cache_level(cache_name, level)) { + printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); + len = -EINVAL; + } + + return len; +} + +static int litmus_cluster_proc_show(struct seq_file *m, void *v) +{ + enum cache_level *level = (enum cache_level *) m->private; + + seq_printf(m, "%s\n", cache_level_name(*level)); + return 0; +} + +static int litmus_cluster_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations 
litmus_cluster_proc_fops = { + .open = litmus_cluster_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = litmus_cluster_proc_write, +}; + +struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent, + enum cache_level* level) +{ + struct proc_dir_entry* cluster_file; + + + cluster_file = proc_create_data("cluster", 0644, parent, + &litmus_cluster_proc_fops, + (void *) level); + if (!cluster_file) { + printk(KERN_ERR + "Could not cluster procfs entry.\n"); + } + return cluster_file; +} + +static struct domain_proc_info* active_mapping = NULL; + +static int litmus_mapping_proc_show(struct seq_file *m, void *v) +{ + struct cd_mapping *mapping = (struct cd_mapping*) m->private; + + if(!mapping) + return 0; + + seq_printf(m, "%*pb\n", cpumask_pr_args(mapping->mask)); + return 0; +} + +static int litmus_mapping_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_domain_proc_fops = { + .open = litmus_mapping_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +long activate_domain_proc(struct domain_proc_info* map) +{ + int i; + char name[8]; + + if (!map) + return -EINVAL; + if (cpus_dir == NULL || domains_dir == NULL) + return -EINVAL; + + if (active_mapping) + deactivate_domain_proc(); + + active_mapping = map; + + for (i = 0; i < map->num_cpus; ++i) { + struct cd_mapping* m = &map->cpu_to_domains[i]; + snprintf(name, sizeof(name), "%d", m->id); + m->proc_file = proc_create_data(name, 0444, cpus_dir, + &litmus_domain_proc_fops, (void*)m); + } + + for (i = 0; i < map->num_domains; ++i) { + struct cd_mapping* m = &map->domain_to_cpus[i]; + snprintf(name, sizeof(name), "%d", m->id); + m->proc_file = proc_create_data(name, 0444, domains_dir, + &litmus_domain_proc_fops, (void*)m); + } + + return 0; +} + +long deactivate_domain_proc() +{ + int i; + char name[65]; + + struct domain_proc_info* map = active_mapping; + + if (!map) + return -EINVAL; + + for (i = 0; i < map->num_cpus; ++i) { + struct cd_mapping* m = &map->cpu_to_domains[i]; + snprintf(name, sizeof(name), "%d", m->id); + remove_proc_entry(name, cpus_dir); + m->proc_file = NULL; + } + for (i = 0; i < map->num_domains; ++i) { + struct cd_mapping* m = &map->domain_to_cpus[i]; + snprintf(name, sizeof(name), "%d", m->id); + remove_proc_entry(name, domains_dir); + m->proc_file = NULL; + } + + active_mapping = NULL; + + return 0; +} + +long init_domain_proc_info(struct domain_proc_info* m, + int num_cpus, int num_domains) +{ + int i; + int num_alloced_cpu_masks = 0; + int num_alloced_domain_masks = 0; + + m->cpu_to_domains = + kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus, + GFP_ATOMIC); + if(!m->cpu_to_domains) + goto failure; + + m->domain_to_cpus = + kmalloc(sizeof(*(m->domain_to_cpus))*num_domains, + GFP_ATOMIC); + if(!m->domain_to_cpus) + goto failure; + + for(i = 0; i < num_cpus; ++i) { + if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC)) + goto failure; + ++num_alloced_cpu_masks; + } + for(i = 0; i < num_domains; ++i) { + if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC)) + goto failure; + ++num_alloced_domain_masks; + } + + return 0; + +failure: + for(i = 0; i < num_alloced_cpu_masks; ++i) + free_cpumask_var(m->cpu_to_domains[i].mask); + for(i = 0; i < num_alloced_domain_masks; ++i) + free_cpumask_var(m->domain_to_cpus[i].mask); + if(m->cpu_to_domains) + kfree(m->cpu_to_domains); + 
if(m->domain_to_cpus) + kfree(m->domain_to_cpus); + return -ENOMEM; +} + +void destroy_domain_proc_info(struct domain_proc_info* m) +{ + int i; + for(i = 0; i < m->num_cpus; ++i) + free_cpumask_var(m->cpu_to_domains[i].mask); + for(i = 0; i < m->num_domains; ++i) + free_cpumask_var(m->domain_to_cpus[i].mask); + kfree(m->cpu_to_domains); + kfree(m->domain_to_cpus); + memset(m, sizeof(*m), 0); +} diff --git a/litmus/locking.c b/litmus/locking.c new file mode 100644 index 000000000000..43d9aece2e74 --- /dev/null +++ b/litmus/locking.c @@ -0,0 +1,188 @@ +#include +#include +#include + +#ifdef CONFIG_LITMUS_LOCKING + +#include +#include +#include +#include +#include + +static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); +static int open_generic_lock(struct od_table_entry* entry, void* __user arg); +static int close_generic_lock(struct od_table_entry* entry); +static void destroy_generic_lock(obj_type_t type, void* sem); + +struct fdso_ops generic_lock_ops = { + .create = create_generic_lock, + .open = open_generic_lock, + .close = close_generic_lock, + .destroy = destroy_generic_lock +}; + +static inline bool is_lock(struct od_table_entry* entry) +{ + return entry->class == &generic_lock_ops; +} + +static inline struct litmus_lock* get_lock(struct od_table_entry* entry) +{ + BUG_ON(!is_lock(entry)); + return (struct litmus_lock*) entry->obj->obj; +} + +static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) +{ + struct litmus_lock* lock; + int err; + + err = litmus->allocate_lock(&lock, type, arg); + if (err == 0) + *obj_ref = lock; + return err; +} + +static int open_generic_lock(struct od_table_entry* entry, void* __user arg) +{ + struct litmus_lock* lock = get_lock(entry); + if (lock->ops->open) + return lock->ops->open(lock, arg); + else + return 0; /* default: any task can open it */ +} + +static int close_generic_lock(struct od_table_entry* entry) +{ + struct litmus_lock* lock = get_lock(entry); + if (lock->ops->close) + return lock->ops->close(lock); + else + return 0; /* default: closing succeeds */ +} + +static void destroy_generic_lock(obj_type_t type, void* obj) +{ + struct litmus_lock* lock = (struct litmus_lock*) obj; + lock->ops->deallocate(lock); +} + +asmlinkage long sys_litmus_lock(int lock_od) +{ + long err = -EINVAL; + struct od_table_entry* entry; + struct litmus_lock* l; + + TS_SYSCALL_IN_START; + + TS_SYSCALL_IN_END; + + TS_LOCK_START; + + entry = get_entry_for_od(lock_od); + if (entry && is_lock(entry)) { + l = get_lock(entry); + TRACE_CUR("attempts to lock 0x%p\n", l); + err = l->ops->lock(l); + } + + /* Note: task my have been suspended or preempted in between! Take + * this into account when computing overheads. */ + TS_LOCK_END; + + TS_SYSCALL_OUT_START; + + return err; +} + +asmlinkage long sys_litmus_unlock(int lock_od) +{ + long err = -EINVAL; + struct od_table_entry* entry; + struct litmus_lock* l; + + TS_SYSCALL_IN_START; + + TS_SYSCALL_IN_END; + + TS_UNLOCK_START; + + entry = get_entry_for_od(lock_od); + if (entry && is_lock(entry)) { + l = get_lock(entry); + TRACE_CUR("attempts to unlock 0x%p\n", l); + err = l->ops->unlock(l); + } + + /* Note: task my have been preempted in between! Take this into + * account when computing overheads. 
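
Userspace is expected to bracket critical sections with these two syscalls, using a lock object descriptor obtained through the FDSO interface; a sketch, where litmus_lock()/litmus_unlock() are assumed wrappers and touch_shared_resource() is a placeholder:

/* Sketch: guard a critical section with sys_litmus_lock()/sys_litmus_unlock().
 * lock_od refers to a previously allocated lock object (e.g., an SRP
 * semaphore); any protocol-specific blocking happens inside l->ops->lock(). */
static void update_shared_resource(int lock_od)
{
        if (litmus_lock(lock_od) != 0)  /* may suspend the caller */
                return;                 /* e.g., -EINVAL: not a lock descriptor */

        touch_shared_resource();        /* critical section */

        litmus_unlock(lock_od);
}
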
*/ + TS_UNLOCK_END; + + TS_SYSCALL_OUT_START; + + return err; +} + +struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) +{ + wait_queue_t* q; + struct task_struct* t = NULL; + + if (waitqueue_active(wq)) { + q = list_entry(wq->task_list.next, + wait_queue_t, task_list); + t = (struct task_struct*) q->private; + __remove_wait_queue(wq, q); + } + return(t); +} + +unsigned int __add_wait_queue_prio_exclusive( + wait_queue_head_t* head, + prio_wait_queue_t *new) +{ + struct list_head *pos; + unsigned int passed = 0; + + new->wq.flags |= WQ_FLAG_EXCLUSIVE; + + /* find a spot where the new entry is less than the next */ + list_for_each(pos, &head->task_list) { + prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t, + wq.task_list); + + if (unlikely(lt_before(new->priority, queued->priority) || + (new->priority == queued->priority && + new->tie_breaker < queued->tie_breaker))) { + /* pos is not less than new, thus insert here */ + __list_add(&new->wq.task_list, pos->prev, pos); + goto out; + } + passed++; + } + + /* if we get to this point either the list is empty or every entry + * queued element is less than new. + * Let's add new to the end. */ + list_add_tail(&new->wq.task_list, &head->task_list); +out: + return passed; +} + + +#else + +struct fdso_ops generic_lock_ops = {}; + +asmlinkage long sys_litmus_lock(int sem_od) +{ + return -ENOSYS; +} + +asmlinkage long sys_litmus_unlock(int sem_od) +{ + return -ENOSYS; +} + +#endif diff --git a/litmus/preempt.c b/litmus/preempt.c new file mode 100644 index 000000000000..03e9b5acfb5d --- /dev/null +++ b/litmus/preempt.c @@ -0,0 +1,141 @@ +#include + +#include +#include +#include + +/* The rescheduling state of each processor. + */ +DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); + +void sched_state_will_schedule(struct task_struct* tsk) +{ + /* Litmus hack: we only care about processor-local invocations of + * set_tsk_need_resched(). We can't reliably set the flag remotely + * since it might race with other updates to the scheduling state. We + * can't rely on the runqueue lock protecting updates to the sched + * state since processors do not acquire the runqueue locks for all + * updates to the sched state (to avoid acquiring two runqueue locks at + * the same time). Further, if tsk is residing on a remote processor, + * then that processor doesn't actually know yet that it is going to + * reschedule; it still must receive an IPI (unless a local invocation + * races). + */ + if (likely(task_cpu(tsk) == smp_processor_id())) { + VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE); + if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) + set_sched_state(PICKED_WRONG_TASK); + else + set_sched_state(WILL_SCHEDULE); + } else + /* Litmus tasks should never be subject to a remote + * set_tsk_need_resched(). */ + BUG_ON(is_realtime(tsk)); +#ifdef CONFIG_PREEMPT_STATE_TRACE + TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", + __builtin_return_address(0)); +#endif +} + +/* Called by the IPI handler after another CPU called smp_send_resched(). */ +void sched_state_ipi(void) +{ + /* If the IPI was slow, we might be in any state right now. The IPI is + * only meaningful if we are in SHOULD_SCHEDULE. */ + if (is_in_sched_state(SHOULD_SCHEDULE)) { + /* Cause scheduler to be invoked. + * This will cause a transition to WILL_SCHEDULE. 
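
Taken together, the plugin-side contract is: request a preemption with litmus_reschedule() or litmus_reschedule_local() while holding the lock that serializes scheduling decisions, and acknowledge every decision in the plugin's schedule() callback via sched_state_task_picked(); a condensed sketch, in which the lock, pick_next_job(), and the my_* names are placeholders:

/* Sketch of how a plugin drives the scheduling-state machine. */
static DEFINE_RAW_SPINLOCK(my_sched_lock);

static void my_plugin_job_release(struct task_struct *t, int cpu)
{
        raw_spin_lock(&my_sched_lock);
        /* ... enqueue the newly released job ... */
        litmus_reschedule(cpu);         /* TASK_SCHEDULED -> SHOULD_SCHEDULE, or
                                         * TASK_PICKED -> PICKED_WRONG_TASK */
        raw_spin_unlock(&my_sched_lock);
}

static struct task_struct *my_plugin_schedule(struct task_struct *prev)
{
        struct task_struct *next = pick_next_job();     /* placeholder */

        sched_state_task_picked();      /* mandatory; see sched_state_plugin_check() */
        return next;
}
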
*/ + set_tsk_need_resched(current); + TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", + current->comm, current->pid); + TS_SEND_RESCHED_END; + } else { + /* ignore */ + TRACE_STATE("ignoring IPI in state %x (%s)\n", + get_sched_state(), + sched_state_name(get_sched_state())); + } +} + +/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must + * hold the lock that is used to serialize scheduling decisions. */ +void litmus_reschedule(int cpu) +{ + int picked_transition_ok = 0; + int scheduled_transition_ok = 0; + + /* The (remote) CPU could be in any state. */ + + /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU + * is not aware of the need to reschedule at this point. */ + + /* is a context switch in progress? */ + if (cpu_is_in_sched_state(cpu, TASK_PICKED)) + picked_transition_ok = sched_state_transition_on( + cpu, TASK_PICKED, PICKED_WRONG_TASK); + + if (!picked_transition_ok && + cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { + /* We either raced with the end of the context switch, or the + * CPU was in TASK_SCHEDULED anyway. */ + scheduled_transition_ok = sched_state_transition_on( + cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); + } + + /* If the CPU was in state TASK_SCHEDULED, then we need to cause the + * scheduler to be invoked. */ + if (scheduled_transition_ok) { + if (smp_processor_id() == cpu) { + set_tsk_need_resched(current); + preempt_set_need_resched(); + } else { + TS_SEND_RESCHED_START(cpu); + smp_send_reschedule(cpu); + } + } + + TRACE_STATE("%s picked-ok:%d sched-ok:%d\n", + __FUNCTION__, + picked_transition_ok, + scheduled_transition_ok); +} + +void litmus_reschedule_local(void) +{ + if (is_in_sched_state(TASK_PICKED)) + set_sched_state(PICKED_WRONG_TASK); + else if (is_in_sched_state(TASK_SCHEDULED + | SHOULD_SCHEDULE + | PICKED_WRONG_TASK)) { + set_sched_state(WILL_SCHEDULE); + set_tsk_need_resched(current); + preempt_set_need_resched(); + } +} + +#ifdef CONFIG_DEBUG_KERNEL + +void sched_state_plugin_check(void) +{ + if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) { + TRACE("!!!! plugin did not call sched_state_task_picked()!" + "Calling sched_state_task_picked() is mandatory---fix this.\n"); + set_sched_state(TASK_PICKED); + } +} + +#define NAME_CHECK(x) case x: return #x +const char* sched_state_name(int s) +{ + switch (s) { + NAME_CHECK(TASK_SCHEDULED); + NAME_CHECK(SHOULD_SCHEDULE); + NAME_CHECK(WILL_SCHEDULE); + NAME_CHECK(TASK_PICKED); + NAME_CHECK(PICKED_WRONG_TASK); + default: + return "UNKNOWN"; + }; +} + +#endif diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c new file mode 100644 index 000000000000..e5dec0bbbba9 --- /dev/null +++ b/litmus/rt_domain.c @@ -0,0 +1,353 @@ +/* + * litmus/rt_domain.c + * + * LITMUS real-time infrastructure. This file contains the + * functions that manipulate RT domains. RT domains are an abstraction + * of a ready queue and a release queue. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include + +/* Uncomment when debugging timer races... */ +#if 0 +#define VTRACE_TASK TRACE_TASK +#define VTRACE TRACE +#else +#define VTRACE_TASK(t, fmt, args...) /* shut up */ +#define VTRACE(fmt, args...) 
/* be quiet already */ +#endif + +static int dummy_resched(rt_domain_t *rt) +{ + return 0; +} + +static int dummy_order(struct bheap_node* a, struct bheap_node* b) +{ + return 0; +} + +/* default implementation: use default lock */ +static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + merge_ready(rt, tasks); +} + +static unsigned int time2slot(lt_t time) +{ + return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; +} + +static enum hrtimer_restart on_release_timer(struct hrtimer *timer) +{ + unsigned long flags; + struct release_heap* rh; + rh = container_of(timer, struct release_heap, timer); + + TS_RELEASE_LATENCY(rh->release_time); + + VTRACE("on_release_timer(0x%p) starts.\n", timer); + + TS_RELEASE_START; + + + raw_spin_lock_irqsave(&rh->dom->release_lock, flags); + VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); + /* remove from release queue */ + list_del(&rh->list); + raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags); + VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); + + /* call release callback */ + rh->dom->release_jobs(rh->dom, &rh->heap); + /* WARNING: rh can be referenced from other CPUs from now on. */ + + TS_RELEASE_END; + + VTRACE("on_release_timer(0x%p) ends.\n", timer); + + return HRTIMER_NORESTART; +} + +/* allocated in litmus.c */ +struct kmem_cache * release_heap_cache; + +struct release_heap* release_heap_alloc(int gfp_flags) +{ + struct release_heap* rh; + rh= kmem_cache_alloc(release_heap_cache, gfp_flags); + if (rh) { + /* initialize timer */ + hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + rh->timer.function = on_release_timer; + } + return rh; +} + +void release_heap_free(struct release_heap* rh) +{ + /* make sure timer is no longer in use */ + hrtimer_cancel(&rh->timer); + kmem_cache_free(release_heap_cache, rh); +} + +/* Caller must hold release lock. + * Will return heap for given time. If no such heap exists prior to + * the invocation it will be created. + */ +static struct release_heap* get_release_heap(rt_domain_t *rt, + struct task_struct* t, + int use_task_heap) +{ + struct list_head* pos; + struct release_heap* heap = NULL; + struct release_heap* rh; + lt_t release_time = get_release(t); + unsigned int slot = time2slot(release_time); + + /* initialize pos for the case that the list is empty */ + pos = rt->release_queue.slot[slot].next; + list_for_each(pos, &rt->release_queue.slot[slot]) { + rh = list_entry(pos, struct release_heap, list); + if (release_time == rh->release_time) { + /* perfect match -- this happens on hyperperiod + * boundaries + */ + heap = rh; + break; + } else if (lt_before(release_time, rh->release_time)) { + /* we need to insert a new node since rh is + * already in the future + */ + break; + } + } + if (!heap && use_task_heap) { + /* use pre-allocated release heap */ + rh = tsk_rt(t)->rel_heap; + + rh->dom = rt; + rh->release_time = release_time; + + /* add to release queue */ + list_add(&rh->list, pos->prev); + heap = rh; + } + return heap; +} + +static void reinit_release_heap(struct task_struct* t) +{ + struct release_heap* rh; + + /* use pre-allocated release heap */ + rh = tsk_rt(t)->rel_heap; + + /* Make sure it is safe to use. The timer callback could still + * be executing on another CPU; hrtimer_cancel() will wait + * until the timer callback has completed. However, under no + * circumstances should the timer be active (= yet to be + * triggered). 
+ * + * WARNING: If the CPU still holds the release_lock at this point, + * deadlock may occur! + */ + BUG_ON(hrtimer_cancel(&rh->timer)); + + /* initialize */ + bheap_init(&rh->heap); +#ifdef CONFIG_RELEASE_MASTER + atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); +#endif +} +/* arm_release_timer() - start local release timer or trigger + * remote timer (pull timer) + * + * Called by add_release() with: + * - tobe_lock taken + * - IRQ disabled + */ +#ifdef CONFIG_RELEASE_MASTER +#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU) +static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu) +#else +static void arm_release_timer(rt_domain_t *_rt) +#endif +{ + rt_domain_t *rt = _rt; + struct list_head list; + struct list_head *pos, *safe; + struct task_struct* t; + struct release_heap* rh; + + VTRACE("arm_release_timer() at %llu\n", litmus_clock()); + list_replace_init(&rt->tobe_released, &list); + + list_for_each_safe(pos, safe, &list) { + /* pick task of work list */ + t = list_entry(pos, struct task_struct, rt_param.list); + sched_trace_task_release(t); + list_del(pos); + + /* put into release heap while holding release_lock */ + raw_spin_lock(&rt->release_lock); + VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock); + + rh = get_release_heap(rt, t, 0); + if (!rh) { + /* need to use our own, but drop lock first */ + raw_spin_unlock(&rt->release_lock); + VTRACE_TASK(t, "Dropped release_lock 0x%p\n", + &rt->release_lock); + + reinit_release_heap(t); + VTRACE_TASK(t, "release_heap ready\n"); + + raw_spin_lock(&rt->release_lock); + VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n", + &rt->release_lock); + + rh = get_release_heap(rt, t, 1); + } + bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); + VTRACE_TASK(t, "arm_release_timer(): added to release heap\n"); + + raw_spin_unlock(&rt->release_lock); + VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); + + /* To avoid arming the timer multiple times, we only let the + * owner do the arming (which is the "first" task to reference + * this release_heap anyway). + */ + if (rh == tsk_rt(t)->rel_heap) { + VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer); + + if (!hrtimer_is_hres_active(&rh->timer)) { + TRACE_TASK(t, "WARNING: no hires timer!!!\n"); + } + + /* we cannot arm the timer using hrtimer_start() + * as it may deadlock on rq->lock + * + * PINNED mode is ok on both local and remote CPU + */ +#ifdef CONFIG_RELEASE_MASTER + if (rt->release_master == NO_CPU && + target_cpu == NO_CPU) +#endif + __hrtimer_start_range_ns(&rh->timer, + ns_to_ktime(rh->release_time), + 0, HRTIMER_MODE_ABS_PINNED, 0); +#ifdef CONFIG_RELEASE_MASTER + else + hrtimer_start_on( + /* target_cpu overrides release master */ + (target_cpu != NO_CPU ? 
+ target_cpu : rt->release_master), + &rh->info, &rh->timer, + ns_to_ktime(rh->release_time), + HRTIMER_MODE_ABS_PINNED); +#endif + } else + VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer); + } +} + +void rt_domain_init(rt_domain_t *rt, + bheap_prio_t order, + check_resched_needed_t check, + release_jobs_t release + ) +{ + int i; + + BUG_ON(!rt); + if (!check) + check = dummy_resched; + if (!release) + release = default_release_jobs; + if (!order) + order = dummy_order; + +#ifdef CONFIG_RELEASE_MASTER + rt->release_master = NO_CPU; +#endif + + bheap_init(&rt->ready_queue); + INIT_LIST_HEAD(&rt->tobe_released); + for (i = 0; i < RELEASE_QUEUE_SLOTS; i++) + INIT_LIST_HEAD(&rt->release_queue.slot[i]); + + raw_spin_lock_init(&rt->ready_lock); + raw_spin_lock_init(&rt->release_lock); + raw_spin_lock_init(&rt->tobe_lock); + + rt->check_resched = check; + rt->release_jobs = release; + rt->order = order; +} + +/* add_ready - add a real-time task to the rt ready queue. It must be runnable. + * @new: the newly released task + */ +void __add_ready(rt_domain_t* rt, struct task_struct *new) +{ + TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " + "to ready queue at %llu\n", + new->comm, new->pid, + get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), + get_release(new), litmus_clock()); + + BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); + + bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); + rt->check_resched(rt); +} + +/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable. + * @tasks - the newly released tasks + */ +void __merge_ready(rt_domain_t* rt, struct bheap* tasks) +{ + bheap_union(rt->order, &rt->ready_queue, tasks); + rt->check_resched(rt); +} + + +#ifdef CONFIG_RELEASE_MASTER +void __add_release_on(rt_domain_t* rt, struct task_struct *task, + int target_cpu) +{ + TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n", + get_release(task), target_cpu); + list_add(&tsk_rt(task)->list, &rt->tobe_released); + task->rt_param.domain = rt; + + arm_release_timer_on(rt, target_cpu); +} +#endif + +/* add_release - add a real-time task to the rt release queue. + * @task: the sleeping task + */ +void __add_release(rt_domain_t* rt, struct task_struct *task) +{ + TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); + list_add(&tsk_rt(task)->list, &rt->tobe_released); + task->rt_param.domain = rt; + + arm_release_timer(rt); +} diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 000000000000..edd91e9bf773 --- /dev/null +++ b/litmus/sched_plugin.c @@ -0,0 +1,238 @@ +/* sched_plugin.c -- core infrastructure for the scheduler plugin system + * + * This file includes the initialization of the plugin system, the no-op Linux + * scheduler plugin, some dummy functions, and some helper functions. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Generic function to trigger preemption on either local or remote cpu + * from scheduler plugins. The key feature is that this function is + * non-preemptive section aware and does not invoke the scheduler / send + * IPIs if the to-be-preempted task is actually non-preemptive. + */ +void preempt_if_preemptable(struct task_struct* t, int cpu) +{ + /* t is the real-time task executing on CPU on_cpu If t is NULL, then + * on_cpu is currently scheduling background work. 
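
An event-driven plugin typically combines the rt_domain_t helpers above with this preemption helper roughly as follows; a condensed sketch in which every my_* name is a placeholder and only the rt_domain/litmus calls are real:

/* Sketch: wiring an rt_domain_t into a plugin. The __-prefixed calls are
 * assumed to run with the appropriate domain locks held by the caller. */
static rt_domain_t my_domain;

static int my_ready_order(struct bheap_node *a, struct bheap_node *b)
{
        return 0;       /* placeholder: nonzero iff a has higher priority than b */
}

static int my_check_resched(rt_domain_t *rt)
{
        /* invoked from __add_ready()/__merge_ready(): preempt if the new head
         * of the ready queue beats whatever is scheduled locally */
        if (my_new_job_should_preempt(rt))              /* placeholder test */
                preempt_if_preemptable(my_scheduled_task(), smp_processor_id());
        return 0;
}

static void my_domain_setup(void)
{
        /* NULL release handler => default_release_jobs() merges released
         * jobs straight into the ready queue */
        rt_domain_init(&my_domain, my_ready_order, my_check_resched, NULL);
}

static void my_requeue(struct task_struct *t)
{
        if (!lt_before(litmus_clock(), get_release(t)))
                __add_ready(&my_domain, t);     /* release time already passed */
        else
                __add_release(&my_domain, t);   /* arms the release timer */
}
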
+ */ + + int reschedule = 0; + + if (!t) + /* move non-real-time task out of the way */ + reschedule = 1; + else { + if (smp_processor_id() == cpu) { + /* local CPU case */ + /* check if we need to poke userspace */ + if (is_user_np(t)) + /* Yes, poke it. This doesn't have to be atomic since + * the task is definitely not executing. */ + request_exit_np(t); + else if (!is_kernel_np(t)) + /* only if we are allowed to preempt the + * currently-executing task */ + reschedule = 1; + } else { + /* Remote CPU case. Only notify if it's not a kernel + * NP section and if we didn't set the userspace + * flag. */ + reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); + } + } + if (likely(reschedule)) + litmus_reschedule(cpu); +} + + +/************************************************************* + * Dummy plugin functions * + *************************************************************/ + +static void litmus_dummy_finish_switch(struct task_struct * prev) +{ +} + +static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) +{ + sched_state_task_picked(); + return NULL; +} + +static long litmus_dummy_admit_task(struct task_struct* tsk) +{ + printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", + tsk->comm, tsk->pid); + return -EINVAL; +} + +static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) +{ +} + +static void litmus_dummy_task_wake_up(struct task_struct *task) +{ +} + +static void litmus_dummy_task_block(struct task_struct *task) +{ +} + +static void litmus_dummy_task_exit(struct task_struct *task) +{ +} + +static void litmus_dummy_task_cleanup(struct task_struct *task) +{ +} + +static long litmus_dummy_complete_job(void) +{ + return -ENOSYS; +} + +static long litmus_dummy_activate_plugin(void) +{ + return 0; +} + +static long litmus_dummy_deactivate_plugin(void) +{ + return 0; +} + +static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d) +{ + *d = NULL; + return 0; +} + +static void litmus_dummy_synchronous_release_at(lt_t time_zero) +{ + /* ignore */ +} + +#ifdef CONFIG_LITMUS_LOCKING + +static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, + void* __user config) +{ + return -ENXIO; +} + +#endif + + +/* The default scheduler plugin. It doesn't do anything and lets Linux do its + * job. + */ +struct sched_plugin linux_sched_plugin = { + .plugin_name = "Linux", + .task_new = litmus_dummy_task_new, + .task_exit = litmus_dummy_task_exit, + .task_wake_up = litmus_dummy_task_wake_up, + .task_block = litmus_dummy_task_block, + .complete_job = litmus_dummy_complete_job, + .schedule = litmus_dummy_schedule, + .finish_switch = litmus_dummy_finish_switch, + .activate_plugin = litmus_dummy_activate_plugin, + .deactivate_plugin = litmus_dummy_deactivate_plugin, + .get_domain_proc_info = litmus_dummy_get_domain_proc_info, + .synchronous_release_at = litmus_dummy_synchronous_release_at, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = litmus_dummy_allocate_lock, +#endif + .admit_task = litmus_dummy_admit_task +}; + +/* + * The reference to current plugin that is used to schedule tasks within + * the system. 
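
Since register_sched_plugin() below substitutes the litmus_dummy_* implementations for any callback left NULL (via the CHECK() macro), a minimal plugin only needs to fill in the callbacks it actually implements; a sketch with placeholder my_* callbacks:

/* Sketch: minimal plugin registration. complete_job is the generic helper
 * defined earlier in this patch; all my_* symbols are placeholders. */
static struct sched_plugin my_plugin = {
        .plugin_name    = "MY-PLUGIN",
        .schedule       = my_plugin_schedule,   /* must call sched_state_task_picked() */
        .admit_task     = my_admit_task,
        .task_new       = my_task_new,
        .task_exit      = my_task_exit,
        .complete_job   = complete_job,
};

static int __init init_my_plugin(void)
{
        return register_sched_plugin(&my_plugin);
}
module_init(init_my_plugin);
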
It stores references to actual function implementations + * Should be initialized by calling "init_***_plugin()" + */ +struct sched_plugin *litmus = &linux_sched_plugin; + +/* the list of registered scheduling plugins */ +static LIST_HEAD(sched_plugins); +static DEFINE_RAW_SPINLOCK(sched_plugins_lock); + +#define CHECK(func) {\ + if (!plugin->func) \ + plugin->func = litmus_dummy_ ## func;} + +/* FIXME: get reference to module */ +int register_sched_plugin(struct sched_plugin* plugin) +{ + printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", + plugin->plugin_name); + + /* make sure we don't trip over null pointers later */ + CHECK(finish_switch); + CHECK(schedule); + CHECK(task_wake_up); + CHECK(task_exit); + CHECK(task_cleanup); + CHECK(task_block); + CHECK(task_new); + CHECK(complete_job); + CHECK(activate_plugin); + CHECK(deactivate_plugin); + CHECK(get_domain_proc_info); +#ifdef CONFIG_LITMUS_LOCKING + CHECK(allocate_lock); +#endif + CHECK(admit_task); + CHECK(synchronous_release_at); + + if (!plugin->wait_for_release_at) + plugin->wait_for_release_at = default_wait_for_release_at; + + raw_spin_lock(&sched_plugins_lock); + list_add(&plugin->list, &sched_plugins); + raw_spin_unlock(&sched_plugins_lock); + + return 0; +} + + +/* FIXME: reference counting, etc. */ +struct sched_plugin* find_sched_plugin(const char* name) +{ + struct list_head *pos; + struct sched_plugin *plugin; + + raw_spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + if (!strcmp(plugin->plugin_name, name)) + goto out_unlock; + } + plugin = NULL; + +out_unlock: + raw_spin_unlock(&sched_plugins_lock); + return plugin; +} + +void print_sched_plugins(struct seq_file *m) +{ + struct list_head *pos; + struct sched_plugin *plugin; + + raw_spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + seq_printf(m, "%s\n", plugin->plugin_name); + } + raw_spin_unlock(&sched_plugins_lock); +} diff --git a/litmus/srp.c b/litmus/srp.c new file mode 100644 index 000000000000..7ab388646e29 --- /dev/null +++ b/litmus/srp.c @@ -0,0 +1,308 @@ +/* ************************************************************************** */ +/* STACK RESOURCE POLICY */ +/* ************************************************************************** */ + +#include +#include +#include + +#include +#include +#include +#include + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + +srp_prioritization_t get_srp_prio; + +struct srp { + struct list_head ceiling; + wait_queue_head_t ceiling_blocked; +}; +#define system_ceiling(srp) list2prio(srp->ceiling.next) +#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling) + +#define UNDEF_SEM -2 + +DEFINE_PER_CPU(struct srp, srp); + +DEFINE_PER_CPU(int, srp_objects_in_use); + +/* Initialize SRP semaphores at boot time. */ +static int __init srp_init(void) +{ + int i; + + printk("Initializing SRP per-CPU ceilings..."); + for (i = 0; i < NR_CPUS; i++) { + init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked); + INIT_LIST_HEAD(&per_cpu(srp, i).ceiling); + per_cpu(srp_objects_in_use, i) = 0; + } + printk(" done!\n"); + + return 0; +} +module_init(srp_init); + +/* SRP task priority comparison function. Smaller numeric values have higher + * priority, tie-break is PID. 
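
For instance, a ceiling entry with priority 2 exceeds one with priority 3 regardless of PID, and of two entries that both have priority 2, the one with the smaller PID wins the tie-break.
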
Special case: priority == 0 <=> no priority + */ +static int srp_higher_prio(struct srp_priority* first, + struct srp_priority* second) +{ + if (!first->priority) + return 0; + else + return !second->priority || + first->priority < second->priority || ( + first->priority == second->priority && + first->pid < second->pid); +} + + +static int srp_exceeds_ceiling(struct task_struct* first, + struct srp* srp) +{ + struct srp_priority prio; + + if (list_empty(&srp->ceiling)) + return 1; + else { + prio.pid = first->pid; + prio.priority = get_srp_prio(first); + return srp_higher_prio(&prio, system_ceiling(srp)) || + ceiling2sem(system_ceiling(srp))->owner == first; + } +} + +static void srp_add_prio(struct srp* srp, struct srp_priority* prio) +{ + struct list_head *pos; + if (in_list(&prio->list)) { + printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in " + "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio)); + return; + } + list_for_each(pos, &srp->ceiling) + if (unlikely(srp_higher_prio(prio, list2prio(pos)))) { + __list_add(&prio->list, pos->prev, pos); + return; + } + + list_add_tail(&prio->list, &srp->ceiling); +} + + +static int lock_srp_semaphore(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + + if (!is_realtime(t)) + return -EPERM; + + /* prevent acquisition of local locks in global critical sections */ + if (tsk_rt(t)->num_locks_held) + return -EBUSY; + + preempt_disable(); + + /* Update ceiling. */ + srp_add_prio(this_cpu_ptr(&srp), &sem->ceiling); + + /* SRP invariant: all resources available */ + BUG_ON(sem->owner != NULL); + + sem->owner = t; + TRACE_CUR("acquired srp 0x%p\n", sem); + + tsk_rt(t)->num_local_locks_held++; + + preempt_enable(); + + return 0; +} + +static int unlock_srp_semaphore(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + int err = 0; + + preempt_disable(); + + if (sem->owner != t) { + err = -EINVAL; + } else { + /* The current owner should be executing on the correct CPU. + * + * If the owner transitioned out of RT mode or is exiting, then + * we it might have already been migrated away by the best-effort + * scheduler and we just have to deal with it. */ + if (unlikely(!is_realtime(t) && sem->cpu != smp_processor_id())) { + TRACE_TASK(t, "SRP unlock cpu=%d, sem->cpu=%d\n", + smp_processor_id(), sem->cpu); + preempt_enable(); + err = litmus_be_migrate_to(sem->cpu); + preempt_disable(); + TRACE_TASK(t, "post-migrate: cpu=%d, sem->cpu=%d err=%d\n", + smp_processor_id(), sem->cpu, err); + } + BUG_ON(sem->cpu != smp_processor_id()); + err = 0; + + /* Determine new system priority ceiling for this CPU. */ + BUG_ON(!in_list(&sem->ceiling.list)); + + list_del(&sem->ceiling.list); + sem->owner = NULL; + + /* Wake tasks on this CPU, if they exceed current ceiling. 
*/ + TRACE_CUR("released srp 0x%p\n", sem); + wake_up_all(&this_cpu_ptr(&srp)->ceiling_blocked); + + tsk_rt(t)->num_local_locks_held--; + } + + preempt_enable(); + return err; +} + +static int open_srp_semaphore(struct litmus_lock* l, void* __user arg) +{ + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + int err = 0; + struct task_struct* t = current; + struct srp_priority t_prio; + + if (!is_realtime(t)) + return -EPERM; + + TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu); + + preempt_disable(); + + if (sem->owner != NULL) + err = -EBUSY; + + if (err == 0) { + if (sem->cpu == UNDEF_SEM) + sem->cpu = get_partition(t); + else if (sem->cpu != get_partition(t)) + err = -EPERM; + } + + if (err == 0) { + t_prio.priority = get_srp_prio(t); + t_prio.pid = t->pid; + if (srp_higher_prio(&t_prio, &sem->ceiling)) { + sem->ceiling.priority = t_prio.priority; + sem->ceiling.pid = t_prio.pid; + } + } + + preempt_enable(); + + return err; +} + +static int close_srp_semaphore(struct litmus_lock* l) +{ + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + int err = 0; + + preempt_disable(); + + if (sem->owner == current) + unlock_srp_semaphore(l); + + preempt_enable(); + + return err; +} + +static void deallocate_srp_semaphore(struct litmus_lock* l) +{ + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + raw_cpu_dec(srp_objects_in_use); + kfree(sem); +} + +static struct litmus_lock_ops srp_lock_ops = { + .open = open_srp_semaphore, + .close = close_srp_semaphore, + .lock = lock_srp_semaphore, + .unlock = unlock_srp_semaphore, + .deallocate = deallocate_srp_semaphore, +}; + +struct srp_semaphore* allocate_srp_semaphore(void) +{ + struct srp_semaphore* sem; + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return NULL; + + INIT_LIST_HEAD(&sem->ceiling.list); + sem->ceiling.priority = 0; + sem->cpu = UNDEF_SEM; + sem->owner = NULL; + + sem->litmus_lock.ops = &srp_lock_ops; + + raw_cpu_inc(srp_objects_in_use); + return sem; +} + +static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + int cpu = smp_processor_id(); + struct task_struct *tsk = wait->private; + if (cpu != get_partition(tsk)) + TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b", + get_partition(tsk)); + else if (srp_exceeds_ceiling(tsk, this_cpu_ptr(&srp))) + return default_wake_function(wait, mode, sync, key); + return 0; +} + +static void do_ceiling_block(struct task_struct *tsk) +{ + wait_queue_t wait = { + .private = tsk, + .func = srp_wake_up, + .task_list = {NULL, NULL} + }; + + tsk->state = TASK_UNINTERRUPTIBLE; + add_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait); + tsk->rt_param.srp_non_recurse = 1; + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + tsk->rt_param.srp_non_recurse = 0; + remove_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait); +} + +/* Wait for current task priority to exceed system-wide priority ceiling. 
+ */ +void __srp_ceiling_block(struct task_struct *cur) +{ + preempt_disable(); + if (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) { + TRACE_CUR("is priority ceiling blocked.\n"); + while (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) + do_ceiling_block(cur); + TRACE_CUR("finally exceeds system ceiling.\n"); + } else + TRACE_CUR("is not priority ceiling blocked\n"); + preempt_enable(); +} + +#endif diff --git a/litmus/sync.c b/litmus/sync.c new file mode 100644 index 000000000000..5d180603f46b --- /dev/null +++ b/litmus/sync.c @@ -0,0 +1,152 @@ +/* litmus/sync.c - Support for synchronous and asynchronous task system releases. + * + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +struct ts_release_wait { + struct list_head list; + struct completion completion; + lt_t ts_release_time; +}; + +#define DECLARE_TS_RELEASE_WAIT(symb) \ + struct ts_release_wait symb = \ + { \ + LIST_HEAD_INIT(symb.list), \ + COMPLETION_INITIALIZER_ONSTACK(symb.completion), \ + 0 \ + } + +static LIST_HEAD(task_release_list); +static DEFINE_MUTEX(task_release_lock); + +static long do_wait_for_ts_release(void) +{ + DECLARE_TS_RELEASE_WAIT(wait); + + long ret = -ERESTARTSYS; + + if (mutex_lock_interruptible(&task_release_lock)) + goto out; + + list_add(&wait.list, &task_release_list); + + mutex_unlock(&task_release_lock); + + /* We are enqueued, now we wait for someone to wake us up. */ + ret = wait_for_completion_interruptible(&wait.completion); + + if (!ret) { + /* Completion succeeded, setup release time. */ + ret = litmus->wait_for_release_at( + wait.ts_release_time + get_rt_phase(current)); + } else { + /* We were interrupted, must cleanup list. */ + mutex_lock(&task_release_lock); + if (!wait.completion.done) + list_del(&wait.list); + mutex_unlock(&task_release_lock); + } + +out: + return ret; +} + +int count_tasks_waiting_for_release(void) +{ + int task_count = 0; + struct list_head *pos; + + mutex_lock(&task_release_lock); + + list_for_each(pos, &task_release_list) { + task_count++; + } + + mutex_unlock(&task_release_lock); + + + return task_count; +} + +static long do_release_ts(lt_t start) +{ + long task_count = 0; + + struct list_head *pos, *safe; + struct ts_release_wait *wait; + + if (mutex_lock_interruptible(&task_release_lock)) { + task_count = -ERESTARTSYS; + goto out; + } + + TRACE("<<<<<< synchronous task system release >>>>>>\n"); + sched_trace_sys_release(&start); + litmus->synchronous_release_at(start); + + task_count = 0; + list_for_each_safe(pos, safe, &task_release_list) { + wait = (struct ts_release_wait*) + list_entry(pos, struct ts_release_wait, list); + + task_count++; + wait->ts_release_time = start; + complete(&wait->completion); + } + + /* clear stale list */ + INIT_LIST_HEAD(&task_release_list); + + mutex_unlock(&task_release_lock); + +out: + return task_count; +} + + +asmlinkage long sys_wait_for_ts_release(void) +{ + long ret = -EPERM; + struct task_struct *t = current; + + if (is_realtime(t)) + ret = do_wait_for_ts_release(); + + return ret; +} + +#define ONE_MS 1000000 + +asmlinkage long sys_release_ts(lt_t __user *__delay) +{ + long ret; + lt_t delay; + lt_t start_time; + + /* FIXME: check capabilities... 
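
The intended usage pattern is that every real-time task parks itself in sys_wait_for_ts_release() once it has been admitted, and a separate launcher then calls sys_release_ts() with a relative delay to release all waiting tasks at a common start time; a sketch, where wait_for_ts_release() and release_ts() are assumed userspace wrappers:

/* Sketch: synchronous task-system release driven from userspace. */
static void rt_task_startup(void)
{
        /* in every real-time task, after admission */
        wait_for_ts_release();          /* blocks until the launcher fires */
}

static long launch_task_system(void)
{
        lt_t delay = 1000000000ULL;     /* release roughly 1 s from now (ns),
                                         * relative to a millisecond-aligned base */

        /* returns the number of tasks that were waiting and got released */
        return release_ts(&delay);
}
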
*/ + + ret = copy_from_user(&delay, __delay, sizeof(delay)); + if (ret == 0) { + /* round up to next larger integral millisecond */ + start_time = litmus_clock(); + do_div(start_time, ONE_MS); + start_time *= ONE_MS; + ret = do_release_ts(start_time + delay); + } + + return ret; +} diff --git a/litmus/trace.c b/litmus/trace.c index 2bcaaf474b7a..6b3e5f77cc5e 100644 --- a/litmus/trace.c +++ b/litmus/trace.c @@ -258,6 +258,17 @@ feather_callback void save_cpu_timestamp_irq(unsigned long event, 0, RECORD_LOCAL_TIMESTAMP); } +feather_callback void save_cpu_task_latency(unsigned long event, + unsigned long when_ptr) +{ + lt_t now = litmus_clock(); + lt_t *when = (lt_t*) when_ptr; + + write_cpu_timestamp(event, TSK_RT, + 0, + 0, LOCAL_IRQ_COUNT, 0, + now - *when, DO_NOT_RECORD_TIMESTAMP); +} feather_callback void msg_sent(unsigned long event, unsigned long to) { diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c new file mode 100644 index 000000000000..06a6a7c17983 --- /dev/null +++ b/litmus/uncachedev.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* device for allocating pages not cached by the CPU */ + +#define UNCACHE_NAME "litmus/uncache" + +void litmus_uncache_vm_open(struct vm_area_struct *vma) +{ +} + +void litmus_uncache_vm_close(struct vm_area_struct *vma) +{ +} + +int litmus_uncache_vm_fault(struct vm_area_struct* vma, + struct vm_fault* vmf) +{ + /* modeled after SG DMA video4linux, but without DMA. */ + /* (see drivers/media/video/videobuf-dma-sg.c) */ + struct page *page; + + page = alloc_page(GFP_USER); + if (!page) + return VM_FAULT_OOM; + + clear_user_highpage(page, (unsigned long)vmf->virtual_address); + vmf->page = page; + + return 0; +} + +static struct vm_operations_struct litmus_uncache_vm_ops = { + .open = litmus_uncache_vm_open, + .close = litmus_uncache_vm_close, + .fault = litmus_uncache_vm_fault, +}; + +static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma) +{ + /* first make sure mapper knows what he's doing */ + + /* you can only map the "first" page */ + if (vma->vm_pgoff != 0) + return -EINVAL; + + /* you can't share it with anyone */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return -EINVAL; + + /* cannot be expanded, and is not a "normal" page. */ + vma->vm_flags |= VM_DONTEXPAND; + + /* noncached pages are not explicitly locked in memory (for now). */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &litmus_uncache_vm_ops; + + return 0; +} + +static struct file_operations litmus_uncache_fops = { + .owner = THIS_MODULE, + .mmap = litmus_uncache_mmap, +}; + +static struct miscdevice litmus_uncache_dev = { + .name = UNCACHE_NAME, + .minor = MISC_DYNAMIC_MINOR, + .fops = &litmus_uncache_fops, + /* pages are not locked, so there is no reason why + anyone cannot allocate an uncache pages */ + .mode = (S_IRUGO | S_IWUGO), +}; + +static int __init init_litmus_uncache_dev(void) +{ + int err; + + printk("Initializing LITMUS^RT uncache device.\n"); + err = misc_register(&litmus_uncache_dev); + if (err) + printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err); + return err; +} + +static void __exit exit_litmus_uncache_dev(void) +{ + misc_deregister(&litmus_uncache_dev); +} + +module_init(init_litmus_uncache_dev); +module_exit(exit_litmus_uncache_dev); -- cgit v1.2.2
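
For completeness, the uncache device registered above is meant to be mapped from userspace roughly as follows; a sketch, where the /dev/litmus/uncache path is an assumption derived from UNCACHE_NAME and the misc-device registration:

/* Sketch: obtain one CPU-uncached page. Only offset 0 may be mapped and the
 * mapping must be private, matching the checks in litmus_uncache_mmap(). */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static void *map_uncached_page(void)
{
        long page_size = sysconf(_SC_PAGESIZE);
        int fd = open("/dev/litmus/uncache", O_RDWR);
        void *mem;

        if (fd < 0)
                return NULL;

        mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE, fd, 0);
        close(fd);
        return mem == MAP_FAILED ? NULL : mem;
}
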