From 4b38febbd59fd33542a343991262119eb9860f5e Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Thu, 17 Dec 2009 21:23:36 -0500 Subject: [ported from 2008.3] Core LITMUS^RT infrastructure Port 2008.3 Core LITMUS^RT infrastructure to Linux 2.6.32 litmus_sched_class implements 4 new methods: - prio_changed: void - switched_to: void - get_rr_interval: return infinity (i.e., 0) - select_task_rq: return current cpu --- Makefile | 4 +- include/linux/sched.h | 7 + include/litmus/feather_buffer.h | 94 ++++++ include/litmus/feather_trace.h | 36 +++ include/litmus/heap.h | 77 +++++ include/litmus/jobs.h | 9 + include/litmus/litmus.h | 177 +++++++++++ include/litmus/rt_param.h | 175 +++++++++++ include/litmus/sched_plugin.h | 159 ++++++++++ include/litmus/sched_trace.h | 191 ++++++++++++ include/litmus/trace.h | 113 +++++++ kernel/fork.c | 7 + kernel/sched.c | 92 +++++- kernel/sched_fair.c | 2 +- kernel/sched_rt.c | 2 +- litmus/Kconfig | 50 +++ litmus/Makefile | 12 + litmus/ft_event.c | 43 +++ litmus/heap.c | 314 +++++++++++++++++++ litmus/jobs.c | 43 +++ litmus/litmus.c | 654 ++++++++++++++++++++++++++++++++++++++++ litmus/sched_litmus.c | 275 +++++++++++++++++ litmus/sched_plugin.c | 199 ++++++++++++ 23 files changed, 2723 insertions(+), 12 deletions(-) create mode 100644 include/litmus/feather_buffer.h create mode 100644 include/litmus/feather_trace.h create mode 100644 include/litmus/heap.h create mode 100644 include/litmus/jobs.h create mode 100644 include/litmus/litmus.h create mode 100644 include/litmus/rt_param.h create mode 100644 include/litmus/sched_plugin.h create mode 100644 include/litmus/sched_trace.h create mode 100644 include/litmus/trace.h create mode 100644 litmus/Kconfig create mode 100644 litmus/Makefile create mode 100644 litmus/ft_event.c create mode 100644 litmus/heap.c create mode 100644 litmus/jobs.c create mode 100644 litmus/litmus.c create mode 100644 litmus/sched_litmus.c create mode 100644 litmus/sched_plugin.c diff --git a/Makefile b/Makefile index f5cdb72ba2ce..2603066a012d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = +EXTRAVERSION =-litmus2010 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* @@ -644,7 +644,7 @@ export mod_strip_cmd ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60bf583..bb046c0adf99 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -38,6 +38,7 @@ #define SCHED_BATCH 3 /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 +#define SCHED_LITMUS 6 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 @@ -94,6 +95,8 @@ struct sched_param { #include +#include + struct exec_domain; struct futex_pi_state; struct robust_list_head; @@ -1505,6 +1508,10 @@ struct task_struct { int make_it_fail; #endif struct prop_local_single dirties; + + /* LITMUS RT parameters and state */ + struct rt_param rt_param; + #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h new file mode 100644 index 000000000000..6c18277fdfc9 --- /dev/null +++ b/include/litmus/feather_buffer.h @@ -0,0 +1,94 @@ 
+#ifndef _FEATHER_BUFFER_H_ +#define _FEATHER_BUFFER_H_ + +/* requires UINT_MAX and memcpy */ + +#define SLOT_FREE 0 +#define SLOT_BUSY 1 +#define SLOT_READY 2 + +struct ft_buffer { + unsigned int slot_count; + unsigned int slot_size; + + int free_count; + unsigned int write_idx; + unsigned int read_idx; + + char* slots; + void* buffer_mem; + unsigned int failed_writes; +}; + +static inline int init_ft_buffer(struct ft_buffer* buf, + unsigned int slot_count, + unsigned int slot_size, + char* slots, + void* buffer_mem) +{ + int i = 0; + if (!slot_count || UINT_MAX % slot_count != slot_count - 1) { + /* The slot count must divide UNIT_MAX + 1 so that when it + * wraps around the index correctly points to 0. + */ + return 0; + } else { + buf->slot_count = slot_count; + buf->slot_size = slot_size; + buf->slots = slots; + buf->buffer_mem = buffer_mem; + buf->free_count = slot_count; + buf->write_idx = 0; + buf->read_idx = 0; + buf->failed_writes = 0; + for (i = 0; i < slot_count; i++) + buf->slots[i] = SLOT_FREE; + return 1; + } +} + +static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr) +{ + int free = fetch_and_dec(&buf->free_count); + unsigned int idx; + if (free <= 0) { + fetch_and_inc(&buf->free_count); + *ptr = 0; + fetch_and_inc(&buf->failed_writes); + return 0; + } else { + idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count; + buf->slots[idx] = SLOT_BUSY; + *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size; + return 1; + } +} + +static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr) +{ + unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size; + buf->slots[idx] = SLOT_READY; +} + + +/* exclusive reader access is assumed */ +static inline int ft_buffer_read(struct ft_buffer* buf, void* dest) +{ + unsigned int idx; + if (buf->free_count == buf->slot_count) + /* nothing available */ + return 0; + idx = buf->read_idx % buf->slot_count; + if (buf->slots[idx] == SLOT_READY) { + memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size, + buf->slot_size); + buf->slots[idx] = SLOT_FREE; + buf->read_idx++; + fetch_and_inc(&buf->free_count); + return 1; + } else + return 0; +} + + +#endif diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h new file mode 100644 index 000000000000..3ac1ee5e0277 --- /dev/null +++ b/include/litmus/feather_trace.h @@ -0,0 +1,36 @@ +#ifndef _FEATHER_TRACE_H_ +#define _FEATHER_TRACE_H_ + + +int ft_enable_event(unsigned long id); +int ft_disable_event(unsigned long id); +int ft_is_event_enabled(unsigned long id); +int ft_disable_all_events(void); + +#ifndef __ARCH_HAS_FEATHER_TRACE +/* provide default implementation */ + +#define feather_callback + +#define MAX_EVENTS 1024 + +extern int ft_events[MAX_EVENTS]; + +#define ft_event(id, callback) \ + if (ft_events[id]) callback(); + +#define ft_event0(id, callback) \ + if (ft_events[id]) callback(id); + +#define ft_event1(id, callback, param) \ + if (ft_events[id]) callback(id, param); + +#define ft_event2(id, callback, param, param2) \ + if (ft_events[id]) callback(id, param, param2); + +#define ft_event3(id, callback, p, p2, p3) \ + if (ft_events[id]) callback(id, p, p2, p3); +#endif + + +#endif diff --git a/include/litmus/heap.h b/include/litmus/heap.h new file mode 100644 index 000000000000..da959b0bec9c --- /dev/null +++ b/include/litmus/heap.h @@ -0,0 +1,77 @@ +/* heaps.h -- Binomial Heaps + * + * (c) 2008, 2009 Bjoern Brandenburg + */ + +#ifndef HEAP_H +#define HEAP_H + +#define NOT_IN_HEAP UINT_MAX + +struct 
heap_node { + struct heap_node* parent; + struct heap_node* next; + struct heap_node* child; + + unsigned int degree; + void* value; + struct heap_node** ref; +}; + +struct heap { + struct heap_node* head; + /* We cache the minimum of the heap. + * This speeds up repeated peek operations. + */ + struct heap_node* min; +}; + +typedef int (*heap_prio_t)(struct heap_node* a, struct heap_node* b); + +void heap_init(struct heap* heap); +void heap_node_init(struct heap_node** ref_to_heap_node_ptr, void* value); + +static inline int heap_node_in_heap(struct heap_node* h) +{ + return h->degree != NOT_IN_HEAP; +} + +static inline int heap_empty(struct heap* heap) +{ + return heap->head == NULL && heap->min == NULL; +} + +/* insert (and reinitialize) a node into the heap */ +void heap_insert(heap_prio_t higher_prio, + struct heap* heap, + struct heap_node* node); + +/* merge addition into target */ +void heap_union(heap_prio_t higher_prio, + struct heap* target, + struct heap* addition); + +struct heap_node* heap_peek(heap_prio_t higher_prio, + struct heap* heap); + +struct heap_node* heap_take(heap_prio_t higher_prio, + struct heap* heap); + +void heap_uncache_min(heap_prio_t higher_prio, struct heap* heap); +int heap_decrease(heap_prio_t higher_prio, struct heap_node* node); + +void heap_delete(heap_prio_t higher_prio, + struct heap* heap, + struct heap_node* node); + +/* allocate from memcache */ +struct heap_node* heap_node_alloc(int gfp_flags); +void heap_node_free(struct heap_node* hn); + +/* allocate a heap node for value and insert into the heap */ +int heap_add(heap_prio_t higher_prio, struct heap* heap, + void* value, int gfp_flags); + +void* heap_take_del(heap_prio_t higher_prio, + struct heap* heap); +#endif diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h new file mode 100644 index 000000000000..9bd361ef3943 --- /dev/null +++ b/include/litmus/jobs.h @@ -0,0 +1,9 @@ +#ifndef __LITMUS_JOBS_H__ +#define __LITMUS_JOBS_H__ + +void prepare_for_next_period(struct task_struct *t); +void release_at(struct task_struct *t, lt_t start); +long complete_job(void); + +#endif + diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h new file mode 100644 index 000000000000..380fcb8acb33 --- /dev/null +++ b/include/litmus/litmus.h @@ -0,0 +1,177 @@ +/* + * Constant definitions related to + * scheduling policy. + */ + +#ifndef _LINUX_LITMUS_H_ +#define _LINUX_LITMUS_H_ + +#include +#include + +extern atomic_t release_master_cpu; + +extern atomic_t __log_seq_no; + +#define TRACE(fmt, args...) \ + sched_trace_log_message("%d P%d: " fmt, atomic_add_return(1, &__log_seq_no), \ + raw_smp_processor_id(), ## args) + +#define TRACE_TASK(t, fmt, args...) \ + TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args) + +#define TRACE_CUR(fmt, args...) \ + TRACE_TASK(current, fmt, ## args) + +#define TRACE_BUG_ON(cond) \ + do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \ + "called from %p current=%s/%d state=%d " \ + "flags=%x partition=%d cpu=%d rtflags=%d"\ + " job=%u knp=%d timeslice=%u\n", \ + #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \ + current->pid, current->state, current->flags, \ + get_partition(current), smp_processor_id(), get_rt_flags(current), \ + current->rt_param.job_params.job_no, current->rt_param.kernel_np, \ + current->rt.time_slice\ + ); } while(0); + + +/* in_list - is a given list_head queued on some list? 
+ */ +static inline int in_list(struct list_head* list) +{ + return !( /* case 1: deleted */ + (list->next == LIST_POISON1 && + list->prev == LIST_POISON2) + || + /* case 2: initialized */ + (list->next == list && + list->prev == list) + ); +} + +#define NO_CPU 0xffffffff + +void litmus_fork(struct task_struct *tsk); +void litmus_exec(void); +/* clean up real-time state of a task */ +void exit_litmus(struct task_struct *dead_tsk); + +long litmus_admit_task(struct task_struct *tsk); +void litmus_exit_task(struct task_struct *tsk); + +#define is_realtime(t) ((t)->policy == SCHED_LITMUS) +#define rt_transition_pending(t) \ + ((t)->rt_param.transition_pending) + +#define tsk_rt(t) (&(t)->rt_param) + +/* Realtime utility macros */ +#define get_rt_flags(t) (tsk_rt(t)->flags) +#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) +#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost) +#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) +#define get_rt_period(t) (tsk_rt(t)->task_params.period) +#define get_rt_phase(t) (tsk_rt(t)->task_params.phase) +#define get_partition(t) (tsk_rt(t)->task_params.cpu) +#define get_deadline(t) (tsk_rt(t)->job_params.deadline) +#define get_release(t) (tsk_rt(t)->job_params.release) +#define get_class(t) (tsk_rt(t)->task_params.cls) + +inline static int budget_exhausted(struct task_struct* t) +{ + return get_exec_time(t) >= get_exec_cost(t); +} + + +#define is_hrt(t) \ + (tsk_rt(t)->task_params.class == RT_CLASS_HARD) +#define is_srt(t) \ + (tsk_rt(t)->task_params.class == RT_CLASS_SOFT) +#define is_be(t) \ + (tsk_rt(t)->task_params.class == RT_CLASS_BEST_EFFORT) + +/* Our notion of time within LITMUS: kernel monotonic time. */ +static inline lt_t litmus_clock(void) +{ + return ktime_to_ns(ktime_get()); +} + +/* A macro to convert from nanoseconds to ktime_t. */ +#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t) + +#define get_domain(t) (tsk_rt(t)->domain) + +/* Honor the flag in the preempt_count variable that is set + * when scheduling is in progress. + */ +#define is_running(t) \ + ((t)->state == TASK_RUNNING || \ + task_thread_info(t)->preempt_count & PREEMPT_ACTIVE) + +#define is_blocked(t) \ + (!is_running(t)) +#define is_released(t, now) \ + (lt_before_eq(get_release(t), now)) +#define is_tardy(t, now) \ + (lt_before_eq(tsk_rt(t)->job_params.deadline, now)) + +/* real-time comparison macros */ +#define earlier_deadline(a, b) (lt_before(\ + (a)->rt_param.job_params.deadline,\ + (b)->rt_param.job_params.deadline)) +#define earlier_release(a, b) (lt_before(\ + (a)->rt_param.job_params.release,\ + (b)->rt_param.job_params.release)) + +#define make_np(t) do {t->rt_param.kernel_np++;} while(0); +#define take_np(t) do {t->rt_param.kernel_np--;} while(0); + +#ifdef CONFIG_SRP +void srp_ceiling_block(void); +#else +#define srp_ceiling_block() /* nothing */ +#endif + +#define heap2task(hn) ((struct task_struct*) hn->value) + +static inline int is_np(struct task_struct *t) +{ + return tsk_rt(t)->kernel_np; +} + +#define request_exit_np(t) + +static inline int is_present(struct task_struct* t) +{ + return t && tsk_rt(t)->present; +} + + +/* make the unit explicit */ +typedef unsigned long quanta_t; + +enum round { + FLOOR, + CEIL +}; + + +/* Tick period is used to convert ns-specified execution + * costs and periods into tick-based equivalents. 
+ */ +extern ktime_t tick_period; + +static inline quanta_t time2quanta(lt_t time, enum round round) +{ + s64 quantum_length = ktime_to_ns(tick_period); + + if (do_div(time, quantum_length) && round == CEIL) + time++; + return (quanta_t) time; +} + +/* By how much is cpu staggered behind CPU 0? */ +u64 cpu_stagger_offset(int cpu); + +#endif diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h new file mode 100644 index 000000000000..c599f848d1ed --- /dev/null +++ b/include/litmus/rt_param.h @@ -0,0 +1,175 @@ +/* + * Definition of the scheduler plugin interface. + * + */ +#ifndef _LINUX_RT_PARAM_H_ +#define _LINUX_RT_PARAM_H_ + +/* Litmus time type. */ +typedef unsigned long long lt_t; + +static inline int lt_after(lt_t a, lt_t b) +{ + return ((long long) b) - ((long long) a) < 0; +} +#define lt_before(a, b) lt_after(b, a) + +static inline int lt_after_eq(lt_t a, lt_t b) +{ + return ((long long) a) - ((long long) b) >= 0; +} +#define lt_before_eq(a, b) lt_after_eq(b, a) + +/* different types of clients */ +typedef enum { + RT_CLASS_HARD, + RT_CLASS_SOFT, + RT_CLASS_BEST_EFFORT +} task_class_t; + +struct rt_task { + lt_t exec_cost; + lt_t period; + lt_t phase; + unsigned int cpu; + task_class_t cls; +}; + +/* don't export internal data structures to user space (liblitmus) */ +#ifdef __KERNEL__ + +struct _rt_domain; +struct heap_node; +struct release_heap; + +struct rt_job { + /* Time instant the the job was or will be released. */ + lt_t release; + /* What is the current deadline? */ + lt_t deadline; + + /* How much service has this job received so far? */ + lt_t exec_time; + + /* Which job is this. This is used to let user space + * specify which job to wait for, which is important if jobs + * overrun. If we just call sys_sleep_next_period() then we + * will unintentionally miss jobs after an overrun. + * + * Increase this sequence number when a job is released. + */ + unsigned int job_no; +}; + + +struct pfair_param; + +/* RT task parameters for scheduling extensions + * These parameters are inherited during clone and therefore must + * be explicitly set up before the task set is launched. + */ +struct rt_param { + /* is the task sleeping? */ + unsigned int flags:8; + + /* do we need to check for srp blocking? */ + unsigned int srp_non_recurse:1; + + /* is the task present? (true if it can be scheduled) */ + unsigned int present:1; + + /* user controlled parameters */ + struct rt_task task_params; + + /* timing parameters */ + struct rt_job job_params; + + /* task representing the current "inherited" task + * priority, assigned by inherit_priority and + * return priority in the scheduler plugins. + * could point to self if PI does not result in + * an increased task priority. + */ + struct task_struct* inh_task; + + /* Don't just dereference this pointer in kernel space! + * It might very well point to junk or nothing at all. + * NULL indicates that the task has not requested any non-preemptable + * section support. + * Not inherited upon fork. + */ + short* np_flag; + + /* re-use unused counter in plugins that don't need it */ + union { + /* For the FMLP under PSN-EDF, it is required to make the task + * non-preemptive from kernel space. In order not to interfere with + * user space, this counter indicates the kernel space np setting. + * kernel_np > 0 => task is non-preemptive + */ + unsigned int kernel_np; + + /* Used by GQ-EDF */ + unsigned int last_cpu; + }; + + /* This field can be used by plugins to store where the task + * is currently scheduled. 
It is the responsibility of the + * plugin to avoid race conditions. + * + * This used by GSN-EDF and PFAIR. + */ + volatile int scheduled_on; + + /* Is the stack of the task currently in use? This is updated by + * the LITMUS core. + * + * Be careful to avoid deadlocks! + */ + volatile int stack_in_use; + + /* This field can be used by plugins to store where the task + * is currently linked. It is the responsibility of the plugin + * to avoid race conditions. + * + * Used by GSN-EDF. + */ + volatile int linked_on; + + /* PFAIR/PD^2 state. Allocated on demand. */ + struct pfair_param* pfair; + + /* Fields saved before BE->RT transition. + */ + int old_policy; + int old_prio; + + /* ready queue for this task */ + struct _rt_domain* domain; + + /* heap element for this task + * + * Warning: Don't statically allocate this node. The heap + * implementation swaps these between tasks, thus after + * dequeuing from a heap you may end up with a different node + * then the one you had when enqueuing the task. For the same + * reason, don't obtain and store references to this node + * other than this pointer (which is updated by the heap + * implementation). + */ + struct heap_node* heap_node; + struct release_heap* rel_heap; + + /* Used by rt_domain to queue task in release list. + */ + struct list_head list; +}; + +/* Possible RT flags */ +#define RT_F_RUNNING 0x00000000 +#define RT_F_SLEEP 0x00000001 +#define RT_F_EXIT_SEM 0x00000008 + +#endif + +#endif diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h new file mode 100644 index 000000000000..94952f6ccbfa --- /dev/null +++ b/include/litmus/sched_plugin.h @@ -0,0 +1,159 @@ +/* + * Definition of the scheduler plugin interface. + * + */ +#ifndef _LINUX_SCHED_PLUGIN_H_ +#define _LINUX_SCHED_PLUGIN_H_ + +#include + +/* struct for semaphore with priority inheritance */ +struct pi_semaphore { + atomic_t count; + int sleepers; + wait_queue_head_t wait; + union { + /* highest-prio holder/waiter */ + struct task_struct *task; + struct task_struct* cpu_task[NR_CPUS]; + } hp; + /* current lock holder */ + struct task_struct *holder; +}; + +/************************ setup/tear down ********************/ + +typedef long (*activate_plugin_t) (void); +typedef long (*deactivate_plugin_t) (void); + + + +/********************* scheduler invocation ******************/ + +/* Plugin-specific realtime tick handler */ +typedef void (*scheduler_tick_t) (struct task_struct *cur); +/* Novell make sched decision function */ +typedef struct task_struct* (*schedule_t)(struct task_struct * prev); +/* Clean up after the task switch has occured. + * This function is called after every (even non-rt) task switch. + */ +typedef void (*finish_switch_t)(struct task_struct *prev); + + +/********************* task state changes ********************/ + +/* Called to setup a new real-time task. + * Release the first job, enqueue, etc. + * Task may already be running. + */ +typedef void (*task_new_t) (struct task_struct *task, + int on_rq, + int running); + +/* Called to re-introduce a task after blocking. + * Can potentially be called multiple times. + */ +typedef void (*task_wake_up_t) (struct task_struct *task); +/* called to notify the plugin of a blocking real-time task + * it will only be called for real-time tasks and before schedule is called */ +typedef void (*task_block_t) (struct task_struct *task); +/* Called when a real-time task exits or changes to a different scheduling + * class. 
+ * Free any allocated resources + */ +typedef void (*task_exit_t) (struct task_struct *); + +/* Called when the new_owner is released from the wait queue + * it should now inherit the priority from sem, _before_ it gets readded + * to any queue + */ +typedef long (*inherit_priority_t) (struct pi_semaphore *sem, + struct task_struct *new_owner); + +/* Called when the current task releases a semahpore where it might have + * inherited a piority from + */ +typedef long (*return_priority_t) (struct pi_semaphore *sem); + +/* Called when a task tries to acquire a semaphore and fails. Check if its + * priority is higher than that of the current holder. + */ +typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t); + + + + +/********************* sys call backends ********************/ +/* This function causes the caller to sleep until the next release */ +typedef long (*complete_job_t) (void); + +typedef long (*admit_task_t)(struct task_struct* tsk); + +typedef void (*release_at_t)(struct task_struct *t, lt_t start); + +struct sched_plugin { + struct list_head list; + /* basic info */ + char *plugin_name; + + /* setup */ + activate_plugin_t activate_plugin; + deactivate_plugin_t deactivate_plugin; + +#ifdef CONFIG_SRP + unsigned int srp_active; +#endif + + /* scheduler invocation */ + scheduler_tick_t tick; + schedule_t schedule; + finish_switch_t finish_switch; + + /* syscall backend */ + complete_job_t complete_job; + release_at_t release_at; + + /* task state changes */ + admit_task_t admit_task; + + task_new_t task_new; + task_wake_up_t task_wake_up; + task_block_t task_block; + task_exit_t task_exit; + +#ifdef CONFIG_FMLP + /* priority inheritance */ + unsigned int fmlp_active; + inherit_priority_t inherit_priority; + return_priority_t return_priority; + pi_block_t pi_block; +#endif +} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); + + +extern struct sched_plugin *litmus; + +int register_sched_plugin(struct sched_plugin* plugin); +struct sched_plugin* find_sched_plugin(const char* name); +int print_sched_plugins(char* buf, int max); + +static inline int srp_active(void) +{ +#ifdef CONFIG_SRP + return litmus->srp_active; +#else + return 0; +#endif +} +static inline int fmlp_active(void) +{ +#ifdef CONFIG_FMLP + return litmus->fmlp_active; +#else + return 0; +#endif +} + +extern struct sched_plugin linux_sched_plugin; + +#endif diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h new file mode 100644 index 000000000000..afd0391d127b --- /dev/null +++ b/include/litmus/sched_trace.h @@ -0,0 +1,191 @@ +/* sched_trace.h -- record scheduler events to a byte stream for offline analysis. + */ +#ifndef _LINUX_SCHED_TRACE_H_ +#define _LINUX_SCHED_TRACE_H_ + +/* all times in nanoseconds */ + +struct st_trace_header { + u8 type; /* Of what type is this record? */ + u8 cpu; /* On which CPU was it recorded? */ + u16 pid; /* PID of the task. */ + u32 job; /* The job sequence number. */ +}; + +#define ST_NAME_LEN 16 +struct st_name_data { + char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ +}; + +struct st_param_data { /* regular params */ + u32 wcet; + u32 period; + u32 phase; + u8 partition; + u8 __unused[3]; +}; + +struct st_release_data { /* A job is was/is going to be released. */ + u64 release; /* What's the release time? */ + u64 deadline; /* By when must it finish? */ +}; + +struct st_assigned_data { /* A job was asigned to a CPU. */ + u64 when; + u8 target; /* Where should it execute? 
*/ + u8 __unused[3]; +}; + +struct st_switch_to_data { /* A process was switched to on a given CPU. */ + u64 when; /* When did this occur? */ + u32 exec_time; /* Time the current job has executed. */ + +}; + +struct st_switch_away_data { /* A process was switched away from on a given CPU. */ + u64 when; + u64 exec_time; +}; + +struct st_completion_data { /* A job completed. */ + u64 when; + u8 forced:1; /* Set to 1 if job overran and kernel advanced to the + * next task automatically; set to 0 otherwise. + */ + u8 __uflags:7; + u8 __unused[3]; +}; + +struct st_block_data { /* A task blocks. */ + u64 when; + u64 __unused; +}; + +struct st_resume_data { /* A task resumes. */ + u64 when; + u64 __unused; +}; + +struct st_sys_release_data { + u64 when; + u64 release; +}; + +#define DATA(x) struct st_ ## x ## _data x; + +typedef enum { + ST_NAME = 1, /* Start at one, so that we can spot + * uninitialized records. */ + ST_PARAM, + ST_RELEASE, + ST_ASSIGNED, + ST_SWITCH_TO, + ST_SWITCH_AWAY, + ST_COMPLETION, + ST_BLOCK, + ST_RESUME, + ST_SYS_RELEASE, +} st_event_record_type_t; + +struct st_event_record { + struct st_trace_header hdr; + union { + u64 raw[2]; + + DATA(name); + DATA(param); + DATA(release); + DATA(assigned); + DATA(switch_to); + DATA(switch_away); + DATA(completion); + DATA(block); + DATA(resume); + DATA(sys_release); + + } data; +}; + +#undef DATA + +#ifdef __KERNEL__ + +#include +#include + +#ifdef CONFIG_SCHED_TASK_TRACE + +#define SCHED_TRACE(id, callback, task) \ + ft_event1(id, callback, task) +#define SCHED_TRACE2(id, callback, task, xtra) \ + ft_event2(id, callback, task, xtra) + +/* provide prototypes; needed on sparc64 */ +#ifndef NO_TASK_TRACE_DECLS +feather_callback void do_sched_trace_task_name(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_param(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_release(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_switch_to(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_switch_away(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_completion(unsigned long id, + struct task_struct* task, + unsigned long forced); +feather_callback void do_sched_trace_task_block(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_task_resume(unsigned long id, + struct task_struct* task); +feather_callback void do_sched_trace_sys_release(unsigned long id, + lt_t* start); +#endif + +#else + +#define SCHED_TRACE(id, callback, task) /* no tracing */ +#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ + +#endif + + +#define SCHED_TRACE_BASE_ID 500 + + +#define sched_trace_task_name(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t) +#define sched_trace_task_param(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t) +#define sched_trace_task_release(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t) +#define sched_trace_task_switch_to(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t) +#define sched_trace_task_switch_away(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t) +#define sched_trace_task_completion(t, forced) \ + SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \ + forced) +#define sched_trace_task_block(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, 
do_sched_trace_task_block, t) +#define sched_trace_task_resume(t) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t) + +#define sched_trace_sys_release(when) \ + SCHED_TRACE(SCHED_TRACE_BASE_ID + 9, do_sched_trace_sys_release, when) + +#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ + +#ifdef CONFIG_SCHED_DEBUG_TRACE +void sched_trace_log_message(const char* fmt, ...); +void dump_trace_buffer(int max); +#else + +#define sched_trace_log_message(fmt, ...) + +#endif + +#endif /* __KERNEL__ */ + +#endif diff --git a/include/litmus/trace.h b/include/litmus/trace.h new file mode 100644 index 000000000000..e8e0c7b6cc6a --- /dev/null +++ b/include/litmus/trace.h @@ -0,0 +1,113 @@ +#ifndef _SYS_TRACE_H_ +#define _SYS_TRACE_H_ + +#ifdef CONFIG_SCHED_OVERHEAD_TRACE + +#include +#include + + +/*********************** TIMESTAMPS ************************/ + +enum task_type_marker { + TSK_BE, + TSK_RT, + TSK_UNKNOWN +}; + +struct timestamp { + uint64_t timestamp; + uint32_t seq_no; + uint8_t cpu; + uint8_t event; + uint8_t task_type; +}; + +/* tracing callbacks */ +feather_callback void save_timestamp(unsigned long event); +feather_callback void save_timestamp_def(unsigned long event, unsigned long type); +feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr); +feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu); + + +#define TIMESTAMP(id) ft_event0(id, save_timestamp) + +#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, def) + +#define TTIMESTAMP(id, task) \ + ft_event1(id, save_timestamp_task, (unsigned long) task) + +#define CTIMESTAMP(id, cpu) \ + ft_event1(id, save_timestamp_cpu, cpu) + +#else /* !CONFIG_SCHED_OVERHEAD_TRACE */ + +#define TIMESTAMP(id) /* no tracing */ + +#define DTIMESTAMP(id, def) /* no tracing */ + +#define TTIMESTAMP(id, task) /* no tracing */ + +#define CTIMESTAMP(id, cpu) /* no tracing */ + +#endif + + +/* Convention for timestamps + * ========================= + * + * In order to process the trace files with a common tool, we use the following + * convention to measure execution times: The end time id of a code segment is + * always the next number after the start time event id. 
+ */ + +#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only + * care + * about + * next */ +#define TS_SCHED_END(t) TTIMESTAMP(101, t) +#define TS_SCHED2_START(t) TTIMESTAMP(102, t) +#define TS_SCHED2_END(t) TTIMESTAMP(103, t) + +#define TS_CXS_START(t) TTIMESTAMP(104, t) +#define TS_CXS_END(t) TTIMESTAMP(105, t) + +#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT) +#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT) + +#define TS_TICK_START(t) TTIMESTAMP(110, t) +#define TS_TICK_END(t) TTIMESTAMP(111, t) + + +#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ +#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ + +#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ +#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ + +#define TS_ENTER_NP_START TIMESTAMP(140) +#define TS_ENTER_NP_END TIMESTAMP(141) + +#define TS_EXIT_NP_START TIMESTAMP(150) +#define TS_EXIT_NP_END TIMESTAMP(151) + +#define TS_SRP_UP_START TIMESTAMP(160) +#define TS_SRP_UP_END TIMESTAMP(161) +#define TS_SRP_DOWN_START TIMESTAMP(162) +#define TS_SRP_DOWN_END TIMESTAMP(163) + +#define TS_PI_UP_START TIMESTAMP(170) +#define TS_PI_UP_END TIMESTAMP(171) +#define TS_PI_DOWN_START TIMESTAMP(172) +#define TS_PI_DOWN_END TIMESTAMP(173) + +#define TS_FIFO_UP_START TIMESTAMP(180) +#define TS_FIFO_UP_END TIMESTAMP(181) +#define TS_FIFO_DOWN_START TIMESTAMP(182) +#define TS_FIFO_DOWN_END TIMESTAMP(183) + +#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) +#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) + + +#endif /* !_SYS_TRACE_H_ */ diff --git a/kernel/fork.c b/kernel/fork.c index 166b8c49257c..889730cce3ad 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -74,6 +74,9 @@ #include +#include +#include + /* * Protected counters by write_lock_irq(&tasklist_lock) */ @@ -162,6 +165,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + exit_litmus(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); @@ -244,6 +248,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->stack = ti; + /* Don't let the new task be a real-time task. 
*/ + memset(&tsk->rt_param, 0, sizeof(struct rt_task)); + err = prop_local_init_single(&tsk->dirties); if (err) goto out; diff --git a/kernel/sched.c b/kernel/sched.c index 3c11ae0a948d..fcaed6b96442 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -77,6 +77,9 @@ #include "sched_cpupri.h" +#include +#include + #define CREATE_TRACE_POINTS #include @@ -571,6 +574,8 @@ struct rq { atomic_t nr_iowait; + struct task_struct *litmus_next; + #ifdef CONFIG_SMP struct root_domain *rd; struct sched_domain *sd; @@ -1815,11 +1820,12 @@ static void calc_load_account_active(struct rq *this_rq); #include "sched_idletask.c" #include "sched_fair.c" #include "sched_rt.c" +#include "../litmus/sched_litmus.c" #ifdef CONFIG_SCHED_DEBUG # include "sched_debug.c" #endif -#define sched_class_highest (&rt_sched_class) +#define sched_class_highest (&litmus_sched_class) #define for_each_class(class) \ for (class = sched_class_highest; class; class = class->next) @@ -2343,6 +2349,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, unsigned long flags; struct rq *rq, *orig_rq; + if (is_realtime(p)) + TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); + if (!sched_feat(SYNC_WAKEUPS)) wake_flags &= ~WF_SYNC; @@ -2361,7 +2370,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, orig_cpu = cpu; #ifdef CONFIG_SMP - if (unlikely(task_running(rq, p))) + if (unlikely(task_running(rq, p)) || is_realtime(p)) goto out_activate; /* @@ -2442,6 +2451,8 @@ out_running: p->sched_class->task_wake_up(rq, p); #endif out: + if (is_realtime(p)) + TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state); task_rq_unlock(rq, &flags); put_cpu(); @@ -2750,6 +2761,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); + litmus->finish_switch(prev); + prev->rt_param.stack_in_use = NO_CPU; perf_event_task_sched_in(current, cpu_of(rq)); finish_lock_switch(rq, prev); @@ -5232,18 +5245,31 @@ void scheduler_tick(void) sched_clock_tick(); + TS_TICK_START(current); + spin_lock(&rq->lock); update_rq_clock(rq); update_cpu_load(rq); curr->sched_class->task_tick(rq, curr, 0); + + /* + * LITMUS_TODO: can we move litmus_tick inside task_tick + * or will deadlock ? + */ + TS_PLUGIN_TICK_START; + litmus_tick(rq, curr); + TS_PLUGIN_TICK_END; + spin_unlock(&rq->lock); perf_event_task_tick(curr, cpu); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); - trigger_load_balance(rq, cpu); + if (!is_realtime(current)) + trigger_load_balance(rq, cpu); #endif + TS_TICK_END(current); } notrace unsigned long get_parent_ip(unsigned long addr) @@ -5387,11 +5413,17 @@ pick_next_task(struct rq *rq) * Optimization: we know that if all tasks are in * the fair class we can call that function directly: */ + /* + * LITMUS_TODO: can we move processes out of fair class? + * i.e., create a litmus_rq + */ + /* Don't do this for LITMUS if (likely(rq->nr_running == rq->cfs.nr_running)) { p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; } + */ class = sched_class_highest; for ( ; ; ) { @@ -5426,6 +5458,8 @@ need_resched: release_kernel_lock(prev); need_resched_nonpreemptible: + TS_SCHED_START; + sched_trace_task_switch_away(prev); schedule_debug(prev); @@ -5436,6 +5470,14 @@ need_resched_nonpreemptible: update_rq_clock(rq); clear_tsk_need_resched(prev); + /* + * LITMUS_TODO: can we integrate litmus_schedule in + * pick_next_task? 
+ */ + TS_PLUGIN_SCHED_START; + litmus_schedule(rq, prev); + TS_PLUGIN_SCHED_END; + if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely(signal_pending_state(prev->state, prev))) prev->state = TASK_RUNNING; @@ -5460,22 +5502,35 @@ need_resched_nonpreemptible: rq->curr = next; ++*switch_count; + TS_SCHED_END(next); + TS_CXS_START(next); context_switch(rq, prev, next); /* unlocks the rq */ + TS_CXS_END(current); /* * the context switch might have flipped the stack from under * us, hence refresh the local variables. */ cpu = smp_processor_id(); rq = cpu_rq(cpu); - } else + } else { + TS_SCHED_END(prev); spin_unlock_irq(&rq->lock); + } + + TS_SCHED2_START(current); + sched_trace_task_switch_to(current); post_schedule(rq); - if (unlikely(reacquire_kernel_lock(current) < 0)) + if (unlikely(reacquire_kernel_lock(current) < 0)) { + TS_SCHED2_END(current); goto need_resched_nonpreemptible; + } preempt_enable_no_resched(); + + TS_SCHED2_END(current); + if (need_resched()) goto need_resched; } @@ -6185,6 +6240,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) case SCHED_RR: p->sched_class = &rt_sched_class; break; + case SCHED_LITMUS: + p->sched_class = &litmus_sched_class; + break; } p->rt_priority = prio; @@ -6232,7 +6290,7 @@ recheck: if (policy != SCHED_FIFO && policy != SCHED_RR && policy != SCHED_NORMAL && policy != SCHED_BATCH && - policy != SCHED_IDLE) + policy != SCHED_IDLE && policy != SCHED_LITMUS) return -EINVAL; } @@ -6247,6 +6305,8 @@ recheck: return -EINVAL; if (rt_policy(policy) != (param->sched_priority != 0)) return -EINVAL; + if (policy == SCHED_LITMUS && policy == p->policy) + return -EINVAL; /* * Allow unprivileged RT tasks to decrease priority: @@ -6301,6 +6361,12 @@ recheck: return retval; } + if (policy == SCHED_LITMUS) { + retval = litmus_admit_task(p); + if (retval) + return retval; + } + /* * make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: @@ -6328,9 +6394,18 @@ recheck: p->sched_reset_on_fork = reset_on_fork; + if (p->policy == SCHED_LITMUS) + litmus_exit_task(p); + oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); + if (policy == SCHED_LITMUS) { + p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU; + p->rt_param.present = running; + litmus->task_new(p, on_rq, running); + } + if (running) p->sched_class->set_curr_task(rq); if (on_rq) { @@ -6500,10 +6575,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) read_lock(&tasklist_lock); p = find_process_by_pid(pid); - if (!p) { + /* Don't set affinity if task not found and for LITMUS tasks */ + if (!p || is_realtime(p)) { read_unlock(&tasklist_lock); put_online_cpus(); - return -ESRCH; + return p ? 
-EPERM : -ESRCH; } /* diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 37087a7fac22..ef43ff95999d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1598,7 +1598,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ update_curr(cfs_rq); - if (unlikely(rt_prio(p->prio))) { + if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS) { resched_task(curr); return; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index a4d790cddb19..f622880e918f 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1004,7 +1004,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) { - if (p->prio < rq->curr->prio) { + if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) { resched_task(rq->curr); return; } diff --git a/litmus/Kconfig b/litmus/Kconfig new file mode 100644 index 000000000000..f8c642658a2f --- /dev/null +++ b/litmus/Kconfig @@ -0,0 +1,50 @@ +menu "LITMUS^RT" + +menu "Tracing" + +config FEATHER_TRACE + bool "Feather-Trace Infrastructure" + default y + help + Feather-Trace basic tracing infrastructure. Includes device file + driver and instrumentation point support. + + +config SCHED_TASK_TRACE + bool "Trace real-time tasks" + depends on FEATHER_TRACE + default y + help + Include support for the sched_trace_XXX() tracing functions. This + allows the collection of real-time task events such as job + completions, job releases, early completions, etc. This results in a + small overhead in the scheduling code. Disable if the overhead is not + acceptable (e.g., benchmarking). + + Say Yes for debugging. + Say No for overhead tracing. + +config SCHED_OVERHEAD_TRACE + bool "Record timestamps for overhead measurements" + depends on FEATHER_TRACE + default n + help + Export event stream for overhead tracing. + Say Yes for overhead tracing. + +config SCHED_DEBUG_TRACE + bool "TRACE() debugging" + default y + help + Include support for sched_trace_log_messageg(), which is used to + implement TRACE(). If disabled, no TRACE() messages will be included + in the kernel, and no overheads due to debugging statements will be + incurred by the scheduler. Disable if the overhead is not acceptable + (e.g. benchmarking). + + Say Yes for debugging. + Say No for overhead tracing. 
+ +endmenu + +endmenu diff --git a/litmus/Makefile b/litmus/Makefile new file mode 100644 index 000000000000..f4c2d564cd0b --- /dev/null +++ b/litmus/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for LITMUS^RT +# + +obj-y = sched_plugin.o litmus.o \ + jobs.o \ + heap.o + +obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o +obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o +obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o +obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o diff --git a/litmus/ft_event.c b/litmus/ft_event.c new file mode 100644 index 000000000000..6084b6d6b364 --- /dev/null +++ b/litmus/ft_event.c @@ -0,0 +1,43 @@ +#include + +#include + +#ifndef __ARCH_HAS_FEATHER_TRACE +/* provide dummy implementation */ + +int ft_events[MAX_EVENTS]; + +int ft_enable_event(unsigned long id) +{ + if (id < MAX_EVENTS) { + ft_events[id]++; + return 1; + } else + return 0; +} + +int ft_disable_event(unsigned long id) +{ + if (id < MAX_EVENTS && ft_events[id]) { + ft_events[id]--; + return 1; + } else + return 0; +} + +int ft_disable_all_events(void) +{ + int i; + + for (i = 0; i < MAX_EVENTS; i++) + ft_events[i] = 0; + + return MAX_EVENTS; +} + +int ft_is_event_enabled(unsigned long id) +{ + return id < MAX_EVENTS && ft_events[id]; +} + +#endif diff --git a/litmus/heap.c b/litmus/heap.c new file mode 100644 index 000000000000..112d14da46c3 --- /dev/null +++ b/litmus/heap.c @@ -0,0 +1,314 @@ +#include "linux/kernel.h" +#include "litmus/heap.h" + +void heap_init(struct heap* heap) +{ + heap->head = NULL; + heap->min = NULL; +} + +void heap_node_init(struct heap_node** _h, void* value) +{ + struct heap_node* h = *_h; + h->parent = NULL; + h->next = NULL; + h->child = NULL; + h->degree = NOT_IN_HEAP; + h->value = value; + h->ref = _h; +} + + +/* make child a subtree of root */ +static void __heap_link(struct heap_node* root, + struct heap_node* child) +{ + child->parent = root; + child->next = root->child; + root->child = child; + root->degree++; +} + +/* merge root lists */ +static struct heap_node* __heap_merge(struct heap_node* a, + struct heap_node* b) +{ + struct heap_node* head = NULL; + struct heap_node** pos = &head; + + while (a && b) { + if (a->degree < b->degree) { + *pos = a; + a = a->next; + } else { + *pos = b; + b = b->next; + } + pos = &(*pos)->next; + } + if (a) + *pos = a; + else + *pos = b; + return head; +} + +/* reverse a linked list of nodes. 
also clears parent pointer */ +static struct heap_node* __heap_reverse(struct heap_node* h) +{ + struct heap_node* tail = NULL; + struct heap_node* next; + + if (!h) + return h; + + h->parent = NULL; + while (h->next) { + next = h->next; + h->next = tail; + tail = h; + h = next; + h->parent = NULL; + } + h->next = tail; + return h; +} + +static void __heap_min(heap_prio_t higher_prio, struct heap* heap, + struct heap_node** prev, struct heap_node** node) +{ + struct heap_node *_prev, *cur; + *prev = NULL; + + if (!heap->head) { + *node = NULL; + return; + } + + *node = heap->head; + _prev = heap->head; + cur = heap->head->next; + while (cur) { + if (higher_prio(cur, *node)) { + *node = cur; + *prev = _prev; + } + _prev = cur; + cur = cur->next; + } +} + +static void __heap_union(heap_prio_t higher_prio, struct heap* heap, + struct heap_node* h2) +{ + struct heap_node* h1; + struct heap_node *prev, *x, *next; + if (!h2) + return; + h1 = heap->head; + if (!h1) { + heap->head = h2; + return; + } + h1 = __heap_merge(h1, h2); + prev = NULL; + x = h1; + next = x->next; + while (next) { + if (x->degree != next->degree || + (next->next && next->next->degree == x->degree)) { + /* nothing to do, advance */ + prev = x; + x = next; + } else if (higher_prio(x, next)) { + /* x becomes the root of next */ + x->next = next->next; + __heap_link(x, next); + } else { + /* next becomes the root of x */ + if (prev) + prev->next = next; + else + h1 = next; + __heap_link(next, x); + x = next; + } + next = x->next; + } + heap->head = h1; +} + +static struct heap_node* __heap_extract_min(heap_prio_t higher_prio, + struct heap* heap) +{ + struct heap_node *prev, *node; + __heap_min(higher_prio, heap, &prev, &node); + if (!node) + return NULL; + if (prev) + prev->next = node->next; + else + heap->head = node->next; + __heap_union(higher_prio, heap, __heap_reverse(node->child)); + return node; +} + +/* insert (and reinitialize) a node into the heap */ +void heap_insert(heap_prio_t higher_prio, struct heap* heap, + struct heap_node* node) +{ + struct heap_node *min; + node->child = NULL; + node->parent = NULL; + node->next = NULL; + node->degree = 0; + if (heap->min && higher_prio(node, heap->min)) { + /* swap min cache */ + min = heap->min; + min->child = NULL; + min->parent = NULL; + min->next = NULL; + min->degree = 0; + __heap_union(higher_prio, heap, min); + heap->min = node; + } else + __heap_union(higher_prio, heap, node); +} + +void heap_uncache_min(heap_prio_t higher_prio, struct heap* heap) +{ + struct heap_node* min; + if (heap->min) { + min = heap->min; + heap->min = NULL; + heap_insert(higher_prio, heap, min); + } +} + +/* merge addition into target */ +void heap_union(heap_prio_t higher_prio, + struct heap* target, struct heap* addition) +{ + /* first insert any cached minima, if necessary */ + heap_uncache_min(higher_prio, target); + heap_uncache_min(higher_prio, addition); + __heap_union(higher_prio, target, addition->head); + /* this is a destructive merge */ + addition->head = NULL; +} + +struct heap_node* heap_peek(heap_prio_t higher_prio, + struct heap* heap) +{ + if (!heap->min) + heap->min = __heap_extract_min(higher_prio, heap); + return heap->min; +} + +struct heap_node* heap_take(heap_prio_t higher_prio, + struct heap* heap) +{ + struct heap_node *node; + if (!heap->min) + heap->min = __heap_extract_min(higher_prio, heap); + node = heap->min; + heap->min = NULL; + if (node) + node->degree = NOT_IN_HEAP; + return node; +} + +int heap_decrease(heap_prio_t higher_prio, struct heap_node* node) +{ 
+ struct heap_node *parent; + struct heap_node** tmp_ref; + void* tmp; + + /* bubble up */ + parent = node->parent; + while (parent && higher_prio(node, parent)) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + + return parent != NULL; +} + +void heap_delete(heap_prio_t higher_prio, struct heap* heap, + struct heap_node* node) +{ + struct heap_node *parent, *prev, *pos; + struct heap_node** tmp_ref; + void* tmp; + + if (heap->min != node) { + /* bubble up */ + parent = node->parent; + while (parent) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + /* now delete: + * first find prev */ + prev = NULL; + pos = heap->head; + while (pos != node) { + prev = pos; + pos = pos->next; + } + /* we have prev, now remove node */ + if (prev) + prev->next = node->next; + else + heap->head = node->next; + __heap_union(higher_prio, heap, __heap_reverse(node->child)); + } else + heap->min = NULL; + node->degree = NOT_IN_HEAP; +} + +/* allocate a heap node for value and insert into the heap */ +int heap_add(heap_prio_t higher_prio, struct heap* heap, + void* value, int gfp_flags) +{ + struct heap_node* hn = heap_node_alloc(gfp_flags); + if (likely(hn)) { + heap_node_init(&hn, value); + heap_insert(higher_prio, heap, hn); + } + return hn != NULL; +} + +void* heap_take_del(heap_prio_t higher_prio, + struct heap* heap) +{ + struct heap_node* hn = heap_take(higher_prio, heap); + void* ret = NULL; + if (hn) { + ret = hn->value; + heap_node_free(hn); + } + return ret; +} diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 000000000000..36e314625d86 --- /dev/null +++ b/litmus/jobs.c @@ -0,0 +1,43 @@ +/* litmus/jobs.c - common job control code + */ + +#include + +#include +#include + +void prepare_for_next_period(struct task_struct *t) +{ + BUG_ON(!t); + /* prepare next release */ + t->rt_param.job_params.release = t->rt_param.job_params.deadline; + t->rt_param.job_params.deadline += get_rt_period(t); + t->rt_param.job_params.exec_time = 0; + /* update job sequence number */ + t->rt_param.job_params.job_no++; + + /* don't confuse Linux */ + t->rt.time_slice = 1; +} + +void release_at(struct task_struct *t, lt_t start) +{ + t->rt_param.job_params.deadline = start; + prepare_for_next_period(t); + set_rt_flags(t, RT_F_RUNNING); +} + + +/* + * Deactivate current task until the beginning of the next period. + */ +long complete_job(void) +{ + /* Mark that we do not excute anymore */ + set_rt_flags(current, RT_F_SLEEP); + /* call schedule, this will return when a new job arrives + * it also takes care of preparing for the next release + */ + schedule(); + return 0; +} diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 000000000000..eb0d17e298d7 --- /dev/null +++ b/litmus/litmus.c @@ -0,0 +1,654 @@ +/* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS intialization code, + * and the procfs interface.. 
+ */ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include + +/* Number of RT tasks that exist in the system */ +atomic_t rt_task_count = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(task_transition_lock); + +/* Give log messages sequential IDs. */ +atomic_t __log_seq_no = ATOMIC_INIT(0); + +/* current master CPU for handling timer IRQs */ +atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); + +static struct kmem_cache * heap_node_cache; + +struct heap_node* heap_node_alloc(int gfp_flags) +{ + return kmem_cache_alloc(heap_node_cache, gfp_flags); +} + +void heap_node_free(struct heap_node* hn) +{ + kmem_cache_free(heap_node_cache, hn); +} + +/* + * sys_set_task_rt_param + * @pid: Pid of the task which scheduling parameters must be changed + * @param: New real-time extension parameters such as the execution cost and + * period + * Syscall for manipulating with task rt extension params + * Returns EFAULT if param is NULL. + * ESRCH if pid is not corrsponding + * to a valid task. + * EINVAL if either period or execution cost is <=0 + * EPERM if pid is a real-time task + * 0 if success + * + * Only non-real-time tasks may be configured with this system call + * to avoid races with the scheduler. In practice, this means that a + * task's parameters must be set _before_ calling sys_prepare_rt_task() + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + struct rt_task tp; + struct task_struct *target; + int retval = -EINVAL; + + printk("Setting up rt task parameters for process %d.\n", pid); + + if (pid < 0 || param == 0) { + goto out; + } + if (copy_from_user(&tp, param, sizeof(tp))) { + retval = -EFAULT; + goto out; + } + + /* Task search and manipulation must be protected */ + read_lock_irq(&tasklist_lock); + if (!(target = find_task_by_vpid(pid))) { + retval = -ESRCH; + goto out_unlock; + } + + if (is_realtime(target)) { + /* The task is already a real-time task. + * We cannot not allow parameter changes at this point. + */ + retval = -EBUSY; + goto out_unlock; + } + + if (tp.exec_cost <= 0) + goto out_unlock; + if (tp.period <= 0) + goto out_unlock; + if (!cpu_online(tp.cpu)) + goto out_unlock; + if (tp.period < tp.exec_cost) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because wcet > period\n", pid); + goto out_unlock; + } + + target->rt_param.task_params = tp; + + retval = 0; + out_unlock: + read_unlock_irq(&tasklist_lock); + out: + return retval; +} + +/* + * Getter of task's RT params + * returns EINVAL if param or pid is NULL + * returns ESRCH if pid does not correspond to a valid task + * returns EFAULT if copying of parameters has failed. + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + int retval = -EINVAL; + struct task_struct *source; + struct rt_task lp; + if (param == 0 || pid < 0) + goto out; + read_lock(&tasklist_lock); + if (!(source = find_task_by_vpid(pid))) { + retval = -ESRCH; + goto out_unlock; + } + lp = source->rt_param.task_params; + read_unlock(&tasklist_lock); + /* Do copying outside the lock */ + retval = + copy_to_user(param, &lp, sizeof(lp)) ? 
-EFAULT : 0; + return retval; + out_unlock: + read_unlock(&tasklist_lock); + out: + return retval; + +} + +/* + * This is the crucial function for periodic task implementation, + * It checks if a task is periodic, checks if such kind of sleep + * is permitted and calls plugin-specific sleep, which puts the + * task into a wait array. + * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_complete_job(void) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + /* The plugin has to put the task into an + * appropriate queue and call schedule + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is an "improved" version of sys_complete_job that + * addresses the problem of unintentionally missing a job after + * an overrun. + * + * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_wait_for_job_release(unsigned int job) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + + retval = 0; + + /* first wait until we have "reached" the desired job + * + * This implementation has at least two problems: + * + * 1) It doesn't gracefully handle the wrap around of + * job_no. Since LITMUS is a prototype, this is not much + * of a problem right now. + * + * 2) It is theoretically racy if a job release occurs + * between checking job_no and calling sleep_next_period(). + * A proper solution would requiring adding another callback + * in the plugin structure and testing the condition with + * interrupts disabled. + * + * FIXME: At least problem 2 should be taken care of eventually. + */ + while (!retval && job > current->rt_param.job_params.job_no) + /* If the last job overran then job <= job_no and we + * don't send the task to sleep. + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is a helper syscall to query the current job sequence number. + * + * returns 0 on successful query + * returns EPERM if task is not a real-time task. + * returns EFAULT if &job is not a valid pointer. + */ +asmlinkage long sys_query_job_no(unsigned int __user *job) +{ + int retval = -EPERM; + if (is_realtime(current)) + retval = put_user(current->rt_param.job_params.job_no, job); + + return retval; +} + +/* sys_null_call() is only used for determining raw system call + * overheads (kernel entry, kernel exit). It has no useful side effects. + * If ts is non-NULL, then the current Feather-Trace time is recorded. + */ +asmlinkage long sys_null_call(cycles_t __user *ts) +{ + long ret = 0; + cycles_t now; + + if (ts) { + now = get_cycles(); + ret = put_user(now, ts); + } + + return ret; +} + +/* p is a real-time task. Re-init its state as a best-effort task. */ +static void reinit_litmus_state(struct task_struct* p, int restore) +{ + struct rt_task user_config = {}; + __user short *np_flag = NULL; + + if (restore) { + /* Safe user-space provided configuration data. 
*/ + user_config = p->rt_param.task_params; + np_flag = p->rt_param.np_flag; + } + + /* We probably should not be inheriting any task's priority + * at this point in time. + */ + WARN_ON(p->rt_param.inh_task); + + /* We need to restore the priority of the task. */ +// __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio); + + /* Cleanup everything else. */ + memset(&p->rt_param, 0, sizeof(user_config)); + + /* Restore preserved fields. */ + if (restore) { + p->rt_param.task_params = user_config; + p->rt_param.np_flag = np_flag; + } +} + +long litmus_admit_task(struct task_struct* tsk) +{ + long retval = 0; + unsigned long flags; + + BUG_ON(is_realtime(tsk)); + + if (get_rt_period(tsk) == 0 || + get_exec_cost(tsk) > get_rt_period(tsk)) { + TRACE_TASK(tsk, "litmus admit: invalid task parameters " + "(%lu, %lu)\n", + get_exec_cost(tsk), get_rt_period(tsk)); + return -EINVAL; + } + + if (!cpu_online(get_partition(tsk))) + { + TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", + get_partition(tsk)); + return -EINVAL; + } + + INIT_LIST_HEAD(&tsk_rt(tsk)->list); + + /* avoid scheduler plugin changing underneath us */ + spin_lock_irqsave(&task_transition_lock, flags); + + /* allocate heap node for this task */ + tsk_rt(tsk)->heap_node = heap_node_alloc(GFP_ATOMIC); + if (!tsk_rt(tsk)->heap_node || + !tsk_rt(tsk)->rel_heap) { + printk(KERN_WARNING "litmus: no more heap node memory!?\n"); + retval = -ENOMEM; + heap_node_free(tsk_rt(tsk)->heap_node); + } else + heap_node_init(&tsk_rt(tsk)->heap_node, tsk); + + if (!retval) + retval = litmus->admit_task(tsk); + + if (!retval) { + sched_trace_task_name(tsk); + sched_trace_task_param(tsk); + atomic_inc(&rt_task_count); + } + + spin_unlock_irqrestore(&task_transition_lock, flags); + + return retval; +} + +void litmus_exit_task(struct task_struct* tsk) +{ + if (is_realtime(tsk)) { + sched_trace_task_completion(tsk, 1); + litmus->task_exit(tsk); + BUG_ON(heap_node_in_heap(tsk_rt(tsk)->heap_node)); + heap_node_free(tsk_rt(tsk)->heap_node); + atomic_dec(&rt_task_count); + reinit_litmus_state(tsk, 1); + } +} + +/* Switching a plugin in use is tricky. + * We must watch out that no real-time tasks exists + * (and that none is created in parallel) and that the plugin is not + * currently in use on any processor (in theory). + * + * For now, we don't enforce the second part since it is unlikely to cause + * any trouble by itself as long as we don't unload modules. + */ +int switch_sched_plugin(struct sched_plugin* plugin) +{ + unsigned long flags; + int ret = 0; + + BUG_ON(!plugin); + + /* stop task transitions */ + spin_lock_irqsave(&task_transition_lock, flags); + + /* don't switch if there are active real-time tasks */ + if (atomic_read(&rt_task_count) == 0) { + ret = litmus->deactivate_plugin(); + if (0 != ret) + goto out; + ret = plugin->activate_plugin(); + if (0 != ret) { + printk(KERN_INFO "Can't activate %s (%d).\n", + plugin->plugin_name, ret); + plugin = &linux_sched_plugin; + } + printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); + litmus = plugin; + } else + ret = -EBUSY; +out: + spin_unlock_irqrestore(&task_transition_lock, flags); + return ret; +} + +/* Called upon fork. + * p is the newly forked task. + */ +void litmus_fork(struct task_struct* p) +{ + if (is_realtime(p)) + /* clean out any litmus related state, don't preserve anything*/ + reinit_litmus_state(p, 0); +} + +/* Called upon execve(). + * current is doing the exec. + * Don't let address space specific stuff leak. 
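+ * In particular, the np_flag pointer refers to memory in the old address
+ * space, so it is cleared below; an inherited priority would be a bug here.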
+ */ +void litmus_exec(void) +{ + struct task_struct* p = current; + + if (is_realtime(p)) { + WARN_ON(p->rt_param.inh_task); + p->rt_param.np_flag = NULL; + } +} + +void exit_litmus(struct task_struct *dead_tsk) +{ + if (is_realtime(dead_tsk)) + litmus_exit_task(dead_tsk); +} + + +#ifdef CONFIG_MAGIC_SYSRQ +int sys_kill(int pid, int sig); + +static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty) +{ + struct task_struct *t; + read_lock(&tasklist_lock); + for_each_process(t) { + if (is_realtime(t)) { + sys_kill(t->pid, SIGKILL); + } + } + read_unlock(&tasklist_lock); +} + +static struct sysrq_key_op sysrq_kill_rt_tasks_op = { + .handler = sysrq_handle_kill_rt_tasks, + .help_msg = "quit-rt-tasks(X)", + .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", +}; + + +#endif + + +static int proc_read_stats(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len; + + len = snprintf(page, PAGE_SIZE, + "real-time tasks = %d\n" + "ready for release = %d\n", + atomic_read(&rt_task_count), + 0); + return len; +} + +static int proc_read_plugins(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len; + + len = print_sched_plugins(page, PAGE_SIZE); + return len; +} + +static int proc_read_curr(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len; + + len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name); + return len; +} + +static int proc_write_curr(struct file *file, + const char *buffer, + unsigned long count, + void *data) +{ + int len, ret; + char name[65]; + struct sched_plugin* found; + + if(count > 64) + len = 64; + else + len = count; + + if(copy_from_user(name, buffer, len)) + return -EFAULT; + + name[len] = '\0'; + /* chomp name */ + if (len > 1 && name[len - 1] == '\n') + name[len - 1] = '\0'; + + found = find_sched_plugin(name); + + if (found) { + ret = switch_sched_plugin(found); + if (ret != 0) + printk(KERN_INFO "Could not switch plugin: %d\n", ret); + } else + printk(KERN_INFO "Plugin '%s' is unknown.\n", name); + + return len; +} + + +static int proc_read_release_master(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + int len, master; + master = atomic_read(&release_master_cpu); + if (master == NO_CPU) + len = snprintf(page, PAGE_SIZE, "NO_CPU\n"); + else + len = snprintf(page, PAGE_SIZE, "%d\n", master); + return len; +} + +static int proc_write_release_master(struct file *file, + const char *buffer, + unsigned long count, + void *data) +{ + int cpu, err, online = 0; + char msg[64]; + + if (count > 63) + return -EINVAL; + + if (copy_from_user(msg, buffer, count)) + return -EFAULT; + + /* terminate */ + msg[count] = '\0'; + /* chomp */ + if (count > 1 && msg[count - 1] == '\n') + msg[count - 1] = '\0'; + + if (strcmp(msg, "NO_CPU") == 0) { + atomic_set(&release_master_cpu, NO_CPU); + return count; + } else { + err = sscanf(msg, "%d", &cpu); + if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { + atomic_set(&release_master_cpu, cpu); + return count; + } else { + TRACE("invalid release master: '%s' " + "(err:%d cpu:%d online:%d)\n", + msg, err, cpu, online); + return -EINVAL; + } + } +} + +static struct proc_dir_entry *litmus_dir = NULL, + *curr_file = NULL, + *stat_file = NULL, + *plugs_file = NULL, + *release_master_file = NULL; + +static int __init init_litmus_proc(void) +{ + litmus_dir = proc_mkdir("litmus", NULL); + if (!litmus_dir) { + printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); + return -ENOMEM; + 
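+		/* nothing to undo on this error path: no proc entries have
+		 * been created yet */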
} + + curr_file = create_proc_entry("active_plugin", + 0644, litmus_dir); + if (!curr_file) { + printk(KERN_ERR "Could not allocate active_plugin " + "procfs entry.\n"); + return -ENOMEM; + } + curr_file->read_proc = proc_read_curr; + curr_file->write_proc = proc_write_curr; + + release_master_file = create_proc_entry("release_master", + 0644, litmus_dir); + if (!release_master_file) { + printk(KERN_ERR "Could not allocate release_master " + "procfs entry.\n"); + return -ENOMEM; + } + release_master_file->read_proc = proc_read_release_master; + release_master_file->write_proc = proc_write_release_master; + + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, + proc_read_stats, NULL); + + plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir, + proc_read_plugins, NULL); + + return 0; +} + +static void exit_litmus_proc(void) +{ + if (plugs_file) + remove_proc_entry("plugins", litmus_dir); + if (stat_file) + remove_proc_entry("stats", litmus_dir); + if (curr_file) + remove_proc_entry("active_plugin", litmus_dir); + if (litmus_dir) + remove_proc_entry("litmus", NULL); +} + +extern struct sched_plugin linux_sched_plugin; + +static int __init _init_litmus(void) +{ + /* Common initializers, + * mode change lock is used to enforce single mode change + * operation. + */ + printk("Starting LITMUS^RT kernel\n"); + + register_sched_plugin(&linux_sched_plugin); + + heap_node_cache = KMEM_CACHE(heap_node, SLAB_PANIC); + +#ifdef CONFIG_MAGIC_SYSRQ + /* offer some debugging help */ + if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) + printk("Registered kill rt tasks magic sysrq.\n"); + else + printk("Could not register kill rt tasks magic sysrq.\n"); +#endif + + init_litmus_proc(); + + return 0; +} + +static void _exit_litmus(void) +{ + exit_litmus_proc(); + kmem_cache_destroy(heap_node_cache); +} + +module_init(_init_litmus); +module_exit(_exit_litmus); diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c new file mode 100644 index 000000000000..ccedd3670ac5 --- /dev/null +++ b/litmus/sched_litmus.c @@ -0,0 +1,275 @@ +/* This file is included from kernel/sched.c */ + +#include +#include + +static void update_time_litmus(struct rq *rq, struct task_struct *p) +{ + u64 delta = rq->clock - p->se.exec_start; + if (unlikely((s64)delta < 0)) + delta = 0; + /* per job counter */ + p->rt_param.job_params.exec_time += delta; + /* task counter */ + p->se.sum_exec_runtime += delta; + /* sched_clock() */ + p->se.exec_start = rq->clock; + cpuacct_charge(p, delta); +} + +static void double_rq_lock(struct rq *rq1, struct rq *rq2); +static void double_rq_unlock(struct rq *rq1, struct rq *rq2); + +static void litmus_tick(struct rq *rq, struct task_struct *p) +{ + if (is_realtime(p)) + update_time_litmus(rq, p); + litmus->tick(p); +} + +static void litmus_schedule(struct rq *rq, struct task_struct *prev) +{ + struct rq* other_rq; + long was_running; + lt_t _maybe_deadlock = 0; + /* WARNING: rq is _not_ locked! 
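+	 * The plugin's pick is stored in rq->litmus_next and handed to the
+	 * scheduler core later via pick_next_task_litmus().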
*/ + if (is_realtime(prev)) { + update_time_litmus(rq, prev); + if (!is_running(prev)) + tsk_rt(prev)->present = 0; + } + + /* let the plugin schedule */ + rq->litmus_next = litmus->schedule(prev); + + /* check if a global plugin pulled a task from a different RQ */ + if (rq->litmus_next && task_rq(rq->litmus_next) != rq) { + /* we need to migrate the task */ + other_rq = task_rq(rq->litmus_next); + TRACE_TASK(rq->litmus_next, "migrate from %d\n", other_rq->cpu); + + /* while we drop the lock, the prev task could change its + * state + */ + was_running = is_running(prev); + mb(); + spin_unlock(&rq->lock); + + /* Don't race with a concurrent switch. This could deadlock in + * the case of cross or circular migrations. It's the job of + * the plugin to make sure that doesn't happen. + */ + TRACE_TASK(rq->litmus_next, "stack_in_use=%d\n", + rq->litmus_next->rt_param.stack_in_use); + if (rq->litmus_next->rt_param.stack_in_use != NO_CPU) { + TRACE_TASK(rq->litmus_next, "waiting to deschedule\n"); + _maybe_deadlock = litmus_clock(); + } + while (rq->litmus_next->rt_param.stack_in_use != NO_CPU) { + cpu_relax(); + mb(); + if (rq->litmus_next->rt_param.stack_in_use == NO_CPU) + TRACE_TASK(rq->litmus_next, + "descheduled. Proceeding.\n"); + if (lt_before(_maybe_deadlock + 10000000, + litmus_clock())) { + /* We've been spinning for 10ms. + * Something can't be right! + * Let's abandon the task and bail out; at least + * we will have debug info instead of a hard + * deadlock. + */ + TRACE_TASK(rq->litmus_next, + "stack too long in use. " + "Deadlock?\n"); + rq->litmus_next = NULL; + + /* bail out */ + spin_lock(&rq->lock); + return; + } + } +#ifdef __ARCH_WANT_UNLOCKED_CTXSW + if (rq->litmus_next->oncpu) + TRACE_TASK(rq->litmus_next, "waiting for !oncpu"); + while (rq->litmus_next->oncpu) { + cpu_relax(); + mb(); + } +#endif + double_rq_lock(rq, other_rq); + mb(); + if (is_realtime(prev) && is_running(prev) != was_running) { + TRACE_TASK(prev, + "state changed while we dropped" + " the lock: is_running=%d, was_running=%d\n", + is_running(prev), was_running); + if (is_running(prev) && !was_running) { + /* prev task became unblocked + * we need to simulate normal sequence of events + * to scheduler plugins. + */ + litmus->task_block(prev); + litmus->task_wake_up(prev); + } + } + + set_task_cpu(rq->litmus_next, smp_processor_id()); + + /* DEBUG: now that we have the lock we need to make sure a + * couple of things still hold: + * - it is still a real-time task + * - it is still runnable (could have been stopped) + * If either is violated, then the active plugin is + * doing something wrong. 
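+	 * In that case rq->litmus_next is reset to NULL below, so that
+	 * pick_next_task_litmus() returns no task and the scheduler core
+	 * falls back to the other scheduling classes.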
+ */ + if (!is_realtime(rq->litmus_next) || + !is_running(rq->litmus_next)) { + /* BAD BAD BAD */ + TRACE_TASK(rq->litmus_next, + "BAD: migration invariant FAILED: " + "rt=%d running=%d\n", + is_realtime(rq->litmus_next), + is_running(rq->litmus_next)); + /* drop the task */ + rq->litmus_next = NULL; + } + /* release the other CPU's runqueue, but keep ours */ + spin_unlock(&other_rq->lock); + } + if (rq->litmus_next) + rq->litmus_next->rt_param.stack_in_use = rq->cpu; +} + +static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, + int wakeup) +{ + if (wakeup) { + sched_trace_task_resume(p); + tsk_rt(p)->present = 1; + litmus->task_wake_up(p); + } else + TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); +} + +static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep) +{ + if (sleep) { + litmus->task_block(p); + tsk_rt(p)->present = 0; + sched_trace_task_block(p); + } else + TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n"); +} + +static void yield_task_litmus(struct rq *rq) +{ + BUG_ON(rq->curr != current); + litmus->complete_job(); +} + +/* Plugins are responsible for this. + */ +static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags) +{ +} + +/* has already been taken care of */ +static void put_prev_task_litmus(struct rq *rq, struct task_struct *p) +{ +} + +static struct task_struct *pick_next_task_litmus(struct rq *rq) +{ + struct task_struct* picked = rq->litmus_next; + rq->litmus_next = NULL; + if (picked) + picked->se.exec_start = rq->clock; + return picked; +} + +static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued) +{ +} + +static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running) +{ +} + +static void prio_changed_litmus(struct rq *rq, struct task_struct *p, + int oldprio, int running) +{ +} + +unsigned int get_rr_interval_litmus(struct task_struct *p) +{ + /* return infinity */ + return 0; +} + +/* This is called when a task became a real-time task, either due to a SCHED_* + * class transition or due to PI mutex inheritance. We don't handle Linux PI + * mutex inheritance yet (and probably never will). Use LITMUS provided + * synchronization primitives instead. + */ +static void set_curr_task_litmus(struct rq *rq) +{ + rq->curr->se.exec_start = rq->clock; +} + + +#ifdef CONFIG_SMP +/* execve tries to rebalance task in this scheduling domain */ +static int select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags) +{ + /* preemption is already disabled. 
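+	 * Placement of real-time tasks is left to the active plugin.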
+ * We don't want to change cpu here + */ + return smp_processor_id(); +} + +/* we don't repartition at runtime */ + +static unsigned long +load_balance_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, + unsigned long max_load_move, + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned, int *this_best_prio) +{ + return 0; +} + +static int +move_one_task_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle) +{ + return 0; +} +#endif + +const struct sched_class litmus_sched_class = { + .next = &rt_sched_class, + .enqueue_task = enqueue_task_litmus, + .dequeue_task = dequeue_task_litmus, + .yield_task = yield_task_litmus, + + .check_preempt_curr = check_preempt_curr_litmus, + + .pick_next_task = pick_next_task_litmus, + .put_prev_task = put_prev_task_litmus, + +#ifdef CONFIG_SMP + .select_task_rq = select_task_rq_litmus, + + .load_balance = load_balance_litmus, + .move_one_task = move_one_task_litmus, +#endif + + .set_curr_task = set_curr_task_litmus, + .task_tick = task_tick_litmus, + + .get_rr_interval = get_rr_interval_litmus, + + .prio_changed = prio_changed_litmus, + .switched_to = switched_to_litmus, +}; diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 000000000000..0be091ece569 --- /dev/null +++ b/litmus/sched_plugin.c @@ -0,0 +1,199 @@ +/* sched_plugin.c -- core infrastructure for the scheduler plugin system + * + * This file includes the initialization of the plugin system, the no-op Linux + * scheduler plugin and some dummy functions. + */ + +#include +#include + +#include +#include + +#include + +/************************************************************* + * Dummy plugin functions * + *************************************************************/ + +static void litmus_dummy_finish_switch(struct task_struct * prev) +{ +} + +static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) +{ + return NULL; +} + +static void litmus_dummy_tick(struct task_struct* tsk) +{ +} + +static long litmus_dummy_admit_task(struct task_struct* tsk) +{ + printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", + tsk->comm, tsk->pid); + return -EINVAL; +} + +static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) +{ +} + +static void litmus_dummy_task_wake_up(struct task_struct *task) +{ +} + +static void litmus_dummy_task_block(struct task_struct *task) +{ +} + +static void litmus_dummy_task_exit(struct task_struct *task) +{ +} + +static long litmus_dummy_complete_job(void) +{ + return -ENOSYS; +} + +static long litmus_dummy_activate_plugin(void) +{ + return 0; +} + +static long litmus_dummy_deactivate_plugin(void) +{ + return 0; +} + +#ifdef CONFIG_FMLP + +static long litmus_dummy_inherit_priority(struct pi_semaphore *sem, + struct task_struct *new_owner) +{ + return -ENOSYS; +} + +static long litmus_dummy_return_priority(struct pi_semaphore *sem) +{ + return -ENOSYS; +} + +static long litmus_dummy_pi_block(struct pi_semaphore *sem, + struct task_struct *new_waiter) +{ + return -ENOSYS; +} + +#endif + + +/* The default scheduler plugin. It doesn't do anything and lets Linux do its + * job. 
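+ * All of its callbacks are the litmus_dummy_* functions defined above;
+ * in particular admit_task rejects every task, so nothing can become a
+ * real-time task while this plugin is active.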
+ */ +struct sched_plugin linux_sched_plugin = { + .plugin_name = "Linux", + .tick = litmus_dummy_tick, + .task_new = litmus_dummy_task_new, + .task_exit = litmus_dummy_task_exit, + .task_wake_up = litmus_dummy_task_wake_up, + .task_block = litmus_dummy_task_block, + .complete_job = litmus_dummy_complete_job, + .schedule = litmus_dummy_schedule, + .finish_switch = litmus_dummy_finish_switch, + .activate_plugin = litmus_dummy_activate_plugin, + .deactivate_plugin = litmus_dummy_deactivate_plugin, +#ifdef CONFIG_FMLP + .inherit_priority = litmus_dummy_inherit_priority, + .return_priority = litmus_dummy_return_priority, + .pi_block = litmus_dummy_pi_block, +#endif + .admit_task = litmus_dummy_admit_task +}; + +/* + * The reference to current plugin that is used to schedule tasks within + * the system. It stores references to actual function implementations + * Should be initialized by calling "init_***_plugin()" + */ +struct sched_plugin *litmus = &linux_sched_plugin; + +/* the list of registered scheduling plugins */ +static LIST_HEAD(sched_plugins); +static DEFINE_SPINLOCK(sched_plugins_lock); + +#define CHECK(func) {\ + if (!plugin->func) \ + plugin->func = litmus_dummy_ ## func;} + +/* FIXME: get reference to module */ +int register_sched_plugin(struct sched_plugin* plugin) +{ + printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", + plugin->plugin_name); + + /* make sure we don't trip over null pointers later */ + CHECK(finish_switch); + CHECK(schedule); + CHECK(tick); + CHECK(task_wake_up); + CHECK(task_exit); + CHECK(task_block); + CHECK(task_new); + CHECK(complete_job); + CHECK(activate_plugin); + CHECK(deactivate_plugin); +#ifdef CONFIG_FMLP + CHECK(inherit_priority); + CHECK(return_priority); + CHECK(pi_block); +#endif + CHECK(admit_task); + + if (!plugin->release_at) + plugin->release_at = release_at; + + spin_lock(&sched_plugins_lock); + list_add(&plugin->list, &sched_plugins); + spin_unlock(&sched_plugins_lock); + + return 0; +} + + +/* FIXME: reference counting, etc. */ +struct sched_plugin* find_sched_plugin(const char* name) +{ + struct list_head *pos; + struct sched_plugin *plugin; + + spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + if (!strcmp(plugin->plugin_name, name)) + goto out_unlock; + } + plugin = NULL; + +out_unlock: + spin_unlock(&sched_plugins_lock); + return plugin; +} + +int print_sched_plugins(char* buf, int max) +{ + int count = 0; + struct list_head *pos; + struct sched_plugin *plugin; + + spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name); + if (max - count <= 0) + break; + } + spin_unlock(&sched_plugins_lock); + return count; +} -- cgit v1.2.2
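
For reference, a minimal sketch of how a scheduler plugin might hook into the
plugin infrastructure added above (litmus/sched_plugin.c). The plugin name
"DEMO" and the demo_* functions are hypothetical placeholders; only
struct sched_plugin, its callback signatures, and register_sched_plugin()
are taken from this patch. Callbacks that are left unset are filled in with
the litmus_dummy_* defaults by register_sched_plugin():

#include <linux/module.h>
#include <linux/sched.h>
#include <litmus/sched_plugin.h>

/* Never pick a real-time task; the scheduler core then falls back to the
 * regular Linux scheduling classes. */
static struct task_struct* demo_schedule(struct task_struct *prev)
{
	return NULL;
}

/* Accept every task; a real plugin would run an admission or
 * partitioning test here. */
static long demo_admit_task(struct task_struct *tsk)
{
	return 0;
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",	/* hypothetical name */
	.schedule	= demo_schedule,
	.admit_task	= demo_admit_task,
	/* all remaining callbacks are filled in with the litmus_dummy_*
	 * defaults by register_sched_plugin() */
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);

A plugin registered this way can then be selected at runtime by writing its
name to /proc/litmus/active_plugin (handled by proc_write_curr() above);
switch_sched_plugin() refuses the switch with -EBUSY while real-time tasks
exist.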