diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 5d20276..8672398 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h @@ -88,7 +88,7 @@ inline static int budget_exhausted(struct task_struct* t) inline static lt_t budget_remaining(struct task_struct* t) { if (!budget_exhausted(t)) - return get_exec_time(t) - get_exec_cost(t); + return get_exec_cost(t) - get_exec_time(t); else /* avoid overflow */ return 0; diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index a7a183f..6f43dea 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -1,3 +1,5 @@ +#include + /* * Definition of the scheduler plugin interface. * @@ -33,6 +35,72 @@ typedef enum { PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */ } budget_policy_t; + +/* The parameters for EDF-Fm scheduling algorithm. + * Each task may be fixed or migratory. Migratory tasks may + * migrate on 2 (contiguous) CPU only. NR_CPUS_EDF_FM = 2. + */ +#define NR_CPUS_EDF_FM 2 + +struct edffm_params { + /* EDF-fm where can a migratory task execute? */ + unsigned int cpus[NR_CPUS_EDF_FM]; + /* how many cpus are used by this task? + * fixed = 0, migratory = (NR_CPUS_EDF_FM - 1) + * Efficient way to allow writing cpus[nr_cpus]. + */ + unsigned int nr_cpus; + /* Fraction of this task exec_cost that each CPU should handle. + * We keep the fraction divided in num/denom : a matrix of + * (NR_CPUS_EDF_FM rows) x (2 columns). + * The first column is the numerator of the fraction. + * The second column is the denominator. + * In EDF-fm this is a 2*2 matrix + */ + lt_t fraction[2][NR_CPUS_EDF_FM]; +}; + +/* Parameters for NPS-F semi-partitioned scheduling algorithm. + * Each (cpu, budget) entry defines the share ('budget' in ns, a % of + * the slot_length) of the notional processor on the CPU 'cpu'. + * This structure is used by the library - syscall interface in order + * to go through the overhead of a syscall only once per server. + */ +struct npsf_budgets { + int cpu; + lt_t budget; +}; + +/* The parameters for the EDF-WM semi-partitioned scheduler. + * Each task may be split across multiple cpus. Each per-cpu allocation + * is called a 'slice'. + */ +#define MAX_EDF_WM_SLICES 24 +#define MIN_EDF_WM_SLICE_SIZE 50000 /* .05 millisecond = 50us */ + +struct edf_wm_slice { + /* on which CPU is this slice allocated */ + unsigned int cpu; + /* relative deadline from job release (not from slice release!) */ + lt_t deadline; + /* budget of this slice; must be precisely enforced */ + lt_t budget; + /* offset of this slice relative to the job release */ + lt_t offset; +}; + +/* If a job is not sliced across multiple CPUs, then + * count is set to zero and none of the slices is used. + * This implies that count == 1 is illegal. + */ +struct edf_wm_params { + /* enumeration of all slices */ + struct edf_wm_slice slices[MAX_EDF_WM_SLICES]; + + /* how many slices are defined? 
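+	 *
+	 * A minimal sketch of how user space might fill this structure in
+	 * (illustrative values only, not taken from this patch; times in ns):
+	 * a job with exec_cost = 3 ms split across CPUs 0 and 1 could use
+	 *
+	 *	count     = 2;
+	 *	slices[0] = (struct edf_wm_slice)
+	 *		{ .cpu = 0, .offset = 0,       .deadline = 4000000, .budget = 2000000 };
+	 *	slices[1] = (struct edf_wm_slice)
+	 *		{ .cpu = 1, .offset = 4000000, .deadline = 9000000, .budget = 1000000 };
+	 *
+	 * i.e., the slice budgets sum to exec_cost, slice 1 is released only at
+	 * slice 0's (relative) deadline, consecutive slices sit on different
+	 * CPUs, the task's cpu field matches slices[0].cpu, and the task uses
+	 * PRECISE_ENFORCEMENT, as required by wm_check_params() in
+	 * sched_edf_wm.c.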
*/ + unsigned int count; +}; + struct rt_task { lt_t exec_cost; lt_t period; @@ -40,6 +108,22 @@ struct rt_task { unsigned int cpu; task_class_t cls; budget_policy_t budget_policy; /* ignored by pfair */ + + /* parameters used by the semi-partitioned algorithms */ + union { + /* EDF-Fm; defined in sched_edf_fm.c */ + struct edffm_params fm; + + /* NPS-F; defined in sched_npsf.c + * id for the server (notional processor) that holds + * this task; the same npfs_id can be assigned to "the same" + * server split on different cpus + */ + int npsf_id; + + /* EDF-WM; defined in sched_edf_wm.c */ + struct edf_wm_params wm; + } semi_part; }; /* The definition of the data that is shared between the kernel and real-time @@ -184,6 +268,27 @@ struct rt_param { /* Pointer to the page shared between userspace and kernel. */ struct control_page * ctrl_page; + + /* runtime info for the semi-part plugins */ + union { + /* EDF-Fm runtime information + * number of jobs handled by this cpu + * (to determine next cpu for a migrating task) + */ + unsigned int cpu_job_no[NR_CPUS_EDF_FM]; + + /* EDF-WM runtime information */ + struct { + /* at which exec time did the current slice start? */ + lt_t exec_time; + /* when did the job suspend? */ + lt_t suspend_time; + /* cached job parameters */ + lt_t job_release, job_deadline; + /* pointer to the current slice */ + struct edf_wm_slice* slice; + } wm; + } semi_part; }; /* Possible RT flags */ diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 9c1c9f2..7ea9176 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h @@ -6,6 +6,8 @@ #define _LINUX_SCHED_PLUGIN_H_ #include +/* NSEC_PER... conversions */ +#include /* struct for semaphore with priority inheritance */ struct pi_semaphore { @@ -136,6 +138,9 @@ extern struct sched_plugin *litmus; /* cluster size: cache_index = 2 L2, cache_index = 3 L3 */ extern int cluster_cache_index; +/* Slot length (ns) for NPS-F semi-part. 
algo */ +extern lt_t npsf_slot_length; + int register_sched_plugin(struct sched_plugin* plugin); struct sched_plugin* find_sched_plugin(const char* name); int print_sched_plugins(char* buf, int max); diff --git a/include/litmus/trace.h b/include/litmus/trace.h index b32c711..6afbf96 100644 --- a/include/litmus/trace.h +++ b/include/litmus/trace.h @@ -78,6 +78,8 @@ feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu) #define TS_TICK_START(t) TTIMESTAMP(110, t) #define TS_TICK_END(t) TTIMESTAMP(111, t) +#define TS_PULL_TIMER_START TIMESTAMP(112) +#define TS_PULL_TIMER_END TIMESTAMP(113) #define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ #define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index f0618e7..4e82c52 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h @@ -33,5 +33,7 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) __SYSCALL(__NR_release_ts, sys_release_ts) #define __NR_null_call __LSC(13) __SYSCALL(__NR_null_call, sys_null_call) +#define __NR_add_server __LSC(14) +__SYSCALL(__NR_add_server, sys_add_server) -#define NR_litmus_syscalls 14 +#define NR_litmus_syscalls 15 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index fdf9596..23d3712 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -47,6 +47,7 @@ #include #include +#include #include @@ -1063,6 +1064,7 @@ void hrtimer_pull(void) struct hrtimer_start_on_info *info; struct list_head *pos, *safe, list; + TS_PULL_TIMER_START; raw_spin_lock(&base->lock); list_replace_init(&base->to_pull, &list); raw_spin_unlock(&base->lock); @@ -1073,6 +1075,7 @@ void hrtimer_pull(void) list_del(pos); hrtimer_start(info->timer, info->time, info->mode); } + TS_PULL_TIMER_END; } /** diff --git a/litmus/Makefile b/litmus/Makefile index f301d28..5533a58 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -14,7 +14,10 @@ obj-y = sched_plugin.o litmus.o \ bheap.o \ ctrldev.o \ sched_gsn_edf.o \ - sched_psn_edf.o + sched_psn_edf.o \ + sched_edf_wm.o \ + sched_npsf.o \ + sched_edf_fm.o obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o diff --git a/litmus/litmus.c b/litmus/litmus.c index b04a42b..2f78022 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c @@ -632,6 +632,55 @@ static int proc_write_cluster_size(struct file *file, return len; } +static int proc_read_npsf_slot_length(char *page, char **start, + off_t off, int count, + int *eof, void *data) +{ + return snprintf(page, PAGE_SIZE, "%d us\n", + (int) (npsf_slot_length / NSEC_PER_USEC)); +} + +extern void npsf_hrtimers_cleanup(void); +/* NPS-F slot length in us. + * + * Writing 0 as npsf_slot_length will trigger the removal of the + * hrtimers for the domain_reschedule_tick() in the NPS-F plugin. 
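+ *
+ * A usage sketch from user space (assuming the LITMUS^RT proc directory
+ * sits at the usual /proc/litmus location):
+ *
+ *	echo 2500 > /proc/litmus/npsf_slot_length   (set a 2.5 ms slot)
+ *	echo 0    > /proc/litmus/npsf_slot_length   (tear down the timers and
+ *	                                             fall back to 5000 us)
+ *
+ * The value is parsed as an integer number of microseconds and converted
+ * to nanoseconds below.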
+ */ +static int proc_write_npsf_slot_length(struct file *file, + const char *buffer, + unsigned long count, + void *data) +{ + int err, slot_length; + char msg[64]; + + if (count > 63) + return -EINVAL; + + if (copy_from_user(msg, buffer, count)) + return -EFAULT; + + /* terminate */ + msg[count] = '\0'; + /* chomp */ + if (count > 1 && msg[count - 1] == '\n') + msg[count - 1] = '\0'; + + err = sscanf(msg, "%d", &slot_length); + + if (err == 1) { + if (!slot_length) { + npsf_hrtimers_cleanup(); + /* reset to default */ + slot_length = 5000; + } + npsf_slot_length = (lt_t)((lt_t) slot_length * NSEC_PER_USEC); + return count; + } + + return -EINVAL; +} + #ifdef CONFIG_RELEASE_MASTER static int proc_read_release_master(char *page, char **start, off_t off, int count, @@ -691,7 +740,8 @@ static struct proc_dir_entry *litmus_dir = NULL, #ifdef CONFIG_RELEASE_MASTER *release_master_file = NULL, #endif - *clus_cache_idx_file = NULL; + *clus_cache_idx_file = NULL, + *npsf_slot_length_file = NULL; static int __init init_litmus_proc(void) { @@ -733,6 +783,16 @@ static int __init init_litmus_proc(void) clus_cache_idx_file->read_proc = proc_read_cluster_size; clus_cache_idx_file->write_proc = proc_write_cluster_size; + npsf_slot_length_file = create_proc_entry("npsf_slot_length", + 0644, litmus_dir); + if (!npsf_slot_length_file) { + printk(KERN_ERR "Could not allocate npsf_slot_length " + "procfs entry.\n"); + return -ENOMEM; + } + npsf_slot_length_file->read_proc = proc_read_npsf_slot_length; + npsf_slot_length_file->write_proc = proc_write_npsf_slot_length; + stat_file = create_proc_read_entry("stats", 0444, litmus_dir, proc_read_stats, NULL); @@ -752,6 +812,8 @@ static void exit_litmus_proc(void) remove_proc_entry("active_plugin", litmus_dir); if (clus_cache_idx_file) remove_proc_entry("cluster_cache", litmus_dir); + if (npsf_slot_length_file) + remove_proc_entry("npsf_slot_length", litmus_dir); #ifdef CONFIG_RELEASE_MASTER if (release_master_file) remove_proc_entry("release_master", litmus_dir); diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c new file mode 100644 index 0000000..b721072 --- /dev/null +++ b/litmus/sched_edf_fm.c @@ -0,0 +1,565 @@ +/* + * litmus/sched_edf_fm.c + * + * Implementation of the EDF-fm scheduling algorithm. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +typedef struct { + rt_domain_t domain; + int cpu; + struct task_struct* scheduled; /* only RT tasks */ +/* domain lock */ +#define slock domain.ready_lock +} edffm_domain_t; + +DEFINE_PER_CPU(edffm_domain_t, edffm_domains); + +#define local_edffm (&__get_cpu_var(edffm_domains)) +#define remote_edf(cpu) (&per_cpu(edffm_domains, cpu).domain) +#define remote_edffm(cpu) (&per_cpu(edffm_domains, cpu)) +#define task_edf(task) remote_edf(get_partition(task)) +#define task_edffm(task) remote_edffm(get_partition(task)) + +#define edffm_params(t) (t->rt_param.task_params.semi_part.fm) + +/* Is the task a migratory task? */ +#define is_migrat_task(task) (edffm_params(task).nr_cpus) +/* t is on the wrong CPU (it should be requeued properly) */ +#define wrong_cpu(t) is_migrat_task((t)) && task_cpu((t)) != get_partition((t)) +/* Get next CPU */ +#define migrat_next_cpu(t) \ + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + edffm_params(t).cpus[1] : \ + edffm_params(t).cpus[0]) +/* Get current cpu */ +#define migrat_cur_cpu(t) \ + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? 
\ + edffm_params(t).cpus[0] : \ + edffm_params(t).cpus[1]) +/* Manipulate share for current cpu */ +#define cur_cpu_fract_num(t) \ + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + edffm_params(t).fraction[0][0] : \ + edffm_params(t).fraction[0][1]) +#define cur_cpu_fract_den(t) \ + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + edffm_params(t).fraction[1][0] : \ + edffm_params(t).fraction[1][1]) +/* Get job number for current cpu */ +#define cur_cpu_job_no(t) \ + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + tsk_rt(t)->semi_part.cpu_job_no[0] : \ + tsk_rt(t)->semi_part.cpu_job_no[1]) +/* What is the current cpu position in the array? */ +#define edffm_cpu_pos(cpu,t) \ + ((cpu == edffm_params(t).cpus[0]) ? \ + 0 : 1) + +/* + * EDF-fm: migratory tasks have higher prio than fixed, EDF in both classes. + * (Both first and second may be NULL). + */ +int edffm_higher_prio(struct task_struct* first, struct task_struct* second) +{ + if ((first && edffm_params(first).nr_cpus) || + (second && edffm_params(second).nr_cpus)) { + if ((first && edffm_params(first).nr_cpus) && + (second && edffm_params(second).nr_cpus)) + /* both are migrating */ + return edf_higher_prio(first, second); + + if (first && edffm_params(first).nr_cpus) + /* first is migrating */ + return 1; + else + /* second is migrating */ + return 0; + } + + /* both are fixed or not real time */ + return edf_higher_prio(first, second); +} + +/* need_to_preempt - check whether the task t needs to be preempted + * call only with irqs disabled and with ready_lock acquired + */ +int edffm_preemption_needed(rt_domain_t* rt, struct task_struct *t) +{ + /* we need the read lock for edf_ready_queue */ + /* no need to preempt if there is nothing pending */ + if (!__jobs_pending(rt)) + return 0; + /* we need to reschedule if t doesn't exist */ + if (!t) + return 1; + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || edffm_higher_prio(__next_ready(rt), t); +} + +/* we assume the lock is being held */ +static void preempt(edffm_domain_t *edffm) +{ + preempt_if_preemptable(edffm->scheduled, edffm->cpu); +} + +static void edffm_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + unsigned long flags; + edffm_domain_t *edffm = container_of(rt, edffm_domain_t, domain); + + raw_spin_lock_irqsave(&edffm->slock, flags); + + __merge_ready(rt, tasks); + + if (edffm_preemption_needed(rt, edffm->scheduled)) + preempt(edffm); + + raw_spin_unlock_irqrestore(&edffm->slock, flags); +} + +/* EDF-fm uses the "release_master" field to force the next release for + * the task 'task' to happen on a remote CPU. The remote cpu for task is + * previously set up during job_completion() taking into consideration + * whether a task is a migratory task or not. 
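+ *
+ * Note that release_master is only overridden for the duration of this
+ * call: it is restored to NO_CPU right after __add_release() below, so
+ * the redirection affects just this one enqueue.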
+ */ +static inline void +edffm_add_release_remote(struct task_struct *task) +{ + unsigned long flags; + rt_domain_t *rt = task_edf(task); + + raw_spin_lock_irqsave(&rt->tobe_lock, flags); + + /* "modify" destination cpu */ + rt->release_master = get_partition(task); + + TRACE_TASK(task, "Add remote release: smp_proc_id = %d, cpu = %d, remote = %d\n", + smp_processor_id(), task_cpu(task), rt->release_master); + + /* trigger future release */ + __add_release(rt, task); + + /* reset proper release_master and unlock */ + rt->release_master = NO_CPU; + raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); +} + +/* perform double ready_queue locking in an orderwise fashion + * this is called with: interrupt disabled and rq->lock held (from + * schedule()) + */ +static noinline void double_domain_lock(edffm_domain_t *dom1, edffm_domain_t *dom2) +{ + if (dom1 == dom2) { + /* fake */ + raw_spin_lock(&dom1->slock); + } else { + if (dom1 < dom2) { + raw_spin_lock(&dom1->slock); + raw_spin_lock(&dom2->slock); + TRACE("acquired %d and %d\n", dom1->cpu, dom2->cpu); + } else { + raw_spin_lock(&dom2->slock); + raw_spin_lock(&dom1->slock); + TRACE("acquired %d and %d\n", dom2->cpu, dom1->cpu); + } + } +} + +/* Directly insert a task in a remote ready queue. This function + * should only be called if this task is a migrating task and its + * last job for this CPU just completed (a new one is released for + * a remote CPU), but the new job is already tardy. + */ +static noinline void insert_task_in_remote_ready(struct task_struct *task) +{ + edffm_domain_t *this = remote_edffm(task_cpu(task)); + edffm_domain_t *remote = remote_edffm(get_partition(task)); + + BUG_ON(get_partition(task) != remote->cpu); + + TRACE_TASK(task, "Migrate From P%d -> To P%d\n", + this->cpu, remote->cpu); + TRACE_TASK(task, "Inserting in remote ready queue\n"); + + WARN_ON(!irqs_disabled()); + + raw_spin_unlock(&this->slock); + mb(); + TRACE_TASK(task,"edffm_lock %d released\n", this->cpu); + + /* lock both ready queues */ + double_domain_lock(this, remote); + mb(); + + __add_ready(&remote->domain, task); + + /* release remote but keep ours */ + raw_spin_unlock(&remote->slock); + TRACE_TASK(task,"edffm_lock %d released\n", remote->cpu); + + /* ask remote cpu to reschedule, we are already rescheduling on this */ + preempt(remote); +} + +static void requeue(struct task_struct* t, rt_domain_t *edf) +{ + if (t->state != TASK_RUNNING) + TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); + + set_rt_flags(t, RT_F_RUNNING); + if (is_released(t, litmus_clock())) { + if (wrong_cpu(t)) { + /* this should only happen if t just completed, but + * its next release is already tardy, so it should be + * migrated and inserted in the remote ready queue + */ + TRACE_TASK(t, "Migrating task already released, " + "move from P%d to P%d\n", + task_cpu(t), get_partition(t)); + + insert_task_in_remote_ready(t); + } else { + /* not a migrat task or the job is on the right CPU */ + __add_ready(edf, t); + } + } else { + if (wrong_cpu(t)) { + + TRACE_TASK(t, "Migrating task, adding remote release\n"); + edffm_add_release_remote(t); + } else { + TRACE_TASK(t, "Adding local release\n"); + add_release(edf, t); + } + } +} + +/* Update statistics for the _current_ job. + * - job_no was incremented _before_ starting this job + * (release_at / prepare_for_next_period) + * - cpu_job_no is incremented when the job completes + */ +static void update_job_counter(struct task_struct *t) +{ + int cpu_pos; + + /* Which CPU counter should be incremented? 
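+	 * E.g. (hypothetical CPU numbers): for a migratory task with
+	 * cpus = {2, 3} whose task_params.cpu is currently 3, edffm_cpu_pos()
+	 * yields 1 and cpu_job_no[1] is incremented.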
*/ + cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t); + t->rt_param.semi_part.cpu_job_no[cpu_pos]++; + + TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n", + t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t), + t->rt_param.task_params.cpu); +} + +/* What is the next cpu for this job? (eq. 8, in EDF-Fm paper) */ +static int next_cpu_for_job(struct task_struct *t) +{ + BUG_ON(!is_migrat_task(t)); + + TRACE_TASK(t, "%u = %u * %u / %u\n", + t->rt_param.job_params.job_no, cur_cpu_job_no(t), + cur_cpu_fract_den(t), cur_cpu_fract_num(t)); + if ((t->rt_param.job_params.job_no) == + (((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) / + cur_cpu_fract_num(t))) + return edffm_params(t).cpus[0]; + + return edffm_params(t).cpus[1]; +} + +/* If needed (the share for task t on this CPU is exhausted), updates + * the task_params.cpu for the _migrating_ task t + */ +static void change_migrat_cpu_if_needed(struct task_struct *t) +{ + BUG_ON(!is_migrat_task(t)); + /* EDF-fm: if it is a migrating task and it has already executed + * the required number of jobs on this CPU, we need to move it + * on its next CPU; changing the cpu here will affect the requeue + * and the next release + */ + if (unlikely(next_cpu_for_job(t) != migrat_cur_cpu(t))) { + + tsk_rt(t)->task_params.cpu = migrat_next_cpu(t); + TRACE_TASK(t, "EDF-fm: will migrate job %d -> %d\n", + task_cpu(t), tsk_rt(t)->task_params.cpu); + return; + } + + TRACE_TASK(t, "EDF-fm: job will stay on %d -> %d\n", + task_cpu(t), tsk_rt(t)->task_params.cpu); +} + +static void job_completion(struct task_struct* t, int forced) +{ + sched_trace_task_completion(t,forced); + TRACE_TASK(t, "job_completion().\n"); + + if (unlikely(is_migrat_task(t))) { + update_job_counter(t); + change_migrat_cpu_if_needed(t); + } + + set_rt_flags(t, RT_F_SLEEP); + prepare_for_next_period(t); +} + +static void edffm_tick(struct task_struct *t) +{ + edffm_domain_t *edffm = local_edffm; + + BUG_ON(is_realtime(t) && t != edffm->scheduled); + + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { + set_tsk_need_resched(t); + TRACE("edffm_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } +} + +static struct task_struct* edffm_schedule(struct task_struct * prev) +{ + edffm_domain_t* edffm = local_edffm; + rt_domain_t* edf = &edffm->domain; + struct task_struct* next; + + int out_of_time, sleep, preempt, exists, blocks, change_cpu, resched; + + raw_spin_lock(&edffm->slock); + + BUG_ON(edffm->scheduled && edffm->scheduled != prev); + BUG_ON(edffm->scheduled && !is_realtime(prev)); + + /* (0) Determine state */ + exists = edffm->scheduled != NULL; + blocks = exists && !is_running(edffm->scheduled); + out_of_time = exists && + budget_enforced(edffm->scheduled) && + budget_exhausted(edffm->scheduled); + sleep = exists && get_rt_flags(edffm->scheduled) == RT_F_SLEEP; + change_cpu = exists && wrong_cpu(edffm->scheduled); + preempt = edffm_preemption_needed(edf, prev); + + BUG_ON(blocks && change_cpu); + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d sleep:%d preempt:%d " + "wrong_cpu:%d state:%d sig:%d\n", + blocks, out_of_time, sleep, preempt, + change_cpu, prev->state, signal_pending(prev)); + + /* If we need to preempt do so. */ + resched = preempt; + + /* If a task blocks we have no choice but to reschedule. 
*/ + if (blocks) + resched = 1; + + /* If a task has just woken up, it was tardy and the wake up + * raced with this schedule, a new job has already been released, + * but scheduled should be enqueued on a remote ready queue, and a + * new task should be selected for the current queue. + */ + if (change_cpu) + resched = 1; + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. We may have to reschedule after + * this. + */ + if ((out_of_time || sleep) && !blocks) { + job_completion(edffm->scheduled, !sleep); + resched = 1; + } + + /* The final scheduling decision. Do we need to switch for some reason? + * Switch if we are in RT mode and have no task or if we need to + * resched. + */ + next = NULL; + if (resched || !exists) { + + if (edffm->scheduled && !blocks) + requeue(edffm->scheduled, edf); + next = __take_ready(edf); + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + if (next) { + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + set_rt_flags(next, RT_F_RUNNING); + } else { + TRACE("becoming idle at %llu\n", litmus_clock()); + } + + edffm->scheduled = next; + raw_spin_unlock(&edffm->slock); + + return next; +} + +/* Prepare a task for running in RT mode + */ +static void edffm_task_new(struct task_struct * t, int on_rq, int running) +{ + rt_domain_t* edf = task_edf(t); + edffm_domain_t* edffm = task_edffm(t); + unsigned long flags; + + TRACE_TASK(t, "EDF-fm: task new, cpu = %d\n", + t->rt_param.task_params.cpu); + + release_at(t, litmus_clock()); + update_job_counter(t); + + /* The task should be running in the queue, otherwise signal + * code will try to wake it up with fatal consequences. + */ + raw_spin_lock_irqsave(&edffm->slock, flags); + if (running) { + /* there shouldn't be anything else running at the time */ + BUG_ON(edffm->scheduled); + edffm->scheduled = t; + } else { + requeue(t, edf); + /* maybe we have to reschedule */ + preempt(edffm); + } + raw_spin_unlock_irqrestore(&edffm->slock, flags); +} + +static void edffm_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + edffm_domain_t* edffm = task_edffm(task); + rt_domain_t* edf = task_edf(task); + lt_t now; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + + TRACE_TASK(task, "acquire edffm %d\n", edffm->cpu); + raw_spin_lock_irqsave(&edffm->slock, flags); + + BUG_ON(edffm != task_edffm(task)); + BUG_ON(is_queued(task)); + + now = litmus_clock(); + if (is_tardy(task, now)) { + if (unlikely(is_migrat_task(task))) { + /* a new job will be released. + * Update current job counter */ + update_job_counter(task); + /* Switch CPU if needed */ + change_migrat_cpu_if_needed(task); + } + /* new sporadic release */ + TRACE_TASK(task, "release new\n"); + release_at(task, now); + sched_trace_task_release(task); + } + + /* Only add to ready queue if it is not the currently-scheduled + * task. This could be the case if a task was woken up concurrently + * on a remote CPU before the executing CPU got around to actually + * de-scheduling the task, i.e., wake_up() raced with schedule() + * and won. 
+ */ + if (edffm->scheduled != task) + requeue(task, edf); + + raw_spin_unlock_irqrestore(&edffm->slock, flags); + TRACE_TASK(task, "release edffm %d\n", edffm->cpu); + TRACE_TASK(task, "wake up done\n"); +} + +static void edffm_task_block(struct task_struct *t) +{ + TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); + + BUG_ON(!is_realtime(t)); + if (is_queued(t)) { + edffm_domain_t *edffm = local_edffm; + TRACE_TASK(t, "task blocked, race with wakeup, " + "remove from queue %d\n", edffm->cpu); + remove(&edffm->domain, t); + } +} + +static void edffm_task_exit(struct task_struct * t) +{ + unsigned long flags; + edffm_domain_t* edffm = task_edffm(t); + rt_domain_t* edf; + + raw_spin_lock_irqsave(&edffm->slock, flags); + if (is_queued(t)) { + /* dequeue */ + edf = task_edf(t); + remove(edf, t); + } + if (edffm->scheduled == t) + edffm->scheduled = NULL; + + TRACE_TASK(t, "RIP\n"); + + preempt(edffm); + raw_spin_unlock_irqrestore(&edffm->slock, flags); +} + +static long edffm_admit_task(struct task_struct* tsk) +{ + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; +} + +/* Plugin object */ +static struct sched_plugin edffm_plugin __cacheline_aligned_in_smp = { + .plugin_name = "EDF-fm", + .tick = edffm_tick, + .task_new = edffm_task_new, + .complete_job = complete_job, + .task_exit = edffm_task_exit, + .schedule = edffm_schedule, + .task_wake_up = edffm_task_wake_up, + .task_block = edffm_task_block, + .admit_task = edffm_admit_task +}; + +static int __init init_edffm(void) +{ + int i; + edffm_domain_t *edffm; + + /* Note, broken if num_online_cpus() may change */ + for (i = 0; i < num_online_cpus(); i++) { + edffm = remote_edffm(i); + edffm->cpu = i; + edffm->scheduled = NULL; + edf_domain_init(&edffm->domain, NULL, edffm_release_jobs); + } + + return register_sched_plugin(&edffm_plugin); +} + +module_init(init_edffm); + diff --git a/litmus/sched_edf_wm.c b/litmus/sched_edf_wm.c new file mode 100644 index 0000000..8b7be32 --- /dev/null +++ b/litmus/sched_edf_wm.c @@ -0,0 +1,688 @@ +/* EDF-WM: based on PSN-EDF. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +typedef struct { + rt_domain_t domain; + int cpu; + struct task_struct* scheduled; /* only RT tasks */ + +/* + * scheduling lock slock + * protects the domain and serializes scheduling decisions + */ +#define slock domain.ready_lock + +} wm_domain_t; + +DEFINE_PER_CPU(wm_domain_t, wm_domains); + +#define TRACE_DOM(dom, fmt, args...) \ + TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args) + + +#define local_domain (&__get_cpu_var(wm_domains)) +#define remote_domain(cpu) (&per_cpu(wm_domains, cpu)) +#define domain_of_task(task) (remote_domain(get_partition(task))) + +static int is_sliced_task(struct task_struct* t) +{ + return tsk_rt(t)->task_params.semi_part.wm.count; +} + +static struct edf_wm_slice* get_last_slice(struct task_struct* t) +{ + int idx = tsk_rt(t)->task_params.semi_part.wm.count - 1; + return tsk_rt(t)->task_params.semi_part.wm.slices + idx; +} + +static void compute_slice_params(struct task_struct* t) +{ + struct rt_param* p = tsk_rt(t); + /* Here we do a little trick to make the generic EDF code + * play well with job slices. We overwrite the job-level + * release and deadline fields with the slice-specific values + * so that we can enqueue this task in an EDF rt_domain_t + * without issue. The actual values are cached in the semi_part.wm + * structure. 
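+	 *
+	 * For instance (hypothetical numbers): with job_release = 100 ms,
+	 * slice->offset = 4 ms and slice->deadline = 9 ms, the task is
+	 * enqueued with an effective release of 104 ms and an effective
+	 * deadline of 109 ms.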
*/ + p->job_params.deadline = p->semi_part.wm.job_release + + p->semi_part.wm.slice->deadline; + p->job_params.release = p->semi_part.wm.job_release + + p->semi_part.wm.slice->offset; + + /* Similarly, we play a trick on the cpu field. */ + p->task_params.cpu = p->semi_part.wm.slice->cpu; + + /* update the per-slice budget reference */ + p->semi_part.wm.exec_time = p->job_params.exec_time; +} + +static void complete_sliced_job(struct task_struct* t) +{ + struct rt_param* p = tsk_rt(t); + + /* We need to undo our trickery to the + * job parameters (see above). */ + p->job_params.release = p->semi_part.wm.job_release; + p->job_params.deadline = p->semi_part.wm.job_deadline; + + /* Ok, now let generic code do the actual work. */ + prepare_for_next_period(t); + + /* And finally cache the updated parameters. */ + p->semi_part.wm.job_release = p->job_params.release; + p->semi_part.wm.job_deadline = p->job_params.deadline; +} + +static lt_t slice_exec_time(struct task_struct* t) +{ + struct rt_param* p = tsk_rt(t); + + /* Compute how much execution time has been consumed + * since last slice advancement. */ + return p->job_params.exec_time - p->semi_part.wm.exec_time; +} + +static lt_t slice_budget(struct task_struct* t) +{ + return tsk_rt(t)->semi_part.wm.slice->budget; +} + +static int slice_budget_exhausted(struct task_struct* t) +{ + return slice_exec_time(t) >= slice_budget(t); +} + +/* assumes positive remainder; overflows otherwise */ +static lt_t slice_budget_remaining(struct task_struct* t) +{ + return slice_budget(t) - slice_exec_time(t); +} + +static int wm_budget_exhausted(struct task_struct* t) +{ + if (is_sliced_task(t)) + return slice_budget_exhausted(t); + else + return budget_exhausted(t); +} + +static void advance_next_slice(struct task_struct* t, int completion_signaled) +{ + int idx; + struct rt_param* p = tsk_rt(t); + + /* make sure this is actually a sliced job */ + BUG_ON(!is_sliced_task(t)); + BUG_ON(is_queued(t)); + + /* determine index of current slice */ + idx = p->semi_part.wm.slice - + p->task_params.semi_part.wm.slices; + + TRACE_TASK(t, "advancing slice %d; excess=%lluns; " + "completion_signaled=%d.\n", + idx, slice_exec_time(t) - slice_budget(t), + completion_signaled); + + if (completion_signaled) + idx = 0; + else + /* increment and wrap around, if necessary */ + idx = (idx + 1) % p->task_params.semi_part.wm.count; + + /* point to next slice */ + p->semi_part.wm.slice = + p->task_params.semi_part.wm.slices + idx; + + /* Check if we need to update essential job parameters. */ + if (!idx) { + /* job completion */ + sched_trace_task_completion(t, !completion_signaled); + TRACE_TASK(t, "completed sliced job" + "(signaled:%d)\n", completion_signaled); + complete_sliced_job(t); + } + + /* Update job parameters for new slice. */ + compute_slice_params(t); +} + +/* assumes time_passed does not advance past the last slice */ +static void fast_forward_slices(struct task_struct* t, lt_t time_passed) +{ + TRACE_TASK(t, "fast forwarding %lluns\n", time_passed); + + /* this is NOT the slice version */ + BUG_ON(budget_remaining(t) <= time_passed); + + if (wm_budget_exhausted(t)) { + /* This can happen if a suspension raced + * with a normal slice advancement. wm_schedule() + * does not process out_of_time when a task blocks. 
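+		 * Advancing one slice here keeps slice_budget_remaining() in
+		 * the loop below well-defined: it assumes a positive remainder
+		 * and would underflow on an already-exhausted slice.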
*/ + TRACE_TASK(t, "block raced with out_of_time?\n"); + advance_next_slice(t, 0); + } + + while (time_passed && + time_passed >= slice_budget_remaining(t)) { + /* slice completely exhausted */ + time_passed -= slice_budget_remaining(t); + tsk_rt(t)->job_params.exec_time += + slice_budget_remaining(t); + + BUG_ON(!slice_budget_exhausted(t)); + BUG_ON(slice_budget_remaining(t) != 0); + BUG_ON(tsk_rt(t)->semi_part.wm.slice == get_last_slice(t)); + + advance_next_slice(t, 0); + } + /* add remainder to exec cost */ + tsk_rt(t)->job_params.exec_time += time_passed; +} + +/* we assume the lock is being held */ +static void preempt(wm_domain_t *dom) +{ + TRACE_DOM(dom, "will be preempted.\n"); + /* We pass NULL as the task since non-preemptive sections are not + * supported in this plugin, so per-task checks are not needed. */ + preempt_if_preemptable(NULL, dom->cpu); +} + +static void wm_domain_init(wm_domain_t* dom, + check_resched_needed_t check, + release_jobs_t release, + int cpu) +{ + edf_domain_init(&dom->domain, check, release); + dom->cpu = cpu; + dom->scheduled = NULL; +} + +static void wm_requeue_remote(struct task_struct *t) +{ + wm_domain_t *dom = domain_of_task(t); + + set_rt_flags(t, RT_F_RUNNING); + if (is_released(t, litmus_clock())) + /* acquires necessary lock */ + add_ready(&dom->domain, t); + else + /* force timer on remote CPU */ + add_release_on(&dom->domain, t, get_partition(t)); +} + +static void wm_requeue_local(struct task_struct* t, rt_domain_t *edf) +{ + if (t->state != TASK_RUNNING) + TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); + + set_rt_flags(t, RT_F_RUNNING); + if (is_released(t, litmus_clock())) + __add_ready(edf, t); + else + add_release(edf, t); /* it has got to wait */ +} + +static int wm_check_resched(rt_domain_t *edf) +{ + wm_domain_t *dom = container_of(edf, wm_domain_t, domain); + + /* because this is a callback from rt_domain_t we already hold + * the necessary lock for the ready queue + */ + if (edf_preemption_needed(edf, dom->scheduled)) { + preempt(dom); + return 1; + } else + return 0; +} + +static void regular_job_completion(struct task_struct* t, int forced) +{ + sched_trace_task_completion(t, forced); + TRACE_TASK(t, "job_completion().\n"); + + set_rt_flags(t, RT_F_SLEEP); + prepare_for_next_period(t); +} + +static void wm_job_or_slice_completion(struct task_struct* t, + int completion_signaled) +{ + if (is_sliced_task(t)) + advance_next_slice(t, completion_signaled); + else + regular_job_completion(t, !completion_signaled); +} + +static void wm_tick(struct task_struct *t) +{ + wm_domain_t *dom = local_domain; + + /* Check for inconsistency. We don't need the lock for this since + * ->scheduled is only changed in schedule, which obviously is not + * executing in parallel on this CPU + */ + BUG_ON(is_realtime(t) && t != dom->scheduled); + + if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) { + set_tsk_need_resched(t); + TRACE_DOM(dom, "budget of %d exhausted in tick\n", + t->pid); + } +} + +static struct task_struct* wm_schedule(struct task_struct * prev) +{ + wm_domain_t *dom = local_domain; + rt_domain_t *edf = &dom->domain; + struct task_struct *next, *migrate = NULL; + + int out_of_time, sleep, preempt, wrong_cpu, exists, blocks, resched; + + raw_spin_lock(&dom->slock); + + /* Sanity checking: + * When a task exits (dead) dom->schedule may be null + * and prev _is_ realtime. 
*/ + BUG_ON(dom->scheduled && dom->scheduled != prev); + BUG_ON(dom->scheduled && !is_realtime(prev)); + + /* (0) Determine state */ + exists = dom->scheduled != NULL; + wrong_cpu = exists && get_partition(dom->scheduled) != dom->cpu; + blocks = exists && !is_running(dom->scheduled); + out_of_time = exists + && budget_enforced(dom->scheduled) + && wm_budget_exhausted(dom->scheduled); + sleep = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP; + preempt = edf_preemption_needed(edf, prev); + + /* If we need to preempt do so. + * The following checks set resched to 1 in case of special + * circumstances. + */ + resched = preempt; + + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d sleep:%d preempt:%d " + "wrong_cpu:%d state:%d sig:%d\n", + blocks, out_of_time, sleep, preempt, wrong_cpu, + prev->state, signal_pending(prev)); + + /* If a task blocks we have no choice but to reschedule. + */ + if (blocks) + resched = 1; + + /* This can happen if sliced task was moved to the next slice + * by the wake_up() code path while still being scheduled. + */ + if (wrong_cpu) + resched = 1; + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. We may have to reschedule after + * this. + */ + if ((out_of_time || sleep) && !blocks) { + wm_job_or_slice_completion(dom->scheduled, sleep); + resched = 1; + } + + /* The final scheduling decision. Do we need to switch for some reason? + * Switch if we are in RT mode and have no task or if we need to + * resched. + */ + next = NULL; + if (resched || !exists) { + if (dom->scheduled && !blocks) { + if (get_partition(dom->scheduled) == dom->cpu) + /* local task */ + wm_requeue_local(dom->scheduled, edf); + else + /* not local anymore; wait until we drop the + * ready queue lock */ + migrate = dom->scheduled; + } + next = __take_ready(edf); + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. */ + if (exists) + next = prev; + + if (next) { + TRACE_TASK(next, "scheduled at %llu (state:%d/%d)\n", litmus_clock(), + next->state, is_running(next)); + set_rt_flags(next, RT_F_RUNNING); + } else if (exists) { + TRACE("becoming idle at %llu\n", litmus_clock()); + } + + dom->scheduled = next; + raw_spin_unlock(&dom->slock); + + /* check if we need to push the previous task onto another queue */ + if (migrate) { + TRACE_TASK(migrate, "schedule-initiated migration to %d\n", + get_partition(migrate)); + wm_requeue_remote(migrate); + } + + return next; +} + + +/* Prepare a task for running in RT mode + */ +static void wm_task_new(struct task_struct * t, int on_rq, int running) +{ + wm_domain_t* dom = domain_of_task(t); + rt_domain_t* edf = &dom->domain; + unsigned long flags; + + TRACE_TASK(t, "edf-wm: task new, cpu = %d\n", + t->rt_param.task_params.cpu); + + /* setup job parameters */ + release_at(t, litmus_clock()); + + /* The task should be running in the queue, otherwise signal + * code will try to wake it up with fatal consequences. 
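+	 *
+	 * For sliced tasks, the block below additionally resets the per-job
+	 * slice state so that the job starts in its first slice with both
+	 * exec_time and the slice-budget reference at zero.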
+ */ + raw_spin_lock_irqsave(&dom->slock, flags); + + if (is_sliced_task(t)) { + /* make sure parameters are initialized consistently */ + tsk_rt(t)->semi_part.wm.exec_time = 0; + tsk_rt(t)->semi_part.wm.job_release = get_release(t); + tsk_rt(t)->semi_part.wm.job_deadline = get_deadline(t); + tsk_rt(t)->semi_part.wm.slice = tsk_rt(t)->task_params.semi_part.wm.slices; + tsk_rt(t)->job_params.exec_time = 0; + } + + if (running) { + /* there shouldn't be anything else running at the time */ + BUG_ON(dom->scheduled); + dom->scheduled = t; + } else { + wm_requeue_local(t, edf); + /* maybe we have to reschedule */ + preempt(dom); + } + raw_spin_unlock_irqrestore(&dom->slock, flags); +} + +static void wm_release_at(struct task_struct *t, lt_t start) +{ + struct rt_param* p = tsk_rt(t); + + if (is_sliced_task(t)) { + /* simulate wrapping to the first slice */ + p->semi_part.wm.job_deadline = start; + p->semi_part.wm.slice = get_last_slice(t); + /* FIXME: creates bogus completion event... */ + advance_next_slice(t, 0); + set_rt_flags(t, RT_F_RUNNING); + } else + /* generic code handles it */ + release_at(t, start); +} + +static lt_t wm_earliest_release(struct task_struct *t, lt_t now) +{ + lt_t deadline; + if (is_sliced_task(t)) + deadline = tsk_rt(t)->semi_part.wm.job_deadline; + else + deadline = get_deadline(t); + if (lt_before(deadline, now)) + return now; + else + return deadline; +} + +static void wm_task_wake_up(struct task_struct *t) +{ + unsigned long flags; + wm_domain_t* dom = domain_of_task(t); + rt_domain_t* edf = &dom->domain; + struct rt_param* p = tsk_rt(t); + lt_t now, sleep_time; + int migrate = 0; + + raw_spin_lock_irqsave(&dom->slock, flags); + BUG_ON(is_queued(t)); + + now = litmus_clock(); + + sleep_time = now - p->semi_part.wm.suspend_time; + + TRACE_TASK(t, "wake_up at %llu after %llu, still-scheduled:%d\n", + now, sleep_time, dom->scheduled == t); + + /* account sleep time as execution time */ + if (get_exec_time(t) + sleep_time >= get_exec_cost(t)) { + /* new sporadic release */ + TRACE_TASK(t, "new sporadic release\n"); + wm_release_at(t, wm_earliest_release(t, now)); + sched_trace_task_release(t); + } else if (is_sliced_task(t)) { + /* figure out which slice we should be executing on */ + fast_forward_slices(t, sleep_time); + /* can't be exhausted now */ + BUG_ON(wm_budget_exhausted(t)); + } else { + /* simply add to the execution time */ + tsk_rt(t)->job_params.exec_time += sleep_time; + } + + + /* Only add to ready queue if it is not the currently-scheduled + * task. This could be the case if a task was woken up concurrently + * on a remote CPU before the executing CPU got around to actually + * de-scheduling the task, i.e., wake_up() raced with schedule() + * and won. 
+ */ + if (dom->scheduled != t) { + if (get_partition(t) == dom->cpu) + wm_requeue_local(t, edf); + else + /* post-pone migration until after unlocking */ + migrate = 1; + } + + raw_spin_unlock_irqrestore(&dom->slock, flags); + + if (migrate) { + TRACE_TASK(t, "wake_up-initiated migration to %d\n", + get_partition(t)); + wm_requeue_remote(t); + } + + TRACE_TASK(t, "wake up done\n"); +} + +static void wm_task_block(struct task_struct *t) +{ + wm_domain_t* dom = domain_of_task(t); + unsigned long flags; + lt_t now = litmus_clock(); + + TRACE_TASK(t, "block at %llu, state=%d\n", now, t->state); + + tsk_rt(t)->semi_part.wm.suspend_time = now; + + raw_spin_lock_irqsave(&dom->slock, flags); + if (is_queued(t)) { + TRACE_TASK(t, "still queued; migration invariant failed?\n"); + remove(&dom->domain, t); + } + raw_spin_unlock_irqrestore(&dom->slock, flags); + + BUG_ON(!is_realtime(t)); +} + +static void wm_task_exit(struct task_struct * t) +{ + unsigned long flags; + wm_domain_t* dom = domain_of_task(t); + rt_domain_t* edf = &dom->domain; + + raw_spin_lock_irqsave(&dom->slock, flags); + if (is_queued(t)) { + /* dequeue */ + remove(edf, t); + } + if (dom->scheduled == t) + dom->scheduled = NULL; + + TRACE_TASK(t, "RIP, now reschedule\n"); + + preempt(dom); + raw_spin_unlock_irqrestore(&dom->slock, flags); +} + +static long wm_check_params(struct task_struct *t) +{ + struct rt_param* p = tsk_rt(t); + struct edf_wm_params* wm = &p->task_params.semi_part.wm; + int i; + lt_t tmp; + + if (!is_sliced_task(t)) { + /* regular task; nothing to check */ + TRACE_TASK(t, "accepted regular (non-sliced) task with " + "%d slices\n", + wm->count); + return 0; + } + + /* (1) Either not sliced, or more than 1 slice. */ + if (wm->count == 1 || wm->count > MAX_EDF_WM_SLICES) { + TRACE_TASK(t, "bad number of slices (%u) \n", + wm->count); + return -EINVAL; + } + + /* (2) The partition has to agree with the first slice. */ + if (get_partition(t) != wm->slices[0].cpu) { + TRACE_TASK(t, "partition and first slice CPU differ " + "(%d != %d)\n", get_partition(t), wm->slices[0].cpu); + return -EINVAL; + } + + /* (3) The total budget must agree. */ + for (i = 0, tmp = 0; i < wm->count; i++) + tmp += wm->slices[i].budget; + if (get_exec_cost(t) != tmp) { + TRACE_TASK(t, "total budget and sum of slice budgets differ\n"); + return -EINVAL; + } + + /* (4) The release of each slice must not precede the previous + * deadline. */ + for (i = 0; i < wm->count - 1; i++) + if (wm->slices[i].deadline > wm->slices[i + 1].offset) { + TRACE_TASK(t, "slice %d overlaps with slice %d\n", + i, i + 1); + return -EINVAL; + } + + /* (5) The budget of each slice must fit within [offset, deadline] */ + for (i = 0; i < wm->count; i++) + if (lt_before(wm->slices[i].deadline, wm->slices[i].offset) || + wm->slices[i].deadline - wm->slices[i].offset < + wm->slices[i].budget) { + TRACE_TASK(t, "slice %d is overloaded\n", i); + return -EINVAL; + } + + /* (6) The budget of each slice must exceed the minimum budget size. */ + for (i = 0; i < wm->count; i++) + if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) { + TRACE_TASK(t, "slice %d is too short\n", i); + return -EINVAL; + } + + /* (7) The CPU of each slice must be different from the previous CPU. */ + for (i = 0; i < wm->count - 1; i++) + if (wm->slices[i].cpu == wm->slices[i + 1].cpu) { + TRACE_TASK(t, "slice %d does not migrate\n", i); + return -EINVAL; + } + + /* (8) The CPU of each slice must be online. 
*/ + for (i = 0; i < wm->count; i++) + if (!cpu_online(wm->slices[i].cpu)) { + TRACE_TASK(t, "slice %d is allocated on offline CPU\n", + i); + return -EINVAL; + } + + /* (9) A sliced task's budget must be precisely enforced. */ + if (!budget_precisely_enforced(t)) { + TRACE_TASK(t, "budget is not precisely enforced " + "(policy: %d).\n", + tsk_rt(t)->task_params.budget_policy); + return -EINVAL; + } + + TRACE_TASK(t, "accepted sliced task with %d slices\n", + wm->count); + + return 0; +} + +static long wm_admit_task(struct task_struct* t) +{ + return task_cpu(t) == get_partition(t) ? wm_check_params(t) : -EINVAL; +} + +/* Plugin object */ +static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = { + .plugin_name = "EDF-WM", + .tick = wm_tick, + .task_new = wm_task_new, + .complete_job = complete_job, + .task_exit = wm_task_exit, + .schedule = wm_schedule, + .release_at = wm_release_at, + .task_wake_up = wm_task_wake_up, + .task_block = wm_task_block, + .admit_task = wm_admit_task +}; + + +static int __init init_edf_wm(void) +{ + int i; + + /* FIXME: breaks with CPU hotplug + */ + for (i = 0; i < num_online_cpus(); i++) { + wm_domain_init(remote_domain(i), + wm_check_resched, + NULL, i); + } + return register_sched_plugin(&edf_wm_plugin); +} + +module_init(init_edf_wm); + diff --git a/litmus/sched_npsf.c b/litmus/sched_npsf.c new file mode 100644 index 0000000..aad99c7 --- /dev/null +++ b/litmus/sched_npsf.c @@ -0,0 +1,1185 @@ +/* + * litmus/sched_npsf.c + * + * Implementation of the NPS-F scheduling algorithm. + * + * A _server_ may span on multiple _reserves_ on different CPUs. + * + * * 1 + * +--------------+ +--> +--------------+ +--> +--------------+ + * | cpu_entry_t | | | npsf_reserve | | | npsf_server | + * +--------------+ | +--------------+ | +--------------+ + * | |1 | | |1 | | | + * | cpu_reserve |--+ 1| server |--+ 1| | + * | | +---| cpu | +---| curr_reserve | + * +--------------+ <-+ +--------------+ <-+ +--------------+ + * 1 * + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +/* Be extra verbose (log spam) */ +#define NPSF_VERBOSE + +#ifdef NPSF_VERBOSE +#define npsf_printk(fmt, arg...) printk(KERN_INFO fmt, ##arg) +#else +#define npsf_printk(fmt, arg...) +#endif + +struct npsf_reserve; + +/* cpu_entry_t + * + * Each cpu has a list of reserves assigned on the cpu. + * Each reserve has a pointer to its server (Notional processor) + * that may be shared among multiple reserves. + */ +typedef struct { + /* lock to protect cpu_reserve and list changes */ + raw_spinlock_t cpu_res_lock; + /* the reserve currently executing on this cpu */ + struct npsf_reserve *cpu_reserve; + /* list of reserves on this cpu */ + struct list_head npsf_reserves; + /* cpu ID */ + int cpu; + /* timer to control reserve switching */ + struct hrtimer timer; + /* virtual timer expiring (wrt time_origin) */ + lt_t should_expire; + /* delegate timer firing to proper cpu */ + struct hrtimer_start_on_info info; + /* FIXME: the ids for servers should be an increasing int >=0 */ + int last_seen_npsf_id; +} cpu_entry_t; + +/* one cpu_entry_t per CPU */ +DEFINE_PER_CPU(cpu_entry_t, npsf_cpu_entries); + +/* This is the "notional processor" (i.e., simple server) abstraction. 
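+ *
+ * Note that the per-server 'lock' below is an alias for the ready_lock of
+ * the embedded rt_domain_t, so holding it serializes ready-queue updates
+ * as well as changes to highest_prio and curr_reserve.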
*/ +typedef struct npsf_server { + /* shared among reserves */ + rt_domain_t dom; + /* the real-time task that this server *SHOULD* be scheduling */ + struct task_struct *highest_prio; + /* current reserve where this dom is executing */ + struct npsf_reserve *curr_reserve; + /* The "first" reserve for this server in a time slot. + * For non-migrating servers this will always be the same as curr_reserve. */ + struct npsf_reserve *first_reserve; + /* Prevent a race between the last CPU in a reserve chain an the first. */ + int first_cpu_wants_ipi; + /* rt_domain_t lock + npsf_server_t lock */ +#define lock dom.ready_lock +} npsf_server_t; + +typedef struct npsf_reserve { + /* Pointer to the server for this reserve: a server may be shared among + * multiple cpus with different budget per cpu, but same npsf_id. */ + npsf_server_t *server; + /* we queue here in npsf_reserves */ + struct list_head node; + /* budget of this npsf_id on this cpu */ + lt_t budget; + /* cpu for this (portion of) server */ + cpu_entry_t *cpu; + /* id of this server, it is the same for the + * same server on different cpus */ + int npsf_id; + /* Can be used to identify if a reserve continues + * next npsf in the chain, needed for proper server deletion */ + struct npsf_reserve *next_npsf; + /* flag that is true if the reserve is currently scheduled */ + int is_currently_scheduled; +} npsf_reserve_t; + +/* synchronization point to start moving and switching servers only + * when all servers have been properly set up by the user. + */ +static atomic_t all_servers_added; +static atomic_t timers_activated = ATOMIC_INIT(0); + +/* Virtual time starts here */ +static lt_t time_origin; + +/* save number of online cpus seen at init time */ +static unsigned int _online_cpus = 1; + +#define no_reserves(entry) (list_empty(&((entry)->npsf_reserves))) +#define local_entry (&__get_cpu_var(npsf_cpu_entries)) +#define remote_entry(cpu) (&per_cpu(npsf_cpu_entries, (cpu))) + +#define server_from_dom(domain) (container_of((domain), npsf_server_t, dom)) + +/* task_entry uses get_partition() therefore we must take care of + * updating correclty the task_params.cpu whenever we switch task, + * otherwise we'll deadlock. + */ +#define task_entry(task) remote_entry(get_partition(task)) +#define domain_edf(npsf) (&((npsf)->server->dom)) + +#define task_npsfid(task) ((task)->rt_param.task_params.semi_part.npsf_id) + +static inline int owns_server(npsf_reserve_t *npsf) +{ + return (npsf->server->curr_reserve == npsf); +} + +/* utility functions to get next and prev domains; must hold entry lock */ +static inline npsf_reserve_t* local_next_reserve(npsf_reserve_t *curr, + cpu_entry_t *entry) +{ + return (list_is_last(&curr->node, &entry->npsf_reserves)) ? + list_entry(entry->npsf_reserves.next, npsf_reserve_t, node) : + list_entry(curr->node.next, npsf_reserve_t, node); + +} + +static inline npsf_reserve_t* local_prev_reserve(npsf_reserve_t *curr, + cpu_entry_t *entry) +{ + return ((curr->node.prev == &entry->npsf_reserves) ? 
+ list_entry(entry->npsf_reserves.prev, npsf_reserve_t, node) : + list_entry(curr->node.prev, npsf_reserve_t, node)); +} +static void requeue(struct task_struct* t, rt_domain_t *edf) +{ + if (t->state != TASK_RUNNING) + TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); + + BUG_ON(is_queued(t)); + + set_rt_flags(t, RT_F_RUNNING); + if (is_released(t, litmus_clock())) + __add_ready(edf, t); + else + add_release(edf, t); /* it has got to wait */ +} + +/* we assume the lock is being held */ +static void preempt(npsf_reserve_t *npsf) +{ + /* Since we do not support non-preemptable sections, + * we don't need to pass in a task. If we call this, + * we want the remote CPU to reschedule, no matter what. + */ + preempt_if_preemptable(NULL, npsf->cpu->cpu); +} + + +static void npsf_preempt_if_server_is_scheduled(npsf_server_t* srv) +{ + npsf_reserve_t *reserve = srv->curr_reserve; + if (reserve->is_currently_scheduled) { + preempt(reserve); + } +} + +/* assumes lock is held by caller */ +static void npsf_reschedule_server(npsf_server_t* srv) +{ + struct task_struct* hp = srv->highest_prio; + rt_domain_t* edf = &srv->dom; + + if (edf_preemption_needed(edf, hp)) { + srv->highest_prio = __take_ready(edf); + if (hp) { + TRACE_TASK(hp, "requeue: no longer highest prio\n"); + requeue(hp, edf); + } + npsf_preempt_if_server_is_scheduled(srv); + } +} + +static void npsf_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + npsf_server_t *srv = server_from_dom(rt); + unsigned long flags; + + raw_spin_lock_irqsave(&srv->lock, flags); + + __merge_ready(rt, tasks); + npsf_reschedule_server(srv); + + raw_spin_unlock_irqrestore(&srv->lock, flags); +} + +static void job_completion(struct task_struct* t, int forced) +{ + sched_trace_task_completion(t, forced); + TRACE_TASK(t, "job_completion().\n"); + + set_rt_flags(t, RT_F_SLEEP); + prepare_for_next_period(t); +} + +/* When did this slot start ? */ +static inline lt_t slot_begin(lt_t now) +{ + return (((now - time_origin) / npsf_slot_length) + * npsf_slot_length + time_origin); +} + +/* Compute the delta from the beginning of the current slot. */ +static inline lt_t delta_from_slot_begin(lt_t now) +{ + return (now - slot_begin(now)); +} + +/* Given an offset into a slot, return the corresponding eligible reserve. + * The output param reservation_end is used to return the (relative) time at which + * the returned reserve ends. + */ +static npsf_reserve_t* get_reserve_for_offset(cpu_entry_t *entry, lt_t offset, + lt_t *reservation_end) +{ + npsf_reserve_t *tmp; + + *reservation_end = 0; + + /* linear search through all reserves, figure out which one is the last one + * to become eligible before delta */ + list_for_each_entry(tmp, &entry->npsf_reserves, node) { + *reservation_end += tmp->budget; + + /* We are always "late". Found tmp is the right one */ + if ((*reservation_end > offset)) + return tmp; + } + + /* error: we should never fall of the reserve list */ + BUG(); + return NULL; +} + +/* Determine which reserve is eligible based on the current time. + */ +static npsf_reserve_t* get_current_reserve(cpu_entry_t *entry) +{ + lt_t reservation_end; + lt_t offset = delta_from_slot_begin(litmus_clock()); + return get_reserve_for_offset(entry, offset, &reservation_end); +} + +/* This is used to ensure that we are "always" late, i.e., to make + * sure that the timer jitter is always positive. This should + * only trigger in KVM (or in real machines with bad TSC drift after + * an IPI). + * + * ATM proper tracing for this event is done in reserve_switch_tick(). 
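+ *
+ * A hypothetical example: with a reserve boundary (should_expire) at
+ * 12 ms, a timer interrupt that arrives at 11.9 ms spins here for the
+ * remaining ~0.1 ms so that the next reserve never starts early.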
+ */ +static noinline ktime_t catchup_time(lt_t from, lt_t target) +{ + while(lt_before(from, target)) { + from = litmus_clock(); + + mb(); + cpu_relax(); + } + + return ns_to_ktime(from); +} + + +/* compute the next ABSOLUTE timer value */ +static lt_t get_next_reserve_switch_time(void) +{ + cpu_entry_t *entry = local_entry; + lt_t now = litmus_clock(); + lt_t slot_start = slot_begin(now); + lt_t offset = now - slot_start; + lt_t next_time; + npsf_reserve_t* reserve; + + /* compute the absolute litmus time of the next reserve switch */ + reserve = get_reserve_for_offset(entry, offset, &next_time); + /* get_reserve_for_offset returns a relative start time; let's make it + absolute */ + next_time += slot_start; + + /* Let's see if we need to skip the next timer. */ + reserve = local_next_reserve(reserve, entry); + /* if the next reserve is a continuing reserve + * (i.e., if it belongs to a migrating server), + * then we skip the timer event because we will + * receive an IPI from the previous processor instead. */ + if (reserve->server->first_reserve != reserve) { + /* it is indeed not the first reserve */ + next_time += reserve->budget; + } + + return next_time; +} + +/* This is the callback for reserve-switching interrupts. + * The timer is reprogrammed to expire at the beginning of every logical + * reserve (i.e., a continuing reserve may be split among different CPUs + * but is a _single_ logical reserve). get_next_reserve_switch_time() + * will return the right next_expire time. + */ +static enum hrtimer_restart reserve_switch_tick(struct hrtimer *timer) +{ + unsigned long flags; + cpu_entry_t *entry; + /* we are using CLOCK_MONOTONIC */ + ktime_t now = ktime_get(); + ktime_t delta; + int late; + + entry = container_of(timer, cpu_entry_t, timer); + raw_spin_lock_irqsave(&entry->cpu_res_lock, flags); + + /* jitter wrt virtual time */ + delta = ktime_sub(now, ns_to_ktime(entry->should_expire)); + late = (ktime_to_ns(delta) >= 0) ? 1 : 0; + +#ifdef NPSF_VERBOSE + if (entry->cpu_reserve && atomic_read(&all_servers_added)) + TRACE("(npsf_id: %d) tick starts at %Ld, " + "now - should_expire: %Ld\n", + entry->cpu_reserve->npsf_id, + ktime_to_ns(now), ktime_to_ns(delta)); +#endif + /* if the timer expires earlier than the should_expire time, + * we delay the switching until time it's synchronized with + * the switch boundary. Otherwise next reserve will execute + * longer (wrong). + */ + if (!late) { + TRACE("+++ Timer fired early, waiting...\n"); + now = catchup_time(ktime_to_ns(now), entry->should_expire); + + delta = ktime_sub(now, ns_to_ktime(entry->should_expire)); + TRACE("+++ done, tick restarts at %Ld, " + "now - should_expire: %Ld\n", + ktime_to_ns(now), ktime_to_ns(delta)); + } + + BUG_ON(!atomic_read(&all_servers_added)); + BUG_ON(no_reserves(entry)); + + /* Compute the next time that we need to be notified. */ + entry->should_expire = get_next_reserve_switch_time(); + + /* kindly ask the Penguin to let us know... 
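+	 * (i.e., re-arm this hrtimer for the next reserve boundary; together
+	 * with the HRTIMER_RESTART return value at the end of this function
+	 * it fires once per logical reserve).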
*/ + hrtimer_set_expires(timer, ns_to_ktime(entry->should_expire)); + + /* set resched flag to reschedule local cpu */ + set_need_resched(); + + raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags); +#ifdef NPSF_VERBOSE + if (atomic_read(&all_servers_added)) + TRACE("(npsf_id: %d) tick ends at %Ld, should_expire: %llu\n", + entry->cpu_reserve->npsf_id, ktime_to_ns(ktime_get()), + entry->should_expire); +#endif + + return HRTIMER_RESTART; +} + +static void npsf_scheduler_tick(struct task_struct *t) +{ + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { + set_tsk_need_resched(t); + TRACE("npsf_tick: %d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } +} + +/* Assumption: caller holds srv lock and prev belongs to + * the currently-scheduled reservation. + */ +static void npsf_schedule_server(struct task_struct* prev, + cpu_entry_t *entry) +{ + npsf_server_t* srv = entry->cpu_reserve->server; + + int out_of_time, sleep, exists, blocks; + + exists = is_realtime(prev); + blocks = exists && !is_running(prev); + out_of_time = exists && + budget_enforced(prev) && + budget_exhausted(prev); + sleep = exists && get_rt_flags(prev) == RT_F_SLEEP; + + if (exists) + TRACE_TASK(prev, "(npsf_id %d) blocks:%d " + "out_of_time:%d sleep:%d state:%d sig:%d\n", + task_npsfid(prev), + blocks, out_of_time, sleep, + prev->state, + signal_pending(prev)); + + /* Any task that is preemptable and either exhausts its + * execution budget or wants to sleep completes. We may have + * to reschedule after this. + */ + if ((out_of_time || sleep) && !blocks) { + job_completion(prev, !sleep); + + if (srv->highest_prio != prev) { + BUG_ON(!is_queued(prev)); + remove(&srv->dom, prev); + } + + requeue(prev, &srv->dom); + + if (srv->highest_prio == prev) + srv->highest_prio = __take_ready(&srv->dom); + } + + BUG_ON(blocks && prev == srv->highest_prio); +// BUG_ON(!srv->highest_prio && jobs_pending(&srv->dom)); +} + +static void npsf_notify_next_cpu(npsf_reserve_t *npsf_prev) +{ + npsf_server_t *srv; + + if (unlikely(npsf_prev->next_npsf != npsf_prev)) { + /* This reserve is actually shared. Let's update its 'owner' + * and notify the next CPU. */ + srv = npsf_prev->server; + raw_spin_lock(&srv->lock); + srv->curr_reserve = npsf_prev->next_npsf; + if (srv->first_reserve != srv->curr_reserve || + srv->first_cpu_wants_ipi) { + /* send an IPI to notify next CPU in chain */ + srv->first_cpu_wants_ipi = 0; + TRACE("sending IPI\n"); + preempt(srv->curr_reserve); + } + raw_spin_unlock(&srv->lock); + } +} + +static struct task_struct* npsf_schedule(struct task_struct * prev) +{ + npsf_reserve_t *npsf_prev, *npsf_next; + npsf_server_t *srv_prev, *srv_next; + cpu_entry_t *entry = local_entry; + struct task_struct *next; + + int reserve_switch; + + /* servers not ready yet, yield to linux */ + if (!atomic_read(&all_servers_added)) + return NULL; + +#ifdef NPSF_VERBOSE + TRACE_TASK(prev, "schedule\n"); +#endif + raw_spin_lock(&entry->cpu_res_lock); + + BUG_ON(no_reserves(entry)); + + /* step 1: what are we currently serving? */ + npsf_prev = entry->cpu_reserve; + srv_prev = npsf_prev->server; + + /* step 2: what SHOULD we be currently serving? */ + npsf_next = get_current_reserve(entry); + srv_next = npsf_next->server; + + /* TODO second measuring point for IPI receiving + * if (!srv_next->measure_wait_IPI) --- the remote reset + * trace_time_end. 
+	 */
+	raw_spin_lock(&srv_prev->lock);
+
+
+	/* step 3: update prev server */
+	if (is_realtime(prev) && task_npsfid(prev) == entry->cpu_reserve->npsf_id)
+		npsf_schedule_server(prev, entry);
+	else if (is_realtime(prev))
+		TRACE_TASK(prev, "npsf_id %d != cpu_reserve npsf_id %d\n",
+				task_npsfid(prev), entry->cpu_reserve->npsf_id);
+
+	/* step 4: determine if we need to switch to another reserve */
+	reserve_switch = npsf_prev != npsf_next;
+
+	if (!reserve_switch) {
+		/* easy case: just enact what the server scheduler decided */
+		next = srv_prev->highest_prio;
+
+		/* Unlock AFTER observing highest_prio to avoid races with
+		 * remote rescheduling activity. */
+		raw_spin_unlock(&srv_prev->lock);
+	} else {
+		/* In this case we have a reserve switch. We are done with the
+		 * previous server, so release its lock. */
+		TRACE("switch reserve npsf_id %d -> npsf_id %d\n",
+				npsf_prev->npsf_id, npsf_next->npsf_id);
+		npsf_prev->is_currently_scheduled = 0;
+		raw_spin_unlock(&srv_prev->lock);
+
+		/* Move on to the next server. */
+
+		raw_spin_lock(&srv_next->lock);
+		npsf_next->is_currently_scheduled = 1;
+
+		/* make sure we are the owner of the server (if it is shared) */
+		if (unlikely(srv_next->curr_reserve != npsf_next)) {
+			/* We raced with the previous owner. Let's schedule
+			 * the previous reserve for now. The previous owner
+			 * will send us an IPI when the server has been pushed
+			 * to us.
+			 */
+			TRACE("(npsf_id %d) raced with previous server owner\n",
+					npsf_next->npsf_id);
+
+			/* check if we are the first CPU, in which case we need
+			 * to request a notification explicitly */
+			if (srv_next->first_reserve == npsf_next)
+				srv_next->first_cpu_wants_ipi = 1;
+
+			npsf_next->is_currently_scheduled = 0;
+			raw_spin_unlock(&srv_next->lock);
+
+			/* just keep the previous reserve one more time */
+			raw_spin_lock(&srv_prev->lock);
+
+			npsf_prev->is_currently_scheduled = 1;
+			/* Note that there is no race condition here:
+			 * curr_reserve did not yet point to this reserve, so
+			 * no processor can have observed the flag we briefly
+			 * set in npsf_next. A processor might have observed
+			 * the flag being zero in npsf_prev and decided not to
+			 * send an IPI, which doesn't matter since we are going
+			 * to reschedule below anyway. */
+
+			next = srv_prev->highest_prio;
+
+			raw_spin_unlock(&srv_prev->lock);
+
+			/* TODO first measuring point for '0'-switching time
+			 * remote is not ready yet and will send us an IPI
+			 * when it's done.
+			 * local:
+			 *	srv_next->measure_wait_IPI = 1;
+			 * remote before sending IPI:
+			 *	if (srv_next->measure_wait_IPI) reset;
+			 */
+		} else {
+			/* invariant: srv->highest_prio is always the
+			 * highest-priority job in the server, and it is always
+			 * runnable. Any update to the server must maintain
+			 * this invariant. */
+			next = srv_next->highest_prio;
+
+			entry->cpu_reserve = npsf_next;
+			raw_spin_unlock(&srv_next->lock);
+
+			/* send an IPI (if necessary) */
+			npsf_notify_next_cpu(npsf_prev);
+		}
+
+	}
+
+	if (next) {
+		TRACE_TASK(next, "(npsf_id %d) scheduled at %llu\n",
+				task_npsfid(next), litmus_clock());
+		set_rt_flags(next, RT_F_RUNNING);
+		/* The TASK_RUNNING flag is set by the Penguin _way_ after
+		 * activating a task. This doesn't matter much to Linux, as
+		 * the rq lock will prevent any changes, but it matters to
+		 * us: a remote cpu waking up this task may requeue it before
+		 * it is marked runnable and send an IPI here; we then schedule
+		 * that task (still "not runnable"), and the running flag is
+		 * only set right before next really starts executing.
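+		 *
+		 * For illustration, one possible interleaving (the wake-up
+		 * path below is the stock kernel's try_to_wake_up()):
+		 *
+		 *	remote CPU                      this CPU
+		 *	----------                      --------
+		 *	try_to_wake_up(next)
+		 *	  -> plugin task_wake_up():
+		 *	     requeue + preemption IPI
+		 *	                                npsf_schedule() picks next
+		 *	                                is_running(next) == 0
+		 *	  next->state = TASK_RUNNING
+		 *	                                next starts executing
+		 *
+		 * The is_running() check below only logs this window.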
+ */ + if (!is_running(next)) + TRACE_TASK(next, "BAD: !TASK_RUNNING\n"); + } else { + /* FIXME npsf_id is wrong if reserve switch but "switching back" + * if we race */ + TRACE("(npsf_id %d) becoming idle at %llu\n", + reserve_switch ? npsf_next->npsf_id : npsf_prev->npsf_id, + litmus_clock()); + } + + raw_spin_unlock(&entry->cpu_res_lock); + + return next; +} + +/* Prepare a task for running in RT mode + * + * We can only be sure that the cpu is a right one (admit checks + * against tasks released on a cpu that doesn't host the right npsf_id) + * but we _cannot_ be sure that: + * 1) the found npsf is the reserve currently running on this cpu. + * 2) the current reserve (the one in charge of scheduling) is not + * running on a different cpu. + */ +static void npsf_task_new(struct task_struct * t, int on_rq, int running) +{ + npsf_reserve_t *npsf; + npsf_server_t *srv; + cpu_entry_t *entry = task_entry(t); + rt_domain_t *edf; + unsigned long flags; + + BUG_ON(no_reserves(entry)); + + /* search the proper npsf_server where to add the new task */ + list_for_each_entry(npsf, &entry->npsf_reserves, node) { + if (npsf->npsf_id == task_npsfid(t)) + break; + } + + + srv = npsf->server; + + /* The task should be running in the queue, otherwise signal + * code will try to wake it up with fatal consequences. + */ + raw_spin_lock_irqsave(&entry->cpu_res_lock, flags); + raw_spin_lock(&srv->lock); + + edf = domain_edf(npsf); + tsk_rt(t)->domain = edf; + + TRACE_TASK(t, "task_new: P%d, task_npsfid %d, " + "npsf->npsf_id %d, entry->cpu %d\n", + t->rt_param.task_params.cpu, task_npsfid(t), + npsf->npsf_id, entry->cpu); + + /* setup job parameters */ + release_at(t, litmus_clock()); + + /* There are four basic scenarios that could happen: + * 1) the server is on another cpu and scheduled; + * 2) the server is on another cpu and not scheduled; + * 3) the server is on this cpu and scheduled; and + * 4) the server is on this cpu and not scheduled. + * + * Whatever scenario we're in, it cannot change while we are + * holding the server lock. + * + * If the new task does not have a high priority, then + * we can just queue it and be done. + * + * In theory, the requeue() and reschedule_server() code + * take care of all that. + */ + + requeue(t, edf); + /* reschedule will cause a remote preemption, if required */ + npsf_reschedule_server(srv); + /* always preempt to make sure we don't + * use the stack if it needs to migrate */ + set_tsk_need_resched(t); + + raw_spin_unlock(&srv->lock); + raw_spin_unlock_irqrestore(&entry->cpu_res_lock, flags); +} + +static void npsf_task_wake_up(struct task_struct *t) +{ + rt_domain_t *edf; + npsf_server_t* srv; + unsigned long flags; + lt_t now; + + BUG_ON(!is_realtime(t)); + + edf = tsk_rt(t)->domain; + srv = server_from_dom(edf); + + raw_spin_lock_irqsave(&srv->lock, flags); + + BUG_ON(is_queued(t)); + + now = litmus_clock(); + /* FIXME: this should be a configurable policy... */ + if (is_tardy(t, now)) { + /* new sporadic release */ + release_at(t, now); + sched_trace_task_release(t); + } + + /* Only add to ready queue if it is not the + * currently-scheduled task. 
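+	 *
+	 * srv->highest_prio plays roughly the role of a per-CPU 'scheduled'
+	 * pointer: the job it names was already removed from the ready queue
+	 * by __take_ready(). Requeueing t while it is still named there would
+	 * leave the same job both selected and queued, so in that case the
+	 * wake-up does nothing and npsf_schedule() simply keeps running it.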
+ */ + if (srv->highest_prio != t) { + requeue(t, edf); + npsf_reschedule_server(srv); + } +#ifdef NPSF_VERBOSE + else + TRACE_TASK(t, "wake_up, is curr_sched, not requeued\n"); +#endif + + raw_spin_unlock_irqrestore(&srv->lock, flags); + + TRACE_TASK(t, "wake up done\n"); +} + +static void remove_from_server(struct task_struct *t, npsf_server_t* srv) +{ + if (srv->highest_prio == t) { + TRACE_TASK(t, "remove from server: is highest-prio task\n"); + srv->highest_prio = NULL; + npsf_reschedule_server(srv); + } else if (is_queued(t)) { + TRACE_TASK(t, "remove from server: removed from queue\n"); + remove(&srv->dom, t); + } +#ifdef NPSF_VERBOSE + else + TRACE_TASK(t, "WARN: where is this task?\n"); +#endif +} + +static void npsf_task_block(struct task_struct *t) +{ + rt_domain_t *edf; + npsf_server_t* srv; + unsigned long flags; + + TRACE_TASK(t, "(npsf_id %d) block at %llu, state=%d\n", + task_npsfid(t), litmus_clock(), t->state); + + BUG_ON(!is_realtime(t)); + + edf = tsk_rt(t)->domain; + srv = server_from_dom(edf); + + raw_spin_lock_irqsave(&srv->lock, flags); + + remove_from_server(t, srv); + + raw_spin_unlock_irqrestore(&srv->lock, flags); +} + +static void npsf_task_exit(struct task_struct * t) +{ + rt_domain_t *edf; + npsf_server_t* srv; + unsigned long flags; + + BUG_ON(!is_realtime(t)); + + edf = tsk_rt(t)->domain; + srv = server_from_dom(edf); + + raw_spin_lock_irqsave(&srv->lock, flags); + + remove_from_server(t, srv); + + raw_spin_unlock_irqrestore(&srv->lock, flags); + + TRACE_TASK(t, "RIP, now reschedule\n"); +} + +static long npsf_admit_task(struct task_struct* tsk) +{ + npsf_reserve_t *npsf; + cpu_entry_t *entry = task_entry(tsk); + int id_ok = 0; + + if (!atomic_read(&all_servers_added)) { + printk(KERN_DEBUG "not all servers added\n"); + return -ENODEV; + } + + /* check to be on the right cpu and on the right server */ + if (task_cpu(tsk) != tsk->rt_param.task_params.cpu) { + printk(KERN_DEBUG "wrong CPU(%d, %d, %d) for npsf_id %d\n", + task_cpu(tsk), tsk->rt_param.task_params.cpu, + entry->cpu, task_npsfid(tsk)); + return -EINVAL; + } + + /* 1) this cpu should have the proper npsf_id in the list + * 2) the rt_domain for the proper npsf_id is not null + */ + list_for_each_entry(npsf, &entry->npsf_reserves, node) { + if (npsf->npsf_id == task_npsfid(tsk)) { + id_ok = 1; + break; + } + } + if (!id_ok) + printk(KERN_DEBUG "wrong npsf_id (%d) for entry %d\n", + task_npsfid(tsk), entry->cpu); + + return id_ok ? 0 : -EINVAL; +} + +/* in litmus.c */ +extern atomic_t rt_task_count; + +/* initialization status control */ +static int reserves_allocated = 0; + +#ifdef NPSF_VERBOSE +static void print_reserve(cpu_entry_t *cpu) +{ + npsf_reserve_t *tmp; + + printk(KERN_INFO "NPS-F: reserves on CPU %d:\n", cpu->cpu); + list_for_each_entry(tmp, &cpu->npsf_reserves, node) { + BUG_ON(!tmp->server); + BUG_ON(!&(tmp->server->dom)); + BUG_ON(tmp->server->highest_prio); + printk(KERN_INFO "%d: %d us\n", tmp->npsf_id, + (int)(tmp->budget / 1000)); + } +} +#endif +/* + * do_add_reserve: add a reserve(cpu, id, budget) + * + * Callback for syscall add_server(); it allows to add the reserve "id" + * to the CPU "cpu". "budget" is the length of the reserve for the + * notional processor (server) id on the cpu cpu. + */ +static long do_add_reserve(npsf_reserve_t **new, cpu_entry_t *cpu, + npsf_server_t *the_dom, int npsf_id, lt_t budget) +{ + unsigned long flags; + + /* npsf_id for each cpu should be given in increasing order, + * it doesn't make sense the same np on the same cpu. 
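+	 *
+	 * For example, assuming two CPUs and notional processors 0, 1, 2,
+	 * with server 1 split across both CPUs:
+	 *
+	 *	CPU0: add id 0, then id 1	-> accepted (ids strictly increase)
+	 *	CPU1: add id 1, then id 2	-> accepted (the check is per CPU)
+	 *	CPU0: add id 1 again		-> rejected with -EINVAL below
+	 *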
+	 * The last_seen_npsf_id is reset when the plugin is activated.
+	 */
+	if (cpu->last_seen_npsf_id >= npsf_id)
+		return -EINVAL;
+
+	/* don't allow server changes if there are tasks in the system */
+	if (atomic_read(&rt_task_count))
+		return -EACCES;
+
+	if ((*new = kmalloc(sizeof(npsf_reserve_t), GFP_ATOMIC)) == NULL)
+		return -ENOMEM;
+
+	(*new)->server = the_dom;
+	(*new)->npsf_id = npsf_id;
+	(*new)->budget = budget;
+	(*new)->cpu = cpu;
+
+	npsf_printk("Add npsf_id %d on P%d with budget %llu\n", (*new)->npsf_id,
+			(*new)->cpu->cpu, (*new)->budget);
+
+	raw_spin_lock_irqsave(&cpu->cpu_res_lock, flags);
+
+	list_add_tail(&(*new)->node, &cpu->npsf_reserves);
+	cpu->last_seen_npsf_id = npsf_id;
+	cpu->cpu_reserve = list_first_entry(&cpu->npsf_reserves, npsf_reserve_t, node);
+
+	raw_spin_unlock_irqrestore(&cpu->cpu_res_lock, flags);
+
+	return 0;
+}
+
+static void kickoff_timers(void)
+{
+	int cpu;
+	cpu_entry_t *entry;
+	lt_t kickoff;
+
+	kickoff = slot_begin(litmus_clock() + npsf_slot_length * 2);
+
+	for_each_online_cpu(cpu) {
+		entry = &per_cpu(npsf_cpu_entries, cpu);
+		hrtimer_start_on(cpu, &entry->info, &entry->timer,
+				ns_to_ktime(kickoff),
+				HRTIMER_MODE_ABS_PINNED);
+		entry->should_expire = kickoff;
+	}
+	atomic_set(&timers_activated, 1);
+}
+
+/* We offer the library a budgets array interface (so we go through the
+ * syscall path only once) and we internally cycle on do_add_reserve.
+ *
+ * last == 1 means that the user is adding the last server and after
+ * the insertion the plugin is properly set up. (FIXME it should be
+ * done in a better way, but I doubt this plugin will ever go
+ * to the master branch).
+ */
+asmlinkage long sys_add_server(int __user *__id,
+		struct npsf_budgets __user *__budgets, int last)
+{
+	int id, i;
+	int ret = -EFAULT;
+	struct npsf_budgets *budgets;
+	cpu_entry_t *entry;
+	npsf_server_t *npsfserver;
+	npsf_reserve_t *npsf_reserve_array[NR_CPUS];
+	npsf_reserve_t *first_reserve;
+
+	if (_online_cpus != num_online_cpus())
+		return ret;
+
+	if (copy_from_user(&id, __id, sizeof(id)))
+		return ret;
+
+	budgets = kmalloc(_online_cpus * sizeof(struct npsf_budgets),
+			GFP_ATOMIC);
+	if (!budgets)
+		return -ENOMEM;
+
+	for (i = 0; i < _online_cpus; i++) {
+		budgets[i].cpu = NO_CPU;
+		budgets[i].budget = 0;
+	}
+
+	if (copy_from_user(budgets, __budgets,
+				sizeof(struct npsf_budgets) * _online_cpus))
+		goto err;
+
+	/* initialize the npsf_server_t for this npsf_server series */
+	npsfserver = kmalloc(sizeof(npsf_server_t), GFP_ATOMIC);
+	if (!npsfserver) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	edf_domain_init(&npsfserver->dom, NULL, npsf_release_jobs);
+	npsfserver->highest_prio = NULL;
+
+	/* initialize all npsf_reserve_t for this server */
+	for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) {
+		entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu);
+		if ((ret = do_add_reserve(&npsf_reserve_array[i], entry,
+					npsfserver,
+					id, budgets[i].budget)) < 0)
+			goto err;
+	}
+	/* set the current reserve to the first (and possibly unique)
+	 * slice for this npsf_id */
+	npsfserver->curr_reserve = npsf_reserve_array[0];
+	npsfserver->first_reserve = npsf_reserve_array[0];
+	npsfserver->first_cpu_wants_ipi = 0;
+	for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) {
+
+		if (i == 0 && budgets[i+1].cpu == NO_CPU) {
+			/* Fixed reserve always has itself as next */
+			npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i];
+		} else if (((i+1) < _online_cpus) &&
+				(i > 0 && budgets[i+1].cpu == NO_CPU)) {
+			/* Last reserve in the chain has the first reserve as next */
+			npsf_reserve_array[i]->next_npsf = 
npsf_reserve_array[0]; + } else { + /* Normal continuing reserve */ + npsf_reserve_array[i]->next_npsf = npsf_reserve_array[i+1]; + } + } +#ifdef NPSF_VERBOSE + for (i = 0; budgets[i].cpu != NO_CPU && i < _online_cpus; i++) { + entry = &per_cpu(npsf_cpu_entries, budgets[i].cpu); + print_reserve(entry); + } +#endif + + if (last) { + /* force the first slot switching by setting the + * current_reserve to the last server for each cpu. + * + * FIXME:don't assume there exists at least one reserve per CPU + */ + for_each_online_cpu(i) { + entry = &per_cpu(npsf_cpu_entries, i); + first_reserve = list_entry(entry->npsf_reserves.next, + npsf_reserve_t, node); + + first_reserve->server->curr_reserve = first_reserve; + entry->cpu_reserve = first_reserve; + npsf_printk("npsf_id %d is the current reserve " + "and server on CPU %d\n", + first_reserve->npsf_id, entry->cpu); + + } + + kickoff_timers(); + + /* real plugin enable */ + atomic_set(&all_servers_added, 1); + mb(); + } + + /* at least one server was initialized and may need deletion */ + reserves_allocated = 1; +err: + kfree(budgets); + return ret; +} + + +/* Cancel server_reschedule_tick() hrtimers. Wait for all callbacks + * to complete. The function is triggered writing 0 as npsf_slot_length. + */ +void npsf_hrtimers_cleanup(void) +{ + int cpu; + cpu_entry_t *entry; + int redo; + + if (!atomic_read(&timers_activated)) + return; + + atomic_set(&timers_activated, 0); + + /* prevent the firing of the timer on this cpu */ + do { + redo = 0; + for_each_online_cpu(cpu) { + entry = &per_cpu(npsf_cpu_entries, cpu); + + /* if callback active, skip it for now and redo later */ + if (hrtimer_try_to_cancel(&entry->timer) == -1) { + redo = 1; +#ifdef NPSF_VERBOSE + printk(KERN_INFO "(P%d) hrtimer on P%d was " + "active, try to delete again\n", + get_cpu(), cpu); + put_cpu(); +#endif + } + } + } while (redo); + + printk(KERN_INFO "npsf hrtimers deleted\n"); +} + +static void cleanup_npsf(void) +{ + int cpu; + cpu_entry_t *entry; + struct list_head *nd, *next; + npsf_reserve_t *tmp, *tmp_save; + + for_each_online_cpu(cpu) { + entry = &per_cpu(npsf_cpu_entries, cpu); + + /* FIXME probably not needed as we should be the only cpu + * doing the removal */ + raw_spin_lock(&entry->cpu_res_lock); + + list_for_each_safe(nd, next, &entry->npsf_reserves) { + tmp = list_entry(nd, npsf_reserve_t, node); + npsf_printk("Del. (id, cpu):(%d, %d)\n", + tmp->npsf_id, + tmp->cpu->cpu); + if (tmp->server) { + npsf_printk("Del. reserves for npsf_id %d\n", + tmp->npsf_id); + tmp_save = tmp; + while (tmp_save->next_npsf && + tmp_save->next_npsf != tmp) { + tmp_save = tmp_save->next_npsf; + tmp_save->server = NULL; + } + npsf_printk("Freeing server 0x%p\n", tmp->server); + kfree(tmp->server); + } + npsf_printk("Freeing npsf_reserve_t 0x%p\n", tmp); + kfree(tmp); + } + list_del(&entry->npsf_reserves); + raw_spin_unlock(&entry->cpu_res_lock); + } +} + +/* prevent plugin deactivation if timers are still active */ +static long npsf_deactivate_plugin(void) +{ + return (atomic_read(&timers_activated)) ? 
-1 : 0; +} + +static long npsf_activate_plugin(void) +{ + int cpu; + cpu_entry_t *entry; + ktime_t now = ktime_get(); + + /* prevent plugin switching if timers are active */ + if (atomic_read(&timers_activated)) + return -1; + + atomic_set(&all_servers_added, 0); + + /* de-allocate old servers (if any) */ + if (reserves_allocated) + cleanup_npsf(); + + _online_cpus = num_online_cpus(); + + for_each_online_cpu(cpu) { + entry = &per_cpu(npsf_cpu_entries, cpu); + + raw_spin_lock_init(&entry->cpu_res_lock); + + entry->cpu_reserve = NULL; + INIT_LIST_HEAD(&entry->npsf_reserves); + + entry->cpu = cpu; + hrtimer_init(&entry->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS_PINNED); + + /* initialize (reinitialize) pull timers */ + hrtimer_start_on_info_init(&entry->info); + + entry->timer.function = reserve_switch_tick; + entry->last_seen_npsf_id = -1; + } + + printk(KERN_INFO "NPS-F activated: slot length = %lld ns\n", + npsf_slot_length); + + /* time starts now! */ + time_origin = (lt_t) ktime_to_ns(now); + TRACE("Time_origin = %llu\n", time_origin); + return 0; +} + +/* Plugin object */ +static struct sched_plugin npsf_plugin __cacheline_aligned_in_smp = { + .plugin_name = "NPS-F", + + .tick = npsf_scheduler_tick, + .task_new = npsf_task_new, + .complete_job = complete_job, + .task_exit = npsf_task_exit, + .schedule = npsf_schedule, + .task_wake_up = npsf_task_wake_up, + .task_block = npsf_task_block, + .admit_task = npsf_admit_task, + .activate_plugin = npsf_activate_plugin, + .deactivate_plugin = npsf_deactivate_plugin, +}; + +static int __init init_npsf(void) +{ + return register_sched_plugin(&npsf_plugin); +} + +static void __exit exit_npsf(void) +{ + if (atomic_read(&timers_activated)) { + atomic_set(&timers_activated, 0); + return; + } + + if (reserves_allocated) + cleanup_npsf(); +} + +module_init(init_npsf); +module_exit(exit_npsf); + diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 3543b7b..3036df9 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c @@ -179,6 +179,12 @@ struct sched_plugin linux_sched_plugin = { int cluster_cache_index = 2; /* + * Slot length (in ns) for NPS-F semi-partitioned plugin. + * This value can be changed at "runtime" through proc file. + */ +lt_t npsf_slot_length = 5 * NSEC_PER_MSEC; + +/* * The reference to current plugin that is used to schedule tasks within * the system. It stores references to actual function implementations * Should be initialized by calling "init_***_plugin()"
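
To make the budgets-array interface of sys_add_server() concrete, a minimal user-space sketch follows. It assumes a 2-CPU machine; the add_server() wrapper, the MS() helper, and the NO_CPU value are illustrative assumptions (the library binding is not part of this patch), while struct npsf_budgets mirrors the kernel definition above.

/* Sketch (not part of the patch): configure one fixed server (id 0 on
 * CPU 0) and one server split across CPU 0 and CPU 1 (id 1), assuming
 * the default 5 ms slot length.
 */

#define NO_CPU	0xffffffff			/* assumed to match the kernel */
#define MS(x)	((unsigned long long)(x) * 1000 * 1000)	/* ms -> ns */

struct npsf_budgets {				/* mirrors include/litmus/rt_param.h */
	int cpu;
	unsigned long long budget;		/* lt_t, in nanoseconds */
};

/* hypothetical wrapper around the add_server syscall */
int add_server(int *id, struct npsf_budgets *budgets, int last);

int setup_servers(void)
{
	struct npsf_budgets budgets[2];		/* one entry per online CPU */
	int id, ret;

	/* server 0: fixed, 2 ms of every slot on CPU 0;
	 * the unused entry is terminated with NO_CPU */
	budgets[0].cpu = 0;
	budgets[0].budget = MS(2);
	budgets[1].cpu = NO_CPU;
	budgets[1].budget = 0;
	id = 0;
	ret = add_server(&id, budgets, 0);	/* more servers follow */
	if (ret)
		return ret;

	/* server 1: split, 3 ms on CPU 0 and 2 ms on CPU 1; last == 1
	 * makes the plugin arm the reserve-switching timers and enable
	 * scheduling (all servers added) */
	budgets[0].cpu = 0;
	budgets[0].budget = MS(3);
	budgets[1].cpu = 1;
	budgets[1].budget = MS(2);
	id = 1;
	return add_server(&id, budgets, 1);
}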