From 5649fefe911fdadaefa579eaa672e7de583f113f Mon Sep 17 00:00:00 2001
From: Jeremy Erickson <jerickso@cs.unc.edu>
Date: Sat, 26 May 2012 16:14:12 -0400
Subject: Initial GSN-EDF-split plugin

---
 litmus/Makefile              |   1 +
 litmus/sched_gsn_edf_split.c | 965 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 966 insertions(+)
 create mode 100644 litmus/sched_gsn_edf_split.c

(limited to 'litmus')

diff --git a/litmus/Makefile b/litmus/Makefile
index 7338180f196f..8efcb699935b 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -17,6 +17,7 @@ obj-y     = sched_plugin.o litmus.o \
 	    bheap.o \
 	    ctrldev.o \
 	    sched_gsn_edf.o \
+        sched_gsn_edf_split.o \
 	    sched_psn_edf.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
diff --git a/litmus/sched_gsn_edf_split.c b/litmus/sched_gsn_edf_split.c
new file mode 100644
index 000000000000..1a4b06391d12
--- /dev/null
+++ b/litmus/sched_gsn_edf_split.c
@@ -0,0 +1,965 @@
+/*
+ * litmus/sched_gsn_edf_split.c
+ *
+ * Implementation of the GSN-EDF scheduling algorithm with job splitting, i.e.
+ * GSN-EDF-split.
+ *
+ * This plugin is a modified version of the prior GSN-EDF plugin in
+ * litmus/sched_gsn_edf.c
+ */
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+#include <litmus/sched_trace.h>
+#include <litmus/trace.h>
+
+#include <litmus/preempt.h>
+
+#include <litmus/bheap.h>
+
+#ifdef CONFIG_SCHED_CPU_AFFINITY
+#include <litmus/affinity.h>
+#endif
+
+#include <linux/module.h>
+
+/* cpu_entry_t - per-CPU record of the linked and scheduled state, plus the
+ *               timer fields added for job splitting */
+typedef struct  {
+	int 			cpu;		/* index of this CPU */
+	struct task_struct*	linked;		/* only RT tasks */
+	struct task_struct*	scheduled;	/* only RT tasks */
+	struct bheap_node*	hn;		/* this CPU's node in the CPU heap */
+	struct hrtimer		split_timer;	/* not referenced elsewhere in this file yet */
+	int			timer_armed;	/* not referenced elsewhere in this file yet */
+} cpu_entry_t;
+DEFINE_PER_CPU(cpu_entry_t, gsnedfsplit_cpu_entries);
+
+cpu_entry_t* gsnedfsplit_cpus[NR_CPUS];	/* indexed by CPU; filled in by init_gsn_edf() */
+
+/* CPU heap: the cpus queue themselves according to priority in here */
+static struct bheap_node gsnedfsplit_heap_node[NR_CPUS];
+static struct bheap      gsnedfsplit_cpu_heap;
+
+static rt_domain_t gsnedfsplit;	/* the one domain shared by all CPUs in this plugin */
+#define gsnedfsplit_lock (gsnedfsplit.ready_lock)
+
+
+/* Uncomment this if you want to see all scheduling decisions in the
+ * TRACE() log.
+#define WANT_ALL_SCHED_EVENTS
+ */
+
+static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
+{
+	/* Comparator for the CPU heap; each node's value is a cpu_entry_t.
+	 * The operands are deliberately swapped when calling edf_higher_prio():
+	 * we want the lowest-priority CPU at the top of the heap.
+	 */
+	cpu_entry_t *lhs = _a->value;
+	cpu_entry_t *rhs = _b->value;
+	return edf_higher_prio(rhs->linked, lhs->linked);
+}
+
+/* update_cpu_position - Re-queue the cpu entry (delete it if it is already in
+ *                       the heap, then insert it) so that the cpu queue stays
+ *                       ordered. Caller must hold gsnedfsplit lock. */
+static void update_cpu_position(cpu_entry_t *entry)
+{
+	if (likely(bheap_node_in_heap(entry->hn)))
+		bheap_delete(cpu_lower_prio, &gsnedfsplit_cpu_heap, entry->hn);
+	bheap_insert(cpu_lower_prio, &gsnedfsplit_cpu_heap, entry->hn); /* NOTE(review): hn is re-read on purpose? bheap_delete may retarget it -- confirm before caching */
+}
+
+/* lowest_prio_cpu - peek at the entry on top of the CPU heap; the peek result
+ * is dereferenced unchecked. Caller must hold gsnedfsplit lock. */
+static cpu_entry_t* lowest_prio_cpu(void)
+{
+	struct bheap_node *top = bheap_peek(cpu_lower_prio, &gsnedfsplit_cpu_heap);
+	return top->value;
+}
+
+
+/* link_task_to_cpu - Update the link of a CPU; linked may be NULL to clear it.
+ *                    Handles the case where the to-be-linked task is already
+ *                    scheduled on a different CPU (the two links are swapped).
+ */
+static noinline void link_task_to_cpu(struct task_struct* linked,
+				      cpu_entry_t *entry)
+{
+	cpu_entry_t *sched;
+	struct task_struct* tmp;
+	int on_cpu;
+
+	BUG_ON(linked && !is_realtime(linked));
+
+	/* The task currently linked to this entry (if any) loses its link. */
+	if (entry->linked) {
+		entry->linked->rt_param.linked_on = NO_CPU;
+	}
+
+	/* Link new task to CPU. */
+	if (linked) {
+		set_rt_flags(linked, RT_F_RUNNING);
+		/* handle the case where the task is already scheduled somewhere! */
+		on_cpu = linked->rt_param.scheduled_on;
+		if (on_cpu != NO_CPU) {
+			sched = &per_cpu(gsnedfsplit_cpu_entries, on_cpu);
+			/* this should only happen if not linked already */
+			BUG_ON(sched->linked == linked);
+
+			/* If we are already scheduled on the CPU to which we
+			 * wanted to link, we don't need to do the swap --
+			 * we just link ourselves to the CPU and depend on
+			 * the caller to get things right.
+			 */
+			if (entry != sched) {
+				TRACE_TASK(linked,
+					   "already scheduled on %d, updating link.\n",
+					   sched->cpu);
+				tmp = sched->linked;	/* task displaced from the other CPU */
+				linked->rt_param.linked_on = sched->cpu;
+				sched->linked = linked;
+				update_cpu_position(sched);
+				linked = tmp;	/* the displaced task is linked to entry below */
+			}
+		}
+		if (linked) /* might be NULL due to swap */
+			linked->rt_param.linked_on = entry->cpu;
+	}
+	entry->linked = linked;
+#ifdef WANT_ALL_SCHED_EVENTS
+	if (linked)
+		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
+	else
+		TRACE("NULL linked to %d.\n", entry->cpu);
+#endif
+	update_cpu_position(entry);	/* the link changed, so re-sort this CPU */
+}
+
+/* unlink - Make sure a task is neither linked to a CPU nor left in a queue:
+ *          clear its CPU link if it has one, otherwise dequeue it if it is
+ *          queued. Must hold gsnedfsplit_lock. */
+static noinline void unlink(struct task_struct* t)
+{
+    	cpu_entry_t *entry;
+
+	if (t->rt_param.linked_on != NO_CPU) {
+		/* unlink: the CPU it was linked to is left with a NULL link */
+		entry = &per_cpu(gsnedfsplit_cpu_entries, t->rt_param.linked_on);
+		t->rt_param.linked_on = NO_CPU;
+		link_task_to_cpu(NULL, entry);
+	} else if (is_queued(t)) {
+		/* This is an interesting situation: t is scheduled,
+		 * but was just recently unlinked.  It cannot be
+		 * linked anywhere else (because then it would have
+		 * been relinked to this CPU), thus it must be in some
+		 * queue. We must remove it from the list in this
+		 * case.
+		 */
+		remove(&gsnedfsplit, t);
+	}
+}
+
+
+/* preempt - ask a CPU to reschedule, by calling preempt_if_preemptable() for
+ *           the task currently scheduled on that CPU */
+static void preempt(cpu_entry_t *entry)
+{
+	preempt_if_preemptable(entry->scheduled, entry->cpu);
+}
+
+/* requeue - Insert an unlinked task into the gsn-edf-split domain: released
+ *           jobs go to the ready queue, all others to the release queue.
+ *           Caller must hold gsnedfsplit_lock. */
+static noinline void requeue(struct task_struct* task)
+{
+	BUG_ON(!task);
+	/* the task must not already sit in a queue */
+	BUG_ON(is_queued(task));
+
+	if (is_released(task, litmus_clock())) {
+		__add_ready(&gsnedfsplit, task);
+		return;
+	}
+	/* not released yet: it has got to wait */
+	add_release(&gsnedfsplit, task);
+}
+
+#ifdef CONFIG_SCHED_CPU_AFFINITY
+/* Run the get_nearest_available_cpu() macro from start; result may be NULL. */
+static cpu_entry_t* gsnedfsplit_get_nearest_available_cpu(cpu_entry_t *start)
+{
+	cpu_entry_t *affinity;
+#ifdef CONFIG_RELEASE_MASTER
+	int skip_cpu = gsnedfsplit.release_master;
+#else
+	int skip_cpu = NO_CPU;	/* no release master configured */
+#endif
+
+	get_nearest_available_cpu(affinity, start, gsnedfsplit_cpu_entries,
+				  skip_cpu);	/* no #ifdef inside macro arguments */
+	return affinity;
+}
+#endif
+
+/* check for any necessary preemptions, one lowest-priority CPU at a time */
+static void check_for_preemptions(void)
+{
+	struct task_struct *task;
+	cpu_entry_t *last;
+
+	for (last = lowest_prio_cpu();
+	     edf_preemption_needed(&gsnedfsplit, last->linked);
+	     last = lowest_prio_cpu()) {
+		/* preemption necessary: take the task at the head of the ready queue */
+		task = __take_ready(&gsnedfsplit);
+		TRACE("check_for_preemptions: attempting to link task %d to %d\n",
+		      task->pid, last->cpu);
+
+#ifdef CONFIG_SCHED_CPU_AFFINITY
+		{
+			cpu_entry_t *affinity =
+					gsnedfsplit_get_nearest_available_cpu(
+						&per_cpu(gsnedfsplit_cpu_entries, task_cpu(task)));
+			if (affinity)
+				last = affinity;	/* presumably has nothing linked, so no requeue -- confirm */
+			else if (last->linked)
+				requeue(last->linked);
+		}
+#else
+		if (last->linked)
+			requeue(last->linked);	/* the displaced task goes back to a queue */
+#endif
+
+		link_task_to_cpu(task, last);
+		preempt(last);
+	}
+}
+
+/* gsnedfsplit_job_arrival: task is either resumed or released */
+static noinline void gsnedfsplit_job_arrival(struct task_struct* task)
+{
+	BUG_ON(!task);
+
+	requeue(task);	/* into the ready or the release queue */
+	check_for_preemptions();	/* may link the task to a CPU right away */
+}
+
+static void gsnedfsplit_release_jobs(rt_domain_t* rt, struct bheap* tasks)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&gsnedfsplit_lock, flags);
+
+	__merge_ready(rt, tasks);	/* presumably merges the released tasks into rt's ready queue */
+	check_for_preemptions();	/* the new arrivals may preempt linked tasks */
+
+	raw_spin_unlock_irqrestore(&gsnedfsplit_lock, flags);
+}
+
+/* job_completion - finish t's current job; caller holds gsnedfsplit_lock */
+static noinline void job_completion(struct task_struct *t, int forced)
+{
+	BUG_ON(!t);
+
+	sched_trace_task_completion(t, forced);
+
+	TRACE_TASK(t, "job_completion().\n");
+
+	/* set flags */
+	set_rt_flags(t, RT_F_SLEEP);
+	/* prepare for next period */
+	prepare_for_next_period(t);
+	if (is_released(t, litmus_clock()))
+		sched_trace_task_release(t);	/* the next job is already released */
+	/* unlink */
+	unlink(t);
+	/* requeue
+	 * But don't requeue a blocking task. */
+	if (is_running(t))
+		gsnedfsplit_job_arrival(t);
+}
+
+/* gsnedfsplit_tick - invoked for every local timer interrupt.
+ *
+ *                   Acts only on a real-time task whose enforced budget is
+ *                   exhausted: a preemptable task triggers a local reschedule,
+ *                   a user-level non-preemptable task is asked to exit its np
+ *                   section. */
+static void gsnedfsplit_tick(struct task_struct* t)
+{
+	if (!(is_realtime(t) && budget_enforced(t) && budget_exhausted(t)))
+		return;
+
+	if (!is_np(t)) {
+		/* np tasks are handled in the other branch: they will be
+		 * preempted when they become preemptable again */
+		litmus_reschedule_local();
+		TRACE("gsnedfsplit_scheduler_tick: "
+		      "%d is preemptable "
+		      " => FORCE_RESCHED\n", t->pid);
+	} else if (is_user_np(t)) {
+		TRACE("gsnedfsplit_scheduler_tick: "
+		      "%d is non-preemptable, "
+		      "preemption delayed.\n", t->pid);
+		request_exit_np(t);
+	}
+}
+
+/* Getting schedule() right is a bit tricky. schedule() may not make any
+ * assumptions on the state of the current task since it may be called for a
+ * number of reasons. The reasons include a scheduler_tick() determined that it
+ * was necessary, because sys_exit_np() was called, because some Linux
+ * subsystem determined so, or even (in the worst case) because there is a bug
+ * hidden somewhere. Thus, we must take extreme care to determine what the
+ * current state is.
+ *
+ * The CPU could currently be scheduling a task (or not), be linked (or not).
+ *
+ * The following assertions for the scheduled task could hold:
+ *
+ *      - !is_running(scheduled)        // the job blocks
+ *	- scheduled->timeslice == 0	// the job completed (forcefully)
+ *	- get_rt_flag() == RT_F_SLEEP	// the job completed (by syscall)
+ * 	- linked != scheduled		// we need to reschedule (for any reason)
+ * 	- is_np(scheduled)		// rescheduling must be delayed,
+ *					   sys_exit_np must be requested
+ *
+ * Any of these can occur together.
+ */
+static struct task_struct* gsnedfsplit_schedule(struct task_struct * prev)
+{
+	cpu_entry_t* entry = &__get_cpu_var(gsnedfsplit_cpu_entries);
+	int out_of_time, sleep, preempt, np, exists, blocks;
+	struct task_struct* next = NULL;
+
+#ifdef CONFIG_RELEASE_MASTER
+	/* Bail out early if we are the release master.
+	 * The release master never schedules any real-time tasks.
+	 */
+	if (unlikely(gsnedfsplit.release_master == entry->cpu)) {
+		sched_state_task_picked();
+		return NULL;
+	}
+#endif
+
+	raw_spin_lock(&gsnedfsplit_lock);
+
+	/* sanity checking: entry->scheduled must agree with prev */
+	BUG_ON(entry->scheduled && entry->scheduled != prev);
+	BUG_ON(entry->scheduled && !is_realtime(prev));
+	BUG_ON(is_realtime(prev) && !entry->scheduled);
+
+	/* (0) Determine state */
+	exists      = entry->scheduled != NULL;
+	blocks      = exists && !is_running(entry->scheduled);
+	out_of_time = exists &&
+				  budget_enforced(entry->scheduled) &&
+				  budget_exhausted(entry->scheduled);
+	np 	    = exists && is_np(entry->scheduled);
+	sleep	    = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
+	preempt     = entry->scheduled != entry->linked;
+
+#ifdef WANT_ALL_SCHED_EVENTS
+	TRACE_TASK(prev, "invoked gsnedfsplit_schedule.\n");
+#endif
+
+	if (exists)
+		TRACE_TASK(prev,
+			   "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
+			   "state:%d sig:%d\n",
+			   blocks, out_of_time, np, sleep, preempt,
+			   prev->state, signal_pending(prev));
+	if (entry->linked && preempt)
+		TRACE_TASK(prev, "will be preempted by %s/%d\n",
+			   entry->linked->comm, entry->linked->pid);
+
+
+	/* If a task blocks we have no choice but to reschedule.
+	 */
+	if (blocks)
+		unlink(entry->scheduled);
+
+	/* Request a sys_exit_np() call if we would like to preempt but cannot.
+	 * We need to make sure to update the link structure anyway in case
+	 * that we are still linked. Multiple calls to request_exit_np() don't
+	 * hurt.
+	 */
+	if (np && (out_of_time || preempt || sleep)) {
+		unlink(entry->scheduled);
+		request_exit_np(entry->scheduled);
+	}
+
+	/* Any task that is preemptable and either exhausts its execution
+	 * budget or wants to sleep completes. We may have to reschedule after
+	 * this. Don't do a job completion if we block (can't have timers running
+	 * for blocked jobs). Preemptions go first for the same reason.
+	 */
+	if (!np && (out_of_time || sleep) && !blocks && !preempt)
+		job_completion(entry->scheduled, !sleep);
+
+	/* Link pending task if we became unlinked.
+	 */
+	if (!entry->linked)
+		link_task_to_cpu(__take_ready(&gsnedfsplit), entry);
+
+	/* The final scheduling decision. Do we need to switch for some reason?
+	 * If linked is different from scheduled, then select linked as next.
+	 */
+	if ((!np || blocks) &&
+	    entry->linked != entry->scheduled) {
+		/* Schedule a linked job? */
+		if (entry->linked) {
+			entry->linked->rt_param.scheduled_on = entry->cpu;
+			next = entry->linked;
+			TRACE_TASK(next, "scheduled_on = P%d\n", smp_processor_id());
+		}
+		if (entry->scheduled) {
+			/* not gonna be scheduled soon */
+			entry->scheduled->rt_param.scheduled_on = NO_CPU;
+			TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
+		}
+	} else
+		/* Only override Linux scheduler if we have a real-time task
+		 * scheduled that needs to continue.
+		 */
+		if (exists)
+			next = prev;
+
+	sched_state_task_picked();
+
+	raw_spin_unlock(&gsnedfsplit_lock);
+
+#ifdef WANT_ALL_SCHED_EVENTS
+	TRACE("gsnedfsplit_lock released, next=0x%p\n", next);
+
+	if (next)
+		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
+	else if (exists && !next)
+		TRACE("becomes idle at %llu.\n", litmus_clock());
+#endif
+
+
+	return next;	/* NULL when no real-time task was selected */
+}
+
+
+/* gsnedfsplit_finish_switch - we just finished the switch away from prev;
+ * record the now-current task as scheduled here if it is a real-time task */
+static void gsnedfsplit_finish_switch(struct task_struct *prev)
+{
+	cpu_entry_t* 	entry = &__get_cpu_var(gsnedfsplit_cpu_entries);
+
+	entry->scheduled = is_realtime(current) ? current : NULL;
+#ifdef WANT_ALL_SCHED_EVENTS
+	TRACE_TASK(prev, "switched away from\n");
+#endif
+}
+
+
+/*	Prepare a task for running in RT mode: release its first job now,
+ *	record where it is scheduled (if running) and queue it. */
+static void gsnedfsplit_task_new(struct task_struct * t, int on_rq, int running)
+{
+	unsigned long 		flags;
+	cpu_entry_t* 		entry;
+
+	TRACE("gsn edf: task new %d\n", t->pid);
+
+	raw_spin_lock_irqsave(&gsnedfsplit_lock, flags);
+
+	/* setup job params */
+	release_at(t, litmus_clock());
+
+	if (running) {
+		entry = &per_cpu(gsnedfsplit_cpu_entries, task_cpu(t));
+		BUG_ON(entry->scheduled);	/* no other RT task may be scheduled there */
+
+#ifdef CONFIG_RELEASE_MASTER
+		if (entry->cpu != gsnedfsplit.release_master) {
+#endif
+			entry->scheduled = t;
+			tsk_rt(t)->scheduled_on = task_cpu(t);
+#ifdef CONFIG_RELEASE_MASTER
+		} else {
+			/* do not schedule on release master */
+			preempt(entry); /* force resched */
+			tsk_rt(t)->scheduled_on = NO_CPU;
+		}
+#endif
+	} else {
+		t->rt_param.scheduled_on = NO_CPU;
+	}
+	t->rt_param.linked_on          = NO_CPU;	/* a new task starts out unlinked */
+
+	gsnedfsplit_job_arrival(t);
+	raw_spin_unlock_irqrestore(&gsnedfsplit_lock, flags);
+}
+
+static void gsnedfsplit_task_wake_up(struct task_struct *task)
+{
+	unsigned long flags;
+	lt_t now;
+
+	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
+
+	raw_spin_lock_irqsave(&gsnedfsplit_lock, flags);
+	/* We need to take suspensions because of semaphores into
+	 * account! If a job resumes after being suspended due to acquiring
+	 * a semaphore, it should never be treated as a new job release.
+	 */
+	if (get_rt_flags(task) == RT_F_EXIT_SEM) {
+		set_rt_flags(task, RT_F_RUNNING);
+	} else {
+		now = litmus_clock();
+		if (is_tardy(task, now)) {
+			/* new sporadic release */
+			release_at(task, now);
+			sched_trace_task_release(task);
+		}
+		else {
+			if (task->rt.time_slice) {
+				/* came back in time before deadline
+				*/
+				set_rt_flags(task, RT_F_RUNNING);
+			}
+		}
+	}
+	gsnedfsplit_job_arrival(task);	/* in every case the task is queued again */
+	raw_spin_unlock_irqrestore(&gsnedfsplit_lock, flags);
+}
+
+static void gsnedfsplit_task_block(struct task_struct *t)
+{
+	unsigned long flags;
+
+	TRACE_TASK(t, "block at %llu\n", litmus_clock());
+
+	/* unlink if necessary (unlink() also dequeues a queued task) */
+	raw_spin_lock_irqsave(&gsnedfsplit_lock, flags);
+	unlink(t);
+	raw_spin_unlock_irqrestore(&gsnedfsplit_lock, flags);
+
+	BUG_ON(!is_realtime(t));	/* checked only after the unlink above */
+}
+
+
+static void gsnedfsplit_task_exit(struct task_struct * t)
+{
+	unsigned long flags;
+
+	/* unlink if necessary, then clear the scheduled state on both sides */
+	raw_spin_lock_irqsave(&gsnedfsplit_lock, flags);
+	unlink(t);
+	if (tsk_rt(t)->scheduled_on != NO_CPU) {
+		gsnedfsplit_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
+		tsk_rt(t)->scheduled_on = NO_CPU;
+	}
+	raw_spin_unlock_irqrestore(&gsnedfsplit_lock, flags);
+
+	BUG_ON(!is_realtime(t));
+        TRACE_TASK(t, "RIP\n");
+}
+
+
+static long gsnedfsplit_admit_task(struct task_struct* tsk)
+{
+	return 0;	/* no admission test is performed: always 0 */
+}
+
+#ifdef CONFIG_LITMUS_LOCKING
+
+#include <litmus/fdso.h>
+
+/* make t inherit prio_inh's priority; called with IRQs off */
+static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
+{
+	int linked_on;
+	int check_preempt = 0;
+
+	raw_spin_lock(&gsnedfsplit_lock);
+
+	TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid);
+	tsk_rt(t)->inh_task = prio_inh;
+
+	linked_on  = tsk_rt(t)->linked_on;
+
+	/* If it is linked to a CPU, then we need to reorder the CPU heap. */
+	if (linked_on != NO_CPU) {
+		TRACE_TASK(t, "%s: linked  on %d\n",
+			   __FUNCTION__, linked_on);
+		/* Holder is linked; need to re-order CPUs.
+		 * We can't use heap_decrease() here since
+		 * the cpu_heap is ordered in reverse direction, so
+		 * it is actually an increase. */
+		bheap_delete(cpu_lower_prio, &gsnedfsplit_cpu_heap,
+			    gsnedfsplit_cpus[linked_on]->hn);
+		bheap_insert(cpu_lower_prio, &gsnedfsplit_cpu_heap,
+			    gsnedfsplit_cpus[linked_on]->hn);
+	} else {
+		/* holder may be queued: first stop queue changes */
+		raw_spin_lock(&gsnedfsplit.release_lock);
+		if (is_queued(t)) {
+			TRACE_TASK(t, "%s: is queued\n",
+				   __FUNCTION__);
+			/* We need to update the position of holder in some
+			 * heap. Note that this could be a release heap if
+			 * budget enforcement is used and this job overran. */
+			check_preempt =
+				!bheap_decrease(edf_ready_order,
+					       tsk_rt(t)->heap_node);
+		} else {
+			/* Nothing to do: if it is not queued and not linked
+			 * then it is either sleeping or currently being moved
+			 * by other code (e.g., a timer interrupt handler) that
+			 * will use the correct priority when enqueuing the
+			 * task. */
+			TRACE_TASK(t, "%s: is NOT queued => Done.\n",
+				   __FUNCTION__);
+		}
+		raw_spin_unlock(&gsnedfsplit.release_lock);
+
+		/* If holder was enqueued in a release heap, then the following
+		 * preemption check is pointless, but we can't easily detect
+		 * that case. If you want to fix this, then consider that
+		 * simply adding a state flag requires O(n) time to update when
+		 * releasing n tasks, which conflicts with the goal to have
+		 * O(log n) merges. */
+		if (check_preempt) {
+			/* heap_decrease() hit the top level of the heap: make
+			 * sure preemption checks get the right task, not the
+			 * potentially stale cache. */
+			bheap_uncache_min(edf_ready_order,
+					 &gsnedfsplit.ready_queue);
+			check_for_preemptions();
+		}
+	}
+
+	raw_spin_unlock(&gsnedfsplit_lock);
+}
+
+/* drop t's inherited priority and re-queue it; called with IRQs off */
+static void clear_priority_inheritance(struct task_struct* t)
+{
+	raw_spin_lock(&gsnedfsplit_lock);
+
+	/* A job only stops inheriting a priority when it releases a
+	 * resource. Thus we can make the following assumption.*/
+	BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
+
+	TRACE_TASK(t, "priority restored\n");
+	tsk_rt(t)->inh_task = NULL;
+
+	/* Check if rescheduling is necessary. We can't use heap_decrease()
+	 * since the priority was effectively lowered. */
+	unlink(t);
+	gsnedfsplit_job_arrival(t);
+
+	raw_spin_unlock(&gsnedfsplit_lock);
+}
+
+
+/* ******************** FMLP support ********************** */
+
+/* struct for semaphore with priority inheritance (FMLP) */
+struct fmlp_semaphore {
+	struct litmus_lock litmus_lock;	/* embedded base; see fmlp_from_lock() */
+
+	/* current resource holder, NULL while the semaphore is free */
+	struct task_struct *owner;
+
+	/* highest-priority waiter, NULL if there is none */
+	struct task_struct *hp_waiter;
+
+	/* FIFO queue of waiting tasks; lock/unlock/close take wait.lock */
+	wait_queue_head_t wait;
+};
+
+static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
+{
+	return container_of(lock, struct fmlp_semaphore, litmus_lock); /* lock must be the litmus_lock member of an fmlp_semaphore */
+}
+
+/* find the highest-priority waiter other than skip; caller does the locking */
+static struct task_struct* find_hp_waiter(struct fmlp_semaphore *sem,
+				   struct task_struct* skip)
+{
+	wait_queue_t		*entry;
+	struct task_struct	*candidate, *best = NULL;
+
+	list_for_each_entry(entry, &sem->wait.task_list, task_list) {
+		candidate = (struct task_struct*) entry->private;
+		if (candidate == skip)
+			continue;
+		/* keep whichever waiter has the higher priority so far */
+		if (edf_higher_prio(candidate, best))
+			best = candidate;
+	}
+	return best;
+}
+
+int gsnedfsplit_fmlp_lock(struct litmus_lock* l)
+{
+	struct task_struct* t = current;
+	struct fmlp_semaphore *sem = fmlp_from_lock(l);
+	wait_queue_t wait;
+	unsigned long flags;
+
+	if (!is_realtime(t))
+		return -EPERM;	/* only real-time tasks may use this lock */
+
+	spin_lock_irqsave(&sem->wait.lock, flags);
+
+	if (sem->owner) {
+		/* resource is not free => must suspend and wait */
+
+		init_waitqueue_entry(&wait, t);
+
+		/* FIXME: interruptible would be nice some day */
+		set_task_state(t, TASK_UNINTERRUPTIBLE);
+
+		__add_wait_queue_tail_exclusive(&sem->wait, &wait);
+
+		/* check if we need to activate priority inheritance */
+		if (edf_higher_prio(t, sem->hp_waiter)) {
+			sem->hp_waiter = t;
+			if (edf_higher_prio(t, sem->owner))
+				set_priority_inheritance(sem->owner, sem->hp_waiter);
+		}
+
+		TS_LOCK_SUSPEND;
+
+		/* release lock before sleeping */
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+		/* We depend on the FIFO order.  Thus, we don't need to recheck
+		 * when we wake up; we are guaranteed to have the lock since
+		 * there is only one wake up per release.
+		 */
+
+		schedule();
+
+		TS_LOCK_RESUME;
+
+		/* Since we hold the lock, no other task will change
+		 * ->owner. We can thus check it without acquiring the spin
+		 * lock. */
+		BUG_ON(sem->owner != t);
+	} else {
+		/* it's ours now */
+		sem->owner = t;
+
+		spin_unlock_irqrestore(&sem->wait.lock, flags);
+	}
+
+	return 0;	/* the caller now owns the semaphore */
+}
+
+int gsnedfsplit_fmlp_unlock(struct litmus_lock* l)
+{
+	struct task_struct *t = current, *next;
+	struct fmlp_semaphore *sem = fmlp_from_lock(l);
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&sem->wait.lock, flags);
+
+	if (sem->owner != t) {
+		err = -EINVAL;	/* only the current owner may unlock */
+		goto out;
+	}
+
+	/* check if there are jobs waiting for this resource */
+	next = __waitqueue_remove_first(&sem->wait);
+	if (next) {
+		/* next becomes the resource holder */
+		sem->owner = next;
+		TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
+
+		/* determine new hp_waiter if necessary */
+		if (next == sem->hp_waiter) {
+			TRACE_TASK(next, "was highest-prio waiter\n");
+			/* next has the highest priority --- it doesn't need to
+			 * inherit.  However, we need to make sure that the
+			 * next-highest priority in the queue is reflected in
+			 * hp_waiter. */
+			sem->hp_waiter = find_hp_waiter(sem, next);
+			if (sem->hp_waiter)
+				TRACE_TASK(sem->hp_waiter, "is new highest-prio waiter\n");
+			else
+				TRACE("no further waiters\n");
+		} else {
+			/* Well, if next is not the highest-priority waiter,
+			 * then it ought to inherit the highest-priority
+			 * waiter's priority. */
+			set_priority_inheritance(next, sem->hp_waiter);
+		}
+
+		/* wake up next */
+		wake_up_process(next);
+	} else
+		/* becomes available */
+		sem->owner = NULL;
+
+	/* we lose the benefit of priority inheritance (if any) */
+	if (tsk_rt(t)->inh_task)
+		clear_priority_inheritance(t);
+
+out:
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+	return err;	/* 0 on success, -EINVAL if the caller was not the owner */
+}
+
+int gsnedfsplit_fmlp_close(struct litmus_lock* l)
+{
+	struct fmlp_semaphore *sem = fmlp_from_lock(l);
+	struct task_struct *self = current;
+	unsigned long flags;
+	int held_by_us;
+
+	/* sample the ownership while holding the wait-queue lock */
+	spin_lock_irqsave(&sem->wait.lock, flags);
+	held_by_us = (sem->owner == self);
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+	/* a closing owner releases the semaphore; the result of the unlock
+	 * is not inspected and 0 is returned in every case */
+	if (held_by_us)
+		gsnedfsplit_fmlp_unlock(l);
+
+	return 0;
+}
+
+void gsnedfsplit_fmlp_free(struct litmus_lock* lock)
+{
+	kfree(fmlp_from_lock(lock));	/* frees the enclosing fmlp_semaphore */
+}
+
+static struct litmus_lock_ops gsnedfsplit_fmlp_lock_ops = {
+	.close  = gsnedfsplit_fmlp_close,	/* unlocks if the closer is the owner */
+	.lock   = gsnedfsplit_fmlp_lock,
+	.unlock = gsnedfsplit_fmlp_unlock,
+	.deallocate = gsnedfsplit_fmlp_free,	/* kfree()s the semaphore */
+};
+
+static struct litmus_lock* gsnedfsplit_new_fmlp(void)
+{
+	struct fmlp_semaphore *fmlp = kmalloc(sizeof(*fmlp), GFP_KERNEL);
+
+	/* allocation failure is reported to the caller as NULL */
+	if (fmlp == NULL)
+		return NULL;
+
+	/* a fresh semaphore has no holder and no waiters */
+	init_waitqueue_head(&fmlp->wait);
+	fmlp->hp_waiter = NULL;
+	fmlp->owner = NULL;
+	fmlp->litmus_lock.ops = &gsnedfsplit_fmlp_lock_ops;
+	return &fmlp->litmus_lock;
+}
+
+/* gsnedfsplit_allocate_lock - lock constructor: create a lock of the given
+ * type in *lock. Returns 0 on success, -ENOMEM if the allocation failed, or
+ * -ENXIO for an unsupported type; the third argument is not used. */
+static long gsnedfsplit_allocate_lock(struct litmus_lock **lock, int type,
+				 void* __user unused)
+{
+	int err = -ENXIO;
+
+	/* GSN-EDF-split currently only supports the FMLP for global resources. */
+	switch (type) {
+	case FMLP_SEM:
+		/* Flexible Multiprocessor Locking Protocol */
+		*lock = gsnedfsplit_new_fmlp();
+		if (*lock)
+			err = 0;
+		else
+			err = -ENOMEM;
+		break;
+	default:	/* unsupported type: err stays -ENXIO, *lock is untouched */
+		break;
+	}
+
+	return err;
+}
+
+#endif
+
+
+static long gsnedfsplit_activate_plugin(void)
+{
+	int cpu;
+	cpu_entry_t *entry;
+
+	bheap_init(&gsnedfsplit_cpu_heap);	/* start from an empty CPU heap */
+#ifdef CONFIG_RELEASE_MASTER
+	gsnedfsplit.release_master = atomic_read(&release_master_cpu);
+#endif
+
+	for_each_online_cpu(cpu) {
+		entry = &per_cpu(gsnedfsplit_cpu_entries, cpu);
+		bheap_node_init(&entry->hn, entry);
+		entry->linked    = NULL;
+		entry->scheduled = NULL;
+#ifdef CONFIG_RELEASE_MASTER
+		if (cpu != gsnedfsplit.release_master) {
+#endif
+			TRACE("GSN-EDF-split: Initializing CPU #%d.\n", cpu);
+			update_cpu_position(entry);	/* inserts the CPU into the heap */
+#ifdef CONFIG_RELEASE_MASTER
+		} else {
+			TRACE("GSN-EDF-split: CPU %d is release master.\n", cpu);
+		}
+#endif
+	}
+	return 0;	/* no failure path */
+}
+
+/*	Plugin object: the callbacks registered by init_gsn_edf()	*/
+static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
+	.plugin_name		= "GSN-EDF-split",
+	.finish_switch		= gsnedfsplit_finish_switch,
+	.tick			= gsnedfsplit_tick,
+	.task_new		= gsnedfsplit_task_new,
+	.complete_job		= complete_job,	/* not defined in this file */
+	.task_exit		= gsnedfsplit_task_exit,
+	.schedule		= gsnedfsplit_schedule,
+	.task_wake_up		= gsnedfsplit_task_wake_up,
+	.task_block		= gsnedfsplit_task_block,
+	.admit_task		= gsnedfsplit_admit_task,
+	.activate_plugin	= gsnedfsplit_activate_plugin,
+#ifdef CONFIG_LITMUS_LOCKING
+	.allocate_lock		= gsnedfsplit_allocate_lock,
+#endif
+};
+
+
+static int __init init_gsn_edf(void)
+{
+	int cpu;
+	cpu_entry_t *entry;
+
+	bheap_init(&gsnedfsplit_cpu_heap);
+	/* initialize CPU state: wire each per-CPU entry to its heap node */
+	for (cpu = 0; cpu < NR_CPUS; cpu++)  {
+		entry = &per_cpu(gsnedfsplit_cpu_entries, cpu);
+		gsnedfsplit_cpus[cpu] = entry;
+		entry->cpu 	 = cpu;
+		entry->hn        = &gsnedfsplit_heap_node[cpu];
+		bheap_node_init(&entry->hn, entry);
+	}
+	edf_domain_init(&gsnedfsplit, NULL, gsnedfsplit_release_jobs);
+	return register_sched_plugin(&gsn_edf_plugin);	/* result is passed through unchanged */
+}
+
+
+module_init(init_gsn_edf);
-- 
cgit v1.2.2