| author    | Andrea Bastoni <bastoni@cs.unc.edu>              | 2010-01-28 19:03:17 -0500 |
|-----------|--------------------------------------------------|---------------------------|
| committer | Andrea Bastoni <bastoni@cs.unc.edu>              | 2010-01-28 19:04:58 -0500 |
| commit    | 7ed6b8ee12634ffbd4aa8ad46a660b702f8221e7 (patch) |                           |
| tree      | 06da3efe242e397b2fd4597c13a9904547a20d39         |                           |
| parent    | 55752980a3fac83a651d594186fe76b726350167 (diff)  |                           |
Add PSN-EDF Plugin
-rw-r--r--   litmus/Makefile          |   1
-rw-r--r--   litmus/sched_psn_edf.c   | 475
2 files changed, 476 insertions, 0 deletions
diff --git a/litmus/Makefile b/litmus/Makefile
index 26e0fdb5c2c1..3b3f1af8d5b4 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -12,6 +12,7 @@ obj-y = sched_plugin.o litmus.o \
 	fmlp.o \
 	bheap.o \
 	sched_gsn_edf.o \
+	sched_psn_edf.o \
 	sched_pfair.o

 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..f0ab8ebc5111
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,475 @@
/*
 * kernel/sched_psn_edf.c
 *
 * Implementation of the PSN-EDF scheduler plugin.
 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
 *
 * Suspensions and non-preemptable sections are supported.
 * Priority inheritance is not supported.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>


typedef struct {
        rt_domain_t             domain;
        int                     cpu;
        struct task_struct*     scheduled; /* only RT tasks */
/*
 * scheduling lock slock
 * protects the domain and serializes scheduling decisions
 */
#define slock domain.ready_lock

} psnedf_domain_t;

DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);

#define local_edf               (&__get_cpu_var(psnedf_domains).domain)
#define local_pedf              (&__get_cpu_var(psnedf_domains))
#define remote_edf(cpu)         (&per_cpu(psnedf_domains, cpu).domain)
#define remote_pedf(cpu)        (&per_cpu(psnedf_domains, cpu))
#define task_edf(task)          remote_edf(get_partition(task))
#define task_pedf(task)         remote_pedf(get_partition(task))


static void psnedf_domain_init(psnedf_domain_t* pedf,
                               check_resched_needed_t check,
                               release_jobs_t release,
                               int cpu)
{
        edf_domain_init(&pedf->domain, check, release);
        pedf->cpu       = cpu;
        pedf->scheduled = NULL;
}

static void requeue(struct task_struct* t, rt_domain_t *edf)
{
        if (t->state != TASK_RUNNING)
                TRACE_TASK(t, "requeue: !TASK_RUNNING\n");

        set_rt_flags(t, RT_F_RUNNING);
        if (is_released(t, litmus_clock()))
                __add_ready(edf, t);
        else
                add_release(edf, t); /* it has got to wait */
}

/* we assume the lock is being held */
static void preempt(psnedf_domain_t *pedf)
{
        if (smp_processor_id() == pedf->cpu) {
                if (pedf->scheduled && is_np(pedf->scheduled))
                        request_exit_np(pedf->scheduled);
                else
                        set_tsk_need_resched(current);
        } else
                /* if it is a remote CPU we have to defer the
                 * decision to that CPU
                 */
                smp_send_reschedule(pedf->cpu);
}

/* This check is trivial in partitioned systems as we only have to consider
 * the CPU of the partition.
 */
static int psnedf_check_resched(rt_domain_t *edf)
{
        psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
        int ret = 0;

        /* because this is a callback from rt_domain_t we already hold
         * the necessary lock for the ready queue
         */
        if (edf_preemption_needed(edf, pedf->scheduled)) {
                preempt(pedf);
                ret = 1;
        }
        return ret;
}

static void job_completion(struct task_struct* t)
{
        TRACE_TASK(t, "job_completion().\n");
        set_rt_flags(t, RT_F_SLEEP);
        prepare_for_next_period(t);
}

static void psnedf_tick(struct task_struct *t)
{
        psnedf_domain_t *pedf = local_pedf;

        /* Check for inconsistency. We don't need the lock for this since
         * ->scheduled is only changed in schedule, which obviously is not
         * executing in parallel on this CPU
         */
        BUG_ON(is_realtime(t) && t != pedf->scheduled);

        if (is_realtime(t) && budget_exhausted(t)) {
                if (!is_np(t)) {
                        set_tsk_need_resched(t);
                        TRACE("psnedf_scheduler_tick: "
                              "%d is preemptable "
                              "=> FORCE_RESCHED\n", t->pid);
                } else {
                        TRACE("psnedf_scheduler_tick: "
                              "%d is non-preemptable, "
                              "preemption delayed.\n", t->pid);
                        request_exit_np(t);
                }
        }
}

static struct task_struct* psnedf_schedule(struct task_struct * prev)
{
        psnedf_domain_t*        pedf = local_pedf;
        rt_domain_t*            edf  = &pedf->domain;
        struct task_struct*     next;

        int                     out_of_time, sleep, preempt,
                                np, exists, blocks, resched;

        spin_lock(&pedf->slock);

        /* sanity checking
         * unlike in gsn-edf, when a task exits (is dead)
         * pedf->scheduled may be NULL and prev _is_ realtime
         */
        BUG_ON(pedf->scheduled && pedf->scheduled != prev);
        BUG_ON(pedf->scheduled && !is_realtime(prev));

        /* (0) Determine state */
        exists      = pedf->scheduled != NULL;
        blocks      = exists && !is_running(pedf->scheduled);
        out_of_time = exists && budget_exhausted(pedf->scheduled);
        np          = exists && is_np(pedf->scheduled);
        sleep       = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
        preempt     = edf_preemption_needed(edf, prev);

        /* If we need to preempt, do so.
         * The following checks set resched to 1 in case of special
         * circumstances.
         */
        resched = preempt;

        /* If a task blocks we have no choice but to reschedule.
         */
        if (blocks)
                resched = 1;

        /* Request a sys_exit_np() call if we would like to preempt but cannot.
         * Multiple calls to request_exit_np() don't hurt.
         */
        if (np && (out_of_time || preempt || sleep))
                request_exit_np(pedf->scheduled);

        /* Any task that is preemptable and either exhausts its execution
         * budget or wants to sleep completes. We may have to reschedule after
         * this.
         */
        if (!np && (out_of_time || sleep) && !blocks) {
                job_completion(pedf->scheduled);
                resched = 1;
        }

        /* The final scheduling decision. Do we need to switch for some reason?
         * Switch if we are in RT mode and have no task or if we need to
         * resched.
         */
        next = NULL;
        if ((!np || blocks) && (resched || !exists)) {
                /* Take care of a previously scheduled
                 * job by taking it out of the Linux runqueue.
                 */
                if (pedf->scheduled && !blocks)
                        requeue(pedf->scheduled, edf);
                next = __take_ready(edf);
        } else
                /* Only override Linux scheduler if we have a real-time task
                 * scheduled that needs to continue.
                 */
                if (exists)
                        next = prev;

        if (next) {
                TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
                set_rt_flags(next, RT_F_RUNNING);
        } else {
                TRACE("becoming idle at %llu\n", litmus_clock());
        }

        pedf->scheduled = next;
        spin_unlock(&pedf->slock);

        return next;
}


/* Prepare a task for running in RT mode
 */
static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
{
        rt_domain_t*            edf  = task_edf(t);
        psnedf_domain_t*        pedf = task_pedf(t);
        unsigned long           flags;

        TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
                   t->rt_param.task_params.cpu);

        /* setup job parameters */
        release_at(t, litmus_clock());

        /* The task should be running in the queue, otherwise signal
         * code will try to wake it up with fatal consequences.
         */
        spin_lock_irqsave(&pedf->slock, flags);
        if (running) {
                /* there shouldn't be anything else running at the time */
                BUG_ON(pedf->scheduled);
                pedf->scheduled = t;
        } else {
                requeue(t, edf);
                /* maybe we have to reschedule */
                preempt(pedf);
        }
        spin_unlock_irqrestore(&pedf->slock, flags);
}

static void psnedf_task_wake_up(struct task_struct *task)
{
        unsigned long           flags;
        psnedf_domain_t*        pedf = task_pedf(task);
        rt_domain_t*            edf  = task_edf(task);
        lt_t                    now;

        TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
        spin_lock_irqsave(&pedf->slock, flags);
        BUG_ON(is_queued(task));
        /* We need to take suspensions because of semaphores into
         * account! If a job resumes after being suspended due to acquiring
         * a semaphore, it should never be treated as a new job release.
         *
         * FIXME: This should be done in some more predictable and
         * userspace-controlled way.
         */
        now = litmus_clock();
        if (is_tardy(task, now) &&
            get_rt_flags(task) != RT_F_EXIT_SEM) {
                /* new sporadic release */
                release_at(task, now);
                sched_trace_task_release(task);
        }
        requeue(task, edf);
        spin_unlock_irqrestore(&pedf->slock, flags);
        TRACE_TASK(task, "wake up done\n");
}

static void psnedf_task_block(struct task_struct *t)
{
        psnedf_domain_t *pedf = task_pedf(t);

        /* only running tasks can block, thus t is in no queue */
        TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);

        BUG_ON(!is_realtime(t));
        BUG_ON(is_queued(t));

        /* if this task is dead, then we need to reset pedf->scheduled now
         * as we might get rescheduled before task_exit executes
         */
        if (unlikely(t->state == TASK_DEAD)) {
                TRACE_TASK(t, "Dead, setting scheduled = NULL\n");
                pedf->scheduled = NULL;
        }
}

static void psnedf_task_exit(struct task_struct * t)
{
        unsigned long           flags;
        psnedf_domain_t*        pedf = task_pedf(t);
        rt_domain_t*            edf;

        spin_lock_irqsave(&pedf->slock, flags);
        if (is_queued(t)) {
                /* dequeue */
                edf = task_edf(t);
                remove(edf, t);
        }
        if (pedf->scheduled == t)
                pedf->scheduled = NULL;

        TRACE_TASK(t, "RIP, now reschedule\n");

        preempt(pedf);
        spin_unlock_irqrestore(&pedf->slock, flags);
}

#ifdef CONFIG_FMLP
static long psnedf_pi_block(struct pi_semaphore *sem,
                            struct task_struct *new_waiter)
{
        psnedf_domain_t*        pedf;
        rt_domain_t*            edf;
        struct task_struct*     t;
        int cpu = get_partition(new_waiter);

        BUG_ON(!new_waiter);

        if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
                TRACE_TASK(new_waiter, " boosts priority\n");
                pedf = task_pedf(new_waiter);
                edf  = task_edf(new_waiter);

                /* interrupts already disabled */
                spin_lock(&pedf->slock);

                /* store new highest-priority task */
                sem->hp.cpu_task[cpu] = new_waiter;
                if (sem->holder &&
                    get_partition(sem->holder) == get_partition(new_waiter)) {
                        /* let holder inherit */
                        sem->holder->rt_param.inh_task = new_waiter;
                        t = sem->holder;
                        if (is_queued(t)) {
                                /* queued in domain */
                                remove(edf, t);
                                /* re-add so the priority change takes effect */
                                /* FIXME: this looks outdated */
                                if (is_released(t, litmus_clock()))
                                        __add_ready(edf, t);
                                else
                                        add_release(edf, t);
                        }
                }

                /* check if we need to reschedule */
                if (edf_preemption_needed(edf, current))
                        preempt(pedf);

                spin_unlock(&pedf->slock);
        }

        return 0;
}

static long psnedf_inherit_priority(struct pi_semaphore *sem,
                                    struct task_struct *new_owner)
{
        int cpu = get_partition(new_owner);

        new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
        if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
                TRACE_TASK(new_owner,
                           "inherited priority from %s/%d\n",
                           sem->hp.cpu_task[cpu]->comm,
                           sem->hp.cpu_task[cpu]->pid);
        } else
                TRACE_TASK(new_owner,
                           "cannot inherit priority: "
                           "no higher priority job waits on this CPU!\n");
        /* make new owner non-preemptable as required by FMLP under
         * PSN-EDF.
         */
        make_np(new_owner);
        return 0;
}


/* This function is called on a semaphore release, and assumes that
 * the current task is also the semaphore holder.
 */
static long psnedf_return_priority(struct pi_semaphore *sem)
{
        struct task_struct*     t    = current;
        psnedf_domain_t*        pedf = task_pedf(t);
        rt_domain_t*            edf  = task_edf(t);
        int                     ret  = 0;
        int                     cpu  = get_partition(current);


        /* Find new highest-priority semaphore task
         * if holder task is the current hp.cpu_task[cpu].
         *
         * Calling function holds sem->wait.lock.
         */
        if (t == sem->hp.cpu_task[cpu])
                edf_set_hp_cpu_task(sem, cpu);

        take_np(t);
        if (current->rt_param.inh_task) {
                TRACE_CUR("return priority of %s/%d\n",
                          current->rt_param.inh_task->comm,
                          current->rt_param.inh_task->pid);
                spin_lock(&pedf->slock);

                /* Reset inh_task to NULL. */
                current->rt_param.inh_task = NULL;

                /* check if we need to reschedule */
                if (edf_preemption_needed(edf, current))
                        preempt(pedf);

                spin_unlock(&pedf->slock);
        } else
                TRACE_CUR(" no priority to return %p\n", sem);

        return ret;
}

#endif

static long psnedf_admit_task(struct task_struct* tsk)
{
        return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
}

/* Plugin object */
static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
        .plugin_name            = "PSN-EDF",
#ifdef CONFIG_SRP
        .srp_active             = 1,
#endif
        .tick                   = psnedf_tick,
        .task_new               = psnedf_task_new,
        .complete_job           = complete_job,
        .task_exit              = psnedf_task_exit,
        .schedule               = psnedf_schedule,
        .task_wake_up           = psnedf_task_wake_up,
        .task_block             = psnedf_task_block,
#ifdef CONFIG_FMLP
        .fmlp_active            = 1,
        .pi_block               = psnedf_pi_block,
        .inherit_priority       = psnedf_inherit_priority,
        .return_priority        = psnedf_return_priority,
#endif
        .admit_task             = psnedf_admit_task
};


static int __init init_psn_edf(void)
{
        int i;

        /* We do not really want to support cpu hotplug, do we? ;)
         * However, if we were crazy enough to do so,
         * we could not simply use num_online_cpus() here.
         */
        for (i = 0; i < num_online_cpus(); i++) {
                psnedf_domain_init(remote_pedf(i),
                                   psnedf_check_resched,
                                   NULL, i);
        }
        return register_sched_plugin(&psn_edf_plugin);
}

module_init(init_psn_edf);
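
Note on admission: psnedf_admit_task() above only accepts a task that is already executing on the CPU named in its rt_param.task_params.cpu, so a userspace task must first migrate itself onto its partition before asking to be admitted. The following is a minimal userspace sketch of that step, not part of this commit: it uses only the standard glibc affinity API, the partition number is an arbitrary example, and the LITMUS^RT-specific setup (filling in the task parameters and switching the task into real-time mode) is left as a placeholder comment.

/* Hypothetical illustration (not from this patch): pin the calling process to
 * its partition CPU so that psnedf_admit_task()'s
 * task_cpu(tsk) == rt_param.task_params.cpu check can succeed.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        int partition = 1;      /* example partition/CPU number (assumption) */
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(partition, &set);

        /* Migrate onto the partition CPU first; admission under PSN-EDF
         * returns -EINVAL if the task is currently on any other CPU. */
        if (sched_setaffinity(0, sizeof(set), &set) != 0) {
                perror("sched_setaffinity");
                return EXIT_FAILURE;
        }

        /* ... set the LITMUS^RT task parameters (cpu = partition) and switch
         * into real-time mode here; this part is library-specific and
         * intentionally omitted ... */

        return EXIT_SUCCESS;
}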