Merge branch 'master' into wip-merge-2.6.34

Simple merge between master and 2.6.34 with conflicts resolved. This commit does not compile, the following main problems are still unresolved: - spinlock -> raw_spinlock API changes - kfifo API changes - sched_class API changes Conflicts: Makefile arch/x86/include/asm/hw_irq.h arch/x86/include/asm/unistd_32.h arch/x86/kernel/syscall_table_32.S include/linux/hrtimer.h kernel/sched.c kernel/sched_fair.c
author: Andrea Bastoni <bastoni@cs.unc.edu> 2010-05-29 23:35:01 -0400
committer: Andrea Bastoni <bastoni@cs.unc.edu> 2010-05-29 23:35:01 -0400
commit: 6ffc1fee98c4b995eb3a0285f4f8fb467cb0306e (patch)
tree: 69a05892a41e7f7400fa598ee0bdf8027c8f0fd6 /litmus/sched_psn_edf.c
parent: e40152ee1e1c7a63f4777791863215e3faa37a86 (diff)
parent: 7c1ff4c544dd650cceff3cd69a04bcba60856678 (diff)
1 files changed, 478 insertions, 0 deletions
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..7f71ecfaaaae
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,478 @@
+/*
+ * litmus/sched_psn_edf.c
+ *
+ * Implementation of the PSN-EDF scheduler plugin.
+ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
+ *
+ * Suspensions and non-preemptable sections are supported.
+ * Priority inheritance is only provided by the FMLP hooks (CONFIG_FMLP).
+ */
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+/* Scheduling state of one PSN-EDF partition; one instance exists per CPU. */
+typedef struct {
+        rt_domain_t             domain;    /* queues that requeue() feeds via __add_ready()/add_release() */
+        int                     cpu;       /* CPU index passed to psnedf_domain_init() */
+        struct task_struct*     scheduled; /* only RT tasks */
+/*
+ * scheduling lock slock
+ * protects the domain and serializes scheduling decisions
+ *
+ * Note that slock is not a separate field: the macro aliases the
+ * ready_lock embedded in the domain member.
+ */
+#define slock domain.ready_lock
+} psnedf_domain_t;
+/* One PSN-EDF partition per CPU. */
+DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
+/* Accessors for the per-CPU partitions:
+ *   local_*        - partition of the executing CPU
+ *   remote_*(cpu)  - partition of the given CPU
+ *   task_*(task)   - partition selected by get_partition(task)
+ * The *_edf variants yield the embedded rt_domain_t, the *_pedf variants
+ * the enclosing psnedf_domain_t.
+ */
+#define local_edf               (&__get_cpu_var(psnedf_domains).domain)
+#define local_pedf              (&__get_cpu_var(psnedf_domains))
+#define remote_edf(cpu)         (&per_cpu(psnedf_domains, cpu).domain)
+#define remote_pedf(cpu)        (&per_cpu(psnedf_domains, cpu))
+#define task_edf(task)          remote_edf(get_partition(task))
+#define task_pedf(task)         remote_pedf(get_partition(task))
+/* Initialise one partition: set up the embedded EDF domain with the @check
+ * and @release callbacks, record @cpu as the partition's CPU, and start with
+ * no real-time task scheduled.
+ */
+static void psnedf_domain_init(psnedf_domain_t* pedf,
+                               check_resched_needed_t check,
+                               release_jobs_t release,
+                               int cpu)
+{
+        rt_domain_t *dom = &pedf->domain;
+        edf_domain_init(dom, check, release);
+        pedf->scheduled = NULL;
+        pedf->cpu       = cpu;
+}
+/* Put @t back into @edf: onto the release queue while its job is not yet
+ * released, onto the ready queue otherwise. The task is flagged
+ * RT_F_RUNNING first. Every caller in this file holds the partition's
+ * slock when calling this.
+ */
+static void requeue(struct task_struct* t, rt_domain_t *edf)
+{
+        if (t->state != TASK_RUNNING)
+                TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+        set_rt_flags(t, RT_F_RUNNING);
+        if (!is_released(t, litmus_clock())) {
+                /* not released yet: it has got to wait */
+                add_release(edf, t);
+                return;
+        }
+        __add_ready(edf, t);
+}
+/* Ask for a preemption of the partition's scheduled task on the partition's
+ * CPU; the decision itself is delegated to preempt_if_preemptable().
+ * We assume the lock is being held.
+ */
+static void preempt(psnedf_domain_t *pedf)
+{
+        struct task_struct *victim = pedf->scheduled;
+        int target_cpu = pedf->cpu;
+        preempt_if_preemptable(victim, target_cpu);
+}
+/* rt_domain_t callback deciding whether the partition that owns @edf has to
+ * reschedule. In a partitioned system only the partition's own CPU needs to
+ * be considered. Returns 1 if a preemption was requested, 0 otherwise.
+ */
+static int psnedf_check_resched(rt_domain_t *edf)
+{
+        psnedf_domain_t *owner = container_of(edf, psnedf_domain_t, domain);
+        /* because this is a callback from rt_domain_t we already hold
+         * the necessary lock for the ready queue
+         */
+        if (!edf_preemption_needed(edf, owner->scheduled))
+                return 0;
+        preempt(owner);
+        return 1;
+}
+/* Finish the current job of @t: emit the completion trace record (@forced is
+ * handed to the tracer unchanged), flag the task RT_F_SLEEP, and set up its
+ * next job via prepare_for_next_period().
+ *
+ * psnedf_schedule() passes forced = !sleep, i.e. non-zero when the job did
+ * not flag itself RT_F_SLEEP and was therefore ended by budget exhaustion.
+ */
+static void job_completion(struct task_struct* t, int forced)
+{
+        sched_trace_task_completion(t,forced);
+        TRACE_TASK(t, "job_completion().\n");
+        set_rt_flags(t, RT_F_SLEEP);
+        prepare_for_next_period(t);
+}
+/* Tick hook for the task @t running on this CPU. Once a real-time task has
+ * exhausted its budget, a task outside a non-preemptable section is marked
+ * for rescheduling, while a task in a user-space non-preemptable section is
+ * asked to leave it.
+ */
+static void psnedf_tick(struct task_struct *t)
+{
+        psnedf_domain_t *pedf = local_pedf;
+        /* Consistency check. No lock is needed: ->scheduled is only
+         * changed in schedule, which obviously is not executing in
+         * parallel on this CPU.
+         */
+        BUG_ON(is_realtime(t) && t != pedf->scheduled);
+        /* nothing to enforce unless an RT task ran out of budget */
+        if (!is_realtime(t) || !budget_exhausted(t))
+                return;
+        if (!is_np(t)) {
+                set_tsk_need_resched(t);
+                TRACE("psnedf_scheduler_tick: "
+                      "%d is preemptable "
+                      " => FORCE_RESCHED\n", t->pid);
+                return;
+        }
+        if (is_user_np(t)) {
+                TRACE("psnedf_scheduler_tick: "
+                      "%d is non-preemptable, "
+                      "preemption delayed.\n", t->pid);
+                request_exit_np(t);
+        }
+}
+/* Scheduling decision for the partition of the executing CPU.
+ *
+ * @prev is the task handed in by the caller (asserted below to equal
+ * pedf->scheduled whenever that is set). Returns the real-time task to run
+ * next, or NULL when the partition has none. The whole decision is made
+ * with the partition's slock held, and pedf->scheduled is updated to the
+ * returned task before the lock is dropped.
+ *
+ * Note: the local flag named preempt shadows the file-level preempt()
+ * helper inside this function.
+ */
+static struct task_struct* psnedf_schedule(struct task_struct * prev)
+{
+        psnedf_domain_t*        pedf = local_pedf;
+        rt_domain_t*            edf  = &pedf->domain;
+        struct task_struct*     next;
+        int                     out_of_time, sleep, preempt,
+                                np, exists, blocks, resched;
+        spin_lock(&pedf->slock);
+        /* sanity checking
+         * differently from gedf, when a task exits (dead)
+         * pedf->scheduled may be null and prev _is_ realtime
+         */
+        BUG_ON(pedf->scheduled && pedf->scheduled != prev);
+        BUG_ON(pedf->scheduled && !is_realtime(prev));
+        /* (0) Determine state
+         *   exists      - an RT task is currently scheduled here
+         *   blocks      - that task is no longer running (is_running() fails)
+         *   out_of_time - that task has exhausted its budget
+         *   np          - that task is in a non-preemptable section
+         *   sleep       - that task flagged itself RT_F_SLEEP
+         *   preempt     - the domain reports that prev should be preempted
+         */
+        exists      = pedf->scheduled != NULL;
+        blocks      = exists && !is_running(pedf->scheduled);
+        out_of_time = exists && budget_exhausted(pedf->scheduled);
+        np          = exists && is_np(pedf->scheduled);
+        sleep       = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
+        preempt     = edf_preemption_needed(edf, prev);
+        /* If we need to preempt do so.
+         * The following checks set resched to 1 in case of special
+         * circumstances.
+         */
+        resched = preempt;
+        /* If a task blocks we have no choice but to reschedule.
+         */
+        if (blocks)
+                resched = 1;
+        /* Request a sys_exit_np() call if we would like to preempt but cannot.
+         * Multiple calls to request_exit_np() don't hurt.
+         */
+        if (np && (out_of_time || preempt || sleep))
+                request_exit_np(pedf->scheduled);
+        /* Any task that is preemptable and either exhausts its execution
+         * budget or wants to sleep completes. We may have to reschedule after
+         * this. The completion counts as forced when the task did not ask
+         * to sleep.
+         */
+        if (!np && (out_of_time || sleep) && !blocks) {
+                job_completion(pedf->scheduled, !sleep);
+                resched = 1;
+        }
+        /* The final scheduling decision. Do we need to switch for some reason?
+         * Switch if we are in RT mode and have no task or if we need to
+         * resched. A non-preemptable task is only switched away from when
+         * it blocks.
+         */
+        next = NULL;
+        if ((!np || blocks) && (resched || !exists)) {
+                /* Put a previously scheduled job that did not block back
+                 * into the EDF domain before picking the next job, so
+                 * that it competes with the queued jobs again.
+                 */
+                if (pedf->scheduled && !blocks)
+                        requeue(pedf->scheduled, edf);
+                next = __take_ready(edf);
+        } else
+                /* Only override Linux scheduler if we have a real-time task
+                 * scheduled that needs to continue.
+                 */
+                if (exists)
+                        next = prev;
+        if (next) {
+                TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
+                set_rt_flags(next, RT_F_RUNNING);
+        } else {
+                TRACE("becoming idle at %llu\n", litmus_clock());
+        }
+        pedf->scheduled = next;
+        spin_unlock(&pedf->slock);
+        return next;
+}
+/* Prepare a task for running in RT mode: release its first job now, then,
+ * under the partition lock, either record it as the partition's scheduled
+ * task (@running set) or queue it and check for a preemption.
+ * @on_rq is not used by this plugin.
+ */
+static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
+{
+        psnedf_domain_t*        pedf = task_pedf(t);
+        rt_domain_t*            edf  = task_edf(t);
+        unsigned long           irq_state;
+        TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
+                   t->rt_param.task_params.cpu);
+        /* the first job is released right away */
+        release_at(t, litmus_clock());
+        /* The task should be running in the queue, otherwise signal
+         * code will try to wake it up with fatal consequences.
+         */
+        spin_lock_irqsave(&pedf->slock, irq_state);
+        if (!running) {
+                requeue(t, edf);
+                /* the new arrival may have to preempt the current task */
+                preempt(pedf);
+        } else {
+                /* nothing else may be scheduled here at this point */
+                BUG_ON(pedf->scheduled);
+                pedf->scheduled = t;
+        }
+        spin_unlock_irqrestore(&pedf->slock, irq_state);
+}
+/* Wake-up hook: re-admit @task to its partition. A tardy task is given a
+ * fresh sporadic release unless it is flagged RT_F_EXIT_SEM; the task is then
+ * requeued unless it is still the partition's scheduled task.
+ */
+static void psnedf_task_wake_up(struct task_struct *task)
+{
+        psnedf_domain_t*        pedf = task_pedf(task);
+        rt_domain_t*            edf  = &pedf->domain;
+        unsigned long           irq_state;
+        lt_t                    wake_time;
+        TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
+        spin_lock_irqsave(&pedf->slock, irq_state);
+        BUG_ON(is_queued(task));
+        /* Suspensions caused by semaphores must be taken into account:
+         * a job that resumes after having been suspended while acquiring
+         * a semaphore must never be treated as a new job release.
+         *
+         * FIXME: This should be done in some more predictable and userspace-controlled way.
+         */
+        wake_time = litmus_clock();
+        if (is_tardy(task, wake_time)) {
+                if (get_rt_flags(task) != RT_F_EXIT_SEM) {
+                        /* new sporadic release */
+                        release_at(task, wake_time);
+                        sched_trace_task_release(task);
+                }
+        }
+        /* Skip the requeue if the task is still the scheduled one. That
+         * happens when the wake-up was issued concurrently on a remote
+         * CPU before this partition's CPU got around to de-scheduling
+         * the task, i.e., wake_up() raced with schedule() and won.
+         */
+        if (task != pedf->scheduled)
+                requeue(task, edf);
+        spin_unlock_irqrestore(&pedf->slock, irq_state);
+        TRACE_TASK(task, "wake up done\n");
+}
+/* Block hook for @t. No queue manipulation happens here: the function only
+ * traces the event and asserts that @t is a real-time task that is not
+ * queued.
+ */
+static void psnedf_task_block(struct task_struct *t)
+{
+        /* only running tasks can block, thus t is in no queue */
+        TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
+        BUG_ON(!is_realtime(t));
+        BUG_ON(is_queued(t));
+}
+/* Exit hook: detach @t from its partition. Under the partition lock the task
+ * is removed from the domain if it is still queued, cleared as the scheduled
+ * task if it is the current one, and a preemption check is triggered.
+ */
+static void psnedf_task_exit(struct task_struct * t)
+{
+        psnedf_domain_t*        pedf = task_pedf(t);
+        unsigned long           irq_state;
+        spin_lock_irqsave(&pedf->slock, irq_state);
+        /* dequeue if the task is still sitting in the domain */
+        if (is_queued(t))
+                remove(task_edf(t), t);
+        /* forget the task if it is the one currently scheduled */
+        if (t == pedf->scheduled)
+                pedf->scheduled = NULL;
+        TRACE_TASK(t, "RIP, now reschedule\n");
+        preempt(pedf);
+        spin_unlock_irqrestore(&pedf->slock, irq_state);
+}
+#ifdef CONFIG_FMLP
+/* FMLP hook invoked when @new_waiter starts waiting on @sem.
+ *
+ * If @new_waiter has higher EDF priority than the highest-priority waiter
+ * recorded for its partition, it becomes the new per-CPU highest-priority
+ * waiter. When the semaphore holder lives in the same partition, the holder
+ * inherits from @new_waiter and, if queued, is re-inserted into the domain
+ * so that the priority change takes effect. Finally a preemption check is
+ * made for the partition. Always returns 0.
+ *
+ * Runs with interrupts already disabled (see the plain spin_lock() below).
+ */
+static long psnedf_pi_block(struct pi_semaphore *sem,
+                            struct task_struct *new_waiter)
+{
+        psnedf_domain_t*        pedf;
+        rt_domain_t*            edf;
+        struct task_struct*     t;
+        int                     cpu;
+        /* Validate the waiter before anything looks at it: the partition
+         * lookup used to run in the declaration above this check, i.e.
+         * ahead of the NULL test that is supposed to guard it.
+         */
+        BUG_ON(!new_waiter);
+        cpu = get_partition(new_waiter);
+        if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
+                TRACE_TASK(new_waiter, " boosts priority\n");
+                pedf = task_pedf(new_waiter);
+                edf  = task_edf(new_waiter);
+                /* interrupts already disabled */
+                spin_lock(&pedf->slock);
+                /* store new highest-priority task */
+                sem->hp.cpu_task[cpu] = new_waiter;
+                if (sem->holder &&
+                    get_partition(sem->holder) == get_partition(new_waiter)) {
+                        /* let holder inherit */
+                        sem->holder->rt_param.inh_task = new_waiter;
+                        t = sem->holder;
+                        if (is_queued(t)) {
+                                /* queued in domain*/
+                                remove(edf, t);
+                                /* readd to make priority change take place */
+                                /* FIXME: this looks outdated */
+                                if (is_released(t, litmus_clock()))
+                                        __add_ready(edf, t);
+                                else
+                                        add_release(edf, t);
+                        }
+                }
+                /* check if we need to reschedule */
+                if (edf_preemption_needed(edf, current))
+                        preempt(pedf);
+                spin_unlock(&pedf->slock);
+        }
+        return 0;
+}
+/* FMLP hook invoked when @new_owner becomes the holder of @sem.
+ *
+ * The owner's inh_task is set to the highest-priority waiter recorded for
+ * the owner's partition (possibly NULL, possibly the owner itself), the
+ * outcome is traced, and the owner is made non-preemptable. Always
+ * returns 0.
+ */
+static long psnedf_inherit_priority(struct pi_semaphore *sem,
+                                    struct task_struct *new_owner)
+{
+        int cpu  = get_partition(new_owner);
+        new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
+        if (!sem->hp.cpu_task[cpu] || new_owner == sem->hp.cpu_task[cpu]) {
+                TRACE_TASK(new_owner,
+                           "cannot inherit priority: "
+                           "no higher priority job waits on this CPU!\n");
+        } else {
+                TRACE_TASK(new_owner,
+                           "inherited priority from %s/%d\n",
+                           sem->hp.cpu_task[cpu]->comm,
+                           sem->hp.cpu_task[cpu]->pid);
+        }
+        /* FMLP under PSN-EDF requires the new owner to execute
+         * non-preemptably.
+         */
+        make_np(new_owner);
+        return 0;
+}
+/* FMLP hook invoked on a semaphore release; it assumes that the current task
+ * is also the semaphore holder.
+ *
+ * The per-CPU highest-priority waiter is recomputed if it was the holder,
+ * the holder leaves its non-preemptable section, its inherited priority is
+ * dropped under the partition lock, and a preemption check follows.
+ * Always returns 0.
+ */
+static long psnedf_return_priority(struct pi_semaphore *sem)
+{
+        struct task_struct*     t    = current;
+        psnedf_domain_t*        pedf = task_pedf(t);
+        rt_domain_t*            edf  = &pedf->domain;
+        int                     cpu  = get_partition(t);
+        int                     remaining_np;
+        /* If the holder is the recorded hp.cpu_task[cpu], a new
+         * highest-priority semaphore task has to be found.
+         *
+         * Calling function holds sem->wait.lock.
+         */
+        if (sem->hp.cpu_task[cpu] == t)
+                edf_set_hp_cpu_task(sem, cpu);
+        remaining_np = take_np(t);
+        /* Resources are not nested, so this
+         * should always be zero */
+        BUG_ON(remaining_np);
+        if (!t->rt_param.inh_task) {
+                TRACE_CUR(" no priority to return %p\n", sem);
+        } else {
+                TRACE_CUR("return priority of %s/%d\n",
+                          t->rt_param.inh_task->comm,
+                          t->rt_param.inh_task->pid);
+        }
+        /* Always check for delayed preemptions that might have become
+         * necessary due to non-preemptive execution.
+         */
+        spin_lock(&pedf->slock);
+        /* the inherited priority is given up under the lock */
+        t->rt_param.inh_task = NULL;
+        /* check if we need to reschedule */
+        if (edf_preemption_needed(edf, t))
+                preempt(pedf);
+        spin_unlock(&pedf->slock);
+        return 0;
+}
+#endif
+/* Admission test: accept @tsk only if the CPU it currently runs on matches
+ * the CPU named in its real-time parameters. Returns 0 on success and
+ * -EINVAL otherwise.
+ */
+static long psnedf_admit_task(struct task_struct* tsk)
+{
+        if (task_cpu(tsk) != tsk->rt_param.task_params.cpu)
+                return -EINVAL;
+        return 0;
+}
+/*      Plugin object
+ *
+ * Callback table handed to register_sched_plugin() by init_psn_edf().
+ * complete_job is not defined in this file. srp_active is only set when
+ * CONFIG_SRP is enabled; fmlp_active and the three priority-inheritance
+ * hooks are only wired up when CONFIG_FMLP is enabled.
+ */
+static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
+        .plugin_name            = "PSN-EDF",
+#ifdef CONFIG_SRP
+        .srp_active             = 1,
+#endif
+        .tick                   = psnedf_tick,
+        .task_new               = psnedf_task_new,
+        .complete_job           = complete_job,
+        .task_exit              = psnedf_task_exit,
+        .schedule               = psnedf_schedule,
+        .task_wake_up           = psnedf_task_wake_up,
+        .task_block             = psnedf_task_block,
+#ifdef CONFIG_FMLP
+        .fmlp_active            = 1,
+        .pi_block               = psnedf_pi_block,
+        .inherit_priority       = psnedf_inherit_priority,
+        .return_priority        = psnedf_return_priority,
+#endif
+        .admit_task             = psnedf_admit_task
+};
+/* Module entry point: initialise the partition of every CPU that is online
+ * at this time and register the plugin. Returns the result of
+ * register_sched_plugin().
+ */
+static int __init init_psn_edf(void)
+{
+        int cpu;
+        /* Walk the online mask rather than counting from 0 up to
+         * num_online_cpus() - 1: online CPU ids are not guaranteed to be
+         * contiguous, so counting could initialise the partition of an
+         * offline CPU and leave an online one untouched.
+         *
+         * CPU hotplug is still not supported: a CPU that comes online
+         * later does not get its partition initialised here.
+         */
+        for_each_online_cpu(cpu) {
+                psnedf_domain_init(remote_pedf(cpu),
+                                   psnedf_check_resched,
+                                   NULL, cpu);
+        }
+        return register_sched_plugin(&psn_edf_plugin);
+}
+module_init(init_psn_edf);
author	Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-29 23:35:01 -0400
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-05-29 23:35:01 -0400
commit	6ffc1fee98c4b995eb3a0285f4f8fb467cb0306e (patch)
tree	69a05892a41e7f7400fa598ee0bdf8027c8f0fd6 /litmus/sched_psn_edf.c
parent	e40152ee1e1c7a63f4777791863215e3faa37a86 (diff)
parent	7c1ff4c544dd650cceff3cd69a04bcba60856678 (diff)

diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c new file mode 100644 index 000000000000..7f71ecfaaaae --- /dev/null +++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,478 @@
	1	/*
	2	* kernel/sched_psn_edf.c
	3	*
	4	* Implementation of the PSN-EDF scheduler plugin.
	5	* Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
	6	*
	7	* Suspensions and non-preemptable sections are supported.
	8	* Priority inheritance is not supported.
	9	*/
	10
	11	#include <linux/percpu.h>
	12	#include <linux/sched.h>
	13	#include <linux/list.h>
	14	#include <linux/spinlock.h>
	15
	16	#include <linux/module.h>
	17
	18	#include <litmus/litmus.h>
	19	#include <litmus/jobs.h>
	20	#include <litmus/sched_plugin.h>
	21	#include <litmus/edf_common.h>
	22
	23
	24	typedef struct {
	25	rt_domain_t domain;
	26	int cpu;
	27	struct task_struct* scheduled; /* only RT tasks */
	28	/*
	29	* scheduling lock slock
	30	* protects the domain and serializes scheduling decisions
	31	*/
	32	#define slock domain.ready_lock
	33
	34	} psnedf_domain_t;
	35
	36	DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
	37
	38	#define local_edf (&__get_cpu_var(psnedf_domains).domain)
	39	#define local_pedf (&__get_cpu_var(psnedf_domains))
	40	#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
	41	#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
	42	#define task_edf(task) remote_edf(get_partition(task))
	43	#define task_pedf(task) remote_pedf(get_partition(task))
	44
	45
	46	static void psnedf_domain_init(psnedf_domain_t* pedf,
	47	check_resched_needed_t check,
	48	release_jobs_t release,
	49	int cpu)
	50	{
	51	edf_domain_init(&pedf->domain, check, release);
	52	pedf->cpu = cpu;
	53	pedf->scheduled = NULL;
	54	}
	55
	56	static void requeue(struct task_struct* t, rt_domain_t *edf)
	57	{
	58	if (t->state != TASK_RUNNING)
	59	TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
	60
	61	set_rt_flags(t, RT_F_RUNNING);
	62	if (is_released(t, litmus_clock()))
	63	__add_ready(edf, t);
	64	else
	65	add_release(edf, t); /* it has got to wait */
	66	}
	67
	68	/* we assume the lock is being held */
	69	static void preempt(psnedf_domain_t *pedf)
	70	{
	71	preempt_if_preemptable(pedf->scheduled, pedf->cpu);
	72	}
	73
	74	/* This check is trivial in partioned systems as we only have to consider
	75	* the CPU of the partition.
	76	*/
	77	static int psnedf_check_resched(rt_domain_t *edf)
	78	{
	79	psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
	80
	81	/* because this is a callback from rt_domain_t we already hold
	82	* the necessary lock for the ready queue
	83	*/
	84	if (edf_preemption_needed(edf, pedf->scheduled)) {
	85	preempt(pedf);
	86	return 1;
	87	} else
	88	return 0;
	89	}
	90
	91	static void job_completion(struct task_struct* t, int forced)
	92	{
	93	sched_trace_task_completion(t,forced);
	94	TRACE_TASK(t, "job_completion().\n");
	95
	96	set_rt_flags(t, RT_F_SLEEP);
	97	prepare_for_next_period(t);
	98	}
	99
	100	static void psnedf_tick(struct task_struct *t)
	101	{
	102	psnedf_domain_t *pedf = local_pedf;
	103
	104	/* Check for inconsistency. We don't need the lock for this since
	105	* ->scheduled is only changed in schedule, which obviously is not
	106	* executing in parallel on this CPU
	107	*/
	108	BUG_ON(is_realtime(t) && t != pedf->scheduled);
	109
	110	if (is_realtime(t) && budget_exhausted(t)) {
	111	if (!is_np(t)) {
	112	set_tsk_need_resched(t);
	113	TRACE("psnedf_scheduler_tick: "
	114	"%d is preemptable "
	115	" => FORCE_RESCHED\n", t->pid);
	116	} else if (is_user_np(t)) {
	117	TRACE("psnedf_scheduler_tick: "
	118	"%d is non-preemptable, "
	119	"preemption delayed.\n", t->pid);
	120	request_exit_np(t);
	121	}
	122	}
	123	}
	124
	125	static struct task_struct* psnedf_schedule(struct task_struct * prev)
	126	{
	127	psnedf_domain_t* pedf = local_pedf;
	128	rt_domain_t* edf = &pedf->domain;
	129	struct task_struct* next;
	130
	131	int out_of_time, sleep, preempt,
	132	np, exists, blocks, resched;
	133
	134	spin_lock(&pedf->slock);
	135
	136	/* sanity checking
	137	* differently from gedf, when a task exits (dead)
	138	* pedf->schedule may be null and prev _is_ realtime
	139	*/
	140	BUG_ON(pedf->scheduled && pedf->scheduled != prev);
	141	BUG_ON(pedf->scheduled && !is_realtime(prev));
	142
	143	/* (0) Determine state */
	144	exists = pedf->scheduled != NULL;
	145	blocks = exists && !is_running(pedf->scheduled);
	146	out_of_time = exists && budget_exhausted(pedf->scheduled);
	147	np = exists && is_np(pedf->scheduled);
	148	sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
	149	preempt = edf_preemption_needed(edf, prev);
	150
	151	/* If we need to preempt do so.
	152	* The following checks set resched to 1 in case of special
	153	* circumstances.
	154	*/
	155	resched = preempt;
	156
	157	/* If a task blocks we have no choice but to reschedule.
	158	*/
	159	if (blocks)
	160	resched = 1;
	161
	162	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	163	* Multiple calls to request_exit_np() don't hurt.
	164	*/
	165	if (np && (out_of_time || preempt || sleep))
	166	request_exit_np(pedf->scheduled);
	167
	168	/* Any task that is preemptable and either exhausts its execution
	169	* budget or wants to sleep completes. We may have to reschedule after
	170	* this.
	171	*/
	172	if (!np && (out_of_time || sleep) && !blocks) {
	173	job_completion(pedf->scheduled, !sleep);
	174	resched = 1;
	175	}
	176
	177	/* The final scheduling decision. Do we need to switch for some reason?
	178	* Switch if we are in RT mode and have no task or if we need to
	179	* resched.
	180	*/
	181	next = NULL;
	182	if ((!np || blocks) && (resched || !exists)) {
	183	/* Take care of a previously scheduled
	184	* job by taking it out of the Linux runqueue.
	185	*/
	186	if (pedf->scheduled && !blocks)
	187	requeue(pedf->scheduled, edf);
	188	next = __take_ready(edf);
	189	} else
	190	/* Only override Linux scheduler if we have a real-time task
	191	* scheduled that needs to continue.
	192	*/
	193	if (exists)
	194	next = prev;
	195
	196	if (next) {
	197	TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	198	set_rt_flags(next, RT_F_RUNNING);
	199	} else {
	200	TRACE("becoming idle at %llu\n", litmus_clock());
	201	}
	202
	203	pedf->scheduled = next;
	204	spin_unlock(&pedf->slock);
	205
	206	return next;
	207	}
	208
	209
	210	/* Prepare a task for running in RT mode
	211	*/
	212	static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
	213	{
	214	rt_domain_t* edf = task_edf(t);
	215	psnedf_domain_t* pedf = task_pedf(t);
	216	unsigned long flags;
	217
	218	TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
	219	t->rt_param.task_params.cpu);
	220
	221	/* setup job parameters */
	222	release_at(t, litmus_clock());
	223
	224	/* The task should be running in the queue, otherwise signal
	225	* code will try to wake it up with fatal consequences.
	226	*/
	227	spin_lock_irqsave(&pedf->slock, flags);
	228	if (running) {
	229	/* there shouldn't be anything else running at the time */
	230	BUG_ON(pedf->scheduled);
	231	pedf->scheduled = t;
	232	} else {
	233	requeue(t, edf);
	234	/* maybe we have to reschedule */
	235	preempt(pedf);
	236	}
	237	spin_unlock_irqrestore(&pedf->slock, flags);
	238	}
	239
	240	static void psnedf_task_wake_up(struct task_struct *task)
	241	{
	242	unsigned long flags;
	243	psnedf_domain_t* pedf = task_pedf(task);
	244	rt_domain_t* edf = task_edf(task);
	245	lt_t now;
	246
	247	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
	248	spin_lock_irqsave(&pedf->slock, flags);
	249	BUG_ON(is_queued(task));
	250	/* We need to take suspensions because of semaphores into
	251	* account! If a job resumes after being suspended due to acquiring
	252	* a semaphore, it should never be treated as a new job release.
	253	*
	254	* FIXME: This should be done in some more predictable and userspace-controlled way.
	255	*/
	256	now = litmus_clock();
	257	if (is_tardy(task, now) &&
	258	get_rt_flags(task) != RT_F_EXIT_SEM) {
	259	/* new sporadic release */
	260	release_at(task, now);
	261	sched_trace_task_release(task);
	262	}
	263
	264	/* Only add to ready queue if it is not the currently-scheduled
	265	* task. This could be the case if a task was woken up concurrently
	266	* on a remote CPU before the executing CPU got around to actually
	267	* de-scheduling the task, i.e., wake_up() raced with schedule()
	268	* and won.
	269	*/
	270	if (pedf->scheduled != task)
	271	requeue(task, edf);
	272
	273	spin_unlock_irqrestore(&pedf->slock, flags);
	274	TRACE_TASK(task, "wake up done\n");
	275	}
	276
	277	static void psnedf_task_block(struct task_struct *t)
	278	{
	279	/* only running tasks can block, thus t is in no queue */
	280	TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
	281
	282	BUG_ON(!is_realtime(t));
	283	BUG_ON(is_queued(t));
	284	}
	285
	286	static void psnedf_task_exit(struct task_struct * t)
	287	{
	288	unsigned long flags;
	289	psnedf_domain_t* pedf = task_pedf(t);
	290	rt_domain_t* edf;
	291
	292	spin_lock_irqsave(&pedf->slock, flags);
	293	if (is_queued(t)) {
	294	/* dequeue */
	295	edf = task_edf(t);
	296	remove(edf, t);
	297	}
	298	if (pedf->scheduled == t)
	299	pedf->scheduled = NULL;
	300
	301	TRACE_TASK(t, "RIP, now reschedule\n");
	302
	303	preempt(pedf);
	304	spin_unlock_irqrestore(&pedf->slock, flags);
	305	}
	306
	307	#ifdef CONFIG_FMLP
	308	static long psnedf_pi_block(struct pi_semaphore *sem,
	309	struct task_struct *new_waiter)
	310	{
	311	psnedf_domain_t* pedf;
	312	rt_domain_t* edf;
	313	struct task_struct* t;
	314	int cpu = get_partition(new_waiter);
	315
	316	BUG_ON(!new_waiter);
	317
	318	if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
	319	TRACE_TASK(new_waiter, " boosts priority\n");
	320	pedf = task_pedf(new_waiter);
	321	edf = task_edf(new_waiter);
	322
	323	/* interrupts already disabled */
	324	spin_lock(&pedf->slock);
	325
	326	/* store new highest-priority task */
	327	sem->hp.cpu_task[cpu] = new_waiter;
	328	if (sem->holder &&
	329	get_partition(sem->holder) == get_partition(new_waiter)) {
	330	/* let holder inherit */
	331	sem->holder->rt_param.inh_task = new_waiter;
	332	t = sem->holder;
	333	if (is_queued(t)) {
	334	/* queued in domain*/
	335	remove(edf, t);
	336	/* readd to make priority change take place */
	337	/* FIXME: this looks outdated */
	338	if (is_released(t, litmus_clock()))
	339	__add_ready(edf, t);
	340	else
	341	add_release(edf, t);
	342	}
	343	}
	344
	345	/* check if we need to reschedule */
	346	if (edf_preemption_needed(edf, current))
	347	preempt(pedf);
	348
	349	spin_unlock(&pedf->slock);
	350	}
	351
	352	return 0;
	353	}
	354
	355	static long psnedf_inherit_priority(struct pi_semaphore *sem,
	356	struct task_struct *new_owner)
	357	{
	358	int cpu = get_partition(new_owner);
	359
	360	new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
	361	if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
	362	TRACE_TASK(new_owner,
	363	"inherited priority from %s/%d\n",
	364	sem->hp.cpu_task[cpu]->comm,
	365	sem->hp.cpu_task[cpu]->pid);
	366	} else
	367	TRACE_TASK(new_owner,
	368	"cannot inherit priority: "
	369	"no higher priority job waits on this CPU!\n");
	370	/* make new owner non-preemptable as required by FMLP under
	371	* PSN-EDF.
	372	*/
	373	make_np(new_owner);
	374	return 0;
	375	}
	376
	377
	378	/* This function is called on a semaphore release, and assumes that
	379	* the current task is also the semaphore holder.
	380	*/
	381	static long psnedf_return_priority(struct pi_semaphore *sem)
	382	{
	383	struct task_struct* t = current;
	384	psnedf_domain_t* pedf = task_pedf(t);
	385	rt_domain_t* edf = task_edf(t);
	386	int ret = 0;
	387	int cpu = get_partition(current);
	388	int still_np;
	389
	390
	391	/* Find new highest-priority semaphore task
	392	* if holder task is the current hp.cpu_task[cpu].
	393	*
	394	* Calling function holds sem->wait.lock.
	395	*/
	396	if (t == sem->hp.cpu_task[cpu])
	397	edf_set_hp_cpu_task(sem, cpu);
	398
	399	still_np = take_np(current);
	400
	401	/* Since we don't nest resources, this
	402	* should always be zero */
	403	BUG_ON(still_np);
	404
	405	if (current->rt_param.inh_task) {
	406	TRACE_CUR("return priority of %s/%d\n",
	407	current->rt_param.inh_task->comm,
	408	current->rt_param.inh_task->pid);
	409	} else
	410	TRACE_CUR(" no priority to return %p\n", sem);
	411
	412
	413	/* Always check for delayed preemptions that might have become
	414	* necessary due to non-preemptive execution.
	415	*/
	416	spin_lock(&pedf->slock);
	417
	418	/* Reset inh_task to NULL. */
	419	current->rt_param.inh_task = NULL;
	420
	421	/* check if we need to reschedule */
	422	if (edf_preemption_needed(edf, current))
	423	preempt(pedf);
	424
	425	spin_unlock(&pedf->slock);
	426
	427
	428	return ret;
	429	}
	430
	431	#endif
	432
	433	static long psnedf_admit_task(struct task_struct* tsk)
	434	{
	435	return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
	436	}
	437
	438	/* Plugin object */
	439	static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
	440	.plugin_name = "PSN-EDF",
	441	#ifdef CONFIG_SRP
	442	.srp_active = 1,
	443	#endif
	444	.tick = psnedf_tick,
	445	.task_new = psnedf_task_new,
	446	.complete_job = complete_job,
	447	.task_exit = psnedf_task_exit,
	448	.schedule = psnedf_schedule,
	449	.task_wake_up = psnedf_task_wake_up,
	450	.task_block = psnedf_task_block,
	451	#ifdef CONFIG_FMLP
	452	.fmlp_active = 1,
	453	.pi_block = psnedf_pi_block,
	454	.inherit_priority = psnedf_inherit_priority,
	455	.return_priority = psnedf_return_priority,
	456	#endif
	457	.admit_task = psnedf_admit_task
	458	};
	459
	460
	461	static int __init init_psn_edf(void)
	462	{
	463	int i;
	464
	465	/* We do not really want to support cpu hotplug, do we? ;)
	466	* However, if we are so crazy to do so,
	467	* we cannot use num_online_cpu()
	468	*/
	469	for (i = 0; i < num_online_cpus(); i++) {
	470	psnedf_domain_init(remote_pedf(i),
	471	psnedf_check_resched,
	472	NULL, i);
	473	}
	474	return register_sched_plugin(&psn_edf_plugin);
	475	}
	476
	477	module_init(init_psn_edf);
	478