 litmus/Makefile        |   3 +-
 litmus/sched_psn_edf.c | 667 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 669 insertions(+), 1 deletion(-)
diff --git a/litmus/Makefile b/litmus/Makefile
index fcb4d7533327..5b38295b992d 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -17,7 +17,8 @@ obj-y = sched_plugin.o litmus.o \
 	srp.o \
 	bheap.o \
 	binheap.o \
-	ctrldev.o
+	ctrldev.o \
+	sched_psn_edf.o
 
 obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
 
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..0873dc172651
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,667 @@
/*
 * litmus/sched_psn_edf.c
 *
 * Implementation of the PSN-EDF scheduler plugin.
 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
 *
 * Suspensions and non-preemptable sections are supported.
 * Priority inheritance is not supported.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/preempt.h>
#include <litmus/budget.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
#include <litmus/sched_trace.h>
#include <litmus/trace.h>

typedef struct {
	rt_domain_t		domain;
	int			cpu;
	struct task_struct*	scheduled; /* only RT tasks */
/*
 * The scheduling lock (slock) protects the domain and serializes
 * scheduling decisions.
 */
#define slock domain.ready_lock

} psnedf_domain_t;

DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);

#define local_edf		(&__get_cpu_var(psnedf_domains).domain)
#define local_pedf		(&__get_cpu_var(psnedf_domains))
#define remote_edf(cpu)		(&per_cpu(psnedf_domains, cpu).domain)
#define remote_pedf(cpu)	(&per_cpu(psnedf_domains, cpu))
#define task_edf(task)		remote_edf(get_partition(task))
#define task_pedf(task)		remote_pedf(get_partition(task))
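
/* Accessors for the per-CPU scheduler state: local_* refer to the partition
 * of the CPU we are currently running on, remote_*(cpu) to the partition of
 * an arbitrary CPU, and task_*(task) to the partition that the task has been
 * assigned to via get_partition().
 */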


static void psnedf_domain_init(psnedf_domain_t* pedf,
			       check_resched_needed_t check,
			       release_jobs_t release,
			       int cpu)
{
	edf_domain_init(&pedf->domain, check, release);
	pedf->cpu = cpu;
	pedf->scheduled = NULL;
}

static void requeue(struct task_struct* t, rt_domain_t *edf)
{
	if (t->state != TASK_RUNNING)
		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");

	tsk_rt(t)->completed = 0;
	if (is_early_releasing(t) || is_released(t, litmus_clock()))
		__add_ready(edf, t);
	else
		add_release(edf, t); /* it has got to wait */
}

/* we assume the lock is being held */
static void preempt(psnedf_domain_t *pedf)
{
	preempt_if_preemptable(pedf->scheduled, pedf->cpu);
}

#ifdef CONFIG_LITMUS_LOCKING

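/* Priority boosting is used while a job holds a global (FMLP) resource: the
 * job is flagged as boosted and time-stamped, and the EDF priority comparison
 * in edf_common is expected to rank boosted jobs above all non-boosted jobs
 * in the partition, so the lock holder cannot be preempted by newly released
 * local jobs. This takes the place of priority inheritance, which this plugin
 * does not implement (see the header comment).
 */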
static void boost_priority(struct task_struct* t)
{
	unsigned long flags;
	psnedf_domain_t* pedf = task_pedf(t);
	lt_t now;

	raw_spin_lock_irqsave(&pedf->slock, flags);
	now = litmus_clock();

	TRACE_TASK(t, "priority boosted at %llu\n", now);

	tsk_rt(t)->priority_boosted = 1;
	tsk_rt(t)->boost_start_time = now;

	if (pedf->scheduled != t) {
		/* holder may be queued: first stop queue changes */
		raw_spin_lock(&pedf->domain.release_lock);
		if (is_queued(t) &&
		    /* If it is queued, then we need to re-order. */
		    bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
		    /* If we bubbled to the top, then we need to check for preemptions. */
		    edf_preemption_needed(&pedf->domain, pedf->scheduled))
			preempt(pedf);
		raw_spin_unlock(&pedf->domain.release_lock);
	} /* else: nothing to do since the job is not queued while scheduled */

	raw_spin_unlock_irqrestore(&pedf->slock, flags);
}

static void unboost_priority(struct task_struct* t)
{
	unsigned long flags;
	psnedf_domain_t* pedf = task_pedf(t);
	lt_t now;

	raw_spin_lock_irqsave(&pedf->slock, flags);
	now = litmus_clock();

	/* Assumption: this only happens while the job is scheduled;
	 * priority-boosted jobs must be scheduled. */
	BUG_ON(pedf->scheduled != t);

	TRACE_TASK(t, "priority restored at %llu\n", now);

	tsk_rt(t)->priority_boosted = 0;
	tsk_rt(t)->boost_start_time = 0;

	/* check if this changes anything */
	if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
		preempt(pedf);

	raw_spin_unlock_irqrestore(&pedf->slock, flags);
}

#endif

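/* Check whether the currently scheduled job must be preempted in favor of a
 * higher-priority ready job; if so, trigger the preemption and return 1.
 * Caller must hold pedf->slock.
 */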
static int psnedf_preempt_check(psnedf_domain_t *pedf)
{
	if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
		preempt(pedf);
		return 1;
	} else
		return 0;
}

/* This check is trivial in partitioned systems: we only have to consider
 * the CPU of the partition.
 */
static int psnedf_check_resched(rt_domain_t *edf)
{
	psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);

	/* because this is a callback from rt_domain_t we already hold
	 * the necessary lock for the ready queue
	 */
	return psnedf_preempt_check(pedf);
}

static void job_completion(struct task_struct* t, int forced)
{
	sched_trace_task_completion(t, forced);
	TRACE_TASK(t, "job_completion().\n");

	tsk_rt(t)->completed = 0;
	prepare_for_next_period(t);
}

static void psnedf_tick(struct task_struct *t)
{
	psnedf_domain_t *pedf = local_pedf;

	/* Check for inconsistency. We don't need the lock for this since
	 * ->scheduled is only changed in schedule, which obviously is not
	 * executing in parallel on this CPU
	 */
	BUG_ON(is_realtime(t) && t != pedf->scheduled);

	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
		if (!is_np(t)) {
			litmus_reschedule_local();
			TRACE("psnedf_scheduler_tick: "
			      "%d is preemptable "
			      " => FORCE_RESCHED\n", t->pid);
		} else if (is_user_np(t)) {
			TRACE("psnedf_scheduler_tick: "
			      "%d is non-preemptable, "
			      "preemption delayed.\n", t->pid);
			request_exit_np(t);
		}
	}
}

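/* The scheduling core of the plugin. Under pedf->slock we determine the state
 * of the currently scheduled job (blocked, out of budget, non-preemptive,
 * completed) and whether a higher-priority job is ready, complete and requeue
 * the previous job if necessary, and then pick the next job from the local
 * EDF ready queue. Note that only raw_spin_lock() (without irqsave) is used
 * here, which suggests the function is invoked with interrupts already
 * disabled by the surrounding scheduler infrastructure.
 */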
static struct task_struct* psnedf_schedule(struct task_struct * prev)
{
	psnedf_domain_t* pedf = local_pedf;
	rt_domain_t* edf = &pedf->domain;
	struct task_struct* next;

	int out_of_time, sleep, preempt,
	    np, exists, blocks, resched;

	raw_spin_lock(&pedf->slock);

	/* sanity checking
	 * unlike in GSN-EDF, when a task exits (is dead), pedf->scheduled
	 * may be NULL while prev still _is_ a real-time task
	 */
	BUG_ON(pedf->scheduled && pedf->scheduled != prev);
	BUG_ON(pedf->scheduled && !is_realtime(prev));

	/* (0) Determine state */
	exists      = pedf->scheduled != NULL;
	blocks      = exists && !is_running(pedf->scheduled);
	out_of_time = exists &&
		      budget_enforced(pedf->scheduled) &&
		      budget_exhausted(pedf->scheduled);
	np          = exists && is_np(pedf->scheduled);
	sleep       = exists && is_completed(pedf->scheduled);
	preempt     = edf_preemption_needed(edf, prev);

	/* If we need to preempt do so.
	 * The following checks set resched to 1 in case of special
	 * circumstances.
	 */
	resched = preempt;

	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		resched = 1;

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * Multiple calls to request_exit_np() don't hurt.
	 */
	if (np && (out_of_time || preempt || sleep))
		request_exit_np(pedf->scheduled);

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this.
	 */
	if (!np && (out_of_time || sleep) && !blocks) {
		job_completion(pedf->scheduled, !sleep);
		resched = 1;
	}

	/* The final scheduling decision. Do we need to switch for some reason?
	 * Switch if we are in RT mode and have no task or if we need to
	 * resched.
	 */
	next = NULL;
	if ((!np || blocks) && (resched || !exists)) {
		/* When preempting a task that does not block, then
		 * re-insert it into either the ready queue or the
		 * release queue (if it completed). requeue() picks
		 * the appropriate queue.
		 */
		if (pedf->scheduled && !blocks)
			requeue(pedf->scheduled, edf);
		next = __take_ready(edf);
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue.
		 */
		if (exists)
			next = prev;

	if (next) {
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	} else {
		TRACE("becoming idle at %llu\n", litmus_clock());
	}

	pedf->scheduled = next;
	sched_state_task_picked();
	raw_spin_unlock(&pedf->slock);

	return next;
}


/* Prepare a task for running in RT mode
 */
static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
{
	rt_domain_t* edf = task_edf(t);
	psnedf_domain_t* pedf = task_pedf(t);
	unsigned long flags;

	TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
		   t->rt_param.task_params.cpu);

	/* setup job parameters */
	release_at(t, litmus_clock());

	/* The task should be running in the queue; otherwise the signal
	 * code will try to wake it up, with fatal consequences.
	 */
	raw_spin_lock_irqsave(&pedf->slock, flags);
	if (is_scheduled) {
		/* there shouldn't be anything else scheduled at the time */
		BUG_ON(pedf->scheduled);
		pedf->scheduled = t;
	} else {
		/* !is_scheduled means it is not scheduled right now, but it
		 * does not mean that it is suspended. If it is not suspended,
		 * it still needs to be requeued. If it is suspended, there is
		 * nothing that we need to do as it will be handled by the
		 * wake_up() handler. */
		if (is_running(t)) {
			requeue(t, edf);
			/* maybe we have to reschedule */
			psnedf_preempt_check(pedf);
		}
	}
	raw_spin_unlock_irqrestore(&pedf->slock, flags);
}

static void psnedf_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	psnedf_domain_t* pedf = task_pedf(task);
	rt_domain_t* edf = task_edf(task);
	lt_t now;

	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
	raw_spin_lock_irqsave(&pedf->slock, flags);
	BUG_ON(is_queued(task));
	now = litmus_clock();
	if (is_sporadic(task) && is_tardy(task, now)
#ifdef CONFIG_LITMUS_LOCKING
	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	    && !is_priority_boosted(task)
#endif
		) {
		/* new sporadic release */
		release_at(task, now);
		sched_trace_task_release(task);
	}

	/* Only add to ready queue if it is not the currently-scheduled
	 * task. This could be the case if a task was woken up concurrently
	 * on a remote CPU before the executing CPU got around to actually
	 * de-scheduling the task, i.e., wake_up() raced with schedule()
	 * and won.
	 */
	if (pedf->scheduled != task) {
		requeue(task, edf);
		psnedf_preempt_check(pedf);
	}

	raw_spin_unlock_irqrestore(&pedf->slock, flags);
	TRACE_TASK(task, "wake up done\n");
}

static void psnedf_task_block(struct task_struct *t)
{
	/* only running tasks can block, thus t is in no queue */
	TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);

	BUG_ON(!is_realtime(t));
	BUG_ON(is_queued(t));
}

static void psnedf_task_exit(struct task_struct * t)
{
	unsigned long flags;
	psnedf_domain_t* pedf = task_pedf(t);
	rt_domain_t* edf;

	raw_spin_lock_irqsave(&pedf->slock, flags);
	if (is_queued(t)) {
		/* dequeue */
		edf = task_edf(t);
		remove(edf, t);
	}
	if (pedf->scheduled == t)
		pedf->scheduled = NULL;

	TRACE_TASK(t, "RIP, now reschedule\n");

	preempt(pedf);
	raw_spin_unlock_irqrestore(&pedf->slock, flags);
}

#ifdef CONFIG_LITMUS_LOCKING

#include <litmus/fdso.h>
#include <litmus/srp.h>

/* ******************** SRP support ************************ */

static unsigned int psnedf_get_srp_prio(struct task_struct* t)
{
	/* assumes implicit deadlines */
	return get_rt_period(t);
}
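/* Note: with implicit deadlines (relative deadline == period), using the
 * period as the SRP priority presumably yields deadline-monotonic preemption
 * levels, assuming the SRP core treats smaller values as higher priority.
 */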

/* ******************** FMLP support ********************** */

/* struct for a semaphore with priority boosting */
struct fmlp_semaphore {
	struct litmus_lock litmus_lock;

	/* current resource holder */
	struct task_struct *owner;

	/* FIFO queue of waiting tasks */
	wait_queue_head_t wait;
};

static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
{
	return container_of(lock, struct fmlp_semaphore, litmus_lock);
}
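
/* FMLP lock acquisition: if the semaphore is free, the caller becomes the
 * owner and is priority-boosted right away; otherwise it adds itself to the
 * FIFO wait queue and suspends. Ownership and the priority boost are handed
 * over by the unlock path, so a waiter that wakes up already owns the
 * resource and does not need to re-check or re-compete for it.
 */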
int psnedf_fmlp_lock(struct litmus_lock* l)
{
	struct task_struct* t = current;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	wait_queue_t wait;
	unsigned long flags;

	if (!is_realtime(t))
		return -EPERM;

	/* prevent nested lock acquisition --- not supported by FMLP */
	if (tsk_rt(t)->num_locks_held ||
	    tsk_rt(t)->num_local_locks_held)
		return -EBUSY;

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner) {
		/* resource is not free => must suspend and wait */

		init_waitqueue_entry(&wait, t);

		/* FIXME: interruptible would be nice some day */
		set_task_state(t, TASK_UNINTERRUPTIBLE);

		__add_wait_queue_tail_exclusive(&sem->wait, &wait);

		TS_LOCK_SUSPEND;

		/* release lock before sleeping */
		spin_unlock_irqrestore(&sem->wait.lock, flags);

		/* We depend on the FIFO order. Thus, we don't need to recheck
		 * when we wake up; we are guaranteed to have the lock since
		 * there is only one wake up per release.
		 */

		schedule();

		TS_LOCK_RESUME;

		/* Since we hold the lock, no other task will change
		 * ->owner. We can thus check it without acquiring the spin
		 * lock. */
		BUG_ON(sem->owner != t);
	} else {
		/* it's ours now */
		sem->owner = t;

		/* mark the task as priority-boosted. */
		boost_priority(t);

		spin_unlock_irqrestore(&sem->wait.lock, flags);
	}

	tsk_rt(t)->num_locks_held++;

	return 0;
}

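/* FMLP unlock: the outgoing owner gives up its priority boost; if the wait
 * queue is non-empty, the job at its head is boosted, made the new owner, and
 * woken up. The next holder is boosted before being woken so that it resumes
 * with the boost already in effect.
 */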
int psnedf_fmlp_unlock(struct litmus_lock* l)
{
	struct task_struct *t = current, *next;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&sem->wait.lock, flags);

	if (sem->owner != t) {
		err = -EINVAL;
		goto out;
	}

	tsk_rt(t)->num_locks_held--;

	/* we lose the benefit of priority boosting */

	unboost_priority(t);

	/* check if there are jobs waiting for this resource */
	next = __waitqueue_remove_first(&sem->wait);
	if (next) {
		/* boost next job */
		boost_priority(next);

		/* next becomes the resource holder */
		sem->owner = next;

		/* wake up next */
		wake_up_process(next);
	} else
		/* resource becomes available */
		sem->owner = NULL;

out:
	spin_unlock_irqrestore(&sem->wait.lock, flags);
	return err;
}

int psnedf_fmlp_close(struct litmus_lock* l)
{
	struct task_struct *t = current;
	struct fmlp_semaphore *sem = fmlp_from_lock(l);
	unsigned long flags;

	int owner;

	spin_lock_irqsave(&sem->wait.lock, flags);

	owner = sem->owner == t;

	spin_unlock_irqrestore(&sem->wait.lock, flags);

	if (owner)
		psnedf_fmlp_unlock(l);

	return 0;
}

void psnedf_fmlp_free(struct litmus_lock* lock)
{
	kfree(fmlp_from_lock(lock));
}

static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
	.close      = psnedf_fmlp_close,
	.lock       = psnedf_fmlp_lock,
	.unlock     = psnedf_fmlp_unlock,
	.deallocate = psnedf_fmlp_free,
};

static struct litmus_lock* psnedf_new_fmlp(void)
{
	struct fmlp_semaphore* sem;

	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
	if (!sem)
		return NULL;

	sem->owner = NULL;
	init_waitqueue_head(&sem->wait);
	sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;

	return &sem->litmus_lock;
}

/* **** lock constructor **** */


static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
				 void* __user unused)
{
	int err = -ENXIO;
	struct srp_semaphore* srp;

	/* PSN-EDF currently supports the SRP for local resources and the FMLP
	 * for global resources. */
	switch (type) {
	case FMLP_SEM:
		/* Flexible Multiprocessor Locking Protocol */
		*lock = psnedf_new_fmlp();
		if (*lock)
			err = 0;
		else
			err = -ENOMEM;
		break;

	case SRP_SEM:
		/* Baker's Stack Resource Policy */
		srp = allocate_srp_semaphore();
		if (srp) {
			*lock = &srp->litmus_lock;
			err = 0;
		} else
			err = -ENOMEM;
		break;
	}

	return err;
}

#endif


static long psnedf_activate_plugin(void)
{
#ifdef CONFIG_RELEASE_MASTER
	int cpu;

	for_each_online_cpu(cpu) {
		remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
	}
#endif

#ifdef CONFIG_LITMUS_LOCKING
	get_srp_prio = psnedf_get_srp_prio;
#endif

	return 0;
}

static long psnedf_admit_task(struct task_struct* tsk)
{
	if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
#ifdef CONFIG_RELEASE_MASTER
	    /* don't allow tasks on release master CPU */
	    && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
#endif
		)
		return 0;
	else
		return -EINVAL;
}

/* Plugin object */
static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
	.plugin_name		= "PSN-EDF",
	.tick			= psnedf_tick,
	.task_new		= psnedf_task_new,
	.complete_job		= complete_job,
	.task_exit		= psnedf_task_exit,
	.schedule		= psnedf_schedule,
	.task_wake_up		= psnedf_task_wake_up,
	.task_block		= psnedf_task_block,
	.admit_task		= psnedf_admit_task,
	.activate_plugin	= psnedf_activate_plugin,
#ifdef CONFIG_LITMUS_LOCKING
	.allocate_lock		= psnedf_allocate_lock,
#endif
};
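
/* Once registered, the plugin can be selected at runtime via LITMUS^RT's
 * plugin-switching interface (e.g., the setsched utility from liblitmus);
 * the exact mechanism depends on the LITMUS^RT version and is outside the
 * scope of this file.
 */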


static int __init init_psn_edf(void)
{
	int i;

	/* We do not really want to support CPU hotplug, do we? ;)
	 * However, if we were crazy enough to do so, note that we could not
	 * simply rely on num_online_cpus() here.
	 */
	for (i = 0; i < num_online_cpus(); i++) {
		psnedf_domain_init(remote_pedf(i),
				   psnedf_check_resched,
				   NULL, i);
	}
	return register_sched_plugin(&psn_edf_plugin);
}

module_init(init_psn_edf);