Add PSN-EDF scheduler plugin

author: Bjoern Brandenburg <bbb@mpi-sws.org> 2013-02-12 13:15:27 -0500
committer: Bjoern Brandenburg <bbb@mpi-sws.org> 2014-06-07 05:31:08 -0400
commit: 000e29d4d1367efb3bf5de6c58b79d988958fcb2 (patch)
tree: da8243e896fa707d54c6da46eda00c506c4901d4
parent: 1f0829fdfeb0b62077ab8ceb02e9995c95733f96 (diff)
2 files changed, 692 insertions, 1 deletions
diff --git a/litmus/Makefile b/litmus/Makefile
index f7ceabc47197..0db695e35201 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -18,7 +18,9 @@ obj-y     = sched_plugin.o litmus.o \
            bheap.o \
            binheap.o \
            ctrldev.o \
-            uncachedev.o
+            uncachedev.o \
+            sched_psn_edf.o
 obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..dd042dbc11f7
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,689 @@
+/*
+ * litmus/sched_psn_edf.c
+ *
+ * Implementation of the PSN-EDF scheduler plugin.
+ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
+ *
+ * Suspensions and non-preemptable sections are supported.
+ * Priority inheritance is not supported.
+ */
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/preempt.h>
+#include <litmus/budget.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+#include <litmus/sched_trace.h>
+#include <litmus/trace.h>
+/* to set up domain/cpu mappings */
+#include <litmus/litmus_proc.h>
+typedef struct {
+        rt_domain_t             domain;    /* EDF domain (ready and release queues) of this partition */
+        int                     cpu;       /* CPU this partition belongs to; set by psnedf_domain_init() */
+        struct task_struct*     scheduled; /* RT task currently picked for this CPU, NULL if none; only RT tasks */
+/*
+ * scheduling lock slock
+ * protects the domain and serializes scheduling decisions.
+ * It is not a separate member: slock is an alias for the ready-queue
+ * lock embedded in the domain, so pedf->slock expands to
+ * pedf->domain.ready_lock.
+ */
+#define slock domain.ready_lock
+} psnedf_domain_t;
+DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains); /* one partition state per CPU */
+#define local_edf               (&__get_cpu_var(psnedf_domains).domain) /* rt_domain_t of the executing CPU */
+#define local_pedf              (&__get_cpu_var(psnedf_domains)) /* partition state of the executing CPU */
+#define remote_edf(cpu)         (&per_cpu(psnedf_domains, cpu).domain) /* rt_domain_t of the given CPU */
+#define remote_pedf(cpu)        (&per_cpu(psnedf_domains, cpu)) /* partition state of the given CPU */
+#define task_edf(task)          remote_edf(get_partition(task)) /* rt_domain_t of the task's partition */
+#define task_pedf(task)         remote_pedf(get_partition(task)) /* partition state of the task's partition */
+/* Initialize one per-CPU partition.
+ *
+ * pedf:    partition state to set up
+ * check:   resched-check callback handed to the EDF domain
+ * release: job-release callback handed to the EDF domain
+ * cpu:     CPU this partition is bound to
+ *
+ * After the call the partition has no scheduled task.
+ */
+static void psnedf_domain_init(psnedf_domain_t* pedf,
+                               check_resched_needed_t check,
+                               release_jobs_t release,
+                               int cpu)
+{
+        pedf->scheduled = NULL;
+        pedf->cpu       = cpu;
+        edf_domain_init(&pedf->domain, check, release);
+}
+/* Put t back into the EDF domain edf.
+ *
+ * The completed flag is cleared, then t goes to the ready queue if it is
+ * early-releasing or its job is already released at the current time;
+ * otherwise it is handed to the release queue and has to wait.
+ * Every caller in this file holds the partition's slock when calling.
+ */
+static void requeue(struct task_struct* t, rt_domain_t *edf)
+{
+        int eligible;
+
+        if (t->state != TASK_RUNNING)
+                TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+        tsk_rt(t)->completed = 0;
+        eligible = is_early_releasing(t) || is_released(t, litmus_clock());
+        if (!eligible) {
+                /* it has got to wait */
+                add_release(edf, t);
+                return;
+        }
+        __add_ready(edf, t);
+}
+/* Trigger a preemption check for the partition's CPU by handing the
+ * currently scheduled task and the CPU to preempt_if_preemptable().
+ * We assume the partition lock is being held by the caller.
+ */
+static void preempt(psnedf_domain_t *pedf)
+{
+        struct task_struct *running = pedf->scheduled;
+
+        preempt_if_preemptable(running, pedf->cpu);
+}
+#ifdef CONFIG_LITMUS_LOCKING
+/* Mark t as priority-boosted and record when the boost started.
+ *
+ * Runs under the slock of t's partition (IRQs disabled). If t is not the
+ * task currently scheduled there it may sit in the ready queue, in which
+ * case its heap position is updated and a preemption is requested when
+ * the re-ordered queue now demands one.
+ */
+static void boost_priority(struct task_struct* t)
+{
+        psnedf_domain_t*        part = task_pedf(t);
+        unsigned long           irq_state;
+        lt_t                    boost_time;
+
+        raw_spin_lock_irqsave(&part->slock, irq_state);
+        boost_time = litmus_clock();
+        TRACE_TASK(t, "priority boosted at %llu\n", boost_time);
+        tsk_rt(t)->priority_boosted = 1;
+        tsk_rt(t)->boost_start_time = boost_time;
+        /* nothing to do for the scheduled job: it is not queued while
+         * scheduled */
+        if (part->scheduled != t) {
+                /* holder may be queued: first stop queue changes */
+                raw_spin_lock(&part->domain.release_lock);
+                if (is_queued(t)) {
+                        /* If it is queued, then we need to re-order. */
+                        if (bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node)) {
+                                /* If we bubbled to the top, then we need
+                                 * to check for preemptions. */
+                                if (edf_preemption_needed(&part->domain,
+                                                          part->scheduled))
+                                        preempt(part);
+                        }
+                }
+                raw_spin_unlock(&part->domain.release_lock);
+        }
+        raw_spin_unlock_irqrestore(&part->slock, irq_state);
+}
+/* Remove the priority boost from t and reset its boost start time.
+ *
+ * Runs under the slock of t's partition (IRQs disabled). Once the boost is
+ * gone a queued job may have higher priority than t, so a preemption is
+ * requested if the EDF check says one is needed.
+ */
+static void unboost_priority(struct task_struct* t)
+{
+        psnedf_domain_t*        part = task_pedf(t);
+        unsigned long           irq_state;
+        lt_t                    restore_time;
+
+        raw_spin_lock_irqsave(&part->slock, irq_state);
+        restore_time = litmus_clock();
+
+        /* Assumption: this only happens when the job is scheduled.
+         * Exception: If t transitioned to non-real-time mode, we no longer
+         * care about it. */
+        BUG_ON(part->scheduled != t && is_realtime(t));
+
+        TRACE_TASK(t, "priority restored at %llu\n", restore_time);
+        tsk_rt(t)->priority_boosted = 0;
+        tsk_rt(t)->boost_start_time = 0;
+
+        /* check if this changes anything */
+        if (edf_preemption_needed(&part->domain, part->scheduled))
+                preempt(part);
+
+        raw_spin_unlock_irqrestore(&part->slock, irq_state);
+}
+#endif
+/* Request a preemption on pedf's CPU if the EDF domain reports that the
+ * currently scheduled task should be preempted.
+ *
+ * Returns 1 if a preemption was requested, 0 otherwise.
+ */
+static int psnedf_preempt_check(psnedf_domain_t *pedf)
+{
+        if (!edf_preemption_needed(&pedf->domain, pedf->scheduled))
+                return 0;
+
+        preempt(pedf);
+        return 1;
+}
+/* Resched-check callback installed into each partition's rt_domain_t.
+ *
+ * This check is trivial in partitioned systems as we only have to consider
+ * the CPU of the partition that embeds the given domain.
+ *
+ * Because this is a callback from rt_domain_t, the necessary lock for the
+ * ready queue is already held when we get here.
+ *
+ * Returns 1 if a preemption was requested, 0 otherwise.
+ */
+static int psnedf_check_resched(rt_domain_t *edf)
+{
+        return psnedf_preempt_check(container_of(edf, psnedf_domain_t, domain));
+}
+static void job_completion(struct task_struct* t, int forced)
+{       /* Finish t's current job and prepare its next one.
+         * forced is forwarded to the completion trace record;
+         * psnedf_schedule() passes non-zero when the job ended because
+         * its budget was exhausted rather than because it completed.
+         */
+        sched_trace_task_completion(t,forced);
+        TRACE_TASK(t, "job_completion().\n");
+        tsk_rt(t)->completed = 0; /* reset the completion flag before the next job is set up */
+        prepare_for_next_period(t);
+}
+/* Scheduling decision for the local partition (plugin .schedule callback).
+ *
+ * prev is the task that was running on this CPU. Under the partition's
+ * slock the function classifies the state of the currently scheduled RT
+ * task (if any), completes its job when it is preemptable and either ran
+ * out of budget or signalled completion, and then either keeps it running
+ * or requeues it and takes the next job from the ready queue.
+ *
+ * Returns the RT task to run next, or NULL if there is none.
+ */
+static struct task_struct* psnedf_schedule(struct task_struct * prev)
+{
+        psnedf_domain_t*        pedf = local_pedf;
+        rt_domain_t*            edf  = &pedf->domain;
+        struct task_struct*     cur;
+        struct task_struct*     picked;
+        int                     have_task, suspended, over_budget,
+                                non_preempt, job_done, want_preempt,
+                                must_switch;
+
+        raw_spin_lock(&pedf->slock);
+        cur = pedf->scheduled;
+
+        /* sanity checking
+         * differently from gedf, when a task exits (dead)
+         * pedf->scheduled may be null and prev _is_ realtime
+         */
+        BUG_ON(cur && cur != prev);
+        BUG_ON(cur && !is_realtime(prev));
+
+        /* (0) Determine state */
+        have_task    = cur != NULL;
+        suspended    = have_task && !is_running(cur);
+        over_budget  = have_task && budget_enforced(cur) &&
+                       budget_exhausted(cur);
+        non_preempt  = have_task && is_np(cur);
+        job_done     = have_task && is_completed(cur);
+        want_preempt = edf_preemption_needed(edf, prev);
+
+        /* We switch if a preemption is pending; a blocking task leaves us
+         * no choice but to reschedule as well.
+         */
+        must_switch = want_preempt || suspended;
+
+        /* Request a sys_exit_np() call if we would like to preempt but
+         * cannot. Multiple calls to request_exit_np() don't hurt.
+         */
+        if (non_preempt && (over_budget || want_preempt || job_done))
+                request_exit_np(cur);
+
+        /* Any task that is preemptable and either exhausts its execution
+         * budget or wants to sleep completes. We may have to reschedule
+         * after this.
+         */
+        if (!non_preempt && !suspended && (over_budget || job_done)) {
+                job_completion(cur, !job_done);
+                must_switch = 1;
+        }
+
+        /* The final scheduling decision: switch if there is no task, or if
+         * a reschedule is due and the current task either blocks or is
+         * preemptable.
+         */
+        if ((suspended || !non_preempt) && (!have_task || must_switch)) {
+                /* A preempted task that does not block goes back into the
+                 * ready queue or the release queue (if it completed);
+                 * requeue() picks the appropriate queue.
+                 */
+                if (have_task && !suspended)
+                        requeue(cur, edf);
+                picked = __take_ready(edf);
+        } else {
+                /* Only override the Linux scheduler if we have a real-time
+                 * task scheduled that needs to continue.
+                 */
+                picked = have_task ? prev : NULL;
+        }
+
+        if (picked) {
+                TRACE_TASK(picked, "scheduled at %llu\n", litmus_clock());
+        } else {
+                TRACE("becoming idle at %llu\n", litmus_clock());
+        }
+
+        pedf->scheduled = picked;
+        sched_state_task_picked();
+        raw_spin_unlock(&pedf->slock);
+        return picked;
+}
+/* Prepare a task for running in RT mode (plugin .task_new callback).
+ *
+ * t:            the task entering real-time mode
+ * on_rq:        not used by this plugin
+ * is_scheduled: non-zero if t is currently executing on its CPU
+ *
+ * The first job is released at the current time. A task that is already
+ * executing becomes the partition's scheduled task; otherwise it is queued
+ * if it is runnable.
+ */
+static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
+{
+        psnedf_domain_t*        pedf = task_pedf(t);
+        unsigned long           irq_state;
+
+        TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
+                   t->rt_param.task_params.cpu);
+
+        /* setup job parameters */
+        release_at(t, litmus_clock());
+
+        /* The task should be running in the queue, otherwise signal
+         * code will try to wake it up with fatal consequences.
+         */
+        raw_spin_lock_irqsave(&pedf->slock, irq_state);
+        if (is_scheduled) {
+                /* there shouldn't be anything else scheduled at the time */
+                BUG_ON(pedf->scheduled);
+                pedf->scheduled = t;
+        } else if (is_running(t)) {
+                /* Not scheduled right now does not mean suspended: a
+                 * runnable task still has to be requeued. A suspended one
+                 * needs nothing here; the wake_up() handler deals with it.
+                 */
+                requeue(t, task_edf(t));
+                /* maybe we have to reschedule */
+                psnedf_preempt_check(pedf);
+        }
+        raw_spin_unlock_irqrestore(&pedf->slock, irq_state);
+}
+/* Handle the wake-up of a real-time task (plugin .task_wake_up callback).
+ *
+ * A tardy sporadic task starts a new job at the wake-up time, unless
+ * (with locking support) it is priority-boosted. Afterwards the task is
+ * requeued, provided it is not still the partition's scheduled task.
+ */
+static void psnedf_task_wake_up(struct task_struct *task)
+{
+        psnedf_domain_t*        pedf = task_pedf(task);
+        rt_domain_t*            edf  = task_edf(task);
+        unsigned long           irq_state;
+        lt_t                    woke_at;
+        int                     fresh_release;
+
+        TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
+        raw_spin_lock_irqsave(&pedf->slock, irq_state);
+        BUG_ON(is_queued(task));
+        woke_at = litmus_clock();
+
+        fresh_release = is_sporadic(task) && is_tardy(task, woke_at);
+#ifdef CONFIG_LITMUS_LOCKING
+        /* We need to take suspensions because of semaphores into
+         * account! If a job resumes after being suspended due to acquiring
+         * a semaphore, it should never be treated as a new job release.
+         */
+        fresh_release = fresh_release && !is_priority_boosted(task);
+#endif
+        if (fresh_release) {
+                /* new sporadic release */
+                release_at(task, woke_at);
+                sched_trace_task_release(task);
+        }
+
+        /* Only add to ready queue if it is not the currently-scheduled
+         * task. This could be the case if a task was woken up concurrently
+         * on a remote CPU before the executing CPU got around to actually
+         * de-scheduling the task, i.e., wake_up() raced with schedule()
+         * and won.
+         */
+        if (pedf->scheduled != task) {
+                requeue(task, edf);
+                psnedf_preempt_check(pedf);
+        }
+        raw_spin_unlock_irqrestore(&pedf->slock, irq_state);
+        TRACE_TASK(task, "wake up done\n");
+}
+static void psnedf_task_block(struct task_struct *t)
+{
+        /* Registered as the plugin's task_block callback. No queue
+         * manipulation happens here: only running tasks can block, thus
+         * t is in no queue. The function only traces the event and
+         * asserts that t is a real-time task that is not queued.
+         */
+        TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
+        BUG_ON(!is_realtime(t));
+        BUG_ON(is_queued(t));
+}
+/* Remove an exiting task from its partition (plugin .task_exit callback).
+ *
+ * Under the partition's slock the task is dequeued if it is queued and
+ * cleared from the scheduled slot if it occupies it; then a preemption
+ * check is triggered for the partition's CPU.
+ */
+static void psnedf_task_exit(struct task_struct * t)
+{
+        psnedf_domain_t*        pedf = task_pedf(t);
+        unsigned long           irq_state;
+
+        raw_spin_lock_irqsave(&pedf->slock, irq_state);
+        /* dequeue */
+        if (is_queued(t))
+                remove(task_edf(t), t);
+        if (pedf->scheduled == t)
+                pedf->scheduled = NULL;
+        TRACE_TASK(t, "RIP, now reschedule\n");
+        preempt(pedf);
+        raw_spin_unlock_irqrestore(&pedf->slock, irq_state);
+}
+#ifdef CONFIG_LITMUS_LOCKING
+#include <litmus/fdso.h>
+#include <litmus/srp.h>
+/* ******************** SRP support ************************ */
+/* SRP priority of a task under PSN-EDF: its relative deadline, returned
+ * as an unsigned int. Installed as get_srp_prio when the plugin is
+ * activated.
+ */
+static unsigned int psnedf_get_srp_prio(struct task_struct* t)
+{
+        unsigned int prio = get_rt_relative_deadline(t);
+
+        return prio;
+}
+/* ******************** FMLP support ********************** */
+/* FMLP semaphore: a suspension-based lock with a FIFO wait queue.
+ * Originally labelled "struct for semaphore with priority inheritance";
+ * in this file the holder is priority-boosted (see boost_priority())
+ * rather than inheriting a waiter's priority.
+ */
+struct fmlp_semaphore {
+        struct litmus_lock litmus_lock; /* embedded generic lock; fmlp_from_lock() maps it back to this struct */
+        /* current resource holder, NULL while the resource is free */
+        struct task_struct *owner;
+        /* FIFO queue of waiting tasks; its internal lock also protects owner */
+        wait_queue_head_t wait;
+};
+/* Map a generic litmus_lock back to the fmlp_semaphore that embeds it. */
+static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
+{
+        struct fmlp_semaphore *sem;
+
+        sem = container_of(lock, struct fmlp_semaphore, litmus_lock);
+        return sem;
+}
+/* Acquire an FMLP semaphore on behalf of the calling task.
+ *
+ * Returns 0 once the caller owns the semaphore, -EPERM if the caller is
+ * not a real-time task, and -EBUSY if it already holds a lock (nesting is
+ * not supported by the FMLP).
+ *
+ * If the semaphore is free the caller takes it and is priority-boosted
+ * right away. Otherwise the caller is appended to the FIFO wait queue and
+ * suspends; ownership is handed over by psnedf_fmlp_unlock().
+ */
+int psnedf_fmlp_lock(struct litmus_lock* l)
+{
+        struct fmlp_semaphore *sem = fmlp_from_lock(l);
+        struct task_struct* self = current;
+        wait_queue_t waiter;
+        unsigned long irq_state;
+
+        if (!is_realtime(self))
+                return -EPERM;
+
+        /* prevent nested lock acquisition --- not supported by FMLP */
+        if (tsk_rt(self)->num_locks_held ||
+            tsk_rt(self)->num_local_locks_held)
+                return -EBUSY;
+
+        spin_lock_irqsave(&sem->wait.lock, irq_state);
+        if (!sem->owner) {
+                /* it's ours now */
+                sem->owner = self;
+                /* mark the task as priority-boosted. */
+                boost_priority(self);
+                spin_unlock_irqrestore(&sem->wait.lock, irq_state);
+        } else {
+                /* resource is not free => must suspend and wait */
+                init_waitqueue_entry(&waiter, self);
+                /* FIXME: interruptible would be nice some day */
+                set_task_state(self, TASK_UNINTERRUPTIBLE);
+                __add_wait_queue_tail_exclusive(&sem->wait, &waiter);
+                TS_LOCK_SUSPEND;
+                /* release lock before sleeping */
+                spin_unlock_irqrestore(&sem->wait.lock, irq_state);
+                /* We depend on the FIFO order. Thus, we don't need to
+                 * recheck when we wake up; we are guaranteed to have the
+                 * lock since there is only one wake up per release.
+                 */
+                schedule();
+                TS_LOCK_RESUME;
+                /* Since we hold the lock, no other task will change
+                 * ->owner. We can thus check it without acquiring the spin
+                 * lock. */
+                BUG_ON(sem->owner != self);
+        }
+        tsk_rt(self)->num_locks_held++;
+        return 0;
+}
+/* Release an FMLP semaphore held by the calling task.
+ *
+ * Returns 0 on success and -EINVAL if the caller is not the owner.
+ *
+ * The caller loses its priority boost. If a task is waiting, the first
+ * one in the FIFO queue is boosted, becomes the new owner and is woken up;
+ * otherwise the semaphore becomes free.
+ */
+int psnedf_fmlp_unlock(struct litmus_lock* l)
+{
+        struct fmlp_semaphore *sem = fmlp_from_lock(l);
+        struct task_struct *self = current;
+        struct task_struct *successor;
+        unsigned long irq_state;
+        int ret = -EINVAL;
+
+        spin_lock_irqsave(&sem->wait.lock, irq_state);
+        if (sem->owner == self) {
+                ret = 0;
+                tsk_rt(self)->num_locks_held--;
+                /* we lose the benefit of priority boosting */
+                unboost_priority(self);
+                /* check if there are jobs waiting for this resource */
+                successor = __waitqueue_remove_first(&sem->wait);
+                if (successor) {
+                        /* boost next job */
+                        boost_priority(successor);
+                        /* next becomes the resource holder */
+                        sem->owner = successor;
+                        /* wake up next */
+                        wake_up_process(successor);
+                } else {
+                        /* resource becomes available */
+                        sem->owner = NULL;
+                }
+        }
+        spin_unlock_irqrestore(&sem->wait.lock, irq_state);
+        return ret;
+}
+/* Close handler of the FMLP lock: if the calling task still owns the
+ * semaphore, release it. Ownership is sampled under the wait-queue lock;
+ * the unlock itself is done after that lock has been dropped.
+ * Always returns 0.
+ */
+int psnedf_fmlp_close(struct litmus_lock* l)
+{
+        struct fmlp_semaphore *sem = fmlp_from_lock(l);
+        struct task_struct *self = current;
+        unsigned long irq_state;
+        int held_by_caller;
+
+        spin_lock_irqsave(&sem->wait.lock, irq_state);
+        held_by_caller = (sem->owner == self);
+        spin_unlock_irqrestore(&sem->wait.lock, irq_state);
+
+        if (held_by_caller)
+                psnedf_fmlp_unlock(l);
+        return 0;
+}
+/* Deallocate handler of the FMLP lock: free the semaphore that embeds
+ * the given generic lock object.
+ */
+void psnedf_fmlp_free(struct litmus_lock* lock)
+{
+        struct fmlp_semaphore *sem = fmlp_from_lock(lock);
+
+        kfree(sem);
+}
+/* Operations table attached to every FMLP semaphore created by
+ * psnedf_new_fmlp().
+ */
+static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
+        .lock       = psnedf_fmlp_lock,
+        .unlock     = psnedf_fmlp_unlock,
+        .close      = psnedf_fmlp_close,
+        .deallocate = psnedf_fmlp_free,
+};
+/* Allocate and initialize a free FMLP semaphore.
+ *
+ * Returns the embedded generic lock object, or NULL if the allocation
+ * failed. The semaphore starts without an owner and with an empty wait
+ * queue.
+ */
+static struct litmus_lock* psnedf_new_fmlp(void)
+{
+        struct fmlp_semaphore* sem = kmalloc(sizeof(*sem), GFP_KERNEL);
+
+        if (sem == NULL)
+                return NULL;
+
+        init_waitqueue_head(&sem->wait);
+        sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
+        sem->owner = NULL;
+        return &sem->litmus_lock;
+}
+/* **** lock constructor **** */
+/* Create a lock object of the requested type (plugin .allocate_lock).
+ *
+ * lock:   out parameter receiving the new lock on success
+ * type:   FMLP_SEM or SRP_SEM
+ * unused: user-space argument, ignored by this plugin
+ *
+ * Returns 0 on success, -ENOMEM if the allocation failed and -ENXIO for
+ * any lock type that PSN-EDF does not support.
+ */
+static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
+                                 void* __user unused)
+{
+        int err = -ENXIO;
+        struct srp_semaphore* srp;
+
+        /* PSN-EDF currently supports the SRP for local resources and the FMLP
+         * for global resources. */
+        switch (type) {
+        case FMLP_SEM:
+                /* Flexible Multiprocessor Locking Protocol */
+                *lock = psnedf_new_fmlp();
+                if (*lock)
+                        err = 0;
+                else
+                        err = -ENOMEM;
+                break;
+        case SRP_SEM:
+                /* Baker's Stack Resource Policy */
+                srp = allocate_srp_semaphore();
+                if (srp) {
+                        *lock = &srp->litmus_lock;
+                        err = 0;
+                } else
+                        err = -ENOMEM;
+                break;
+        default:
+                /* unsupported lock type: err keeps its -ENXIO default */
+                break;
+        }
+        return err;
+}
+#endif
+static struct domain_proc_info psnedf_domain_proc_info; /* CPU <-> domain mapping, filled by psnedf_setup_domain_proc() */
+static long psnedf_get_domain_proc_info(struct domain_proc_info **ret)
+{       /* Plugin callback: hand out a pointer to the static mapping above; always returns 0. */
+        *ret = &psnedf_domain_proc_info;
+        return 0;
+}
+/* Build the CPU <-> domain mapping published through
+ * psnedf_get_domain_proc_info().
+ *
+ * Every CPU below num_online_cpus() except the release master (if one is
+ * configured) forms its own domain. Domains are numbered consecutively,
+ * i.e. without counting the release master.
+ */
+static void psnedf_setup_domain_proc(void)
+{
+        int i, cpu;
+        int release_master =
+#ifdef CONFIG_RELEASE_MASTER
+                atomic_read(&release_master_cpu);
+#else
+                NO_CPU;
+#endif
+        int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
+        struct cd_mapping *cpu_map, *domain_map;
+
+        /* memset() takes (dest, value, size). The arguments used to be
+         * swapped, which cleared zero bytes and left stale contents in
+         * place when the plugin was activated again.
+         */
+        memset(&psnedf_domain_proc_info, 0, sizeof(psnedf_domain_proc_info));
+        /* NOTE(review): the result of init_domain_proc_info() is not
+         * checked here -- confirm whether it can fail. */
+        init_domain_proc_info(&psnedf_domain_proc_info, num_rt_cpus, num_rt_cpus);
+        psnedf_domain_proc_info.num_cpus = num_rt_cpus;
+        psnedf_domain_proc_info.num_domains = num_rt_cpus;
+
+        for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
+                if (cpu == release_master)
+                        continue;
+                cpu_map = &psnedf_domain_proc_info.cpu_to_domains[i];
+                domain_map = &psnedf_domain_proc_info.domain_to_cpus[i];
+
+                cpu_map->id = cpu;
+                domain_map->id = i; /* enumerate w/o counting the release master */
+                /* CPU 'cpu' belongs to domain i, and domain i contains
+                 * exactly CPU 'cpu' */
+                cpumask_set_cpu(i, cpu_map->mask);
+                cpumask_set_cpu(cpu, domain_map->mask);
+                ++i;
+        }
+}
+/* Plugin activation hook.
+ *
+ * With release-master support, copies the configured release master CPU
+ * into the domain of every online CPU. With locking support, installs the
+ * PSN-EDF SRP priority function. Finally builds the CPU/domain mapping.
+ * Always returns 0.
+ */
+static long psnedf_activate_plugin(void)
+{
+#ifdef CONFIG_RELEASE_MASTER
+        int cpu_id;
+
+        for_each_online_cpu(cpu_id)
+                remote_edf(cpu_id)->release_master =
+                        atomic_read(&release_master_cpu);
+#endif
+
+#ifdef CONFIG_LITMUS_LOCKING
+        get_srp_prio = psnedf_get_srp_prio;
+#endif
+
+        psnedf_setup_domain_proc();
+        return 0;
+}
+static long psnedf_deactivate_plugin(void)
+{       /* Plugin deactivation hook: tear down the CPU/domain mapping built at activation; always returns 0. */
+        destroy_domain_proc_info(&psnedf_domain_proc_info);
+        return 0;
+}
+/* Admission test (plugin .admit_task callback).
+ *
+ * A task is admitted only if it currently runs on the CPU named in its
+ * task parameters and, with release-master support, that CPU is not the
+ * release master.
+ *
+ * Returns 0 if the task is admitted, -EINVAL otherwise.
+ */
+static long psnedf_admit_task(struct task_struct* tsk)
+{
+        if (task_cpu(tsk) != tsk->rt_param.task_params.cpu)
+                return -EINVAL;
+#ifdef CONFIG_RELEASE_MASTER
+        /* don't allow tasks on release master CPU */
+        if (task_cpu(tsk) == remote_edf(task_cpu(tsk))->release_master)
+                return -EINVAL;
+#endif
+        return 0;
+}
+/*      Plugin object
+ * Callback table registered with the plugin framework by init_psn_edf().
+ * All handlers except complete_job are defined in this file.
+ */
+static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
+        .plugin_name            = "PSN-EDF",
+        .task_new               = psnedf_task_new,
+        .complete_job           = complete_job, /* not defined in this file */
+        .task_exit              = psnedf_task_exit,
+        .schedule               = psnedf_schedule,
+        .task_wake_up           = psnedf_task_wake_up,
+        .task_block             = psnedf_task_block,
+        .admit_task             = psnedf_admit_task,
+        .activate_plugin        = psnedf_activate_plugin,
+        .deactivate_plugin      = psnedf_deactivate_plugin,
+        .get_domain_proc_info   = psnedf_get_domain_proc_info,
+#ifdef CONFIG_LITMUS_LOCKING
+        .allocate_lock          = psnedf_allocate_lock, /* only with locking support compiled in */
+#endif
+};
+/* Module init: set up one partition per CPU and register the plugin.
+ *
+ * Each partition gets psnedf_check_resched() as its resched callback and
+ * no release callback. Returns the result of register_sched_plugin().
+ */
+static int __init init_psn_edf(void)
+{
+        int cpu;
+
+        /* We do not really want to support cpu hotplug, do we? ;)
+         * However, if we are so crazy to do so,
+         * we cannot use num_online_cpus()
+         */
+        for (cpu = 0; cpu < num_online_cpus(); cpu++)
+                psnedf_domain_init(remote_pedf(cpu), psnedf_check_resched,
+                                   NULL, cpu);
+
+        return register_sched_plugin(&psn_edf_plugin);
+}
+module_init(init_psn_edf);
author	Bjoern Brandenburg <bbb@mpi-sws.org>	2013-02-12 13:15:27 -0500
committer	Bjoern Brandenburg <bbb@mpi-sws.org>	2014-06-07 05:31:08 -0400
commit	000e29d4d1367efb3bf5de6c58b79d988958fcb2 (patch)
tree	da8243e896fa707d54c6da46eda00c506c4901d4
parent	1f0829fdfeb0b62077ab8ceb02e9995c95733f96 (diff)

diff --git a/litmus/Makefile b/litmus/Makefile index f7ceabc47197..0db695e35201 100644 --- a/litmus/Makefile +++ b/litmus/Makefile
@@ -18,7 +18,9 @@ obj-y = sched_plugin.o litmus.o \
18	bheap.o \	18	bheap.o \
19	binheap.o \	19	binheap.o \
20	ctrldev.o \	20	ctrldev.o \
21	uncachedev.o	21	uncachedev.o \
		22	sched_psn_edf.o
		23
22		24
23	obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o	25	obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
24		26


diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c new file mode 100644 index 000000000000..dd042dbc11f7 --- /dev/null +++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,689 @@
		1	/*
		2	* kernel/sched_psn_edf.c
		3	*
		4	* Implementation of the PSN-EDF scheduler plugin.
		5	* Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
		6	*
		7	* Suspensions and non-preemptable sections are supported.
		8	* Priority inheritance is not supported.
		9	*/
		10
		11	#include <linux/percpu.h>
		12	#include <linux/sched.h>
		13	#include <linux/list.h>
		14	#include <linux/spinlock.h>
		15	#include <linux/module.h>
		16
		17	#include <litmus/litmus.h>
		18	#include <litmus/jobs.h>
		19	#include <litmus/preempt.h>
		20	#include <litmus/budget.h>
		21	#include <litmus/sched_plugin.h>
		22	#include <litmus/edf_common.h>
		23	#include <litmus/sched_trace.h>
		24	#include <litmus/trace.h>
		25
		26	/* to set up domain/cpu mappings */
		27	#include <litmus/litmus_proc.h>
		28
		29	typedef struct {
		30	rt_domain_t domain;
		31	int cpu;
		32	struct task_struct* scheduled; /* only RT tasks */
		33	/*
		34	* scheduling lock slock
		35	* protects the domain and serializes scheduling decisions
		36	*/
		37	#define slock domain.ready_lock
		38
		39	} psnedf_domain_t;
		40
		41	DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
		42
		43	#define local_edf (&__get_cpu_var(psnedf_domains).domain)
		44	#define local_pedf (&__get_cpu_var(psnedf_domains))
		45	#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
		46	#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
		47	#define task_edf(task) remote_edf(get_partition(task))
		48	#define task_pedf(task) remote_pedf(get_partition(task))
		49
		50
		51	static void psnedf_domain_init(psnedf_domain_t* pedf,
		52	check_resched_needed_t check,
		53	release_jobs_t release,
		54	int cpu)
		55	{
		56	edf_domain_init(&pedf->domain, check, release);
		57	pedf->cpu = cpu;
		58	pedf->scheduled = NULL;
		59	}
		60
		61	static void requeue(struct task_struct* t, rt_domain_t *edf)
		62	{
		63	if (t->state != TASK_RUNNING)
		64	TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
		65
		66	tsk_rt(t)->completed = 0;
		67	if (is_early_releasing(t) || is_released(t, litmus_clock()))
		68	__add_ready(edf, t);
		69	else
		70	add_release(edf, t); /* it has got to wait */
		71	}
		72
		73	/* we assume the lock is being held */
		74	static void preempt(psnedf_domain_t *pedf)
		75	{
		76	preempt_if_preemptable(pedf->scheduled, pedf->cpu);
		77	}
		78
		79	#ifdef CONFIG_LITMUS_LOCKING
		80
		81	static void boost_priority(struct task_struct* t)
		82	{
		83	unsigned long flags;
		84	psnedf_domain_t* pedf = task_pedf(t);
		85	lt_t now;
		86
		87	raw_spin_lock_irqsave(&pedf->slock, flags);
		88	now = litmus_clock();
		89
		90	TRACE_TASK(t, "priority boosted at %llu\n", now);
		91
		92	tsk_rt(t)->priority_boosted = 1;
		93	tsk_rt(t)->boost_start_time = now;
		94
		95	if (pedf->scheduled != t) {
		96	/* holder may be queued: first stop queue changes */
		97	raw_spin_lock(&pedf->domain.release_lock);
		98	if (is_queued(t) &&
		99	/* If it is queued, then we need to re-order. */
		100	bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
		101	/* If we bubbled to the top, then we need to check for preemptions. */
		102	edf_preemption_needed(&pedf->domain, pedf->scheduled))
		103	preempt(pedf);
		104	raw_spin_unlock(&pedf->domain.release_lock);
		105	} /* else: nothing to do since the job is not queued while scheduled */
		106
		107	raw_spin_unlock_irqrestore(&pedf->slock, flags);
		108	}
		109
		110	static void unboost_priority(struct task_struct* t)
		111	{
		112	unsigned long flags;
		113	psnedf_domain_t* pedf = task_pedf(t);
		114	lt_t now;
		115
		116	raw_spin_lock_irqsave(&pedf->slock, flags);
		117	now = litmus_clock();
		118
		119	/* Assumption: this only happens when the job is scheduled.
		120	* Exception: If t transitioned to non-real-time mode, we no longer
		121	* care about it. */
		122	BUG_ON(pedf->scheduled != t && is_realtime(t));
		123
		124	TRACE_TASK(t, "priority restored at %llu\n", now);
		125
		126	tsk_rt(t)->priority_boosted = 0;
		127	tsk_rt(t)->boost_start_time = 0;
		128
		129	/* check if this changes anything */
		130	if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
		131	preempt(pedf);
		132
		133	raw_spin_unlock_irqrestore(&pedf->slock, flags);
		134	}
		135
		136	#endif
		137
		138	static int psnedf_preempt_check(psnedf_domain_t *pedf)
		139	{
		140	if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
		141	preempt(pedf);
		142	return 1;
		143	} else
		144	return 0;
		145	}
		146
		147	/* This check is trivial in partitioned systems as we only have to consider
		148	* the CPU of the partition.
		149	*/
		150	static int psnedf_check_resched(rt_domain_t *edf)
		151	{
		152	psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
		153
		154	/* because this is a callback from rt_domain_t we already hold
		155	* the necessary lock for the ready queue
		156	*/
		157	return psnedf_preempt_check(pedf);
		158	}
		159
		160	static void job_completion(struct task_struct* t, int forced)
		161	{
		162	sched_trace_task_completion(t,forced);
		163	TRACE_TASK(t, "job_completion().\n");
		164
		165	tsk_rt(t)->completed = 0;
		166	prepare_for_next_period(t);
		167	}
		168
		169	static struct task_struct* psnedf_schedule(struct task_struct * prev)
		170	{
		171	psnedf_domain_t* pedf = local_pedf;
		172	rt_domain_t* edf = &pedf->domain;
		173	struct task_struct* next;
		174
		175	int out_of_time, sleep, preempt,
		176	np, exists, blocks, resched;
		177
		178	raw_spin_lock(&pedf->slock);
		179
		180	/* sanity checking
		181	* differently from gedf, when a task exits (dead)
		182	* pedf->scheduled may be null and prev _is_ realtime
		183	*/
		184	BUG_ON(pedf->scheduled && pedf->scheduled != prev);
		185	BUG_ON(pedf->scheduled && !is_realtime(prev));
		186
		187	/* (0) Determine state */
		188	exists = pedf->scheduled != NULL;
		189	blocks = exists && !is_running(pedf->scheduled);
		190	out_of_time = exists &&
		191	budget_enforced(pedf->scheduled) &&
		192	budget_exhausted(pedf->scheduled);
		193	np = exists && is_np(pedf->scheduled);
		194	sleep = exists && is_completed(pedf->scheduled);
		195	preempt = edf_preemption_needed(edf, prev);
		196
		197	/* If we need to preempt do so.
		198	* The following checks set resched to 1 in case of special
		199	* circumstances.
		200	*/
		201	resched = preempt;
		202
		203	/* If a task blocks we have no choice but to reschedule.
		204	*/
		205	if (blocks)
		206	resched = 1;
		207
		208	/* Request a sys_exit_np() call if we would like to preempt but cannot.
		209	* Multiple calls to request_exit_np() don't hurt.
		210	*/
		211	if (np && (out_of_time || preempt || sleep))
		212	request_exit_np(pedf->scheduled);
		213
		214	/* Any task that is preemptable and either exhausts its execution
		215	* budget or wants to sleep completes. We may have to reschedule after
		216	* this.
		217	*/
		218	if (!np && (out_of_time || sleep) && !blocks) {
		219	job_completion(pedf->scheduled, !sleep);
		220	resched = 1;
		221	}
		222
		223	/* The final scheduling decision. Do we need to switch for some reason?
		224	* Switch if we are in RT mode and have no task or if we need to
		225	* resched.
		226	*/
		227	next = NULL;
		228	if ((!np || blocks) && (resched || !exists)) {
		229	/* When preempting a task that does not block, then
		230	* re-insert it into either the ready queue or the
		231	* release queue (if it completed). requeue() picks
		232	* the appropriate queue.
		233	*/
		234	if (pedf->scheduled && !blocks)
		235	requeue(pedf->scheduled, edf);
		236	next = __take_ready(edf);
		237	} else
		238	/* Only override Linux scheduler if we have a real-time task
		239	* scheduled that needs to continue.
		240	*/
		241	if (exists)
		242	next = prev;
		243
		244	if (next) {
		245	TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
		246	} else {
		247	TRACE("becoming idle at %llu\n", litmus_clock());
		248	}
		249
		250	pedf->scheduled = next;
		251	sched_state_task_picked();
		252	raw_spin_unlock(&pedf->slock);
		253
		254	return next;
		255	}
		256
		257
		258	/* Prepare a task for running in RT mode
		259	*/
		260	static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
		261	{
		262	rt_domain_t* edf = task_edf(t);
		263	psnedf_domain_t* pedf = task_pedf(t);
		264	unsigned long flags;
		265
		266	TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
		267	t->rt_param.task_params.cpu);
		268
		269	/* setup job parameters */
		270	release_at(t, litmus_clock());
		271
		272	/* The task should be running in the queue, otherwise signal
		273	* code will try to wake it up with fatal consequences.
		274	*/
		275	raw_spin_lock_irqsave(&pedf->slock, flags);
		276	if (is_scheduled) {
		277	/* there shouldn't be anything else scheduled at the time */
		278	BUG_ON(pedf->scheduled);
		279	pedf->scheduled = t;
		280	} else {
		281	/* !is_scheduled means it is not scheduled right now, but it
		282	* does not mean that it is suspended. If it is not suspended,
		283	* it still needs to be requeued. If it is suspended, there is
		284	* nothing that we need to do as it will be handled by the
		285	* wake_up() handler. */
		286	if (is_running(t)) {
		287	requeue(t, edf);
		288	/* maybe we have to reschedule */
		289	psnedf_preempt_check(pedf);
		290	}
		291	}
		292	raw_spin_unlock_irqrestore(&pedf->slock, flags);
		293	}
		294
		295	static void psnedf_task_wake_up(struct task_struct *task)
		296	{
		297	unsigned long flags;
		298	psnedf_domain_t* pedf = task_pedf(task);
		299	rt_domain_t* edf = task_edf(task);
		300	lt_t now;
		301
		302	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
		303	raw_spin_lock_irqsave(&pedf->slock, flags);
		304	BUG_ON(is_queued(task));
		305	now = litmus_clock();
		306	if (is_sporadic(task) && is_tardy(task, now)
		307	#ifdef CONFIG_LITMUS_LOCKING
		308	/* We need to take suspensions because of semaphores into
		309	* account! If a job resumes after being suspended due to acquiring
		310	* a semaphore, it should never be treated as a new job release.
		311	*/
		312	&& !is_priority_boosted(task)
		313	#endif
		314	) {
		315	/* new sporadic release */
		316	release_at(task, now);
		317	sched_trace_task_release(task);
		318	}
		319
		320	/* Only add to ready queue if it is not the currently-scheduled
		321	* task. This could be the case if a task was woken up concurrently
		322	* on a remote CPU before the executing CPU got around to actually
		323	* de-scheduling the task, i.e., wake_up() raced with schedule()
		324	* and won.
		325	*/
		326	if (pedf->scheduled != task) {
		327	requeue(task, edf);
		328	psnedf_preempt_check(pedf);
		329	}
		330
		331	raw_spin_unlock_irqrestore(&pedf->slock, flags);
		332	TRACE_TASK(task, "wake up done\n");
		333	}
		334
		335	static void psnedf_task_block(struct task_struct *t)
		336	{
		337	/* only running tasks can block, thus t is in no queue */
		338	TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
		339
		340	BUG_ON(!is_realtime(t));
		341	BUG_ON(is_queued(t));
		342	}
		343
		344	static void psnedf_task_exit(struct task_struct * t)
		345	{
		346	unsigned long flags;
		347	psnedf_domain_t* pedf = task_pedf(t);
		348	rt_domain_t* edf;
		349
		350	raw_spin_lock_irqsave(&pedf->slock, flags);
		351	if (is_queued(t)) {
		352	/* dequeue */
		353	edf = task_edf(t);
		354	remove(edf, t);
		355	}
		356	if (pedf->scheduled == t)
		357	pedf->scheduled = NULL;
		358
		359	TRACE_TASK(t, "RIP, now reschedule\n");
		360
		361	preempt(pedf);
		362	raw_spin_unlock_irqrestore(&pedf->slock, flags);
		363	}
		364
		365	#ifdef CONFIG_LITMUS_LOCKING
		366
		367	#include <litmus/fdso.h>
		368	#include <litmus/srp.h>
		369
		370	/* ****************** SRP support ********************** */
		371
		/*
		 * psnedf_get_srp_prio - SRP priority of a task under PSN-EDF.
		 * @t: the task to query
		 *
		 * Returns the task's relative deadline, converted to unsigned int.
		 */
		static unsigned int psnedf_get_srp_prio(struct task_struct* t)
		{
			unsigned int prio = get_rt_relative_deadline(t);

			return prio;
		}
		376
		377	/* ****************** FMLP support ******************** */
		378
		379	/* struct for semaphore with priority inheritance */
		380	struct fmlp_semaphore {
		381	struct litmus_lock litmus_lock;
		382
		383	/* current resource holder */
		384	struct task_struct *owner;
		385
		386	/* FIFO queue of waiting tasks */
		387	wait_queue_head_t wait;
		388	};
		389
		390	static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
		391	{
		392	return container_of(lock, struct fmlp_semaphore, litmus_lock);
		393	}
		394	int psnedf_fmlp_lock(struct litmus_lock* l)
		395	{
		396	struct task_struct* t = current;
		397	struct fmlp_semaphore *sem = fmlp_from_lock(l);
		398	wait_queue_t wait;
		399	unsigned long flags;
		400
		401	if (!is_realtime(t))
		402	return -EPERM;
		403
		404	/* prevent nested lock acquisition --- not supported by FMLP */
		405	if (tsk_rt(t)->num_locks_held \|\|
		406	tsk_rt(t)->num_local_locks_held)
		407	return -EBUSY;
		408
		409	spin_lock_irqsave(&sem->wait.lock, flags);
		410
		411	if (sem->owner) {
		412	/* resource is not free => must suspend and wait */
		413
		414	init_waitqueue_entry(&wait, t);
		415
		416	/* FIXME: interruptible would be nice some day */
		417	set_task_state(t, TASK_UNINTERRUPTIBLE);
		418
		419	__add_wait_queue_tail_exclusive(&sem->wait, &wait);
		420
		421	TS_LOCK_SUSPEND;
		422
		423	/* release lock before sleeping */
		424	spin_unlock_irqrestore(&sem->wait.lock, flags);
		425
		426	/* We depend on the FIFO order. Thus, we don't need to recheck
		427	* when we wake up; we are guaranteed to have the lock since
		428	* there is only one wake up per release.
		429	*/
		430
		431	schedule();
		432
		433	TS_LOCK_RESUME;
		434
		435	/* Since we hold the lock, no other task will change
		436	* ->owner. We can thus check it without acquiring the spin
		437	* lock. */
		438	BUG_ON(sem->owner != t);
		439	} else {
		440	/* it's ours now */
		441	sem->owner = t;
		442
		443	/* mark the task as priority-boosted. */
		444	boost_priority(t);
		445
		446	spin_unlock_irqrestore(&sem->wait.lock, flags);
		447	}
		448
		449	tsk_rt(t)->num_locks_held++;
		450
		451	return 0;
		452	}
		453
		454	int psnedf_fmlp_unlock(struct litmus_lock* l)
		455	{
		456	struct task_struct t = current, next;
		457	struct fmlp_semaphore *sem = fmlp_from_lock(l);
		458	unsigned long flags;
		459	int err = 0;
		460
		461	spin_lock_irqsave(&sem->wait.lock, flags);
		462
		463	if (sem->owner != t) {
		464	err = -EINVAL;
		465	goto out;
		466	}
		467
		468	tsk_rt(t)->num_locks_held--;
		469
		470	/* we lose the benefit of priority boosting */
		471
		472	unboost_priority(t);
		473
		474	/* check if there are jobs waiting for this resource */
		475	next = __waitqueue_remove_first(&sem->wait);
		476	if (next) {
		477	/* boost next job */
		478	boost_priority(next);
		479
		480	/* next becomes the resouce holder */
		481	sem->owner = next;
		482
		483	/* wake up next */
		484	wake_up_process(next);
		485	} else
		486	/* resource becomes available */
		487	sem->owner = NULL;
		488
		489	out:
		490	spin_unlock_irqrestore(&sem->wait.lock, flags);
		491	return err;
		492	}
		493
		494	int psnedf_fmlp_close(struct litmus_lock* l)
		495	{
		496	struct task_struct *t = current;
		497	struct fmlp_semaphore *sem = fmlp_from_lock(l);
		498	unsigned long flags;
		499
		500	int owner;
		501
		502	spin_lock_irqsave(&sem->wait.lock, flags);
		503
		504	owner = sem->owner == t;
		505
		506	spin_unlock_irqrestore(&sem->wait.lock, flags);
		507
		508	if (owner)
		509	psnedf_fmlp_unlock(l);
		510
		511	return 0;
		512	}
		513
		/* Deallocate the fmlp_semaphore that embeds the given litmus_lock. */
		void psnedf_fmlp_free(struct litmus_lock* lock)
		{
			struct fmlp_semaphore *sem = fmlp_from_lock(lock);

			kfree(sem);
		}
		518
		519	static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
		520	.close = psnedf_fmlp_close,
		521	.lock = psnedf_fmlp_lock,
		522	.unlock = psnedf_fmlp_unlock,
		523	.deallocate = psnedf_fmlp_free,
		524	};
		525
		526	static struct litmus_lock* psnedf_new_fmlp(void)
		527	{
		528	struct fmlp_semaphore* sem;
		529
		530	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
		531	if (!sem)
		532	return NULL;
		533
		534	sem->owner = NULL;
		535	init_waitqueue_head(&sem->wait);
		536	sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
		537
		538	return &sem->litmus_lock;
		539	}
		540
		541	/* ** lock constructor ** */
		542
		543
		544	static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
		545	void* __user unused)
		546	{
		547	int err = -ENXIO;
		548	struct srp_semaphore* srp;
		549
		550	/* PSN-EDF currently supports the SRP for local resources and the FMLP
		551	* for global resources. */
		552	switch (type) {
		553	case FMLP_SEM:
		554	/* Flexible Multiprocessor Locking Protocol */
		555	*lock = psnedf_new_fmlp();
		556	if (*lock)
		557	err = 0;
		558	else
		559	err = -ENOMEM;
		560	break;
		561
		562	case SRP_SEM:
		563	/* Baker's Stack Resource Policy */
		564	srp = allocate_srp_semaphore();
		565	if (srp) {
		566	*lock = &srp->litmus_lock;
		567	err = 0;
		568	} else
		569	err = -ENOMEM;
		570	break;
		571	};
		572
		573	return err;
		574	}
		575
		576	#endif
		577
		578	static struct domain_proc_info psnedf_domain_proc_info;
		579	static long psnedf_get_domain_proc_info(struct domain_proc_info **ret)
		580	{
		581	*ret = &psnedf_domain_proc_info;
		582	return 0;
		583	}
		584
		585	static void psnedf_setup_domain_proc(void)
		586	{
		587	int i, cpu;
		588	int release_master =
		589	#ifdef CONFIG_RELEASE_MASTER
		590	atomic_read(&release_master_cpu);
		591	#else
		592	NO_CPU;
		593	#endif
		594	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
		595	struct cd_mapping cpu_map, domain_map;
		596
		597	memset(&psnedf_domain_proc_info, sizeof(psnedf_domain_proc_info), 0);
		598	init_domain_proc_info(&psnedf_domain_proc_info, num_rt_cpus, num_rt_cpus);
		599	psnedf_domain_proc_info.num_cpus = num_rt_cpus;
		600	psnedf_domain_proc_info.num_domains = num_rt_cpus;
		601
		602	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
		603	if (cpu == release_master)
		604	continue;
		605	cpu_map = &psnedf_domain_proc_info.cpu_to_domains[i];
		606	domain_map = &psnedf_domain_proc_info.domain_to_cpus[i];
		607
		608	cpu_map->id = cpu;
		609	domain_map->id = i; /* enumerate w/o counting the release master */
		610	cpumask_set_cpu(i, cpu_map->mask);
		611	cpumask_set_cpu(cpu, domain_map->mask);
		612	++i;
		613	}
		614	}
		615
		/*
		 * psnedf_activate_plugin - plugin activation hook.
		 *
		 * With release-master support, the configured release master is copied
		 * into the domain of every online CPU. With locking support, the SRP
		 * priority callback is pointed at psnedf_get_srp_prio(). Finally the
		 * domain/cpu mappings are set up. Always returns 0.
		 */
		static long psnedf_activate_plugin(void)
		{
		#ifdef CONFIG_RELEASE_MASTER
			int cpu;

			for_each_online_cpu(cpu)
				remote_edf(cpu)->release_master =
					atomic_read(&release_master_cpu);
		#endif

		#ifdef CONFIG_LITMUS_LOCKING
			get_srp_prio = psnedf_get_srp_prio;
		#endif

			psnedf_setup_domain_proc();

			return 0;
		}
		634
		635	static long psnedf_deactivate_plugin(void)
		636	{
		637	destroy_domain_proc_info(&psnedf_domain_proc_info);
		638	return 0;
		639	}
		640
		641	static long psnedf_admit_task(struct task_struct* tsk)
		642	{
		643	if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
		644	#ifdef CONFIG_RELEASE_MASTER
		645	/* don't allow tasks on release master CPU */
		646	&& task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
		647	#endif
		648	)
		649	return 0;
		650	else
		651	return -EINVAL;
		652	}
		653
		654	/* Plugin object */
		655	static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
		656	.plugin_name = "PSN-EDF",
		657	.task_new = psnedf_task_new,
		658	.complete_job = complete_job,
		659	.task_exit = psnedf_task_exit,
		660	.schedule = psnedf_schedule,
		661	.task_wake_up = psnedf_task_wake_up,
		662	.task_block = psnedf_task_block,
		663	.admit_task = psnedf_admit_task,
		664	.activate_plugin = psnedf_activate_plugin,
		665	.deactivate_plugin = psnedf_deactivate_plugin,
		666	.get_domain_proc_info = psnedf_get_domain_proc_info,
		667	#ifdef CONFIG_LITMUS_LOCKING
		668	.allocate_lock = psnedf_allocate_lock,
		669	#endif
		670	};
		671
		672
		673	static int __init init_psn_edf(void)
		674	{
		675	int i;
		676
		677	/* We do not really want to support cpu hotplug, do we? ;)
		678	* However, if we are so crazy to do so,
		679	* we cannot use num_online_cpu()
		680	*/
		681	for (i = 0; i < num_online_cpus(); i++) {
		682	psnedf_domain_init(remote_pedf(i),
		683	psnedf_check_resched,
		684	NULL, i);
		685	}
		686	return register_sched_plugin(&psn_edf_plugin);
		687	}
		688
		689	module_init(init_psn_edf);