author	Bjoern Brandenburg <bbb@mpi-sws.org>	2015-08-09 07:18:55 -0400
committer	Bjoern Brandenburg <bbb@mpi-sws.org>	2015-08-09 07:20:34 -0400
commit	47efe5234212e0f72369a6964dfd84d4e9968a11 (patch)
tree	2c489ea0b878363068cfcb0ecb33054b79d7d2a4
parent	4410877f28d1a20b5c98dd153de78c8342ac76f3 (diff)
Add PSN-EDF scheduler plugin
-rw-r--r--	litmus/Makefile	4
-rw-r--r--	litmus/sched_psn_edf.c	688
2 files changed, 691 insertions, 1 deletion
diff --git a/litmus/Makefile b/litmus/Makefile
index c85abc7389c5..4e53c4f69744 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -18,7 +18,9 @@ obj-y = sched_plugin.o litmus.o \
 	bheap.o \
 	binheap.o \
 	ctrldev.o \
-	uncachedev.o
+	uncachedev.o \
+	sched_psn_edf.o
+
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..2549a3fc28b9
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,688 @@
1/*
2 * litmus/sched_psn_edf.c
3 *
4 * Implementation of the PSN-EDF scheduler plugin.
5 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
6 *
7 * Suspensions and non-preemptable sections are supported.
8 * Priority inheritance is not supported.
9 */
10
11#include <linux/percpu.h>
12#include <linux/sched.h>
13#include <linux/list.h>
14#include <linux/spinlock.h>
15#include <linux/module.h>
16
17#include <litmus/litmus.h>
18#include <litmus/jobs.h>
19#include <litmus/preempt.h>
20#include <litmus/budget.h>
21#include <litmus/sched_plugin.h>
22#include <litmus/edf_common.h>
23#include <litmus/sched_trace.h>
24#include <litmus/trace.h>
25
26/* to set up domain/cpu mappings */
27#include <litmus/litmus_proc.h>
28
29typedef struct {
30 rt_domain_t domain;
31 int cpu;
32 struct task_struct* scheduled; /* only RT tasks */
33/*
34 * scheduling lock slock
35 * protects the domain and serializes scheduling decisions
36 */
37#define slock domain.ready_lock
38
39} psnedf_domain_t;
40
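/*
 * Note on structure: each online CPU gets its own psnedf_domain_t below,
 * so PSN-EDF behaves as a set of independent uniprocessor EDF schedulers,
 * one per partition; jobs never migrate between the per-CPU domains.
 */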
41DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
42
43#define local_edf (&(this_cpu_ptr(&psnedf_domains)->domain))
44#define local_pedf (this_cpu_ptr(&psnedf_domains))
45#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
46#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
47#define task_edf(task) remote_edf(get_partition(task))
48#define task_pedf(task) remote_pedf(get_partition(task))
49
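/*
 * Usage sketch: task_edf(t) expands to
 * &per_cpu(psnedf_domains, get_partition(t)).domain, i.e., the EDF
 * ready/release queues of the partition that t has been assigned to
 * (get_partition() is assumed to report the CPU from the task's
 * real-time parameters, cf. psnedf_admit_task() below).
 */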
50
51static void psnedf_domain_init(psnedf_domain_t* pedf,
52 check_resched_needed_t check,
53 release_jobs_t release,
54 int cpu)
55{
56 edf_domain_init(&pedf->domain, check, release);
57 pedf->cpu = cpu;
58 pedf->scheduled = NULL;
59}
60
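/*
 * Requeue helper: a job that is early-releasing or whose release time has
 * already passed goes straight onto the ready queue; otherwise it is
 * handed to the release queue, from which the rt_domain release timer
 * will move it to the ready queue at its release time.
 */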
61static void requeue(struct task_struct* t, rt_domain_t *edf)
62{
63 if (t->state != TASK_RUNNING)
64 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
65
66 tsk_rt(t)->completed = 0;
67 if (is_early_releasing(t) || is_released(t, litmus_clock()))
68 __add_ready(edf, t);
69 else
70 add_release(edf, t); /* it has got to wait */
71}
72
73/* we assume the lock is being held */
74static void preempt(psnedf_domain_t *pedf)
75{
76 preempt_if_preemptable(pedf->scheduled, pedf->cpu);
77}
78
79#ifdef CONFIG_LITMUS_LOCKING
80
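/*
 * Priority boosting for the FMLP: while a job holds a global FMLP
 * semaphore it is flagged as boosted so that, within its partition, it
 * takes precedence over all non-boosted jobs irrespective of deadlines
 * (edf_ready_order/edf_preemption_needed() are expected to rank boosted
 * jobs ahead of non-boosted ones, ordered by boost_start_time). This
 * bounds the blocking incurred by jobs waiting for the semaphore.
 */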
81static void boost_priority(struct task_struct* t)
82{
83 unsigned long flags;
84 psnedf_domain_t* pedf = task_pedf(t);
85 lt_t now;
86
87 raw_spin_lock_irqsave(&pedf->slock, flags);
88 now = litmus_clock();
89
90 TRACE_TASK(t, "priority boosted at %llu\n", now);
91
92 tsk_rt(t)->priority_boosted = 1;
93 tsk_rt(t)->boost_start_time = now;
94
95 if (pedf->scheduled != t) {
96 /* holder may be queued: first stop queue changes */
97 raw_spin_lock(&pedf->domain.release_lock);
98 if (is_queued(t) &&
99 /* If it is queued, then we need to re-order. */
100 bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
101 /* If we bubbled to the top, then we need to check for preemptions. */
102 edf_preemption_needed(&pedf->domain, pedf->scheduled))
103 preempt(pedf);
104 raw_spin_unlock(&pedf->domain.release_lock);
105 } /* else: nothing to do since the job is not queued while scheduled */
106
107 raw_spin_unlock_irqrestore(&pedf->slock, flags);
108}
109
110static void unboost_priority(struct task_struct* t)
111{
112 unsigned long flags;
113 psnedf_domain_t* pedf = task_pedf(t);
114 lt_t now;
115
116 raw_spin_lock_irqsave(&pedf->slock, flags);
117 now = litmus_clock();
118
119 /* Assumption: this only happens when the job is scheduled.
120 * Exception: If t transitioned to non-real-time mode, we no longer
121 * care about it. */
122 BUG_ON(pedf->scheduled != t && is_realtime(t));
123
124 TRACE_TASK(t, "priority restored at %llu\n", now);
125
126 tsk_rt(t)->priority_boosted = 0;
127 tsk_rt(t)->boost_start_time = 0;
128
129 /* check if this changes anything */
130 if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
131 preempt(pedf);
132
133 raw_spin_unlock_irqrestore(&pedf->slock, flags);
134}
135
136#endif
137
138static int psnedf_preempt_check(psnedf_domain_t *pedf)
139{
140 if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
141 preempt(pedf);
142 return 1;
143 } else
144 return 0;
145}
146
147/* This check is trivial in partitioned systems as we only have to consider
148 * the CPU of the partition.
149 */
150static int psnedf_check_resched(rt_domain_t *edf)
151{
152 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
153
154 /* because this is a callback from rt_domain_t we already hold
155 * the necessary lock for the ready queue
156 */
157 return psnedf_preempt_check(pedf);
158}
159
160static void job_completion(struct task_struct* t, int forced)
161{
162 sched_trace_task_completion(t, forced);
163 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
164
165 tsk_rt(t)->completed = 0;
166 prepare_for_next_period(t);
167}
168
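/*
 * Scheduling function, roughly: (1) assess the currently scheduled job
 * (does it exist, did it block, complete, exhaust its budget, or enter a
 * non-preemptive section, and is a higher-priority job ready?), then
 * (2) complete and/or requeue it as needed, and (3) pick the
 * earliest-deadline ready job of this partition, or go idle if none.
 */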
169static struct task_struct* psnedf_schedule(struct task_struct * prev)
170{
171 psnedf_domain_t* pedf = local_pedf;
172 rt_domain_t* edf = &pedf->domain;
173 struct task_struct* next;
174
175 int out_of_time, sleep, preempt,
176 np, exists, blocks, resched;
177
178 raw_spin_lock(&pedf->slock);
179
180 /* sanity checking
181	 * unlike in GSN-EDF, when a task exits (is dead),
182	 * pedf->scheduled may be NULL while prev _is_ a real-time task
183 */
184 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
185 BUG_ON(pedf->scheduled && !is_realtime(prev));
186
187 /* (0) Determine state */
188 exists = pedf->scheduled != NULL;
189 blocks = exists && !is_current_running();
190 out_of_time = exists && budget_enforced(pedf->scheduled)
191 && budget_exhausted(pedf->scheduled);
192 np = exists && is_np(pedf->scheduled);
193 sleep = exists && is_completed(pedf->scheduled);
194 preempt = edf_preemption_needed(edf, prev);
195
196 /* If we need to preempt do so.
197 * The following checks set resched to 1 in case of special
198 * circumstances.
199 */
200 resched = preempt;
201
202 /* If a task blocks we have no choice but to reschedule.
203 */
204 if (blocks)
205 resched = 1;
206
207 /* Request a sys_exit_np() call if we would like to preempt but cannot.
208 * Multiple calls to request_exit_np() don't hurt.
209 */
210 if (np && (out_of_time || preempt || sleep))
211 request_exit_np(pedf->scheduled);
212
213 /* Any task that is preemptable and either exhausts its execution
214 * budget or wants to sleep completes. We may have to reschedule after
215 * this.
216 */
217 if (!np && (out_of_time || sleep)) {
218 job_completion(pedf->scheduled, !sleep);
219 resched = 1;
220 }
221
222 /* The final scheduling decision. Do we need to switch for some reason?
223 * Switch if we are in RT mode and have no task or if we need to
224 * resched.
225 */
226 next = NULL;
227 if ((!np || blocks) && (resched || !exists)) {
228 /* When preempting a task that does not block, then
229 * re-insert it into either the ready queue or the
230 * release queue (if it completed). requeue() picks
231 * the appropriate queue.
232 */
233 if (pedf->scheduled && !blocks)
234 requeue(pedf->scheduled, edf);
235 next = __take_ready(edf);
236 } else
237 /* Only override Linux scheduler if we have a real-time task
238 * scheduled that needs to continue.
239 */
240 if (exists)
241 next = prev;
242
243 if (next) {
244 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
245 } else {
246 TRACE("becoming idle at %llu\n", litmus_clock());
247 }
248
249 pedf->scheduled = next;
250 sched_state_task_picked();
251 raw_spin_unlock(&pedf->slock);
252
253 return next;
254}
255
256
257/* Prepare a task for running in RT mode
258 */
259static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
260{
261 rt_domain_t* edf = task_edf(t);
262 psnedf_domain_t* pedf = task_pedf(t);
263 unsigned long flags;
264
265 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
266 t->rt_param.task_params.cpu);
267
268 /* setup job parameters */
269 release_at(t, litmus_clock());
270
271 /* The task should be running in the queue, otherwise signal
272 * code will try to wake it up with fatal consequences.
273 */
274 raw_spin_lock_irqsave(&pedf->slock, flags);
275 if (is_scheduled) {
276 /* there shouldn't be anything else scheduled at the time */
277 BUG_ON(pedf->scheduled);
278 pedf->scheduled = t;
279 } else {
280 /* !is_scheduled means it is not scheduled right now, but it
281 * does not mean that it is suspended. If it is not suspended,
282 * it still needs to be requeued. If it is suspended, there is
283 * nothing that we need to do as it will be handled by the
284 * wake_up() handler. */
285 if (on_rq) {
286 requeue(t, edf);
287 /* maybe we have to reschedule */
288 psnedf_preempt_check(pedf);
289 }
290 }
291 raw_spin_unlock_irqrestore(&pedf->slock, flags);
292}
293
294static void psnedf_task_wake_up(struct task_struct *task)
295{
296 unsigned long flags;
297 psnedf_domain_t* pedf = task_pedf(task);
298 rt_domain_t* edf = task_edf(task);
299 lt_t now;
300
301 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
302 raw_spin_lock_irqsave(&pedf->slock, flags);
303 BUG_ON(is_queued(task));
304 now = litmus_clock();
305 if (is_sporadic(task) && is_tardy(task, now)
306#ifdef CONFIG_LITMUS_LOCKING
307	/* We need to take semaphore-induced suspensions into
308 * account! If a job resumes after being suspended due to acquiring
309 * a semaphore, it should never be treated as a new job release.
310 */
311 && !is_priority_boosted(task)
312#endif
313 ) {
314 /* new sporadic release */
315 release_at(task, now);
316 sched_trace_task_release(task);
317 }
318
319 /* Only add to ready queue if it is not the currently-scheduled
320 * task. This could be the case if a task was woken up concurrently
321 * on a remote CPU before the executing CPU got around to actually
322 * de-scheduling the task, i.e., wake_up() raced with schedule()
323 * and won.
324 */
325 if (pedf->scheduled != task) {
326 requeue(task, edf);
327 psnedf_preempt_check(pedf);
328 }
329
330 raw_spin_unlock_irqrestore(&pedf->slock, flags);
331 TRACE_TASK(task, "wake up done\n");
332}
333
334static void psnedf_task_block(struct task_struct *t)
335{
336 /* only running tasks can block, thus t is in no queue */
337 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
338
339 BUG_ON(!is_realtime(t));
340 BUG_ON(is_queued(t));
341}
342
343static void psnedf_task_exit(struct task_struct * t)
344{
345 unsigned long flags;
346 psnedf_domain_t* pedf = task_pedf(t);
347 rt_domain_t* edf;
348
349 raw_spin_lock_irqsave(&pedf->slock, flags);
350 if (is_queued(t)) {
351 /* dequeue */
352 edf = task_edf(t);
353 remove(edf, t);
354 }
355 if (pedf->scheduled == t)
356 pedf->scheduled = NULL;
357
358 TRACE_TASK(t, "RIP, now reschedule\n");
359
360 preempt(pedf);
361 raw_spin_unlock_irqrestore(&pedf->slock, flags);
362}
363
364#ifdef CONFIG_LITMUS_LOCKING
365
366#include <litmus/fdso.h>
367#include <litmus/srp.h>
368
369/* ******************** SRP support ************************ */
370
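/*
 * SRP preemption levels: under EDF, a job's preemption level is its
 * relative deadline (a shorter relative deadline means a higher level),
 * so handing the relative deadline to the generic SRP code via the
 * get_srp_prio hook (set in psnedf_activate_plugin()) is sufficient.
 */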
371static unsigned int psnedf_get_srp_prio(struct task_struct* t)
372{
373 return get_rt_relative_deadline(t);
374}
375
376/* ******************** FMLP support ********************** */
377
378/* struct for semaphore with priority boosting */
379struct fmlp_semaphore {
380 struct litmus_lock litmus_lock;
381
382 /* current resource holder */
383 struct task_struct *owner;
384
385 /* FIFO queue of waiting tasks */
386 wait_queue_head_t wait;
387};
388
389static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
390{
391 return container_of(lock, struct fmlp_semaphore, litmus_lock);
392}
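/*
 * FMLP acquisition protocol: if the semaphore is free, the caller becomes
 * the owner and is boosted immediately; otherwise it enqueues itself in
 * FIFO order and suspends, relying on the unlock path to boost it, hand
 * over ownership, and wake it, so no re-check loop is needed on wakeup.
 */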
393int psnedf_fmlp_lock(struct litmus_lock* l)
394{
395 struct task_struct* t = current;
396 struct fmlp_semaphore *sem = fmlp_from_lock(l);
397 wait_queue_t wait;
398 unsigned long flags;
399
400 if (!is_realtime(t))
401 return -EPERM;
402
403 /* prevent nested lock acquisition --- not supported by FMLP */
404 if (tsk_rt(t)->num_locks_held ||
405 tsk_rt(t)->num_local_locks_held)
406 return -EBUSY;
407
408 spin_lock_irqsave(&sem->wait.lock, flags);
409
410 if (sem->owner) {
411 /* resource is not free => must suspend and wait */
412
413 init_waitqueue_entry(&wait, t);
414
415 /* FIXME: interruptible would be nice some day */
416 set_task_state(t, TASK_UNINTERRUPTIBLE);
417
418 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
419
420 TS_LOCK_SUSPEND;
421
422 /* release lock before sleeping */
423 spin_unlock_irqrestore(&sem->wait.lock, flags);
424
425 /* We depend on the FIFO order. Thus, we don't need to recheck
426 * when we wake up; we are guaranteed to have the lock since
427 * there is only one wake up per release.
428 */
429
430 schedule();
431
432 TS_LOCK_RESUME;
433
434 /* Since we hold the lock, no other task will change
435 * ->owner. We can thus check it without acquiring the spin
436 * lock. */
437 BUG_ON(sem->owner != t);
438 } else {
439 /* it's ours now */
440 sem->owner = t;
441
442 /* mark the task as priority-boosted. */
443 boost_priority(t);
444
445 spin_unlock_irqrestore(&sem->wait.lock, flags);
446 }
447
448 tsk_rt(t)->num_locks_held++;
449
450 return 0;
451}
452
453int psnedf_fmlp_unlock(struct litmus_lock* l)
454{
455 struct task_struct *t = current, *next;
456 struct fmlp_semaphore *sem = fmlp_from_lock(l);
457 unsigned long flags;
458 int err = 0;
459
460 spin_lock_irqsave(&sem->wait.lock, flags);
461
462 if (sem->owner != t) {
463 err = -EINVAL;
464 goto out;
465 }
466
467 tsk_rt(t)->num_locks_held--;
468
469 /* we lose the benefit of priority boosting */
470
471 unboost_priority(t);
472
473 /* check if there are jobs waiting for this resource */
474 next = __waitqueue_remove_first(&sem->wait);
475 if (next) {
476 /* boost next job */
477 boost_priority(next);
478
479		/* next becomes the resource holder */
480 sem->owner = next;
481
482 /* wake up next */
483 wake_up_process(next);
484 } else
485 /* resource becomes available */
486 sem->owner = NULL;
487
488out:
489 spin_unlock_irqrestore(&sem->wait.lock, flags);
490 return err;
491}
492
493int psnedf_fmlp_close(struct litmus_lock* l)
494{
495 struct task_struct *t = current;
496 struct fmlp_semaphore *sem = fmlp_from_lock(l);
497 unsigned long flags;
498
499 int owner;
500
501 spin_lock_irqsave(&sem->wait.lock, flags);
502
503 owner = sem->owner == t;
504
505 spin_unlock_irqrestore(&sem->wait.lock, flags);
506
507 if (owner)
508 psnedf_fmlp_unlock(l);
509
510 return 0;
511}
512
513void psnedf_fmlp_free(struct litmus_lock* lock)
514{
515 kfree(fmlp_from_lock(lock));
516}
517
518static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
519 .close = psnedf_fmlp_close,
520 .lock = psnedf_fmlp_lock,
521 .unlock = psnedf_fmlp_unlock,
522 .deallocate = psnedf_fmlp_free,
523};
524
525static struct litmus_lock* psnedf_new_fmlp(void)
526{
527 struct fmlp_semaphore* sem;
528
529 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
530 if (!sem)
531 return NULL;
532
533 sem->owner = NULL;
534 init_waitqueue_head(&sem->wait);
535 sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
536
537 return &sem->litmus_lock;
538}
539
540/* **** lock constructor **** */
541
542
543static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
544 void* __user unused)
545{
546 int err = -ENXIO;
547 struct srp_semaphore* srp;
548
549 /* PSN-EDF currently supports the SRP for local resources and the FMLP
550 * for global resources. */
551 switch (type) {
552 case FMLP_SEM:
553 /* Flexible Multiprocessor Locking Protocol */
554 *lock = psnedf_new_fmlp();
555 if (*lock)
556 err = 0;
557 else
558 err = -ENOMEM;
559 break;
560
561 case SRP_SEM:
562 /* Baker's Stack Resource Policy */
563 srp = allocate_srp_semaphore();
564 if (srp) {
565 *lock = &srp->litmus_lock;
566 err = 0;
567 } else
568 err = -ENOMEM;
569 break;
570 };
571
572 return err;
573}
574
575#endif
576
577static struct domain_proc_info psnedf_domain_proc_info;
578static long psnedf_get_domain_proc_info(struct domain_proc_info **ret)
579{
580 *ret = &psnedf_domain_proc_info;
581 return 0;
582}
583
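/*
 * /proc domain layout: every RT CPU forms its own scheduling domain, so
 * the mapping built below is 1:1, with the release-master CPU (if any)
 * excluded from both the CPU and the domain enumeration.
 */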
584static void psnedf_setup_domain_proc(void)
585{
586 int i, cpu;
587 int release_master =
588#ifdef CONFIG_RELEASE_MASTER
589 atomic_read(&release_master_cpu);
590#else
591 NO_CPU;
592#endif
593 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
594 struct cd_mapping *cpu_map, *domain_map;
595
596	memset(&psnedf_domain_proc_info, 0, sizeof(psnedf_domain_proc_info));
597 init_domain_proc_info(&psnedf_domain_proc_info, num_rt_cpus, num_rt_cpus);
598 psnedf_domain_proc_info.num_cpus = num_rt_cpus;
599 psnedf_domain_proc_info.num_domains = num_rt_cpus;
600
601 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
602 if (cpu == release_master)
603 continue;
604 cpu_map = &psnedf_domain_proc_info.cpu_to_domains[i];
605 domain_map = &psnedf_domain_proc_info.domain_to_cpus[i];
606
607 cpu_map->id = cpu;
608 domain_map->id = i; /* enumerate w/o counting the release master */
609 cpumask_set_cpu(i, cpu_map->mask);
610 cpumask_set_cpu(cpu, domain_map->mask);
611 ++i;
612 }
613}
614
615static long psnedf_activate_plugin(void)
616{
617#ifdef CONFIG_RELEASE_MASTER
618 int cpu;
619
620 for_each_online_cpu(cpu) {
621 remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
622 }
623#endif
624
625#ifdef CONFIG_LITMUS_LOCKING
626 get_srp_prio = psnedf_get_srp_prio;
627#endif
628
629 psnedf_setup_domain_proc();
630
631 return 0;
632}
633
634static long psnedf_deactivate_plugin(void)
635{
636 destroy_domain_proc_info(&psnedf_domain_proc_info);
637 return 0;
638}
639
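/*
 * Admission control: a task is accepted only if it already executes on
 * the CPU named in its task parameters (user space is expected to pin
 * the task to its partition, e.g. via CPU affinity, before switching it
 * to real-time mode) and that CPU is not the release master.
 */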
640static long psnedf_admit_task(struct task_struct* tsk)
641{
642 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
643#ifdef CONFIG_RELEASE_MASTER
644 /* don't allow tasks on release master CPU */
645 && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
646#endif
647 )
648 return 0;
649 else
650 return -EINVAL;
651}
652
653/* Plugin object */
654static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
655 .plugin_name = "PSN-EDF",
656 .task_new = psnedf_task_new,
657 .complete_job = complete_job,
658 .task_exit = psnedf_task_exit,
659 .schedule = psnedf_schedule,
660 .task_wake_up = psnedf_task_wake_up,
661 .task_block = psnedf_task_block,
662 .admit_task = psnedf_admit_task,
663 .activate_plugin = psnedf_activate_plugin,
664 .deactivate_plugin = psnedf_deactivate_plugin,
665 .get_domain_proc_info = psnedf_get_domain_proc_info,
666#ifdef CONFIG_LITMUS_LOCKING
667 .allocate_lock = psnedf_allocate_lock,
668#endif
669};
670
671
672static int __init init_psn_edf(void)
673{
674 int i;
675
676 /* We do not really want to support cpu hotplug, do we? ;)
677 * However, if we are so crazy to do so,
678	 * we cannot use num_online_cpus()
679 */
680 for (i = 0; i < num_online_cpus(); i++) {
681 psnedf_domain_init(remote_pedf(i),
682 psnedf_check_resched,
683 NULL, i);
684 }
685 return register_sched_plugin(&psn_edf_plugin);
686}
687
688module_init(init_psn_edf);