author		Bjoern Brandenburg <bbb@mpi-sws.org>	2015-08-09 07:18:55 -0400
committer	Bjoern Brandenburg <bbb@mpi-sws.org>	2017-05-26 17:12:40 -0400
commit		8bc21314c60e342460e2e286217df7108b56ecde (patch)
tree		db8be955c35ef7e8e3296cd8676fb304b3b5f6af /litmus
parent		0e8e8602ad72b1a6cee4c24869cd58b5c2b1d8c4 (diff)
Add PSN-EDF scheduler plugin
PSN-EDF: fix wrong memset()
PSN-EDF: use sched_trace_last_suspension_as_completion()
PSN-EDF: use inferred_sporadic_job_release_at
PSN-EDF: include np.h
Diffstat (limited to 'litmus')
-rw-r--r--	litmus/Makefile		4
-rw-r--r--	litmus/sched_psn_edf.c	687
2 files changed, 690 insertions, 1 deletion
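
For context, PSN-EDF is a partitioned scheduler: each task is bound to exactly one CPU and scheduled there by EDF, with priority boosting (not priority inheritance) used for global locks. The userspace sketch below shows how a task might be set up to run on partition 0 under this plugin. It loosely follows liblitmus's base_task.c example; every function name used (init_litmus(), init_rt_task_param(), set_rt_task_param(), task_mode(), be_migrate_to_domain(), sleep_next_period()) is an assumption about that library and may differ between liblitmus versions.

/* Hypothetical userspace sketch (liblitmus API names assumed, not verified
 * against a particular liblitmus version). */
#include <litmus.h>	/* liblitmus userspace header (assumed) */

int main(void)
{
	struct rt_task param;

	init_rt_task_param(&param);
	param.exec_cost = ms2ns(10);	/* worst-case execution time: 10 ms */
	param.period    = ms2ns(100);	/* implicit deadline: period = 100 ms */
	param.cpu       = 0;		/* partition (CPU) this task belongs to */

	if (init_litmus() != 0)
		return 1;

	/* psnedf_admit_task() rejects tasks that do not already run on
	 * their assigned partition, so migrate there first. */
	if (be_migrate_to_domain(param.cpu) != 0)
		return 1;

	if (set_rt_task_param(gettid(), &param) != 0)
		return 1;

	if (task_mode(LITMUS_RT_TASK) != 0)	/* become a real-time task */
		return 1;

	/* ... one job's worth of work ... */

	sleep_next_period();	/* signal job completion (complete_job path) */

	task_mode(BACKGROUND_TASK);	/* back to best-effort mode */
	return 0;
}
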
diff --git a/litmus/Makefile b/litmus/Makefile
index 56499562defb..1871953d3fc6 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -18,7 +18,9 @@ obj-y = sched_plugin.o litmus.o \
 	bheap.o \
 	binheap.o \
 	ctrldev.o \
-	uncachedev.o
+	uncachedev.o \
+	sched_psn_edf.o
+
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 000000000000..e01fddb7cedf
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,687 @@
1/*
2 * kernel/sched_psn_edf.c
3 *
4 * Implementation of the PSN-EDF scheduler plugin.
5 * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
6 *
7 * Suspensions and non-preemptable sections are supported.
8 * Priority inheritance is not supported.
9 */
10
11#include <linux/percpu.h>
12#include <linux/sched.h>
13#include <linux/list.h>
14#include <linux/spinlock.h>
15#include <linux/module.h>
16
17#include <litmus/litmus.h>
18#include <litmus/jobs.h>
19#include <litmus/preempt.h>
20#include <litmus/budget.h>
21#include <litmus/np.h>
22#include <litmus/sched_plugin.h>
23#include <litmus/edf_common.h>
24#include <litmus/sched_trace.h>
25#include <litmus/trace.h>
26
27/* to set up domain/cpu mappings */
28#include <litmus/litmus_proc.h>
29
30typedef struct {
31 rt_domain_t domain;
32 int cpu;
33 struct task_struct* scheduled; /* only RT tasks */
34/*
35 * scheduling lock slock
36 * protects the domain and serializes scheduling decisions
37 */
38#define slock domain.ready_lock
39
40} psnedf_domain_t;
41
42DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
43
44#define local_edf (&(this_cpu_ptr(&psnedf_domains)->domain))
45#define local_pedf (this_cpu_ptr(&psnedf_domains))
46#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
47#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
48#define task_edf(task) remote_edf(get_partition(task))
49#define task_pedf(task) remote_pedf(get_partition(task))
50
51
52static void psnedf_domain_init(psnedf_domain_t* pedf,
53 check_resched_needed_t check,
54 release_jobs_t release,
55 int cpu)
56{
57 edf_domain_init(&pedf->domain, check, release);
58 pedf->cpu = cpu;
59 pedf->scheduled = NULL;
60}
61
62static void requeue(struct task_struct* t, rt_domain_t *edf)
63{
64 if (t->state != TASK_RUNNING)
65 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
66
67 tsk_rt(t)->completed = 0;
68 if (is_early_releasing(t) || is_released(t, litmus_clock()))
69 __add_ready(edf, t);
70 else
71 add_release(edf, t); /* it has got to wait */
72}
73
74/* we assume the lock is being held */
75static void preempt(psnedf_domain_t *pedf)
76{
77 preempt_if_preemptable(pedf->scheduled, pedf->cpu);
78}
79
80#ifdef CONFIG_LITMUS_LOCKING
81
82static void boost_priority(struct task_struct* t)
83{
84 unsigned long flags;
85 psnedf_domain_t* pedf = task_pedf(t);
86 lt_t now;
87
88 raw_spin_lock_irqsave(&pedf->slock, flags);
89 now = litmus_clock();
90
91 TRACE_TASK(t, "priority boosted at %llu\n", now);
92
93 tsk_rt(t)->priority_boosted = 1;
94 tsk_rt(t)->boost_start_time = now;
95
96 if (pedf->scheduled != t) {
97 /* holder may be queued: first stop queue changes */
98 raw_spin_lock(&pedf->domain.release_lock);
99 if (is_queued(t) &&
100 /* If it is queued, then we need to re-order. */
101 bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node) &&
102 /* If we bubbled to the top, then we need to check for preemptions. */
103 edf_preemption_needed(&pedf->domain, pedf->scheduled))
104 preempt(pedf);
105 raw_spin_unlock(&pedf->domain.release_lock);
106 } /* else: nothing to do since the job is not queued while scheduled */
107
108 raw_spin_unlock_irqrestore(&pedf->slock, flags);
109}
110
111static void unboost_priority(struct task_struct* t)
112{
113 unsigned long flags;
114 psnedf_domain_t* pedf = task_pedf(t);
115 lt_t now;
116
117 raw_spin_lock_irqsave(&pedf->slock, flags);
118 now = litmus_clock();
119
120 /* Assumption: this only happens when the job is scheduled.
121 * Exception: If t transitioned to non-real-time mode, we no longer
122 * care about it. */
123 BUG_ON(pedf->scheduled != t && is_realtime(t));
124
125 TRACE_TASK(t, "priority restored at %llu\n", now);
126
127 tsk_rt(t)->priority_boosted = 0;
128 tsk_rt(t)->boost_start_time = 0;
129
130 /* check if this changes anything */
131 if (edf_preemption_needed(&pedf->domain, pedf->scheduled))
132 preempt(pedf);
133
134 raw_spin_unlock_irqrestore(&pedf->slock, flags);
135}
136
137#endif
138
139static int psnedf_preempt_check(psnedf_domain_t *pedf)
140{
141 if (edf_preemption_needed(&pedf->domain, pedf->scheduled)) {
142 preempt(pedf);
143 return 1;
144 } else
145 return 0;
146}
147
148/* This check is trivial in partitioned systems as we only have to consider
149 * the CPU of the partition.
150 */
151static int psnedf_check_resched(rt_domain_t *edf)
152{
153 psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
154
155 /* because this is a callback from rt_domain_t we already hold
156 * the necessary lock for the ready queue
157 */
158 return psnedf_preempt_check(pedf);
159}
160
161static void job_completion(struct task_struct* t, int forced)
162{
163 sched_trace_task_completion(t, forced);
164 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
165
166 tsk_rt(t)->completed = 0;
167 prepare_for_next_period(t);
168}
169
170static struct task_struct* psnedf_schedule(struct task_struct * prev)
171{
172 psnedf_domain_t* pedf = local_pedf;
173 rt_domain_t* edf = &pedf->domain;
174 struct task_struct* next;
175
176 int out_of_time, sleep, preempt,
177 np, exists, blocks, resched;
178
179 raw_spin_lock(&pedf->slock);
180
181 /* sanity checking
182	 * unlike under GSN-EDF, when a task exits (dies),
183	 * pedf->scheduled may be NULL and prev _is_ a real-time task
184 */
185 BUG_ON(pedf->scheduled && pedf->scheduled != prev);
186 BUG_ON(pedf->scheduled && !is_realtime(prev));
187
188 /* (0) Determine state */
189 exists = pedf->scheduled != NULL;
190 blocks = exists && !is_current_running();
191 out_of_time = exists && budget_enforced(pedf->scheduled)
192 && budget_exhausted(pedf->scheduled);
193 np = exists && is_np(pedf->scheduled);
194 sleep = exists && is_completed(pedf->scheduled);
195 preempt = edf_preemption_needed(edf, prev);
196
197 /* If we need to preempt do so.
198 * The following checks set resched to 1 in case of special
199 * circumstances.
200 */
201 resched = preempt;
202
203 /* If a task blocks we have no choice but to reschedule.
204 */
205 if (blocks)
206 resched = 1;
207
208 /* Request a sys_exit_np() call if we would like to preempt but cannot.
209 * Multiple calls to request_exit_np() don't hurt.
210 */
211 if (np && (out_of_time || preempt || sleep))
212 request_exit_np(pedf->scheduled);
213
214 /* Any task that is preemptable and either exhausts its execution
215 * budget or wants to sleep completes. We may have to reschedule after
216 * this.
217 */
218 if (!np && (out_of_time || sleep)) {
219 job_completion(pedf->scheduled, !sleep);
220 resched = 1;
221 }
222
223 /* The final scheduling decision. Do we need to switch for some reason?
224 * Switch if we are in RT mode and have no task or if we need to
225 * resched.
226 */
227 next = NULL;
228 if ((!np || blocks) && (resched || !exists)) {
229 /* When preempting a task that does not block, then
230 * re-insert it into either the ready queue or the
231 * release queue (if it completed). requeue() picks
232 * the appropriate queue.
233 */
234 if (pedf->scheduled && !blocks)
235 requeue(pedf->scheduled, edf);
236 next = __take_ready(edf);
237 } else
238 /* Only override Linux scheduler if we have a real-time task
239 * scheduled that needs to continue.
240 */
241 if (exists)
242 next = prev;
243
244 if (next) {
245 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
246 } else {
247 TRACE("becoming idle at %llu\n", litmus_clock());
248 }
249
250 pedf->scheduled = next;
251 sched_state_task_picked();
252 raw_spin_unlock(&pedf->slock);
253
254 return next;
255}
256
257
258/* Prepare a task for running in RT mode
259 */
260static void psnedf_task_new(struct task_struct * t, int on_rq, int is_scheduled)
261{
262 rt_domain_t* edf = task_edf(t);
263 psnedf_domain_t* pedf = task_pedf(t);
264 unsigned long flags;
265
266 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
267 t->rt_param.task_params.cpu);
268
269 /* setup job parameters */
270 release_at(t, litmus_clock());
271
272 /* The task should be running in the queue, otherwise signal
273 * code will try to wake it up with fatal consequences.
274 */
275 raw_spin_lock_irqsave(&pedf->slock, flags);
276 if (is_scheduled) {
277 /* there shouldn't be anything else scheduled at the time */
278 BUG_ON(pedf->scheduled);
279 pedf->scheduled = t;
280 } else {
281 /* !is_scheduled means it is not scheduled right now, but it
282 * does not mean that it is suspended. If it is not suspended,
283 * it still needs to be requeued. If it is suspended, there is
284 * nothing that we need to do as it will be handled by the
285 * wake_up() handler. */
286 if (on_rq) {
287 requeue(t, edf);
288 /* maybe we have to reschedule */
289 psnedf_preempt_check(pedf);
290 }
291 }
292 raw_spin_unlock_irqrestore(&pedf->slock, flags);
293}
294
295static void psnedf_task_wake_up(struct task_struct *task)
296{
297 unsigned long flags;
298 psnedf_domain_t* pedf = task_pedf(task);
299 rt_domain_t* edf = task_edf(task);
300 lt_t now;
301
302 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
303 raw_spin_lock_irqsave(&pedf->slock, flags);
304 BUG_ON(is_queued(task));
305 now = litmus_clock();
306 if (is_sporadic(task) && is_tardy(task, now)
307#ifdef CONFIG_LITMUS_LOCKING
308 /* We need to take suspensions because of semaphores into
309 * account! If a job resumes after being suspended due to acquiring
310 * a semaphore, it should never be treated as a new job release.
311 */
312 && !is_priority_boosted(task)
313#endif
314 ) {
315 inferred_sporadic_job_release_at(task, now);
316 }
317
318 /* Only add to ready queue if it is not the currently-scheduled
319 * task. This could be the case if a task was woken up concurrently
320 * on a remote CPU before the executing CPU got around to actually
321 * de-scheduling the task, i.e., wake_up() raced with schedule()
322 * and won.
323 */
324 if (pedf->scheduled != task) {
325 requeue(task, edf);
326 psnedf_preempt_check(pedf);
327 }
328
329 raw_spin_unlock_irqrestore(&pedf->slock, flags);
330 TRACE_TASK(task, "wake up done\n");
331}
332
333static void psnedf_task_block(struct task_struct *t)
334{
335 /* only running tasks can block, thus t is in no queue */
336 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
337
338 BUG_ON(!is_realtime(t));
339 BUG_ON(is_queued(t));
340}
341
342static void psnedf_task_exit(struct task_struct * t)
343{
344 unsigned long flags;
345 psnedf_domain_t* pedf = task_pedf(t);
346 rt_domain_t* edf;
347
348 raw_spin_lock_irqsave(&pedf->slock, flags);
349 if (is_queued(t)) {
350 /* dequeue */
351 edf = task_edf(t);
352 remove(edf, t);
353 }
354 if (pedf->scheduled == t)
355 pedf->scheduled = NULL;
356
357 TRACE_TASK(t, "RIP, now reschedule\n");
358
359 preempt(pedf);
360 raw_spin_unlock_irqrestore(&pedf->slock, flags);
361}
362
363#ifdef CONFIG_LITMUS_LOCKING
364
365#include <litmus/fdso.h>
366#include <litmus/srp.h>
367
368/* ******************** SRP support ************************ */
369
370static unsigned int psnedf_get_srp_prio(struct task_struct* t)
371{
372 return get_rt_relative_deadline(t);
373}
374
375/* ******************** FMLP support ********************** */
376
377/* struct for a semaphore with priority boosting (FMLP) */
378struct fmlp_semaphore {
379 struct litmus_lock litmus_lock;
380
381 /* current resource holder */
382 struct task_struct *owner;
383
384 /* FIFO queue of waiting tasks */
385 wait_queue_head_t wait;
386};
387
388static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
389{
390 return container_of(lock, struct fmlp_semaphore, litmus_lock);
391}
392int psnedf_fmlp_lock(struct litmus_lock* l)
393{
394 struct task_struct* t = current;
395 struct fmlp_semaphore *sem = fmlp_from_lock(l);
396 wait_queue_t wait;
397 unsigned long flags;
398
399 if (!is_realtime(t))
400 return -EPERM;
401
402 /* prevent nested lock acquisition --- not supported by FMLP */
403 if (tsk_rt(t)->num_locks_held ||
404 tsk_rt(t)->num_local_locks_held)
405 return -EBUSY;
406
407 spin_lock_irqsave(&sem->wait.lock, flags);
408
409 if (sem->owner) {
410 /* resource is not free => must suspend and wait */
411
412 init_waitqueue_entry(&wait, t);
413
414 /* FIXME: interruptible would be nice some day */
415 set_task_state(t, TASK_UNINTERRUPTIBLE);
416
417 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
418
419 TS_LOCK_SUSPEND;
420
421 /* release lock before sleeping */
422 spin_unlock_irqrestore(&sem->wait.lock, flags);
423
424 /* We depend on the FIFO order. Thus, we don't need to recheck
425 * when we wake up; we are guaranteed to have the lock since
426 * there is only one wake up per release.
427 */
428
429 schedule();
430
431 TS_LOCK_RESUME;
432
433 /* Since we hold the lock, no other task will change
434 * ->owner. We can thus check it without acquiring the spin
435 * lock. */
436 BUG_ON(sem->owner != t);
437 } else {
438 /* it's ours now */
439 sem->owner = t;
440
441 /* mark the task as priority-boosted. */
442 boost_priority(t);
443
444 spin_unlock_irqrestore(&sem->wait.lock, flags);
445 }
446
447 tsk_rt(t)->num_locks_held++;
448
449 return 0;
450}
451
452int psnedf_fmlp_unlock(struct litmus_lock* l)
453{
454 struct task_struct *t = current, *next;
455 struct fmlp_semaphore *sem = fmlp_from_lock(l);
456 unsigned long flags;
457 int err = 0;
458
459 spin_lock_irqsave(&sem->wait.lock, flags);
460
461 if (sem->owner != t) {
462 err = -EINVAL;
463 goto out;
464 }
465
466 tsk_rt(t)->num_locks_held--;
467
468 /* we lose the benefit of priority boosting */
469
470 unboost_priority(t);
471
472 /* check if there are jobs waiting for this resource */
473 next = __waitqueue_remove_first(&sem->wait);
474 if (next) {
475 /* boost next job */
476 boost_priority(next);
477
478		/* next becomes the resource holder */
479 sem->owner = next;
480
481 /* wake up next */
482 wake_up_process(next);
483 } else
484 /* resource becomes available */
485 sem->owner = NULL;
486
487out:
488 spin_unlock_irqrestore(&sem->wait.lock, flags);
489 return err;
490}
491
492int psnedf_fmlp_close(struct litmus_lock* l)
493{
494 struct task_struct *t = current;
495 struct fmlp_semaphore *sem = fmlp_from_lock(l);
496 unsigned long flags;
497
498 int owner;
499
500 spin_lock_irqsave(&sem->wait.lock, flags);
501
502 owner = sem->owner == t;
503
504 spin_unlock_irqrestore(&sem->wait.lock, flags);
505
506 if (owner)
507 psnedf_fmlp_unlock(l);
508
509 return 0;
510}
511
512void psnedf_fmlp_free(struct litmus_lock* lock)
513{
514 kfree(fmlp_from_lock(lock));
515}
516
517static struct litmus_lock_ops psnedf_fmlp_lock_ops = {
518 .close = psnedf_fmlp_close,
519 .lock = psnedf_fmlp_lock,
520 .unlock = psnedf_fmlp_unlock,
521 .deallocate = psnedf_fmlp_free,
522};
523
524static struct litmus_lock* psnedf_new_fmlp(void)
525{
526 struct fmlp_semaphore* sem;
527
528 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
529 if (!sem)
530 return NULL;
531
532 sem->owner = NULL;
533 init_waitqueue_head(&sem->wait);
534 sem->litmus_lock.ops = &psnedf_fmlp_lock_ops;
535
536 return &sem->litmus_lock;
537}
538
539/* **** lock constructor **** */
540
541
542static long psnedf_allocate_lock(struct litmus_lock **lock, int type,
543 void* __user unused)
544{
545 int err = -ENXIO;
546 struct srp_semaphore* srp;
547
548 /* PSN-EDF currently supports the SRP for local resources and the FMLP
549 * for global resources. */
550 switch (type) {
551 case FMLP_SEM:
552 /* Flexible Multiprocessor Locking Protocol */
553 *lock = psnedf_new_fmlp();
554 if (*lock)
555 err = 0;
556 else
557 err = -ENOMEM;
558 break;
559
560 case SRP_SEM:
561 /* Baker's Stack Resource Policy */
562 srp = allocate_srp_semaphore();
563 if (srp) {
564 *lock = &srp->litmus_lock;
565 err = 0;
566 } else
567 err = -ENOMEM;
568 break;
569 };
570
571 return err;
572}
573
574#endif
575
576static struct domain_proc_info psnedf_domain_proc_info;
577static long psnedf_get_domain_proc_info(struct domain_proc_info **ret)
578{
579 *ret = &psnedf_domain_proc_info;
580 return 0;
581}
582
583static void psnedf_setup_domain_proc(void)
584{
585 int i, cpu;
586 int release_master =
587#ifdef CONFIG_RELEASE_MASTER
588 atomic_read(&release_master_cpu);
589#else
590 NO_CPU;
591#endif
592 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
593 struct cd_mapping *cpu_map, *domain_map;
594
595 memset(&psnedf_domain_proc_info, 0, sizeof(psnedf_domain_proc_info));
596 init_domain_proc_info(&psnedf_domain_proc_info, num_rt_cpus, num_rt_cpus);
597 psnedf_domain_proc_info.num_cpus = num_rt_cpus;
598 psnedf_domain_proc_info.num_domains = num_rt_cpus;
599
600 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
601 if (cpu == release_master)
602 continue;
603 cpu_map = &psnedf_domain_proc_info.cpu_to_domains[i];
604 domain_map = &psnedf_domain_proc_info.domain_to_cpus[i];
605
606 cpu_map->id = cpu;
607 domain_map->id = i; /* enumerate w/o counting the release master */
608 cpumask_set_cpu(i, cpu_map->mask);
609 cpumask_set_cpu(cpu, domain_map->mask);
610 ++i;
611 }
612}
613
614static long psnedf_activate_plugin(void)
615{
616#ifdef CONFIG_RELEASE_MASTER
617 int cpu;
618
619 for_each_online_cpu(cpu) {
620 remote_edf(cpu)->release_master = atomic_read(&release_master_cpu);
621 }
622#endif
623
624#ifdef CONFIG_LITMUS_LOCKING
625 get_srp_prio = psnedf_get_srp_prio;
626#endif
627
628 psnedf_setup_domain_proc();
629
630 return 0;
631}
632
633static long psnedf_deactivate_plugin(void)
634{
635 destroy_domain_proc_info(&psnedf_domain_proc_info);
636 return 0;
637}
638
639static long psnedf_admit_task(struct task_struct* tsk)
640{
641 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu
642#ifdef CONFIG_RELEASE_MASTER
643 /* don't allow tasks on release master CPU */
644 && task_cpu(tsk) != remote_edf(task_cpu(tsk))->release_master
645#endif
646 )
647 return 0;
648 else
649 return -EINVAL;
650}
651
652/* Plugin object */
653static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
654 .plugin_name = "PSN-EDF",
655 .task_new = psnedf_task_new,
656 .complete_job = complete_job,
657 .task_exit = psnedf_task_exit,
658 .schedule = psnedf_schedule,
659 .task_wake_up = psnedf_task_wake_up,
660 .task_block = psnedf_task_block,
661 .admit_task = psnedf_admit_task,
662 .activate_plugin = psnedf_activate_plugin,
663 .deactivate_plugin = psnedf_deactivate_plugin,
664 .get_domain_proc_info = psnedf_get_domain_proc_info,
665#ifdef CONFIG_LITMUS_LOCKING
666 .allocate_lock = psnedf_allocate_lock,
667#endif
668};
669
670
671static int __init init_psn_edf(void)
672{
673 int i;
674
675 /* We do not really want to support cpu hotplug, do we? ;)
676 * However, if we are so crazy to do so,
677 * we cannot use num_online_cpus()
678 */
679 for (i = 0; i < num_online_cpus(); i++) {
680 psnedf_domain_init(remote_pedf(i),
681 psnedf_check_resched,
682 NULL, i);
683 }
684 return register_sched_plugin(&psn_edf_plugin);
685}
686
687module_init(init_psn_edf);
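
Because psnedf_allocate_lock() supports FMLP_SEM (global resources) and SRP_SEM (local resources), a task admitted under PSN-EDF can allocate such semaphores through the FDSO interface and lock/unlock them from userspace. The sketch below is a hypothetical illustration against liblitmus (open_fmlp_sem(), litmus_lock(), litmus_unlock(), od_close() are assumed names that may differ by version); its only purpose is to show how the boosting path in psnedf_fmlp_lock()/psnedf_fmlp_unlock() is reached.

/* Hypothetical sketch: FMLP critical section from a PSN-EDF task
 * (liblitmus API names assumed; error handling omitted for brevity). */
#include <fcntl.h>
#include <unistd.h>
#include <litmus.h>	/* liblitmus userspace header (assumed) */

void fmlp_critical_section(void)
{
	int fd, od;

	/* Lock objects are attached to a shared file ("namespace"). */
	fd = open("fmlp_namespace", O_RDONLY | O_CREAT, 0666);

	/* Resource id 0; kernel-side allocation goes through
	 * psnedf_allocate_lock(FMLP_SEM, ...). */
	od = open_fmlp_sem(fd, 0);

	litmus_lock(od);	/* may suspend; priority-boosts the holder */
	/* ... access the globally shared resource ... */
	litmus_unlock(od);	/* unboosts; hands the lock over in FIFO order */

	od_close(od);
	close(fd);
}

An SRP semaphore for a CPU-local resource would be opened analogously, with its preemption ceiling ultimately derived from psnedf_get_srp_prio() above.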