path: root/litmus/sched_pres.c
author:    Nathan O <otternes@cs.unc.edu>  2019-11-15 11:19:35 -0500
committer: Nathan O <otternes@cs.unc.edu>  2019-11-15 11:19:35 -0500
commit:    2627f203874e04500ea80f6e588cd659bec5866b (patch)
tree:      feec07a6a87a24460a19808dcd88ba36ad03201d /litmus/sched_pres.c
parent:    bf929479893052b1c7bfe23a4e7a903643076350 (diff)
Re-add LITMUS files, but don't "connect the wires"
- Added the LITMUS^RT source files from the most recent version of LITMUS (based on the old kernel).
- This change does *not* actually make use of any of the new files, because I wanted to make sure that any changes needed to get LITMUS actually working are clearly visible in future commits. If I had made such changes before committing the files themselves, it would not be as easy to see what needed to change in the LITMUS source files, and the large number of changes would also make it harder to see what needed to change in the base Linux sources.
Diffstat (limited to 'litmus/sched_pres.c')
-rw-r--r--  litmus/sched_pres.c  612
1 file changed, 612 insertions, 0 deletions
diff --git a/litmus/sched_pres.c b/litmus/sched_pres.c
new file mode 100644
index 000000000000..0a3270346656
--- /dev/null
+++ b/litmus/sched_pres.c
@@ -0,0 +1,612 @@
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <asm/uaccess.h>

#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>
#include <litmus/debug_trace.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/budget.h>
#include <litmus/litmus_proc.h>
#include <litmus/sched_trace.h>

#include <litmus/reservations/reservation.h>
#include <litmus/reservations/alloc.h>

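/* Per-task state: the reservation client through which the task is served,
 * the CPU whose reservation environment hosts it, and the embedded
 * task_client bookkeeping that links the task to its reservation. */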
struct pres_task_state {
	struct reservation_client *client;
	int cpu;
	struct task_client res_info;
};

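/* Per-CPU scheduler state: a uniprocessor (sup) reservation environment,
 * the hrtimer used to trigger scheduler updates, and the task currently
 * scheduled on this CPU. All fields are protected by the lock. */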
struct pres_cpu_state {
	raw_spinlock_t lock;

	struct sup_reservation_environment sup_env;
	struct hrtimer timer;

	int cpu;
	struct task_struct* scheduled;
};

static DEFINE_PER_CPU(struct pres_cpu_state, pres_cpu_state);

#define cpu_state_for(cpu_id) (&per_cpu(pres_cpu_state, cpu_id))
#define local_cpu_state() (this_cpu_ptr(&pres_cpu_state))

static struct pres_task_state* get_pres_state(struct task_struct *tsk)
{
	return (struct pres_task_state*) tsk_rt(tsk)->plugin_state;
}

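/* Remove the task's client from its reservation; called with state->lock
 * held when the task suspends, completes a job, or exits. */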
static void task_departs(struct task_struct *tsk, int job_complete)
{
	struct pres_task_state* state = get_pres_state(tsk);
	struct reservation* res;
	struct reservation_client *client;

	client = state->client;
	res = client->reservation;

	res->ops->client_departs(res, client, job_complete);
	TRACE_TASK(tsk, "client_departs: removed from reservation R%d\n", res->id);
}

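/* (Re-)add the task's client to its reservation; called with state->lock
 * held when the task becomes eligible to run again. */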
static void task_arrives(struct task_struct *tsk)
{
	struct pres_task_state* state = get_pres_state(tsk);
	struct reservation* res;
	struct reservation_client *client;

	client = state->client;
	res = client->reservation;

	res->ops->client_arrives(res, client);
	TRACE_TASK(tsk, "client_arrives: added to reservation R%d\n", res->id);
}

/* NOTE: drops state->lock */
static void pres_update_timer_and_unlock(struct pres_cpu_state *state)
{
	int local;
	lt_t update, now;

	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	/* Be sure we're actually running on the right core,
	 * as pres_update_timer() is also called from pres_task_resume(),
	 * which might be called on any CPU when a thread resumes.
	 */
	local = local_cpu_state() == state;

	/* Must drop state lock before calling into hrtimer_start(), which
	 * may raise a softirq, which in turn may wake ksoftirqd. */
	raw_spin_unlock(&state->lock);

	if (update <= now) {
		litmus_reschedule(state->cpu);
	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Reprogram only if not already set correctly. */
		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
			TRACE("canceling timer...\n");
			hrtimer_cancel(&state->timer);
			TRACE("setting scheduler timer for %llu\n", update);
			hrtimer_start(&state->timer,
				ns_to_ktime(update),
				HRTIMER_MODE_ABS_PINNED);
			if (update < litmus_clock()) {
				/* uh oh, timer expired while trying to set it */
				TRACE("timer expired during setting "
				      "update:%llu now:%llu actual:%llu\n",
				      update, now, litmus_clock());
				/* The timer HW may not have been reprogrammed
				 * correctly; force rescheduling now. */
				litmus_reschedule(state->cpu);
			}
		}
	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Poke remote core only if timer needs to be set earlier than
		 * it is currently set.
		 */
		TRACE("pres_update_timer for remote CPU %d (update=%llu, "
		      "active:%d, set:%llu)\n",
		      state->cpu,
		      update,
		      hrtimer_active(&state->timer),
		      ktime_to_ns(hrtimer_get_expires(&state->timer)));
		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
			TRACE("poking CPU %d so that it can update its "
			      "scheduling timer (active:%d, set:%llu)\n",
			      state->cpu,
			      hrtimer_active(&state->timer),
			      ktime_to_ns(hrtimer_get_expires(&state->timer)));
			litmus_reschedule(state->cpu);
		}
	}
}

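/* hrtimer callback: advance the reservation environment to the current time
 * and either trigger a local reschedule or re-arm the timer for the next
 * scheduler update. */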
static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
{
	unsigned long flags;
	enum hrtimer_restart restart = HRTIMER_NORESTART;
	struct pres_cpu_state *state;
	lt_t update, now;

	state = container_of(timer, struct pres_cpu_state, timer);

	/* The scheduling timer should only fire on the local CPU, because
	 * otherwise deadlocks via timer_cancel() are possible.
	 * Note: this does not interfere with dedicated interrupt handling, as
	 * even under dedicated interrupt handling scheduling timers for
	 * budget enforcement must occur locally on each CPU.
	 */
	BUG_ON(state->cpu != raw_smp_processor_id());

	raw_spin_lock_irqsave(&state->lock, flags);
	sup_update_time(&state->sup_env, litmus_clock());

	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
		  now, update, state->cpu);

	if (update <= now) {
		litmus_reschedule_local();
	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
		hrtimer_set_expires(timer, ns_to_ktime(update));
		restart = HRTIMER_RESTART;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	return restart;
}

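/* Main scheduling callback: charge elapsed time to the local reservation
 * environment, let sup_dispatch() pick the next client to run, and re-arm
 * the scheduler timer. Returns NULL when no reservation client is eligible,
 * which tells the core to run background (non-real-time) work. */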
static struct task_struct* pres_schedule(struct task_struct * prev)
{
	/* next == NULL means "schedule background work". */
	struct pres_cpu_state *state = local_cpu_state();

	raw_spin_lock(&state->lock);

	BUG_ON(state->scheduled && state->scheduled != prev);
	BUG_ON(state->scheduled && !is_realtime(prev));

	/* update time */
	state->sup_env.will_schedule = true;
	sup_update_time(&state->sup_env, litmus_clock());

	/* figure out what to schedule next */
	state->scheduled = sup_dispatch(&state->sup_env);

	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
	sched_state_task_picked();

	/* program scheduler timer */
	state->sup_env.will_schedule = false;
	/* NOTE: drops state->lock */
	pres_update_timer_and_unlock(state);

	if (prev != state->scheduled && is_realtime(prev))
		TRACE_TASK(prev, "descheduled.\n");
	if (state->scheduled)
		TRACE_TASK(state->scheduled, "scheduled.\n");

	return state->scheduled;
}

static void resume_legacy_task_model_updates(struct task_struct *tsk)
{
	lt_t now;
	if (is_sporadic(tsk)) {
		/* If this sporadic task was gone for a "long" time and woke up past
		 * its deadline, then give it a new budget by triggering a job
		 * release. This is purely cosmetic and has no effect on the
		 * P-RES scheduler. */

		now = litmus_clock();
		if (is_tardy(tsk, now)) {
			inferred_sporadic_job_release_at(tsk, now);
		}
	}
}


/* Called when a task should be removed from the ready queue.
 */
static void pres_task_block(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "thread suspends at %llu (state:%d, running:%d)\n",
		   litmus_clock(), tsk->state, is_current_running());

	raw_spin_lock_irqsave(&state->lock, flags);
	sup_update_time(&state->sup_env, litmus_clock());
	task_departs(tsk, is_completed(tsk));
	raw_spin_unlock_irqrestore(&state->lock, flags);
}


/* Called when the state of tsk changes back to TASK_RUNNING.
 * We need to requeue the task.
 */
static void pres_task_resume(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());

	raw_spin_lock_irqsave(&state->lock, flags);
	/* Assumption: litmus_clock() is synchronized across cores,
	 * since we might not actually be executing on tinfo->cpu
	 * at the moment. */
	sup_update_time(&state->sup_env, litmus_clock());
	task_arrives(tsk);
	/* NOTE: drops state->lock */
	pres_update_timer_and_unlock(state);
	local_irq_restore(flags);

	resume_legacy_task_model_updates(tsk);
}

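/* Admission test: look up the reservation whose ID is given in the task's
 * task_params.cpu field on the target core; admission fails if no such
 * reservation exists there. Per-thread budget enforcement is disabled,
 * since budgeting is done by the reservation itself. */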
static long pres_admit_task(struct task_struct *tsk)
{
	long err = -EINVAL;
	unsigned long flags;
	struct reservation *res;
	struct pres_cpu_state *state;
	struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);

	if (!tinfo)
		return -ENOMEM;

	preempt_disable();

	/* NOTE: this is obviously racy w.r.t. affinity changes since
	 * we are not holding any runqueue locks. */
	if (tsk->nr_cpus_allowed != 1) {
		printk(KERN_WARNING "%s/%d: task does not have "
		       "singleton affinity mask\n",
		       tsk->comm, tsk->pid);
		state = cpu_state_for(task_cpu(tsk));
	} else {
		state = cpu_state_for(cpumask_first(&tsk->cpus_allowed));
	}

	TRACE_TASK(tsk, "on CPU %d, valid?:%d\n",
		   task_cpu(tsk), cpumask_test_cpu(task_cpu(tsk), &tsk->cpus_allowed));

	raw_spin_lock_irqsave(&state->lock, flags);

	res = sup_find_by_id(&state->sup_env, tsk_rt(tsk)->task_params.cpu);

	/* found the appropriate reservation (or vCPU) */
	if (res) {
		task_client_init(&tinfo->res_info, tsk, res);
		tinfo->cpu = state->cpu;
		tinfo->client = &tinfo->res_info.client;
		tsk_rt(tsk)->plugin_state = tinfo;
		err = 0;

		/* disable LITMUS^RT's per-thread budget enforcement */
		tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
	} else {
		printk(KERN_WARNING "Could not find reservation %d on "
		       "core %d for task %s/%d\n",
		       tsk_rt(tsk)->task_params.cpu, state->cpu,
		       tsk->comm, tsk->pid);
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	preempt_enable();

	if (err)
		kfree(tinfo);

	return err;
}

static void task_new_legacy_task_model_updates(struct task_struct *tsk)
{
	lt_t now = litmus_clock();

	/* the first job exists starting as of right now */
	release_at(tsk, now);
	sched_trace_task_release(tsk);
}

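/* Set up a task that has just become a real-time task under P-RES: mark it
 * as scheduled if it is already running, add it to its reservation if it is
 * runnable, and re-arm the scheduler timer. */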
static void pres_task_new(struct task_struct *tsk, int on_runqueue,
			  int is_running)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
		   litmus_clock(), on_runqueue, is_running);

	/* acquire the lock protecting the state and disable interrupts */
	raw_spin_lock_irqsave(&state->lock, flags);

	if (is_running) {
		state->scheduled = tsk;
		/* make sure this task should actually be running */
		litmus_reschedule_local();
	}

	if (on_runqueue || is_running) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */
		sup_update_time(&state->sup_env, litmus_clock());
		task_arrives(tsk);
		/* NOTE: drops state->lock */
		pres_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	task_new_legacy_task_model_updates(tsk);
}

static bool pres_fork_task(struct task_struct *tsk)
{
	TRACE_CUR("is forking\n");
	TRACE_TASK(tsk, "forked child rt:%d cpu:%d task_cpu:%d "
		   "wcet:%llu per:%llu\n",
		   is_realtime(tsk),
		   tsk_rt(tsk)->task_params.cpu,
		   task_cpu(tsk),
		   tsk_rt(tsk)->task_params.exec_cost,
		   tsk_rt(tsk)->task_params.period);

	/* We always allow forking. */
	/* The newly forked task will be in the same reservation. */
	return true;
}

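/* Clean up when a task leaves real-time mode or exits: remove it from its
 * reservation if it is still present and free its per-task state. */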
static void pres_task_exit(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	raw_spin_lock_irqsave(&state->lock, flags);

	TRACE_TASK(tsk, "task exits at %llu (present:%d sched:%d)\n",
		   litmus_clock(), is_present(tsk), state->scheduled == tsk);

	if (state->scheduled == tsk)
		state->scheduled = NULL;

	/* remove from queues */
	if (is_present(tsk)) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */
		sup_update_time(&state->sup_env, litmus_clock());
		task_departs(tsk, 0);
		/* NOTE: drops state->lock */
		pres_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	kfree(tsk_rt(tsk)->plugin_state);
	tsk_rt(tsk)->plugin_state = NULL;
}

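/* Report the calling task's reservation budget: how much has been consumed
 * so far and how much currently remains. */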
static void pres_current_budget(lt_t *used_so_far, lt_t *remaining)
{
	struct pres_task_state *tstate = get_pres_state(current);
	struct pres_cpu_state *state;

	/* FIXME: protect against concurrent task_exit() */

	local_irq_disable();

	state = cpu_state_for(tstate->cpu);

	raw_spin_lock(&state->lock);

	sup_update_time(&state->sup_env, litmus_clock());
	if (remaining)
		*remaining = tstate->client->reservation->cur_budget;
	if (used_so_far)
		*used_so_far = tstate->client->reservation->budget_consumed;
	pres_update_timer_and_unlock(state);

	local_irq_enable();
}

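/* Allocate a reservation of the requested type (periodic/sporadic polling or
 * table-driven) and add it to the target CPU's reservation environment.
 * Returns the reservation ID on success, -EEXIST if the ID is already in
 * use, or another negative error code on failure. */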
static long do_pres_reservation_create(
	int res_type,
	struct reservation_config *config)
{
	struct pres_cpu_state *state;
	struct reservation* res;
	struct reservation* new_res = NULL;
	unsigned long flags;
	long err;

	/* Allocate before we grab a spin lock. */
	switch (res_type) {
	case PERIODIC_POLLING:
	case SPORADIC_POLLING:
		err = alloc_polling_reservation(res_type, config, &new_res);
		break;

	case TABLE_DRIVEN:
		err = alloc_table_driven_reservation(config, &new_res);
		break;

	default:
		err = -EINVAL;
		break;
	}

	if (err)
		return err;

	state = cpu_state_for(config->cpu);
	raw_spin_lock_irqsave(&state->lock, flags);

	res = sup_find_by_id(&state->sup_env, config->id);
	if (!res) {
		sup_add_new_reservation(&state->sup_env, new_res);
		err = config->id;
	} else {
		err = -EEXIST;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	if (err < 0)
		kfree(new_res);

	return err;
}

static long pres_reservation_create(int res_type, void* __user _config)
{
	struct reservation_config config;

	TRACE("Attempt to create reservation (%d)\n", res_type);

	if (copy_from_user(&config, _config, sizeof(config)))
		return -EFAULT;

	if (config.cpu < 0 || !cpu_online(config.cpu)) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "CPU %d offline\n", config.id, config.cpu);
		return -EINVAL;
	}

	return do_pres_reservation_create(res_type, &config);
}

static struct domain_proc_info pres_domain_proc_info;

static long pres_get_domain_proc_info(struct domain_proc_info **ret)
{
	*ret = &pres_domain_proc_info;
	return 0;
}

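/* Under P-RES every CPU forms its own scheduling domain; build the 1:1
 * CPU-to-domain mapping exported through the LITMUS^RT domain proc
 * interface. */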
static void pres_setup_domain_proc(void)
{
	int i, cpu;
	int num_rt_cpus = num_online_cpus();

	struct cd_mapping *cpu_map, *domain_map;

	memset(&pres_domain_proc_info, 0, sizeof(pres_domain_proc_info));
	init_domain_proc_info(&pres_domain_proc_info, num_rt_cpus, num_rt_cpus);
	pres_domain_proc_info.num_cpus = num_rt_cpus;
	pres_domain_proc_info.num_domains = num_rt_cpus;

	i = 0;
	for_each_online_cpu(cpu) {
		cpu_map = &pres_domain_proc_info.cpu_to_domains[i];
		domain_map = &pres_domain_proc_info.domain_to_cpus[i];

		cpu_map->id = cpu;
		domain_map->id = i;
		cpumask_set_cpu(i, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);
		++i;
	}
}

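/* Plugin activation: initialize each online CPU's lock, reservation
 * environment, and pinned scheduler hrtimer. */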
static long pres_activate_plugin(void)
{
	int cpu;
	struct pres_cpu_state *state;

	for_each_online_cpu(cpu) {
		TRACE("Initializing CPU%d...\n", cpu);

		state = cpu_state_for(cpu);

		raw_spin_lock_init(&state->lock);
		state->cpu = cpu;
		state->scheduled = NULL;

		sup_init(&state->sup_env);

		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
		state->timer.function = on_scheduling_timer;
	}

	pres_setup_domain_proc();

	return 0;
}

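/* Plugin deactivation: cancel each CPU's scheduler timer and destroy all
 * reservations registered with its environment. */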
static long pres_deactivate_plugin(void)
{
	int cpu;
	struct pres_cpu_state *state;
	struct reservation *res;

	for_each_online_cpu(cpu) {
		state = cpu_state_for(cpu);
		raw_spin_lock(&state->lock);

		hrtimer_cancel(&state->timer);

		/* Delete all reservations --- assumes struct reservation
		 * is prefix of containing struct. */

		while (!list_empty(&state->sup_env.all_reservations)) {
			res = list_first_entry(
				&state->sup_env.all_reservations,
				struct reservation, all_list);
			list_del(&res->all_list);
			if (res->ops->shutdown)
				res->ops->shutdown(res);
			kfree(res);
		}

		raw_spin_unlock(&state->lock);
	}

	destroy_domain_proc_info(&pres_domain_proc_info);
	return 0;
}

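/* P-RES plugin interface: partitioned, reservation-based scheduling backed
 * by a per-CPU uniprocessor reservation environment. */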
static struct sched_plugin pres_plugin = {
	.plugin_name = "P-RES",
	.schedule = pres_schedule,
	.task_block = pres_task_block,
	.task_wake_up = pres_task_resume,
	.admit_task = pres_admit_task,
	.task_new = pres_task_new,
	.fork_task = pres_fork_task,
	.task_exit = pres_task_exit,
	.complete_job = complete_job_oneshot,
	.get_domain_proc_info = pres_get_domain_proc_info,
	.activate_plugin = pres_activate_plugin,
	.deactivate_plugin = pres_deactivate_plugin,
	.reservation_create = pres_reservation_create,
	.current_budget = pres_current_budget,
};

static int __init init_pres(void)
{
	return register_sched_plugin(&pres_plugin);
}

module_init(init_pres);