From 1d516ebe47adcc6998f6bb8dbee7942e20f6eaf9 Mon Sep 17 00:00:00 2001
From: Christopher Kenna
Date: Mon, 5 Sep 2011 22:57:09 -0400
Subject: Partial cyclic executive plugin.

This is not tested and is missing everything for /proc. I am just checking
it in so that the code is backed up in a git repository.
---
 include/litmus/sched_mc.h |   1 +
 litmus/Kconfig            |   8 +
 litmus/Makefile           |   2 +-
 litmus/litmus.c           |  16 +-
 litmus/sched_mc_ce.c      | 495 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 517 insertions(+), 5 deletions(-)
 create mode 100644 litmus/sched_mc_ce.c

diff --git a/include/litmus/sched_mc.h b/include/litmus/sched_mc.h
index 66910773f430..266f89172f19 100644
--- a/include/litmus/sched_mc.h
+++ b/include/litmus/sched_mc.h
@@ -15,6 +15,7 @@ enum crit_level {
 
 struct mc_task {
         enum crit_level crit;
+        int lvl_a_id;
 };
 
 struct mc_job {
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 9a1cc2436580..b8f6a9159eb2 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -32,6 +32,14 @@ config PLUGIN_MC
 
           If unsure, say Yes.
 
+config PLUGIN_MC_LEVEL_A_MAX_TASKS
+        int "Maximum level A tasks"
+        depends on PLUGIN_MC
+        range 1 128
+        default 32
+        help
+          The maximum number of level A tasks allowed per CPU.
+
 config RELEASE_MASTER
         bool "Release-master Support"
         depends on ARCH_HAS_SEND_PULL_TIMERS
diff --git a/litmus/Makefile b/litmus/Makefile
index 7d7003592138..03dc31a12711 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -22,7 +22,7 @@ obj-y = sched_plugin.o litmus.o \
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
-obj-$(CONFIG_PLUGIN_MC) += sched_mc.o
+obj-$(CONFIG_PLUGIN_MC) += sched_mc.o sched_mc_ce.o
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 16b3aeda5615..7db9fdadc7db 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -314,17 +314,25 @@ asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param
                 goto out_unlock;
         }
 
+        /* check that the parameters passed in are valid */
         if (mc.crit < CRIT_LEVEL_A || mc.crit > CRIT_LEVEL_D) {
                 printk(KERN_WARNING "litmus: real-time task %d rejected because "
                         "of invalid criticality level\n", pid);
                 goto out_unlock;
         }
+        if (CRIT_LEVEL_A == mc.crit &&
+                        (mc.lvl_a_id < 0 ||
+                         mc.lvl_a_id >= CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS))
+        {
+                printk(KERN_WARNING "litmus: real-time task %d rejected because "
+                        "of invalid level A id\n", pid);
+                goto out_unlock;
+        }
 
         mc_data = tsk_rt(target)->mc_data;
-        if (!mc_data)
-        {
-                mc_data = kmalloc(sizeof(*mc_data), GFP_ATOMIC);
+        if (!mc_data) {
+                mc_data = kzalloc(sizeof(*mc_data), GFP_ATOMIC);
                 if (!mc_data)
                 {
                         retval = -ENOMEM;
@@ -332,8 +340,8 @@ asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param
                 }
                 tsk_rt(target)->mc_data = mc_data;
         }
-        mc_data->mc_task.crit = mc.crit;
+        mc_data->mc_task = mc;
         retval = 0;
 out_unlock:
         read_unlock_irq(&tasklist_lock);
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
new file mode 100644
index 000000000000..21d1d8789c2b
--- /dev/null
+++ b/litmus/sched_mc_ce.c
@@ -0,0 +1,495 @@
+/**
+ * litmus/sched_mc_ce.c
+ *
+ * The Cyclic Executive (CE) scheduler used by the mixed-criticality scheduling
+ * algorithm.
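+ *
+ * Each CPU runs a fixed table of level A slots (struct ce_dom_pid_entry):
+ * exec_cost is the length of a slot and acc_time is the cumulative time at
+ * which that slot ends within one cycle of length cycle_time. As an
+ * illustration (these numbers are not set up anywhere in this commit, since
+ * the /proc interface that fills the table is still missing): two slots of
+ * 2 ms and 3 ms give acc_time values of 2 ms and 5 ms with cycle_time = 5 ms,
+ * and mc_ce_schedule_at() maps an offset of (when - start_time) % cycle_time
+ * in [0, 2 ms) to slot 0 and in [2 ms, 5 ms) to slot 1.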
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
+
+#define tsk_mc_data(t)  (tsk_rt(t)->mc_data)
+#define tsk_mc_crit(t)  (tsk_mc_data(t)->mc_task.crit)
+#define is_active_plugin()      (litmus == &mc_ce_plugin)
+
+/* -1 so that the first task to call release_at() wins the race below and
+ * sets the common start time. */
+static atomic_t start_time_set = ATOMIC_INIT(-1);
+static atomic64_t start_time = ATOMIC64_INIT(0);
+
+/*
+ * Cache the budget along with the struct PID for a task so that we don't need
+ * to fetch its task_struct every time we check to see what should be
+ * scheduled.
+ */
+struct ce_dom_pid_entry {
+        struct pid *pid;
+        lt_t exec_cost;
+        /* accumulated exec_cost, i.e. the offset in the cycle at which this
+         * slot ends */
+        lt_t acc_time;
+};
+
+struct ce_dom_data {
+        int cpu;
+        struct task_struct *scheduled, *should_schedule;
+        /*
+         * Each CPU needs a mapping of level A ID (integer) to struct pid so
+         * that we can get its task struct.
+         */
+        struct ce_dom_pid_entry pid_entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
+        int num_pid_entries;
+        lt_t cycle_time;
+        struct hrtimer_start_on_info timer_info;
+        struct hrtimer timer;
+};
+
+DEFINE_PER_CPU(domain_t, mc_ce_doms);
+DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
+
+/*
+ * Return the index into the PID entries table of what to schedule next.
+ * Don't call if the table is empty. Assumes the caller has the domain lock.
+ *
+ * TODO Currently O(n) in the number of tasks on the CPU. Binary search?
+ */
+static int mc_ce_schedule_at(const lt_t when, const domain_t *dom)
+{
+        const struct ce_dom_data *ce_data = dom->data;
+        const struct ce_dom_pid_entry *pid_entry = NULL;
+        lt_t offset;
+        int i;
+
+        BUG_ON(ce_data->cycle_time < 1);
+        BUG_ON(ce_data->num_pid_entries < 1);
+
+        offset = (when - atomic64_read(&start_time)) % ce_data->cycle_time;
+        for (i = 0; i < ce_data->num_pid_entries; ++i) {
+                pid_entry = &ce_data->pid_entries[i];
+                if (offset < pid_entry->acc_time) {
+                        /* found task to schedule in this window */
+                        break;
+                }
+        }
+        /* can only happen if cycle_time is not right */
+        BUG_ON(pid_entry->acc_time > ce_data->cycle_time);
+        return i;
+}
+
+static struct task_struct *mc_ce_schedule(struct task_struct *prev)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+        struct task_struct *next = NULL;
+        /* for states */
+        int exists, np, preempt;
+
+        raw_spin_lock(dom->lock);
+
+        /* sanity checking */
+        BUG_ON(ce_data->scheduled && ce_data->scheduled != prev);
+        BUG_ON(ce_data->scheduled && !is_realtime(prev));
+        BUG_ON(is_realtime(prev) && !ce_data->scheduled);
+
+        /* figure out state */
+        exists = ce_data->scheduled != NULL;
+        np = exists && is_np(ce_data->scheduled);
+        preempt = ce_data->scheduled != ce_data->should_schedule;
+
+        if (np) {
+                /* scheduled real time task needs to continue */
+                request_exit_np(ce_data->scheduled);
+                next = prev;
+        } else if (ce_data->should_schedule &&
+                        is_running(ce_data->should_schedule)) {
+                /* schedule the task for this period if it's not blocked */
+                next = ce_data->should_schedule;
+        }
+
+        sched_state_task_picked();
+        raw_spin_unlock(dom->lock);
+        return next;
+}
+
+static void mc_ce_finish_switch(struct task_struct *prev)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+
+        if (is_realtime(current) && CRIT_LEVEL_A == tsk_mc_crit(current))
+                ce_data->scheduled = current;
+        else
+                ce_data->scheduled = NULL;
+}
+
+/*
+ * Called for every local timer interrupt.
+ * Linux calls this with interrupts disabled, AFAIK.
+ */
+static void mc_ce_tick(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+        struct task_struct *should_schedule;
+
+        if (is_realtime(ts) && CRIT_LEVEL_A == tsk_mc_crit(ts)) {
+                raw_spin_lock(dom->lock);
+                should_schedule = ce_data->should_schedule;
+                raw_spin_unlock(dom->lock);
+
+                if (!is_np(ts) && ts != should_schedule) {
+                        litmus_reschedule_local();
+                } else if (is_user_np(ts)) {
+                        request_exit_np(ts);
+                }
+        }
+}
+
+/*
+ * The admit_task callback checks whether this task is permitted to enter the
+ * system. Here we look up the task's PID structure and save it in the proper
+ * slot on the CPU this task will run on.
+ */
+static long mc_ce_admit_task(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts));
+        struct ce_dom_data *ce_data = dom->data;
+        struct mc_data *mcd = tsk_mc_data(ts);
+        struct pid *pid = NULL;
+        long retval = -EINVAL;
+        unsigned long flags;
+        int lvl_a_id;
+
+        /* check that the task has migrated to the right CPU (like in sched_cedf) */
+        if (task_cpu(ts) != get_partition(ts)) {
+                printk(KERN_INFO "litmus: %d admitted on CPU %d but wants %d\n",
+                                ts->pid, task_cpu(ts), get_partition(ts));
+                goto out;
+        }
+
+        /* only level A tasks can be CE */
+        if (!mcd || CRIT_LEVEL_A != tsk_mc_crit(ts)) {
+                printk(KERN_INFO "litmus: non-MC or non-level-A task %d\n",
+                                ts->pid);
+                goto out;
+        }
+        /* only read the level A id once we know mcd is not NULL */
+        lvl_a_id = mcd->mc_task.lvl_a_id;
+
+        /* try to get the task's PID structure */
+        pid = get_task_pid(ts, PIDTYPE_PID);
+        if (IS_ERR_OR_NULL(pid)) {
+                printk(KERN_INFO "litmus: couldn't get pid struct for %d\n",
+                                ts->pid);
+                goto out;
+        }
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        if (lvl_a_id >= ce_data->num_pid_entries) {
+                printk(KERN_INFO "litmus: level A id greater than the "
+                                "configured number of tasks %d for task %d "
+                                "on cpu %d\n",
+                                ce_data->num_pid_entries, ts->pid,
+                                get_partition(ts));
+                goto out_put_pid_unlock;
+        }
+        if (ce_data->pid_entries[lvl_a_id].pid) {
+                printk(KERN_INFO "litmus: already have saved pid info for "
+                                "id %d on cpu %d\n",
+                                lvl_a_id, get_partition(ts));
+                goto out_put_pid_unlock;
+        }
+        if (get_exec_cost(ts) != ce_data->pid_entries[lvl_a_id].exec_cost) {
+                printk(KERN_INFO "litmus: saved exec cost %llu and task exec "
+                                "cost %llu differ\n",
+                                ce_data->pid_entries[lvl_a_id].exec_cost,
+                                get_exec_cost(ts));
+                goto out_put_pid_unlock;
+        }
+        ce_data->pid_entries[lvl_a_id].pid = pid;
+        retval = 0;
+        /* don't call put_pid if we are successful */
+        goto out_unlock;
+
+out_put_pid_unlock:
+        put_pid(pid);
+out_unlock:
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+out:
+        return retval;
+}
+
+/*
+ * Called to set up a new real-time task (after the admit_task callback).
+ * At this point the task's struct PID is already hooked up on the destination
+ * CPU. The task may already be running.
+ */
+static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, task_cpu(ts));
+        struct ce_dom_data *ce_data = dom->data;
+        struct pid *pid_should_be_running;
+        unsigned long flags;
+        int idx, should_be_running;
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        idx = mc_ce_schedule_at(litmus_clock(), dom);
+        pid_should_be_running = ce_data->pid_entries[idx].pid;
+        rcu_read_lock();
+        should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
+        rcu_read_unlock();
+        if (running) {
+                /* admit_task checks that the task is not on the wrong CPU */
+                BUG_ON(task_cpu(ts) != get_partition(ts));
+                BUG_ON(ce_data->scheduled);
+                ce_data->scheduled = ts;
+
+                if (!should_be_running)
+                        preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
+        } else if (!running && should_be_running) {
+                ce_data->should_schedule = ts;
+                preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
+        }
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/*
+ * Called to re-introduce a task after blocking.
+ * Can potentially be called multiple times.
+ */
+static void mc_ce_task_wake_up(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+        unsigned long flags;
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        if (ts == ce_data->should_schedule && ts != ce_data->scheduled)
+                preempt_if_preemptable(ts, ce_data->cpu);
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/*
+ * Called to notify the plugin of a blocking real-time task. Only called for
+ * real-time tasks and before schedule is called.
+ */
+static void mc_ce_task_block(struct task_struct *ts)
+{
+        /* nothing to do because it will be taken care of in schedule */
+}
+
+/*
+ * Called when the complete_job system call is invoked from user space.
+ */
+static long mc_ce_complete_job(void)
+{
+        /* TODO */
+        printk(KERN_EMERG "complete job called TODO\n");
+        BUG();
+        return 0;
+}
+
+/*
+ * Called when a task switches from RT mode back to normal mode.
+ */
+static void mc_ce_task_exit(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts));
+        struct ce_dom_data *ce_data = dom->data;
+        unsigned long flags;
+        struct pid *pid;
+        const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
+
+        BUG_ON(task_cpu(ts) != get_partition(ts));
+        BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
+        BUG_ON(lvl_a_id >= ce_data->num_pid_entries);
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        pid = ce_data->pid_entries[lvl_a_id].pid;
+        BUG_ON(!pid);
+        put_pid(pid);
+        ce_data->pid_entries[lvl_a_id].pid = NULL;
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/***********************************************************
+ * Timer stuff
+ **********************************************************/
+
+/*
+ * What to do when a timer fires. The timer should only be armed if the number
+ * of PID entries is positive.
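+ *
+ * For example, with the illustrative 2 ms / 3 ms table from the header
+ * comment, a callback that fires 3 ms into a cycle computes
+ * cycle_start_abs = now - 3 ms, picks index 1, re-arms itself for
+ * cycle_start_abs + 5 ms (the end of that slot), and marks slot 1's task as
+ * should_schedule.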
+ */ +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + struct ce_dom_data *ce_data = container_of(timer, + struct ce_dom_data, timer); + domain_t *dom = &per_cpu(mc_ce_doms, ce_data->cpu); + /* relative and absolute times for cycles */ + lt_t now, offset_rel, cycle_start_abs, next_timer_abs; + struct ce_dom_pid_entry *pid_entry; + unsigned long flags; + int idx; + + raw_spin_lock_irqsave(dom->lock, flags); + + now = litmus_clock(); + offset_rel = (now - atomic64_read(&start_time)) % ce_data->cycle_time; + cycle_start_abs = now - offset_rel; + idx = mc_ce_schedule_at(now, dom); + pid_entry = &ce_data->pid_entries[idx]; + next_timer_abs = cycle_start_abs + pid_entry->acc_time; + hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs)); + /* get the task_struct (pid_task can accept a NULL) */ + rcu_read_lock(); + ce_data->should_schedule = pid_task(pid_entry->pid, PIDTYPE_PID); + rcu_read_unlock(); + if (ce_data->scheduled != ce_data->should_schedule) + preempt_if_preemptable(ce_data->scheduled, ce_data->cpu); + + raw_spin_unlock_irqrestore(dom->lock, flags); + + return HRTIMER_RESTART; +} + +/* + * Cancel timers on all CPUs. Returns 1 if any were active. + */ +static int cancel_all_timers(void) +{ + struct ce_dom_data *ce_data; + domain_t *dom; + int cpu, ret = 0; + + for_each_online_cpu(cpu) { + dom = &per_cpu(mc_ce_doms, cpu); + ce_data = dom->data; + ret = ret || hrtimer_cancel(&ce_data->timer); + } + return ret; +} + +/* + * Arm all timers so that they start at the new value of start time. + * Any CPU without CE PID entries won't have a timer armed. + * All timers should be canceled before calling this. + */ +static void arm_all_timers(void) +{ + struct ce_dom_data *ce_data; + domain_t *dom; + int cpu; + const lt_t start = atomic64_read(&start_time); + + for_each_online_cpu(cpu) { + dom = &per_cpu(mc_ce_doms, cpu); + ce_data = dom->data; + if (0 == ce_data->num_pid_entries) + continue; + hrtimer_start_on(cpu, &ce_data->timer_info, &ce_data->timer, + ns_to_ktime(start), HRTIMER_MODE_ABS_PINNED); + } +} + +/* + * There are no real releases in the CE, but the task releasing code will + * call this. We can re-set our notion of the CE period start to make + * the schedule line up. 
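+ *
+ * Only the first caller may reset the start time: start_time_set begins at
+ * -1, so the task whose atomic_inc_and_test() brings it to zero wins and
+ * re-arms the timers, while every later caller simply undoes its increment.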
+ */
+static void mc_ce_release_at(struct task_struct *ts, lt_t start)
+{
+        if (atomic_inc_and_test(&start_time_set)) {
+                /* in this case, we won the race */
+                atomic64_set(&start_time, start);
+                cancel_all_timers();
+                arm_all_timers();
+        } else
+                atomic_dec(&start_time_set);
+}
+
+static long mc_ce_activate_plugin(void)
+{
+        struct ce_dom_data *ce_data;
+        domain_t *dom;
+        int cpu;
+
+        for_each_online_cpu(cpu) {
+                dom = &per_cpu(mc_ce_doms, cpu);
+                ce_data = dom->data;
+                ce_data->scheduled = NULL;
+                ce_data->should_schedule = NULL;
+        }
+
+        /* back to -1 so that the next release_at() call can win the race */
+        atomic_set(&start_time_set, -1);
+        atomic64_set(&start_time, litmus_clock());
+        arm_all_timers();
+        return 0;
+}
+
+static long mc_ce_deactivate_plugin(void)
+{
+        domain_t *dom;
+        struct ce_dom_data *ce_data;
+        int cpu;
+
+        cancel_all_timers();
+        for_each_online_cpu(cpu) {
+                dom = &per_cpu(mc_ce_doms, cpu);
+                ce_data = dom->data;
+                atomic_set(&ce_data->timer_info.state,
+                                HRTIMER_START_ON_INACTIVE);
+        }
+        return 0;
+}
+
+/* Plugin object */
+static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
+        .plugin_name            = "MC-CE",
+        .admit_task             = mc_ce_admit_task,
+        .task_new               = mc_ce_task_new,
+        .complete_job           = mc_ce_complete_job,
+        .release_at             = mc_ce_release_at,
+        .task_exit              = mc_ce_task_exit,
+        .schedule               = mc_ce_schedule,
+        .finish_switch          = mc_ce_finish_switch,
+        .tick                   = mc_ce_tick,
+        .task_wake_up           = mc_ce_task_wake_up,
+        .task_block             = mc_ce_task_block,
+        .activate_plugin        = mc_ce_activate_plugin,
+        .deactivate_plugin      = mc_ce_deactivate_plugin,
+};
+
+static int __init init_sched_mc_ce(void)
+{
+        struct ce_dom_data *ce_data;
+        domain_t *dom;
+        int cpu, i;
+
+        for_each_online_cpu(cpu) {
+                dom = &per_cpu(mc_ce_doms, cpu);
+                pd_domain_init(dom, NULL, NULL, NULL, NULL);
+                dom->data = &per_cpu(_mc_ce_dom_data, cpu);
+                ce_data = dom->data;
+                hrtimer_init(&ce_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+                hrtimer_start_on_info_init(&ce_data->timer_info);
+                ce_data->cpu = cpu;
+                ce_data->num_pid_entries = 0;
+                ce_data->cycle_time = 0;
+                ce_data->timer.function = timer_callback;
+
+                for (i = 0; i < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS; ++i) {
+                        ce_data->pid_entries[i].pid = NULL;
+                        ce_data->pid_entries[i].exec_cost = 0;
+                        ce_data->pid_entries[i].acc_time = 0;
+                }
+        }
+        return register_sched_plugin(&mc_ce_plugin);
+}
+
+module_init(init_sched_mc_ce);
--
cgit v1.2.2
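
A note on usage (not part of the patch itself): the new lvl_a_id field is what
ties a level A task to one slot of the per-CPU pid_entries[] table in
sched_mc_ce.c. The sketch below shows how user space would be expected to tag
such a task through the sys_set_rt_task_mc_param() system call that this patch
extends. It is a minimal sketch only: the set_mc_task_param() wrapper and the
user-space copy of struct mc_task are assumptions here, since liblitmus support
is not part of this commit.

    #include <sys/types.h>

    /* User-space mirror of the kernel definitions in include/litmus/sched_mc.h. */
    enum crit_level { CRIT_LEVEL_A, CRIT_LEVEL_B, CRIT_LEVEL_C, CRIT_LEVEL_D };

    struct mc_task {
            enum crit_level crit;
            int lvl_a_id;
    };

    /* Hypothetical wrapper around sys_set_rt_task_mc_param(); not in this patch. */
    int set_mc_task_param(pid_t pid, struct mc_task *param);

    /* Tag a task as level A and bind it to table slot 'slot' on its partition. */
    static int make_level_a(pid_t pid, int slot)
    {
            /* litmus.c rejects ids outside [0, CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS) */
            struct mc_task mct = {
                    .crit = CRIT_LEVEL_A,
                    .lvl_a_id = slot,
            };
            return set_mc_task_param(pid, &mct);
    }

Even with a valid id, mc_ce_admit_task() will still refuse the task unless it
already runs on its partition, its execution cost matches the slot's exec_cost,
and no other task has claimed the same slot.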