From 1d516ebe47adcc6998f6bb8dbee7942e20f6eaf9 Mon Sep 17 00:00:00 2001
From: Christopher Kenna
Date: Mon, 5 Sep 2011 22:57:09 -0400
Subject: Partial cyclic executive plugin.

This is not tested and is missing everything for /proc. I am just checking
it in so that the code is backed up in a git repository.
---
 include/litmus/sched_mc.h |   1 +
 litmus/Kconfig            |   8 +
 litmus/Makefile           |   2 +-
 litmus/litmus.c           |  16 +-
 litmus/sched_mc_ce.c      | 495 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 517 insertions(+), 5 deletions(-)
 create mode 100644 litmus/sched_mc_ce.c

diff --git a/include/litmus/sched_mc.h b/include/litmus/sched_mc.h
index 66910773f430..266f89172f19 100644
--- a/include/litmus/sched_mc.h
+++ b/include/litmus/sched_mc.h
@@ -15,6 +15,7 @@ enum crit_level {
 
 struct mc_task {
         enum crit_level crit;
+        int lvl_a_id;
 };
 
 struct mc_job {
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 9a1cc2436580..b8f6a9159eb2 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -32,6 +32,14 @@ config PLUGIN_MC
 
           If unsure, say Yes.
 
+config PLUGIN_MC_LEVEL_A_MAX_TASKS
+        int "Maximum level A tasks"
+        depends on PLUGIN_MC
+        range 1 128
+        default 32
+        help
+          The maximum number of level A tasks allowed per CPU.
+
 config RELEASE_MASTER
         bool "Release-master Support"
         depends on ARCH_HAS_SEND_PULL_TIMERS
diff --git a/litmus/Makefile b/litmus/Makefile
index 7d7003592138..03dc31a12711 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -22,7 +22,7 @@ obj-y = sched_plugin.o litmus.o \
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
-obj-$(CONFIG_PLUGIN_MC) += sched_mc.o
+obj-$(CONFIG_PLUGIN_MC) += sched_mc.o sched_mc_ce.o
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 16b3aeda5615..7db9fdadc7db 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -314,17 +314,25 @@ asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param
                 goto out_unlock;
         }
 
+        /* check that the parameters passed in are valid */
         if (mc.crit < CRIT_LEVEL_A || mc.crit > CRIT_LEVEL_D) {
                 printk(KERN_WARNING "litmus: real-time task %d rejected because "
                         "of invalid criticality level\n", pid);
                 goto out_unlock;
         }
+        if (CRIT_LEVEL_A == mc.crit &&
+                        (mc.lvl_a_id < 0 ||
+                         mc.lvl_a_id >= CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS))
+        {
+                printk(KERN_WARNING "litmus: real-time task %d rejected because "
+                        "of invalid level A id\n", pid);
+                goto out_unlock;
+        }
 
         mc_data = tsk_rt(target)->mc_data;
-        if (!mc_data)
-        {
-                mc_data = kmalloc(sizeof(*mc_data), GFP_ATOMIC);
+        if (!mc_data) {
+                mc_data = kzalloc(sizeof(*mc_data), GFP_ATOMIC);
                 if (!mc_data)
                 {
                         retval = -ENOMEM;
@@ -332,8 +340,8 @@ asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param
                 }
                 tsk_rt(target)->mc_data = mc_data;
         }
-        mc_data->mc_task.crit = mc.crit;
+        mc_data->mc_task = mc;
         retval = 0;
 out_unlock:
         read_unlock_irq(&tasklist_lock);
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
new file mode 100644
index 000000000000..21d1d8789c2b
--- /dev/null
+++ b/litmus/sched_mc_ce.c
@@ -0,0 +1,495 @@
+/**
+ * litmus/sched_mc_ce.c
+ *
+ * The Cyclic Executive (CE) scheduler used by the mixed-criticality scheduling
+ * algorithm.
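+ *
+ * Each CPU runs a fixed table of level A slots (struct ce_dom_pid_entry):
+ * exec_cost is the length of a slot and acc_time is the cumulative time at
+ * which that slot ends within one cycle of length cycle_time. As an
+ * illustration (these numbers are not set up anywhere in this commit, since
+ * the /proc interface that fills the table is still missing): two slots of
+ * 2 ms and 3 ms give acc_time values of 2 ms and 5 ms with cycle_time = 5 ms,
+ * and mc_ce_schedule_at() maps an offset of (when - start_time) % cycle_time
+ * in [0, 2 ms) to slot 0 and in [2 ms, 5 ms) to slot 1.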
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
+
+#define tsk_mc_data(t)  (tsk_rt(t)->mc_data)
+#define tsk_mc_crit(t)  (tsk_mc_data(t)->mc_task.crit)
+#define is_active_plugin()      (litmus == &mc_ce_plugin)
+
+/* -1 so that the first task to call release_at() wins the race below and
+ * sets the common start time. */
+static atomic_t start_time_set = ATOMIC_INIT(-1);
+static atomic64_t start_time = ATOMIC64_INIT(0);
+
+/*
+ * Cache the budget along with the struct PID for a task so that we don't need
+ * to fetch its task_struct every time we check to see what should be
+ * scheduled.
+ */
+struct ce_dom_pid_entry {
+        struct pid *pid;
+        lt_t exec_cost;
+        /* accumulated exec_cost, i.e. the offset in the cycle at which this
+         * slot ends */
+        lt_t acc_time;
+};
+
+struct ce_dom_data {
+        int cpu;
+        struct task_struct *scheduled, *should_schedule;
+        /*
+         * Each CPU needs a mapping of level A ID (integer) to struct pid so
+         * that we can get its task struct.
+         */
+        struct ce_dom_pid_entry pid_entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
+        int num_pid_entries;
+        lt_t cycle_time;
+        struct hrtimer_start_on_info timer_info;
+        struct hrtimer timer;
+};
+
+DEFINE_PER_CPU(domain_t, mc_ce_doms);
+DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
+
+/*
+ * Return the index into the PID entries table of what to schedule next.
+ * Don't call if the table is empty. Assumes the caller has the domain lock.
+ *
+ * TODO Currently O(n) in the number of tasks on the CPU. Binary search?
+ */
+static int mc_ce_schedule_at(const lt_t when, const domain_t *dom)
+{
+        const struct ce_dom_data *ce_data = dom->data;
+        const struct ce_dom_pid_entry *pid_entry = NULL;
+        lt_t offset;
+        int i;
+
+        BUG_ON(ce_data->cycle_time < 1);
+        BUG_ON(ce_data->num_pid_entries < 1);
+
+        offset = (when - atomic64_read(&start_time)) % ce_data->cycle_time;
+        for (i = 0; i < ce_data->num_pid_entries; ++i) {
+                pid_entry = &ce_data->pid_entries[i];
+                if (offset < pid_entry->acc_time) {
+                        /* found task to schedule in this window */
+                        break;
+                }
+        }
+        /* can only happen if cycle_time is not right */
+        BUG_ON(pid_entry->acc_time > ce_data->cycle_time);
+        return i;
+}
+
+static struct task_struct *mc_ce_schedule(struct task_struct *prev)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+        struct task_struct *next = NULL;
+        /* for states */
+        int exists, np, preempt;
+
+        raw_spin_lock(dom->lock);
+
+        /* sanity checking */
+        BUG_ON(ce_data->scheduled && ce_data->scheduled != prev);
+        BUG_ON(ce_data->scheduled && !is_realtime(prev));
+        BUG_ON(is_realtime(prev) && !ce_data->scheduled);
+
+        /* figure out state */
+        exists = ce_data->scheduled != NULL;
+        np = exists && is_np(ce_data->scheduled);
+        preempt = ce_data->scheduled != ce_data->should_schedule;
+
+        if (np) {
+                /* scheduled real time task needs to continue */
+                request_exit_np(ce_data->scheduled);
+                next = prev;
+        } else if (ce_data->should_schedule &&
+                        is_running(ce_data->should_schedule)) {
+                /* schedule the task for this period if it's not blocked */
+                next = ce_data->should_schedule;
+        }
+
+        sched_state_task_picked();
+        raw_spin_unlock(dom->lock);
+        return next;
+}
+
+static void mc_ce_finish_switch(struct task_struct *prev)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+
+        if (is_realtime(current) && CRIT_LEVEL_A == tsk_mc_crit(current))
+                ce_data->scheduled = current;
+        else
+                ce_data->scheduled = NULL;
+}
+
+/*
+ * Called for every local timer interrupt.
+ * Linux calls this with interrupts disabled, AFAIK.
+ */
+static void mc_ce_tick(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+        struct task_struct *should_schedule;
+
+        if (is_realtime(ts) && CRIT_LEVEL_A == tsk_mc_crit(ts)) {
+                raw_spin_lock(dom->lock);
+                should_schedule = ce_data->should_schedule;
+                raw_spin_unlock(dom->lock);
+
+                if (!is_np(ts) && ts != should_schedule) {
+                        litmus_reschedule_local();
+                } else if (is_user_np(ts)) {
+                        request_exit_np(ts);
+                }
+        }
+}
+
+/*
+ * The admit_task callback checks whether this task is permitted to enter the
+ * system. Here we look up the task's PID structure and save it in the proper
+ * slot on the CPU this task will run on.
+ */
+static long mc_ce_admit_task(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts));
+        struct ce_dom_data *ce_data = dom->data;
+        struct mc_data *mcd = tsk_mc_data(ts);
+        struct pid *pid = NULL;
+        long retval = -EINVAL;
+        unsigned long flags;
+        int lvl_a_id;
+
+        /* check that the task has migrated to the right CPU (like in sched_cedf) */
+        if (task_cpu(ts) != get_partition(ts)) {
+                printk(KERN_INFO "litmus: %d admitted on CPU %d but wants %d\n",
+                                ts->pid, task_cpu(ts), get_partition(ts));
+                goto out;
+        }
+
+        /* only level A tasks can be CE */
+        if (!mcd || CRIT_LEVEL_A != tsk_mc_crit(ts)) {
+                printk(KERN_INFO "litmus: non-MC or non-level-A task %d\n",
+                                ts->pid);
+                goto out;
+        }
+        /* only read the level A id once we know mcd is not NULL */
+        lvl_a_id = mcd->mc_task.lvl_a_id;
+
+        /* try to get the task's PID structure */
+        pid = get_task_pid(ts, PIDTYPE_PID);
+        if (IS_ERR_OR_NULL(pid)) {
+                printk(KERN_INFO "litmus: couldn't get pid struct for %d\n",
+                                ts->pid);
+                goto out;
+        }
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        if (lvl_a_id >= ce_data->num_pid_entries) {
+                printk(KERN_INFO "litmus: level A id greater than the "
+                                "configured number of tasks %d for task %d "
+                                "on cpu %d\n",
+                                ce_data->num_pid_entries, ts->pid,
+                                get_partition(ts));
+                goto out_put_pid_unlock;
+        }
+        if (ce_data->pid_entries[lvl_a_id].pid) {
+                printk(KERN_INFO "litmus: already have saved pid info for "
+                                "id %d on cpu %d\n",
+                                lvl_a_id, get_partition(ts));
+                goto out_put_pid_unlock;
+        }
+        if (get_exec_cost(ts) != ce_data->pid_entries[lvl_a_id].exec_cost) {
+                printk(KERN_INFO "litmus: saved exec cost %llu and task exec "
+                                "cost %llu differ\n",
+                                ce_data->pid_entries[lvl_a_id].exec_cost,
+                                get_exec_cost(ts));
+                goto out_put_pid_unlock;
+        }
+        ce_data->pid_entries[lvl_a_id].pid = pid;
+        retval = 0;
+        /* don't call put_pid if we are successful */
+        goto out_unlock;
+
+out_put_pid_unlock:
+        put_pid(pid);
+out_unlock:
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+out:
+        return retval;
+}
+
+/*
+ * Called to set up a new real-time task (after the admit_task callback).
+ * At this point the task's struct PID is already hooked up on the destination
+ * CPU. The task may already be running.
+ */
+static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, task_cpu(ts));
+        struct ce_dom_data *ce_data = dom->data;
+        struct pid *pid_should_be_running;
+        unsigned long flags;
+        int idx, should_be_running;
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        idx = mc_ce_schedule_at(litmus_clock(), dom);
+        pid_should_be_running = ce_data->pid_entries[idx].pid;
+        rcu_read_lock();
+        should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
+        rcu_read_unlock();
+        if (running) {
+                /* admit_task checks that the task is not on the wrong CPU */
+                BUG_ON(task_cpu(ts) != get_partition(ts));
+                BUG_ON(ce_data->scheduled);
+                ce_data->scheduled = ts;
+
+                if (!should_be_running)
+                        preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
+        } else if (!running && should_be_running) {
+                ce_data->should_schedule = ts;
+                preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
+        }
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/*
+ * Called to re-introduce a task after blocking.
+ * Can potentially be called multiple times.
+ */
+static void mc_ce_task_wake_up(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, smp_processor_id());
+        struct ce_dom_data *ce_data = dom->data;
+        unsigned long flags;
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        if (ts == ce_data->should_schedule && ts != ce_data->scheduled)
+                preempt_if_preemptable(ts, ce_data->cpu);
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/*
+ * Called to notify the plugin of a blocking real-time task. Only called for
+ * real-time tasks and before schedule is called.
+ */
+static void mc_ce_task_block(struct task_struct *ts)
+{
+        /* nothing to do because it will be taken care of in schedule */
+}
+
+/*
+ * Called when the complete_job system call is invoked from user space.
+ */
+static long mc_ce_complete_job(void)
+{
+        /* TODO */
+        printk(KERN_EMERG "complete job called TODO\n");
+        BUG();
+        return 0;
+}
+
+/*
+ * Called when a task switches from RT mode back to normal mode.
+ */
+static void mc_ce_task_exit(struct task_struct *ts)
+{
+        domain_t *dom = &per_cpu(mc_ce_doms, get_partition(ts));
+        struct ce_dom_data *ce_data = dom->data;
+        unsigned long flags;
+        struct pid *pid;
+        const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
+
+        BUG_ON(task_cpu(ts) != get_partition(ts));
+        BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
+        BUG_ON(lvl_a_id >= ce_data->num_pid_entries);
+
+        raw_spin_lock_irqsave(dom->lock, flags);
+        pid = ce_data->pid_entries[lvl_a_id].pid;
+        BUG_ON(!pid);
+        put_pid(pid);
+        ce_data->pid_entries[lvl_a_id].pid = NULL;
+        raw_spin_unlock_irqrestore(dom->lock, flags);
+}
+
+/***********************************************************
+ * Timer stuff
+ **********************************************************/
+
+/*
+ * What to do when a timer fires. The timer should only be armed if the number
+ * of PID entries is positive.
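+ *
+ * For example, with the illustrative 2 ms / 3 ms table from the header
+ * comment, a callback that fires 3 ms into a cycle computes
+ * cycle_start_abs = now - 3 ms, picks index 1, re-arms itself for
+ * cycle_start_abs + 5 ms (the end of that slot), and marks slot 1's task as
+ * should_schedule.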
+ */ +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + struct ce_dom_data *ce_data = container_of(timer, + struct ce_dom_data, timer); + domain_t *dom = &per_cpu(mc_ce_doms, ce_data->cpu); + /* relative and absolute times for cycles */ + lt_t now, offset_rel, cycle_start_abs, next_timer_abs; + struct ce_dom_pid_entry *pid_entry; + unsigned long flags; + int idx; + + raw_spin_lock_irqsave(dom->lock, flags); + + now = litmus_clock(); + offset_rel = (now - atomic64_read(&start_time)) % ce_data->cycle_time; + cycle_start_abs = now - offset_rel; + idx = mc_ce_schedule_at(now, dom); + pid_entry = &ce_data->pid_entries[idx]; + next_timer_abs = cycle_start_abs + pid_entry->acc_time; + hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs)); + /* get the task_struct (pid_task can accept a NULL) */ + rcu_read_lock(); + ce_data->should_schedule = pid_task(pid_entry->pid, PIDTYPE_PID); + rcu_read_unlock(); + if (ce_data->scheduled != ce_data->should_schedule) + preempt_if_preemptable(ce_data->scheduled, ce_data->cpu); + + raw_spin_unlock_irqrestore(dom->lock, flags); + + return HRTIMER_RESTART; +} + +/* + * Cancel timers on all CPUs. Returns 1 if any were active. + */ +static int cancel_all_timers(void) +{ + struct ce_dom_data *ce_data; + domain_t *dom; + int cpu, ret = 0; + + for_each_online_cpu(cpu) { + dom = &per_cpu(mc_ce_doms, cpu); + ce_data = dom->data; + ret = ret || hrtimer_cancel(&ce_data->timer); + } + return ret; +} + +/* + * Arm all timers so that they start at the new value of start time. + * Any CPU without CE PID entries won't have a timer armed. + * All timers should be canceled before calling this. + */ +static void arm_all_timers(void) +{ + struct ce_dom_data *ce_data; + domain_t *dom; + int cpu; + const lt_t start = atomic64_read(&start_time); + + for_each_online_cpu(cpu) { + dom = &per_cpu(mc_ce_doms, cpu); + ce_data = dom->data; + if (0 == ce_data->num_pid_entries) + continue; + hrtimer_start_on(cpu, &ce_data->timer_info, &ce_data->timer, + ns_to_ktime(start), HRTIMER_MODE_ABS_PINNED); + } +} + +/* + * There are no real releases in the CE, but the task releasing code will + * call this. We can re-set our notion of the CE period start to make + * the schedule line up. 
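+ *
+ * Only the first caller may reset the start time: start_time_set begins at
+ * -1, so the task whose atomic_inc_and_test() brings it to zero wins and
+ * re-arms the timers, while every later caller simply undoes its increment.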
+ */
+static void mc_ce_release_at(struct task_struct *ts, lt_t start)
+{
+        if (atomic_inc_and_test(&start_time_set)) {
+                /* in this case, we won the race */
+                atomic64_set(&start_time, start);
+                cancel_all_timers();
+                arm_all_timers();
+        } else
+                atomic_dec(&start_time_set);
+}
+
+static long mc_ce_activate_plugin(void)
+{
+        struct ce_dom_data *ce_data;
+        domain_t *dom;
+        int cpu;
+
+        for_each_online_cpu(cpu) {
+                dom = &per_cpu(mc_ce_doms, cpu);
+                ce_data = dom->data;
+                ce_data->scheduled = NULL;
+                ce_data->should_schedule = NULL;
+        }
+
+        /* back to -1 so that the next release_at() call can win the race */
+        atomic_set(&start_time_set, -1);
+        atomic64_set(&start_time, litmus_clock());
+        arm_all_timers();
+        return 0;
+}
+
+static long mc_ce_deactivate_plugin(void)
+{
+        domain_t *dom;
+        struct ce_dom_data *ce_data;
+        int cpu;
+
+        cancel_all_timers();
+        for_each_online_cpu(cpu) {
+                dom = &per_cpu(mc_ce_doms, cpu);
+                ce_data = dom->data;
+                atomic_set(&ce_data->timer_info.state,
+                                HRTIMER_START_ON_INACTIVE);
+        }
+        return 0;
+}
+
+/* Plugin object */
+static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
+        .plugin_name            = "MC-CE",
+        .admit_task             = mc_ce_admit_task,
+        .task_new               = mc_ce_task_new,
+        .complete_job           = mc_ce_complete_job,
+        .release_at             = mc_ce_release_at,
+        .task_exit              = mc_ce_task_exit,
+        .schedule               = mc_ce_schedule,
+        .finish_switch          = mc_ce_finish_switch,
+        .tick                   = mc_ce_tick,
+        .task_wake_up           = mc_ce_task_wake_up,
+        .task_block             = mc_ce_task_block,
+        .activate_plugin        = mc_ce_activate_plugin,
+        .deactivate_plugin      = mc_ce_deactivate_plugin,
+};
+
+static int __init init_sched_mc_ce(void)
+{
+        struct ce_dom_data *ce_data;
+        domain_t *dom;
+        int cpu, i;
+
+        for_each_online_cpu(cpu) {
+                dom = &per_cpu(mc_ce_doms, cpu);
+                pd_domain_init(dom, NULL, NULL, NULL, NULL);
+                dom->data = &per_cpu(_mc_ce_dom_data, cpu);
+                ce_data = dom->data;
+                hrtimer_init(&ce_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+                hrtimer_start_on_info_init(&ce_data->timer_info);
+                ce_data->cpu = cpu;
+                ce_data->num_pid_entries = 0;
+                ce_data->cycle_time = 0;
+                ce_data->timer.function = timer_callback;
+
+                for (i = 0; i < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS; ++i) {
+                        ce_data->pid_entries[i].pid = NULL;
+                        ce_data->pid_entries[i].exec_cost = 0;
+                        ce_data->pid_entries[i].acc_time = 0;
+                }
+        }
+        return register_sched_plugin(&mc_ce_plugin);
+}
+
+module_init(init_sched_mc_ce);
--
cgit v1.2.2
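
A note on usage (not part of the patch itself): the new lvl_a_id field is what
ties a level A task to one slot of the per-CPU pid_entries[] table in
sched_mc_ce.c. The sketch below shows how user space would be expected to tag
such a task through the sys_set_rt_task_mc_param() system call that this patch
extends. It is a minimal sketch only: the set_mc_task_param() wrapper and the
user-space copy of struct mc_task are assumptions here, since liblitmus support
is not part of this commit.

    #include <sys/types.h>

    /* User-space mirror of the kernel definitions in include/litmus/sched_mc.h. */
    enum crit_level { CRIT_LEVEL_A, CRIT_LEVEL_B, CRIT_LEVEL_C, CRIT_LEVEL_D };

    struct mc_task {
            enum crit_level crit;
            int lvl_a_id;
    };

    /* Hypothetical wrapper around sys_set_rt_task_mc_param(); not in this patch. */
    int set_mc_task_param(pid_t pid, struct mc_task *param);

    /* Tag a task as level A and bind it to table slot 'slot' on its partition. */
    static int make_level_a(pid_t pid, int slot)
    {
            /* litmus.c rejects ids outside [0, CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS) */
            struct mc_task mct = {
                    .crit = CRIT_LEVEL_A,
                    .lvl_a_id = slot,
            };
            return set_mc_task_param(pid, &mct);
    }

Even with a valid id, mc_ce_admit_task() will still refuse the task unless it
already runs on its partition, its execution cost matches the slot's exec_cost,
and no other task has claimed the same slot.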