/**
* litmus/sched_mc_ce.c
*
* The Cyclic Executive (CE) scheduler used by the mixed criticality scheduling
* algorithm.
*/
#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>
#include <litmus/rt_domain.h>
#include <litmus/rt_param.h>
#include <litmus/litmus_proc.h>
#include <litmus/sched_trace.h>
#include <litmus/jobs.h>
#include <litmus/sched_mc.h>
static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
#define is_active_plugin() (litmus == &mc_ce_plugin)
#define get_ce_data(dom_data_ref) (dom_data_ref->domain.data)
static atomic_t start_time_set = ATOMIC_INIT(-1);
static atomic64_t start_time = ATOMIC64_INIT(0);
static struct proc_dir_entry *mc_ce_dir = NULL, *ce_file = NULL;
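/* Per-CPU state for the cyclic executive: the domain_data_t wrapper handed to
 * the mixed-criticality plugin, the underlying rt_domain_t, and the CE
 * bookkeeping proper (PID entry table, cycle time, timer, and the
 * scheduled/should_schedule pointers).
 */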
DEFINE_PER_CPU(domain_data_t, mc_ce_doms);
DEFINE_PER_CPU(rt_domain_t, mc_ce_rts);
DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
/* Return the address of the domain_data_t for this CPU, used by the
 * mixed-criticality plugin. */
domain_data_t *ce_domain_for(int cpu)
{
return &per_cpu(mc_ce_doms, cpu);
}
/*
* Get the offset into the cycle taking the start time into account.
*/
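/* Purely illustrative example: with start_time = 100 ms and cycle_time =
 * 30 ms, a call at when = 175 ms returns (175 - 100) % 30 = 15 ms into the
 * current cycle.
 */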
static inline lt_t get_cycle_offset(const lt_t when, const lt_t cycle_time)
{
long long st = atomic64_read(&start_time);
lt_t offset = (when - st) % cycle_time;
TRACE("when: %llu cycle_time: %llu start_time: %lld offset %llu\n",
when, cycle_time, st, offset);
return offset;
}
/*
 * The user-space job-completion syscall sets the RT_F_SLEEP flag and then
 * calls schedule(). This function is invoked when schedule() puts such a
 * task to sleep.
*
* Do not call prepare_for_next_period on Level-A tasks!
*/
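/* Note: the timer callback advances pid_entry->expected_job each time this
 * task's window begins (once the synchronized release has happened), while
 * job_params.job_no is advanced here on completion; when the task keeps up,
 * the job that just finished equals the expected job.
 */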
static void mc_ce_job_completion(struct task_struct *ts)
{
const domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
const struct ce_dom_data *ce_data = get_ce_data(dom_data);
const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
const struct ce_dom_pid_entry *pid_entry =
&ce_data->pid_entries[idx];
int just_finished;
TRACE_TASK(ts, "completed\n");
sched_trace_task_completion(ts, 0);
/* post-increment is important here */
just_finished = (tsk_rt(ts)->job_params.job_no)++;
/* Job completes in expected window: everything is normal.
* Job completes in an earlier window: BUG(), that's wrong.
* Job completes in a later window: The job is behind.
*/
if (just_finished < pid_entry->expected_job) {
/* this job is already released because it's running behind */
set_rt_flags(ts, RT_F_RUNNING);
TRACE_TASK(ts, "appears behind: the expected job is %d but "
"job %d just completed\n",
pid_entry->expected_job, just_finished);
} else if (pid_entry->expected_job < just_finished) {
printk(KERN_CRIT "job %d completed in expected job %d which "
"seems too early\n", just_finished,
pid_entry->expected_job);
BUG();
}
}
/*
* Return the index into the PID entries table of what to schedule next.
* Don't call if the table is empty. Assumes the caller has the domain lock.
* The offset parameter is the offset into the cycle.
*
* TODO Currently O(n) in the number of tasks on the CPU. Binary search?
*/
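/* Purely illustrative example: two entries with budgets of 3 ms and 2 ms have
 * acc_time values of 3 ms and 5 ms and a cycle_time of 5 ms, so an offset of
 * 3.5 ms falls in the second window and index 1 is returned.
 */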
static int mc_ce_schedule_at(const domain_t *dom, lt_t offset)
{
const struct ce_dom_data *ce_data = dom->data;
const struct ce_dom_pid_entry *pid_entry = NULL;
int i;
BUG_ON(ce_data->cycle_time < 1);
BUG_ON(ce_data->num_pid_entries < 1);
for (i = 0; i < ce_data->num_pid_entries; ++i) {
pid_entry = &ce_data->pid_entries[i];
if (offset < pid_entry->acc_time) {
/* found task to schedule in this window */
break;
}
}
/* can only happen if cycle_time is not right */
BUG_ON(pid_entry->acc_time > ce_data->cycle_time);
TRACE("schedule at returned task %d for CPU %d\n", i, ce_data->cpu);
return i;
}
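/*
 * The plugin's schedule callback for this CPU: complete a sleeping job, then
 * return the task the cyclic schedule currently selects, or NULL if that task
 * is blocked or sleeping (or no task is selected).
 */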
static struct task_struct *mc_ce_schedule(struct task_struct *prev)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
domain_t *dom = &dom_data->domain;
struct ce_dom_data *ce_data = get_ce_data(dom_data);
struct task_struct *next = NULL;
int exists, sleep, should_sched_exists, should_sched_blocked,
should_sched_asleep;
raw_spin_lock(dom->lock);
/* sanity checking */
BUG_ON(ce_data->scheduled && ce_data->scheduled != prev);
BUG_ON(ce_data->scheduled && !is_realtime(prev));
BUG_ON(is_realtime(prev) && !ce_data->scheduled);
exists = NULL != ce_data->scheduled;
sleep = exists && RT_F_SLEEP == get_rt_flags(ce_data->scheduled);
TRACE("exists: %d, sleep: %d\n", exists, sleep);
if (sleep)
mc_ce_job_completion(ce_data->scheduled);
	/* These checks must go after the call to mc_ce_job_completion in case
	 * a late task needs to be scheduled again right away and it's the
	 * only task on a core.
	 */
should_sched_exists = NULL != ce_data->should_schedule;
should_sched_blocked = should_sched_exists &&
!is_running(ce_data->should_schedule);
should_sched_asleep = should_sched_exists &&
RT_F_SLEEP == get_rt_flags(ce_data->should_schedule);
TRACE("should_sched_exists: %d, should_sched_blocked: %d, "
"should_sched_asleep: %d\n", should_sched_exists,
should_sched_blocked, should_sched_asleep);
if (should_sched_exists && !should_sched_blocked &&
!should_sched_asleep) {
/*
* schedule the task that should be executing in the cyclic
* schedule if it is not blocked and not sleeping
*/
next = ce_data->should_schedule;
}
sched_state_task_picked();
raw_spin_unlock(dom->lock);
return next;
}
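/*
 * Record which task is now running on this CPU: the Level-A task that was
 * just switched to, or NULL if the CPU is running something else.
 */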
static void mc_ce_finish_switch(struct task_struct *prev)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
struct ce_dom_data *ce_data = get_ce_data(dom_data);
TRACE("finish switch\n");
if (is_realtime(current) && CRIT_LEVEL_A == tsk_mc_crit(current))
ce_data->scheduled = current;
else
ce_data->scheduled = NULL;
}
/*
* Called for every local timer interrupt.
* Linux calls this with interrupts disabled, AFAIK.
*/
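/* If the running Level-A task is preemptable and is not the one the CE table
 * currently selects, request a local reschedule; otherwise, if it is in a
 * user-space non-preemptive section, ask it to exit that section.
 */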
static void mc_ce_tick(struct task_struct *ts)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
domain_t *dom = &dom_data->domain;
struct ce_dom_data *ce_data = get_ce_data(dom_data);
struct task_struct *should_schedule;
if (is_realtime(ts) && CRIT_LEVEL_A == tsk_mc_crit(ts)) {
raw_spin_lock(dom->lock);
should_schedule = ce_data->should_schedule;
raw_spin_unlock(dom->lock);
if (!is_np(ts) && ts != should_schedule) {
litmus_reschedule_local();
} else if (is_user_np(ts)) {
request_exit_np(ts);
}
}
}
/*
 * The admit_task callback decides whether this task is permitted to enter the
 * system. Here we look up the task's struct pid and save it in the proper
 * slot for the CPU this task will run on.
*/
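/* The checks below, in order: the task is on its assigned CPU, it is an MC
 * task at criticality level A, its struct pid can be obtained, its level-A id
 * fits in the PID table, that table slot is still free, and its execution
 * cost is less than the configured budget.
 */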
static long __mc_ce_admit_task(struct task_struct *ts)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, get_partition(ts));
struct ce_dom_data *ce_data = get_ce_data(dom_data);
struct mc_data *mcd = tsk_mc_data(ts);
struct pid *pid = NULL;
long retval = -EINVAL;
const int lvl_a_id = mcd->mc_task.lvl_a_id;
/* check the task has migrated to the right CPU (like in sched_cedf) */
if (task_cpu(ts) != get_partition(ts)) {
printk(KERN_INFO "litmus: %d admitted on CPU %d but want %d ",
ts->pid, task_cpu(ts), get_partition(ts));
goto out;
}
/* only level A tasks can be CE */
if (!mcd || CRIT_LEVEL_A != tsk_mc_crit(ts)) {
printk(KERN_INFO "litmus: non-MC or non level A task %d\n",
ts->pid);
goto out;
}
/* try and get the task's PID structure */
pid = get_task_pid(ts, PIDTYPE_PID);
if (IS_ERR_OR_NULL(pid)) {
printk(KERN_INFO "litmus: couldn't get pid struct for %d\n",
ts->pid);
goto out;
}
if (lvl_a_id >= ce_data->num_pid_entries) {
printk(KERN_INFO "litmus: level A id greater than expected "
"number of tasks %d for %d cpu %d\n",
ce_data->num_pid_entries, ts->pid,
get_partition(ts));
goto out_put_pid;
}
if (ce_data->pid_entries[lvl_a_id].pid) {
printk(KERN_INFO "litmus: have saved pid info id: %d cpu: %d\n",
lvl_a_id, get_partition(ts));
goto out_put_pid;
}
if (get_exec_cost(ts) >= ce_data->pid_entries[lvl_a_id].budget) {
printk(KERN_INFO "litmus: execution cost %llu is larger than "
"the budget %llu\n",
get_exec_cost(ts),
ce_data->pid_entries[lvl_a_id].budget);
goto out_put_pid;
}
ce_data->pid_entries[lvl_a_id].pid = pid;
retval = 0;
/* don't call put_pid if we are successful */
goto out;
out_put_pid:
put_pid(pid);
out:
return retval;
}
static long mc_ce_admit_task(struct task_struct *ts)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, get_partition(ts));
domain_t *dom = &dom_data->domain;
	unsigned long flags;
	long retval;
raw_spin_lock_irqsave(dom->lock, flags);
retval = __mc_ce_admit_task(ts);
raw_spin_unlock_irqrestore(dom->lock, flags);
return retval;
}
/*
* Called to set up a new real-time task (after the admit_task callback).
* At this point the task's struct PID is already hooked up on the destination
* CPU. The task may already be running.
*/
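/* With the domain lock held, work out which PID entry should be running at
 * the current cycle offset, then update ce_data->scheduled/should_schedule
 * for the new task and preempt the CPU when the cyclic choice and the
 * currently running task disagree.
 */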
static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, task_cpu(ts));
domain_t *dom = &dom_data->domain;
struct ce_dom_data *ce_data = get_ce_data(dom_data);
struct pid *pid_should_be_running;
struct ce_dom_pid_entry *pid_entry;
unsigned long flags;
int idx, should_be_running;
lt_t offset;
/* have to call mc_ce_schedule_at because the task only gets a PID
* entry after calling admit_task */
raw_spin_lock_irqsave(dom->lock, flags);
pid_entry = &ce_data->pid_entries[tsk_mc_data(ts)->mc_task.lvl_a_id];
/* initialize some task state */
set_rt_flags(ts, RT_F_RUNNING);
tsk_rt(ts)->job_params.job_no = 0;
offset = get_cycle_offset(litmus_clock(), ce_data->cycle_time);
idx = mc_ce_schedule_at(dom, offset);
pid_should_be_running = ce_data->pid_entries[idx].pid;
rcu_read_lock();
should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
rcu_read_unlock();
if (running) {
/* admit task checks that the task is not on the wrong CPU */
BUG_ON(task_cpu(ts) != get_partition(ts));
BUG_ON(ce_data->scheduled);
ce_data->scheduled = ts;
if (should_be_running)
ce_data->should_schedule = ts;
else
preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
} else if (!running && should_be_running) {
ce_data->should_schedule = ts;
preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
}
raw_spin_unlock_irqrestore(dom->lock, flags);
}
/*
* Called to re-introduce a task after blocking.
 * Can potentially be called multiple times.
*/
static void mc_ce_task_wake_up(struct task_struct *ts)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, smp_processor_id());
domain_t *dom = &dom_data->domain;
struct ce_dom_data *ce_data = get_ce_data(dom_data);
unsigned long flags;
TRACE_TASK(ts, "wake up\n");
raw_spin_lock_irqsave(dom->lock, flags);
if (ts == ce_data->should_schedule && ts != ce_data->scheduled)
preempt_if_preemptable(ts, ce_data->cpu);
raw_spin_unlock_irqrestore(dom->lock, flags);
}
/*
 * Called to notify the plugin of a blocking real-time task. Only called for
 * real-time tasks and before schedule() is called.
*/
static void mc_ce_task_block(struct task_struct *ts)
{
/* nothing to do because it will be taken care of in schedule */
TRACE_TASK(ts, "blocked\n");
}
/*
* Called when a task switches from RT mode back to normal mode.
*/
void mc_ce_task_exit(struct task_struct *ts)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, get_partition(ts));
domain_t *dom = &dom_data->domain;
struct ce_dom_data *ce_data = get_ce_data(dom_data);
unsigned long flags;
struct pid *pid;
const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
TRACE_TASK(ts, "exited\n");
BUG_ON(task_cpu(ts) != get_partition(ts));
BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
BUG_ON(lvl_a_id >= ce_data->num_pid_entries);
raw_spin_lock_irqsave(dom->lock, flags);
pid = ce_data->pid_entries[lvl_a_id].pid;
BUG_ON(!pid);
put_pid(pid);
ce_data->pid_entries[lvl_a_id].pid = NULL;
if (ce_data->scheduled == ts)
ce_data->scheduled = NULL;
if (ce_data->should_schedule == ts)
ce_data->should_schedule = NULL;
raw_spin_unlock_irqrestore(dom->lock, flags);
}
/***********************************************************
* Timer stuff
**********************************************************/
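/*
 * Core of the CE timer: compute the offset into the current cycle, pick the
 * entry whose window contains that offset, re-arm the timer for the end of
 * that window, and set up release/deadline parameters for the selected task
 * (if its PID is currently mapped to a task).
 */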
void __mc_ce_timer_callback(struct hrtimer *timer)
{
/* relative and absolute times for cycles */
lt_t now, offset_rel, cycle_start_abs, next_timer_abs;
struct task_struct *should_schedule;
struct ce_dom_pid_entry *pid_entry;
struct ce_dom_data *ce_data;
domain_data_t *dom_data;
domain_t *dom;
int idx, budget_overrun;
ce_data = container_of(timer, struct ce_dom_data, timer);
dom_data = &per_cpu(mc_ce_doms, ce_data->cpu);
dom = &dom_data->domain;
/* Based off of the current time, figure out the offset into the cycle
* and the cycle's start time, and determine what should be scheduled.
*/
now = litmus_clock();
offset_rel = get_cycle_offset(now, ce_data->cycle_time);
cycle_start_abs = now - offset_rel;
idx = mc_ce_schedule_at(dom, offset_rel);
pid_entry = &ce_data->pid_entries[idx];
/* set the timer to fire at the next cycle start */
next_timer_abs = cycle_start_abs + pid_entry->acc_time;
hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
TRACE("timer: now: %llu offset_rel: %llu cycle_start_abs: %llu "
"next_timer_abs: %llu\n", now, offset_rel,
cycle_start_abs, next_timer_abs);
/* get the task_struct (pid_task can accept a NULL) */
rcu_read_lock();
should_schedule = pid_task(pid_entry->pid, PIDTYPE_PID);
rcu_read_unlock();
ce_data->should_schedule = should_schedule;
if (should_schedule && 0 == atomic_read(&start_time_set)) {
		/*
		 * Advance the job number this window expects. If jobs are not
		 * overrunning their budgets, it matches the task's own job
		 * counter; a mismatch is traced below as a budget overrun.
		 */
pid_entry->expected_job++;
budget_overrun = pid_entry->expected_job !=
tsk_rt(should_schedule)->job_params.job_no;
if (budget_overrun)
TRACE_TASK(should_schedule, "timer expected job number: %d "
"but current job: %d\n",
pid_entry->expected_job,
tsk_rt(should_schedule)->job_params.job_no);
}
if (ce_data->should_schedule) {
tsk_rt(should_schedule)->job_params.deadline =
cycle_start_abs + pid_entry->acc_time;
tsk_rt(should_schedule)->job_params.release =
tsk_rt(should_schedule)->job_params.deadline -
pid_entry->budget;
tsk_rt(should_schedule)->job_params.exec_time = 0;
sched_trace_task_release(should_schedule);
set_rt_flags(ce_data->should_schedule, RT_F_RUNNING);
}
}
/*
* What to do when a timer fires. The timer should only be armed if the number
* of PID entries is positive.
*/
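/* Note: __mc_ce_timer_callback() already advanced the expiry with
 * hrtimer_set_expires(), so returning HRTIMER_RESTART re-arms the timer for
 * the start of the next scheduling window.
 */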
static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
{
struct ce_dom_data *ce_data;
unsigned long flags;
domain_data_t *dom_data;
domain_t *dom;
ce_data = container_of(timer, struct ce_dom_data, timer);
dom_data = &per_cpu(mc_ce_doms, ce_data->cpu);
dom = &dom_data->domain;
TRACE("timer callback on CPU %d (before lock)\n", ce_data->cpu);
raw_spin_lock_irqsave(dom->lock, flags);
__mc_ce_timer_callback(timer);
if (ce_data->scheduled != ce_data->should_schedule)
preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
raw_spin_unlock_irqrestore(dom->lock, flags);
return HRTIMER_RESTART;
}
/*
* Cancel timers on all CPUs. Returns 1 if any were active.
*/
static int cancel_all_timers(void)
{
struct ce_dom_data *ce_data;
domain_data_t *dom_data;
int cpu, ret = 0, cancel_res;
TRACE("cancel all timers\n");
for_each_online_cpu(cpu) {
dom_data = &per_cpu(mc_ce_doms, cpu);
ce_data = get_ce_data(dom_data);
ce_data->should_schedule = NULL;
cancel_res = hrtimer_cancel(&ce_data->timer);
atomic_set(&ce_data->timer_info.state,
HRTIMER_START_ON_INACTIVE);
ret = ret || cancel_res;
}
return ret;
}
/*
* Arm all timers so that they start at the new value of start time.
* Any CPU without CE PID entries won't have a timer armed.
* All timers should be canceled before calling this.
*/
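/* Resetting expected_job to -1 means the first timer-driven increment lands
 * on 0, matching job_no of a freshly admitted task.
 */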
static void arm_all_timers(void)
{
struct ce_dom_data *ce_data;
domain_data_t *dom_data;
int cpu, idx;
const lt_t start = atomic64_read(&start_time);
TRACE("arm all timers\n");
for_each_online_cpu(cpu) {
dom_data = &per_cpu(mc_ce_doms, cpu);
ce_data = get_ce_data(dom_data);
if (0 == ce_data->num_pid_entries)
continue;
for (idx = 0; idx < ce_data->num_pid_entries; idx++) {
ce_data->pid_entries[idx].expected_job = -1;
}
TRACE("arming timer for CPU %d\n", cpu);
hrtimer_start_on(cpu, &ce_data->timer_info, &ce_data->timer,
ns_to_ktime(start), HRTIMER_MODE_ABS_PINNED);
}
}
/*
* There are no real releases in the CE, but the task release syscall will
* call this. We can re-set our notion of the CE period start to make
* the schedule look pretty.
*/
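/* start_time_set acts as a one-shot latch: it starts at -1, the first caller
 * whose atomic_inc_and_test() reaches 0 wins and (re)arms the timers, and
 * later callers immediately undo their increment. mc_ce_activate_plugin()
 * resets the latch to -1.
 */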
void mc_ce_release_at(struct task_struct *ts, lt_t start)
{
TRACE_TASK(ts, "release at\n");
if (atomic_inc_and_test(&start_time_set)) {
/* in this case, we won the race */
cancel_all_timers();
atomic64_set(&start_time, start);
arm_all_timers();
} else
atomic_dec(&start_time_set);
}
long mc_ce_activate_plugin(void)
{
struct ce_dom_data *ce_data;
domain_data_t *dom_data;
int cpu;
for_each_online_cpu(cpu) {
dom_data = &per_cpu(mc_ce_doms, cpu);
ce_data = get_ce_data(dom_data);
ce_data->scheduled = NULL;
ce_data->should_schedule = NULL;
}
atomic_set(&start_time_set, -1);
atomic64_set(&start_time, litmus_clock());
/* may not want to arm timers on activation, just after release */
arm_all_timers();
return 0;
}
static void clear_pid_entries(void)
{
int cpu, entry;
domain_data_t *dom_data;
struct ce_dom_data *ce_data;
for_each_online_cpu(cpu) {
dom_data = &per_cpu(mc_ce_doms, cpu);
ce_data = get_ce_data(dom_data);
ce_data->num_pid_entries = 0;
ce_data->cycle_time = 0;
for (entry = 0; entry < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS;
++entry) {
if (NULL != ce_data->pid_entries[entry].pid) {
put_pid(ce_data->pid_entries[entry].pid);
ce_data->pid_entries[entry].pid = NULL;
}
ce_data->pid_entries[entry].budget = 0;
ce_data->pid_entries[entry].acc_time = 0;
ce_data->pid_entries[entry].expected_job = -1;
}
}
}
long mc_ce_deactivate_plugin(void)
{
cancel_all_timers();
return 0;
}
/* Plugin object */
static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
.plugin_name = "MC-CE",
.admit_task = mc_ce_admit_task,
.task_new = mc_ce_task_new,
.complete_job = complete_job,
.release_at = mc_ce_release_at,
.task_exit = mc_ce_task_exit,
.schedule = mc_ce_schedule,
.finish_switch = mc_ce_finish_switch,
.tick = mc_ce_tick,
.task_wake_up = mc_ce_task_wake_up,
.task_block = mc_ce_task_block,
.activate_plugin = mc_ce_activate_plugin,
.deactivate_plugin = mc_ce_deactivate_plugin,
};
static int setup_proc(void);
static int __init init_sched_mc_ce(void)
{
struct ce_dom_data *ce_data;
domain_data_t *dom_data;
domain_t *dom;
rt_domain_t *rt;
int cpu, err;
for_each_online_cpu(cpu) {
dom_data = &per_cpu(mc_ce_doms, cpu);
dom = &dom_data->domain;
rt = &per_cpu(mc_ce_rts, cpu);
pd_domain_init(dom, rt, NULL, NULL, NULL, NULL, NULL);
dom->data = &per_cpu(_mc_ce_dom_data, cpu);
ce_data = get_ce_data(dom_data);
hrtimer_init(&ce_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
hrtimer_start_on_info_init(&ce_data->timer_info);
ce_data->cpu = cpu;
ce_data->timer.function = mc_ce_timer_callback;
}
clear_pid_entries();
err = setup_proc();
if (!err)
err = register_sched_plugin(&mc_ce_plugin);
return err;
}
#define BUF_SIZE PAGE_SIZE
static int write_into_proc(char *proc_buf, const int proc_size, char *fmt, ...)
{
static char buf[BUF_SIZE];
int n;
va_list args;
	/* When writing to procfs, we don't care about the trailing NUL byte,
	 * which is not included in the count returned by vsnprintf.
	 */
va_start(args, fmt);
n = vsnprintf(buf, BUF_SIZE, fmt, args);
va_end(args);
if (BUF_SIZE <= n || proc_size <= n) {
/* too big for formatting buffer or proc (less null byte) */
n = -EINVAL;
goto out;
}
memcpy(proc_buf, buf, n);
out:
return n;
}
#undef BUF_SIZE
/*
* Writes a PID entry to the procfs.
*
* @page buffer to write into.
* @count bytes available in the buffer
*/
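/* Each entry is emitted as a "# task: <pid> <comm>" comment line followed by
 * "<cpu>, <entry index>, <budget>", which matches the format accepted by the
 * write handler below.
 */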
#define PID_SPACE 15
#define TASK_INFO_BUF (PID_SPACE + TASK_COMM_LEN)
static int write_pid_entry(char *page, const int count, const int cpu,
const int task, struct ce_dom_pid_entry *pid_entry)
{
static char task_info[TASK_INFO_BUF];
struct task_struct *ts;
int n = 0, err, ti_n;
char *ti_b;
if (pid_entry->pid) {
rcu_read_lock();
ts = pid_task(pid_entry->pid, PIDTYPE_PID);
rcu_read_unlock();
/* get some information about the task */
if (ts) {
ti_b = task_info;
ti_n = snprintf(ti_b, PID_SPACE, "%d", ts->pid);
if (PID_SPACE <= ti_n)
ti_n = PID_SPACE - 1;
ti_b += ti_n;
*ti_b = ' '; /* nuke the null byte */
ti_b++;
get_task_comm(ti_b, ts);
} else {
strncpy(task_info, "pid_task() failed :(",
TASK_INFO_BUF);
}
} else
strncpy(task_info, "no", TASK_INFO_BUF);
task_info[TASK_INFO_BUF - 1] = '\0'; /* just to be sure */
err = write_into_proc(page + n, count - n, "# task: %s\n", task_info);
if (err < 0) {
n = -ENOSPC;
goto out;
}
n += err;
err = write_into_proc(page + n, count - n, "%d, %d, %llu\n",
cpu, task, pid_entry->budget);
if (err < 0) {
n = -ENOSPC;
goto out;
}
n += err;
out:
return n;
}
#undef PID_SPACE
#undef TASK_INFO_BUF
/*
* Called when the user-land reads from proc.
*/
static int proc_read_ce_file(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
int n = 0, err, cpu, t;
struct ce_dom_data *ce_data;
domain_data_t *dom_data;
if (off > 0) {
printk(KERN_INFO "litmus: MC-CE called read with off > 0\n");
goto out;
}
for_each_online_cpu(cpu) {
dom_data = &per_cpu(mc_ce_doms, cpu);
ce_data = get_ce_data(dom_data);
for (t = 0; t < ce_data->num_pid_entries; ++t) {
err = write_pid_entry(page + n, count - n,
cpu, t, &ce_data->pid_entries[t]);
if (err < 0) {
n = -ENOSPC;
goto out;
}
n += err;
}
}
out:
*eof = 1;
return n;
}
/*
* Skip a commented line.
*/
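/* For example, given a buffer starting with "# abc\n" and max >= 6, this
 * returns 6: the number of bytes consumed, including the newline.
 */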
static int skip_comment(const char *buf, const unsigned long max)
{
unsigned long i = 0;
const char *c = buf;
if (0 == max || !c || *c != '#')
return 0;
++c; ++i;
for (; i < max; ++i) {
if (*c == '\n') {
++c; ++i;
break;
}
++c;
}
return i;
}
/* budgets below 5 ms (specified in nanoseconds) trigger a warning; such small
 * slots are probably a mistake */
#define BUDGET_THRESHOLD 5000000ULL
static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
{
domain_data_t *dom_data = &per_cpu(mc_ce_doms, cpu);
struct ce_dom_data *ce_data = get_ce_data(dom_data);
struct ce_dom_pid_entry *new_entry;
int err = 0;
/* check the inputs */
if (cpu < 0 || NR_CPUS <= cpu || task < 0 ||
CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= task ||
budget < 1) {
printk(KERN_INFO "litmus: bad cpu, task ID, or budget sent to "
"MC-CE proc\n");
err = -EINVAL;
goto out;
}
/* check for small budgets */
if (BUDGET_THRESHOLD > budget) {
printk(KERN_CRIT "litmus: you gave a small budget for an "
"MC-CE task; that might be an issue.\n");
}
/* check that we have space for a new entry */
if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= ce_data->num_pid_entries) {
printk(KERN_INFO "litmus: too many MC-CE tasks for cpu "
"%d\n", cpu);
err = -EINVAL;
goto out;
}
/* add the new entry */
new_entry = &ce_data->pid_entries[ce_data->num_pid_entries];
BUG_ON(NULL != new_entry->pid);
new_entry->budget = budget;
new_entry->acc_time = ce_data->cycle_time + budget;
/* update the domain entry */
ce_data->cycle_time += budget;
ce_data->num_pid_entries++;
out:
return err;
}
#undef BUDGET_THRESHOLD
/*
* Called when the user-land writes to proc.
*
 * Error checking is quite minimal. The expected format is one entry per line:
 *	<cpu>, <level-A task ID>, <budget in nanoseconds>
 * Lines beginning with '#' are treated as comments.
*/
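/* A configuration with two tasks on CPU 0, using purely illustrative budgets
 * of 25 ms and 10 ms, could be written as:
 *
 *	# level-A table for CPU 0
 *	0, 0, 25000000
 *	0, 1, 10000000
 */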
#define PROCFS_MAX_SIZE PAGE_SIZE
static int proc_write_ce_file(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
static char kbuf[PROCFS_MAX_SIZE];
char *c = kbuf, *c_skipped;
int cpu, task, cnt = 0, chars_read, converted, err;
lt_t budget;
if (is_active_plugin()) {
printk(KERN_INFO "litmus: can't edit MC-CE proc when plugin "
"active\n");
cnt = -EINVAL;
goto out;
}
	if (count >= PROCFS_MAX_SIZE) {
printk(KERN_INFO "litmus: MC-CE procfs got too many bytes "
"from user-space.\n");
cnt = -EINVAL;
goto out;
}
	if (copy_from_user(kbuf, buffer, count)) {
		printk(KERN_INFO "litmus: couldn't copy from user %s\n",
				__FUNCTION__);
		cnt = -EFAULT;
		goto out;
	}
	kbuf[count] = '\0'; /* sscanf and skip_spaces need a terminated string */
clear_pid_entries();
while (cnt < count) {
c_skipped = skip_spaces(c);
if (c_skipped != c) {
chars_read = c_skipped - c;
cnt += chars_read;
c += chars_read;
continue;
}
if (*c == '#') {
chars_read = skip_comment(c, count - cnt);
cnt += chars_read;
c += chars_read;
continue;
}
converted = sscanf(c, "%d, %d, %llu%n", &cpu, &task, &budget,
&chars_read);
if (3 != converted) {
printk(KERN_INFO "litmus: MC-CE procfs expected three "
"arguments, but got %d.\n", converted);
cnt = -EINVAL;
goto out;
}
cnt += chars_read;
c += chars_read;
err = setup_pid_entry(cpu, task, budget);
if (err) {
cnt = -EINVAL;
goto out;
}
}
out:
return cnt;
}
#undef PROCFS_MAX_SIZE
#define CE_FILE_PROC_NAME "ce_file"
static void tear_down_proc(void)
{
if (ce_file)
remove_proc_entry(CE_FILE_PROC_NAME, mc_ce_dir);
if (mc_ce_dir)
remove_plugin_proc_dir(&mc_ce_plugin);
}
static int setup_proc(void)
{
int err;
err = make_plugin_proc_dir(&mc_ce_plugin, &mc_ce_dir);
if (err) {
printk(KERN_ERR "could not create MC-CE procfs dir.\n");
goto out;
}
ce_file = create_proc_entry(CE_FILE_PROC_NAME, 0644, mc_ce_dir);
if (!ce_file) {
printk(KERN_ERR "could not create MC-CE procfs file.\n");
err = -EIO;
goto out_remove_proc;
}
ce_file->read_proc = proc_read_ce_file;
ce_file->write_proc = proc_write_ce_file;
goto out;
out_remove_proc:
tear_down_proc();
out:
return err;
}
#undef CE_FILE_PROC_NAME
static void clean_sched_mc_ce(void)
{
tear_down_proc();
}
module_init(init_sched_mc_ce);
module_exit(clean_sched_mc_ce);