#include <linux/percpu.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>
#include <litmus/debug_trace.h>
#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/budget.h>
#include <litmus/litmus_proc.h>
#include <litmus/reservation.h>
#include <litmus/polling_reservations.h>
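
/*
 * P-RES: a partitioned, reservation-based scheduler plugin.
 * Each CPU runs its own table of reservations (a sup_reservation_environment)
 * protected by a raw spinlock and driven by a per-CPU hrtimer; real-time
 * tasks attach to a reservation as clients.
 */

/* Per-task bookkeeping: which reservation the task is a client of, and on
 * which CPU that reservation lives. */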
struct pres_task_state {
	struct task_client res_info;
	int cpu;
};
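
/* Per-CPU scheduler state: the local reservation environment, the hrtimer
 * used to trigger future scheduler invocations, and the real-time task
 * (if any) currently scheduled on this CPU. */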
struct pres_cpu_state {
	raw_spinlock_t lock;
	struct sup_reservation_environment sup_env;
	struct hrtimer timer;
	int cpu;
	struct task_struct* scheduled;
};
static DEFINE_PER_CPU(struct pres_cpu_state, pres_cpu_state);
#define cpu_state_for(cpu_id) (&per_cpu(pres_cpu_state, cpu_id))
#define local_cpu_state() (&__get_cpu_var(pres_cpu_state))
static struct pres_task_state* get_pres_state(struct task_struct *tsk)
{
	return (struct pres_task_state*) tsk_rt(tsk)->plugin_state;
}
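
/* task_departs()/task_arrives() inform a task's reservation that its client
 * is suspending (job complete or blocked) or becoming ready again, so that
 * the reservation can account for it. */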
static void task_departs(struct task_struct *tsk, int job_complete)
{
	struct pres_task_state* state = get_pres_state(tsk);
	struct reservation* res;
	struct reservation_client *client;

	res = state->res_info.client.reservation;
	client = &state->res_info.client;

	res->ops->client_departs(res, client, job_complete);
}
static void task_arrives(struct task_struct *tsk)
{
	struct pres_task_state* state = get_pres_state(tsk);
	struct reservation* res;
	struct reservation_client *client;

	res = state->res_info.client.reservation;
	client = &state->res_info.client;

	res->ops->client_arrives(res, client);
}
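
/* Reprogram the per-CPU scheduler hrtimer (or poke the remote CPU that owns
 * 'state') so that the next scheduler update recorded in sup_env is honored. */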
/* NOTE: drops state->lock */
static void pres_update_timer_and_unlock(struct pres_cpu_state *state)
{
	int local;
	lt_t update, now;

	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	/* Be sure we're actually running on the right core,
	 * as pres_update_timer() is also called from pres_task_resume(),
	 * which might be called on any CPU when a thread resumes.
	 */
	local = local_cpu_state() == state;

	/* Must drop state lock before calling into hrtimer_start(), which
	 * may raise a softirq, which in turn may wake ksoftirqd. */
	raw_spin_unlock(&state->lock);

	if (update <= now) {
		litmus_reschedule(state->cpu);
	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Reprogram only if not already set correctly. */
		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
			TRACE("canceling timer...\n");
			hrtimer_cancel(&state->timer);
			TRACE("setting scheduler timer for %llu\n", update);
			/* We cannot use hrtimer_start() here because the
			 * wakeup flag must be set to zero. */
			__hrtimer_start_range_ns(&state->timer,
					ns_to_ktime(update),
					0 /* timer coalescing slack */,
					HRTIMER_MODE_ABS_PINNED,
					0 /* wakeup */);
		}
	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Poke remote core only if timer needs to be set earlier than
		 * it is currently set.
		 */
		TRACE("pres_update_timer for remote CPU %d (update=%llu, "
		      "active:%d, set:%llu)\n",
		      state->cpu,
		      update,
		      hrtimer_active(&state->timer),
		      ktime_to_ns(hrtimer_get_expires(&state->timer)));
		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
			TRACE("poking CPU %d so that it can update its "
			      "scheduling timer (active:%d, set:%llu)\n",
			      state->cpu,
			      hrtimer_active(&state->timer),
			      ktime_to_ns(hrtimer_get_expires(&state->timer)));
			litmus_reschedule(state->cpu);
		}
	}
}
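
/* hrtimer callback: advance the local reservation environment to the current
 * time and either trigger a local reschedule (if an update is due) or re-arm
 * the timer for the next scheduler update. */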
static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
{
	unsigned long flags;
	enum hrtimer_restart restart = HRTIMER_NORESTART;
	struct pres_cpu_state *state;
	lt_t update, now;

	state = container_of(timer, struct pres_cpu_state, timer);

	/* The scheduling timer should only fire on the local CPU, because
	 * otherwise deadlocks via timer_cancel() are possible.
	 * Note: this does not interfere with dedicated interrupt handling, as
	 * even under dedicated interrupt handling scheduling timers for
	 * budget enforcement must occur locally on each CPU.
	 */
	BUG_ON(state->cpu != raw_smp_processor_id());

	raw_spin_lock_irqsave(&state->lock, flags);

	sup_update_time(&state->sup_env, litmus_clock());

	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
		  now, update, state->cpu);

	if (update <= now) {
		litmus_reschedule_local();
	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
		hrtimer_set_expires(timer, ns_to_ktime(update));
		restart = HRTIMER_RESTART;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	return restart;
}
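
/* Main scheduling entry point: charge elapsed time to the reservation
 * environment, remove a blocking task from its reservation, pick the next
 * client via sup_dispatch(), and re-arm the scheduler timer. */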
static struct task_struct* pres_schedule(struct task_struct * prev)
{
	/* next == NULL means "schedule background work". */
	struct pres_cpu_state *state = local_cpu_state();

	raw_spin_lock(&state->lock);

	BUG_ON(state->scheduled && state->scheduled != prev);
	BUG_ON(state->scheduled && !is_realtime(prev));

	/* update time */
	state->sup_env.will_schedule = true;
	sup_update_time(&state->sup_env, litmus_clock());

	/* remove task from reservation if it blocks */
	if (is_realtime(prev) && !is_running(prev))
		task_departs(prev, is_completed(prev));

	/* figure out what to schedule next */
	state->scheduled = sup_dispatch(&state->sup_env);

	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
	sched_state_task_picked();

	/* program scheduler timer */
	state->sup_env.will_schedule = false;
	/* NOTE: drops state->lock */
	pres_update_timer_and_unlock(state);

	if (prev != state->scheduled && is_realtime(prev))
		TRACE_TASK(prev, "descheduled.\n");
	if (state->scheduled)
		TRACE_TASK(state->scheduled, "scheduled.\n");

	return state->scheduled;
}
static void resume_legacy_task_model_updates(struct task_struct *tsk)
{
	lt_t now;

	if (is_sporadic(tsk)) {
		/* If this sporadic task was gone for a "long" time and woke up
		 * past its deadline, then give it a new budget by triggering a
		 * job release. This is purely cosmetic and has no effect on
		 * the P-RES scheduler. */
		now = litmus_clock();
		if (is_tardy(tsk, now))
			release_at(tsk, now);
	}
}
/* Called when the state of tsk changes back to TASK_RUNNING.
 * We need to requeue the task.
 */
static void pres_task_resume(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());

	raw_spin_lock_irqsave(&state->lock, flags);
	/* Requeue if self-suspension was already processed. */
	if (state->scheduled != tsk) {
		/* Assumption: litmus_clock() is synchronized across cores,
		 * since we might not actually be executing on tinfo->cpu
		 * at the moment. */
		sup_update_time(&state->sup_env, litmus_clock());
		task_arrives(tsk);
		/* NOTE: drops state->lock */
		pres_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	resume_legacy_task_model_updates(tsk);
}
/* syscall backend for job completions */
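/* The task marks its current job as complete, computes the next release via
 * the legacy periodic task model, and sleeps until that release time. */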
static long pres_complete_job(void)
{
	ktime_t next_release;
	long err;

	TRACE_CUR("pres_complete_job at %llu\n", litmus_clock());

	tsk_rt(current)->completed = 1;
	prepare_for_next_period(current);
	next_release = ns_to_ktime(get_release(current));
	set_current_state(TASK_INTERRUPTIBLE);
	err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);

	TRACE_CUR("pres_complete_job returns at %llu\n", litmus_clock());
	return err;
}
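
/* Admission control: a task may join P-RES only if the reservation whose ID
 * is given in its task_params.cpu field exists in the reservation
 * environment of the CPU on which the task currently runs. */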
static long pres_admit_task(struct task_struct *tsk)
{
	long err = -ESRCH;
	unsigned long flags;
	struct reservation *res;
	struct pres_cpu_state *state;
	struct pres_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);

	if (!tinfo)
		return -ENOMEM;

	preempt_disable();

	state = cpu_state_for(task_cpu(tsk));
	raw_spin_lock_irqsave(&state->lock, flags);

	res = sup_find_by_id(&state->sup_env, tsk_rt(tsk)->task_params.cpu);

	/* found the appropriate reservation (or vCPU) */
	if (res) {
		task_client_init(&tinfo->res_info, tsk, res);
		tinfo->cpu = task_cpu(tsk);
		tsk_rt(tsk)->plugin_state = tinfo;
		err = 0;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);
	preempt_enable();

	if (err)
		kfree(tinfo);

	return err;
}
static void task_new_legacy_task_model_updates(struct task_struct *tsk)
{
	lt_t now = litmus_clock();

	/* the first job exists starting as of right now */
	release_at(tsk, now);
}
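
/* Called when a task transitions to real-time mode: if it is runnable, attach
 * it to its reservation and make sure the local scheduler re-evaluates its
 * decision. */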
static void pres_task_new(struct task_struct *tsk, int on_runqueue,
			  int is_running)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
		   litmus_clock(), on_runqueue, is_running);

	/* acquire the lock protecting the state and disable interrupts */
	raw_spin_lock_irqsave(&state->lock, flags);

	if (is_running) {
		state->scheduled = tsk;
		/* make sure this task should actually be running */
		litmus_reschedule_local();
	}

	if (on_runqueue || is_running) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */
		sup_update_time(&state->sup_env, litmus_clock());
		task_arrives(tsk);
		/* NOTE: drops state->lock */
		pres_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	task_new_legacy_task_model_updates(tsk);
}
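
/* Called when a task leaves real-time mode or exits: detach it from its
 * reservation (if it is still runnable) and free the per-task state. */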
static void pres_task_exit(struct task_struct *tsk)
{
	unsigned long flags;
	struct pres_task_state* tinfo = get_pres_state(tsk);
	struct pres_cpu_state *state = cpu_state_for(tinfo->cpu);

	raw_spin_lock_irqsave(&state->lock, flags);

	if (state->scheduled == tsk)
		state->scheduled = NULL;

	/* remove from queues */
	if (is_running(tsk)) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */
		sup_update_time(&state->sup_env, litmus_clock());
		task_departs(tsk, 0);
		/* NOTE: drops state->lock */
		pres_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	kfree(tsk_rt(tsk)->plugin_state);
	tsk_rt(tsk)->plugin_state = NULL;
}
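
/* Create a periodic or sporadic polling reservation on the requested CPU
 * after validating budget <= period, budget <= relative deadline (if a
 * deadline is given), and offset <= period. A priority of LITMUS_NO_PRIORITY
 * selects EDF-based prioritization. */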
static long create_polling_reservation(
	int res_type,
	struct reservation_config *config)
{
	struct pres_cpu_state *state;
	struct reservation* res;
	struct polling_reservation *pres;
	unsigned long flags;
	int use_edf = config->priority == LITMUS_NO_PRIORITY;
	int periodic = res_type == PERIODIC_POLLING;
	long err = -EINVAL;

	if (config->polling_params.budget >
	    config->polling_params.period) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "budget > period\n", config->id);
		return -EINVAL;
	}
	if (config->polling_params.budget >
	    config->polling_params.relative_deadline
	    && config->polling_params.relative_deadline) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "budget > deadline\n", config->id);
		return -EINVAL;
	}
	if (config->polling_params.offset >
	    config->polling_params.period) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "offset > period\n", config->id);
		return -EINVAL;
	}

	/* Allocate before we grab a spin lock.
	 * Todo: would be nice to use a core-local allocation.
	 */
	pres = kzalloc(sizeof(*pres), GFP_KERNEL);
	if (!pres)
		return -ENOMEM;

	state = cpu_state_for(config->cpu);
	raw_spin_lock_irqsave(&state->lock, flags);

	res = sup_find_by_id(&state->sup_env, config->id);
	if (!res) {
		polling_reservation_init(pres, use_edf, periodic,
			config->polling_params.budget,
			config->polling_params.period,
			config->polling_params.relative_deadline,
			config->polling_params.offset);
		pres->res.id = config->id;
		if (!use_edf)
			pres->res.priority = config->priority;
		sup_add_new_reservation(&state->sup_env, &pres->res);
		err = config->id;
	} else {
		err = -EEXIST;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	if (err < 0)
		kfree(pres);

	return err;
}
#define MAX_INTERVALS 1024
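
/* Create a table-driven reservation from a user-supplied table of up to
 * MAX_INTERVALS scheduling slots. The slots must be well-formed, must not
 * overlap, and must fit within the major cycle length. */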
static long create_table_driven_reservation(
	struct reservation_config *config)
{
	struct pres_cpu_state *state;
	struct reservation* res;
	struct table_driven_reservation *td_res = NULL;
	struct lt_interval *slots = NULL;
	size_t slots_size;
	unsigned int i, num_slots;
	unsigned long flags;
	long err = -EINVAL;

	if (!config->table_driven_params.num_intervals) {
		printk(KERN_ERR "invalid table-driven reservation (%u): "
		       "no intervals\n", config->id);
		return -EINVAL;
	}

	if (config->table_driven_params.num_intervals > MAX_INTERVALS) {
		printk(KERN_ERR "invalid table-driven reservation (%u): "
		       "too many intervals (max: %d)\n", config->id, MAX_INTERVALS);
		return -EINVAL;
	}

	num_slots = config->table_driven_params.num_intervals;
	slots_size = sizeof(slots[0]) * num_slots;
	slots = kzalloc(slots_size, GFP_KERNEL);
	if (!slots)
		return -ENOMEM;
	td_res = kzalloc(sizeof(*td_res), GFP_KERNEL);
	if (!td_res)
		err = -ENOMEM;
	else if (copy_from_user(slots,
			config->table_driven_params.intervals, slots_size))
		/* copy_from_user() returns the number of bytes *not* copied;
		 * report -EFAULT so that the cleanup and return paths below
		 * see a proper negative error code. */
		err = -EFAULT;
	else
		err = 0;
	if (!err) {
		/* sanity checks */
		for (i = 0; !err && i < num_slots; i++)
			if (slots[i].end <= slots[i].start) {
				printk(KERN_ERR
				       "invalid table-driven reservation (%u): "
				       "invalid interval %u => [%llu, %llu]\n",
				       config->id, i,
				       slots[i].start, slots[i].end);
				err = -EINVAL;
			}

		for (i = 0; !err && i + 1 < num_slots; i++)
			if (slots[i + 1].start <= slots[i].end) {
				printk(KERN_ERR
				       "invalid table-driven reservation (%u): "
				       "overlapping intervals %u, %u\n",
				       config->id, i, i + 1);
				err = -EINVAL;
			}

		if (slots[num_slots - 1].end >
		    config->table_driven_params.major_cycle_length) {
			printk(KERN_ERR
			       "invalid table-driven reservation (%u): last "
			       "interval ends past major cycle %llu > %llu\n",
			       config->id,
			       slots[num_slots - 1].end,
			       config->table_driven_params.major_cycle_length);
			err = -EINVAL;
		}
	}

	if (!err) {
		state = cpu_state_for(config->cpu);
		raw_spin_lock_irqsave(&state->lock, flags);

		res = sup_find_by_id(&state->sup_env, config->id);
		if (!res) {
			table_driven_reservation_init(td_res,
				config->table_driven_params.major_cycle_length,
				slots, num_slots);
			td_res->res.id = config->id;
			td_res->res.priority = config->priority;
			sup_add_new_reservation(&state->sup_env, &td_res->res);
			err = config->id;
		} else {
			err = -EEXIST;
		}

		raw_spin_unlock_irqrestore(&state->lock, flags);
	}

	if (err < 0) {
		kfree(slots);
		kfree(td_res);
	}

	return err;
}
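
/* Syscall backend for reservation creation: copy the user-provided
 * struct reservation_config, check that the target CPU is online, and
 * dispatch on the reservation type. On success the reservation ID is
 * returned, so callers should treat only negative values as errors.
 *
 * Illustrative (hypothetical) userspace setup, using only the config fields
 * referenced in this file and assuming times are given in nanoseconds:
 *
 *	struct reservation_config cfg = {
 *		.id       = 123,
 *		.cpu      = 1,
 *		.priority = LITMUS_NO_PRIORITY,	// EDF-based polling reservation
 *		.polling_params = {
 *			.budget = 10000000,	// 10 ms
 *			.period = 100000000,	// 100 ms
 *		},
 *	};
 */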
static long pres_reservation_create(int res_type, void* __user _config)
{
	long ret = -EINVAL;
	struct reservation_config config;

	TRACE("Attempt to create reservation (%d)\n", res_type);

	if (copy_from_user(&config, _config, sizeof(config)))
		return -EFAULT;

	/* this check applies to all reservation types, not just polling */
	if (config.cpu < 0 || !cpu_online(config.cpu)) {
		printk(KERN_ERR "invalid reservation (%u): "
		       "CPU %d offline\n", config.id, config.cpu);
		return -EINVAL;
	}

	switch (res_type) {
	case PERIODIC_POLLING:
	case SPORADIC_POLLING:
		ret = create_polling_reservation(res_type, &config);
		break;

	case TABLE_DRIVEN:
		ret = create_table_driven_reservation(&config);
		break;

	default:
		return -EINVAL;
	}

	return ret;
}
static struct domain_proc_info pres_domain_proc_info;
static long pres_get_domain_proc_info(struct domain_proc_info **ret)
{
	*ret = &pres_domain_proc_info;
	return 0;
}
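
/* Set up the CPU-to-domain mapping reported through the LITMUS^RT proc
 * interface: under partitioned reservations, each online CPU forms its own
 * scheduling domain. */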
static void pres_setup_domain_proc(void)
{
	int i, cpu;
	int num_rt_cpus = num_online_cpus();
	struct cd_mapping *cpu_map, *domain_map;

	/* zero the mapping before (re-)initializing it */
	memset(&pres_domain_proc_info, 0, sizeof(pres_domain_proc_info));
	init_domain_proc_info(&pres_domain_proc_info, num_rt_cpus, num_rt_cpus);
	pres_domain_proc_info.num_cpus = num_rt_cpus;
	pres_domain_proc_info.num_domains = num_rt_cpus;

	i = 0;
	for_each_online_cpu(cpu) {
		cpu_map = &pres_domain_proc_info.cpu_to_domains[i];
		domain_map = &pres_domain_proc_info.domain_to_cpus[i];

		cpu_map->id = cpu;
		domain_map->id = i;
		cpumask_set_cpu(i, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);
		++i;
	}
}
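
/* Plugin activation: initialize the per-CPU state (lock, reservation
 * environment, and pinned scheduler hrtimer) for every online CPU. */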
static long pres_activate_plugin(void)
{
	int cpu;
	struct pres_cpu_state *state;

	for_each_online_cpu(cpu) {
		TRACE("Initializing CPU%d...\n", cpu);

		state = cpu_state_for(cpu);

		raw_spin_lock_init(&state->lock);
		state->cpu = cpu;
		state->scheduled = NULL;

		sup_init(&state->sup_env);

		hrtimer_init(&state->timer, CLOCK_MONOTONIC,
			     HRTIMER_MODE_ABS_PINNED);
		state->timer.function = on_scheduling_timer;
	}

	pres_setup_domain_proc();

	return 0;
}
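
/* Plugin deactivation: stop the scheduler timers and free all reservations
 * still registered on any CPU. */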
static long pres_deactivate_plugin(void)
{
	int cpu;
	struct pres_cpu_state *state;
	struct reservation *res;

	for_each_online_cpu(cpu) {
		state = cpu_state_for(cpu);

		/* Cancel the scheduling timer before taking the state lock:
		 * the timer callback acquires the same lock, so waiting for a
		 * running callback while holding the lock could deadlock. */
		hrtimer_cancel(&state->timer);

		raw_spin_lock(&state->lock);

		/* Delete all reservations --- assumes struct reservation
		 * is prefix of containing struct. */

		while (!list_empty(&state->sup_env.active_reservations)) {
			res = list_first_entry(
				&state->sup_env.active_reservations,
				struct reservation, list);
			list_del(&res->list);
			kfree(res);
		}

		while (!list_empty(&state->sup_env.inactive_reservations)) {
			res = list_first_entry(
				&state->sup_env.inactive_reservations,
				struct reservation, list);
			list_del(&res->list);
			kfree(res);
		}

		while (!list_empty(&state->sup_env.depleted_reservations)) {
			res = list_first_entry(
				&state->sup_env.depleted_reservations,
				struct reservation, list);
			list_del(&res->list);
			kfree(res);
		}

		raw_spin_unlock(&state->lock);
	}

	destroy_domain_proc_info(&pres_domain_proc_info);
	return 0;
}
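
/* Plugin callbacks registered with the LITMUS^RT core. */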
static struct sched_plugin pres_plugin = {
	.plugin_name            = "P-RES",
	.schedule               = pres_schedule,
	.task_wake_up           = pres_task_resume,
	.admit_task             = pres_admit_task,
	.task_new               = pres_task_new,
	.task_exit              = pres_task_exit,
	.complete_job           = pres_complete_job,
	.get_domain_proc_info   = pres_get_domain_proc_info,
	.activate_plugin        = pres_activate_plugin,
	.deactivate_plugin      = pres_deactivate_plugin,
	.reservation_create     = pres_reservation_create,
};
static int __init init_pres(void)
{
	return register_sched_plugin(&pres_plugin);
}
module_init(init_pres);