/* EDF-WM: a semi-partitioned EDF scheduler plugin in which selected jobs
 * are split into slices that may execute on different CPUs.
 * Based on the PSN-EDF plugin.
 */
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
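/* Overview: each CPU owns a wm_domain_t, i.e., a plain EDF rt_domain_t
 * plus a budget-enforcement hrtimer. Ordinary tasks are handled as in
 * PSN-EDF on their assigned partition. "Sliced" tasks additionally carry
 * an array of job slices (semi_part.wm in the task parameters); each
 * slice specifies a CPU, a budget, an offset, and a relative deadline.
 * The slices of a job are dispatched one after the other, each competing
 * in its own CPU's EDF queue, by temporarily rewriting the job's release,
 * deadline, and cpu fields (see compute_slice_params()).
 */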
typedef struct {
rt_domain_t domain;
int cpu;
struct task_struct* scheduled; /* only RT tasks */
/* The enforcement timer is used to accurately police
* slice budgets. */
struct hrtimer enforcement_timer;
int timer_armed;
/*
* scheduling lock slock
* protects the domain and serializes scheduling decisions
*/
#define slock domain.ready_lock
} wm_domain_t;
DEFINE_PER_CPU(wm_domain_t, wm_domains);
#define TRACE_DOM(dom, fmt, args...) \
TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args)
#define local_domain (&__get_cpu_var(wm_domains))
#define remote_domain(cpu) (&per_cpu(wm_domains, cpu))
#define domain_of_task(task) (remote_domain(get_partition(task)))
#define domain_from_timer(t) (container_of((t), wm_domain_t, enforcement_timer))
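/* A task is "sliced" iff its parameters specify at least one job slice;
 * purely partitioned tasks have a slice count of zero. */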
static int is_sliced_task(struct task_struct* t)
{
return tsk_rt(t)->task_params.semi_part.wm.count;
}
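/* Illustration (made-up numbers): a job released at time 100 whose current
 * slice has offset 20 and relative deadline 50 is enqueued with release
 * 120 and absolute deadline 150, on the slice's CPU. */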
static void compute_slice_params(struct task_struct* t)
{
struct rt_param* p = tsk_rt(t);
/* Here we do a little trick to make the generic EDF code
* play well with job slices. We overwrite the job-level
* release and deadline fields with the slice-specific values
* so that we can enqueue this task in an EDF rt_domain_t
* without issue. The actual values are cached in the semi_part.wm
* structure. */
p->job_params.deadline = p->semi_part.wm.job_release +
p->semi_part.wm.slice->deadline;
p->job_params.release = p->semi_part.wm.job_release +
p->semi_part.wm.slice->offset;
/* Similarly, we play a trick on the cpu field. */
p->task_params.cpu = p->semi_part.wm.slice->cpu;
/* update the per-slice budget reference */
p->semi_part.wm.exec_time = p->job_params.exec_time;
}
static void complete_sliced_job(struct task_struct* t)
{
struct rt_param* p = tsk_rt(t);
/* We need to undo our trickery to the
* job parameters (see above). */
p->job_params.release = p->semi_part.wm.job_release;
p->job_params.deadline = p->semi_part.wm.job_deadline;
/* Ok, now let generic code do the actual work. */
prepare_for_next_period(t);
/* And finally cache the updated parameters. */
p->semi_part.wm.job_release = p->job_params.release;
p->semi_part.wm.job_deadline = p->job_params.deadline;
}
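/* Switch a sliced task to its next slice. If the job signaled completion,
 * we restart at slice 0 of the next job regardless of which slice was
 * current; otherwise we advance to the following slice. Wrapping around
 * to index 0 means the last slice has finished, i.e., the job as a whole
 * is complete. For example, with count == 3 the slices are visited as
 * 0 -> 1 -> 2 -> 0 (job completion), unless an early completion resets
 * the sequence to 0 first. */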
static void advance_next_slice(struct task_struct* t, int completion_signaled)
{
int idx;
struct rt_param* p = tsk_rt(t);
/* make sure this is actually a sliced job */
BUG_ON(!is_sliced_task(t));
/* determine index of current slice */
idx = p->semi_part.wm.slice -
p->task_params.semi_part.wm.slices;
if (completion_signaled)
idx = 0;
else
/* increment and wrap around, if necessary */
idx = (idx + 1) % p->task_params.semi_part.wm.count;
/* point to next slice */
p->semi_part.wm.slice =
p->task_params.semi_part.wm.slices + idx;
/* Check if we need to update essential job parameters. */
if (!idx) {
/* job completion */
sched_trace_task_completion(t, !completion_signaled);
complete_sliced_job(t);
}
/* Update job parameters for new slice. */
compute_slice_params(t);
}
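/* A slice is out of budget once the execution time consumed since the last
 * slice switch (snapshotted in semi_part.wm.exec_time) reaches the slice's
 * budget. */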
static int slice_budget_exhausted(struct task_struct* t)
{
struct rt_param* p = tsk_rt(t);
/* Compute how much execution time has been consumed
* since last slice advancement. */
lt_t slice_exec = p->job_params.exec_time - p->semi_part.wm.exec_time;
return slice_exec >= p->semi_part.wm.slice->budget;
}
/* Trigger a reschedule on dom's CPU; the caller must hold dom->slock. */
static void preempt(wm_domain_t *dom)
{
TRACE_DOM(dom, "will be preempted.\n");
/* We pass NULL as the task since non-preemptive sections are not
* supported in this plugin, so per-task checks are not needed. */
preempt_if_preemptable(NULL, dom->cpu);
}
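/* Budget-enforcement timer callback. If the timer is still armed when it
 * fires, force a reschedule on the domain's CPU so that wm_schedule() can
 * deal with the exhausted budget. */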
static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
{
wm_domain_t *dom = domain_from_timer(timer);
unsigned long flags;
raw_spin_lock_irqsave(&dom->slock, flags);
if (likely(dom->timer_armed)) {
TRACE_DOM(dom, "enforcement timer fired.\n");
dom->timer_armed = 0;
preempt(dom);
} else
TRACE_DOM(dom, "timer fired but not armed???\n");
raw_spin_unlock_irqrestore(&dom->slock, flags);
return HRTIMER_NORESTART;
}
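/* Initialize one per-CPU domain: a plain EDF rt_domain_t plus the
 * (initially disarmed) budget-enforcement timer. */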
static void wm_domain_init(wm_domain_t* dom,
check_resched_needed_t check,
release_jobs_t release,
int cpu)
{
edf_domain_init(&dom->domain, check, release);
dom->cpu = cpu;
dom->scheduled = NULL;
dom->timer_armed = 0;
hrtimer_init(&dom->enforcement_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
dom->enforcement_timer.function = on_enforcement_timeout;
}
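/* Re-insert a task into its domain: onto the ready queue if its release
 * time has already passed, onto the release queue otherwise. The caller
 * must hold the domain lock. */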
static void requeue(struct task_struct* t, rt_domain_t *edf)
{
if (t->state != TASK_RUNNING)
TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
set_rt_flags(t, RT_F_RUNNING);
if (is_released(t, litmus_clock()))
__add_ready(edf, t);
else
add_release(edf, t); /* it has got to wait */
}
static int wm_check_resched(rt_domain_t *edf)
{
wm_domain_t *dom = container_of(edf, wm_domain_t, domain);
/* because this is a callback from rt_domain_t we already hold
* the necessary lock for the ready queue
*/
if (edf_preemption_needed(edf, dom->scheduled)) {
preempt(dom);
return 1;
} else
return 0;
}
static void regular_job_completion(struct task_struct* t, int forced)
{
sched_trace_task_completion(t, forced);
TRACE_TASK(t, "job_completion().\n");
set_rt_flags(t, RT_F_SLEEP);
prepare_for_next_period(t);
}
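/* Handle a completion: sliced tasks advance to their next slice (which
 * implies a proper job completion when the slice index wraps around to 0),
 * all other tasks complete their job directly. completion_signaled
 * distinguishes a voluntary completion from a forced one caused by budget
 * exhaustion. */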
static void wm_job_or_slice_completion(struct task_struct* t,
int completion_signaled)
{
if (is_sliced_task(t))
advance_next_slice(t, completion_signaled);
else
regular_job_completion(t, !completion_signaled);
}
static int wm_budget_exhausted(struct task_struct* t)
{
if (is_sliced_task(t))
return slice_budget_exhausted(t);
else
return budget_exhausted(t);
}
static void wm_tick(struct task_struct *t)
{
wm_domain_t *dom = local_domain;
	/* Check for inconsistency. We don't need the lock for this since
	 * ->scheduled is only changed in wm_schedule(), which obviously is
	 * not executing in parallel on this CPU.
	 */
BUG_ON(is_realtime(t) && t != dom->scheduled);
if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) {
set_tsk_need_resched(t);
TRACE_DOM(dom, "budget of %d exhausted in tick\n",
t->pid);
}
}
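/* The scheduling decision for one partition. Under the domain lock we
 * determine the state of the previously scheduled task (does it exist, has
 * it blocked, is it out of budget, does it want to sleep, should it be
 * preempted by a higher-priority ready task), complete its job or slice if
 * necessary, requeue it, and pick the next task via plain EDF. */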
static struct task_struct* wm_schedule(struct task_struct * prev)
{
wm_domain_t* dom = local_domain;
rt_domain_t* edf = &dom->domain;
struct task_struct* next;
int out_of_time, sleep, preempt,
exists, blocks, resched;
raw_spin_lock(&dom->slock);
	/* Sanity checking:
	 * unlike under GSN-EDF, when a task exits (is dead), dom->scheduled
	 * may be NULL while prev still _is_ a real-time task.
	 */
BUG_ON(dom->scheduled && dom->scheduled != prev);
BUG_ON(dom->scheduled && !is_realtime(prev));
/* (0) Determine state */
exists = dom->scheduled != NULL;
blocks = exists && !is_running(dom->scheduled);
out_of_time = exists
&& budget_enforced(dom->scheduled)
&& wm_budget_exhausted(dom->scheduled);
sleep = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP;
preempt = edf_preemption_needed(edf, prev);
/* If we need to preempt do so.
* The following checks set resched to 1 in case of special
* circumstances.
*/
resched = preempt;
/* If a task blocks we have no choice but to reschedule.
*/
if (blocks)
resched = 1;
/* Any task that is preemptable and either exhausts its execution
* budget or wants to sleep completes. We may have to reschedule after
* this.
*/
if ((out_of_time || sleep) && !blocks) {
wm_job_or_slice_completion(dom->scheduled, sleep);
resched = 1;
}
/* The final scheduling decision. Do we need to switch for some reason?
* Switch if we are in RT mode and have no task or if we need to
* resched.
*/
next = NULL;
if (resched || !exists) {
if (dom->scheduled && !blocks)
requeue(dom->scheduled, edf);
next = __take_ready(edf);
} else
/* Only override Linux scheduler if we have a real-time task
* scheduled that needs to continue. */
if (exists)
next = prev;
if (next) {
TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
set_rt_flags(next, RT_F_RUNNING);
} else {
TRACE("becoming idle at %llu\n", litmus_clock());
}
dom->scheduled = next;
raw_spin_unlock(&dom->slock);
return next;
}
/* Prepare a task for running in RT mode
*/
static void wm_task_new(struct task_struct * t, int on_rq, int running)
{
wm_domain_t* dom = domain_of_task(t);
rt_domain_t* edf = &dom->domain;
unsigned long flags;
TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
t->rt_param.task_params.cpu);
/* setup job parameters */
release_at(t, litmus_clock());
/* The task should be running in the queue, otherwise signal
* code will try to wake it up with fatal consequences.
*/
raw_spin_lock_irqsave(&dom->slock, flags);
if (running) {
/* there shouldn't be anything else running at the time */
BUG_ON(dom->scheduled);
dom->scheduled = t;
} else {
requeue(t, edf);
/* maybe we have to reschedule */
preempt(dom);
}
raw_spin_unlock_irqrestore(&dom->slock, flags);
}
static void wm_task_wake_up(struct task_struct *task)
{
unsigned long flags;
wm_domain_t* dom = domain_of_task(task);
rt_domain_t* edf = &dom->domain;
lt_t now;
TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
raw_spin_lock_irqsave(&dom->slock, flags);
BUG_ON(is_queued(task));
now = litmus_clock();
if (is_tardy(task, now)) {
/* new sporadic release */
release_at(task, now);
sched_trace_task_release(task);
}
/* Only add to ready queue if it is not the currently-scheduled
* task. This could be the case if a task was woken up concurrently
* on a remote CPU before the executing CPU got around to actually
* de-scheduling the task, i.e., wake_up() raced with schedule()
* and won.
*/
if (dom->scheduled != task)
requeue(task, edf);
raw_spin_unlock_irqrestore(&dom->slock, flags);
TRACE_TASK(task, "wake up done\n");
}
static void wm_task_block(struct task_struct *t)
{
/* only running tasks can block, thus t is in no queue */
TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
BUG_ON(!is_realtime(t));
BUG_ON(is_queued(t));
}
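/* A task leaves real-time mode: remove it from any queue it may still be
 * in, forget it as the currently scheduled task if necessary, and trigger
 * a reschedule on its partition. */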
static void wm_task_exit(struct task_struct * t)
{
unsigned long flags;
wm_domain_t* dom = domain_of_task(t);
rt_domain_t* edf = &dom->domain;
raw_spin_lock_irqsave(&dom->slock, flags);
if (is_queued(t)) {
/* dequeue */
remove(edf, t);
}
if (dom->scheduled == t)
dom->scheduled = NULL;
TRACE_TASK(t, "RIP, now reschedule\n");
preempt(dom);
raw_spin_unlock_irqrestore(&dom->slock, flags);
}
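/* Only admit a task if it is already executing on its assigned
 * partition. */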
static long wm_admit_task(struct task_struct* tsk)
{
return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
}
/* Plugin object */
static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = {
.plugin_name = "EDF-WM",
#ifdef CONFIG_SRP
.srp_active = 1,
#endif
.tick = wm_tick,
.task_new = wm_task_new,
.complete_job = complete_job,
.task_exit = wm_task_exit,
.schedule = wm_schedule,
.task_wake_up = wm_task_wake_up,
.task_block = wm_task_block,
.admit_task = wm_admit_task
};
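/* Module initialization: set up one wm_domain_t per online CPU and
 * register the plugin with the LITMUS^RT core. */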
static int __init init_edf_wm(void)
{
int i;
	/* We do not really want to support CPU hotplug, do we? ;)
	 * However, if we were crazy enough to do so,
	 * we could not use num_online_cpus() here.
	 */
for (i = 0; i < num_online_cpus(); i++) {
wm_domain_init(remote_domain(i),
wm_check_resched,
NULL, i);
}
return register_sched_plugin(&edf_wm_plugin);
}
module_init(init_edf_wm);