/*
* litmus/sched_edf_fm.c
*
* Implementation of the EDF-fm scheduling algorithm.
*/
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
typedef struct {
rt_domain_t domain;
int cpu;
struct task_struct* scheduled; /* only RT tasks */
/* domain lock */
#define slock domain.ready_lock
} edffm_domain_t;
DEFINE_PER_CPU(edffm_domain_t, edffm_domains);
#define local_edffm (&__get_cpu_var(edffm_domains))
#define remote_edf(cpu) (&per_cpu(edffm_domains, cpu).domain)
#define remote_edffm(cpu) (&per_cpu(edffm_domains, cpu))
#define task_edf(task) remote_edf(get_partition(task))
#define task_edffm(task) remote_edffm(get_partition(task))
#define edffm_params(t) (t->rt_param.task_params.semi_part.fm)
/* Is the task a migratory task? */
#define is_migrat_task(task) (edffm_params(task).nr_cpus)
/* t is on the wrong CPU (it should be requeued properly) */
#define wrong_cpu(t) (is_migrat_task((t)) && task_cpu((t)) != get_partition((t)))
/* Get next CPU */
#define migrat_next_cpu(t) \
((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
edffm_params(t).cpus[1] : \
edffm_params(t).cpus[0])
/* Get current CPU */
#define migrat_cur_cpu(t) \
((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
edffm_params(t).cpus[0] : \
edffm_params(t).cpus[1])
/* Fraction of jobs (numerator / denominator) assigned to the current CPU */
#define cur_cpu_fract_num(t) \
((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
edffm_params(t).fraction[0][0] : \
edffm_params(t).fraction[0][1])
#define cur_cpu_fract_den(t) \
((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
edffm_params(t).fraction[1][0] : \
edffm_params(t).fraction[1][1])
/* Get job number for current cpu */
#define cur_cpu_job_no(t) \
((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
tsk_rt(t)->semi_part.cpu_job_no[0] : \
tsk_rt(t)->semi_part.cpu_job_no[1])
/* What is the current cpu position in the array? */
#define edffm_cpu_pos(cpu,t) \
((cpu == edffm_params(t).cpus[0]) ? \
0 : 1)
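/*
 * Illustration (hypothetical values, not taken from this plugin): a migratory
 * task that splits its workload between CPUs 1 and 2, running 2 out of every
 * 5 jobs on CPU 1 and the remaining 3 out of 5 on CPU 2, would carry roughly
 * the following parameters in task_params:
 *
 *	semi_part.fm.nr_cpus        = 2;
 *	semi_part.fm.cpus[0]        = 1;
 *	semi_part.fm.cpus[1]        = 2;
 *	semi_part.fm.fraction[0][0] = 2;  fraction[1][0] = 5;  (2/5 on cpus[0])
 *	semi_part.fm.fraction[0][1] = 3;  fraction[1][1] = 5;  (3/5 on cpus[1])
 *
 * With task_params.cpu == 1, cur_cpu_fract_num()/cur_cpu_fract_den() evaluate
 * to 2 and 5, and edffm_cpu_pos(1, t) to 0.
 */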
/*
* EDF-fm: migratory tasks have higher prio than fixed, EDF in both classes.
* (Both first and second may be NULL).
*/
int edffm_higher_prio(struct task_struct* first, struct task_struct* second)
{
if ((first && edffm_params(first).nr_cpus) ||
(second && edffm_params(second).nr_cpus)) {
if ((first && edffm_params(first).nr_cpus) &&
(second && edffm_params(second).nr_cpus))
/* both are migrating */
return edf_higher_prio(first, second);
if (first && edffm_params(first).nr_cpus)
/* first is migrating */
return 1;
else
/* second is migrating */
return 0;
}
/* both are fixed or not real time */
return edf_higher_prio(first, second);
}
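/* Illustration of the resulting order (deadlines are made up): a migratory
 * task with deadline 20 has higher priority than a fixed task with deadline
 * 10, because the class test comes first; two migratory (or two fixed) tasks
 * with deadlines 10 and 20 fall through to plain EDF, so deadline 10 wins.
 */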
/* edffm_preemption_needed - check whether task t needs to be preempted.
 * Call only with irqs disabled and with the domain's ready_lock acquired.
 */
int edffm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
{
	/* the caller must hold the domain's ready_lock */
/* no need to preempt if there is nothing pending */
if (!__jobs_pending(rt))
return 0;
/* we need to reschedule if t doesn't exist */
if (!t)
return 1;
/* make sure to get non-rt stuff out of the way */
return !is_realtime(t) || edffm_higher_prio(__next_ready(rt), t);
}
/* we assume the lock is being held */
static void preempt(edffm_domain_t *edffm)
{
preempt_if_preemptable(edffm->scheduled, edffm->cpu);
}
static void edffm_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
unsigned long flags;
edffm_domain_t *edffm = container_of(rt, edffm_domain_t, domain);
raw_spin_lock_irqsave(&edffm->slock, flags);
__merge_ready(rt, tasks);
if (edffm_preemption_needed(rt, edffm->scheduled))
preempt(edffm);
raw_spin_unlock_irqrestore(&edffm->slock, flags);
}
/* EDF-fm uses the "release_master" field to force the next release of
 * 'task' to be handled on a remote CPU. The remote CPU for the task was
 * set up earlier, in job_completion(), taking into account whether the
 * task is migratory or not.
 */
static inline void
edffm_add_release_remote(struct task_struct *task)
{
unsigned long flags;
rt_domain_t *rt = task_edf(task);
raw_spin_lock_irqsave(&rt->tobe_lock, flags);
/* "modify" destination cpu */
rt->release_master = get_partition(task);
TRACE_TASK(task, "Add remote release: smp_proc_id = %d, cpu = %d, remote = %d\n",
smp_processor_id(), task_cpu(task), rt->release_master);
/* trigger future release */
__add_release(rt, task);
	/* restore release_master and unlock */
rt->release_master = NO_CPU;
raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
}
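/* Example scenario: a migratory task completes its last job for P0 and
 * job_completion() has already switched task_params.cpu to P1. task_edf()
 * then resolves to P1's domain, and release_master is set to P1 for the
 * duration of __add_release() so that the future release is handled on P1
 * rather than locally; release_master is restored to NO_CPU before the
 * tobe_lock is dropped.
 */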
/* Acquire two ready-queue locks in a fixed (address) order to avoid
 * deadlock. Called with interrupts disabled and rq->lock held (from
 * schedule()).
 */
static noinline void double_domain_lock(edffm_domain_t *dom1, edffm_domain_t *dom2)
{
if (dom1 == dom2) {
		/* same domain: only one lock to take */
raw_spin_lock(&dom1->slock);
} else {
if (dom1 < dom2) {
raw_spin_lock(&dom1->slock);
raw_spin_lock(&dom2->slock);
TRACE("acquired %d and %d\n", dom1->cpu, dom2->cpu);
} else {
raw_spin_lock(&dom2->slock);
raw_spin_lock(&dom1->slock);
TRACE("acquired %d and %d\n", dom2->cpu, dom1->cpu);
}
}
}
/* Directly insert a task in a remote ready queue. This function
* should only be called if this task is a migrating task and its
* last job for this CPU just completed (a new one is released for
* a remote CPU), but the new job is already tardy.
*/
static noinline void insert_task_in_remote_ready(struct task_struct *task)
{
edffm_domain_t *this = remote_edffm(task_cpu(task));
edffm_domain_t *remote = remote_edffm(get_partition(task));
BUG_ON(get_partition(task) != remote->cpu);
TRACE_TASK(task, "Migrate From P%d -> To P%d\n",
this->cpu, remote->cpu);
TRACE_TASK(task, "Inserting in remote ready queue\n");
WARN_ON(!irqs_disabled());
raw_spin_unlock(&this->slock);
mb();
TRACE_TASK(task,"edffm_lock %d released\n", this->cpu);
/* lock both ready queues */
double_domain_lock(this, remote);
mb();
__add_ready(&remote->domain, task);
/* release remote but keep ours */
raw_spin_unlock(&remote->slock);
TRACE_TASK(task,"edffm_lock %d released\n", remote->cpu);
	/* ask the remote CPU to reschedule; we are already rescheduling on this one */
preempt(remote);
}
static void requeue(struct task_struct* t, rt_domain_t *edf)
{
if (t->state != TASK_RUNNING)
TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
set_rt_flags(t, RT_F_RUNNING);
if (is_released(t, litmus_clock())) {
if (wrong_cpu(t)) {
/* this should only happen if t just completed, but
* its next release is already tardy, so it should be
* migrated and inserted in the remote ready queue
*/
TRACE_TASK(t, "Migrating task already released, "
"move from P%d to P%d\n",
task_cpu(t), get_partition(t));
insert_task_in_remote_ready(t);
} else {
/* not a migrat task or the job is on the right CPU */
__add_ready(edf, t);
}
} else {
if (wrong_cpu(t)) {
TRACE_TASK(t, "Migrating task, adding remote release\n");
edffm_add_release_remote(t);
} else {
TRACE_TASK(t, "Adding local release\n");
add_release(edf, t);
}
}
}
/* Update statistics for the _current_ job.
* - job_no was incremented _before_ starting this job
* (release_at / prepare_for_next_period)
* - cpu_job_no is incremented when the job completes
*/
static void update_job_counter(struct task_struct *t)
{
int cpu_pos;
/* Which CPU counter should be incremented? */
cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t);
t->rt_param.semi_part.cpu_job_no[cpu_pos]++;
TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n",
t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t),
t->rt_param.task_params.cpu);
}
/* What is the next CPU for this job? (Eq. 8 in the EDF-fm paper) */
static int next_cpu_for_job(struct task_struct *t)
{
BUG_ON(!is_migrat_task(t));
if ((t->rt_param.job_params.job_no) ==
(((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) /
cur_cpu_fract_num(t)))
return edffm_params(t).cpus[0];
return edffm_params(t).cpus[1];
}
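/* Purely arithmetical illustration of the test above (made-up numbers; see
 * the EDF-fm paper for the derivation of Eq. 8): with a fraction of 1/2 on
 * the current CPU (num = 1, den = 2) and cur_cpu_job_no(t) == 2, the
 * right-hand side evaluates to (2 * 2) / 1 = 4. The next job is assigned to
 * cpus[0] only if the task's global job_no is also 4; for any other job_no
 * it goes to cpus[1].
 */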
/* If needed (the share for task t on this CPU is exhausted), updates
* the task_params.cpu for the _migrating_ task t
*/
static void change_migrat_cpu_if_needed(struct task_struct *t)
{
BUG_ON(!is_migrat_task(t));
/* EDF-fm: if it is a migrating task and it has already executed
* the required number of jobs on this CPU, we need to move it
* on its next CPU; changing the cpu here will affect the requeue
* and the next release
*/
if (unlikely(next_cpu_for_job(t) != migrat_cur_cpu(t))) {
tsk_rt(t)->task_params.cpu = migrat_next_cpu(t);
TRACE_TASK(t, "EDF-fm: will migrate job %d -> %d\n",
task_cpu(t), tsk_rt(t)->task_params.cpu);
return;
}
TRACE_TASK(t, "EDF-fm: job will stay on %d -> %d\n",
task_cpu(t), tsk_rt(t)->task_params.cpu);
}
static void job_completion(struct task_struct* t, int forced)
{
sched_trace_task_completion(t,forced);
TRACE_TASK(t, "job_completion().\n");
if (unlikely(is_migrat_task(t))) {
update_job_counter(t);
change_migrat_cpu_if_needed(t);
}
set_rt_flags(t, RT_F_SLEEP);
prepare_for_next_period(t);
}
static void edffm_tick(struct task_struct *t)
{
edffm_domain_t *edffm = local_edffm;
BUG_ON(is_realtime(t) && t != edffm->scheduled);
if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
set_tsk_need_resched(t);
TRACE("edffm_scheduler_tick: "
"%d is preemptable "
" => FORCE_RESCHED\n", t->pid);
}
}
static struct task_struct* edffm_schedule(struct task_struct * prev)
{
edffm_domain_t* edffm = local_edffm;
rt_domain_t* edf = &edffm->domain;
struct task_struct* next;
int out_of_time, sleep, preempt, exists, blocks, change_cpu, resched;
raw_spin_lock(&edffm->slock);
BUG_ON(edffm->scheduled && edffm->scheduled != prev);
BUG_ON(edffm->scheduled && !is_realtime(prev));
/* (0) Determine state */
exists = edffm->scheduled != NULL;
blocks = exists && !is_running(edffm->scheduled);
out_of_time = exists &&
budget_enforced(edffm->scheduled) &&
budget_exhausted(edffm->scheduled);
sleep = exists && get_rt_flags(edffm->scheduled) == RT_F_SLEEP;
change_cpu = exists && wrong_cpu(edffm->scheduled);
preempt = edffm_preemption_needed(edf, prev);
BUG_ON(blocks && change_cpu);
if (exists)
TRACE_TASK(prev,
"blocks:%d out_of_time:%d sleep:%d preempt:%d "
"wrong_cpu:%d state:%d sig:%d\n",
blocks, out_of_time, sleep, preempt,
change_cpu, prev->state, signal_pending(prev));
/* If we need to preempt do so. */
resched = preempt;
/* If a task blocks we have no choice but to reschedule. */
if (blocks)
resched = 1;
	/* If the scheduled task is on the wrong CPU, it has just woken up
	 * tardy and the wake-up raced with this schedule(): a new job has
	 * already been released, so the task must be enqueued on the remote
	 * ready queue and a new task must be picked for this CPU.
	 */
if (change_cpu)
resched = 1;
/* Any task that is preemptable and either exhausts its execution
* budget or wants to sleep completes. We may have to reschedule after
* this.
*/
if ((out_of_time || sleep) && !blocks) {
job_completion(edffm->scheduled, !sleep);
resched = 1;
}
/* The final scheduling decision. Do we need to switch for some reason?
* Switch if we are in RT mode and have no task or if we need to
* resched.
*/
next = NULL;
if (resched || !exists) {
if (edffm->scheduled && !blocks)
requeue(edffm->scheduled, edf);
next = __take_ready(edf);
} else
/* Only override Linux scheduler if we have a real-time task
* scheduled that needs to continue.
*/
if (exists)
next = prev;
if (next) {
TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
set_rt_flags(next, RT_F_RUNNING);
} else {
TRACE("becoming idle at %llu\n", litmus_clock());
}
edffm->scheduled = next;
raw_spin_unlock(&edffm->slock);
return next;
}
/* Prepare a task for running in RT mode
*/
static void edffm_task_new(struct task_struct * t, int on_rq, int running)
{
rt_domain_t* edf = task_edf(t);
edffm_domain_t* edffm = task_edffm(t);
unsigned long flags;
TRACE_TASK(t, "EDF-fm: task new, cpu = %d\n",
t->rt_param.task_params.cpu);
release_at(t, litmus_clock());
update_job_counter(t);
	/* The task must either be running or sit in a ready queue; otherwise
	 * the signal code will try to wake it up, with fatal consequences.
	 */
raw_spin_lock_irqsave(&edffm->slock, flags);
if (running) {
/* there shouldn't be anything else running at the time */
BUG_ON(edffm->scheduled);
edffm->scheduled = t;
} else {
requeue(t, edf);
/* maybe we have to reschedule */
preempt(edffm);
}
raw_spin_unlock_irqrestore(&edffm->slock, flags);
}
static void edffm_task_wake_up(struct task_struct *task)
{
unsigned long flags;
edffm_domain_t* edffm = task_edffm(task);
rt_domain_t* edf = task_edf(task);
lt_t now;
TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
TRACE_TASK(task, "acquire edffm %d\n", edffm->cpu);
raw_spin_lock_irqsave(&edffm->slock, flags);
BUG_ON(edffm != task_edffm(task));
BUG_ON(is_queued(task));
now = litmus_clock();
if (is_tardy(task, now)) {
if (unlikely(is_migrat_task(task))) {
/* a new job will be released.
* Update current job counter */
update_job_counter(task);
/* Switch CPU if needed */
change_migrat_cpu_if_needed(task);
}
/* new sporadic release */
TRACE_TASK(task, "release new\n");
release_at(task, now);
sched_trace_task_release(task);
}
/* Only add to ready queue if it is not the currently-scheduled
* task. This could be the case if a task was woken up concurrently
* on a remote CPU before the executing CPU got around to actually
* de-scheduling the task, i.e., wake_up() raced with schedule()
* and won.
*/
if (edffm->scheduled != task)
requeue(task, edf);
raw_spin_unlock_irqrestore(&edffm->slock, flags);
TRACE_TASK(task, "release edffm %d\n", edffm->cpu);
TRACE_TASK(task, "wake up done\n");
}
static void edffm_task_block(struct task_struct *t)
{
TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
BUG_ON(!is_realtime(t));
if (is_queued(t)) {
edffm_domain_t *edffm = local_edffm;
TRACE_TASK(t, "task blocked, race with wakeup, "
"remove from queue %d\n", edffm->cpu);
remove(&edffm->domain, t);
}
}
static void edffm_task_exit(struct task_struct * t)
{
unsigned long flags;
edffm_domain_t* edffm = task_edffm(t);
rt_domain_t* edf;
raw_spin_lock_irqsave(&edffm->slock, flags);
if (is_queued(t)) {
/* dequeue */
edf = task_edf(t);
remove(edf, t);
}
if (edffm->scheduled == t)
edffm->scheduled = NULL;
TRACE_TASK(t, "RIP\n");
preempt(edffm);
raw_spin_unlock_irqrestore(&edffm->slock, flags);
}
static long edffm_admit_task(struct task_struct* tsk)
{
return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
}
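/* Admission requires the task to already execute on the partition named in
 * task_params.cpu. A minimal user-space sketch (standard Linux calls; the
 * surrounding liblitmus setup is omitted) of honoring this for a task whose
 * parameters are in 'param':
 *
 *	cpu_set_t set;
 *	CPU_ZERO(&set);
 *	CPU_SET(param.cpu, &set);
 *	sched_setaffinity(0, sizeof(set), &set);
 *	// ...then hand 'param' to the kernel and switch to real-time mode.
 *
 * For a migratory task, param.cpu is expected to initially name one of the
 * two CPUs listed in semi_part.fm.cpus[].
 */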
/* Plugin object */
static struct sched_plugin edffm_plugin __cacheline_aligned_in_smp = {
.plugin_name = "EDF-fm",
.tick = edffm_tick,
.task_new = edffm_task_new,
.complete_job = complete_job,
.task_exit = edffm_task_exit,
.schedule = edffm_schedule,
.task_wake_up = edffm_task_wake_up,
.task_block = edffm_task_block,
.admit_task = edffm_admit_task
};
static int __init init_edffm(void)
{
int i;
edffm_domain_t *edffm;
	/* Note: this is broken if the number of online CPUs can change. */
for (i = 0; i < num_online_cpus(); i++) {
edffm = remote_edffm(i);
edffm->cpu = i;
edffm->scheduled = NULL;
edf_domain_init(&edffm->domain, NULL, edffm_release_jobs);
}
return register_sched_plugin(&edffm_plugin);
}
module_init(init_edffm);