From 7be35bec55b1b6d6a2483e0ec3703a483da1e5a6 Mon Sep 17 00:00:00 2001
From: "Bjoern B. Brandenburg"
Date: Wed, 26 Jan 2011 17:22:03 -0500
Subject: Add EDF-fm plugin [semi-part backport]

---
 include/litmus/rt_param.h |  34 +++
 litmus/Makefile           |   3 +-
 litmus/sched_edf_fm.c     | 565 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 601 insertions(+), 1 deletion(-)
 create mode 100644 litmus/sched_edf_fm.c

diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 1290e2939e33..6f43deacb3e1 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -35,6 +35,31 @@ typedef enum {
 	PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
 } budget_policy_t;
 
+
+/* Parameters for the EDF-fm scheduling algorithm.
+ * Each task is either fixed or migratory. A migratory task may
+ * migrate between 2 (contiguous) CPUs only. NR_CPUS_EDF_FM = 2.
+ */
+#define NR_CPUS_EDF_FM 2
+
+struct edffm_params {
+	/* EDF-fm: on which CPUs may a migratory task execute? */
+	unsigned int	cpus[NR_CPUS_EDF_FM];
+	/* how many cpus are used by this task?
+	 * fixed = 0, migratory = (NR_CPUS_EDF_FM - 1)
+	 * Efficient way to allow writing cpus[nr_cpus].
+	 */
+	unsigned int	nr_cpus;
+	/* Fraction of this task's exec_cost that each CPU should handle.
+	 * Each fraction is stored as num/denom in a matrix of
+	 * (2 rows) x (NR_CPUS_EDF_FM columns), one column per CPU.
+	 * The first row holds the numerators of the fractions.
+	 * The second row holds the denominators.
+	 * In EDF-fm this is a 2x2 matrix.
+	 */
+	lt_t fraction[2][NR_CPUS_EDF_FM];
+};
+
 /* Parameters for NPS-F semi-partitioned scheduling algorithm.
  * Each (cpu, budget) entry defines the share ('budget' in ns, a % of
  * the slot_length) of the notional processor on the CPU 'cpu'.
@@ -86,6 +111,9 @@ struct rt_task {
 
 	/* parameters used by the semi-partitioned algorithms */
 	union {
+		/* EDF-fm; defined in sched_edf_fm.c */
+		struct edffm_params fm;
+
 		/* NPS-F; defined in sched_npsf.c
 		 * id for the server (notional processor) that holds
 		 * this task; the same npfs_id can be assigned to "the same"
@@ -243,6 +271,12 @@ struct rt_param {
 
 	/* runtime info for the semi-part plugins */
 	union {
+		/* EDF-fm runtime information:
+		 * number of jobs handled by this cpu
+		 * (to determine the next cpu for a migrating task)
+		 */
+		unsigned int	cpu_job_no[NR_CPUS_EDF_FM];
+
 		/* EDF-WM runtime information */
 		struct {
 			/* at which exec time did the current slice start? */
diff --git a/litmus/Makefile b/litmus/Makefile
index f26736964479..5533a58eb684 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -16,7 +16,8 @@ obj-y = sched_plugin.o litmus.o \
 	   sched_gsn_edf.o \
 	   sched_psn_edf.o \
 	   sched_edf_wm.o \
-	   sched_npsf.o
+	   sched_npsf.o \
+	   sched_edf_fm.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c
new file mode 100644
index 000000000000..b7210725c072
--- /dev/null
+++ b/litmus/sched_edf_fm.c
@@ -0,0 +1,565 @@
+/*
+ * litmus/sched_edf_fm.c
+ *
+ * Implementation of the EDF-fm scheduling algorithm.
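+ *
+ * EDF-fm is a semi-partitioned scheduler: each task is either fixed,
+ * executing all of its jobs on a single CPU, or migratory, executing
+ * on two contiguous CPUs. Each CPU schedules its local ready queue by
+ * EDF, except that jobs of migratory tasks are statically prioritized
+ * over jobs of fixed tasks. The jobs of a migratory task are split
+ * between its two CPUs according to the per-CPU fractions in
+ * struct edffm_params (see include/litmus/rt_param.h above).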
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+
+typedef struct {
+	rt_domain_t		domain;
+	int			cpu;
+	struct task_struct*	scheduled; /* only RT tasks */
+/* domain lock */
+#define slock domain.ready_lock
+} edffm_domain_t;
+
+DEFINE_PER_CPU(edffm_domain_t, edffm_domains);
+
+#define local_edffm		(&__get_cpu_var(edffm_domains))
+#define remote_edf(cpu)		(&per_cpu(edffm_domains, cpu).domain)
+#define remote_edffm(cpu)	(&per_cpu(edffm_domains, cpu))
+#define task_edf(task)		remote_edf(get_partition(task))
+#define task_edffm(task)	remote_edffm(get_partition(task))
+
+#define edffm_params(t)		((t)->rt_param.task_params.semi_part.fm)
+
+/* Is the task a migratory task? */
+#define is_migrat_task(task)	(edffm_params(task).nr_cpus)
+/* t is on the wrong CPU (it should be requeued properly) */
+#define wrong_cpu(t)	\
+	(is_migrat_task((t)) && task_cpu((t)) != get_partition((t)))
+/* Get the next CPU for a migratory task */
+#define migrat_next_cpu(t)	\
+	((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ?	\
+		edffm_params(t).cpus[1] :				\
+		edffm_params(t).cpus[0])
+/* Get the current CPU */
+#define migrat_cur_cpu(t)	\
+	((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ?	\
+		edffm_params(t).cpus[0] :				\
+		edffm_params(t).cpus[1])
+/* Access the share (num/den) for the current CPU */
+#define cur_cpu_fract_num(t)	\
+	((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ?	\
+		edffm_params(t).fraction[0][0] :			\
+		edffm_params(t).fraction[0][1])
+#define cur_cpu_fract_den(t)	\
+	((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ?	\
+		edffm_params(t).fraction[1][0] :			\
+		edffm_params(t).fraction[1][1])
+/* Get the job number for the current CPU */
+#define cur_cpu_job_no(t)	\
+	((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ?	\
+		tsk_rt(t)->semi_part.cpu_job_no[0] :			\
+		tsk_rt(t)->semi_part.cpu_job_no[1])
+/* What is the position of this cpu in the task's cpus[] array? */
+#define edffm_cpu_pos(cpu,t)	\
+	((cpu == edffm_params(t).cpus[0]) ?	\
+		0 : 1)
+
+/*
+ * EDF-fm: migratory tasks have higher priority than fixed tasks;
+ * EDF determines the order within both classes.
+ * (Both first and second may be NULL.)
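+ * For example, a migratory job always has higher priority than any
+ * fixed job, regardless of deadlines; ties within a class are broken
+ * by edf_higher_prio().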
+ */
+int edffm_higher_prio(struct task_struct* first, struct task_struct* second)
+{
+	if ((first && edffm_params(first).nr_cpus) ||
+	    (second && edffm_params(second).nr_cpus)) {
+		if ((first && edffm_params(first).nr_cpus) &&
+		    (second && edffm_params(second).nr_cpus))
+			/* both are migrating */
+			return edf_higher_prio(first, second);
+
+		if (first && edffm_params(first).nr_cpus)
+			/* first is migrating */
+			return 1;
+		else
+			/* second is migrating */
+			return 0;
+	}
+
+	/* both are fixed or not real time */
+	return edf_higher_prio(first, second);
+}
+
+/* edffm_preemption_needed - check whether the task t needs to be preempted.
+ * Call only with irqs disabled and with the ready_lock acquired.
+ */
+int edffm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
+{
+	/* we hold the ready_lock, so the ready queue is safe to inspect */
+	/* no need to preempt if there is nothing pending */
+	if (!__jobs_pending(rt))
+		return 0;
+	/* we need to reschedule if t doesn't exist */
+	if (!t)
+		return 1;
+
+	/* make sure to get non-rt stuff out of the way */
+	return !is_realtime(t) || edffm_higher_prio(__next_ready(rt), t);
+}
+
+/* we assume the lock is being held */
+static void preempt(edffm_domain_t *edffm)
+{
+	preempt_if_preemptable(edffm->scheduled, edffm->cpu);
+}
+
+static void edffm_release_jobs(rt_domain_t* rt, struct bheap* tasks)
+{
+	unsigned long flags;
+	edffm_domain_t *edffm = container_of(rt, edffm_domain_t, domain);
+
+	raw_spin_lock_irqsave(&edffm->slock, flags);
+
+	__merge_ready(rt, tasks);
+
+	if (edffm_preemption_needed(rt, edffm->scheduled))
+		preempt(edffm);
+
+	raw_spin_unlock_irqrestore(&edffm->slock, flags);
+}
+
+/* EDF-fm uses the "release_master" field to force the next release of
+ * task 'task' to happen on a remote CPU. The remote CPU for the task
+ * was set up during job_completion(), taking into consideration
+ * whether the task is migratory or not.
+ */
+static inline void
+edffm_add_release_remote(struct task_struct *task)
+{
+	unsigned long flags;
+	rt_domain_t *rt = task_edf(task);
+
+	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
+
+	/* "modify" destination cpu */
+	rt->release_master = get_partition(task);
+
+	TRACE_TASK(task, "Add remote release: smp_proc_id = %d, cpu = %d, remote = %d\n",
+			smp_processor_id(), task_cpu(task), rt->release_master);
+
+	/* trigger future release */
+	__add_release(rt, task);
+
+	/* reset the proper release_master and unlock */
+	rt->release_master = NO_CPU;
+	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
+}
+
+/* Perform double ready-queue locking in a fixed order (by address) to
+ * avoid ABBA deadlocks. This is called with interrupts disabled and
+ * rq->lock held (from schedule()).
+ */
+static noinline void double_domain_lock(edffm_domain_t *dom1, edffm_domain_t *dom2)
+{
+	if (dom1 == dom2) {
+		/* same domain: a single lock suffices */
+		raw_spin_lock(&dom1->slock);
+	} else {
+		if (dom1 < dom2) {
+			raw_spin_lock(&dom1->slock);
+			raw_spin_lock(&dom2->slock);
+			TRACE("acquired %d and %d\n", dom1->cpu, dom2->cpu);
+		} else {
+			raw_spin_lock(&dom2->slock);
+			raw_spin_lock(&dom1->slock);
+			TRACE("acquired %d and %d\n", dom2->cpu, dom1->cpu);
+		}
+	}
+}
+
+/* Directly insert a task in a remote ready queue. This function
+ * should only be called if this task is a migratory task and its
+ * last job for this CPU just completed (a new one is released for
+ * a remote CPU), but the new job is already tardy.
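+ * To avoid deadlock, the local ready-queue lock is dropped first and
+ * both locks are then re-acquired in a fixed order via
+ * double_domain_lock(); the remote lock is released again before
+ * returning, while the local lock is handed back to the caller still
+ * held.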
+ */
+static noinline void insert_task_in_remote_ready(struct task_struct *task)
+{
+	edffm_domain_t *this = remote_edffm(task_cpu(task));
+	edffm_domain_t *remote = remote_edffm(get_partition(task));
+
+	BUG_ON(get_partition(task) != remote->cpu);
+
+	TRACE_TASK(task, "Migrate from P%d to P%d\n",
+			this->cpu, remote->cpu);
+	TRACE_TASK(task, "Inserting in remote ready queue\n");
+
+	WARN_ON(!irqs_disabled());
+
+	raw_spin_unlock(&this->slock);
+	mb();
+	TRACE_TASK(task, "edffm_lock %d released\n", this->cpu);
+
+	/* lock both ready queues */
+	double_domain_lock(this, remote);
+	mb();
+
+	__add_ready(&remote->domain, task);
+
+	/* release the remote lock but keep ours */
+	raw_spin_unlock(&remote->slock);
+	TRACE_TASK(task, "edffm_lock %d released\n", remote->cpu);
+
+	/* ask the remote cpu to reschedule; we are already rescheduling here */
+	preempt(remote);
+}
+
+static void requeue(struct task_struct* t, rt_domain_t *edf)
+{
+	if (t->state != TASK_RUNNING)
+		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+
+	set_rt_flags(t, RT_F_RUNNING);
+	if (is_released(t, litmus_clock())) {
+		if (wrong_cpu(t)) {
+			/* this should only happen if t just completed, but
+			 * its next release is already tardy, so it should be
+			 * migrated and inserted in the remote ready queue
+			 */
+			TRACE_TASK(t, "Migrating task already released, "
+					"move from P%d to P%d\n",
+					task_cpu(t), get_partition(t));
+
+			insert_task_in_remote_ready(t);
+		} else {
+			/* not a migratory task, or the job is on the right CPU */
+			__add_ready(edf, t);
+		}
+	} else {
+		if (wrong_cpu(t)) {
+			TRACE_TASK(t, "Migrating task, adding remote release\n");
+			edffm_add_release_remote(t);
+		} else {
+			TRACE_TASK(t, "Adding local release\n");
+			add_release(edf, t);
+		}
+	}
+}
+
+/* Update statistics for the _current_ job.
+ *	- job_no was incremented _before_ starting this job
+ *	  (release_at / prepare_for_next_period)
+ *	- cpu_job_no is incremented when the job completes
+ */
+static void update_job_counter(struct task_struct *t)
+{
+	int cpu_pos;
+
+	/* Which CPU counter should be incremented? */
+	cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t);
+	t->rt_param.semi_part.cpu_job_no[cpu_pos]++;
+
+	TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n",
+			t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t),
+			t->rt_param.task_params.cpu);
+}
+
+/* What is the next CPU for this job? (eq. (8) in the EDF-fm paper) */
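+/* The assignment below follows eq. (8): the job with index job_no is
+ * placed on cpus[0] iff
+ *
+ *	job_no == floor(cpu_job_no(cur) * den(cur) / num(cur)),
+ *
+ * which keeps the number of jobs executed by each CPU proportional to
+ * its assigned num/den share of the task's jobs.
+ */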
+static int next_cpu_for_job(struct task_struct *t)
+{
+	BUG_ON(!is_migrat_task(t));
+
+	TRACE_TASK(t, "%u = %u * %u / %u\n",
+			t->rt_param.job_params.job_no, cur_cpu_job_no(t),
+			cur_cpu_fract_den(t), cur_cpu_fract_num(t));
+	if ((t->rt_param.job_params.job_no) ==
+			(((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) /
+			cur_cpu_fract_num(t)))
+		return edffm_params(t).cpus[0];
+
+	return edffm_params(t).cpus[1];
+}
+
+/* If needed (the share for task t on this CPU is exhausted), update
+ * task_params.cpu for the _migratory_ task t
+ */
+static void change_migrat_cpu_if_needed(struct task_struct *t)
+{
+	BUG_ON(!is_migrat_task(t));
+	/* EDF-fm: if it is a migratory task and it has already executed
+	 * the required number of jobs on this CPU, we need to move it
+	 * to its next CPU; changing the cpu here will affect the requeue
+	 * and the next release
+	 */
+	if (unlikely(next_cpu_for_job(t) != migrat_cur_cpu(t))) {
+
+		tsk_rt(t)->task_params.cpu = migrat_next_cpu(t);
+		TRACE_TASK(t, "EDF-fm: next job migrates from CPU %d to CPU %d\n",
+				task_cpu(t), tsk_rt(t)->task_params.cpu);
+		return;
+	}
+
+	TRACE_TASK(t, "EDF-fm: next job stays on CPU %d (task_params.cpu = %d)\n",
+			task_cpu(t), tsk_rt(t)->task_params.cpu);
+}
+
+static void job_completion(struct task_struct* t, int forced)
+{
+	sched_trace_task_completion(t, forced);
+	TRACE_TASK(t, "job_completion().\n");
+
+	if (unlikely(is_migrat_task(t))) {
+		update_job_counter(t);
+		change_migrat_cpu_if_needed(t);
+	}
+
+	set_rt_flags(t, RT_F_SLEEP);
+	prepare_for_next_period(t);
+}
+
+static void edffm_tick(struct task_struct *t)
+{
+	edffm_domain_t *edffm = local_edffm;
+
+	BUG_ON(is_realtime(t) && t != edffm->scheduled);
+
+	if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
+		set_tsk_need_resched(t);
+		TRACE("edffm_scheduler_tick: "
+			"%d is preemptable => FORCE_RESCHED\n", t->pid);
+	}
+}
+
+static struct task_struct* edffm_schedule(struct task_struct * prev)
+{
+	edffm_domain_t*		edffm = local_edffm;
+	rt_domain_t*		edf = &edffm->domain;
+	struct task_struct*	next;
+
+	int out_of_time, sleep, preempt, exists, blocks, change_cpu, resched;
+
+	raw_spin_lock(&edffm->slock);
+
+	BUG_ON(edffm->scheduled && edffm->scheduled != prev);
+	BUG_ON(edffm->scheduled && !is_realtime(prev));
+
+	/* (0) Determine state */
+	exists = edffm->scheduled != NULL;
+	blocks = exists && !is_running(edffm->scheduled);
+	out_of_time = exists &&
+		budget_enforced(edffm->scheduled) &&
+		budget_exhausted(edffm->scheduled);
+	sleep = exists && get_rt_flags(edffm->scheduled) == RT_F_SLEEP;
+	change_cpu = exists && wrong_cpu(edffm->scheduled);
+	preempt = edffm_preemption_needed(edf, prev);
+
+	BUG_ON(blocks && change_cpu);
+
+	if (exists)
+		TRACE_TASK(prev,
+			"blocks:%d out_of_time:%d sleep:%d preempt:%d "
+			"wrong_cpu:%d state:%d sig:%d\n",
+			blocks, out_of_time, sleep, preempt,
+			change_cpu, prev->state, signal_pending(prev));
+
+	/* If we need to preempt, do so. */
+	resched = preempt;
+
+	/* If a task blocks we have no choice but to reschedule. */
+	if (blocks)
+		resched = 1;
+
+	/* If the task has just woken up while tardy and the wake-up
+	 * raced with this schedule(), a new job has already been released;
+	 * the currently-scheduled task must be enqueued on a remote ready
+	 * queue, and a new task must be selected for the current queue.
+	 */
+	if (change_cpu)
+		resched = 1;
+
+	/* Any task that is preemptable and either exhausts its execution
+	 * budget or wants to sleep completes its current job. We may have
+	 * to reschedule after this.
+	 */
+	if ((out_of_time || sleep) && !blocks) {
+		job_completion(edffm->scheduled, !sleep);
+		resched = 1;
+	}
+
+	/* The final scheduling decision. Do we need to switch for some reason?
+	 * Switch if we are in RT mode and have no task or if we need to
+	 * reschedule.
+	 */
+	next = NULL;
+	if (resched || !exists) {
+		if (edffm->scheduled && !blocks)
+			requeue(edffm->scheduled, edf);
+		next = __take_ready(edf);
+	} else
+		/* Only override the Linux scheduler if we have a real-time
+		 * task scheduled that needs to continue.
+		 */
+		if (exists)
+			next = prev;
+
+	if (next) {
+		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
+		set_rt_flags(next, RT_F_RUNNING);
+	} else {
+		TRACE("becoming idle at %llu\n", litmus_clock());
+	}
+
+	edffm->scheduled = next;
+	raw_spin_unlock(&edffm->slock);
+
+	return next;
+}
+
+/* Prepare a task for running in RT mode
+ */
+static void edffm_task_new(struct task_struct * t, int on_rq, int running)
+{
+	rt_domain_t*	edf = task_edf(t);
+	edffm_domain_t*	edffm = task_edffm(t);
+	unsigned long	flags;
+
+	TRACE_TASK(t, "EDF-fm: task new, cpu = %d\n",
+			t->rt_param.task_params.cpu);
+
+	release_at(t, litmus_clock());
+	update_job_counter(t);
+
+	/* The task should be running or in the queue; otherwise the signal
+	 * code will try to wake it up with fatal consequences.
+	 */
+	raw_spin_lock_irqsave(&edffm->slock, flags);
+	if (running) {
+		/* there shouldn't be anything else running at the time */
+		BUG_ON(edffm->scheduled);
+		edffm->scheduled = t;
+	} else {
+		requeue(t, edf);
+		/* maybe we have to reschedule */
+		preempt(edffm);
+	}
+	raw_spin_unlock_irqrestore(&edffm->slock, flags);
+}
+
+static void edffm_task_wake_up(struct task_struct *task)
+{
+	unsigned long flags;
+	edffm_domain_t* edffm = task_edffm(task);
+	rt_domain_t* edf = task_edf(task);
+	lt_t now;
+
+	TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
+
+	TRACE_TASK(task, "acquire edffm %d\n", edffm->cpu);
+	raw_spin_lock_irqsave(&edffm->slock, flags);
+
+	BUG_ON(edffm != task_edffm(task));
+	BUG_ON(is_queued(task));
+
+	now = litmus_clock();
+	if (is_tardy(task, now)) {
+		if (unlikely(is_migrat_task(task))) {
+			/* a new job will be released;
+			 * update the current job counter */
+			update_job_counter(task);
+			/* switch CPU if needed */
+			change_migrat_cpu_if_needed(task);
+		}
+		/* new sporadic release */
+		TRACE_TASK(task, "release new\n");
+		release_at(task, now);
+		sched_trace_task_release(task);
+	}
+
+	/* Only add to the ready queue if it is not the currently-scheduled
+	 * task. This could be the case if a task was woken up concurrently
+	 * on a remote CPU before the executing CPU got around to actually
+	 * de-scheduling the task, i.e., wake_up() raced with schedule()
+	 * and won.
+	 */
+	if (edffm->scheduled != task)
+		requeue(task, edf);
+
+	raw_spin_unlock_irqrestore(&edffm->slock, flags);
+	TRACE_TASK(task, "release edffm %d\n", edffm->cpu);
+	TRACE_TASK(task, "wake up done\n");
+}
+
+static void edffm_task_block(struct task_struct *t)
+{
+	TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
+
+	BUG_ON(!is_realtime(t));
+	if (is_queued(t)) {
+		edffm_domain_t *edffm = local_edffm;
+		TRACE_TASK(t, "task blocked, race with wakeup, "
+				"remove from queue %d\n", edffm->cpu);
+		remove(&edffm->domain, t);
+	}
+}
+
+static void edffm_task_exit(struct task_struct * t)
+{
+	unsigned long flags;
+	edffm_domain_t* edffm = task_edffm(t);
+	rt_domain_t* edf;
+
+	raw_spin_lock_irqsave(&edffm->slock, flags);
+	if (is_queued(t)) {
+		/* dequeue */
+		edf = task_edf(t);
+		remove(edf, t);
+	}
+	if (edffm->scheduled == t)
+		edffm->scheduled = NULL;
+
+	TRACE_TASK(t, "RIP\n");
+
+	preempt(edffm);
+	raw_spin_unlock_irqrestore(&edffm->slock, flags);
+}
+
+static long edffm_admit_task(struct task_struct* tsk)
+{
+	return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
+}
+
+/* Plugin object */
+static struct sched_plugin edffm_plugin __cacheline_aligned_in_smp = {
+	.plugin_name		= "EDF-fm",
+	.tick			= edffm_tick,
+	.task_new		= edffm_task_new,
+	.complete_job		= complete_job,
+	.task_exit		= edffm_task_exit,
+	.schedule		= edffm_schedule,
+	.task_wake_up		= edffm_task_wake_up,
+	.task_block		= edffm_task_block,
+	.admit_task		= edffm_admit_task
+};
+
+static int __init init_edffm(void)
+{
+	int i;
+	edffm_domain_t *edffm;
+
+	/* Note: this is broken if the set of online CPUs can change. */
+	for (i = 0; i < num_online_cpus(); i++) {
+		edffm = remote_edffm(i);
+		edffm->cpu = i;
+		edffm->scheduled = NULL;
+		edf_domain_init(&edffm->domain, NULL, edffm_release_jobs);
+	}
+
+	return register_sched_plugin(&edffm_plugin);
+}
+
+module_init(init_edffm);
--
cgit v1.2.2
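
For illustration, a minimal sketch of how userspace might fill in the
parameters this patch adds. It assumes the liblitmus userspace library
(litmus.h, struct rt_task, set_rt_task_param()); the CPU numbers and
fractions are made-up example values, and make_edffm_migratory() is a
hypothetical helper, not part of this patch. The example configures a
task that runs 2 of every 3 jobs on CPU 0 and the remaining 1/3 on the
contiguous CPU 1.

	#include <litmus.h>	/* liblitmus userspace library (assumed) */

	/* Hypothetical helper: mark the task 'pid', whose other rt_task
	 * fields (exec_cost, period, ...) are already set in *param, as
	 * an EDF-fm migratory task split across CPUs 0 and 1. */
	static int make_edffm_migratory(pid_t pid, struct rt_task *param)
	{
		param->cpu = 0;				/* initial partition */
		param->semi_part.fm.nr_cpus = 1;	/* migratory = NR_CPUS_EDF_FM - 1 */
		param->semi_part.fm.cpus[0] = 0;
		param->semi_part.fm.cpus[1] = 1;
		/* row 0 = numerators, row 1 = denominators; one column per CPU */
		param->semi_part.fm.fraction[0][0] = 2;	/* CPU 0: 2/3 of the jobs */
		param->semi_part.fm.fraction[1][0] = 3;
		param->semi_part.fm.fraction[0][1] = 1;	/* CPU 1: 1/3 of the jobs */
		param->semi_part.fm.fraction[1][1] = 3;
		return set_rt_task_param(pid, param);	/* liblitmus syscall wrapper */
	}

Note that edffm_admit_task() rejects a task unless it is already running
on task_params.cpu, so the task must be migrated to its initial
partition before switching it to real-time mode.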