From 84403b9d19b8ac349a23e25fb6587ee0d2bd9b1c Mon Sep 17 00:00:00 2001
From: Andrea Bastoni
Date: Fri, 16 Apr 2010 16:38:42 -0400
Subject: [EDF-fm] Add EDF-fm extensions in rt_task

- A bit more general than what is strictly needed by EDF-fm.
- Hopefully we will be able to integrate all the parameters in a single
  environment (or at least in a limited set): fewer changes to rt_param.h,
  fewer changes to the framework, similar implementations, and a single
  kernel (so we can run multiple tests without changing the kernel).
---
 include/litmus/rt_param.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index a7a183f34a80..9d17fd3863d9 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -20,6 +20,9 @@ static inline int lt_after_eq(lt_t a, lt_t b)
 }
 #define lt_before_eq(a, b) lt_after_eq(b, a)
 
+#define NR_CPUS_EDF_FM 2
+#define NR_CPUS_SEMI NR_CPUS_EDF_FM
+
 /* different types of clients */
 typedef enum {
 	RT_CLASS_HARD,
@@ -40,6 +43,13 @@ struct rt_task {
 	unsigned int cpu;
 	task_class_t cls;
 	budget_policy_t budget_policy; /* ignored by pfair */
+	/* EDF-fm where can a migrat task execute? */
+	unsigned int cpus[NR_CPUS_SEMI];
+	/* how many cpus are used by this task? fixed = 0, migrat = multi
+	 * nr_cpus = (number of cpu where the task can be migrated) - 1
+	 * so that one can use: cpus[nr_cpus]
+	 */
+	unsigned int nr_cpus;
 };
 
 /* The definition of the data that is shared between the kernel and real-time
--
cgit v1.2.2

From b51c94dafb0c0ef55b999bc892a06a296c4372fc Mon Sep 17 00:00:00 2001
From: Andrea Bastoni
Date: Sun, 18 Apr 2010 13:31:59 -0400
Subject: [EDF-fm] Add required extensions to rt_task and rt_job

- rt_task: for each task, add the fraction (num/denom) of the execution
  cost that should be handled by each processor (for EDF-fm, at most 2
  processors).
- rt_job: add the number of jobs already handled by each CPU (for
  migrating tasks this is needed to determine on which CPU the next job
  should execute).
---
 include/litmus/rt_param.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 9d17fd3863d9..1c63b006f50e 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -50,6 +50,14 @@ struct rt_task {
 	 * so that one can use: cpus[nr_cpus]
 	 */
 	unsigned int nr_cpus;
+	/* EDF-fm migrating tasks, fraction of this task exec_cost
+	 * that the processors should handle.
+	 * We keep the fraction divided in num/denom : a matrix of
+	 * NR_CPUS_SEMI rows * 2 columns; first column is the numerator
+	 * of the fraction, second column is the denominator
+	 * (in EDF-fm this is a 2*2 matrix)
+	 */
+	lt_t fraction[2][NR_CPUS_SEMI];
 };
 
 /* The definition of the data that is shared between the kernel and real-time
@@ -100,6 +108,11 @@ struct rt_job {
 	 * Increase this sequence number when a job is released.
 	 */
 	unsigned int job_no;
+
+	/* EDF-fm: number of jobs handled by this cpu
+	 * (to determine next cpu for a migrating task)
+	 */
+	unsigned int cpu_job_no[NR_CPUS_SEMI];
 };
 
 struct pfair_param;
--
cgit v1.2.2

From b29516cfec14f7652c333d061978c03b6a7fc763 Mon Sep 17 00:00:00 2001
From: Andrea Bastoni
Date: Tue, 21 Sep 2010 01:33:50 -0400
Subject: [EDF-fm] Complete reworking of previous implementation of EDF-fm

- The new plugin is much simpler and is based on P-EDF.
- requeue() is the only place where an insertion in a ready queue can
  happen.
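As a concrete illustration of what the two header commits above add, and before the plugin code below makes use of it, the following snippet (not part of the patches; the values are arbitrary) shows how a migratory task that should run 3 out of every 5 of its jobs on CPU 0 and the remaining 2 on CPU 1 would be encoded. Note that the plugin reads row 0 of fraction[][] as the numerators and row 1 as the denominators, one column per entry of cpus[]; the last commit in the series later moves these fields into a semi_part.fm union member.

/* Illustrative only: a migratory EDF-fm task encoded with the rt_task
 * fields introduced above. 3/5 of the jobs go to CPU 0, 2/5 to CPU 1.
 * Assumes litmus/rt_param.h is on the include path. */
#include <litmus/rt_param.h>	/* struct rt_task, NR_CPUS_SEMI */

struct rt_task example_params = {
	.exec_cost = 5000000ULL,	/* 5 ms, in nanoseconds */
	.period    = 20000000ULL,	/* 20 ms */
	.cpu       = 0,			/* initial partition */
	.cpus      = { 0, 1 },		/* the two CPUs the task may use */
	.nr_cpus   = 1,			/* non-zero marks the task migratory */
	.fraction  = {
		{ 3, 2 },		/* numerators for cpus[0], cpus[1] */
		{ 5, 5 },		/* denominators for cpus[0], cpus[1] */
	},
};

With this encoding, cur_cpu_fract_num()/cur_cpu_fract_den() evaluate to 3/5 while the task is assigned to CPU 0 and to 2/5 while it is assigned to CPU 1, which is what next_cpu_for_job() in the plugin below uses to decide where the next job runs.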
--- litmus/Makefile | 3 +- litmus/sched_edf_fm.c | 602 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 604 insertions(+), 1 deletion(-) create mode 100644 litmus/sched_edf_fm.c diff --git a/litmus/Makefile b/litmus/Makefile index 30369787ece2..5e122bd5069b 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -13,7 +13,8 @@ obj-y = sched_plugin.o litmus.o \ bheap.o \ ctrldev.o \ sched_gsn_edf.o \ - sched_psn_edf.o + sched_psn_edf.o \ + sched_edf_fm.o obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c new file mode 100644 index 000000000000..f027156d595f --- /dev/null +++ b/litmus/sched_edf_fm.c @@ -0,0 +1,602 @@ +/* + * litmus/sched_edf_fm.c + * + * Implementation of the EDF-fm scheduling algorithm. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +typedef struct { + rt_domain_t domain; + int cpu; + struct task_struct* scheduled; /* only RT tasks */ +/* domain lock */ +#define slock domain.ready_lock +} edffm_domain_t; + +DEFINE_PER_CPU(edffm_domain_t, edffm_domains); + +#define local_edffm (&__get_cpu_var(edffm_domains)) +#define remote_edf(cpu) (&per_cpu(edffm_domains, cpu).domain) +#define remote_edffm(cpu) (&per_cpu(edffm_domains, cpu)) +#define task_edf(task) remote_edf(get_partition(task)) +#define task_edffm(task) remote_edffm(get_partition(task)) + +/* Is the task a migratory task? */ +#define is_migrat_task(task) (tsk_rt(task)->task_params.nr_cpus) +/* t is on the wrong CPU (it should be requeued properly) */ +#define wrong_cpu(t) is_migrat_task((t)) && task_cpu((t)) != get_partition((t)) +/* Get next CPU */ +#define migrat_next_cpu(t) \ + ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ + tsk_rt(t)->task_params.cpus[1] : \ + tsk_rt(t)->task_params.cpus[0]) +/* Get current cpu */ +#define migrat_cur_cpu(t) \ + ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ + tsk_rt(t)->task_params.cpus[0] : \ + tsk_rt(t)->task_params.cpus[1]) +/* Manipulate share for current cpu */ +#define cur_cpu_fract_num(t) \ + ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ + tsk_rt(t)->task_params.fraction[0][0] : \ + tsk_rt(t)->task_params.fraction[0][1]) +#define cur_cpu_fract_den(t) \ + ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ + tsk_rt(t)->task_params.fraction[1][0] : \ + tsk_rt(t)->task_params.fraction[1][1]) +/* Get job number for current cpu */ +#define cur_cpu_job_no(t) \ + ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ + tsk_rt(t)->job_params.cpu_job_no[0] : \ + tsk_rt(t)->job_params.cpu_job_no[1]) +/* What is the current cpu position in the array? */ +#define edffm_cpu_pos(cpu,t) \ + ((cpu == tsk_rt(t)->task_params.cpus[0]) ? \ + 0 : 1) + +/* + * EDF-fm: migratory tasks have higher prio than fixed, EDF in both classes. + * (Both first and second may be NULL). 
+ */ +int edffm_higher_prio(struct task_struct* first, struct task_struct* second) +{ + if ((first && tsk_rt(first)->task_params.nr_cpus) || + (second && tsk_rt(second)->task_params.nr_cpus)) { + if ((first && tsk_rt(first)->task_params.nr_cpus) && + (second && tsk_rt(second)->task_params.nr_cpus)) + /* both are migrating */ + return edf_higher_prio(first, second); + + if (first && tsk_rt(first)->task_params.nr_cpus) + /* first is migrating */ + return 1; + else + /* second is migrating */ + return 0; + } + + /* both are fixed or not real time */ + return edf_higher_prio(first, second); +} + +/* need_to_preempt - check whether the task t needs to be preempted + * call only with irqs disabled and with ready_lock acquired + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! + */ +int edffm_preemption_needed(rt_domain_t* rt, struct task_struct *t) +{ + /* we need the read lock for edf_ready_queue */ + /* no need to preempt if there is nothing pending */ + if (!__jobs_pending(rt)) + return 0; + /* we need to reschedule if t doesn't exist */ + if (!t) + return 1; + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || edffm_higher_prio(__next_ready(rt), t); +} + +/* we assume the lock is being held */ +static void preempt(edffm_domain_t *edffm) +{ + preempt_if_preemptable(edffm->scheduled, edffm->cpu); +} + +static void edffm_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + unsigned long flags; + edffm_domain_t *edffm = container_of(rt, edffm_domain_t, domain); + + raw_spin_lock_irqsave(&edffm->slock, flags); + + __merge_ready(rt, tasks); + + if (edffm_preemption_needed(rt, edffm->scheduled)) + preempt(edffm); + + raw_spin_unlock_irqrestore(&edffm->slock, flags); +} + +/* EDF-fm uses the "release_master" field to force the next release for + * the task 'task' to happen on a remote CPU. The remote cpu for task is + * previously set up during job_completion() taking into consideration + * whether a task is a migratory task or not. + */ +static inline void +edffm_add_release_remote(struct task_struct *task) +{ + unsigned long flags; + rt_domain_t *rt = task_edf(task); + + raw_spin_lock_irqsave(&rt->tobe_lock, flags); + + /* "modify" destination cpu */ + rt->release_master = get_partition(task); + + TRACE_TASK(task, "Add remote release: smp_proc_id = %d, cpu = %d, remote = %d\n", + smp_processor_id(), task_cpu(task), rt->release_master); + + /* trigger future release */ + __add_release(rt, task); + + /* reset proper release_master and unlock */ + rt->release_master = NO_CPU; + raw_spin_unlock_irqrestore(&rt->tobe_lock, flags); +} + +/* perform double ready_queue locking in an orderwise fashion + * this is called with: interrupt disabled and rq->lock held (from + * schedule()) + */ +static noinline void double_domain_lock(edffm_domain_t *dom1, edffm_domain_t *dom2) +{ + if (dom1 == dom2) { + /* fake */ + raw_spin_lock(&dom1->slock); + } else { + if (dom1 < dom2) { + raw_spin_lock(&dom1->slock); + raw_spin_lock(&dom2->slock); + TRACE("acquired %d and %d\n", dom1->cpu, dom2->cpu); + } else { + raw_spin_lock(&dom2->slock); + raw_spin_lock(&dom1->slock); + TRACE("acquired %d and %d\n", dom2->cpu, dom1->cpu); + } + } +} + +/* Directly insert a task in a remote ready queue. This function + * should only be called if this task is a migrating task and its + * last job for this CPU just completed (a new one is released for + * a remote CPU), but the new job is already tardy. 
+ */ +static noinline void insert_task_in_remote_ready(struct task_struct *task) +{ + edffm_domain_t *this = remote_edffm(task_cpu(task)); + edffm_domain_t *remote = remote_edffm(get_partition(task)); + /* FIXME we don't drop the rqlock, so this task shouldn't change + * This function shouldn't be called by a wakeup (as the job hasn't + * completed yet) */ + struct task_struct *old; + + BUG_ON(get_partition(task) != remote->cpu); + + TRACE_TASK(task, "Migrate From P%d -> To P%d\n", + this->cpu, remote->cpu); + TRACE_TASK(task, "Inserting in remote ready queue\n"); + + old = task; + WARN_ON(!irqs_disabled()); + + raw_spin_unlock(&this->slock); + mb(); + TRACE_TASK(task,"edffm_lock %d released\n", this->cpu); + + /* lock both ready queues */ + double_domain_lock(this, remote); + mb(); + + __add_ready(&remote->domain, task); + + /* release remote but keep ours */ + raw_spin_unlock(&remote->slock); + TRACE_TASK(task,"edffm_lock %d released\n", remote->cpu); + + if (old != task) { + TRACE_TASK(task,"Task changed while we dropped" + " the lock: old = (0x%p)," + " cur = (0x%p)\n", + old, task); + } + + /* ask remote cpu to reschedule, we are already rescheduling on this */ + preempt(remote); +} + +static void requeue(struct task_struct* t, rt_domain_t *edf) +{ + if (t->state != TASK_RUNNING) + TRACE_TASK(t, "requeue: !TASK_RUNNING\n"); + + set_rt_flags(t, RT_F_RUNNING); + if (is_released(t, litmus_clock())) { + if (wrong_cpu(t)) { + /* this should only happen if t just completed, but + * its next release is already tardy, so it should be + * migrated and inserted in the remote ready queue + */ + TRACE_TASK(t, "Migrating task already released, " + "move from P%d to P%d\n", + task_cpu(t), get_partition(t)); + + insert_task_in_remote_ready(t); + } else { + /* not a migrat task or the job is on the right CPU */ + __add_ready(edf, t); + } + } else { + if (wrong_cpu(t)) { + + TRACE_TASK(t, "Migrating task, adding remote release\n"); + edffm_add_release_remote(t); + } else { + TRACE_TASK(t, "Adding local release\n"); + add_release(edf, t); + } + } +} + +/* Update statistics for the _current_ job. + * - job_no was incremented _before_ starting this job + * (release_at / prepare_for_next_period) + * - job_no can be incremented several times before what we consider + * to be the 'true' release. FIXME: this has the side effect of + * discarding the first job... + */ +static void update_job_counter(struct task_struct *t) +{ + int cpu_pos; +/* Just confusing, FIXME: remove. + * TRACE_TASK(t, "Before: job_no = %d, cpu_job_no = %d, den = %d, num = %d\n", + t->rt_param.job_params.job_no, cur_cpu_job_no(t), + cur_cpu_fract_den(t), cur_cpu_fract_num(t)); +*/ + + /* We need a precise accounting for job numbers; when the first real + * job completes, both job CPU conters are 0; we reset job_no counter + */ + if (tsk_rt(t)->job_params.cpu_job_no[0] == 0 && + tsk_rt(t)->job_params.cpu_job_no[1] == 0) { + /* first "real job" has executed once */ + t->rt_param.job_params.job_no = 1; + } + + /* Which CPU counter should be incremented? */ + cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t); + t->rt_param.job_params.cpu_job_no[cpu_pos]++; + + TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n", + t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t), + t->rt_param.task_params.cpu); +} + +/* What is the next cpu for this job? (eq. 
8) */ +static int next_cpu_for_job(struct task_struct *t) +{ + BUG_ON(!is_migrat_task(t)); + + /* update_job_counter() reset job_no to 1 + * when the first "real" job is detected + */ + if ((t->rt_param.job_params.job_no) == + (((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) / + cur_cpu_fract_num(t))) + return tsk_rt(t)->task_params.cpus[0]; + + return tsk_rt(t)->task_params.cpus[1]; +} + +/* If needed (the share for task t on this CPU is exhausted), updates + * the task_params.cpu for the _migrating_ task t + */ +static void change_migrat_cpu_if_needed(struct task_struct *t) +{ + BUG_ON(!is_migrat_task(t)); + /* EDF-fm: if it is a migrating task and it has already executed + * the required number of jobs on this CPU, we need to move it + * on its next CPU; changing the cpu here will affect the requeue + * and the next release + */ + if (unlikely(next_cpu_for_job(t) != migrat_cur_cpu(t))) { + + tsk_rt(t)->task_params.cpu = migrat_next_cpu(t); + TRACE_TASK(t, "EDF-fm: will migrate job %d -> %d\n", + task_cpu(t), tsk_rt(t)->task_params.cpu); + return; + } + + TRACE_TASK(t, "EDF-fm: job will stay on %d -> %d\n", + task_cpu(t), tsk_rt(t)->task_params.cpu); +} + +static void job_completion(struct task_struct* t, int forced) +{ + sched_trace_task_completion(t,forced); + TRACE_TASK(t, "job_completion().\n"); + + if (unlikely(is_migrat_task(t))) { + update_job_counter(t); + change_migrat_cpu_if_needed(t); + } + + set_rt_flags(t, RT_F_SLEEP); + prepare_for_next_period(t); +} + +static void edffm_tick(struct task_struct *t) +{ + edffm_domain_t *edffm = local_edffm; + + BUG_ON(is_realtime(t) && t != edffm->scheduled); + + if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { + set_tsk_need_resched(t); + TRACE("edffm_scheduler_tick: " + "%d is preemptable " + " => FORCE_RESCHED\n", t->pid); + } +} + +static struct task_struct* edffm_schedule(struct task_struct * prev) +{ + edffm_domain_t* edffm = local_edffm; + rt_domain_t* edf = &edffm->domain; + struct task_struct* next; + + int out_of_time, sleep, preempt, exists, blocks, change_cpu, resched; + + raw_spin_lock(&edffm->slock); + + /* sanity checking + * differently from gedf, when a task exits (dead) + * edffm->schedule may be null and prev _is_ realtime + */ + BUG_ON(edffm->scheduled && edffm->scheduled != prev); + BUG_ON(edffm->scheduled && !is_realtime(prev)); + + /* (0) Determine state */ + exists = edffm->scheduled != NULL; + blocks = exists && !is_running(edffm->scheduled); + out_of_time = exists && + budget_enforced(edffm->scheduled) && + budget_exhausted(edffm->scheduled); + sleep = exists && get_rt_flags(edffm->scheduled) == RT_F_SLEEP; + change_cpu = exists && wrong_cpu(edffm->scheduled); + preempt = edffm_preemption_needed(edf, prev); + + /* FIXME can this happen??? */ + if(blocks && change_cpu) { + TRACE_TASK(prev, "WARN: blocked, but should change CPU!\n"); + } + + if (exists) + TRACE_TASK(prev, + "blocks:%d out_of_time:%d sleep:%d preempt:%d " + "wrong_cpu:%d state:%d sig:%d\n", + blocks, out_of_time, sleep, preempt, + change_cpu, prev->state, signal_pending(prev)); + + /* If we need to preempt do so. + * The following checks set resched to 1 in case of special + * circumstances. + */ + resched = preempt; + + /* If a task blocks we have no choice but to reschedule. 
+ */ + if (blocks) + resched = 1; + + /* If a task has just woken up, it was tardy and the wake up + * raced with this schedule, a new job has already been released, + * but scheduled should be enqueued on a remote ready queue, and a + * new task should be selected for the current queue. + */ + if (change_cpu) + resched = 1; + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. We may have to reschedule after + * this. + */ + if ((out_of_time || sleep) && !blocks) { + job_completion(edffm->scheduled, !sleep); + resched = 1; + } + + /* The final scheduling decision. Do we need to switch for some reason? + * Switch if we are in RT mode and have no task or if we need to + * resched. + */ + next = NULL; + if (resched || !exists) { + /* Take care of a previously scheduled + * job by taking it out of the Linux runqueue. + */ + if (edffm->scheduled && !blocks) + requeue(edffm->scheduled, edf); + next = __take_ready(edf); + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + if (next) { + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + set_rt_flags(next, RT_F_RUNNING); + } else { + TRACE("becoming idle at %llu\n", litmus_clock()); + } + + edffm->scheduled = next; + raw_spin_unlock(&edffm->slock); + + return next; +} + +/* Prepare a task for running in RT mode + */ +static void edffm_task_new(struct task_struct * t, int on_rq, int running) +{ + rt_domain_t* edf = task_edf(t); + edffm_domain_t* edffm = task_edffm(t); + unsigned long flags; + + TRACE_TASK(t, "EDF-fm: task new, cpu = %d\n", + t->rt_param.task_params.cpu); + + /* FIXME: setup job parameters, we will start counting 'real' jobs + * after the first completion */ + release_at(t, litmus_clock()); + + /* The task should be running in the queue, otherwise signal + * code will try to wake it up with fatal consequences. + */ + raw_spin_lock_irqsave(&edffm->slock, flags); + if (running) { + /* there shouldn't be anything else running at the time */ + BUG_ON(edffm->scheduled); + edffm->scheduled = t; + } else { + requeue(t, edf); + /* maybe we have to reschedule */ + preempt(edffm); + } + raw_spin_unlock_irqrestore(&edffm->slock, flags); +} + +static void edffm_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + edffm_domain_t* edffm = task_edffm(task); + rt_domain_t* edf = task_edf(task); + lt_t now; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + + TRACE_TASK(task, "acquire edffm %d\n", edffm->cpu); + raw_spin_lock_irqsave(&edffm->slock, flags); + + BUG_ON(edffm != task_edffm(task)); + BUG_ON(is_queued(task)); + + now = litmus_clock(); + if (is_tardy(task, now)) { + /* a new job will be released. Update current job counter */ + update_job_counter(task); + /* Switch CPU if needed */ + change_migrat_cpu_if_needed(task); + /* new sporadic release */ + TRACE_TASK(task, "release new\n"); + release_at(task, now); + sched_trace_task_release(task); + } + + /* Only add to ready queue if it is not the currently-scheduled + * task. This could be the case if a task was woken up concurrently + * on a remote CPU before the executing CPU got around to actually + * de-scheduling the task, i.e., wake_up() raced with schedule() + * and won. 
+ */ + if (edffm->scheduled != task) + requeue(task, edf); + + raw_spin_unlock_irqrestore(&edffm->slock, flags); + TRACE_TASK(task, "release edffm %d\n", edffm->cpu); + TRACE_TASK(task, "wake up done\n"); +} + +static void edffm_task_block(struct task_struct *t) +{ + /* only running tasks can block, thus t is in no queue */ + TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); + + BUG_ON(!is_realtime(t)); + BUG_ON(is_queued(t)); +} + +static void edffm_task_exit(struct task_struct * t) +{ + unsigned long flags; + edffm_domain_t* edffm = task_edffm(t); + rt_domain_t* edf; + + raw_spin_lock_irqsave(&edffm->slock, flags); + if (is_queued(t)) { + /* dequeue */ + edf = task_edf(t); + remove(edf, t); + } + if (edffm->scheduled == t) + edffm->scheduled = NULL; + + TRACE_TASK(t, "RIP\n"); + + preempt(edffm); + raw_spin_unlock_irqrestore(&edffm->slock, flags); +} + +static long edffm_admit_task(struct task_struct* tsk) +{ + return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; +} + +/* Plugin object */ +static struct sched_plugin edffm_plugin __cacheline_aligned_in_smp = { + .plugin_name = "EDF-fm", + .tick = edffm_tick, + .task_new = edffm_task_new, + .complete_job = complete_job, + .task_exit = edffm_task_exit, + .schedule = edffm_schedule, + .task_wake_up = edffm_task_wake_up, + .task_block = edffm_task_block, + .admit_task = edffm_admit_task +}; + +static int __init init_edffm(void) +{ + int i; + edffm_domain_t *edffm; + + /* We do not really want to support cpu hotplug, do we? ;) + * However, if we are so crazy to do so, + * we cannot use num_online_cpu() + */ + for (i = 0; i < num_online_cpus(); i++) { + edffm = remote_edffm(i); + edffm->cpu = i; + edffm->scheduled = NULL; + edf_domain_init(&edffm->domain, NULL, edffm_release_jobs); + } + + return register_sched_plugin(&edffm_plugin); +} + +module_init(init_edffm); + -- cgit v1.2.2 From c592450ff6fc65d585f4543b1f2fdfff64f18a98 Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Tue, 21 Sep 2010 02:00:34 -0400 Subject: [EDF-fm] A block may race with wakeup. A block event may race with a wakeup event. We need to remove the task from the queue if it has been previously enqueued. --- litmus/sched_edf_fm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c index f027156d595f..b7615c14f9bd 100644 --- a/litmus/sched_edf_fm.c +++ b/litmus/sched_edf_fm.c @@ -537,7 +537,12 @@ static void edffm_task_block(struct task_struct *t) TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); BUG_ON(!is_realtime(t)); - BUG_ON(is_queued(t)); + if (is_queued(t)) { + edffm_domain_t *edffm = local_edffm; + TRACE_TASK(t, "task blocked, race with wakeup, " + "remove from queue %d\n", edffm->cpu); + remove(&edffm->domain, t); + } } static void edffm_task_exit(struct task_struct * t) -- cgit v1.2.2 From 31354ae224da034d067e2f05eeb604802c5fd7cf Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Tue, 21 Sep 2010 11:19:46 -0400 Subject: [EDF-fm] Call change_migrat_cpu_if_needed() only for migrating tasks --- litmus/sched_edf_fm.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c index b7615c14f9bd..8eb7b9c12237 100644 --- a/litmus/sched_edf_fm.c +++ b/litmus/sched_edf_fm.c @@ -269,11 +269,6 @@ static void requeue(struct task_struct* t, rt_domain_t *edf) static void update_job_counter(struct task_struct *t) { int cpu_pos; -/* Just confusing, FIXME: remove. 
- * TRACE_TASK(t, "Before: job_no = %d, cpu_job_no = %d, den = %d, num = %d\n", - t->rt_param.job_params.job_no, cur_cpu_job_no(t), - cur_cpu_fract_den(t), cur_cpu_fract_num(t)); -*/ /* We need a precise accounting for job numbers; when the first real * job completes, both job CPU conters are 0; we reset job_no counter @@ -507,10 +502,13 @@ static void edffm_task_wake_up(struct task_struct *task) now = litmus_clock(); if (is_tardy(task, now)) { - /* a new job will be released. Update current job counter */ - update_job_counter(task); - /* Switch CPU if needed */ - change_migrat_cpu_if_needed(task); + if (unlikely(is_migrat_task(task))) { + /* a new job will be released. + * Update current job counter */ + update_job_counter(task); + /* Switch CPU if needed */ + change_migrat_cpu_if_needed(task); + } /* new sporadic release */ TRACE_TASK(task, "release new\n"); release_at(task, now); -- cgit v1.2.2 From 844cba1fd1189a755e60317a3f2b4403e7908b5b Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Tue, 21 Sep 2010 21:07:29 -0400 Subject: [EDF-fm] Remove code uglyness and properly initialize jobno - Remove stale comments in the code. - Remove non needed checks and add proper invariants. - Increment per-cpu job number when a task is released (task_new). This removes the need of detecting the 'first' job. --- litmus/sched_edf_fm.c | 63 ++++++++------------------------------------------- 1 file changed, 9 insertions(+), 54 deletions(-) diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c index 8eb7b9c12237..70c5d289288f 100644 --- a/litmus/sched_edf_fm.c +++ b/litmus/sched_edf_fm.c @@ -91,8 +91,7 @@ int edffm_higher_prio(struct task_struct* first, struct task_struct* second) } /* need_to_preempt - check whether the task t needs to be preempted - * call only with irqs disabled and with ready_lock acquired - * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! + * call only with irqs disabled and with ready_lock acquired */ int edffm_preemption_needed(rt_domain_t* rt, struct task_struct *t) { @@ -187,10 +186,6 @@ static noinline void insert_task_in_remote_ready(struct task_struct *task) { edffm_domain_t *this = remote_edffm(task_cpu(task)); edffm_domain_t *remote = remote_edffm(get_partition(task)); - /* FIXME we don't drop the rqlock, so this task shouldn't change - * This function shouldn't be called by a wakeup (as the job hasn't - * completed yet) */ - struct task_struct *old; BUG_ON(get_partition(task) != remote->cpu); @@ -198,7 +193,6 @@ static noinline void insert_task_in_remote_ready(struct task_struct *task) this->cpu, remote->cpu); TRACE_TASK(task, "Inserting in remote ready queue\n"); - old = task; WARN_ON(!irqs_disabled()); raw_spin_unlock(&this->slock); @@ -215,13 +209,6 @@ static noinline void insert_task_in_remote_ready(struct task_struct *task) raw_spin_unlock(&remote->slock); TRACE_TASK(task,"edffm_lock %d released\n", remote->cpu); - if (old != task) { - TRACE_TASK(task,"Task changed while we dropped" - " the lock: old = (0x%p)," - " cur = (0x%p)\n", - old, task); - } - /* ask remote cpu to reschedule, we are already rescheduling on this */ preempt(remote); } @@ -262,23 +249,12 @@ static void requeue(struct task_struct* t, rt_domain_t *edf) /* Update statistics for the _current_ job. * - job_no was incremented _before_ starting this job * (release_at / prepare_for_next_period) - * - job_no can be incremented several times before what we consider - * to be the 'true' release. FIXME: this has the side effect of - * discarding the first job... 
+ * - cpu_job_no is incremented when the job completes */ static void update_job_counter(struct task_struct *t) { int cpu_pos; - /* We need a precise accounting for job numbers; when the first real - * job completes, both job CPU conters are 0; we reset job_no counter - */ - if (tsk_rt(t)->job_params.cpu_job_no[0] == 0 && - tsk_rt(t)->job_params.cpu_job_no[1] == 0) { - /* first "real job" has executed once */ - t->rt_param.job_params.job_no = 1; - } - /* Which CPU counter should be incremented? */ cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t); t->rt_param.job_params.cpu_job_no[cpu_pos]++; @@ -288,14 +264,11 @@ static void update_job_counter(struct task_struct *t) t->rt_param.task_params.cpu); } -/* What is the next cpu for this job? (eq. 8) */ +/* What is the next cpu for this job? (eq. 8, in EDF-Fm paper) */ static int next_cpu_for_job(struct task_struct *t) { BUG_ON(!is_migrat_task(t)); - /* update_job_counter() reset job_no to 1 - * when the first "real" job is detected - */ if ((t->rt_param.job_params.job_no) == (((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) / cur_cpu_fract_num(t))) @@ -365,10 +338,6 @@ static struct task_struct* edffm_schedule(struct task_struct * prev) raw_spin_lock(&edffm->slock); - /* sanity checking - * differently from gedf, when a task exits (dead) - * edffm->schedule may be null and prev _is_ realtime - */ BUG_ON(edffm->scheduled && edffm->scheduled != prev); BUG_ON(edffm->scheduled && !is_realtime(prev)); @@ -382,10 +351,7 @@ static struct task_struct* edffm_schedule(struct task_struct * prev) change_cpu = exists && wrong_cpu(edffm->scheduled); preempt = edffm_preemption_needed(edf, prev); - /* FIXME can this happen??? */ - if(blocks && change_cpu) { - TRACE_TASK(prev, "WARN: blocked, but should change CPU!\n"); - } + BUG_ON(blocks && change_cpu); if (exists) TRACE_TASK(prev, @@ -394,14 +360,10 @@ static struct task_struct* edffm_schedule(struct task_struct * prev) blocks, out_of_time, sleep, preempt, change_cpu, prev->state, signal_pending(prev)); - /* If we need to preempt do so. - * The following checks set resched to 1 in case of special - * circumstances. - */ + /* If we need to preempt do so. */ resched = preempt; - /* If a task blocks we have no choice but to reschedule. - */ + /* If a task blocks we have no choice but to reschedule. */ if (blocks) resched = 1; @@ -428,9 +390,7 @@ static struct task_struct* edffm_schedule(struct task_struct * prev) */ next = NULL; if (resched || !exists) { - /* Take care of a previously scheduled - * job by taking it out of the Linux runqueue. - */ + if (edffm->scheduled && !blocks) requeue(edffm->scheduled, edf); next = __take_ready(edf); @@ -465,9 +425,8 @@ static void edffm_task_new(struct task_struct * t, int on_rq, int running) TRACE_TASK(t, "EDF-fm: task new, cpu = %d\n", t->rt_param.task_params.cpu); - /* FIXME: setup job parameters, we will start counting 'real' jobs - * after the first completion */ release_at(t, litmus_clock()); + update_job_counter(t); /* The task should be running in the queue, otherwise signal * code will try to wake it up with fatal consequences. @@ -531,7 +490,6 @@ static void edffm_task_wake_up(struct task_struct *task) static void edffm_task_block(struct task_struct *t) { - /* only running tasks can block, thus t is in no queue */ TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); BUG_ON(!is_realtime(t)); @@ -587,10 +545,7 @@ static int __init init_edffm(void) int i; edffm_domain_t *edffm; - /* We do not really want to support cpu hotplug, do we? 
;) - * However, if we are so crazy to do so, - * we cannot use num_online_cpu() - */ + /* Note, broken if num_online_cpus() may change */ for (i = 0; i < num_online_cpus(); i++) { edffm = remote_edffm(i); edffm->cpu = i; -- cgit v1.2.2 From 06b23f7b33c61cf1f03acb2d19ddf5dc6c57a810 Mon Sep 17 00:00:00 2001 From: Andrea Bastoni Date: Tue, 21 Sep 2010 22:22:10 -0400 Subject: Remove EDF-Fm specific parameters from rt_param.h Move parameters specific to EDF-Fm plugin only inside a dedicated data structure. Use union within rt_task to merge also the other semi-part plugins. --- include/litmus/rt_param.h | 64 ++++++++++++++++++++++++++++++----------------- litmus/sched_edf_fm.c | 52 ++++++++++++++++++++------------------ 2 files changed, 68 insertions(+), 48 deletions(-) diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 1c63b006f50e..90685e93ad21 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h @@ -20,9 +20,6 @@ static inline int lt_after_eq(lt_t a, lt_t b) } #define lt_before_eq(a, b) lt_after_eq(b, a) -#define NR_CPUS_EDF_FM 2 -#define NR_CPUS_SEMI NR_CPUS_EDF_FM - /* different types of clients */ typedef enum { RT_CLASS_HARD, @@ -36,6 +33,31 @@ typedef enum { PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */ } budget_policy_t; + +/* Parameters for EDF-Fm scheduling algorithm. + * Each task may be fixed or migratory. Migratory tasks may + * migrate on 2 (contiguous) CPU only. NR_CPUS_EDF_FM = 2. + */ +#define NR_CPUS_EDF_FM 2 + +struct edffm_params { + /* EDF-fm where can a migratory task execute? */ + unsigned int cpus[NR_CPUS_EDF_FM]; + /* how many cpus are used by this task? + * fixed = 0, migratory = (NR_CPUS_EDF_FM - 1) + * Efficient way to allow writing cpus[nr_cpus]. + */ + unsigned int nr_cpus; + /* Fraction of this task exec_cost that each CPU should handle. + * We keep the fraction divided in num/denom : a matrix of + * (NR_CPUS_EDF_FM rows) x (2 columns). + * The first column is the numerator of the fraction. + * The second column is the denominator. + * In EDF-fm this is a 2*2 matrix + */ + lt_t fraction[2][NR_CPUS_EDF_FM]; +}; + struct rt_task { lt_t exec_cost; lt_t period; @@ -43,21 +65,12 @@ struct rt_task { unsigned int cpu; task_class_t cls; budget_policy_t budget_policy; /* ignored by pfair */ - /* EDF-fm where can a migrat task execute? */ - unsigned int cpus[NR_CPUS_SEMI]; - /* how many cpus are used by this task? fixed = 0, migrat = multi - * nr_cpus = (number of cpu where the task can be migrated) - 1 - * so that one can use: cpus[nr_cpus] - */ - unsigned int nr_cpus; - /* EDF-fm migrating tasks, fraction of this task exec_cost - * that the processors should handle. - * We keep the fraction divided in num/denom : a matrix of - * NR_CPUS_SEMI rows * 2 columns; first column is the numerator - * of the fraction, second column is the denominator - * (in EDF-fm this is a 2*2 matrix) - */ - lt_t fraction[2][NR_CPUS_SEMI]; + + /* parameters used by the semi-partitioned algorithms */ + union { + /* EDF-Fm; defined in sched_edf_fm.c */ + struct edffm_params fm; + } semi_part; }; /* The definition of the data that is shared between the kernel and real-time @@ -108,11 +121,6 @@ struct rt_job { * Increase this sequence number when a job is released. 
*/ unsigned int job_no; - - /* EDF-fm: number of jobs handled by this cpu - * (to determine next cpu for a migrating task) - */ - unsigned int cpu_job_no[NR_CPUS_SEMI]; }; struct pfair_param; @@ -207,6 +215,16 @@ struct rt_param { /* Pointer to the page shared between userspace and kernel. */ struct control_page * ctrl_page; + + /* runtime info for the semi-part plugins */ + union { + struct { + /* EDF-fm: number of jobs handled by this cpu + * (to determine next cpu for a migrating task) + */ + unsigned int cpu_job_no[NR_CPUS_EDF_FM]; + } fm; + } semi_part; }; /* Possible RT flags */ diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c index 70c5d289288f..e14f8588c7a4 100644 --- a/litmus/sched_edf_fm.c +++ b/litmus/sched_edf_fm.c @@ -32,37 +32,39 @@ DEFINE_PER_CPU(edffm_domain_t, edffm_domains); #define task_edf(task) remote_edf(get_partition(task)) #define task_edffm(task) remote_edffm(get_partition(task)) +#define edffm_params(t) (t->rt_param.task_params.semi_part.fm) + /* Is the task a migratory task? */ -#define is_migrat_task(task) (tsk_rt(task)->task_params.nr_cpus) +#define is_migrat_task(task) (edffm_params(task).nr_cpus) /* t is on the wrong CPU (it should be requeued properly) */ #define wrong_cpu(t) is_migrat_task((t)) && task_cpu((t)) != get_partition((t)) /* Get next CPU */ #define migrat_next_cpu(t) \ - ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ - tsk_rt(t)->task_params.cpus[1] : \ - tsk_rt(t)->task_params.cpus[0]) + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + edffm_params(t).cpus[1] : \ + edffm_params(t).cpus[0]) /* Get current cpu */ #define migrat_cur_cpu(t) \ - ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ - tsk_rt(t)->task_params.cpus[0] : \ - tsk_rt(t)->task_params.cpus[1]) + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + edffm_params(t).cpus[0] : \ + edffm_params(t).cpus[1]) /* Manipulate share for current cpu */ #define cur_cpu_fract_num(t) \ - ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ - tsk_rt(t)->task_params.fraction[0][0] : \ - tsk_rt(t)->task_params.fraction[0][1]) + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + edffm_params(t).fraction[0][0] : \ + edffm_params(t).fraction[0][1]) #define cur_cpu_fract_den(t) \ - ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ - tsk_rt(t)->task_params.fraction[1][0] : \ - tsk_rt(t)->task_params.fraction[1][1]) + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + edffm_params(t).fraction[1][0] : \ + edffm_params(t).fraction[1][1]) /* Get job number for current cpu */ #define cur_cpu_job_no(t) \ - ((tsk_rt(t)->task_params.cpu == tsk_rt(t)->task_params.cpus[0]) ? \ - tsk_rt(t)->job_params.cpu_job_no[0] : \ - tsk_rt(t)->job_params.cpu_job_no[1]) + ((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \ + tsk_rt(t)->semi_part.fm.cpu_job_no[0] : \ + tsk_rt(t)->semi_part.fm.cpu_job_no[1]) /* What is the current cpu position in the array? */ #define edffm_cpu_pos(cpu,t) \ - ((cpu == tsk_rt(t)->task_params.cpus[0]) ? \ + ((cpu == edffm_params(t).cpus[0]) ? 
\ 0 : 1) /* @@ -71,14 +73,14 @@ DEFINE_PER_CPU(edffm_domain_t, edffm_domains); */ int edffm_higher_prio(struct task_struct* first, struct task_struct* second) { - if ((first && tsk_rt(first)->task_params.nr_cpus) || - (second && tsk_rt(second)->task_params.nr_cpus)) { - if ((first && tsk_rt(first)->task_params.nr_cpus) && - (second && tsk_rt(second)->task_params.nr_cpus)) + if ((first && edffm_params(first).nr_cpus) || + (second && edffm_params(second).nr_cpus)) { + if ((first && edffm_params(first).nr_cpus) && + (second && edffm_params(second).nr_cpus)) /* both are migrating */ return edf_higher_prio(first, second); - if (first && tsk_rt(first)->task_params.nr_cpus) + if (first && edffm_params(first).nr_cpus) /* first is migrating */ return 1; else @@ -257,7 +259,7 @@ static void update_job_counter(struct task_struct *t) /* Which CPU counter should be incremented? */ cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t); - t->rt_param.job_params.cpu_job_no[cpu_pos]++; + t->rt_param.semi_part.fm.cpu_job_no[cpu_pos]++; TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n", t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t), @@ -272,9 +274,9 @@ static int next_cpu_for_job(struct task_struct *t) if ((t->rt_param.job_params.job_no) == (((lt_t) cur_cpu_job_no(t) * cur_cpu_fract_den(t)) / cur_cpu_fract_num(t))) - return tsk_rt(t)->task_params.cpus[0]; + return edffm_params(t).cpus[0]; - return tsk_rt(t)->task_params.cpus[1]; + return edffm_params(t).cpus[1]; } /* If needed (the share for task t on this CPU is exhausted), updates -- cgit v1.2.2
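To close out the series, here is a minimal user-space sketch of how the final parameter layout (the semi_part.fm member introduced by the last commit) might be filled in for a migratory EDF-fm task. The helper name and the values are invented for illustration; the fields and NR_CPUS_EDF_FM come from the final rt_param.h above, while set_rt_task_param(), mentioned after the code, is assumed to be the usual LITMUS^RT call for installing task parameters and is not defined by these patches.

/* Sketch only: fill in EDF-fm parameters for a task that splits its
 * jobs between two CPUs. Times are in nanoseconds (lt_t). Assumes
 * litmus/rt_param.h as it looks after the last commit above. */
#include <string.h>
#include <litmus/rt_param.h>

static void fill_edffm_params(struct rt_task *p,
			      lt_t exec_cost, lt_t period,
			      unsigned int cpu_a, unsigned int cpu_b,
			      lt_t num_a, lt_t den_a,
			      lt_t num_b, lt_t den_b)
{
	memset(p, 0, sizeof(*p));
	p->exec_cost = exec_cost;
	p->period    = period;
	p->cpu       = cpu_a;	/* initial partition */
	p->cls       = RT_CLASS_HARD;

	/* Migratory task: it may execute on exactly two CPUs. */
	p->semi_part.fm.nr_cpus = NR_CPUS_EDF_FM - 1;
	p->semi_part.fm.cpus[0] = cpu_a;
	p->semi_part.fm.cpus[1] = cpu_b;

	/* fraction[0][i] is the numerator and fraction[1][i] the
	 * denominator of the share of jobs handled by cpus[i], matching
	 * how cur_cpu_fract_num()/cur_cpu_fract_den() index the matrix.
	 * The two shares are meant to cover all jobs, e.g. 3/5 and 2/5. */
	p->semi_part.fm.fraction[0][0] = num_a;
	p->semi_part.fm.fraction[1][0] = den_a;
	p->semi_part.fm.fraction[0][1] = num_b;
	p->semi_part.fm.fraction[1][1] = den_b;
}

A fixed task would leave semi_part.fm zeroed (nr_cpus == 0) and just set cpu to its partition. In either case the process should first migrate itself to p->cpu (for example with sched_setaffinity()) and then install the parameters with set_rt_task_param() before switching to real-time mode, since edffm_admit_task() rejects tasks that are not already running on their assigned partition.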