/*
 * kernel/sched_edf_hsb.c
 *
 * Implementation of the EDF-HSB scheduler plugin.
 *
 */

/* headers for per-CPU data, lists, spinlocks, and the LITMUS
 * framework (plugin interface, EDF/FIFO domains, tracing)
 */
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include <linux/litmus.h>
#include <linux/sched_plugin.h>
#include <linux/edf_common.h>
#include <linux/fifo_common.h>
#include <linux/sched_trace.h>

/* undefine to remove capacity sharing */
#define HSB_CAP_SHARE_ENABLED

/* fake server PIDs */
#define HRT_BASE_PID 50000
#define SRT_BASE_PID 60000

/******************************************************************************/
/*                              Capacity queue                                */
/******************************************************************************/

int cap_check_resched(jiffie_t deadline);

typedef struct {
        int              budget;
        jiffie_t         deadline;
        pid_t            donor;

        struct list_head list;
} capacity_t;

typedef struct {
        spinlock_t       lock;
        struct list_head queue;
} capacity_queue_t;

#define next_cap(q) list_entry((q)->queue.next, capacity_t, list)

void capacity_queue_init(capacity_queue_t* queue)
{
        queue->lock = SPIN_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&queue->queue);
}

void __add_capacity(capacity_queue_t* queue, capacity_t *cap)
{
        struct list_head* pos;
        capacity_t* queued;

        /* keep the queue ordered by deadline, earliest deadline first */
        list_for_each_prev(pos, &queue->queue) {
                queued = list_entry(pos, capacity_t, list);
                if (time_before_eq(queued->deadline, cap->deadline)) {
                        __list_add(&cap->list, pos, pos->next);
                        return;
                }
        }
        list_add(&cap->list, &queue->queue);
}

int __capacity_available(capacity_queue_t* queue)
{
        capacity_t *cap;

        /* discard expired capacities at the head of the queue */
        while (!list_empty(&queue->queue)) {
                cap = list_entry(queue->queue.next, capacity_t, list);

                if (time_before_eq(cap->deadline, jiffies)) {
                        list_del(queue->queue.next);
                        kfree(cap);
                        cap = NULL;
                } else
                        break;
        }

        return !list_empty(&queue->queue);
}

void __return_capacity(capacity_queue_t* queue, capacity_t *cap)
{
        if (!cap->budget || time_before_eq(cap->deadline, jiffies))
                kfree(cap);
        else
                __add_capacity(queue, cap);
}

void return_capacity(capacity_queue_t* queue, capacity_t *cap)
{
        unsigned long flags;

        if (!cap->budget || time_before_eq(cap->deadline, jiffies))
                kfree(cap);
        else {
                spin_lock_irqsave(&queue->lock, flags);
                __add_capacity(queue, cap);
                spin_unlock_irqrestore(&queue->lock, flags);
        }
}

#define MIN_TIME_DELTA 1
#define MIN_BUDGET     1

#ifdef HSB_CAP_SHARE_ENABLED
void release_capacity(capacity_queue_t* queue, unsigned int budget,
                      jiffie_t deadline, struct task_struct* t)
{
        capacity_t* cap;
        unsigned long flags;

        if (deadline >= jiffies + MIN_TIME_DELTA && budget >= MIN_BUDGET) {
                cap = kmalloc(sizeof(capacity_t), GFP_ATOMIC);
                if (cap) {
                        cap->budget   = budget;
                        cap->deadline = deadline;
                        if (t)
                                cap->donor = t->pid;
                        else
                                cap->donor = 0;
                        spin_lock_irqsave(&queue->lock, flags);
                        __add_capacity(queue, cap);
                        cap_check_resched(next_cap(queue)->deadline);
                        spin_unlock_irqrestore(&queue->lock, flags);
                        if (t)
                                sched_trace_capacity_release(t);
                }
        }
}

void __release_capacity(capacity_queue_t* queue, unsigned int budget,
                        jiffie_t deadline, struct task_struct* t)
{
        capacity_t* cap;

        if (deadline >= jiffies + MIN_TIME_DELTA && budget >= MIN_BUDGET) {
                cap = kmalloc(sizeof(capacity_t), GFP_ATOMIC);
                if (cap) {
                        cap->budget   = budget;
                        cap->deadline = deadline;
                        if (t)
                                cap->donor = t->pid;
                        else
                                cap->donor = 0;
                        /* no locking, no resched check -- called from schedule */
                        __add_capacity(queue, cap);
                        if (t)
                                sched_trace_capacity_release(t);
                }
        }
}

capacity_t* __take_capacity(capacity_queue_t* queue, jiffie_t deadline,
                            int deadline_matters)
{
        capacity_t* cap = NULL;

        while (!list_empty(&queue->queue)) {
                cap = list_entry(queue->queue.next, capacity_t, list);
                if (deadline_matters && time_before(deadline, cap->deadline)) {
                        cap = NULL;
                        break;
                }

                list_del(queue->queue.next);
                if (cap->deadline > jiffies) {
                        if (cap->deadline - jiffies < cap->budget)
                                cap->budget = cap->deadline - jiffies;
                        break;
                }
                /* expired capacity, discard it and try the next one */
                kfree(cap);
                cap = NULL;
        }

        return cap;
}

#else /* no capacity sharing */

void release_capacity(capacity_queue_t* queue, unsigned int budget,
                      jiffie_t deadline, struct task_struct* t)
{
}

capacity_t* __take_capacity(capacity_queue_t* queue, jiffie_t deadline,
                            int deadline_matters)
{
        return NULL;
}
#endif


/******************************************************************************/
/*                            server abstractions                             */
/******************************************************************************/

/* hrt_server_t - Abstraction of a hard real-time server.
 *
 * One HRT server per CPU. If it is unused, period and wcet may be zero.
 * HRT servers are strictly periodic and retain their budget.
 */
typedef struct {
        rt_domain_t      domain;
        unsigned int     period;
        unsigned int     wcet;

        jiffie_t         deadline;
        int              budget;
} hrt_server_t;

/* be_server_t - Abstraction of a best-effort server.
 *
 * This is pretty much only an accounting abstraction.
 */
typedef struct {
        unsigned int     period;
        unsigned int     wcet;

        jiffie_t         deadline;
        jiffie_t         release;
        int              budget;

        struct list_head list;
        pid_t            pid;
} be_server_t;

/* cast to int to allow for negative slack, i.e. tardiness */
#define server_slack(srv) \
        ( ((int) (srv)->deadline - (int) jiffies) - (int) (srv)->budget )

typedef struct {
        int              cpu;

        hrt_server_t     hrt;
        be_server_t*     be;
        capacity_t*      cap;

        task_class_t     exec_class;
        jiffie_t         cur_deadline;
        atomic_t         will_schedule;

        struct list_head list;
        spinlock_t       lock;
} cpu_state_t;

DEFINE_PER_CPU(cpu_state_t, hsb_cpu_state);

#define hrt_dom(cpu)    (&per_cpu(hsb_cpu_state, cpu).hrt.domain)

#define set_will_schedule() \
        (atomic_set(&__get_cpu_var(hsb_cpu_state).will_schedule, 1))
#define clear_will_schedule() \
        (atomic_set(&__get_cpu_var(hsb_cpu_state).will_schedule, 0))
#define test_will_schedule(cpu) \
        (atomic_read(&per_cpu(hsb_cpu_state, cpu).will_schedule))


static void prepare_hrt_release(hrt_server_t *srv, jiffie_t start)
{
        if (srv->period && srv->wcet) {
                srv->deadline = start;
                srv->budget   = 0;
        }
}

static void check_for_hrt_release(hrt_server_t *srv)
{
        if (srv->wcet && srv->period &&
            time_before_eq(srv->deadline, jiffies)) {
                srv->deadline += srv->period;
                srv->budget    = srv->wcet;
                sched_trace_server_release(HRT_BASE_PID + smp_processor_id(),
                                           srv->budget, srv->period,
                                           RT_CLASS_HARD);
        }
}


/* A HRT client is eligible if either its deadline is before the
 * server deadline or if the server has zero slack. The server
 * must have budget left.
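 * (Zero or negative slack means the server must run continuously until its
 * deadline to consume its remaining budget, so its clients are eligible
 * regardless of their own deadlines.)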
 */
static inline int hrt_client_eligible(hrt_server_t *srv)
{
        if (!list_empty(&srv->domain.ready_queue))
                return srv->budget &&
                        (time_before(get_deadline(next_ready(&srv->domain)),
                                     srv->deadline) ||
                         server_slack(srv) <= 0);
        else
                return 0;
}

static void hsb_cpu_state_init(cpu_state_t* cpu_state,
                               check_resched_needed_t check,
                               int cpu)
{
        edf_domain_init(&cpu_state->hrt.domain, check);
        cpu_state->hrt.budget   = 0;
        cpu_state->hrt.deadline = 0;
        cpu_state->hrt.period   = 0;
        cpu_state->hrt.wcet     = 0;

        cpu_state->be  = NULL;
        cpu_state->cap = NULL;

        cpu_state->cur_deadline = 0;
        cpu_state->cpu          = cpu;
        cpu_state->lock         = SPIN_LOCK_UNLOCKED;
        cpu_state->exec_class   = RT_CLASS_BEST_EFFORT;

        atomic_set(&cpu_state->will_schedule, 0);
        INIT_LIST_HEAD(&cpu_state->list);
}

/******************************************************************************/
/*                BE queue functions - mostly like edf_common.c               */
/******************************************************************************/

#define be_earlier_deadline(a, b) (time_before(\
        (a)->deadline, (b)->deadline))
#define be_earlier_release(a, b)  (time_before(\
        (a)->release, (b)->release))


static void be_add_ready(rt_domain_t* edf, be_server_t *new)
{
        unsigned long flags;
        struct list_head *pos;
        be_server_t *queued;
        unsigned int passed = 0;

        BUG_ON(!new);
        /* first we need the write lock for rt_ready_queue */
        write_lock_irqsave(&edf->ready_lock, flags);
        /* find a spot where our deadline is earlier than the next */
        list_for_each(pos, &edf->ready_queue) {
                queued = list_entry(pos, be_server_t, list);
                if (unlikely(be_earlier_deadline(new, queued))) {
                        __list_add(&new->list, pos->prev, pos);
                        goto out;
                }
                passed++;
        }
        /* if we get to this point either the list is empty or new has the
         * lowest priority. Let's add it to the end. */
        list_add_tail(&new->list, &edf->ready_queue);
out:
        /* only trigger a reschedule check if the new server became
         * the head of the queue */
        if (!passed)
                edf->check_resched(edf);
        write_unlock_irqrestore(&edf->ready_lock, flags);
}

static be_server_t* be_take_ready(rt_domain_t* edf)
{
        be_server_t *t = NULL;

        if (!list_empty(&edf->ready_queue)) {
                t = list_entry(edf->ready_queue.next, be_server_t, list);
                /* kick it out of the ready list */
                list_del(&t->list);
        }
        return t;
}

/*static be_server_t* get_be_server(rt_domain_t* edf)
{
        be_server_t *t = NULL;

        spin_lock(&edf->release_lock);
        write_lock(&edf->ready_lock);
        t = be_take_ready(edf);

        if (!t && !list_empty(&edf->release_queue)) {
                t = list_entry(edf->release_queue.next, be_server_t, list);
                list_del(&t->list);
        }

        write_unlock(&edf->ready_lock);
        spin_unlock(&edf->release_lock);
        return t;
}*/

static void be_add_release(rt_domain_t* edf, be_server_t *srv)
{
        unsigned long flags;
        struct list_head *pos;
        be_server_t *queued;

        spin_lock_irqsave(&edf->release_lock, flags);
        list_for_each_prev(pos, &edf->release_queue) {
                queued = list_entry(pos, be_server_t, list);
                if ((unlikely(be_earlier_release(queued, srv)))) {
                        /* the task at pos has an earlier release */
                        /* insert the new task behind it */
                        __list_add(&srv->list, pos, pos->next);
                        goto out;
                }
        }

        list_add(&srv->list, &edf->release_queue);
out:
        spin_unlock_irqrestore(&edf->release_lock, flags);
}

static void be_try_release_pending(rt_domain_t* edf)
{
        unsigned long flags;
        struct list_head *pos, *save;
        be_server_t *queued;

        if (spin_trylock_irqsave(&edf->release_lock, flags)) {
                list_for_each_safe(pos, save, &edf->release_queue) {
                        queued = list_entry(pos, be_server_t, list);
                        if (likely(time_before_eq(
                                           queued->release, jiffies))) {
                                list_del(pos);
                                be_add_ready(edf, queued);
                                sched_trace_server_release(
                                        queued->pid, queued->budget,
                                        queued->period, RT_CLASS_BEST_EFFORT);
                        }
                        else
                                /* the release queue is ordered */
                                break;
                }
                spin_unlock_irqrestore(&edf->release_lock, flags);
        }
}

static void be_prepare_new_release(be_server_t *t, jiffie_t start)
{
        t->release  = start;
        t->deadline = t->release + t->period;
        t->budget   = t->wcet;
}

static void be_prepare_new_releases(rt_domain_t *edf, jiffie_t start)
{
        unsigned long flags;
        struct list_head tmp_list;
        struct list_head *pos, *n;
        be_server_t *t;

        INIT_LIST_HEAD(&tmp_list);

        spin_lock_irqsave(&edf->release_lock, flags);
        write_lock(&edf->ready_lock);

        /* collect all servers from both queues... */
        while (!list_empty(&edf->release_queue)) {
                pos = edf->release_queue.next;
                list_del(pos);
                list_add(pos, &tmp_list);
        }
        while (!list_empty(&edf->ready_queue)) {
                pos = edf->ready_queue.next;
                list_del(pos);
                list_add(pos, &tmp_list);
        }

        write_unlock(&edf->ready_lock);
        spin_unlock_irqrestore(&edf->release_lock, flags);

        /* ...and re-release all of them at the new start time */
        list_for_each_safe(pos, n, &tmp_list) {
                t = list_entry(pos, be_server_t, list);
                list_del(pos);
                be_prepare_new_release(t, start);
                be_add_release(edf, t);
        }
}

static void be_prepare_for_next_period(be_server_t *t)
{
        BUG_ON(!t);
        /* prepare next release */
        t->release   = t->deadline;
        t->deadline += t->period;
        t->budget    = t->wcet;
}

#define be_next_ready(edf) \
        list_entry((edf)->ready_queue.next, be_server_t, list)


/* be_preemption_needed - check whether the currently scheduled task or server
 *                        needs to be preempted by a best-effort server.
 */
static inline int be_preemption_needed(rt_domain_t* edf, cpu_state_t* state)
{
        /* we need the read lock for rt_ready_queue */
        if (!list_empty(&edf->ready_queue)) {
                if (state->exec_class == RT_CLASS_SOFT) {
                        if (state->cap)
                                return time_before(
                                        be_next_ready(edf)->deadline,
                                        state->cap->deadline);
                        else
                                return time_before(
                                        be_next_ready(edf)->deadline,
                                        state->cur_deadline);
                } else
                        return 1;
        }

        return 0;
}

static void be_enqueue(rt_domain_t* edf, be_server_t* srv)
{
        int new_release = 0;

        if (!srv->budget) {
                be_prepare_for_next_period(srv);
                new_release = 1;
        }

        if (time_before_eq(srv->release, jiffies) &&
            get_rt_mode() == MODE_RT_RUN) {
                be_add_ready(edf, srv);
                if (new_release)
                        sched_trace_server_release(
                                srv->pid, srv->budget,
                                srv->period, RT_CLASS_BEST_EFFORT);
        } else
                be_add_release(edf, srv);
}

static void be_preempt(rt_domain_t *be, cpu_state_t *state)
{
        be_server_t *srv;

        spin_lock(&state->lock);
        srv       = state->be;
        state->be = NULL;
        spin_unlock(&state->lock);

        /* add outside of lock to avoid deadlock */
        if (srv)
                be_enqueue(be, srv);
}


/******************************************************************************/
/*                         Actual HSB implementation                          */
/******************************************************************************/

/* always acquire the cpu lock as the last lock to avoid deadlocks */
static spinlock_t hsb_cpu_lock = SPIN_LOCK_UNLOCKED;
/* the cpus queue themselves according to priority in here */
static LIST_HEAD(hsb_cpu_queue);

/* the global soft real-time domain */
static rt_domain_t srt;
/* the global best-effort server domain
 * belongs conceptually to the srt domain, but has
 * be_server_t* queued instead of task_t*
 */
static rt_domain_t be;

static rt_domain_t hsb_fifo;

static capacity_queue_t cap_queue;


/* adjust_cpu_queue - Move the cpu entry to the correct place to maintain
 *                    order in the cpu queue.
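 *
 * CPUs running hard real-time work are kept at the front, CPUs running
 * soft real-time work are ordered by their current deadline, and CPUs in
 * background/best-effort mode go to the back, so the last entry in
 * hsb_cpu_queue is always the preferred preemption target for the
 * *_check_resched() functions below.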
 *
 */
static void adjust_cpu_queue(task_class_t class, jiffie_t deadline,
                             be_server_t *be)
{
        struct list_head *pos;
        cpu_state_t *other;
        cpu_state_t *entry;

        spin_lock(&hsb_cpu_lock);

        entry = &__get_cpu_var(hsb_cpu_state);

        spin_lock(&entry->lock);
        entry->exec_class   = class;
        entry->cur_deadline = deadline;
        entry->be           = be;
        spin_unlock(&entry->lock);

        if (be)
                sched_trace_server_scheduled(
                        be->pid, RT_CLASS_BEST_EFFORT, be->budget,
                        be->deadline);
        else if (class == RT_CLASS_HARD)
                sched_trace_server_scheduled(
                        HRT_BASE_PID + smp_processor_id(), RT_CLASS_HARD,
                        entry->hrt.budget, entry->hrt.deadline);

        list_del(&entry->list);
        /* If we do not execute real-time jobs we just move
         * to the end of the queue.
         * If we execute hard real-time jobs we move to the start
         * of the queue.
         */
        switch (entry->exec_class) {
        case RT_CLASS_HARD:
                list_add(&entry->list, &hsb_cpu_queue);
                break;

        case RT_CLASS_SOFT:
                list_for_each(pos, &hsb_cpu_queue) {
                        other = list_entry(pos, cpu_state_t, list);
                        if (other->exec_class > RT_CLASS_SOFT ||
                            time_before_eq(entry->cur_deadline,
                                           other->cur_deadline)) {
                                __list_add(&entry->list, pos->prev, pos);
                                goto out;
                        }
                }
                /* possible fall through if lowest SRT priority */

        case RT_CLASS_BEST_EFFORT:
                list_add_tail(&entry->list, &hsb_cpu_queue);
                break;

        default:
                /* something wrong in the variable */
                BUG();
        }
out:
        spin_unlock(&hsb_cpu_lock);
}


/* hrt_check_resched - check whether the HRT server on given CPU needs to
 *                     preempt the running task.
 */
static int hrt_check_resched(rt_domain_t *edf)
{
        hrt_server_t *srv  = container_of(edf, hrt_server_t, domain);
        cpu_state_t *state = container_of(srv, cpu_state_t, hrt);
        int ret = 0;

        spin_lock(&state->lock);

        if (hrt_client_eligible(srv)) {
                if (state->exec_class > RT_CLASS_HARD ||
                    time_before(get_deadline(next_ready(edf)),
                                state->cur_deadline)) {
                        if (state->cpu == smp_processor_id())
                                set_tsk_need_resched(current);
                        else
                                smp_send_reschedule(state->cpu);
                }
        }

        spin_unlock(&state->lock);
        return ret;
}


/* srt_check_resched - Check whether another CPU needs to switch to a SRT task.
 *
 * The function only checks and kicks the last CPU. It will reschedule and
 * kick the next one if necessary, and so on. The caller is responsible for
 * making sure that it is not the last entry or that a reschedule is not
 * necessary.
 *
 * Caller must hold edf->ready_lock!
 */
static int srt_check_resched(rt_domain_t *edf)
{
        cpu_state_t *last;
        int ret = 0;

        spin_lock(&hsb_cpu_lock);

        if (!list_empty(&srt.ready_queue)) {
                last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
                /* guard against concurrent updates */
                spin_lock(&last->lock);
                if (last->exec_class == RT_CLASS_BEST_EFFORT ||
                    (last->exec_class == RT_CLASS_SOFT &&
                     time_before(get_deadline(next_ready(&srt)),
                                 last->cur_deadline))) {
                        if (smp_processor_id() == last->cpu)
                                set_tsk_need_resched(current);
                        else if (!test_will_schedule(last->cpu))
                                smp_send_reschedule(last->cpu);
                        ret = 1;
                }
                spin_unlock(&last->lock);
        }

        spin_unlock(&hsb_cpu_lock);
        return ret;
}


/* be_check_resched - Check whether another CPU needs to switch to a BE server.
 *
 * Caller must hold edf->ready_lock!
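 *
 * Like srt_check_resched(), this only examines the last (lowest-priority)
 * CPU in hsb_cpu_queue.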
 */
static int be_check_resched(rt_domain_t *edf)
{
        cpu_state_t *last;
        int soft, bg;
        int ret = 0;

        spin_lock(&hsb_cpu_lock);

        if (!list_empty(&be.ready_queue)) {
                last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
                /* guard against concurrent updates */
                spin_lock(&last->lock);

                bg   = last->exec_class == RT_CLASS_BEST_EFFORT;
                soft = last->exec_class == RT_CLASS_SOFT;

                if (bg || (soft && time_before(be_next_ready(&be)->deadline,
                                               last->cur_deadline))) {
                        if (smp_processor_id() == last->cpu)
                                set_tsk_need_resched(current);
                        else if (!test_will_schedule(last->cpu))
                                smp_send_reschedule(last->cpu);
                        ret = 1;
                }

                spin_unlock(&last->lock);
        }

        spin_unlock(&hsb_cpu_lock);
        return ret;
}


int cap_check_resched(jiffie_t deadline)
{
        unsigned long flags;
        cpu_state_t *last;
        int soft, bg;
        int ret = 0;

        if (get_rt_mode() == MODE_RT_RUN) {
                spin_lock_irqsave(&hsb_cpu_lock, flags);

                last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
                /* guard against concurrent updates */
                spin_lock(&last->lock);

                bg   = last->exec_class == RT_CLASS_BEST_EFFORT;
                soft = last->exec_class == RT_CLASS_SOFT;

                if (bg || (soft && time_before(deadline,
                                               last->cur_deadline))) {
                        if (smp_processor_id() == last->cpu)
                                set_tsk_need_resched(current);
                        else if (!test_will_schedule(last->cpu))
                                smp_send_reschedule(last->cpu);
                        ret = 1;
                }

                spin_unlock(&last->lock);
                spin_unlock_irqrestore(&hsb_cpu_lock, flags);
        }
        return ret;
}

int fifo_check_resched(void)
{
        unsigned long flags;
        cpu_state_t *last;
        int ret = 0;

        if (get_rt_mode() == MODE_RT_RUN) {
                spin_lock_irqsave(&hsb_cpu_lock, flags);

                last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
                /* guard against concurrent updates */
                spin_lock(&last->lock);

                if (last->exec_class == RT_CLASS_BEST_EFFORT) {
                        if (smp_processor_id() == last->cpu)
                                set_tsk_need_resched(current);
                        else if (!test_will_schedule(last->cpu))
                                smp_send_reschedule(last->cpu);
                        ret = 1;
                }

                spin_unlock(&last->lock);
                spin_unlock_irqrestore(&hsb_cpu_lock, flags);
        }
        return ret;
}


static inline int hsb_preemption_needed(rt_domain_t* edf, cpu_state_t* state)
{
        /* we need the read lock for rt_ready_queue */
        if (!list_empty(&edf->ready_queue)) {
                if (state->exec_class == RT_CLASS_SOFT) {
                        if (state->cap)
                                return time_before(
                                        get_deadline(next_ready(edf)),
                                        state->cap->deadline);
                        else
                                return time_before(
                                        get_deadline(next_ready(edf)),
                                        state->cur_deadline);
                } else
                        return 1;
        }
        return 0;
}

static inline int cap_preemption_needed(capacity_queue_t* q, cpu_state_t* state)
{
        /* we need the read lock for rt_ready_queue */
        if (!list_empty(&q->queue)) {
                if (state->exec_class == RT_CLASS_SOFT) {
                        if (state->cap)
                                return time_before(
                                        next_cap(q)->deadline,
                                        state->cap->deadline);
                        else
                                return time_before(
                                        next_cap(q)->deadline,
                                        state->cur_deadline);
                } else
                        return 1;
        }
        return 0;
}

/* hsb_scheduler_tick - this function is called for every local timer
 *                      interrupt.
 *
 * Checks whether the current task has expired and whether we need to
 * preempt it if it has not.
 */
static reschedule_check_t hsb_scheduler_tick(void)
{
        unsigned long flags;
        struct task_struct *t = current;
        int resched = 0;

        cpu_state_t *state = &__get_cpu_var(hsb_cpu_state);

        /* expire tasks even if not in real-time mode;
         * this makes sure that at the end of real-time mode
         * no tasks "run away forever".
         */

        /* charge BE server only if we are not running on a spare capacity */
        if (state->be && !state->cap && --state->be->budget <= 0) {
                sched_trace_server_completion(state->be->pid, 0,
                                              state->be->deadline,
                                              RT_CLASS_BEST_EFFORT);
                be_preempt(&be, state);
                resched = 1;
        }

        /* charge and possibly expire a spare capacity */
        if (state->cap)
                if (--state->cap->budget <= 0 ||
                    time_before_eq(state->cap->deadline, jiffies)) {
                        kfree(state->cap);
                        state->cap = NULL;
                        resched = 1;
                }

        if (is_realtime(t)) {
                if (is_hrt(t) && (--state->hrt.budget <= 0)) {
                        sched_trace_server_completion(
                                HRT_BASE_PID + smp_processor_id(), 0,
                                state->hrt.deadline, RT_CLASS_HARD);
                        resched = 1;
                }

                /* account for received service... */
                t->rt_param.times.exec_time++;

                /* ...and charge current budget */
                if (!state->cap) {
                        --t->time_slice;
                        /* a task should always be able to finish its job */
                        BUG_ON(!is_be(t) && !t->time_slice &&
                               !job_completed(t));
                }

                if (job_completed(t) || (is_be(t) && !t->time_slice)) {
                        sched_trace_job_completion(t);
                        set_rt_flags(t, RT_F_SLEEP);
                        resched = 1;
                }
        }

        if (get_rt_mode() == MODE_RT_RUN) {
                try_release_pending(&state->hrt.domain);
                check_for_hrt_release(&state->hrt);
                try_release_pending(&srt);
                be_try_release_pending(&be);

                if (!resched)
                        switch (state->exec_class) {
                        case RT_CLASS_HARD:
                                read_lock_irqsave(&state->hrt.domain.ready_lock,
                                                  flags);
                                resched = edf_preemption_needed(
                                        &state->hrt.domain, t);
                                read_unlock_irqrestore(
                                        &state->hrt.domain.ready_lock, flags);
                                break;

                        case RT_CLASS_SOFT:
                        case RT_CLASS_BEST_EFFORT:
                                local_irq_save(flags);

                                /* check for HRT jobs */
                                read_lock(&state->hrt.domain.ready_lock);
                                resched = hrt_client_eligible(&state->hrt);
                                read_unlock(&state->hrt.domain.ready_lock);

                                /* check for spare capacities */
                                if (!resched) {
                                        spin_lock(&cap_queue.lock);
                                        resched = cap_preemption_needed(
                                                &cap_queue, state);
                                        spin_unlock(&cap_queue.lock);
                                }

                                /* check for SRT jobs */
                                if (!resched) {
                                        read_lock(&srt.ready_lock);
                                        resched = hsb_preemption_needed(
                                                &srt, state);
                                        read_unlock(&srt.ready_lock);
                                }

                                /* check for BE jobs */
                                if (!resched) {
                                        read_lock(&be.ready_lock);
                                        resched = be_preemption_needed(
                                                &be, state);
                                        read_unlock(&be.ready_lock);
                                }

                                /* check for background jobs */
                                if (!resched && !is_realtime(current))
                                        resched = jobs_pending(&hsb_fifo);
                                local_irq_restore(flags);
                                break;

                        default:
                                /* something wrong in the variable */
                                BUG();
                        }
        }

        if (resched) {
                set_will_schedule();
                return FORCE_RESCHED;
        } else
                return NO_RESCHED;
}

static int schedule_hrt(struct task_struct * prev,
                        struct task_struct ** next, runqueue_t * rq)
{
        unsigned long flags;
        int deactivate = 1;
        cpu_state_t *state;

        state = &__get_cpu_var(hsb_cpu_state);

        write_lock_irqsave(&state->hrt.domain.ready_lock, flags);

        if (state->cap) {
                /* hrt_schedule does not have the cap_queue lock */
                return_capacity(&cap_queue, state->cap);
                state->cap = NULL;
        }

        if (is_hrt(prev) && is_released(prev) && is_running(prev)
            && !edf_preemption_needed(&state->hrt.domain, prev)) {
                /* This really should only happen if the task has
                 * 100% utilization or when we got a bogus/delayed
                 * resched IPI.
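                 * Either way prev keeps running, so there is nothing to
                 * deactivate.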
*/ TRACE("HRT: prev will be next, already released\n"); *next = prev; deactivate = 0; } else { /* either not yet released, preempted, or non-rt */ *next = __take_ready(&state->hrt.domain); /* the logic in hsb_schedule makes sure *next must exist * if we get here */ BUG_ON(!*next); /* stick the task into the runqueue */ __activate_task(*next, rq); set_task_cpu(*next, smp_processor_id()); } set_rt_flags(*next, RT_F_RUNNING); adjust_cpu_queue(RT_CLASS_HARD, get_deadline(*next), NULL); clear_will_schedule(); write_unlock_irqrestore(&state->hrt.domain.ready_lock, flags); return deactivate; } static struct task_struct* find_min_slack_task(struct task_struct *prev, rt_domain_t* edf) { struct list_head *pos; struct task_struct* tsk = NULL; struct task_struct* cur; if (is_realtime(prev) && is_running(prev) && get_rt_flags(prev) != RT_F_SLEEP) tsk = prev; list_for_each(pos, &edf->ready_queue) { cur = list_entry(pos, struct task_struct, rt_list); if (!tsk || task_slack(tsk) > task_slack(cur)) tsk = cur; } return tsk; } static struct task_struct* null_heuristic(struct task_struct *prev, rt_domain_t* edf, rt_domain_t* fifo) { if (jobs_pending(fifo)) return NULL; else if (!list_empty(&edf->ready_queue)) return list_entry(edf->ready_queue.next, struct task_struct, rt_list); else return NULL; } /* caller holds all locks */ static int schedule_capacity(struct task_struct *prev, struct task_struct **next, runqueue_t *rq) { cpu_state_t *state = &__get_cpu_var(hsb_cpu_state); capacity_t* old; if (state->cap) { old = state->cap; state->cap = __take_capacity(&cap_queue, old->deadline, 1); if (!state->cap) state->cap = old; else __return_capacity(&cap_queue, old); } else state->cap = __take_capacity(&cap_queue, 0, 0); /* pick a task likely to be tardy */ *next = find_min_slack_task(prev, &srt); /* only give away spare capacities if there is no task that * is going to be tardy */ if (*next && task_slack(*next) >= 0) *next = null_heuristic(prev, &srt, &hsb_fifo); if (*next && *next != prev) list_del(&(*next)->rt_list); /* if there is none pick a BE job */ if (!*next) { if (is_realtime(prev) && is_be(prev) && is_running(prev) && get_rt_flags(prev) != RT_F_SLEEP) *next = prev; else *next = take_ready(&hsb_fifo); } if (state->be) be_preempt(&be, state); BUG_ON(!state->cap); if (*next && state->cap->donor) { sched_trace_capacity_allocation( *next, state->cap->budget, state->cap->deadline, state->cap->donor); } return *next != prev; } #define BG 0 #define SRT 1 #define BE 2 #define CAP 3 static inline int what_first(rt_domain_t *be, rt_domain_t *srt, capacity_queue_t* q) { jiffie_t sdl = 0, bdl= 0, cdl = 0, cur; int _srt = !list_empty(&srt->ready_queue); int _be = !list_empty(&be->ready_queue); int _cap = __capacity_available(q); int ret = BG; /* nothing ready => background mode*/ cur = 0; if (_srt) sdl = get_deadline(next_ready(srt)); if (_be) bdl = be_next_ready(be)->deadline; if (_cap) cdl = next_cap(q)->deadline; if (_cap) { ret = CAP; cur = cdl; } if (_srt && (time_before(sdl, cur) || !ret)) { ret = SRT; cur = sdl; } if (_be && (time_before(bdl, cur) || !ret)) { ret = BE; cur = bdl; } return ret; } static int schedule_srt_be_cap(struct task_struct *prev, struct task_struct **next, runqueue_t *rq) { task_class_t class = RT_CLASS_BEST_EFFORT; jiffie_t deadline = 0; unsigned long flags; int deactivate = 1; be_server_t* bes; cpu_state_t* state; int type = BG; reschedule: write_lock_irqsave(&srt.ready_lock, flags); write_lock(&be.ready_lock); spin_lock(&cap_queue.lock); state = &__get_cpu_var(hsb_cpu_state); bes = 
        bes = NULL;

        clear_will_schedule();

        if (is_realtime(prev) && (is_released(prev) || is_be(prev)) &&
            is_running(prev) && !hsb_preemption_needed(&srt, state) &&
            !be_preemption_needed(&be, state)) {
                /* Our current task's next job has already been
                 * released and has higher priority than the highest
                 * priority waiting task; in other words: it is tardy.
                 * We just keep it.
                 */
                TRACE("prev will be next, already released\n");
                *next      = prev;
                class      = prev->rt_param.basic_params.class;
                deadline   = get_deadline(*next);
                deactivate = 0;
        } else {
                /* either not yet released, preempted, or non-rt */
                type = what_first(&be, &srt, &cap_queue);

                switch (type) {
                case CAP:
                        /* capacity */
                        deactivate = schedule_capacity(prev, next, rq);
                        deadline   = state->cap->deadline;
                        if (*next)
                                class = RT_CLASS_SOFT;
                        else
                                class = RT_CLASS_BEST_EFFORT;
                        break;
                case BE:
                        /* be */
                        *next = NULL;
                        bes   = be_take_ready(&be);
                        if (bes) {
                                class    = RT_CLASS_SOFT;
                                deadline = bes->deadline;
                                *next    = take_ready(&hsb_fifo);
                                if (!*next) {
                                        /* no BE job to serve: turn the
                                         * server's budget into a spare
                                         * capacity, re-enqueue the server,
                                         * and retry */
                                        __release_capacity(&cap_queue,
                                                           bes->budget,
                                                           bes->deadline,
                                                           NULL);
                                        bes->budget = 0;
                                        barrier();
                                        spin_unlock(&cap_queue.lock);
                                        write_unlock(&be.ready_lock);
                                        write_unlock_irqrestore(&srt.ready_lock,
                                                                flags);
                                        be_enqueue(&be, bes);
                                        goto reschedule;
                                }
                        }
                        break;
                case SRT:
                        /* srt */
                        *next = __take_ready(&srt);
                        if (*next) {
                                class    = RT_CLASS_SOFT;
                                deadline = get_deadline(*next);
                        }
                        break;
                case BG:
                        /* background server mode */
                        class    = RT_CLASS_BEST_EFFORT;
                        deadline = 0;
                        *next    = take_ready(&hsb_fifo);
                        break;
                }

                /* give back capacities */
                if (type != CAP && state->cap) {
                        __return_capacity(&cap_queue, state->cap);
                        state->cap = NULL;
                }
                if (*next && deactivate) {
                        /* mark the task as executing on this cpu */
                        set_task_cpu(*next, smp_processor_id());
                        /* stick the task into the runqueue */
                        __activate_task(*next, rq);
                }
        }

        adjust_cpu_queue(class, deadline, bes);

        switch (type) {
        case BG:
                break;
        case BE:
                be.check_resched(&be);
                break;
        case SRT:
                srt.check_resched(&srt);
                break;
        case CAP:
                if (!list_empty(&cap_queue.queue))
                        cap_check_resched(list_entry(cap_queue.queue.next,
                                                     capacity_t,
                                                     list)->deadline);
                break;
        }

        if (*next)
                set_rt_flags(*next, RT_F_RUNNING);

        spin_unlock(&cap_queue.lock);
        write_unlock(&be.ready_lock);
        write_unlock_irqrestore(&srt.ready_lock, flags);
        return deactivate;
}


static int hsb_schedule(struct task_struct * prev, struct task_struct ** next,
                        runqueue_t * rq)
{
        int need_deactivate = 1;
        cpu_state_t *state  = NULL;

        preempt_disable();

        state = &__get_cpu_var(hsb_cpu_state);

        be_preempt(&be, state);

        if (is_realtime(prev) && !is_be(prev) &&
            get_rt_flags(prev) == RT_F_SLEEP) {
                TRACE("preparing %d for next period\n", prev->pid);
                /* donate unused budget as a spare capacity */
                release_capacity(&cap_queue, prev->time_slice,
                                 prev->rt_param.times.deadline, prev);
                edf_prepare_for_next_period(prev);
        }

        if (get_rt_mode() == MODE_RT_RUN) {
                /* we need to schedule hrt if a hrt job is pending or when
                 * we have a non-expired hrt job on the cpu
                 */
                if (hrt_client_eligible(&state->hrt) ||
                    unlikely((is_hrt(prev) && is_running(prev) &&
                              get_rt_flags(prev) != RT_F_SLEEP))) {
                        if (state->cap) {
                                return_capacity(&cap_queue, state->cap);
                                state->cap = NULL;
                        }
                        need_deactivate = schedule_hrt(prev, next, rq);
                } else
                        need_deactivate = schedule_srt_be_cap(prev, next, rq);
        }

        if (is_realtime(prev) && need_deactivate && prev->array) {
                /* take it out of the run queue */
                deactivate_task(prev, rq);
        }

        preempt_enable();

        return 0;
}

/* put task into correct queue */
static inline void hsb_add_release(struct task_struct *t)
{
        if (is_hrt(t))
                add_release(hrt_dom(get_partition(t)), t);
        else if (is_srt(t))
                add_release(&srt, t);
        else if (is_be(t)) {
                t->time_slice = 0;
                add_ready(&hsb_fifo, t);
                fifo_check_resched();
        } else
                BUG();
}

/* put task into correct queue */
static inline void hsb_add_ready(struct task_struct *t)
{
        if (is_hrt(t))
                add_ready(hrt_dom(get_partition(t)), t);
        else if (is_srt(t))
                add_ready(&srt, t);
        else if (is_be(t)) {
                add_ready(&hsb_fifo, t);
                fifo_check_resched();
        } else
                BUG();
}


/* hsb_finish_switch - we just finished the switch away from prev;
 *                     it is now safe to requeue the task
 */
static void hsb_finish_switch(struct task_struct *prev)
{
        if (!is_realtime(prev) || !is_running(prev))
                return;

        TRACE("finish switch for %d\n", prev->pid);

        if (is_be(prev)) {
                add_ready(&hsb_fifo, prev);
                return;
        }

        if (get_rt_flags(prev) == RT_F_SLEEP ||
            get_rt_mode() != MODE_RT_RUN) {
                /* this task has expired;
                 * _schedule has already taken care of updating the release
                 * and the deadline. We just have to check whether it has
                 * been released yet.
                 */
                if (is_released(prev) && get_rt_mode() == MODE_RT_RUN) {
                        sched_trace_job_release(prev);
                        hsb_add_ready(prev);
                        TRACE("%d goes straight to ready queue\n", prev->pid);
                } else
                        /* it has got to wait */
                        hsb_add_release(prev);
        } else {
                /* this is a forced preemption;
                 * the task stays in the ready_queue, we only have to
                 * make it available to other cpus
                 */
                hsb_add_ready(prev);
        }
}


/* Prepare a task for running in RT mode
 * Enqueues the task into the master queue data structure.
 * Returns
 *      -EPERM  if the task is not TASK_STOPPED
 */
static long hsb_prepare_task(struct task_struct * t)
{
        TRACE("edf-hsb: prepare task %d\n", t->pid);

        if (t->state == TASK_STOPPED) {
                __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);

                if (get_rt_mode() == MODE_RT_RUN && !is_be(t))
                        /* The action is already on.
                         * Prepare immediate release.
                         */
                        edf_release_now(t);
                /* The task should be running in the queue, otherwise signal
                 * code will try to wake it up with fatal consequences.
                 */
                t->state = TASK_RUNNING;
                if (is_be(t))
                        t->rt_param.times.deadline = 0;
                hsb_add_release(t);
                return 0;
        } else
                return -EPERM;
}

static void hsb_wake_up_task(struct task_struct *task)
{
        /* We must determine whether the task should go into the release
         * queue or into the ready queue. It may enter the ready queue
         * if it has credit left in its time slice and has not yet reached
         * its deadline. If it is now past its deadline we assume this is the
         * arrival of a new sporadic job and thus put it in the ready queue
         * anyway. If it has zero budget and the next release is in the future
         * it has to go to the release queue.
         */
        TRACE("edf-hsb: wake up %d with budget=%d\n",
              task->pid, task->time_slice);
        task->state = TASK_RUNNING;

        if (is_be(task)) {
                task->rt_param.times.last_release = jiffies;
                hsb_add_release(task);
        } else if (is_tardy(task)) {
                /* new sporadic release */
                edf_release_now(task);
                sched_trace_job_release(task);
                hsb_add_ready(task);
        } else if (task->time_slice) {
                /* came back in time before deadline */
                set_rt_flags(task, RT_F_RUNNING);
                hsb_add_ready(task);
        } else {
                hsb_add_release(task);
        }
}

static void hsb_task_blocks(struct task_struct *t)
{
        /* not really anything to do since it can only block if
         * it is running, and when it is not running it is not in any
         * queue anyway.
*/ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice); if (is_be(t)) sched_trace_job_completion(t); } static int hsb_mode_change(int new_mode) { int cpu; cpu_state_t *entry; jiffie_t start; TRACE("[%d] edf-hsb: mode changed to %d\n", smp_processor_id(), new_mode); if (new_mode == MODE_RT_RUN) { start = jiffies + 20; rerelease_all(&srt, edf_release_at); be_prepare_new_releases(&be, start); /* initialize per CPU state * we can't do this at boot time because we don't know * which CPUs will be online and we can't put non-existing * cpus into the queue */ spin_lock(&hsb_cpu_lock); /* get old cruft out of the way in case we reenter real-time * mode for a second time */ while (!list_empty(&hsb_cpu_queue)) list_del(hsb_cpu_queue.next); /* reinitialize */ for_each_online_cpu(cpu) { entry = &per_cpu(hsb_cpu_state, cpu); atomic_set(&entry->will_schedule, 0); entry->exec_class = RT_CLASS_BEST_EFFORT; entry->cur_deadline = 0; list_add(&entry->list, &hsb_cpu_queue); rerelease_all(&entry->hrt.domain, edf_release_at); prepare_hrt_release(&entry->hrt, start); } spin_unlock(&hsb_cpu_lock); } TRACE("[%d] edf-hsb: mode change done\n", smp_processor_id()); return 0; } typedef enum { EDF_HSB_SET_HRT, EDF_HSB_GET_HRT, EDF_HSB_CREATE_BE } edf_hsb_setup_cmds_t; typedef struct { int cpu; unsigned int wcet; unsigned int period; } setup_hrt_param_t; typedef struct { unsigned int wcet; unsigned int period; } create_be_param_t; typedef struct { union { setup_hrt_param_t setup_hrt; create_be_param_t create_be; }; } param_t; static pid_t next_be_server_pid = SRT_BASE_PID; static int hsb_scheduler_setup(int cmd, void __user* up) { unsigned long flags; int error = -EINVAL; cpu_state_t* state; be_server_t* srv; param_t param; switch (cmd) { case EDF_HSB_SET_HRT: if (copy_from_user(¶m, up, sizeof(setup_hrt_param_t))) { error = -EFAULT; goto out; } if (!cpu_online(param.setup_hrt.cpu)) { printk(KERN_WARNING "scheduler setup: " "CPU %d is not online!\n", param.setup_hrt.cpu); error = -EINVAL; goto out; } if (param.setup_hrt.period < param.setup_hrt.wcet) { printk(KERN_WARNING "period < wcet!\n"); error = -EINVAL; goto out; } state = &per_cpu(hsb_cpu_state, param.setup_hrt.cpu); spin_lock_irqsave(&state->lock, flags); state->hrt.wcet = param.setup_hrt.wcet; state->hrt.period = param.setup_hrt.period; spin_unlock_irqrestore(&state->lock, flags); printk(KERN_WARNING "edf-hsb: set HRT #%d to (%d, %d)\n", param.setup_hrt.cpu, param.setup_hrt.wcet, param.setup_hrt.period); error = 0; break; case EDF_HSB_GET_HRT: if (copy_from_user(¶m, up, sizeof(setup_hrt_param_t))) { error = -EFAULT; goto out; } if (!cpu_online(param.setup_hrt.cpu)) { error = -EINVAL; goto out; } state = &per_cpu(hsb_cpu_state, param.setup_hrt.cpu); spin_lock_irqsave(&state->lock, flags); param.setup_hrt.wcet = state->hrt.wcet; param.setup_hrt.period = state->hrt.period; spin_unlock_irqrestore(&state->lock, flags); if (copy_to_user(up, ¶m, sizeof(setup_hrt_param_t))) { error = -EFAULT; goto out; } error = 0; break; case EDF_HSB_CREATE_BE: if (copy_from_user(¶m, up, sizeof(create_be_param_t))) { error = -EFAULT; goto out; } if (param.create_be.period < param.create_be.wcet || !param.create_be.period || !param.create_be.wcet) { error = -EINVAL; goto out; } srv = (be_server_t*) kmalloc(sizeof(be_server_t), GFP_KERNEL); if (!srv) { error = -ENOMEM; goto out; } srv->wcet = param.create_be.wcet; srv->period = param.create_be.period; srv->pid = next_be_server_pid++; INIT_LIST_HEAD(&srv->list); be_prepare_new_release(srv, jiffies); be_enqueue(&be, srv); 
                printk(KERN_WARNING "edf-hsb: created a BE with (%d, %d)\n",
                       param.create_be.wcet, param.create_be.period);

                error = 0;
                break;

        default:
                printk(KERN_WARNING "edf-hsb: unknown command %d\n", cmd);
        }

out:
        return error;
}


/*	Plugin object	*/
static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
        .ready_to_use = 0
};


/*
 *	Plugin initialization code.
 */
#define INIT_SCHED_PLUGIN (struct sched_plugin){\
        .plugin_name       = "EDF-HSB",\
        .ready_to_use      = 1,\
        .scheduler_tick    = hsb_scheduler_tick,\
        .prepare_task      = hsb_prepare_task,\
        .sleep_next_period = edf_sleep_next_period,\
        .schedule          = hsb_schedule,\
        .finish_switch     = hsb_finish_switch,\
        .mode_change       = hsb_mode_change,\
        .wake_up_task      = hsb_wake_up_task,\
        .task_blocks       = hsb_task_blocks, \
        .scheduler_setup   = hsb_scheduler_setup \
}


sched_plugin_t *__init init_edf_hsb_plugin(void)
{
        int i;

        if (!s_plugin.ready_to_use) {
                capacity_queue_init(&cap_queue);
                edf_domain_init(&srt, srt_check_resched);
                edf_domain_init(&be,  be_check_resched);
                fifo_domain_init(&hsb_fifo, NULL);

                for (i = 0; i < NR_CPUS; i++) {
                        hsb_cpu_state_init(&per_cpu(hsb_cpu_state, i),
                                           hrt_check_resched, i);
                        printk("HRT server %d initialized.\n", i);
                }
                s_plugin = INIT_SCHED_PLUGIN;
        }
        return &s_plugin;
}