#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sort.h>

#include <litmus/litmus.h>
#include <litmus/debug_trace.h>
#include <litmus/jobs.h>
#include <litmus/budget.h>
#include <litmus/preempt.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>
#include <litmus/sched_plugin.h>
#include <litmus/sched_trace.h>
#include <litmus/trace.h>

/* to set up domain/cpu mappings */
#include <litmus/litmus_proc.h>

typedef struct cont_domain {
	rt_domain_t domain;
	struct task_struct *container;
	struct task_struct *scheduled; // fixed task currently running in this container
	lt_t scheduled_last_exec_time; // time when the scheduled task last started executing or had its budget updated
	u64 f_util;
	struct bheap_node *hn;
	struct hrtimer idle_enforcement_timer;
	int timer_armed;
} cont_domain_t;

typedef struct {
	int cpu;
	struct task_struct *linked;
	struct task_struct *scheduled; // container or migrating task
	atomic_t will_schedule;
	/* This heap node should never be NULL. This will be in edfsc_cpu_heap when
	 * extra capacity is available on this CPU, i.e. when this CPU's container
	 * is not fully provisioned.
	 */
	struct bheap_node *hn;
} cpu_entry_t;

struct list_head pending_adds;
struct list_head migrating_tasks;
struct list_head pending_removes;
struct hrtimer container_release_timer;

DEFINE_PER_CPU(cpu_entry_t, edfsc_cpu_entries);

struct task_struct* container_tasks;
static cont_domain_t* container_domains;
static cont_domain_t** container_list;
static rt_domain_t gsched_domain;
#define g_lock (gsched_domain.ready_lock)

#define CONTAINER_PERIOD 50000000 // 50 ms in ns

u64 m_util;
u64 sys_util;

// only true when container_boundary needs to perform stabilization
int sys_changed;

#define is_container(task) ((task) && tsk_rt(task)->edfsc_params.domain != NULL && tsk_rt(task)->domain == &gsched_domain)
#define is_fixed(task) ((task) && tsk_rt(task)->domain && tsk_rt(task)->domain != &gsched_domain)
#define is_migrating(task) ((task) && tsk_rt(task)->edfsc_params.domain == NULL && tsk_rt(task)->domain == &gsched_domain)

#define FP_SHIFT 20
#define to_fp(a) ((a) << FP_SHIFT)
#define from_fp(a) ((a) >> FP_SHIFT)
#define fp_div(a, b) (to_fp((a)) / (b))

// We need these from litmus.c for partially initializing our container tasks
struct release_heap* release_heap_alloc(int gfp_flags);
void release_heap_free(struct release_heap* rh);
struct bheap_node* bheap_node_alloc(int gfp_flags);
void bheap_node_free(struct bheap_node* hn);

int count_migrating_tasks(void)
{
	int task_count = 0;
	struct list_head *pos;
	unsigned long flags;

	raw_spin_lock_irqsave(&g_lock, flags);
	list_for_each(pos, &migrating_tasks) {
		task_count++;
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);
	return task_count;
}

/* Do a backwards comparison based on f_util so that heavier containers
 * will come first
 */
// Used for best-fit
static int container_lower_prio(const void *_a, const void *_b)
{
	const cont_domain_t* a = *(const cont_domain_t**)(_a);
	const cont_domain_t* b = *(const cont_domain_t**)(_b);
	return (b->f_util - a->f_util);
}

// Used for worst-fit
static int container_higher_prio(const void *_a, const void *_b)
{
	const cont_domain_t* a = *(const cont_domain_t**)(_a);
	const cont_domain_t* b = *(const cont_domain_t**)(_b);
	return (a->f_util - b->f_util);
}

/* Finds the task_struct of a list node */
static struct task_struct* task_of_list_node(struct list_head *node)
{
	return container_of(node, struct task_struct, edfsc_qnode);
}

/* Requeues task in the domain recorded in its edfsc_params */
static noinline void requeue(struct task_struct* task)
{
	BUG_ON(!task);
	/* sanity check before insertion */
	BUG_ON(is_queued(task));
	BUG_ON(is_migrating(task) && task->rt_param.edfsc_params.container_task != NULL);
	//BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
	BUG_ON(budget_enforced(task) &&
	       budget_exhausted(task));
	//BUG_ON(is_container(task) && ((cont_domain_t*)task->rt_param.edfsc_params.domain)->timer_armed);
	//BUG_ON(task && is_completed(task));

	if (is_early_releasing(task) || is_released(task, litmus_clock())) {
		__add_ready((rt_domain_t *) tsk_rt(task)->domain, task);
	} else {
		/* it has got to wait */
		add_release((rt_domain_t *) tsk_rt(task)->domain, task);
	}
}

/**
 * Preempt and litmus_reschedule() according to our understanding of the CPU state.
 *
 * @note Correctly preempts fixed task if entry->scheduled is a container
 * @param entry CPU state per EDF-SC. entry->scheduled may be NULL.
 */
static void preempt(cpu_entry_t *entry)
{
	BUG_ON(!entry);
	if (is_container(entry->scheduled)) {
		preempt_if_preemptable(tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled,
		                       entry->cpu);
	} else {
		preempt_if_preemptable(entry->scheduled, entry->cpu);
	}
}

/////////////////////////////////////////////////////////////////////////////////////

/*
 *
 * CPU ORDERING
 *
 */

static struct bheap_node* edfsc_cpu_heap_node; // Array of cpu heap nodes
static struct bheap edfsc_cpu_heap;            // Cpu heap

static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
{
	cpu_entry_t *a, *b;
	a = _a->value;
	b = _b->value;
	/* Note that a and b are inverted: we want the lowest-priority CPU at
	 * the top of the heap.
	 */
	return edf_higher_prio(b->linked, a->linked);
}

/* caller must hold g_lock */
static cpu_entry_t* lowest_prio_cpu(void)
{
	struct bheap_node* hn;
	BUG_ON(!raw_spin_is_locked(&g_lock));
	hn = bheap_peek(cpu_lower_prio, &edfsc_cpu_heap);
	return hn->value;
}

static void remove_cpu_from_global(cpu_entry_t *entry)
{
	BUG_ON(!raw_spin_is_locked(&g_lock));
	// This disconnects the node and sets entry->hn->degree = NOT_IN_HEAP
	bheap_delete(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
}

static void add_cpu_to_global(cpu_entry_t *entry)
{
	BUG_ON(!raw_spin_is_locked(&g_lock));
	BUG_ON(bheap_node_in_heap(entry->hn));
	bheap_insert(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
}

/* update_cpu_position - Move the cpu entry to the correct place to maintain
 * order in the cpu queue. Caller must hold g_lock.
 */
static void update_cpu_position(cpu_entry_t *entry)
{
	if (likely(bheap_node_in_heap(entry->hn))) {
		remove_cpu_from_global(entry);
		add_cpu_to_global(entry);
	}
}

///////////////////////////////////////////////////////////////////////////////////////

/*
 *
 * IDLE CONTAINER BUDGET ENFORCEMENT
 *
 */

// updates exec_time for container budget tracking
static void update_container_budget(struct task_struct* t)
{
	lt_t now = litmus_clock();
	tsk_rt(t)->job_params.exec_time += now - tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time;
	tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = now;
}

// timeout for timer enforcing budget of empty container
static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
{
	cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer);
	unsigned long flags;

	local_irq_save(flags);
	BUG_ON(tsk_rt(domain->container)->edfsc_params.id != this_cpu_ptr(&edfsc_cpu_entries)->cpu);
	domain->timer_armed = 0;
	tsk_rt(domain->container)->completed = 1;
	litmus_reschedule_local();
	local_irq_restore(flags);
	return HRTIMER_NORESTART;
}

void manage_idle_enforcement_timer(struct task_struct* t)
{
	lt_t now;
	cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;

	now = litmus_clock();
	BUG_ON(is_completed(t));
	BUG_ON(budget_exhausted(t) && !is_np(t));
	if (!domain->timer_armed) {
		domain->scheduled_last_exec_time = now;
		// hrtimer_start cancels the timer so we don't have to check
		// if it is already armed
		hrtimer_start(&(domain->idle_enforcement_timer),
		              ns_to_ktime(now + budget_remaining(t)),
		              HRTIMER_MODE_ABS_PINNED);
		domain->timer_armed = 1;
	}
}

void cancel_idle_enforcement_timer(struct task_struct* t)
{
	cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;
	hrtimer_try_to_cancel(&(domain->idle_enforcement_timer));
	domain->timer_armed = 0;
}

/* link_task_to_cpu - Links a migrating task or container to a CPU
 * Update the link of a CPU.
 */
static noinline void link_task_to_cpu(struct task_struct* linked, cpu_entry_t *entry)
{
	BUG_ON(is_fixed(linked));
	BUG_ON(is_container(linked) && tsk_rt(linked)->edfsc_params.id != entry->cpu);
	BUG_ON(linked && is_queued(linked));
	//BUG_ON(linked && ((budget_enforced(linked) && budget_exhausted(linked)) || is_completed(linked)));
	BUG_ON(linked && !is_released(linked, litmus_clock()));
	//BUG_ON(is_container(linked) && linked->rt_param.edfsc_params.domain->timer_armed);

	/* Currently linked task is set to be unlinked. */
	if (entry->linked && entry->linked->rt_param.linked_on == entry->cpu)
		entry->linked->rt_param.linked_on = NO_CPU;

	/* Link new task to CPU. */
	if (linked)
		linked->rt_param.linked_on = entry->cpu;

	entry->linked = linked;
	BUG_ON(entry->linked && entry->linked->rt_param.linked_on != entry->cpu);

#ifdef WANT_ALL_SCHED_EVENTS
	if (linked)
		TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
	else
		TRACE("NULL linked to %d.\n", entry->cpu);
#endif

	update_cpu_position(entry);
}

/* unlink - Make sure a task is not linked any longer to an entry
 * where it was linked before. Must hold g_lock.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;
	BUG_ON(!t);

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink */
		entry = &per_cpu(edfsc_cpu_entries, t->rt_param.linked_on);
		BUG_ON(entry->cpu != t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
		BUG_ON(entry->linked || t->rt_param.linked_on != NO_CPU);
	} else if (is_queued(t)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked.
		 * It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		remove(&gsched_domain, t);
	}
}

//TODO change local linking
/* g_preempt_check - walk CPUs in priority order and link any higher-priority
 * ready global work (containers or migrating tasks) to them, preempting as
 * needed. Caller must hold g_lock.
 */
static void g_preempt_check(void)
{
	struct task_struct *task, *temp;
	cpu_entry_t *last, *target;

	if (!bheap_peek(cpu_lower_prio, &edfsc_cpu_heap))
		return;

	// Loop through CPUs in priority order, checking if anything needs preemption
	for (last = lowest_prio_cpu();
	     edf_preemption_needed(&gsched_domain, last->linked);
	     last = lowest_prio_cpu()) {
		target = last;
		/* preemption necessary */
		task = __take_ready(&gsched_domain);
		// g_preempt_check can be called before gschedule, and therefore g_job_completion.
		// So, a task can be temporarily added to the ready queue, but will quickly be rectified
		// by either this, or g_job_completion
		if (requeue_preempted_job(task)) {
			// Update container budget tracking
			if (is_container(task)) {
				last = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id);
			} else if (is_container(last->linked)) {
				if (tsk_rt(last->linked)->edfsc_params.domain->timer_armed) {
					update_container_budget(last->linked);
				}
			}
			if (requeue_preempted_job(last->linked)) {
				requeue(last->linked);
			}
			TRACE("g_preempt_check: attempting to link task %d to %d\n",
			      task->pid, target->cpu);
			link_task_to_cpu(task, last);
			preempt(last);
		}
	}
}

static int c_preempt_check(cont_domain_t *container)
{
	if ((is_migrating(container->scheduled) && __peek_ready(&container->domain)) ||
	    edf_preemption_needed(&container->domain, container->scheduled)) {
		preempt(&per_cpu(edfsc_cpu_entries, tsk_rt(container->container)->edfsc_params.id));
		return 1;
	} else {
		return 0;
	}
}

// Callback for new global job release
static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&g_lock, flags);
	__merge_ready(rt, tasks);
	g_preempt_check();
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

// Callback for new container release
static int c_check_resched(rt_domain_t *edf)
{
	cont_domain_t *cont_dom = container_of(edf, cont_domain_t, domain);
	/* because this is a callback from rt_domain_t we already hold
	 * the necessary lock for the ready queue
	 */
	return c_preempt_check(cont_dom);
}

static void g_remove_task(struct task_struct *t)
{
	BUG_ON(is_container(t));
	//BUG_ON(get_rt_utilization(t) > m_util);
	m_util -= get_rt_utilization(t);
	sys_util -= get_rt_utilization(t);
	sys_changed = 1;
}

static void c_remove_task(struct task_struct *t)
{
	//BUG_ON(get_rt_utilization(t) > container_domains[tsk_rt(t)->task_params.cpu].f_util);
	container_domains[tsk_rt(t)->task_params.cpu].f_util -= get_rt_utilization(t);
	sys_util -= get_rt_utilization(t);
	sys_changed = 1;
}

/**
 * Remove a task from its current domain and put it in a different domain.
 * Must be called at the later of job completion and deadline to respect
 * EDF-sc invariants. Can only go from migrating to fixed task.
 */
static void migrate_task(struct task_struct *t)
{
	BUG_ON(!t);
	BUG_ON(is_container(t) || is_fixed(t));
	BUG_ON(!tsk_rt(t)->edfsc_params.move_to);

	if (is_queued(t))
		remove(tsk_rt(t)->domain, t);
	// Remove the util of the "fake reservation task" (specified by the paper) from the system
	sys_util -= get_rt_utilization(t);
	m_util -= get_rt_utilization(t);
	tsk_rt(t)->domain = (rt_domain_t*)tsk_rt(t)->edfsc_params.move_to;
	tsk_rt(t)->edfsc_params.container_task = tsk_rt(t)->edfsc_params.move_to->container;
	requeue(t);
	c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
	tsk_rt(t)->edfsc_params.move_to = NULL;
	sys_changed = 1;
}

/**
 * Release a container and take its core out of availability if it's a fully
 * provisioned container.
 * Note: This is shared by container_boundary() and g_job_completion().
 */
static void c_release(struct task_struct *t)
{
	cpu_entry_t* entry;

	BUG_ON(!is_container(t));
	BUG_ON(t->rt_param.edfsc_params.domain->timer_armed);

	entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
	tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
	prepare_for_next_period(t);
	if (is_early_releasing(t) || is_released(t, litmus_clock()))
		sched_trace_task_release(t);
	/* If this container is fully provisioned, remove it from gsched_domain,
	 * edfsc_cpu_heap, and disable the idle enforcement timer. If not, restore.
	 */
	if (get_rt_utilization(t) == to_fp(1)) {
		// Make this cpu unavailable to the global scheduler
		if (bheap_node_in_heap(entry->hn))
			remove_cpu_from_global(entry);
		// Note that we no longer need the global scheduler to schedule us
		if (is_queued(t)) {
			remove(&gsched_domain, t);
		}
		// Fully provisioned containers always run, so just set this here
		if (entry->linked != t) {
			BUG_ON(is_container(entry->linked));
			if (requeue_preempted_job(entry->linked)) {
				requeue(entry->linked);
			}
			link_task_to_cpu(t, entry);
		}
		tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = litmus_clock();
		// Run schedule again to make sure that we're run
		preempt(entry);
	} else {
		// Make our cpu available again
		if (!bheap_node_in_heap(entry->hn))
			add_cpu_to_global(entry);
		// Note that containers aren't real tasks and thus can't block
		unlink(t);
		// Request to be scheduled globally again
		requeue(t);
	}
}

// migrating or container task job_completion, called from edfsc_gschedule
// g_lock must already be held
static noinline void g_job_completion(struct task_struct* t, int forced)
{
	BUG_ON(!t);
	sched_trace_task_completion(t, forced);
	TRACE_TASK(t, "g_job_completion(forced=%d).\n", forced);

	unlink(t);
	tsk_rt(t)->completed = 0;

	// When a migrating task is being turned into a fixed task
	if (is_migrating(t) && tsk_rt(t)->edfsc_params.move_to) {
		prepare_for_next_period(t);
		if (is_early_releasing(t) || is_released(t, litmus_clock()))
			sched_trace_task_release(t);
		if (tsk_rt(t)->job_params.lateness > 0) {
			// Don't wait if prev job was tardy
			migrate_task(t);
		} else {
			list_add(&t->edfsc_qnode, &pending_removes);
			hrtimer_start(&t->edfsc_deadline_timer,
			              ns_to_ktime(get_deadline(t)),
			              HRTIMER_MODE_ABS_PINNED);
		}
	// When a migrating job finishes
	} else if (is_migrating(t)) {
		/* prepare for next period */
		prepare_for_next_period(t);
		if (is_early_releasing(t) || is_released(t, litmus_clock()))
			sched_trace_task_release(t);
		// requeue, but don't requeue a blocking task
		if (is_current_running()) {
			requeue(t);
			g_preempt_check();
		}
	} else if (is_container(t)) {
		// When a container job finishes late, release it immediately
		if (get_deadline(t) < litmus_clock()) {
			c_release(t);
			g_preempt_check();
			if (get_rt_utilization(t) == to_fp(1))
				manage_idle_enforcement_timer(t);
		} else {
			tsk_rt(t)->completed = 1;
		}
	}
}

// fixed task job_completion, called from edfsc_cschedule
static void c_job_completion(struct task_struct* t, int forced)
{
	sched_trace_task_completion(t, forced);
	TRACE_TASK(t, "c_job_completion(forced=%d).\n", forced);

	tsk_rt(t)->completed = 0;
	prepare_for_next_period(t);
	requeue(t);
}

// need to update cpu entries after global scheduling
// As long as this only touches CPU-local state, it shouldn't need g_lock:
static void g_finish_switch(struct task_struct *prev)
{
	unsigned long flags;
	cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
	struct task_struct* container = &container_tasks[entry->cpu];

	raw_spin_lock_irqsave(&g_lock, flags);
	entry->scheduled = is_realtime(current) ? current : NULL;
	// If we're scheduling a task in a container, set entry->scheduled to the container
	if (entry->scheduled) {
		if (entry->scheduled->rt_param.edfsc_params.container_task) {
			entry->scheduled = entry->scheduled->rt_param.edfsc_params.container_task;
		}
	}
	// occurs when current is non-rt, and linked is a container
	// this happens when an empty container "task" is supposed to be current
	// but because it's not a real task, a non-rt task is current instead
	else if (tsk_rt(container)->scheduled_on != NO_CPU) {
		entry->scheduled = container;
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE_TASK(prev, "switched away from\n");
#endif
}

static int fifo_prio(struct bheap_node* _a, struct bheap_node* _b)
{
	return 0;
}

/**
 * Schedule inside of a container domain
 * Called with g_lock already held
 * @param cedf Pointer to tsk_rt(container)->edfsc_params->domain
 * @param prev Previous task running on this processor before schedule was called
 */
static noinline void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
{
	rt_domain_t *edf = &cedf->domain;

	struct task_struct* next;
	struct task_struct* other_t;
	struct bheap temp;
	cpu_entry_t *this_entry, *other_entry;
	int out_of_time, sleep, preempt, np, exists, blocks, resched;

	// XXX: The scheduler we copied this from also used `cont_out_of_time`. Is
	// there some logic that we should have left that needs this?

	/* sanity checking
	 * differently from gedf, when a task exits (dead)
	 * cedf->scheduled may be null and prev _is_ realtime
	 */
	//BUG_ON(cedf->scheduled && cedf->scheduled != prev && is_realtime(prev));
	BUG_ON(cedf->scheduled && !is_realtime(cedf->scheduled));

	/* (0) Determine state */
	exists = cedf->scheduled != NULL;
	blocks = exists && current == cedf->scheduled && !is_current_running();
	out_of_time = exists && budget_enforced(cedf->scheduled)
	                     && budget_exhausted(cedf->scheduled);
	np = exists && is_np(cedf->scheduled);
	sleep = exists && is_completed(cedf->scheduled);
	preempt = (is_migrating(cedf->scheduled) && __peek_ready(edf)) ||
	          edf_preemption_needed(edf, cedf->scheduled);

	/* If we need to preempt do so.
	 * The following checks set resched to 1 in case of special
	 * circumstances.
	 */
	resched = preempt;

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * Multiple calls to request_exit_np() don't hurt.
	 */
	if (np && (out_of_time || preempt || sleep))
		request_exit_np(cedf->scheduled);

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this.
	 */
	if (!np && (out_of_time || sleep)) {
		if (is_fixed(cedf->scheduled))
			c_job_completion(cedf->scheduled, !sleep);
		else {
			tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
			g_job_completion(cedf->scheduled, !sleep);
		}
		resched = 1;
	}
	// Deschedule any background jobs if a fixed task is ready
	else if (!np && preempt) {
		if (!blocks && cedf->scheduled && !is_queued(cedf->scheduled)) {
			if (is_migrating(cedf->scheduled))
				tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
			requeue(cedf->scheduled);
		}
		resched = 1;
	}

	/* The final scheduling decision. Do we need to switch for some reason?
	 * Switch if we are in RT mode and have no task or if we need to
	 * resched.
	 */
	next = NULL;
	if (blocks || !exists || (!np && resched)) {
		BUG_ON(cedf->scheduled && !blocks && !out_of_time && !sleep &&
		       !is_migrating(cedf->scheduled) && !is_queued(cedf->scheduled));
		next = __take_ready(edf);
		BUG_ON(next && budget_enforced(next) && budget_exhausted(next));
	} else if (exists) {
		// This is safe when background scheduling, as we can only get here if
		// there were no other fixed tasks ready to run.
		BUG_ON(is_queued(cedf->scheduled));
		BUG_ON(budget_enforced(cedf->scheduled) && budget_exhausted(cedf->scheduled));
		next = cedf->scheduled;
	}

	this_entry = this_cpu_ptr(&edfsc_cpu_entries);
	if (next) {
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	// Give the container a little breathing room, otherwise the core will be pounded with work.
	// Will often trigger the watchdog due to continuous execution.
	} else if (!list_empty(&migrating_tasks)) {
		// Find a task in gsched_domain that isn't a container to background schedule
		bheap_init(&temp);
		next = __take_ready(&gsched_domain);
		while (is_container(next) || (is_migrating(next) && next->cpu != this_entry->cpu)) {
			bheap_insert(fifo_prio, &temp, tsk_rt(next)->heap_node);
			next = __take_ready(&gsched_domain);
		}
		if (next) {
			tsk_rt(next)->edfsc_params.container_task = cedf->container;
			TRACE_TASK(next, "background scheduling at %llu\n", litmus_clock());
		} else {
			TRACE("container becomes idle at %llu\n", litmus_clock());
		}
		while (bheap_peek(fifo_prio, &temp)) {
			requeue(bheap_take(fifo_prio, &temp)->value);
		}
	}

	if (next && next->cpu != this_entry->cpu) {
		other_entry = &per_cpu(edfsc_cpu_entries, next->cpu);
		other_t = is_container(other_entry->linked) ?
		          other_entry->linked->rt_param.edfsc_params.domain->scheduled :
		          other_entry->linked;
		// If we detect a direct swap, and the other task has already gone through gschedule,
		// then to prevent a deadlock we let them go first and reschedule
		if (other_t && other_t->cpu == this_entry->cpu) {
			if (is_migrating(other_t) ||
			    other_entry->linked->rt_param.scheduled_on == other_entry->cpu) {
				if (is_migrating(next))
					next->rt_param.edfsc_params.container_task = NULL;
				requeue(next);
				next = NULL;
			}
		}
	}

	cedf->scheduled = next;
}

// assuming prev is the previous task running on the processor before calling schedule
static struct task_struct *edfsc_gschedule(struct task_struct *prev)
{
	cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
	int out_of_time, sleep, preempted, np, exists, blocks, is_cont;
	unsigned long flags;
	struct task_struct* next = NULL;
	struct task_struct* temp = NULL;

	raw_spin_lock_irqsave(&g_lock, flags);

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev && !is_container(entry->scheduled));
	// It's okay for the previously scheduled task to not be rt if we think a
	// container task is scheduled and the container doesn't have any pending
	// jobs of fixed tasks.
	BUG_ON(entry->scheduled && !is_container(entry->scheduled) && !is_realtime(prev));
	// Bug if we didn't think anything was scheduled, but a realtime task was running on our CPU
	//BUG_ON(is_realtime(prev) && tsk_rt(prev)->linked_on != NO_CPU && !entry->scheduled);

	/* (0) Determine state */
	exists = entry->scheduled != NULL;
	is_cont = is_container(entry->scheduled);
	blocks = exists && !is_cont && !is_current_running();
	np = exists && !is_cont && is_np(entry->scheduled);
	sleep = exists && is_completed(entry->scheduled);
	preempted = entry->scheduled != entry->linked;

	/* Manually track container budget */
	if (is_cont && (tsk_rt(entry->scheduled)->edfsc_params.domain->timer_armed || sleep)) {
		update_container_budget(entry->scheduled);
		out_of_time = exists && budget_enforced(entry->scheduled)
		                     && budget_exhausted(entry->scheduled);
		/* Cancel container enforcement timer if container is fully provisioned and out of sync with
		 * container_boundary, or if it is currently being scheduled in gedf
		 */
		if (bheap_node_in_heap(entry->hn) || (!bheap_node_in_heap(entry->hn) && out_of_time))
			cancel_idle_enforcement_timer(entry->scheduled);
	} else {
		out_of_time = exists && budget_enforced(entry->scheduled)
		                     && budget_exhausted(entry->scheduled);
	}

	if (exists)
		TRACE_TASK(prev,
		           "blocks:%d out_of_time:%d np:%d sleep:%d preempted:%d "
		           "state:%d sig:%d is_cont:%d\n",
		           blocks, out_of_time, np, sleep, preempted,
		           prev->state, signal_pending(prev), is_cont);
	if (entry->linked && preempted)
		TRACE_TASK(prev, "will be preempted by %s/%d\n",
		           entry->linked->comm, entry->linked->pid);

	// If a task blocks we have no choice but to reschedule.
	if (blocks)
		unlink(prev);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * We need to make sure to update the link structure anyway in case
	 * that we are still linked. Multiple calls to request_exit_np() don't
	 * hurt.
	 */
	if (np && (out_of_time || preempted || sleep)) {
		unlink(entry->scheduled);
		request_exit_np(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this. Don't do a job completion if we block (can't have timers running
	 * for blocked jobs).
	 */
	if (!np && (out_of_time || sleep)) {
		g_job_completion(entry->scheduled, !sleep);
	}

	BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked && !is_container(entry->linked));

	if (!entry->linked && bheap_node_in_heap(entry->hn)) {
		g_preempt_check();
	}

	BUG_ON(entry->linked && is_queued(entry->linked));
	BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked &&
	       tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);
	BUG_ON(is_container(entry->linked) && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, then select linked as next.
	 */
	if ((!np || blocks) && entry->linked != entry->scheduled) {
		// Set the newly linked job to be scheduled
		if (entry->linked) {
			next = entry->linked;
			tsk_rt(entry->linked)->scheduled_on = entry->cpu;
			BUG_ON(is_queued(entry->linked));
			TRACE_TASK(next, "scheduled on P%d\n", smp_processor_id());
		}
		// Set the previously linked task to be unscheduled
		if (entry->scheduled) {
			/* When a scheduled task is linked to another cpu from this cpu, there's no guarantee
			 * on the order in which gschedule is called on both cpus.
			 * If it already has scheduled_on set to the other
			 * cpu, then we have to preserve it and can't just set it to NO_CPU.
			 */
			if (tsk_rt(entry->scheduled)->scheduled_on == entry->cpu) {
				tsk_rt(entry->scheduled)->scheduled_on = NO_CPU;
			}
			TRACE_TASK(entry->scheduled, "descheduled\n");
		}
	} else if (entry->scheduled) {
		next = entry->scheduled;
		tsk_rt(next)->scheduled_on = entry->cpu;
	}

	BUG_ON(next && get_exec_time(next) > get_exec_cost(next));

	// If next is a container, then perform cschedule to determine the fixed task to schedule
	if (is_container(next)) {
		edfsc_cschedule(tsk_rt(next)->edfsc_params.domain, prev);
		if (bheap_node_in_heap(entry->hn))
			manage_idle_enforcement_timer(next);
		next = tsk_rt(next)->edfsc_params.domain->scheduled;
	}
	// When next is migrating, but the previously scheduled realtime task is a container,
	// we must properly restore the background-scheduled task (if any) to its correct queue/heap
	else if (is_container(entry->scheduled) && next != entry->scheduled) {
		struct task_struct** child = &tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled;
		// No need to handle fixed tasks, cschedule will do that when it runs next
		if (*child && is_migrating(*child)) {
			int background_out_of_time = budget_enforced(*child) && budget_exhausted(*child);
			BUG_ON(is_queued(*child));
			BUG_ON(tsk_rt(*child)->linked_on != NO_CPU);
			tsk_rt(*child)->edfsc_params.container_task = NULL;
			// If migrating and done
			if (is_completed(*child) || background_out_of_time) {
				g_job_completion(*child, background_out_of_time);
			// If migrating and not blocked
			} else if (is_current_running()) {
				requeue(*child);
			}
			// Regardless, we never "freeze" a migrating task in a container
			*child = NULL;
		}
	}

	// Tell LITMUS^RT that we've chosen a task and are done scheduling after return
	sched_state_task_picked();
	raw_spin_unlock_irqrestore(&g_lock, flags);

#ifdef WANT_ALL_SCHED_EVENTS
	TRACE("g_lock released, next=0x%p\n", next);
	if (next)
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
	else if (exists && !next)
		TRACE("becomes idle at %llu.\n", litmus_clock());
#endif

	return next;
}

/*
 * Task addition, stabilization, and container task reweighting heuristic to
 * be run every container task period.
 */
static enum hrtimer_restart container_boundary(struct hrtimer *timer)
{
	int i;
	struct list_head *it;
	struct list_head *temp;
	u64 u_extra;
	cont_domain_t *container;
	struct task_struct *t;
	int num_cpus = num_online_cpus();
	unsigned long flags;

	TS_SCHED_TIMER_START

	raw_spin_lock_irqsave(&g_lock, flags);

	t = NULL;

	// Try to add tasks from the queue
	list_for_each_safe(it, temp, &pending_adds) {
		container = NULL;
		t = task_of_list_node(it);
		list_del_init(it);
		//sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
		sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
		if (to_fp(num_cpus) > get_rt_utilization(t) + sys_util) {
			for (i = 0; i < num_cpus; i++) {
				if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util) {
				//if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util) {
					//container = &(container_domains[i]);
					container = container_list[i]; // Used for best/worst fit
					break;
				}
			}
			if (container) {
				tsk_rt(t)->domain = (rt_domain_t*)container;
				tsk_rt(t)->edfsc_params.container_task = container->container;
				container->f_util += get_rt_utilization(t);
			} else {
				tsk_rt(t)->domain = &gsched_domain;
				tsk_rt(t)->edfsc_params.container_task = NULL;
				m_util += get_rt_utilization(t);
				list_add(&t->edfsc_qnode, &migrating_tasks);
			}
			sys_util += get_rt_utilization(t);
			sys_changed = 1;
		}
		/* Unblock the task waiting on our admission decision. It will detect
		 * whether it has been admitted by examining if tsk_rt(t)->domain != NULL.
		 * This sets the state to TASK_RUNNING, adds the task to the run
		 * queue, and runs edfsc_task_new(). That function will then invoke the
		 * scheduler once the task is setup and our state is consistent.
		 * XXX: It's unclear when we return from wake_up_new_task(), thus we
		 * may be deferring other container boundary computations for far
		 * longer than we should.
		 */
		raw_spin_unlock_irqrestore(&g_lock, flags);
		BUG_ON(!wake_up_process(t));
		raw_spin_lock_irqsave(&g_lock, flags);
	}

	// Attempt to move migrating tasks into containers
	// TODO optimize this so we don't actually have to iterate over all the
	// migrating tasks and potentially all the containers every period for a
	// best-case Omega(m) and worst-case O(m^2) work---only once the scheduler
	// is actually working
	// Done, only does stabilization when stuff changes in the system
	// According to the paper, when we migrate, we must reserve space in the container.
	// We do this by adding a fake task that ultimately doesn't release any jobs.
	// This is represented here by adding the utilization to sys_util,
	// which will be subtracted when the migrating task is actually changed to fixed.
	if (sys_changed) { // change this to false to disable stabilization
		list_for_each_safe(it, temp, &migrating_tasks) {
			struct task_struct* t = task_of_list_node(it);
			// Although selecting a migrating task to be moved into a container
			// doesn't change m_util and the container's f_util until after the move,
			// the move is guaranteed to happen before the next container_boundary
			// where we check all the utilization, so it's fine to account for it now.
			if (!(tsk_rt(t)->edfsc_params.move_to)) {
				tsk_rt(t)->edfsc_params.move_to = NULL;
				container = NULL;
				//sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
				sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
				for (i = 0; i < num_cpus; i++) {
					u64 leftover = to_fp(1) - container_domains[i].f_util;
					if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util &&
					//if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util &&
					    to_fp(num_cpus) > get_rt_utilization(t) + sys_util) {
						//container = &(container_domains[i]);
						container = container_list[i]; // Used for best/worst fit
						break;
					}
				}
				if (container) {
					list_del_init(it);
					container->f_util += get_rt_utilization(t);
					sys_util += get_rt_utilization(t);
					tsk_rt(t)->edfsc_params.move_to = container;
					sys_changed = 1;
				}
			}
		}
	}

	// If needed, reweight containers using EqualOver heuristic
	if (sys_changed) {
		int remaining;
		// Sort containers by the utilization of their fixed tasks
		sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL);
		u_extra = to_fp(num_cpus) - sys_util;
		// Fully provision all the container tasks we can
		for (i = 0; i < num_cpus && u_extra >= to_fp(1) - container_list[i]->f_util; i++) {
			struct task_struct* t = container_list[i]->container;
			tsk_rt(t)->task_params.utilization = to_fp(1);
			u_extra -= to_fp(1) - container_list[i]->f_util;
		}
		// Split the extra capacity between the remaining container tasks
		// XXX this is actually dangerous as hell, right? Since overheads are
		// non-zero, this will make tardiness grow unboundedly for migrating
		// tasks unless we're saved by slack stealing. MinOrFull is also bad
		// because it will cause tardiness to grow unboundedly for fixed tasks
		// when overheads are considered. Oh noooooooooooo---
		// ---Here's a bogus idea that might just work: split the difference.
		// Basically act like migrating tasks are another processor (or two or
		// three or .. or m) and split the extra capacity evenly among
		// containers and the migrating tasks. In reality we'll need something
		// like that anyway, and it should at least be less dangerous.
		u_extra = u_extra / 2;
		remaining = num_cpus - i;
		for (; i < num_cpus; i++) {
			struct task_struct* t = container_list[i]->container;
			u64 temp_val = container_list[i]->f_util + u_extra / remaining;
			tsk_rt(t)->task_params.utilization = (temp_val < to_fp(1)) ? temp_val : to_fp(1);
			BUG_ON(tsk_rt(t)->task_params.utilization > to_fp(1));
		}
	}
	sys_changed = 0;

	INIT_LIST_HEAD(&pending_adds);

	// Re-release container tasks, or tell them they can if they're tardy
	for (i = 0; i < num_cpus; i++) {
		int armed = container_list[i]->timer_armed;
		t = container_list[i]->container;
		// If the container tasks are currently scheduled, update their budget
		if (armed) {
			update_container_budget(t);
		}
		/* Either the container has completed, or it is fully provisioned and in sync
		 * (thus not requiring a budget enforcement timer).
		 */
		if ((!armed && get_rt_period(t) == get_exec_cost(t)) ||
		    budget_exhausted(t) || is_completed(t)) {
			BUG_ON(is_queued(t));
			sched_trace_task_completion(t, 0);
			if (armed)
				cancel_idle_enforcement_timer(t);
			tsk_rt(t)->completed = 0;
			c_release(t);
		}
	}

	g_preempt_check();

	raw_spin_unlock_irqrestore(&g_lock, flags);

	TS_SCHED_TIMER_END

	hrtimer_add_expires_ns(timer, CONTAINER_PERIOD);
	return HRTIMER_RESTART;
}

/*
 * When preempt check scheduled a task to multiple cores (due to swapping and
 * multiple invocations of g_preempt_check), we should not wait for the stack
 * and should reschedule instead.
 */
static bool edfsc_should_wait_for_stack(struct task_struct* t)
{
	cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
	struct task_struct* tsk = tsk_rt(t)->edfsc_params.container_task;
	tsk = tsk ? tsk : t;
	return tsk_rt(tsk)->linked_on == tsk_rt(tsk)->scheduled_on &&
	       tsk_rt(tsk)->linked_on == entry->cpu;
}

/**
 * Fired when a task reaches its deadline and is pending deletion or migration
 */
static enum hrtimer_restart task_deadline_callback(struct hrtimer* timer)
{
	unsigned long flags;
	struct task_struct *t = container_of(timer, struct task_struct, edfsc_deadline_timer);

	raw_spin_lock_irqsave(&g_lock, flags);
	BUG_ON(is_container(t));
	// This is true only if set to be migrating from container_boundary
	if (tsk_rt(t)->edfsc_params.move_to) {
		// Can only be here when called from g_job_completion
		migrate_task(t);
	} else {
		// In the else case, only task_params is guaranteed to be valid.
		// However, in task_exit we stored information in task_params.cpu
		// to help us do remove operations.
		// A move_to of NULL means deletion
		if (tsk_rt(t)->task_params.cpu == NO_CPU)
			g_remove_task(t);
		else
			c_remove_task(t);
		// Release our reference to the task struct
		put_task_struct(t);
	}
	list_del_init(&t->edfsc_qnode);
	raw_spin_unlock_irqrestore(&g_lock, flags);
	return HRTIMER_NORESTART;
}

/**
 * This /always/ runs after admission succeeds, so we can rely on
 * edfsc_admit_task() handling most of the initialization.
 * g_lock is not yet held
 */
static void edfsc_task_new(struct task_struct* t, int on_rq, int is_scheduled)
{
	unsigned long flags;
	cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(t));

	tsk_rt(t)->sporadic_release = 0;
	TRACE("EDF-sc: task new %d\n", t->pid);

	// Create a timer that we'll use to delay accounting during migrations
	hrtimer_init(&t->edfsc_deadline_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	t->edfsc_deadline_timer.function = task_deadline_callback;

	raw_spin_lock_irqsave(&g_lock, flags);
	release_at(t, litmus_clock());
	sched_trace_task_release(t);
	tsk_rt(t)->linked_on = NO_CPU;
	tsk_rt(t)->scheduled_on = NO_CPU;
	// Queue this task and request a reschedule
	if (on_rq || is_scheduled) {
		requeue(t);
		if (is_migrating(t)) {
			g_preempt_check();
		} else if (is_fixed(t)) {
			c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
		}
		preempt(entry);
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

/**
 * This is called by LITMUS when our task is being woken up after having
 * previously blocked on something like console or disk I/O. This is a pair to
 * the `if (blocks) unlink(prev);` in edfsc_gschedule().
 */
static void edfsc_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now = litmus_clock();
	cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(task));

	TRACE_TASK(task, "wake_up at %llu\n", now);
	raw_spin_lock_irqsave(&g_lock, flags);
	now = litmus_clock();
	if (is_sporadic(task) && is_tardy(task, now)) {
		inferred_sporadic_job_release_at(task, now);
	}
	if (!is_queued(task) && tsk_rt(task)->domain)
		requeue(task);
	if (is_migrating(task))
		g_preempt_check();
	else if (is_fixed(task))
		c_preempt_check((cont_domain_t*)tsk_rt(task)->domain);
	preempt(entry);
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

static void edfsc_task_block(struct task_struct *t)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&g_lock, flags);
	if (is_migrating(t) && tsk_rt(t)->edfsc_params.container_task) {
		tsk_rt(t)->edfsc_params.container_task = NULL;
	}
	raw_spin_unlock_irqrestore(&g_lock, flags);
}

/**
 * This is called by LITMUS before our task is switched to another scheduler.
 * During task termination (do_exit()), LITMUS first switches the scheduler
 * to SCHED_FIFO before running the normal Linux task termination procedure.
 * After we return from this, `t` may or may not still exist. So we should have
 * no outstanding handles to any part of the task struct after this point.
 */
static void edfsc_task_exit(struct task_struct* t)
{
	unsigned long flags;
	lt_t now, unaccount_time = 0;
	cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(t));

	BUG_ON(is_container(t));
	raw_spin_lock_irqsave(&g_lock, flags);
	TRACE_TASK(t, "called edfsc_task_exit\n");

	// Remove this task from all members of its scheduling domain
	if (is_fixed(t)) {
		tsk_rt(t)->task_params.cpu =
			((cont_domain_t*)tsk_rt(t)->domain)->container->rt_param.edfsc_params.id;
		if (is_queued(t))
			remove(tsk_rt(t)->domain, t);
		else {
			// If we're fixed and not on the ready queues, we should be currently running
			BUG_ON(((cont_domain_t*)tsk_rt(t)->domain)->scheduled != t);
			BUG_ON(t != current);
			((cont_domain_t*)tsk_rt(t)->domain)->scheduled = NULL;
		}
	} else {
		tsk_rt(t)->task_params.cpu = NO_CPU;
		list_del_init(&t->edfsc_qnode);
		if (tsk_rt(t)->edfsc_params.container_task != NULL) {
			BUG_ON(tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled != t);
			tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled = NULL;
		} else {
			unlink(t);
			entry->scheduled = NULL;
		}
	}
	tsk_rt(t)->domain = NULL;
	BUG_ON(is_queued(t));

	/* To preserve EDF-sc scheduling invariants, we can only release a task's
	 * utilization at the greater of completion or deadline boundary. Thus, here
	 * we schedule a timer to handle this unaccounting of utilization.
	 */
	now = litmus_clock();
	if (is_released(t, now)) {
		/* If a task has already been released, no future jobs are pending and we can
		 * just unaccount at the current deadline.
		 */
		unaccount_time = get_deadline(t);
	} else {
		/* If the task has yet to be released, but we still haven't reached the
		 * deadline of its last-finished job, wait for that deadline. Otherwise
		 * we're after a deadline and before a release, so just remove now.
		 */
		if (lt_after(tsk_rt(t)->edfsc_params.prev_deadline, now))
			unaccount_time = tsk_rt(t)->edfsc_params.prev_deadline;
		else
			unaccount_time = 0;
	}

	/* Take out an extra reference on the task struct so that it's not freed until
	 * the deadline boundary timer fires and we finish with it
	 */
	get_task_struct(t);
	// Make it clear that this task is going away
	tsk_rt(t)->edfsc_params.move_to = NULL;

	if (unaccount_time == 0) {
		raw_spin_unlock_irqrestore(&g_lock, flags);
		// Don't bother setting a zero-length timer - just skip straight to the callback
		task_deadline_callback(&t->edfsc_deadline_timer);
	} else {
		list_add(&t->edfsc_qnode, &pending_removes);
		raw_spin_unlock_irqrestore(&g_lock, flags);
		hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(unaccount_time),
		              HRTIMER_MODE_ABS_PINNED);
	}
}

static struct domain_proc_info edfsc_domain_proc_info;

static long edfsc_get_domain_proc_info(struct domain_proc_info **ret)
{
	*ret = &edfsc_domain_proc_info;
	return 0;
}

static void edfsc_setup_domain_proc(void)
{
	int cpu;
	// We don't support release master
	int num_rt_cpus = num_online_cpus();
	struct cd_mapping *cpu_map, *domain_map;

	memset(&edfsc_domain_proc_info, 0, sizeof(edfsc_domain_proc_info));
	init_domain_proc_info(&edfsc_domain_proc_info, num_rt_cpus, num_rt_cpus + 1);
	edfsc_domain_proc_info.num_cpus = num_rt_cpus;
	edfsc_domain_proc_info.num_domains = num_rt_cpus + 1;

	for (cpu = 0; cpu < num_online_cpus(); ++cpu) {
		/* add one-to-one relation for the container domains */
		cpu_map = &edfsc_domain_proc_info.cpu_to_domains[cpu];
		domain_map = &edfsc_domain_proc_info.domain_to_cpus[cpu];
		cpu_map->id = cpu;
		domain_map->id = cpu;
		cpumask_set_cpu(cpu, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);

		/* add all-to-one relation for the global domain */
		cpu_map = &edfsc_domain_proc_info.cpu_to_domains[cpu];
		domain_map = &edfsc_domain_proc_info.domain_to_cpus[num_rt_cpus];
		cpu_map->id = cpu;
		domain_map->id = num_rt_cpus;
		cpumask_set_cpu(num_rt_cpus, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);
	}
}

static long edfsc_activate_plugin(void)
{
	/* TODO This will need to:
	 * - Initialize the containers and container tasks
	 *   (or can that be done at least partially in the module init function?
	 *   First releases have to be here, but setting up data structures might
	 *   be reusable if we don't destroy them when the plugin is deactivated)
	 * - ...
	 */
	int i;
	lt_t now;
	cpu_entry_t* entry;
	struct task_struct* t;

	edfsc_setup_domain_proc();

	INIT_LIST_HEAD(&pending_adds);
	INIT_LIST_HEAD(&migrating_tasks);
	INIT_LIST_HEAD(&pending_removes);

	bheap_init(&edfsc_cpu_heap);

	// Set up the container boundary timer
	hrtimer_init(&container_release_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	container_release_timer.function = container_boundary;

	edf_domain_init(&gsched_domain, NULL, g_release_jobs);

	container_tasks = kmalloc(sizeof(struct task_struct) * num_online_cpus(), GFP_KERNEL);
	container_domains = kmalloc(sizeof(cont_domain_t) * num_online_cpus(), GFP_KERNEL);
	container_list = kmalloc(sizeof(cont_domain_t*) * num_online_cpus(), GFP_KERNEL);
	edfsc_cpu_heap_node = kmalloc(sizeof(struct bheap_node) * num_online_cpus(), GFP_KERNEL);

	sys_util = to_fp(0);
	m_util = to_fp(0);
	sys_changed = 1;

	memset(container_tasks, 0, sizeof(struct task_struct) * num_online_cpus());
	memset(container_domains, 0, sizeof(cont_domain_t) * num_online_cpus());

	// Initialize container domains
	for (i = 0; i < num_online_cpus(); i++) {
		edf_domain_init(&container_domains[i].domain, c_check_resched, NULL);
		container_domains[i].scheduled = NULL;
		container_domains[i].container = &container_tasks[i];
		container_domains[i].f_util = to_fp(0);
		hrtimer_init(&(container_domains[i].idle_enforcement_timer), CLOCK_MONOTONIC,
		             HRTIMER_MODE_ABS);
		container_domains[i].idle_enforcement_timer.function = on_idle_enforcement_timeout;

		// Name the task its container ID mapped to ASCII
		snprintf(container_tasks[i].comm, TASK_COMM_LEN, "%d", i);
		container_tasks[i].pid = -i;
		tsk_rt(&container_tasks[i])->task_params.exec_cost = CONTAINER_PERIOD;
		tsk_rt(&container_tasks[i])->task_params.period = CONTAINER_PERIOD;
		tsk_rt(&container_tasks[i])->task_params.utilization = to_fp(1);
		tsk_rt(&container_tasks[i])->task_params.relative_deadline = CONTAINER_PERIOD;
		tsk_rt(&container_tasks[i])->task_params.budget_policy = PRECISE_ENFORCEMENT;
		tsk_rt(&container_tasks[i])->edfsc_params.container_task = NULL;
		tsk_rt(&container_tasks[i])->domain = &gsched_domain;
		tsk_rt(&container_tasks[i])->edfsc_params.domain = &container_domains[i];
		tsk_rt(&container_tasks[i])->sporadic_release = 0;
		tsk_rt(&container_tasks[i])->edfsc_params.id = i;
		tsk_rt(&container_tasks[i])->heap_node = bheap_node_alloc(GFP_ATOMIC);
		tsk_rt(&container_tasks[i])->rel_heap = release_heap_alloc(GFP_ATOMIC);
		tsk_rt(&container_tasks[i])->linked_on = NO_CPU;
		tsk_rt(&container_tasks[i])->scheduled_on = NO_CPU;

		if (!tsk_rt(&container_tasks[i])->heap_node || !tsk_rt(&container_tasks[i])->rel_heap) {
			printk(KERN_WARNING "litmus: no more heap node memory!?\n");
			return -ENOMEM;
		} else {
			bheap_node_init(&tsk_rt(&container_tasks[i])->heap_node, &container_tasks[i]);
		}

		container_tasks[i].policy = SCHED_LITMUS;

		// Populate the container_list while we're at it.
		container_list[i] = &container_domains[i];

		// Link heap nodes to CPU structures
		entry = &per_cpu(edfsc_cpu_entries, i);
		entry->cpu = i;
		entry->scheduled = NULL;
		entry->linked = NULL;
		entry->hn = &edfsc_cpu_heap_node[i];
		bheap_node_init(&entry->hn, entry);
	}

	now = litmus_clock();
	for (i = 0; i < num_online_cpus(); i++) {
		t = &container_tasks[i];
		entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
		((cont_domain_t*)tsk_rt(t)->edfsc_params.domain)->scheduled_last_exec_time = now;
		release_at(t, now);
		link_task_to_cpu(t, entry);
	}

	// Start the container boundary timer
	hrtimer_start(&container_release_timer, ns_to_ktime(now + CONTAINER_PERIOD),
	              HRTIMER_MODE_ABS_PINNED);

	return 0;
}

static long edfsc_deactivate_plugin(void)
{
	int i;
	struct list_head *l, *temp;
	struct task_struct* t;

	// Stop the container boundary timer
	hrtimer_try_to_cancel(&container_release_timer);

	list_for_each_safe(l, temp, &pending_removes) {
		t = task_of_list_node(l);
		list_del_init(l);
		hrtimer_try_to_cancel(&t->edfsc_deadline_timer);
	}

	for (i = 0; i < num_online_cpus(); i++) {
		bheap_node_free(tsk_rt(&container_tasks[i])->heap_node);
		release_heap_free(tsk_rt(&container_tasks[i])->rel_heap);
		hrtimer_try_to_cancel(&container_domains[i].idle_enforcement_timer);
	}

	kfree(container_tasks);
	kfree(container_domains);
	kfree(container_list);
	kfree(edfsc_cpu_heap_node);

	destroy_domain_proc_info(&edfsc_domain_proc_info);
	return 0;
}

/**
 * This is called before is_realtime(tsk) and before edfsc_task_new().
 * We should be inside the context of the process attempting to become realtime.
 * Called with preemption disabled and g_lock /not/ held.
 */
static long edfsc_admit_task(struct task_struct* tsk)
{
	unsigned long flags;

	// We assume that we're running in the context of `tsk`
	BUG_ON(tsk != current);

	raw_spin_lock_irqsave(&g_lock, flags);
	// Make sure that edfsc_params doesn't contain garbage
	// Note that edfsc_params->domain will always be NULL for non-container tasks
	memset(&tsk_rt(tsk)->edfsc_params, 0, sizeof(struct edfsc_params));
	// This is how we tell if we've been admitted, so make sure it's unset first
	// Note that this represents the domain we're being scheduled in
	tsk_rt(tsk)->domain = NULL;
	// The admission test needs to know our utilization
	tsk_rt(tsk)->task_params.utilization = fp_div(get_exec_cost(tsk), get_rt_period(tsk));
	// Add us to the queue of tasks waiting on admission
	list_add_tail(&tsk->edfsc_qnode, &pending_adds);
	raw_spin_unlock_irqrestore(&g_lock, flags);

	// We don't know if we can be admitted until a container job boundary is reached,
	// so block until the scheduler can make that decision
	set_current_state(TASK_INTERRUPTIBLE); // Changed from TASK_RUNNING
	preempt_enable_no_resched();
	schedule();
	// LITMUS^RT expects preemption to still be disabled after we return
	preempt_disable();

	// We only resume execution here after admission tests complete OR if we
	// were interrupted by a signal.
	if (tsk_rt(tsk)->domain != NULL) {
		return 0; // Successfully admitted
	} else {
		// We'll still be on pending_adds if interrupted by a signal
		struct list_head* l;
		struct list_head* temp;

		raw_spin_lock_irqsave(&g_lock, flags);
		list_for_each_safe(l, temp, &pending_adds) {
			if (task_of_list_node(l) == tsk) {
				list_del_init(l);
				raw_spin_unlock_irqrestore(&g_lock, flags);
				return -EINTR; // Interrupted
			}
		}
		raw_spin_unlock_irqrestore(&g_lock, flags);
		return -ENOSPC; // Rejected
	}
}

/* Plugin object */
static struct sched_plugin edfsc_plugin __cacheline_aligned_in_smp = {
	.plugin_name           = "EDF-sc",
	.finish_switch         = g_finish_switch,
	.task_new              = edfsc_task_new,
	.complete_job          = complete_job,
	.task_exit             = edfsc_task_exit,
	.schedule              = edfsc_gschedule,
	.task_wake_up          = edfsc_task_wake_up,
	.task_block            = edfsc_task_block,
	.admit_task            = edfsc_admit_task,
	.activate_plugin       = edfsc_activate_plugin,
	.deactivate_plugin     = edfsc_deactivate_plugin,
	.should_wait_for_stack = edfsc_should_wait_for_stack,
	.get_domain_proc_info  = edfsc_get_domain_proc_info,
};

static int __init init_edfsc(void)
{
	return register_sched_plugin(&edfsc_plugin);
}

module_init(init_edfsc);