/**
 * litmus/sched_mc.c
 *
 * Implementation of the Mixed Criticality scheduling algorithm.
 *
 * (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010)
 */

#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
#include <litmus/sched_trace.h>
#include <litmus/preempt.h>
#include <litmus/rt_domain.h>
#include <litmus/bheap.h>
/* MC-specific LITMUS^RT headers (names inferred from the symbols used below) */
#include <litmus/domain.h>
#include <litmus/event_group.h>
#include <litmus/sched_mc.h>
#include <litmus/ce_domain.h>

/**
 * struct cpu_entry - State of a CPU for the entire MC system
 * @cpu:          CPU id
 * @scheduled:    Task that is physically running
 * @linked:       Task that should be running / is logically running
 * @lock:         For serialization
 * @crit_entries: Array of CPU state per criticality level
 */
struct cpu_entry {
	int			cpu;
	struct task_struct*	scheduled;
	struct task_struct*	linked;
	raw_spinlock_t		lock;
	struct crit_entry	crit_entries[NUM_CRIT_LEVELS];
#ifdef CONFIG_PLUGIN_MC_REDIRECT
	struct list_head	redir;
	raw_spinlock_t		redir_lock;
#endif
#ifdef CONFIG_MERGE_TIMERS
	struct event_group*	event_group;
#endif
};

DEFINE_PER_CPU(struct cpu_entry, cpus);

#ifdef CONFIG_RELEASE_MASTER
static int interrupt_cpu;
#endif

#ifdef CONFIG_MERGE_TIMERS
static struct event_group *global_group;
#endif

#define domain_data(dom)  (container_of(dom, struct domain_data, domain))
#define is_global(dom)    (domain_data(dom)->heap)
#define is_global_task(t) (is_global(get_task_domain(t)))
#define can_requeue(t) \
	(!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU)
#define entry_level(e) \
	(((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
#define crit_cpu(ce) \
	(container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))

#define TRACE_ENTRY(e, fmt, args...) \
	STRACE("P%d, linked=" TS " " fmt "\n", e->cpu, TA(e->linked), ##args)
#define TRACE_CRIT_ENTRY(ce, fmt, args...) \
	STRACE("%s P%d, linked=" TS " " fmt "\n", \
	       (ce)->domain->name, crit_cpu(ce)->cpu, TA((ce)->linked), ##args)

/*
 * Sort CPUs within a global domain by the domain's priority function.
 */
static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
{
	struct domain *domain;
	struct crit_entry *first, *second;
	struct task_struct *first_link, *second_link;

	first  = a->value;
	second = b->value;
	first_link  = first->linked;
	second_link = second->linked;

	if (!first->usable || !second->usable) {
		return second->usable && first->usable;
	} else if (!first_link || !second_link) {
		return second_link && !first_link;
	} else {
		domain = get_task_domain(first_link);
		BUG_ON(domain != get_task_domain(second_link));
		return domain->higher_prio(second_link, first_link);
	}
}

/*
 * Return true if the domain has a higher priority ready task. The curr
 * task must belong to the domain.
 */
static int mc_preempt_needed(struct domain *dom, struct task_struct* curr)
{
	struct task_struct *next = dom->peek_ready(dom);

	if (!next || !curr) {
		return next && !curr;
	} else {
		BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
		return get_task_domain(next)->higher_prio(next, curr);
	}
}

/*
 * Return the next CPU which should be preempted or NULL if the domain has no
 * preemptable CPUs.
 */
static inline struct crit_entry* lowest_prio_cpu(struct domain *dom)
{
	struct bheap *heap = domain_data(dom)->heap;
	struct bheap_node* hn = bheap_peek(cpu_lower_prio, heap);
	return (hn) ? hn->value : NULL;
}

/*
 * Cancel ghost timer.
 */
static inline void cancel_ghost(struct crit_entry *ce)
{
#ifdef CONFIG_MERGE_TIMERS
	cancel_event(crit_cpu(ce)->event_group, &ce->event);
#else
	hrtimer_try_to_cancel(&ce->timer);
#endif
}
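/*
 * Ghost-task budget tracking (summary of the mechanism implemented below):
 * when a ghost task is linked to a criticality entry, link_task_to_crit()
 * arms a timer via arm_ghost() for the remaining ghost budget.  When the
 * timer fires, mc_ghost_exhausted() re-checks the budget through
 * update_ghost_time(), because the linked task may have changed since the
 * timer was armed, and only then completes the job.
 */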
/*
 * Arm ghost timer. Will merge timers if the option is specified.
 */
static inline void arm_ghost(struct crit_entry *ce, lt_t fire)
{
#ifdef CONFIG_MERGE_TIMERS
	add_event(crit_cpu(ce)->event_group, &ce->event, fire);
#else
	__hrtimer_start_range_ns(&ce->timer,
				 ns_to_ktime(fire),
				 0 /* delta */,
				 HRTIMER_MODE_ABS_PINNED,
				 0 /* no wakeup */);
#endif
}

/*
 * Time accounting for ghost tasks.
 * Must be called before a decision is made involving the task's budget.
 */
static void update_ghost_time(struct task_struct *p)
{
	u64 clock = litmus_clock();
	u64 delta = clock - p->se.exec_start;

	BUG_ON(!is_ghost(p));

	if (unlikely((s64)delta < 0)) {
		delta = 0;
		TRACE_MC_TASK(p, "WARNING: negative time delta");
	}
	if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) {
		TRACE_MC_TASK(p, "Ghost job could have ended");
		tsk_mc_data(p)->mc_job.ghost_budget = 0;
		p->se.exec_start = clock;
	} else {
		TRACE_MC_TASK(p, "Ghost job updated, but didn't finish");
		tsk_mc_data(p)->mc_job.ghost_budget -= delta;
		p->se.exec_start = clock;
	}
}

/**
 * link_task_to_crit() - Logically run a task at a criticality level.
 * Caller must hold @ce's domain's lock.
 */
static void link_task_to_crit(struct crit_entry *ce,
			      struct task_struct *task)
{
	lt_t when_to_fire;
	struct bheap *heap;

	TRACE_CRIT_ENTRY(ce, "Linking " TS, TA(task));
	BUG_ON(!ce->usable && task);
	BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
	BUG_ON(task && is_global(ce->domain) &&
	       !bheap_node_in_heap(ce->node));

	/* Unlink last task */
	if (ce->linked) {
		TRACE_MC_TASK(ce->linked, "Unlinking");
		ce->linked->rt_param.linked_on = NO_CPU;
		if (is_ghost(ce->linked)) {
			cancel_ghost(ce);
			if (tsk_mc_data(ce->linked)->mc_job.ghost_budget > 0) {
				/* Job isn't finished, so do accounting */
				update_ghost_time(ce->linked);
			}
		}
	}

	/* Actually link task */
	ce->linked = task;
	if (task) {
		task->rt_param.linked_on = crit_cpu(ce)->cpu;
		if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
			/* There is a level-A timer that will force a
			 * preemption, so we don't set this for level-A
			 * tasks.
			 */
			/* Reset budget timer */
			task->se.exec_start = litmus_clock();
			when_to_fire = task->se.exec_start +
				tsk_mc_data(task)->mc_job.ghost_budget;
			arm_ghost(ce, when_to_fire);
		}
	}

	/* Update global heap node position */
	if (is_global(ce->domain)) {
		heap = domain_data(ce->domain)->heap;
		bheap_delete(cpu_lower_prio, heap, ce->node);
		bheap_insert(cpu_lower_prio, heap, ce->node);
	}
}

static void check_for_preempt(struct domain*);

/**
 * job_arrival() - Called when a task re-enters the system.
 * Caller must hold no locks.
 */
static void job_arrival(struct task_struct *task)
{
	struct domain *dom = get_task_domain(task);

	TRACE_MC_TASK(task, "Job arriving");
	BUG_ON(!task);

	if (can_requeue(task)) {
		raw_spin_lock(dom->lock);
		dom->requeue(dom, task);
		check_for_preempt(dom);
		raw_spin_unlock(dom->lock);
	} else {
		/* If a global task is scheduled on one cpu, it CANNOT
		 * be requeued into a global domain. Another cpu might
		 * dequeue the global task before it is descheduled,
		 * causing the system to crash when the task is scheduled
		 * in two places simultaneously.
		 */
		TRACE_MC_TASK(task, "Delayed arrival of scheduled task");
	}
}
/**
 * low_prio_arrival() - If CONFIG_PLUGIN_MC_REDIRECT is enabled, will
 * redirect lower priority job_arrival work to the interrupt_cpu.
 */
static void low_prio_arrival(struct task_struct *task)
{
	struct cpu_entry *entry;

	/* Race conditions! */
	if (!can_requeue(task))
		return;

#ifdef CONFIG_PLUGIN_MC_REDIRECT
#ifndef CONFIG_PLUGIN_MC_REDIRECT_ALL
	if (!is_global_task(task))
		goto arrive;
#endif
	if (smp_processor_id() != interrupt_cpu) {
		entry = &__get_cpu_var(cpus);
		raw_spin_lock(&entry->redir_lock);
		TRACE_MC_TASK(task, "Adding to redirect queue");
		list_add(&tsk_rt(task)->list, &entry->redir);
		raw_spin_unlock(&entry->redir_lock);
		litmus_reschedule(interrupt_cpu);
	} else
#endif
	{
arrive:
		job_arrival(task);
	}
}

#ifdef CONFIG_PLUGIN_MC_REDIRECT
/**
 * fix_global_levels() - Execute redirected job arrivals on this cpu.
 */
static void fix_global_levels(void)
{
	int c;
	struct cpu_entry *e;
	struct list_head *pos, *safe;
	struct task_struct *t;

	STRACE("Fixing global levels");
	for_each_online_cpu(c) {
		e = &per_cpu(cpus, c);
		raw_spin_lock(&e->redir_lock);
		list_for_each_safe(pos, safe, &e->redir) {
			t = list_entry(pos, struct task_struct, rt_param.list);
			BUG_ON(!t);
			TRACE_MC_TASK(t, "Dequeued redirected job");
			list_del_init(pos);
			job_arrival(t);
		}
		raw_spin_unlock(&e->redir_lock);
	}
}
#endif

/**
 * link_task_to_cpu() - Logically run a task on a CPU.
 * The task must first have been linked to one of the CPU's crit_entries.
 * Caller must hold the entry lock.
 */
static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
{
	int i = entry_level(entry);

	TRACE_MC_TASK(task, "Linking to P%d", entry->cpu);
	BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
	BUG_ON(task && is_ghost(task));

	if (task) {
		set_rt_flags(task, RT_F_RUNNING);
	}
	entry->linked = task;

	/* Higher criticality crit entries are now usable */
	for (; i < entry_level(entry) + 1; i++) {
		entry->crit_entries[i].usable = 1;
	}
}

/**
 * preempt() - Preempt a logically running task with a higher priority one.
 * @dom: Domain from which to draw higher priority task
 * @ce:  CPU criticality level to preempt
 *
 * Caller must hold the lock for @dom and @ce's CPU lock.
 */
static void preempt(struct domain *dom, struct crit_entry *ce)
{
	struct task_struct *task = dom->take_ready(dom);
	struct cpu_entry *entry = crit_cpu(ce);

	BUG_ON(!task);
	TRACE_CRIT_ENTRY(ce, "Preempted by " TS, TA(task));

	/* Per-domain preemption */
	if (ce->linked && can_requeue(ce->linked)) {
		dom->requeue(dom, ce->linked);
	}
	link_task_to_crit(ce, task);

	/* Preempt actual execution if this is a running task */
	if (!is_ghost(task)) {
		link_task_to_cpu(entry, task);
		preempt_if_preemptable(entry->scheduled, entry->cpu);
	}
}

/**
 * update_crit_levels() - Update criticality entries for the new cpu state.
 * This should be called after a new task has been linked to @entry.
 * The caller must hold the @entry->lock, but this method will release it.
 */
static void update_crit_levels(struct cpu_entry *entry)
{
	int i, global_preempted;
	struct crit_entry *ce;
	struct task_struct *readmit[NUM_CRIT_LEVELS];
	enum crit_level level = entry_level(entry);

	/* Remove lower priority tasks from the entry */
	for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
		ce = &entry->crit_entries[i];

		global_preempted = entry->scheduled == ce->linked &&
			ce->linked && entry->linked &&
			!is_ghost(ce->linked) &&
			is_global(ce->domain);
		/* Do not readmit global tasks which are preempted! These can't
		 * ever be re-admitted until they are descheduled for reasons
		 * explained in job_arrival.
		 */
		readmit[i] = (!global_preempted) ? ce->linked : NULL;

		ce->usable = 0;
		if (ce->linked)
			link_task_to_crit(ce, NULL);
	}

	/* Need to unlock so we can access domains */
	raw_spin_unlock(&entry->lock);

	/* Re-admit tasks to the system */
	for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
		ce = &entry->crit_entries[i];
		if (readmit[i])
			low_prio_arrival(readmit[i]);
	}
}
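/*
 * Lock ordering note (summary of the conventions used in this file):
 * a domain lock is always acquired before a cpu_entry lock.  Code that
 * holds only an entry lock and needs a domain must drop the entry lock
 * first (see update_crit_levels() above and the lock "swap" in
 * mc_schedule() below); otherwise deadlock would be possible.
 */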
/**
 * check_for_preempt() - Causes a preemption if higher-priority tasks are ready.
 * Caller must hold domain lock.
 * Makes gigantic nasty assumption that there is 1 global criticality level,
 * and it is the last one in each list, so it doesn't call update_crit_levels().
 */
static void check_for_preempt(struct domain *dom)
{
	int preempted = 1;
	struct cpu_entry *entry;
	struct crit_entry *ce;

	if (is_global(dom)) {
		/* Loop until we find a non-preemptable CPU */
		while ((ce = lowest_prio_cpu(dom)) && preempted) {
			entry = crit_cpu(ce);
			preempted = 0;
			raw_spin_lock(&entry->lock);
			if (ce->usable && dom->preempt_needed(dom, ce->linked)) {
				preempted = 1;
				preempt(dom, ce);
			}
			raw_spin_unlock(&entry->lock);
		}
	} else /* Partitioned */ {
		ce = domain_data(dom)->crit_entry;
		entry = crit_cpu(ce);
		raw_spin_lock(&entry->lock);
		if (ce->usable && dom->preempt_needed(dom, ce->linked)) {
			preempt(dom, ce);
			update_crit_levels(entry);
		} else {
			raw_spin_unlock(&entry->lock);
		}
	}
}

/**
 * remove_from_all() - Logically remove a task from all structures.
 * Caller must hold no locks.
 */
static void remove_from_all(struct task_struct* task)
{
	int update = 0;
	struct cpu_entry *entry;
	struct crit_entry *ce;
	struct domain *dom = get_task_domain(task);

	TRACE_MC_TASK(task, "Removing from everything");
	BUG_ON(!task);

	raw_spin_lock(dom->lock);
	if (task->rt_param.linked_on != NO_CPU) {
		entry = &per_cpu(cpus, task->rt_param.linked_on);
		raw_spin_lock(&entry->lock);

		/* Unlink only if task is still linked post lock */
		ce = &entry->crit_entries[tsk_mc_crit(task)];
		if (task->rt_param.linked_on != NO_CPU) {
			BUG_ON(entry->linked != task);
			link_task_to_crit(ce, NULL);
			if (!is_ghost(task)) {
				update = 1;
				link_task_to_cpu(entry, NULL);
			}
		}

		if (update)
			update_crit_levels(entry);
		else
			raw_spin_unlock(&entry->lock);
	} else if (is_queued(task)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked. It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		remove((rt_domain_t*)get_task_domain(task)->data, task);
	}
	BUG_ON(is_queued(task));
	raw_spin_unlock(dom->lock);
}

/**
 * job_completion() - Update task state and re-enter it into the system.
 * Converts tasks which have completed their execution early into ghost jobs.
 * Caller must hold no locks.
 */
static void job_completion(struct task_struct *task, int forced)
{
	lt_t now;

	TRACE_MC_TASK(task, "Completed");
	sched_trace_task_completion(task, forced);
	BUG_ON(!task);

	/* Logically stop the task execution */
	set_rt_flags(task, RT_F_SLEEP);
	remove_from_all(task);
	now = litmus_clock();

	/* If it's not a ghost job, do ghost job conversion */
	if (!is_ghost(task)) {
		TRACE_MC_TASK(task, "is not a ghost task");
		tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task);
		tsk_mc_data(task)->mc_job.is_ghost = 1;
	}

	/* If the task is a ghost job with no budget, it either exhausted
	 * its ghost budget or there was no ghost budget after the job
	 * conversion. Revert back to a normal task and complete the period.
	 */
	if (tsk_mc_data(task)->mc_job.ghost_budget == 0) {
		TRACE_MC_TASK(task, "has zero ghost budget");
		tsk_mc_data(task)->mc_job.is_ghost = 0;
		prepare_for_next_period(task);
		if (is_released(task, litmus_clock()))
			sched_trace_task_release(task);
	}

	/* Requeue non-blocking tasks */
	if (is_running(task))
		job_arrival(task);
}
/**
 * mc_ghost_exhausted() - Complete logically running ghost task.
 */
#ifdef CONFIG_MERGE_TIMERS
static void mc_ghost_exhausted(struct rt_event *e)
{
	struct crit_entry *ce = container_of(e, struct crit_entry, event);
#else
static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
{
	struct crit_entry *ce = container_of(timer, struct crit_entry, timer);
#endif
	unsigned long flags;
	struct task_struct *tmp = NULL;

	local_irq_save(flags);
	TRACE("Ghost exhausted\n");
	TRACE_CRIT_ENTRY(ce, "Firing here");

	/* Due to race conditions, we cannot just set the linked
	 * task's budget to 0 as it may no longer be the task
	 * for which this timer was armed. Instead, update the running
	 * task time and see if this causes exhaustion.
	 */
	raw_spin_lock(&crit_cpu(ce)->lock);
	if (ce->linked && is_ghost(ce->linked)) {
		update_ghost_time(ce->linked);
		if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) {
			tmp = ce->linked;
			link_task_to_crit(ce, NULL);
		}
	}
	raw_spin_unlock(&crit_cpu(ce)->lock);

	if (tmp)
		job_completion(tmp, 0);

	local_irq_restore(flags);
#ifndef CONFIG_MERGE_TIMERS
	return HRTIMER_NORESTART;
#endif
}

static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
{
	struct ce_dom_data *ce_data =
		container_of(timer, struct ce_dom_data, timer);
	struct crit_entry *ce =
		&per_cpu(cpus, ce_data->cpu).crit_entries[CRIT_LEVEL_A];
	struct domain *dom = ce->domain;
	struct task_struct *old_link = NULL;
	unsigned long flags;

	TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);

	local_irq_save(flags);

	raw_spin_lock(dom->lock);
	raw_spin_lock(&crit_cpu(ce)->lock);
	if (ce->linked &&
	    ce->linked == ce_data->should_schedule &&
	    is_ghost(ce->linked)) {
		old_link = ce->linked;
		tsk_mc_data(ce->linked)->mc_job.ghost_budget = 0;
		link_task_to_crit(ce, NULL);
	}
	raw_spin_unlock(&crit_cpu(ce)->lock);

	mc_ce_timer_callback_common(dom, timer);

	/* job completion will check for preemptions by means of calling job
	 * arrival if the task is not blocked */
	if (NULL != old_link) {
		STRACE("old_link " TS " so will call job completion\n",
		       TA(old_link));
		raw_spin_unlock(dom->lock);
		job_completion(old_link, 0);
	} else {
		STRACE("old_link was null, so will call check for preempt\n");
		raw_spin_unlock(dom->lock);
		check_for_preempt(dom);
	}
	local_irq_restore(flags);

	return HRTIMER_RESTART;
}

/**
 * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
 */
static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
	unsigned long flags;
	struct task_struct *first = bheap_peek(rt->order, tasks)->value;
	struct domain *dom = get_task_domain(first);

	raw_spin_lock_irqsave(dom->lock, flags);
	TRACE(TS "Jobs released\n", TA(first));
	__merge_ready(rt, tasks);
	check_for_preempt(dom);
	raw_spin_unlock_irqrestore(dom->lock, flags);
}
/**
 * mc_task_new() - Setup new mixed-criticality task.
 * Assumes that there are no partitioned domains after level B.
 */
static void mc_task_new(struct task_struct *t, int on_rq, int running)
{
	unsigned long flags;
	struct cpu_entry* entry;
	enum crit_level level = tsk_mc_crit(t);

	local_irq_save(flags);
	TRACE("New mixed criticality task %d\n", t->pid);

	/* Assign domain */
	if (level < CRIT_LEVEL_C)
		entry = &per_cpu(cpus, get_partition(t));
	else
		entry = &per_cpu(cpus, task_cpu(t));
	t->rt_param._domain = entry->crit_entries[level].domain;

	/* Setup job params */
	release_at(t, litmus_clock());
	tsk_mc_data(t)->mc_job.ghost_budget = 0;
	tsk_mc_data(t)->mc_job.is_ghost = 0;
	if (running) {
		BUG_ON(entry->scheduled);
		entry->scheduled = t;
		tsk_rt(t)->scheduled_on = entry->cpu;
	} else {
		t->rt_param.scheduled_on = NO_CPU;
	}
	t->rt_param.linked_on = NO_CPU;

	job_arrival(t);

	local_irq_restore(flags);
}

/**
 * mc_task_wake_up() - Add task back into its domain and check for preemptions.
 */
static void mc_task_wake_up(struct task_struct *task)
{
	unsigned long flags;
	lt_t now = litmus_clock();
	local_irq_save(flags);

	TRACE(TS " wakes up\n", TA(task));
	if (is_tardy(task, now)) {
		/* Task missed its last release */
		release_at(task, now);
		sched_trace_task_release(task);
	}
	if (!is_ghost(task))
		job_arrival(task);

	local_irq_restore(flags);
}

/**
 * mc_task_block() - Remove task from state to prevent it being run anywhere.
 */
static void mc_task_block(struct task_struct *task)
{
	unsigned long flags;
	local_irq_save(flags);

	TRACE(TS " blocks\n", TA(task));
	remove_from_all(task);

	local_irq_restore(flags);
}

/**
 * mc_task_exit() - Remove task from the system.
 */
static void mc_task_exit(struct task_struct *task)
{
	unsigned long flags;
	local_irq_save(flags);
	BUG_ON(!is_realtime(task));
	TRACE(TS " RIP\n", TA(task));

	remove_from_all(task);
	if (tsk_rt(task)->scheduled_on != NO_CPU) {
		per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL;
		tsk_rt(task)->scheduled_on = NO_CPU;
	}

	if (CRIT_LEVEL_A == tsk_mc_crit(task))
		mc_ce_task_exit_common(task);

	local_irq_restore(flags);
}

/**
 * mc_admit_task() - Return true if the task is valid.
 * Assumes there are no partitioned levels after level B.
 */
static long mc_admit_task(struct task_struct* task)
{
	const enum crit_level crit = tsk_mc_crit(task);
	long ret;

	if (!tsk_mc_data(task)) {
		printk(KERN_WARNING "Tried to admit task with no criticality "
		       "level\n");
		ret = -EINVAL;
		goto out;
	}
	if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
		printk(KERN_WARNING "Tried to admit partitioned task with no "
		       "partition\n");
		ret = -EINVAL;
		goto out;
	}
	if (crit == CRIT_LEVEL_A) {
		ret = mc_ce_admit_task_common(task);
		if (ret)
			goto out;
	}
	printk(KERN_INFO "Admitted task with criticality level %d\n",
	       tsk_mc_crit(task));
	ret = 0;
out:
	return ret;
}
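/*
 * Scheduling overview (summary of the steps mc_schedule() takes below):
 *  1. Snapshot the state of the previously scheduled task under the
 *     entry lock (blocked, out of budget, sleeping, preempted, ...).
 *  2. With no locks held, process blocking, job completion, or the
 *     re-arrival of a preempted global task.
 *  3. If nothing is linked, walk the criticality levels from highest to
 *     lowest and link the first ready non-ghost task found.  The entry
 *     lock is dropped before each domain lock is taken to preserve the
 *     domain-before-entry lock order.
 */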
/**
 * mc_schedule() - Return next task which should be scheduled.
 */
static struct task_struct* mc_schedule(struct task_struct * prev)
{
	unsigned long flags;
	struct domain *dom;
	struct crit_entry *ce;
	struct cpu_entry* entry = &__get_cpu_var(cpus);
	int i, out_of_time, sleep, preempt, exists, blocks, global, lower;
	struct task_struct *dtask = NULL, *ready_task = NULL, *next = NULL;

	local_irq_save(flags);

	/* Sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));
	BUG_ON(is_realtime(prev) && !entry->scheduled);

	/* Determine state */
	raw_spin_lock(&entry->lock);
	exists = entry->scheduled != NULL;
	blocks = exists && !is_running(entry->scheduled);
	out_of_time = exists && budget_enforced(entry->scheduled) &&
		budget_exhausted(entry->scheduled);
	sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	global = exists && is_global_task(entry->scheduled);
	preempt = entry->scheduled != entry->linked;
	lower = exists && preempt && entry->linked &&
		tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);

	if (exists) {
		entry->scheduled->rt_param.scheduled_on = NO_CPU;
		TRACE(TS " blocks:%d out_of_time:%d sleep:%d preempt:%d "
		      "state:%d sig:%d global:%d\n", TA(prev),
		      blocks, out_of_time, sleep, preempt,
		      prev->state, signal_pending(prev), global);
	}
	raw_spin_unlock(&entry->lock);

#ifdef CONFIG_PLUGIN_MC_REDIRECT
	if (smp_processor_id() == interrupt_cpu)
		fix_global_levels();
#endif

	/* If a task blocks we have no choice but to reschedule */
	if (blocks)
		remove_from_all(entry->scheduled);
	/* Any task which exhausts its budget or sleeps waiting for its next
	 * period completes unless its execution has been forcibly stopped.
	 */
	if ((out_of_time || sleep) && !blocks)/* && !preempt)*/
		job_completion(entry->scheduled, !sleep);
	/* Global scheduled tasks must wait for a deschedule before they
	 * can rejoin the global state. Rejoin them here.
	 */
	else if (global && preempt && !blocks) {
		if (lower)
			low_prio_arrival(entry->scheduled);
		else
			job_arrival(entry->scheduled);
	}

	/* Pick next task if none is linked */
	raw_spin_lock(&entry->lock);
	for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
		ce = &entry->crit_entries[i];
		dom = ce->domain;

		/* Swap locks. We cannot acquire a domain lock while
		 * holding an entry lock or deadlocks will happen.
		 */
		raw_spin_unlock(&entry->lock);
		raw_spin_lock(dom->lock);
		raw_spin_lock(&entry->lock);

		dtask = dom->peek_ready(dom);
		if (!entry->linked && ce->usable && !ce->linked && dtask) {
			dom->take_ready(dom);
			link_task_to_crit(ce, dtask);
			ready_task = (is_ghost(dtask)) ? NULL : dtask;

			/* Task found! */
			if (ready_task) {
				link_task_to_cpu(entry, ready_task);
				raw_spin_unlock(dom->lock);
				update_crit_levels(entry);
				raw_spin_lock(&entry->lock);
				continue;
			}
		}
		raw_spin_unlock(dom->lock);
	}

	/* Schedule next task */
	next = entry->linked;
	entry->scheduled = next;
	if (entry->scheduled)
		entry->scheduled->rt_param.scheduled_on = entry->cpu;
	sched_state_task_picked();

	raw_spin_unlock(&entry->lock);
	local_irq_restore(flags);

	if (next)
		TRACE_MC_TASK(next, "Scheduled at %llu", litmus_clock());
	else if (exists && !next)
		TRACE_ENTRY(entry, "Becomes idle at %llu", litmus_clock());
	return next;
}
/*
 * This is the plugin's release at function, called by the release task-set
 * system call. Other places in the file use the generic LITMUS release_at(),
 * which is not this.
 */
void mc_release_at(struct task_struct *ts, lt_t start)
{
	/* hack so that we can have CE timers start at the right time */
	if (CRIT_LEVEL_A == tsk_mc_crit(ts))
		mc_ce_release_at_common(ts, start);
	else
		release_at(ts, start);
}

long mc_deactivate_plugin(void)
{
	return mc_ce_deactivate_plugin_common();
}

/* **************************************************************************
 * Initialization
 * ************************************************************************** */

/* Initialize values here so that they are allocated with the module
 * and destroyed when the module is unloaded.
 */

/* LVL-A */
DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
/* LVL-B */
DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
/* LVL-C */
static struct domain_data _mc_crit_c;
static rt_domain_t _mc_crit_c_rt;
struct bheap _mc_heap_c;
struct bheap_node _mc_nodes_c[NR_CPUS];

#ifdef CONFIG_MERGE_TIMERS
#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
struct event_group _mc_group;
#else
DEFINE_PER_CPU(struct event_group, _mc_groups);
#endif
#endif

static long mc_activate_plugin(void)
{
	struct domain_data *dom_data;
	struct domain *dom;
	struct domain_data *our_domains[NR_CPUS];
	int cpu, n = 0;
	long ret;

#ifdef CONFIG_RELEASE_MASTER
	interrupt_cpu = atomic_read(&release_master_cpu);
	if (interrupt_cpu == NO_CPU)
		interrupt_cpu = 0;
#endif

	for_each_online_cpu(cpu) {
		BUG_ON(NR_CPUS <= n);
		dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
		dom_data = domain_data(dom);
		our_domains[cpu] = dom_data;
		n++;
	}
	ret = mc_ce_set_domains(n, our_domains);
	if (ret)
		goto out;
	ret = mc_ce_activate_plugin_common();
out:
	return ret;
}

static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
	.plugin_name		= "MC",
	.task_new		= mc_task_new,
	.complete_job		= complete_job,
	.task_exit		= mc_task_exit,
	.schedule		= mc_schedule,
	.task_wake_up		= mc_task_wake_up,
	.task_block		= mc_task_block,
	.admit_task		= mc_admit_task,
	.activate_plugin	= mc_activate_plugin,
	.release_at		= mc_release_at,
	.deactivate_plugin	= mc_deactivate_plugin,
};

static void init_crit_entry(struct crit_entry *ce, enum crit_level level,
			    struct domain_data *dom_data,
			    struct bheap_node *node)
{
	ce->level = level;
	ce->linked = NULL;
	ce->node = node;
	ce->domain = &dom_data->domain;
	ce->usable = 1;
#ifdef CONFIG_MERGE_TIMERS
	init_event(&ce->event, level, mc_ghost_exhausted,
		   event_list_alloc(GFP_ATOMIC));
#else
	hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	ce->timer.function = mc_ghost_exhausted;
#endif
}

static void init_local_domain(struct cpu_entry *entry,
			      struct domain_data *dom_data,
			      enum crit_level level)
{
	dom_data->heap = NULL;
	dom_data->crit_entry = &entry->crit_entries[level];
	init_crit_entry(dom_data->crit_entry, level, dom_data, NULL);
}

static void init_global_domain(struct domain_data *dom_data,
			       enum crit_level level,
			       struct bheap *heap, struct bheap_node *nodes)
{
	int cpu;
	struct cpu_entry *entry;
	struct crit_entry *ce;
	struct bheap_node *node;

	dom_data->crit_entry = NULL;
	dom_data->heap = heap;
	bheap_init(heap);

	for_each_online_cpu(cpu) {
		entry = &per_cpu(cpus, cpu);
		node = &nodes[cpu];
		ce = &entry->crit_entries[level];

		init_crit_entry(ce, level, dom_data, node);
		bheap_node_init(&ce->node, ce);
		bheap_insert(cpu_lower_prio, heap, node);
	}
}
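/*
 * Domain layout (as set up in init_mc() below): levels A and B each get a
 * partitioned, per-CPU domain (level A is driven by the ce_domain timer
 * code, level B by EDF), while level C is a single global EDF domain whose
 * CPU heap is ordered by cpu_lower_prio().
 */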
static inline void init_edf_domain(struct domain *dom, rt_domain_t *rt,
				   int timer_cpu, int prio)
{
	pd_domain_init(dom, rt, edf_ready_order, NULL,
		       mc_release_jobs, mc_preempt_needed,
		       edf_higher_prio);
#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
#ifdef CONFIG_MERGE_TIMERS
	rt->event_group = &_mc_group;
	rt->prio = prio;
#else
	rt->release_master = interrupt_cpu;
#endif
#elif CONFIG_MERGE_TIMERS
	rt->event_group = &_mc_groups[timer_cpu];
	rt->prio = prio;
#endif
}

struct domain_data *ce_domain_for(int);

static int __init init_mc(void)
{
	int cpu;
	struct cpu_entry *entry;
	struct domain_data *dom_data;
	rt_domain_t *rt;
	raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
	struct ce_dom_data *ce_data;

	for_each_online_cpu(cpu) {
		entry = &per_cpu(cpus, cpu);

		/* CPU */
		entry->cpu = cpu;
		entry->scheduled = NULL;
		entry->linked = NULL;
		raw_spin_lock_init(&entry->lock);

#ifdef CONFIG_PLUGIN_MC_REDIRECT
		raw_spin_lock_init(&entry->redir_lock);
		INIT_LIST_HEAD(&entry->redir);
#endif

#ifdef CONFIG_MERGE_TIMERS
#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
		entry->event_group = &_mc_group;
#else
		init_event_group(&_mc_groups[cpu],
				 CONFIG_MERGE_TIMERS_WINDOW, cpu);
		entry->event_group = &_mc_groups[cpu];
#endif
#endif

		/* CRIT_LEVEL_A */
		dom_data = &per_cpu(_mc_crit_a, cpu);
		ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
		a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
		raw_spin_lock_init(a_dom_lock);
		ce_domain_init(&dom_data->domain,
			       a_dom_lock, ce_requeue,
			       ce_peek_and_take_ready, ce_peek_and_take_ready,
			       mc_preempt_needed, ce_higher_prio,
			       ce_data, cpu, ce_timer_function);
		init_local_domain(entry, dom_data, CRIT_LEVEL_A);
		dom_data->domain.name = "LVL-A";

		/* CRIT_LEVEL_B */
		dom_data = &per_cpu(_mc_crit_b, cpu);
		rt = &per_cpu(_mc_crit_b_rt, cpu);
		init_local_domain(entry, dom_data, CRIT_LEVEL_B);
		init_edf_domain(&dom_data->domain, rt, cpu, CRIT_LEVEL_B);
		b_dom_lock = dom_data->domain.lock;
		raw_spin_lock_init(b_dom_lock);
		dom_data->domain.name = "LVL-B";
	}

#ifdef CONFIG_MERGE_TIMERS
#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
	init_event_group(&_mc_group, CONFIG_MERGE_TIMERS_WINDOW, interrupt_cpu);
	global_group = &_mc_group;
#else
	global_group = &_mc_groups[0];
#endif
#endif

	/* CRIT_LEVEL_C */
	init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
			   &_mc_heap_c, _mc_nodes_c);
	init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, 0, CRIT_LEVEL_C);
	c_dom_lock = _mc_crit_c.domain.lock;
	raw_spin_lock_init(c_dom_lock);
	_mc_crit_c.domain.name = "LVL-C";

	return register_sched_plugin(&mc_plugin);
}

module_init(init_mc);