/*
* litmus/sched_mc.c
* Implementation of the Mixed Criticality scheduling algorithm.
*
* (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010)
* TODO: optimize reschedule
*/
#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/poison.h>
#include <linux/pid.h>
#include <litmus/litmus.h>
#include <litmus/trace.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
#include <litmus/sched_trace.h>
#include <litmus/domain.h>
#include <litmus/bheap.h>
#include <litmus/event_group.h>
#include <litmus/budget.h>
#include <litmus/server.h>
#include <litmus/sched_mc.h>
#include <litmus/ce_domain.h>
#include <litmus/dgl.h>
#include <litmus/color.h>
#include <litmus/way_tracker.h>
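/* Per-CPU signal bits, set remotely or from group-lock callbacks and
 * consumed by the owning CPU in process_signals() during mc_schedule():
 * 'update' asks the CPU to re-evaluate its group-lock state, 'preempt'
 * asks it to check for a partitioned preemption.
 */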
struct mc_signal {
int update:1;
int preempt:1;
};
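/* Per-CPU scheduling state. @scheduled is the task actually running on the
 * CPU, @linked is the task the plugin wants running there, and
 * @will_schedule is the task picked by the last invocation of mc_schedule().
 * One crit_entry exists per criticality level; @lock protects the entry's
 * scheduling state and @signal_lock the signal bits.
 */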
struct cpu_entry {
int cpu;
struct crit_entry crit_entries[NUM_CRIT_LEVELS];
struct task_struct* scheduled;
struct task_struct* will_schedule;
struct task_struct* linked;
struct mc_signal signal;
raw_spinlock_t lock;
raw_spinlock_t signal_lock;
#ifdef CONFIG_PLUGIN_MC_REDIRECT
struct list_head redir;
raw_spinlock_t redir_lock;
#endif
#ifdef CONFIG_MERGE_TIMERS
struct event_group *event_group;
#endif
};
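/* Group lock (DGL) arbitrating cache colors/ways among level-B tasks when
 * the locking protocol is enabled (lock_cache); dgl_lock serializes all
 * accesses to it. interrupt_cpu caches the release master, if any.
 */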
static struct dgl group_lock;
static raw_spinlock_t dgl_lock;
DEFINE_PER_CPU(struct cpu_entry, cpus);
static int interrupt_cpu;
#define has_resources(t, c) (tsk_rt(t)->req == group_lock.acquired[c])
#define domain_data(dom) (container_of(dom, struct domain_data, domain))
#define is_global(dom) (domain_data(dom)->heap)
#define is_global_task(t) (is_global(get_task_domain(t)))
#define can_requeue(t) \
((t)->rt_param.linked_on == NO_CPU && /* Not linked anywhere */ \
!is_queued(t) && /* Not gonna be linked */ \
(!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU))
#define entry_level(e) \
(((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
#define get_crit_entry_for(cpu, level) (&per_cpu(cpus, cpu).crit_entries[level])
#define crit_cpu(ce) \
(container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
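/* The lock ordering appears to be: domain lock -> cpu_entry lock -> dgl_lock,
 * with signal_lock innermost. pick_next_task() drops the entry lock before
 * taking a domain lock to respect this order.
 */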
static void clear_signal(struct mc_signal *signal)
{
signal->update = signal->preempt = 0;
}
/*
* Put in requests for resources needed by @t.
*/
static int acquire_resources(struct task_struct *t)
{
int cpu, acquired;
struct server *task_server;
struct cpu_entry *entry;
if (!lock_cache)
return 1;
BUG_ON(tsk_rt(t)->linked_on == NO_CPU);
raw_spin_lock(&dgl_lock);
cpu = tsk_rt(t)->linked_on;
task_server = &tsk_rt(t)->server;
	if (!cache_preempt && is_kernel_np(t)) {
		TRACE_MC_TASK(t, "Already contending for resources\n");
		acquired = has_resources(t, cpu);
		raw_spin_unlock(&dgl_lock);
		return acquired;
	}
if (!has_resources(t, cpu)) {
sched_trace_task_block(t);
server_state_change(task_server, SS_BLOCKED, 0);
TRACE_MC_TASK(t, "Blocked at %llu\n", litmus_clock());
add_group_req(&group_lock, tsk_rt(t)->req, cpu);
if (!cache_preempt)
make_np(t);
}
acquired = has_resources(t, cpu);
if (acquired) {
entry = &per_cpu(cpus, cpu);
entry->signal.update = 0;
}
raw_spin_unlock(&dgl_lock);
return acquired;
}
static void release_resources(struct task_struct *t)
{
struct server *task_server = &tsk_rt(t)->server;
if (!lock_cache)
return;
raw_spin_lock(&dgl_lock);
server_state_change(task_server, SS_REMOVED, 0);
if (cache_preempt || is_kernel_np(t)) {
TRACE_MC_TASK(t, "Releasing resources\n");
remove_group_req(&group_lock, tsk_rt(t)->req);
take_np(t);
} else if (!cache_preempt) {
TRACE_MC_TASK(t, "No resources to release!\n");
}
raw_spin_unlock(&dgl_lock);
}
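/* Rough life cycle of a level-B resource request (a sketch inferred from the
 * code above and the signal handling below, assuming lock_cache is enabled):
 *
 *   link_task_to_crit(ce, t)
 *     -> acquire_resources(t): add_group_req() files the color/way request;
 *        if it is not granted immediately the server goes SS_BLOCKED.
 *   ...the group lock later grants the request on CPU c...
 *   cpu_acquired(greq)
 *     -> cpu_update(): sets cpus[c].signal.update and reschedules CPU c.
 *   mc_schedule() on CPU c
 *     -> process_signals() -> process_update_signal():
 *        update_group_req(), unblock the server, start_crit().
 */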
static int dumb_acquire(struct task_struct *t)
{
struct server *server = &tsk_rt(t)->server;
server_state_change(server, SS_ACTIVE, 0);
return 1;
}
static void dumb_release(struct task_struct *t)
{
struct server *server = &tsk_rt(t)->server;
server_state_change(server, SS_REMOVED, 0);
}
#define fully_removed(s) ((s)->state == SS_REMOVED && !(s)->in_transit)
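/* Crit-entry servers cycle through SS_ACTIVE (may link and run tasks),
 * SS_BLOCKED (the linked task is waiting on the group lock) and SS_REMOVED
 * (masked by a higher-criticality task linked to the same CPU). The third
 * argument of server_state_change() appears to mark the change as in transit
 * until the entry's heap position is fixed up by fix_crit_position().
 */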
/*
* Sort CPUs within a global domain's heap.
*/
static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
{
struct domain *domain;
struct crit_entry *first, *second;
struct task_struct *first_link, *second_link;
first = a->value;
second = b->value;
first_link = first->server.linked;
second_link = second->server.linked;
if (fully_removed(&first->server) || fully_removed(&second->server)){
/* Removed entries go at the back of the heap */
return fully_removed(&second->server) &&
!fully_removed(&first->server);
} else if (!first_link || !second_link) {
/* Entry with nothing scheduled is lowest priority (front) */
return second_link && !first_link;
} else {
/* Sort by deadlines of tasks (later deadlines first) */
domain = get_task_domain(first_link);
return domain->higher_prio(second_link, first_link);
}
}
/*
* Return true if the domain has a higher priority ready task. The @curr
* task must belong to the domain.
*/
static int mc_preempt_needed(struct domain *dom, struct task_struct* curr)
{
struct task_struct *next = dom->peek_ready(dom);
if (!next || !curr) {
return next && !curr;
} else {
BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
return !is_np(curr) &&
get_task_domain(next)->higher_prio(next, curr);
}
}
/*
* Update crit entry position in a global heap. Caller must hold
* @ce's domain lock.
*/
static void update_crit_position(struct crit_entry *ce)
{
struct bheap *heap;
if (is_global(ce->domain)) {
heap = domain_data(ce->domain)->heap;
BUG_ON(!heap);
BUG_ON(!bheap_node_in_heap(ce->node));
bheap_delete(cpu_lower_prio, heap, ce->node);
bheap_insert(cpu_lower_prio, heap, ce->node);
}
}
/*
* Update crit entry position in a global heap if it has been marked
* for update. Caller must hold @ce's domain lock.
*/
static void fix_crit_position(struct crit_entry *ce)
{
struct server *server = &ce->server;
if (is_global(ce->domain) && server->in_transit) {
server_state_change(server, server->state, 0);
update_crit_position(ce);
}
}
/*
 * Return the next CPU which should be preempted, or NULL if the domain has
 * no preemptable CPUs. Caller must hold the @dom lock.
*/
static struct crit_entry* lowest_prio_cpu(struct domain *dom)
{
struct bheap *heap = domain_data(dom)->heap;
struct bheap_node* hn;
struct crit_entry *ce, *res = NULL;
do {
hn = bheap_peek(cpu_lower_prio, heap);
ce = (hn) ? hn->value : NULL;
if (ce) {
if (ce->server.in_transit)
fix_crit_position(ce);
else if (ce->server.state == SS_ACTIVE)
res = ce;
else if (ce->server.state == SS_REMOVED)
ce = NULL;
}
} while (ce && !res);
return res;
}
/*
* Time accounting for ghost tasks.
* Must be called before a decision is made involving the task's budget.
*/
static void update_server_time(struct task_struct *p)
{
u64 clock = litmus_clock();
u64 delta = clock - p->rt_param.last_exec_time;
if (unlikely ((s64)delta < 0)) {
delta = 0;
}
if (budget_remaining(p) <= delta) {
tsk_rt(p)->job_params.exec_time = get_exec_cost(p);
} else {
tsk_rt(p)->job_params.exec_time += delta;
}
p->rt_param.last_exec_time = clock;
}
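/* A "ghost" job is one whose userspace work completed before its budget was
 * exhausted; its server keeps logically running (and consuming budget)
 * without occupying the CPU, which is presumably how leftover budget is
 * accounted for. Note that update_server_time() above clamps exec_time at
 * the job's total execution cost, so a late timer cannot make the budget
 * appear overrun.
 */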
/*
* Arm ghost timer. Will merge timers if the option is specified.
*/
static void start_crit(struct crit_entry *ce)
{
lt_t fire;
struct task_struct *task;
struct server *task_server;
BUG_ON(ce->server.state != SS_ACTIVE);
task = ce->server.linked;
task_server = &tsk_rt(task)->server;
if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
/* There is a level-A timer that will force a
* preemption, so we don't set this for level-A
* tasks. Otherwise reset the budget timer
*/
fire = litmus_clock() + budget_remaining(task);
#ifdef CONFIG_MERGE_TIMERS
add_event(crit_cpu(ce)->event_group, &ce->event, fire);
#else
__hrtimer_start_range_ns(&ce->timer,
ns_to_ktime(fire),
0 /* delta */,
HRTIMER_MODE_ABS_PINNED,
0 /* no wakeup */);
#endif
}
server_state_change(task_server, SS_ACTIVE, 0);
}
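/*
 * Stop logically running the task linked to @ce: account any remaining ghost
 * execution time, cancel the ghost budget timer, and mark the task's server
 * removed unless it is blocked on the group lock.
 */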
static void stop_crit(struct crit_entry *ce)
{
struct server *task_server = &tsk_rt(ce->server.linked)->server;
if (is_ghost(ce->server.linked)) {
if (!budget_exhausted(ce->server.linked)) {
/* Job isn't finished, so do accounting */
update_server_time(ce->server.linked);
}
#ifdef CONFIG_MERGE_TIMERS
cancel_event(&ce->event);
#else
hrtimer_try_to_cancel(&ce->timer);
#endif
}
if (task_server->state != SS_BLOCKED) {
server_state_change(task_server, SS_REMOVED, 0);
}
}
/**
 * link_task_to_crit() - Logically run a task at a criticality level.
 * Unlinks any previously linked task, releasing its resources, then attempts
 * to acquire resources for @task, blocking the crit entry's server if they
 * are unavailable. Caller must hold @ce's CPU lock.
*/
static void link_task_to_crit(struct crit_entry *ce,
struct task_struct *task)
{
struct server *ce_server = &ce->server;
TRACE_CRIT_ENTRY(ce, "Linking " TS "\n", TA(task));
BUG_ON(task && ce_server->state != SS_ACTIVE);
BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
BUG_ON(task && is_global(ce->domain) &&
!bheap_node_in_heap(ce->node));
/* Unlink last task */
if (ce->server.linked) {
ce->domain->release_resources(ce->server.linked);
if (ce_server->state == SS_BLOCKED) {
server_state_change(ce_server, SS_ACTIVE, 0);
}
TRACE_MC_TASK(ce->server.linked, "Unlinking\n");
stop_crit(ce);
tsk_rt(ce->server.linked)->server.parent = 0;
tsk_rt(ce->server.linked)->server.cpu = NO_CPU;
ce->server.linked->rt_param.linked_on = NO_CPU;
}
/* Actually link task */
ce->server.linked = task;
if (task) {
/* Block if task cannot acquire resources */
task->rt_param.linked_on = crit_cpu(ce)->cpu;
tsk_rt(task)->server.parent = ce_sid(ce);
tsk_rt(ce->server.linked)->server.cpu = crit_cpu(ce)->cpu;
if (ce->domain->acquire_resources(task)) {
start_crit(ce);
} else {
server_state_change(ce_server, SS_BLOCKED, 0);
}
}
}
static void check_for_preempt(struct domain*);
/**
* job_arrival() - Called when a task re-enters the system.
* Caller must hold no locks.
*/
static void job_arrival(struct task_struct *task)
{
	struct domain *dom;
	BUG_ON(!task);
	dom = get_task_domain(task);
	TRACE_MC_TASK(task, "Job arriving\n");
raw_spin_lock(dom->lock);
if (can_requeue(task)) {
BUG_ON(task->rt_param.linked_on != NO_CPU);
dom->requeue(dom, task);
check_for_preempt(dom);
} else {
/* If a global task is scheduled on one cpu, it CANNOT
* be requeued into a global domain. Another cpu might
* dequeue the global task before it is descheduled,
* causing the system to crash when the task is scheduled
* in two places simultaneously.
*/
TRACE_MC_TASK(task, "Delayed arrival of scheduled task, "
"linked: %d, sched: %d, queued: %d\n",
tsk_rt(task)->linked_on, tsk_rt(task)->scheduled_on,
is_queued(task));
}
raw_spin_unlock(dom->lock);
}
/**
 * low_prio_arrival() - Requeue a lower-priority task. If
 * CONFIG_PLUGIN_MC_REDIRECT is enabled, global arrivals are redirected to
 * the interrupt_cpu instead of being handled locally.
*/
static void low_prio_arrival(struct task_struct *task)
{
struct cpu_entry *entry;
	/* Bail out if a racing arrival has already queued or linked the task */
if (!can_requeue(task)) return;
#ifdef CONFIG_PLUGIN_MC_REDIRECT
if (!is_global_task(task))
goto arrive;
if (smp_processor_id() != interrupt_cpu) {
entry = &__get_cpu_var(cpus);
raw_spin_lock(&entry->redir_lock);
TRACE_MC_TASK(task, "Adding to redirect queue\n");
list_add(&tsk_rt(task)->list, &entry->redir);
raw_spin_unlock(&entry->redir_lock);
litmus_reschedule(interrupt_cpu);
} else
#endif
{
arrive:
TRACE_MC_TASK(task, "On interrupt master, requeueing task\n");
job_arrival(task);
}
}
#ifdef CONFIG_PLUGIN_MC_REDIRECT
/**
* fix_global_levels() - Execute redirected job arrivals on this cpu.
*/
static void fix_global_levels(void)
{
int c;
struct cpu_entry *e;
struct list_head *pos, *safe;
struct task_struct *t;
STRACE("Fixing global levels\n");
for_each_online_cpu(c) {
e = &per_cpu(cpus, c);
raw_spin_lock(&e->redir_lock);
list_for_each_safe(pos, safe, &e->redir) {
t = list_entry(pos, struct task_struct, rt_param.list);
BUG_ON(!t);
TRACE_MC_TASK(t, "Dequeued redirected job\n");
list_del_init(pos);
job_arrival(t);
}
raw_spin_unlock(&e->redir_lock);
}
}
#endif
/**
* link_task_to_cpu() - Logically run a task on a CPU.
* The task must first have been linked to one of the CPU's crit_entries.
* Caller must hold the entry lock.
*/
static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
{
int i = entry_level(entry);
struct crit_entry *ce;
struct server *server;
TRACE_MC_TASK(task, "Linking to P%d\n", entry->cpu);
BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
BUG_ON(task && is_ghost(task));
if (entry->linked) {
server = &tsk_rt(entry->linked)->server;
sched_trace_server_switch_away(server->sid, *server->job,
entry->linked->pid,
get_user_job(entry->linked),
entry->cpu);
}
if (task) {
server = &tsk_rt(task)->server;
sched_trace_server_switch_to(server->sid, *server->job,
task->pid,
get_user_job(task),
entry->cpu);
}
entry->linked = task;
	/* Crit entries masked by the previously linked task are now usable */
for (; i < entry_level(entry) + 1; i++) {
ce = &entry->crit_entries[i];
server = &ce->server;
if (server->state == SS_REMOVED) {
TRACE_CRIT_ENTRY(ce, "Moving up to active\n");
server_state_change(server, SS_ACTIVE, 1);
}
}
}
static void preempt_cpu(struct cpu_entry *entry, struct task_struct *t)
{
link_task_to_cpu(entry, t);
litmus_reschedule(entry->cpu);
}
/**
* preempt_crit() - Preempt a logically running task with a higher priority one.
* @dom Domain from which to draw higher priority task
* @ce CPU criticality level to preempt
* @return Preempted task
*
* Caller must hold the lock for @dom and @ce's CPU lock.
*/
static struct task_struct* preempt_crit(struct domain *dom, struct crit_entry *ce)
{
struct task_struct *task = dom->take_ready(dom);
struct cpu_entry *entry = crit_cpu(ce);
struct task_struct *old = ce->server.linked;
BUG_ON(!task);
TRACE_CRIT_ENTRY(ce, "Preempted by " TS "\n", TA(task));
/* Per-domain preemption */
link_task_to_crit(ce, task);
/* if (old && can_requeue(old)) { */
/* dom->requeue(dom, old); */
/* } */
update_crit_position(ce);
/* Preempt actual execution if this is a running task.
* We know that our task is higher priority than what is currently
* running on this CPU as otherwise the crit_entry would have
* been disabled and a preemption could not have occurred
*/
if (!is_ghost(task) && SS_BLOCKED != ce->server.state) {
preempt_cpu(entry, task);
} else if (old && old == entry->linked) {
/* Preempted running task with ghost job. Nothing should run */
preempt_cpu(entry, NULL);
}
return old;
}
/**
* update_crit_levels() - Update criticality entries for the new cpu state.
* This should be called after a new task has been linked to @entry.
 * The caller must hold the @entry->lock, but this function will release it.
*/
static void update_crit_levels(struct cpu_entry *entry)
{
int i, global_preempted;
struct server *server;
struct crit_entry *ce;
struct task_struct *readmit[NUM_CRIT_LEVELS];
enum crit_level level = entry_level(entry);
/* Remove lower priority tasks from the entry */
for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
ce = &entry->crit_entries[i];
server = &ce->server;
global_preempted = ce->server.linked &&
/* This task is running on a cpu */
ce->server.linked->rt_param.scheduled_on == entry->cpu &&
/* But it was preempted */
ce->server.linked != entry->linked &&
/* And it is an eligible global task */
!is_ghost(ce->server.linked) && is_global(ce->domain);
/* Do not readmit global tasks which are preempted! These can't
* ever be re-admitted until they are descheduled for reasons
* explained in job_arrival.
*/
readmit[i] = (!global_preempted) ? ce->server.linked : NULL;
if (server->state != SS_REMOVED) {
if (ce->server.linked) {
link_task_to_crit(ce, NULL);
}
TRACE_CRIT_ENTRY(ce, "Removing lower crit\n");
server_state_change(server, SS_REMOVED, 1);
}
}
/* Need to unlock so we can access domains */
raw_spin_unlock(&entry->lock);
/* Re-admit tasks to the system */
for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
ce = &entry->crit_entries[i];
if (readmit[i]) {
low_prio_arrival(readmit[i]);
}
}
}
/*
 * Assumes a single, lowest-priority global criticality level. This avoids
* unnecessary calls to update_crit_levels.
*/
static void check_global_preempt(struct domain *dom)
{
int recheck;
struct crit_entry *ce;
struct cpu_entry *entry;
struct task_struct *preempted;
recheck = 1;
/* Loop until we find a non-preemptable CPU */
while (recheck && (ce = lowest_prio_cpu(dom))) {
entry = crit_cpu(ce);
recheck = 1;
preempted = NULL;
/* Cache next task */
dom->peek_ready(dom);
raw_spin_lock(&entry->lock);
if (ce->server.in_transit) {
/* CPU disabled while locking! */
fix_crit_position(ce);
} else if (mc_preempt_needed(dom, ce->server.linked)) {
/* Success! Check for more preemptions */
preempted = preempt_crit(dom, ce);
} else {
/* Failure! */
recheck = 0;
}
raw_spin_unlock(&entry->lock);
/* Only add preempted task after lock has been released */
if (preempted && can_requeue(preempted)) {
dom->requeue(dom, preempted);
}
}
}
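/*
 * Signal a partitioned preemption: rather than preempting here, set the
 * target CPU's preempt signal and reschedule it; the CPU performs the actual
 * preemption itself in process_signals(). Caller must hold @dom's lock and
 * the crit entry's CPU lock (see check_for_preempt()).
 */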
static void check_partitioned_preempt(struct domain *dom)
{
struct cpu_entry *entry;
struct crit_entry *ce;
ce = domain_data(dom)->crit_entry;
entry = crit_cpu(ce);
if (ce->server.state == SS_REMOVED ||
!mc_preempt_needed(dom, ce->server.linked)) {
return;
}
entry->signal.preempt = 1;
litmus_reschedule(entry->cpu);
}
/**
* check_for_preempt() - Causes a preemption if higher-priority tasks are ready.
* Caller must hold domain lock.
*/
static void check_for_preempt(struct domain *dom)
{
struct crit_entry *ce;
struct cpu_entry *entry;
if (is_global(dom)) {
check_global_preempt(dom);
} else {
ce = domain_data(dom)->crit_entry;
entry = crit_cpu(ce);
/* Cache next task */
dom->peek_ready(dom);
raw_spin_lock(&entry->lock);
check_partitioned_preempt(dom);
raw_spin_unlock(&entry->lock);
}
}
/**
* remove_from_all() - Logically remove a task from all structures.
* Caller must hold no locks.
*/
static void remove_from_all(struct task_struct* task)
{
int update = 0;
struct cpu_entry *entry;
struct crit_entry *ce;
	struct domain *dom;
	BUG_ON(!task);
	dom = get_task_domain(task);
	TRACE_MC_TASK(task, "Removing from everything\n");
raw_spin_lock(dom->lock);
/* Remove the task from any CPU state */
if (task->rt_param.linked_on != NO_CPU) {
TRACE_MC_TASK(task, "Linked to something\n");
entry = &per_cpu(cpus, task->rt_param.linked_on);
raw_spin_lock(&entry->lock);
/* Unlink only if task is still linked post lock */
ce = &entry->crit_entries[tsk_mc_crit(task)];
if (task->rt_param.linked_on != NO_CPU) {
BUG_ON(ce->server.linked != task);
if (entry->linked == task) {
update = 1;
link_task_to_cpu(entry, NULL);
}
link_task_to_crit(ce, NULL);
update_crit_position(ce);
} else {
TRACE_MC_TASK(task, "Unlinked before we got lock!\n");
}
raw_spin_unlock(&entry->lock);
} else {
TRACE_MC_TASK(task, "Not linked to anything\n");
}
/* Ensure the task isn't returned by its domain */
dom->remove(dom, task);
raw_spin_unlock(dom->lock);
}
/**
* job_completion() - Update task state and re-enter it into the system.
* Converts tasks which have completed their execution early into ghost jobs.
* Caller must hold no locks.
*/
static void job_completion(struct task_struct *task, int forced)
{
int release_server;
struct cpu_entry *entry;
struct crit_entry *ce;
TRACE_MC_TASK(task, "Completed\n");
if (!forced) {
/* Userspace signaled job completion */
sched_trace_task_completion(current, 0);
mb();
setup_user_release(current, get_user_deadline(current));
}
#ifndef CONFIG_PLUGIN_MC_LINUX_SLACK_STEALING
/* Release lowest-criticality task's servers with their userspace tasks,
* preventing them from turning into idle ghost tasks
*/
if (tsk_mc_crit(task) == NUM_CRIT_LEVELS - 1)
release_server = 1;
else
#endif
release_server = budget_exhausted(task);
if (release_server || forced) {
if (release_server)
sched_trace_server_completion(-task->pid,
get_rt_job(task));
/* Only unlink (and release resources) if the current server job
* must stop logically running
*/
remove_from_all(task);
}
if (lt_before(get_user_release(task), litmus_clock()) ||
(release_server && tsk_rt(task)->completed)){
TRACE_TASK(task, "Executable task going back to running\n");
tsk_rt(task)->completed = 0;
}
if (release_server || forced) {
/* TODO: Level A does this independently and should not */
if (release_server && CRIT_LEVEL_A != tsk_mc_crit(task)) {
prepare_for_next_period(task);
}
TRACE_TASK(task, "Is released: %d, now: %llu, rel: %llu\n",
is_released(task, litmus_clock()), litmus_clock(),
get_release(task));
/* Requeue non-blocking tasks */
if (is_running(task)) {
job_arrival(task);
}
} else if (is_ghost(task)) {
entry = &per_cpu(cpus, tsk_rt(task)->linked_on);
ce = &entry->crit_entries[tsk_mc_crit(task)];
raw_spin_lock(&entry->lock);
if (ce->server.linked == task) {
/* The task went ghost while it was linked to a CPU */
link_task_to_cpu(entry, NULL);
stop_crit(ce);
if (ce->server.state == SS_ACTIVE)
start_crit(ce);
}
raw_spin_unlock(&entry->lock);
}
}
/**
* mc_ghost_exhausted() - Complete logically running ghost task.
*/
#ifdef CONFIG_MERGE_TIMERS
static void mc_ghost_exhausted(struct rt_event *e)
{
struct crit_entry *ce = container_of(e, struct crit_entry, event);
#else
static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
{
struct crit_entry *ce = container_of(timer, struct crit_entry, timer);
#endif
struct task_struct *tmp = NULL;
struct cpu_entry *entry = crit_cpu(ce);
TRACE("Firing here at %llu\n", litmus_clock());
TRACE_CRIT_ENTRY(ce, "For this\n");
raw_spin_lock(&entry->lock);
if (is_ghost(ce->server.linked)) {
update_server_time(ce->server.linked);
if (budget_exhausted(ce->server.linked)) {
tmp = ce->server.linked;
}
} else {
litmus_reschedule(crit_cpu(ce)->cpu);
}
raw_spin_unlock(&entry->lock);
if (tmp)
job_completion(tmp, 1);
#ifndef CONFIG_MERGE_TIMERS
return HRTIMER_NORESTART;
#endif
}
/*
* The MC-CE common timer callback code for merged and non-merged timers.
* Returns the next time the timer should fire.
*/
static lt_t __ce_timer_function(struct ce_dom_data *ce_data)
{
struct crit_entry *ce = get_crit_entry_for(ce_data->cpu, CRIT_LEVEL_A);
struct domain *dom = ce->domain;
struct task_struct *old_link = NULL;
lt_t next_timer_abs;
TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
raw_spin_lock(dom->lock);
raw_spin_lock(&crit_cpu(ce)->lock);
if (ce->server.linked &&
ce->server.linked == ce_data->should_schedule)
{
old_link = ce->server.linked;
link_task_to_crit(ce, NULL);
mc_ce_job_completion(dom, old_link);
}
raw_spin_unlock(&crit_cpu(ce)->lock);
next_timer_abs = mc_ce_timer_callback_common(dom);
/* Job completion will check for preemptions by means of calling job
* arrival if the task is not blocked */
if (NULL != old_link) {
STRACE("old_link " TS " so will call job completion\n", TA(old_link));
raw_spin_unlock(dom->lock);
job_completion(old_link, 1);
} else {
STRACE("old_link was null, so will call check for preempt\n");
check_for_preempt(dom);
raw_spin_unlock(dom->lock);
}
return next_timer_abs;
}
#ifdef CONFIG_MERGE_TIMERS
static void ce_timer_function(struct rt_event *e)
{
struct ce_dom_data *ce_data =
container_of(e, struct ce_dom_data, event);
unsigned long flags;
lt_t next_timer_abs;
TS_LVLA_RELEASE_START;
local_irq_save(flags);
next_timer_abs = __ce_timer_function(ce_data);
add_event(per_cpu(cpus, ce_data->cpu).event_group, e, next_timer_abs);
local_irq_restore(flags);
TS_LVLA_RELEASE_END;
}
#else /* else to CONFIG_MERGE_TIMERS */
static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
{
struct ce_dom_data *ce_data =
container_of(timer, struct ce_dom_data, timer);
unsigned long flags;
lt_t next_timer_abs;
TS_LVLA_RELEASE_START;
local_irq_save(flags);
next_timer_abs = __ce_timer_function(ce_data);
hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
local_irq_restore(flags);
TS_LVLA_RELEASE_END;
return HRTIMER_RESTART;
}
#endif /* CONFIG_MERGE_TIMERS */
/**
* mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
*/
static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
{
unsigned long flags;
struct task_struct *first = bheap_peek(rt->order, tasks)->value;
struct domain *dom = get_task_domain(first);
raw_spin_lock_irqsave(dom->lock, flags);
TRACE(TS "Jobs released\n", TA(first));
__merge_ready(rt, tasks);
check_for_preempt(dom);
raw_spin_unlock_irqrestore(dom->lock, flags);
}
/**
 * mc_task_new() - Setup new mixed-criticality task.
* Assumes that there are no partitioned domains after level B.
*/
static void mc_task_new(struct task_struct *t, int on_rq, int running)
{
unsigned long flags;
int i;
struct cpu_entry* entry;
enum crit_level level = tsk_mc_crit(t);
struct dgl_group_req *req;
struct control_page *cp = tsk_rt(t)->ctrl_page;
struct color_ctrl_page *ccp = &tsk_rt(t)->color_ctrl_page;
local_irq_save(flags);
TRACE("New mixed criticality task %d\n", t->pid);
if (level == CRIT_LEVEL_A)
get_rt_relative_deadline(t) = get_exec_cost(t);
/* Assign domain */
if (level < CRIT_LEVEL_C)
entry = &per_cpu(cpus, get_partition(t));
else
entry = &per_cpu(cpus, task_cpu(t));
t->rt_param._domain = entry->crit_entries[level].domain;
tsk_rt(t)->flush = 0;
tsk_rt(t)->load = 0;
/* Userspace and kernelspace view of task state may differ.
* Model kernel state as a budget enforced container
*/
sched_trace_container_param(t->pid, t->comm);
sched_trace_server_param(-t->pid, t->pid,
get_exec_cost(t), get_rt_period(t));
server_init(&tsk_rt(t)->server, -t->pid,
&tsk_rt(t)->job_params.job_no,
NO_CPU);
tsk_rt(t)->task_params.budget_policy = PRECISE_ENFORCEMENT;
BUG_ON(!tsk_rt(t)->server.job);
/* Apply chunking */
if (level == CRIT_LEVEL_B && color_chunk &&
lt_after(get_exec_cost(t), color_chunk)) {
tsk_rt(t)->orig_cost = get_exec_cost(t);
}
/* Setup color request */
req = kmalloc(sizeof(*req), GFP_ATOMIC);
req->task = t;
tsk_rt(t)->req = req;
if (cp && ccp) {
TRACE_MC_TASK(t, "Initializing group request\n");
cp->colors_updated = 0;
dgl_group_req_init(&group_lock, req);
for (i = 0; ccp->pages[i]; ++i)
set_req(&group_lock, req, ccp->colors[i], ccp->pages[i]);
} else {
BUG_ON(CRIT_LEVEL_B == tsk_mc_crit(t));
}
/* Setup job params */
release_at(t, litmus_clock());
if (running) {
BUG_ON(entry->scheduled);
TRACE_MC_TASK(t, "Was already running\n");
entry->scheduled = t;
tsk_rt(t)->scheduled_on = entry->cpu;
tsk_rt(t)->last_exec_time = litmus_clock();
} else {
t->rt_param.scheduled_on = NO_CPU;
}
t->rt_param.linked_on = NO_CPU;
job_arrival(t);
local_irq_restore(flags);
}
/**
 * mc_task_wake_up() - Add task back into its domain and check for preemptions.
*/
static void mc_task_wake_up(struct task_struct *task)
{
unsigned long flags;
lt_t now = litmus_clock();
local_irq_save(flags);
TRACE(TS " wakes up\n", TA(task));
if (is_tardy(task, now)) {
/* Task missed its last release */
release_at(task, now);
sched_trace_task_release(task);
}
if (budget_exhausted(task))
/* Rare, but possible, race condition */
job_completion(task, 1);
else
job_arrival(task);
local_irq_restore(flags);
}
/**
* mc_task_block() - Remove task from state to prevent it being run anywhere.
*/
static void mc_task_block(struct task_struct *task)
{
unsigned long flags;
local_irq_save(flags);
TRACE(TS " blocks\n", TA(task));
remove_from_all(task);
local_irq_restore(flags);
}
/**
* mc_task_exit() - Remove task from the system.
*/
static void mc_task_exit(struct task_struct *task)
{
unsigned long flags;
local_irq_save(flags);
BUG_ON(!is_realtime(task));
TRACE(TS " RIP\n", TA(task));
if (tsk_mc_crit(task) == CRIT_LEVEL_B && lock_cache) {
color_sched_out_task(task);
}
remove_from_all(task);
if (tsk_rt(task)->scheduled_on != NO_CPU) {
per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL;
tsk_rt(task)->scheduled_on = NO_CPU;
}
	/* TODO: restore. This was getting triggered by race conditions even when
* no level-A task was executing */
/* if (CRIT_LEVEL_A == tsk_mc_crit(task)) */
/* mc_ce_task_exit_common(task); */
local_irq_restore(flags);
}
/**
* mc_admit_task() - Return true if the task is valid.
* Assumes there are no partitioned levels after level B.
*/
static long mc_admit_task(struct task_struct* task)
{
const enum crit_level crit = tsk_mc_crit(task);
long ret;
if (!tsk_mc_data(task)) {
printk(KERN_WARNING "Tried to admit task with no criticality "
"level\n");
ret = -EINVAL;
goto out;
}
if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
printk(KERN_WARNING "Tried to admit partitioned task with no "
"partition\n");
ret = -EINVAL;
goto out;
}
/* if (crit < CRIT_LEVEL_C && get_partition(task) == interrupt_cpu) { */
/* printk(KERN_WARNING "Tried to admit partitioned task on " */
/* "the interrupt master\n"); */
/* ret = -EINVAL; */
/* goto out; */
/* } */
if (crit == CRIT_LEVEL_A) {
ret = mc_ce_admit_task_common(task);
if (ret)
goto out;
}
printk(KERN_INFO "Admitted task with criticality level %d\n",
tsk_mc_crit(task));
ret = 0;
out:
return ret;
}
/*
 * Pick the highest-priority eligible task across criticality levels and link
 * it to @entry. Caller must hold the entry lock; the lock is temporarily
 * dropped and re-taken so that domain locks are always acquired first.
*/
void pick_next_task(struct cpu_entry *entry)
{
int i;
struct crit_entry *ce;
struct domain *dom;
struct task_struct *dtask, *ready_task;
struct server *server;
STRACE("Picking next task\n");
for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
ce = &entry->crit_entries[i];
dom = ce->domain;
server = &ce->server;
/* Swap locks. We cannot acquire a domain lock while
* holding an entry lock or deadlocks will happen
*/
raw_spin_unlock(&entry->lock);
raw_spin_lock(dom->lock);
/* Do domain stuff before grabbing CPU locks */
dtask = dom->peek_ready(dom);
fix_crit_position(ce);
raw_spin_lock(&entry->lock);
ready_task = NULL;
if (!entry->linked && server->state == SS_ACTIVE) {
if (ce->server.linked) {
ready_task = ce->server.linked;
} else if (dtask) {
/* Need a new task */
dom->take_ready(dom);
ready_task = dtask;
link_task_to_crit(ce, dtask);
update_crit_position(ce);
}
}
if (ready_task && !is_ghost(ready_task) &&
server->state == SS_ACTIVE) {
link_task_to_cpu(entry, ready_task);
raw_spin_unlock(dom->lock);
update_crit_levels(entry);
raw_spin_lock(&entry->lock);
continue;
}
raw_spin_unlock(dom->lock);
}
}
static void process_update_signal(struct cpu_entry *entry)
{
int locked;
struct crit_entry *ce;
struct server *crit_server, *task_server;
struct task_struct *linked;
STRACE("Reading update signal\n");
ce = &entry->crit_entries[CRIT_LEVEL_B];
/* Complete task state transitions */
crit_server = &ce->server;
if (!crit_server->linked) {
return;
}
linked = crit_server->linked;
task_server = &tsk_rt(linked)->server;
if (!task_server->in_transit) {
return;
}
raw_spin_lock(&dgl_lock);
/* Update and save lock state */
update_group_req(&group_lock, tsk_rt(linked)->req);
locked = has_resources(linked, entry->cpu);
raw_spin_unlock(&dgl_lock);
if (locked && crit_server->state != SS_ACTIVE) {
TRACE_MC_TASK(linked, "Activated\n");
server_state_change(crit_server, SS_ACTIVE, 0);
start_crit(ce);
server_state_change(task_server, SS_ACTIVE, 0);
if (!is_ghost(linked)) {
link_task_to_cpu(entry, linked);
update_crit_levels(entry);
raw_spin_lock(&entry->lock);
}
} else if (!locked && crit_server->state != SS_BLOCKED) {
TRACE_MC_TASK(linked, "Blocked\n");
if (entry->linked == linked) {
link_task_to_cpu(entry, NULL);
}
server_state_change(task_server, SS_BLOCKED, 0);
stop_crit(ce);
server_state_change(crit_server, SS_BLOCKED, 0);
}
}
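/*
 * Act on this CPU's pending signals: a preempt signal triggers a check for a
 * partitioned level-B preemption, an update signal re-evaluates the group
 * lock state via process_update_signal(). Returns with @entry->lock held.
 */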
static void process_signals(struct cpu_entry *entry)
{
struct domain *dom;
struct crit_entry *ce;
struct mc_signal signal;
struct task_struct *preempted;
ce = &entry->crit_entries[CRIT_LEVEL_B];
dom = ce->domain;
/* Load signals */
raw_spin_lock(&entry->signal_lock);
signal = entry->signal;
clear_signal(&entry->signal);
raw_spin_unlock(&entry->signal_lock);
if (signal.preempt) {
raw_spin_lock(dom->lock);
/* A higher-priority task may exist */
STRACE("Reading preempt signal\n");
dom->peek_ready(dom);
raw_spin_lock(&entry->lock);
if (ce->server.state == SS_ACTIVE &&
mc_preempt_needed(ce->domain, ce->server.linked)) {
preempted = preempt_crit(ce->domain, ce);
raw_spin_unlock(dom->lock);
/* Can't requeue while we hold the entry lock, but
* can't release that lock until state of lower-crit
* servers is updated
*/
if (!is_ghost(ce->server.linked)) {
update_crit_levels(entry);
} else {
raw_spin_unlock(&entry->lock);
}
if (preempted) {
raw_spin_lock(dom->lock);
dom->requeue(dom, preempted);
raw_spin_unlock(dom->lock);
}
raw_spin_lock(&entry->lock);
} else {
raw_spin_unlock(dom->lock);
}
} else {
raw_spin_lock(&entry->lock);
}
if (signal.update) {
process_update_signal(entry);
}
}
/**
 * mc_schedule() - Return the next task which should be scheduled.
 * Handles completions due to budget exhaustion or job completion, processes
 * pending preempt/update signals, and then links the highest-priority
 * eligible task to this CPU.
*/
static struct task_struct* mc_schedule(struct task_struct* prev)
{
lt_t start, exec;
int out_of_time, sleep, preempt, exists, blocks, global, lower, work;
struct cpu_entry* entry = &__get_cpu_var(cpus);
struct task_struct *next = NULL;
	/* Litmus gave up because it could not access the stack of the task
	 * on the CPU that will_schedule was migrating from. Requeue it.
	 * This really only happens in VMs.
*/
if (entry->will_schedule && entry->will_schedule != prev) {
entry->will_schedule->rt_param.scheduled_on = NO_CPU;
low_prio_arrival(entry->will_schedule);
}
if (prev && tsk_rt(prev)->last_exec_time) {
exec = litmus_clock() - tsk_rt(prev)->last_exec_time;
tsk_rt(prev)->user_job.exec_time += exec;
}
if (prev && tsk_mc_crit(prev) == CRIT_LEVEL_B &&
is_realtime(prev) && get_rt_job(prev) > 1 && lock_cache) {
start = litmus_clock();
work = color_sched_out_task(prev);
tsk_rt(prev)->flush = litmus_clock() - start;
++tsk_rt(prev)->flush_work;
}
TS_LVLA_SCHED_START;
TS_LVLB_SCHED_START;
TS_LVLC_SCHED_START;
raw_spin_lock(&entry->lock);
BUG_ON(entry->scheduled && entry->scheduled != prev);
BUG_ON(entry->scheduled && !is_realtime(prev));
BUG_ON(prev && is_realtime(prev) && !entry->scheduled);
if (entry->scheduled != NULL) {
entry->scheduled->rt_param.scheduled_on = NO_CPU;
update_server_time(entry->scheduled);
}
/* Determine state */
exists = entry->scheduled != NULL;
blocks = exists && !is_running(entry->scheduled);
out_of_time = exists && budget_exhausted(entry->scheduled);
sleep = exists && tsk_rt(entry->scheduled)->completed;
global = exists && is_global_task(entry->scheduled);
preempt = entry->scheduled != entry->linked;
lower = exists && preempt && entry->linked &&
tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);
TRACE(TS " block:%d oot:%d sleep:%d preempt:%d, now: %llu\n",
TA(prev), blocks, out_of_time, sleep, preempt, litmus_clock());
raw_spin_unlock(&entry->lock);
#ifdef CONFIG_PLUGIN_MC_REDIRECT
if (smp_processor_id() == interrupt_cpu)
fix_global_levels();
#endif
/* If a task blocks we have no choice but to reschedule */
if (blocks)
remove_from_all(entry->scheduled);
/* Any task which exhausts its budget or sleeps waiting for its next
* period completes unless its execution has been forcibly stopped
*/
else if (out_of_time || sleep)/* && !preempt)*/
job_completion(entry->scheduled, !sleep || preempt);
/* Global scheduled tasks must wait for a deschedule before they
* can rejoin the global state. Rejoin them here
*/
else if (global && preempt) {
if (lower)
low_prio_arrival(entry->scheduled);
else
job_arrival(entry->scheduled);
}
/* TODO: move this down somehow */
sched_state_task_picked();
process_signals(entry);
/* Pick next task if none is linked */
if (!entry->linked)
pick_next_task(entry);
/* Schedule next task */
next = entry->linked;
if (next) {
next->rt_param.scheduled_on = entry->cpu;
}
entry->will_schedule = next;
raw_spin_unlock(&entry->lock);
if (next) {
switch (tsk_mc_crit(next)) {
case CRIT_LEVEL_A: TS_LVLA_SCHED_END(next); break;
case CRIT_LEVEL_B: TS_LVLB_SCHED_END(next); break;
case CRIT_LEVEL_C: TS_LVLC_SCHED_END(next); break;
}
}
if (next && tsk_mc_crit(next) == CRIT_LEVEL_B && lock_cache && get_rt_job(next) > 1) {
start = litmus_clock();
work = color_sched_in_task(next);
tsk_rt(next)->load = litmus_clock() - start;
tsk_rt(next)->load_work = work;
}
if (next) {
tsk_rt(next)->last_exec_time = litmus_clock();
TRACE_MC_TASK(next, "Picked this task\n");
} else {
STRACE("CPU %d idles at %llu\n", entry->cpu, litmus_clock());
}
return next;
}
void mc_finish_switch(struct task_struct *prev)
{
struct cpu_entry* entry = &__get_cpu_var(cpus);
entry->scheduled = is_realtime(current) ? current : NULL;
TRACE_TASK(prev, "Switched away from to " TS "\n",
TA(entry->scheduled));
}
long mc_deactivate_plugin(void)
{
return mc_ce_deactivate_plugin_common();
}
static unsigned long long deadline_prio(struct dgl *dgl, struct dgl_group_req *greq)
{
return get_deadline(greq->task);
}
static void cpu_update(struct dgl_group_req *greq)
{
struct cpu_entry *entry = &per_cpu(cpus, greq->cpu);
raw_spin_lock(&entry->signal_lock);
entry->signal.update = 1;
raw_spin_unlock(&entry->signal_lock);
litmus_reschedule(greq->cpu);
}
/*
* Setup and send signal to CPU for resource acquisition. To avoid touching
* CPU locks, all CPU state modifications are delayed until the signal is
* processed.
*/
static void cpu_acquired(struct dgl_group_req *greq)
{
struct server *server = &tsk_rt(greq->task)->server;
TRACE_MC_TASK(greq->task, "Acquired CPU %d\n", greq->cpu);
sched_trace_task_resume(greq->task);
server_state_change(server, SS_ACTIVE, 1);
cpu_update(greq);
}
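/*
 * Counterpart of cpu_acquired(): the group lock has revoked this request's
 * resources (only possible with cache_preempt), so block the task's server
 * and signal the owning CPU to update its state.
 */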
static void cpu_preempted(struct dgl_group_req *greq)
{
struct server *server = &tsk_rt(greq->task)->server;
TRACE_MC_TASK(greq->task, "Dropping CPU %d\n", greq->cpu);
sched_trace_task_block(greq->task);
server_state_change(server, SS_BLOCKED, 1);
cpu_update(greq);
}
/* **************************************************************************
* Initialization
* ************************************************************************** */
/* Initialize values here so that they are allocated with the module
* and destroyed when the module is unloaded.
*/
/* LVL-A */
DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
/* LVL-B */
DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
/* LVL-C */
static struct domain_data _mc_crit_c;
static rt_domain_t _mc_crit_c_rt;
struct bheap _mc_heap_c;
struct bheap_node _mc_nodes_c[NR_CPUS];
static long mc_activate_plugin(void)
{
struct domain_data *dom_data;
struct domain *dom;
struct domain_data *our_domains[NR_CPUS];
rt_domain_t *rt_dom;
int cpu, n = 0;
long ret;
reset_way_tracker();
interrupt_cpu = atomic_read(&release_master_cpu);
for_each_online_cpu(cpu) {
rt_dom = &per_cpu(_mc_crit_b_rt, cpu);
/* rt_dom->release_master = cpu; */
}
if (cache_preempt && !lock_cache) {
printk(KERN_ERR "LITMUS-MC: specified cache preemption without "
"enabling the locking protocol (lock_cache)\n");
ret = -EINVAL;
goto out;
}
dgl_init(&group_lock, color_cache_info.nr_colors,
color_cache_info.ways);
if (cache_preempt) {
group_lock.assign_priority = deadline_prio;
group_lock.cpu_preempted = cpu_preempted;
}
group_lock.cpu_acquired = cpu_acquired;
for_each_online_cpu(cpu) {
BUG_ON(NR_CPUS <= n);
dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
dom_data = domain_data(dom);
our_domains[cpu] = dom_data;
#if defined(CONFIG_MERGE_TIMERS) && defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
per_cpu(cpus, cpu).event_group =
get_event_group_for(interrupt_cpu);
#elif defined(CONFIG_MERGE_TIMERS) && !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
per_cpu(cpus, cpu).event_group = get_event_group_for(cpu);
#endif
n++;
}
ret = mc_ce_set_domains(n, our_domains);
if (ret)
goto out;
ret = mc_ce_activate_plugin_common();
out:
return ret;
}
static void mc_release_ts(lt_t time)
{
int cpu, cont_id = -1;
char name[TASK_COMM_LEN];
enum crit_level level;
struct cpu_entry *entry;
struct crit_entry *ce;
level = CRIT_LEVEL_A;
strcpy(name, "LVL-A");
for_each_online_cpu(cpu) {
/* if (cpu == interrupt_cpu) */
/* continue; */
entry = &per_cpu(cpus, cpu);
sched_trace_container_param(++cont_id, (const char*)&name);
ce = &entry->crit_entries[level];
sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
server_state_change(&ce->server, SS_ACTIVE, 0);
}
level = CRIT_LEVEL_B;
strcpy(name, "LVL-B");
for_each_online_cpu(cpu) {
/* if (cpu == interrupt_cpu) */
/* continue; */
entry = &per_cpu(cpus, cpu);
sched_trace_container_param(++cont_id, (const char*)&name);
ce = &entry->crit_entries[level];
sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
server_state_change(&ce->server, SS_ACTIVE, 0);
}
level = CRIT_LEVEL_C;
strcpy(name, "LVL-C");
sched_trace_container_param(++cont_id, (const char*)&name);
for_each_online_cpu(cpu) {
entry = &per_cpu(cpus, cpu);
ce = &entry->crit_entries[level];
sched_trace_server_param(ce_sid(ce), cont_id, 0, 0);
server_state_change(&ce->server, SS_ACTIVE, 0);
}
mc_ce_release_at_common(NULL, time);
}
static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
.plugin_name = "MC",
.task_new = mc_task_new,
.complete_job = complete_job,
.task_exit = mc_task_exit,
.schedule = mc_schedule,
.task_wake_up = mc_task_wake_up,
.task_block = mc_task_block,
.admit_task = mc_admit_task,
.activate_plugin = mc_activate_plugin,
.release_at = release_at,
.deactivate_plugin = mc_deactivate_plugin,
.finish_switch = mc_finish_switch,
.release_ts = mc_release_ts,
};
static void init_crit_entry(struct cpu_entry *entry,
struct crit_entry *ce, enum crit_level level,
struct domain_data *dom_data,
struct bheap_node *node)
{
ce->level = level;
ce->server.linked = NULL;
ce->node = node;
ce->domain = &dom_data->domain;
server_init(&ce->server, ce_sid(ce), 0, entry->cpu);
ce->server.parent = -entry->cpu - 1;
#ifdef CONFIG_MERGE_TIMERS
init_event(&ce->event, level, mc_ghost_exhausted,
event_list_alloc(GFP_ATOMIC));
#else
hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
ce->timer.function = mc_ghost_exhausted;
#endif
}
static void init_local_domain(struct cpu_entry *entry, struct domain_data *dom_data,
enum crit_level level)
{
dom_data->heap = NULL;
dom_data->crit_entry = &entry->crit_entries[level];
init_crit_entry(entry, dom_data->crit_entry, level, dom_data, NULL);
}
static void init_global_domain(struct domain_data *dom_data, enum crit_level level,
struct bheap *heap, struct bheap_node *nodes)
{
int cpu;
struct cpu_entry *entry;
struct crit_entry *ce;
struct bheap_node *node;
dom_data->crit_entry = NULL;
dom_data->heap = heap;
bheap_init(heap);
for_each_online_cpu(cpu) {
entry = &per_cpu(cpus, cpu);
node = &nodes[cpu];
ce = &entry->crit_entries[level];
init_crit_entry(entry, ce, level, dom_data, node);
bheap_node_init(&ce->node, ce);
bheap_insert(cpu_lower_prio, heap, node);
}
}
static void init_edf_domain(struct domain *dom, rt_domain_t *rt,
enum crit_level prio, int is_partitioned, int cpu)
{
pd_domain_init(dom, rt, edf_ready_order, NULL,
mc_release_jobs, edf_higher_prio);
rt->level = prio;
#if defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
/* All timers are on one CPU and release-master is using the event
* merging interface as well. */
BUG_ON(NO_CPU == interrupt_cpu);
rt->event_group = get_event_group_for(interrupt_cpu);
rt->prio = prio;
#elif defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
/* Using release master, but not merging timers. */
/* rt->release_master = interrupt_cpu; */
#elif !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
/* Merge the timers, but don't move them to the release master. */
if (is_partitioned) {
rt->event_group = get_event_group_for(cpu);
} else {
		/* Global timers will be added to the event group of whichever
		 * CPU the code is executing on when add_event() is called.
*/
rt->event_group = NULL;
}
rt->prio = prio;
#endif
}
static char* domain_name(const char *name, int cpu)
{
char *buf = kmalloc(LITMUS_LOCKDEP_NAME_MAX_LEN * sizeof(char), GFP_ATOMIC);
snprintf(buf, LITMUS_LOCKDEP_NAME_MAX_LEN, "%s%d", name, cpu);
return buf;
}
struct domain_data *ce_domain_for(int);
static int __init init_mc(void)
{
int cpu;
rt_domain_t *rt;
raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
struct cpu_entry *entry;
struct domain_data *dom_data;
struct ce_dom_data *ce_data;
for_each_online_cpu(cpu) {
entry = &per_cpu(cpus, cpu);
/* CPU */
entry->cpu = cpu;
entry->scheduled = NULL;
entry->linked = NULL;
raw_spin_lock_init(&entry->lock);
raw_spin_lock_init(&entry->signal_lock);
clear_signal(&entry->signal);
#ifdef CONFIG_PLUGIN_MC_REDIRECT
raw_spin_lock_init(&entry->redir_lock);
INIT_LIST_HEAD(&entry->redir);
#endif
/* CRIT_LEVEL_A */
dom_data = &per_cpu(_mc_crit_a, cpu);
ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
dom_data->domain.acquire_resources = dumb_acquire;
dom_data->domain.release_resources = dumb_release;
raw_spin_lock_init(a_dom_lock);
ce_domain_init(&dom_data->domain,
a_dom_lock, ce_requeue, ce_peek_and_take_ready,
ce_peek_and_take_ready, ce_higher_prio, ce_data, cpu,
ce_timer_function);
init_local_domain(entry, dom_data, CRIT_LEVEL_A);
dom_data->domain.name = domain_name("LVL-A", cpu);
/* CRIT_LEVEL_B */
dom_data = &per_cpu(_mc_crit_b, cpu);
rt = &per_cpu(_mc_crit_b_rt, cpu);
init_local_domain(entry, dom_data, CRIT_LEVEL_B);
init_edf_domain(&dom_data->domain, rt, CRIT_LEVEL_B, 1, cpu);
dom_data->domain.acquire_resources = acquire_resources;
dom_data->domain.release_resources = release_resources;
b_dom_lock = dom_data->domain.lock;
raw_spin_lock_init(b_dom_lock);
dom_data->domain.name = domain_name("LVL-B", cpu);
}
/* CRIT_LEVEL_C */
init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
&_mc_heap_c, _mc_nodes_c);
init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, CRIT_LEVEL_C,
0, NO_CPU);
_mc_crit_c.domain.acquire_resources = dumb_acquire;
_mc_crit_c.domain.release_resources = dumb_release;
c_dom_lock = _mc_crit_c.domain.lock;
raw_spin_lock_init(c_dom_lock);
_mc_crit_c.domain.name = "LVL-C";
/* GROUP LOCK */
raw_spin_lock_init(&dgl_lock);
return register_sched_plugin(&mc_plugin);
}
module_init(init_mc);