/*
 * Mixed-Criticality on MultiCore (MC^2) scheduling plugin for LITMUS^RT.
 * Levels A and B use per-CPU (partitioned) reservation environments;
 * level C uses a single global reservation environment (_global_env).
 */

/* NOTE: the original header names were lost in formatting; the list below
 * is inferred from the APIs used in this file. */
#include <linux/percpu.h>
#include <linux/slab.h>
#include <asm/uaccess.h>

#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>
#include <litmus/debug_trace.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/budget.h>
#include <litmus/litmus_proc.h>
#include <litmus/sched_trace.h>

#include <litmus/mc2_common.h>
#include <litmus/reservation.h>
#include <litmus/polling_reservations.h>

struct gmp_reservation_environment _global_env;

/* Per-CPU entry used to pick the lowest-priority CPU for level-C work. */
struct cpu_entry {
	struct task_struct *scheduled;
	lt_t deadline;
	int cpu;
	enum crit_level lv;
	bool will_schedule;
};

struct cpu_priority {
	raw_spinlock_t lock;
	struct cpu_entry cpu_entries[NR_CPUS];
};

struct cpu_priority _lowest_prio_cpu;

/* Per-task plugin state. */
struct mc2_task_state {
	struct task_client res_info;
	int cpu;
	bool has_departed;
	struct mc2_task mc2_param;
};

/* Tracks the "ghost" job (completed but with leftover budget) per level. */
struct crit_entry {
	enum crit_level level;
	struct task_struct *running;
	//struct hrtimer ghost_timer;
};

struct mc2_cpu_state {
	raw_spinlock_t lock;

	struct sup_reservation_environment sup_env;
	struct hrtimer timer;
	struct hrtimer g_timer;

	int cpu;
	struct task_struct *scheduled;
	struct crit_entry crit_entries[NUM_CRIT_LEVELS];
};

static DEFINE_PER_CPU(struct mc2_cpu_state, mc2_cpu_state);

#define cpu_state_for(cpu_id)	(&per_cpu(mc2_cpu_state, cpu_id))
#define local_cpu_state()	(&__get_cpu_var(mc2_cpu_state))

static struct mc2_task_state* get_mc2_state(struct task_struct *tsk)
{
	return (struct mc2_task_state*) tsk_rt(tsk)->plugin_state;
}

static enum crit_level get_task_crit_level(struct task_struct *tsk)
{
	//struct mc2_task_state *tinfo = get_mc2_state(tsk);
	struct mc2_task *mp;

	if (!tsk || !is_realtime(tsk))
		return NUM_CRIT_LEVELS;

	mp = tsk_rt(tsk)->mc2_data;

	if (!mp)
		return NUM_CRIT_LEVELS;
	else
		return mp->crit;
}

static struct reservation* res_find_by_id(struct mc2_cpu_state *state,
					  unsigned int id)
{
	struct reservation *res;

	res = sup_find_by_id(&state->sup_env, id);
	if (!res)
		res = gmp_find_by_id(&_global_env, id);

	return res;
}

static void mc2_update_time(enum crit_level lv,
			    struct mc2_cpu_state *state, lt_t time)
{
	if (lv < CRIT_LEVEL_C)
		sup_update_time(&state->sup_env, time);
	else if (lv == CRIT_LEVEL_C)
		gmp_update_time(&_global_env, time);
	else
		TRACE("update_time(): Criticality level error!!!!\n");
}

/* Detach the task from its reservation; if the job completed with budget
 * left over, it becomes a "ghost" job of its criticality level. */
static void task_departs(struct task_struct *tsk, int job_complete)
{
	struct mc2_task_state* tinfo = get_mc2_state(tsk);
	struct mc2_cpu_state* state = local_cpu_state();
	struct reservation* res;
	struct reservation_client *client;

	res    = tinfo->res_info.client.reservation;
	client = &tinfo->res_info.client;

	res->ops->client_departs(res, client, job_complete);
	tinfo->has_departed = true;
	TRACE_TASK(tsk, "CLIENT DEPART with budget %llu\n", res->cur_budget);

	if (job_complete && res->cur_budget) {
		struct crit_entry* ce;
		enum crit_level lv = tinfo->mc2_param.crit;
		//lt_t now = litmus_clock();

		ce = &state->crit_entries[lv];
		ce->running = tsk;
		res->is_ghost = 1;
		TRACE_TASK(tsk, "BECOME GHOST at %llu\n", litmus_clock());

		//BUG_ON(hrtimer_active(&ce->ghost_timer));
		//TRACE("setting GHOST timer %llu\n", ns_to_ktime(now + res->cur_budget));
		//__hrtimer_start_range_ns(&ce->ghost_timer, ns_to_ktime(now + res->cur_budget), 0, HRTIMER_MODE_ABS_PINNED, 0);
	}
}

/* Attach the task to its reservation again and clear any ghost entry. */
static void task_arrives(struct mc2_cpu_state *state, struct task_struct *tsk)
{
	struct mc2_task_state* tinfo = get_mc2_state(tsk);
	struct reservation* res;
	struct reservation_client *client;
	enum crit_level lv = get_task_crit_level(tsk);

	res    = tinfo->res_info.client.reservation;
	client = &tinfo->res_info.client;

	tinfo->has_departed = false;
	res->ops->client_arrives(res, client);

	if (lv != NUM_CRIT_LEVELS) {
		struct crit_entry *ce;
		ce = &state->crit_entries[lv];
		if (ce->running == tsk)
			ce->running = NULL;
	}
}

/* return: NO_CPU - all CPUs are running tasks with higher priority
 * than Level C */
static int get_lowest_prio_cpu(void)
{
	struct cpu_entry *ce;
	int cpu, ret = NO_CPU;
	lt_t latest_deadline = 0;

	raw_spin_lock(&_lowest_prio_cpu.lock);

	for_each_online_cpu(cpu) {
		ce = &_lowest_prio_cpu.cpu_entries[cpu];
		if (!ce->will_schedule) {
			if (!ce->scheduled) {
				raw_spin_unlock(&_lowest_prio_cpu.lock);
				return ce->cpu;
			} else if (ce->lv == CRIT_LEVEL_C &&
				   ce->deadline > latest_deadline) {
				latest_deadline = ce->deadline;
				ret = ce->cpu;
			}
		}
	}

	raw_spin_unlock(&_lowest_prio_cpu.lock);

	return ret;
}

/* NOTE: drops state->lock */
static void mc2_update_timer_and_unlock(struct mc2_cpu_state *state)
{
	int local;
	lt_t update, now;
	enum crit_level lv = get_task_crit_level(state->scheduled);
	struct next_timer_event *event, *next;
	int found_event = 0;

	//TRACE_TASK(state->scheduled, "update_timer!\n");
	if (lv != NUM_CRIT_LEVELS)
		TRACE_TASK(state->scheduled, "UPDATE_TIMER LV = %d\n", lv);

	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	/* Be sure we're actually running on the right core,
	 * as pres_update_timer() is also called from pres_task_resume(),
	 * which might be called on any CPU when a thread resumes.
	 */
	local = local_cpu_state() == state;

	/* Must drop state lock before calling into hrtimer_start(), which
	 * may raise a softirq, which in turn may wake ksoftirqd. */
	raw_spin_unlock(&state->lock);

	if (update <= now) {
		litmus_reschedule(state->cpu);
	} else if (likely(local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Reprogram only if not already set correctly. */
		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) != update) {
			TRACE("canceling timer...at %llu\n",
			      ktime_to_ns(hrtimer_get_expires(&state->timer)));
			hrtimer_cancel(&state->timer);
			TRACE("setting scheduler timer for %llu\n", update);
			/* We cannot use hrtimer_start() here because the
			 * wakeup flag must be set to zero. */
			__hrtimer_start_range_ns(&state->timer,
					ns_to_ktime(update),
					0 /* timer coalescing slack */,
					HRTIMER_MODE_ABS_PINNED,
					0 /* wakeup */);
		}
	} else if (unlikely(!local && update != SUP_NO_SCHEDULER_UPDATE)) {
		/* Poke remote core only if timer needs to be set earlier than
		 * it is currently set.
		 */
		TRACE("mc2_update_timer for remote CPU %d (update=%llu, "
		      "active:%d, set:%llu)\n",
		      state->cpu, update, hrtimer_active(&state->timer),
		      ktime_to_ns(hrtimer_get_expires(&state->timer)));

		if (!hrtimer_active(&state->timer) ||
		    ktime_to_ns(hrtimer_get_expires(&state->timer)) > update) {
			TRACE("poking CPU %d so that it can update its "
			      "scheduling timer (active:%d, set:%llu)\n",
			      state->cpu, hrtimer_active(&state->timer),
			      ktime_to_ns(hrtimer_get_expires(&state->timer)));
			litmus_reschedule(state->cpu);
		}
	}

	raw_spin_lock(&_global_env.lock);
	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
		if (event->timer_armed_on == NO_CPU) {
			found_event = 1;
			if (event->next_update < litmus_clock()) {
				int cpu = get_lowest_prio_cpu();
				TRACE("GLOBAL EVENT PASSED!! poking CPU %d to reschedule\n",
				      cpu);
				list_del(&event->list);
				kfree(event);
				if (cpu != NO_CPU) {
					raw_spin_lock(&_lowest_prio_cpu.lock);
					_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
					raw_spin_unlock(&_lowest_prio_cpu.lock);
					litmus_reschedule(cpu);
				}
			} else if (!hrtimer_active(&state->g_timer)) {
				int ret;
				TRACE("setting global scheduler timer for %llu\n",
				      event->next_update);
				ret = __hrtimer_start_range_ns(&state->g_timer,
						ns_to_ktime(event->next_update),
						0 /* timer coalescing slack */,
						HRTIMER_MODE_ABS_PINNED,
						0 /* wakeup */);
				if (!ret) {
					event->timer_armed_on = state->cpu;
				}
			}
		}
	}
	raw_spin_unlock(&_global_env.lock);
}

static void mc2_update_ghost_state(struct mc2_cpu_state *state)
{
	int lv = 0;
	struct crit_entry* ce;
	struct reservation *res;
	struct mc2_task_state *tinfo;

	for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
		ce = &state->crit_entries[lv];
		if (ce->running != NULL) {
			tinfo = get_mc2_state(ce->running);
			/*
			if (lv != CRIT_LEVEL_C)
				res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
			else
				continue;
			*/
			res = res_find_by_id(state, tinfo->mc2_param.res_id);
			TRACE("LV %d running id %d budget %llu\n",
			      lv, tinfo->mc2_param.res_id, res->cur_budget);
			if (!res->cur_budget) {
				struct sup_reservation_environment* sup_env = &state->sup_env;

				TRACE("GHOST FINISH id %d at %llu\n",
				      tinfo->mc2_param.res_id, litmus_clock());
				ce->running = NULL;
				res->is_ghost = 0;
				res = list_first_entry_or_null(&sup_env->active_reservations,
						struct reservation, list);
				if (res)
					litmus_reschedule_local();
			}
		}
	}
}

/*
static enum hrtimer_restart on_ghost_timer(struct hrtimer *timer)
{
	struct crit_entry *ce;
	struct mc2_cpu_state *state;

	ce = container_of(timer, struct crit_entry, ghost_timer);
	state = container_of(ce, struct mc2_cpu_state, crit_entries[ce->level]);

	TRACE("GHOST_TIMER FIRED at %llu\n", litmus_clock());

	raw_spin_lock(&state->lock);
	sup_update_time(&state->sup_env, litmus_clock());
	mc2_update_ghost_state(state);
	raw_spin_unlock(&state->lock);

	return HRTIMER_NORESTART;
}
*/

static void update_cpu_prio(struct mc2_cpu_state *state)
{
	struct cpu_entry *ce = &_lowest_prio_cpu.cpu_entries[state->cpu];
	enum crit_level lv = get_task_crit_level(state->scheduled);

	if (!state->scheduled) {
		// cpu is idle.
		ce->scheduled = NULL;
		ce->deadline = ULLONG_MAX;
		ce->lv = NUM_CRIT_LEVELS;
	} else if (lv == CRIT_LEVEL_C) {
		ce->scheduled = state->scheduled;
		ce->deadline = get_deadline(state->scheduled);
		ce->lv = lv;
	} else if (lv < CRIT_LEVEL_C) {
		ce->scheduled = state->scheduled;
		ce->deadline = 0;
		ce->lv = lv;
	}
}

static enum hrtimer_restart on_global_scheduling_timer(struct hrtimer *timer)
{
	unsigned long flags;
	enum hrtimer_restart restart = HRTIMER_NORESTART;
	struct mc2_cpu_state *state;
	struct next_timer_event *event, *next;
	bool schedule_now;
	lt_t update, now;
	int found_event = 0;

	state = container_of(timer, struct mc2_cpu_state, g_timer);

	/* The scheduling timer should only fire on the local CPU, because
	 * otherwise deadlocks via timer_cancel() are possible.
	 * Note: this does not interfere with dedicated interrupt handling, as
	 * even under dedicated interrupt handling scheduling timers for
	 * budget enforcement must occur locally on each CPU.
	 */
	//BUG_ON(state->cpu != raw_smp_processor_id());
	if (state->cpu != raw_smp_processor_id())
		TRACE("BUG!!!!!!!!!!!!! TIMER FIRED ON THE OTHER CPU\n");
	raw_spin_lock_irqsave(&_global_env.lock, flags);

	update = litmus_clock();
	TRACE("GLOBAL TIMER FIRED at %llu\n", update);

	list_for_each_entry_safe(event, next, &_global_env.next_events, list) {
		if (event->next_update < update) {
			found_event = 1;
			list_del(&event->list);
			TRACE("EVENT at %llu IS DELETED\n", event->next_update);
			kfree(event);
		}
	}

	if (!found_event) {
		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
		return restart;
	}

	schedule_now = gmp_update_time(&_global_env, update);

	raw_spin_lock(&state->lock);
	mc2_update_ghost_state(state);
	raw_spin_unlock(&state->lock);

	now = _global_env.env.current_time;

	TRACE_CUR("on_global_scheduling_timer at %llu, upd:%llu (for cpu=%d) SCHEDULE_NOW = %d\n",
		  now, update, state->cpu, schedule_now);

	if (schedule_now) {
		int cpu = get_lowest_prio_cpu();
		if (cpu != NO_CPU) {
			raw_spin_lock(&_lowest_prio_cpu.lock);
			_lowest_prio_cpu.cpu_entries[cpu].will_schedule = true;
			raw_spin_unlock(&_lowest_prio_cpu.lock);
			TRACE("LOWEST CPU = P%d\n", cpu);
			litmus_reschedule(cpu);
		}
	}

	raw_spin_unlock_irqrestore(&_global_env.lock, flags);

	return restart;
}

static enum hrtimer_restart on_scheduling_timer(struct hrtimer *timer)
{
	unsigned long flags;
	enum hrtimer_restart restart = HRTIMER_NORESTART;
	struct mc2_cpu_state *state;
	lt_t update, now;

	state = container_of(timer, struct mc2_cpu_state, timer);

	/* The scheduling timer should only fire on the local CPU, because
	 * otherwise deadlocks via timer_cancel() are possible.
	 * Note: this does not interfere with dedicated interrupt handling, as
	 * even under dedicated interrupt handling scheduling timers for
	 * budget enforcement must occur locally on each CPU.
	 */
	BUG_ON(state->cpu != raw_smp_processor_id());

	TRACE("TIMER FIRED at %llu\n", litmus_clock());
	raw_spin_lock_irqsave(&state->lock, flags);
	sup_update_time(&state->sup_env, litmus_clock());
	mc2_update_ghost_state(state);
	update = state->sup_env.next_scheduler_update;
	now = state->sup_env.env.current_time;

	TRACE_CUR("on_scheduling_timer at %llu, upd:%llu (for cpu=%d)\n",
		  now, update, state->cpu);

	if (update <= now) {
		litmus_reschedule_local();
	} else if (update != SUP_NO_SCHEDULER_UPDATE) {
		hrtimer_set_expires(timer, ns_to_ktime(update));
		restart = HRTIMER_RESTART;
	}

	raw_spin_unlock_irqrestore(&state->lock, flags);

	return restart;
}

struct task_struct* mc2_dispatch(struct sup_reservation_environment* sup_env,
				 struct mc2_cpu_state* state)
{
	struct reservation *res, *next;
	struct task_struct *tsk = NULL;
	struct crit_entry *ce;
	enum crit_level lv;
	lt_t time_slice;

	list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
		if (res->state == RESERVATION_ACTIVE) {
			tsk = res->ops->dispatch_client(res, &time_slice);
			if (likely(tsk)) {
				lv = get_task_crit_level(tsk);
				if (lv == NUM_CRIT_LEVELS) {
					sup_scheduler_update_after(sup_env, res->cur_budget);
					return tsk;
				} else {
					ce = &state->crit_entries[lv];
					if (likely(!ce->running)) {
						sup_scheduler_update_after(sup_env, res->cur_budget);
						res->blocked_by_ghost = 0;
						res->is_ghost = 0;
						return tsk;
					} else {
						res->blocked_by_ghost = 1;
					}
				}
			}
		}
	}

	// no level A or B tasks
	list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
		if (res->state == RESERVATION_ACTIVE && res->scheduled_on == NO_CPU) {
			tsk = res->ops->dispatch_client(res, &time_slice);
			if (likely(tsk)) {
				lv = get_task_crit_level(tsk);
				if (lv == NUM_CRIT_LEVELS) {
					gmp_scheduler_update_after(&_global_env, res->cur_budget);
					//raw_spin_unlock(&_global_env.lock);
					return tsk;
				} else {
					ce = &state->crit_entries[lv];
					if (likely(!ce->running)) {
						gmp_scheduler_update_after(&_global_env, res->cur_budget);
						res->blocked_by_ghost = 0;
						res->is_ghost = 0;
						res->scheduled_on = state->cpu;
						//raw_spin_unlock(&_global_env.lock);
						return tsk;
					} else {
						res->blocked_by_ghost = 1;
					}
				}
			}
		}
	}

	return NULL;
}

static struct task_struct* mc2_schedule(struct task_struct * prev)
{
	/* next == NULL means "schedule background work". */
	struct mc2_cpu_state *state = local_cpu_state();

	raw_spin_lock(&_lowest_prio_cpu.lock);
	if (_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule == true)
		_lowest_prio_cpu.cpu_entries[state->cpu].will_schedule = false;
	raw_spin_unlock(&_lowest_prio_cpu.lock);

	raw_spin_lock(&state->lock);

	//BUG_ON(state->scheduled && state->scheduled != prev);
	//BUG_ON(state->scheduled && !is_realtime(prev));
	if (state->scheduled && state->scheduled != prev)
		TRACE("BUG1!!!!!!!!\n");
	if (state->scheduled && !is_realtime(prev))
		TRACE("BUG2!!!!!!!!\n");

	/* update time */
	state->sup_env.will_schedule = true;
	//TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time ####\n");
	sup_update_time(&state->sup_env, litmus_clock());
	raw_spin_lock(&_global_env.lock);
	gmp_update_time(&_global_env, litmus_clock());
	//TRACE_TASK(prev, "MC2_SCHEDULE sup_update_time !!!!\n");
	mc2_update_ghost_state(state);

	/* remove task from reservation if it blocks */
	if (is_realtime(prev) && !is_running(prev))
		task_departs(prev, is_completed(prev));

	/* figure out what to schedule next */
	state->scheduled = mc2_dispatch(&state->sup_env, state);
	if (state->scheduled && is_realtime(state->scheduled))
		TRACE_TASK(state->scheduled, "mc2_dispatch picked me!\n");

	raw_spin_lock(&_lowest_prio_cpu.lock);
	update_cpu_prio(state);
	raw_spin_unlock(&_lowest_prio_cpu.lock);

	/* Notify LITMUS^RT core that we've arrived at a scheduling decision. */
	sched_state_task_picked();

	/* program scheduler timer */
	state->sup_env.will_schedule = false;

	raw_spin_unlock(&_global_env.lock);

	/* NOTE: drops state->lock */
	mc2_update_timer_and_unlock(state);

	if (prev != state->scheduled && is_realtime(prev)) {
		struct mc2_task_state* tinfo = get_mc2_state(prev);
		struct reservation* res = tinfo->res_info.client.reservation;
		TRACE_TASK(prev, "PREV JOB scheduled_on = P%d\n", res->scheduled_on);
		res->scheduled_on = NO_CPU;
		TRACE_TASK(prev, "descheduled.\n");
	}
	if (state->scheduled) {
		TRACE_TASK(state->scheduled, "scheduled.\n");
		//tinfo = get_mc2_state(state->scheduled);
		//state->run_level = tinfo->mc2_param.crit;
	}

	return state->scheduled;
}

static void resume_legacy_task_model_updates(struct task_struct *tsk)
{
	lt_t now;

	if (is_sporadic(tsk)) {
		/* If this sporadic task was gone for a "long" time and woke up past
		 * its deadline, then give it a new budget by triggering a job
		 * release. This is purely cosmetic and has no effect on the
		 * P-RES scheduler. */
		now = litmus_clock();
		if (is_tardy(tsk, now))
			release_at(tsk, now);
	}
}

/* Called when the state of tsk changes back to TASK_RUNNING.
 * We need to requeue the task.
 */
static void mc2_task_resume(struct task_struct *tsk)
{
	unsigned long flags;
	struct mc2_task_state* tinfo = get_mc2_state(tsk);
	struct mc2_cpu_state *state;

	TRACE_TASK(tsk, "thread wakes up at %llu\n", litmus_clock());

	if (tinfo->cpu != -1)
		state = cpu_state_for(tinfo->cpu);
	else
		state = local_cpu_state();

	raw_spin_lock_irqsave(&state->lock, flags);

	/* Requeue only if self-suspension was already processed. */
	if (tinfo->has_departed) {
		/* Assumption: litmus_clock() is synchronized across cores,
		 * since we might not actually be executing on tinfo->cpu
		 * at the moment.
		 */
		if (tinfo->cpu != -1) {
			sup_update_time(&state->sup_env, litmus_clock());
		} else {
			raw_spin_lock(&_global_env.lock);
			TRACE("RESUME UPDATE ####\n");
			gmp_update_time(&_global_env, litmus_clock());
			TRACE("RESUME UPDATE $$$$\n");
			raw_spin_unlock(&_global_env.lock);
		}

		mc2_update_ghost_state(state);
		task_arrives(state, tsk);
		/* NOTE: drops state->lock */
		TRACE_TASK(tsk, "mc2_resume()\n");
		mc2_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else {
		TRACE_TASK(tsk, "resume event ignored, still scheduled\n");
		raw_spin_unlock_irqrestore(&state->lock, flags);
	}

	resume_legacy_task_model_updates(tsk);
}

/* syscall backend for job completions */
static long mc2_complete_job(void)
{
	ktime_t next_release;
	long err;

	TRACE_CUR("mc2_complete_job at %llu (deadline: %llu)\n",
		  litmus_clock(), get_deadline(current));

	tsk_rt(current)->completed = 1;

	if (tsk_rt(current)->sporadic_release) {
		struct mc2_cpu_state *state;
		struct reservation_environment *env;
		struct mc2_task_state *tinfo;
		struct reservation *res;
		unsigned long flags;

		local_irq_save(flags);

		state = local_cpu_state();
		env = &(state->sup_env.env);
		tinfo = get_mc2_state(current);
		res = res_find_by_id(state, tsk_rt(current)->mc2_data->res_id);

		if (get_task_crit_level(current) < CRIT_LEVEL_C) {
			raw_spin_lock(&state->lock);
			env->time_zero = tsk_rt(current)->sporadic_release_time;
		} else {
			raw_spin_lock(&_global_env.lock);
			_global_env.env.time_zero = tsk_rt(current)->sporadic_release_time;
		}

		res->next_replenishment = tsk_rt(current)->sporadic_release_time;

		if (get_task_crit_level(current) == CRIT_LEVEL_A) {
			struct table_driven_reservation *tdres;
			tdres = container_of(res, struct table_driven_reservation, res);
			tdres->next_interval = 0;
			tdres->major_cycle_start = tsk_rt(current)->sporadic_release_time;
			res->next_replenishment += tdres->intervals[0].start;
		}
		res->cur_budget = 0;
		res->env->change_state(res->env, res, RESERVATION_DEPLETED);

		//TRACE_CUR("CHANGE NEXT_REP = %llu\n NEXT_UPDATE = %llu\n", res->next_replenishment, state->sup_env.next_scheduler_update);

		if (get_task_crit_level(current) < CRIT_LEVEL_C) {
			raw_spin_unlock(&state->lock);
		} else {
			raw_spin_unlock(&_global_env.lock);
		}

		local_irq_restore(flags);
	}

	prepare_for_next_period(current);
	next_release = ns_to_ktime(get_release(current));
	preempt_disable();
	TRACE_CUR("next_release=%llu\n", get_release(current));
	if (get_release(current) > litmus_clock()) {
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_enable_no_resched();
		err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
	} else {
		err = 0;
		TRACE_CUR("TARDY: release=%llu now=%llu\n",
			  get_release(current), litmus_clock());
		preempt_enable();
	}

	TRACE_CUR("mc2_complete_job returns at %llu\n", litmus_clock());
	return err;
}

static long mc2_admit_task(struct task_struct *tsk)
{
	long err = -ESRCH;
	unsigned long flags;
	struct reservation *res;
	struct mc2_cpu_state *state;
	struct mc2_task_state *tinfo = kzalloc(sizeof(*tinfo), GFP_ATOMIC);
	struct mc2_task *mp = tsk_rt(tsk)->mc2_data;
	enum crit_level lv;

	if (!tinfo)
		return -ENOMEM;

	if (!mp) {
		printk(KERN_ERR "mc2_admit_task: criticality level has not been set\n");
		/* do not leak the per-task state allocated above */
		kfree(tinfo);
		return err;
	}

	lv = mp->crit;
	preempt_disable();

	if (lv < CRIT_LEVEL_C) {
		state = cpu_state_for(task_cpu(tsk));
		raw_spin_lock_irqsave(&state->lock, flags);

		res = sup_find_by_id(&state->sup_env, mp->res_id);

		/* found the appropriate reservation (or vCPU) */
		if (res) {
			TRACE_TASK(tsk, "SUP FOUND RES ID\n");
			tinfo->mc2_param.crit = mp->crit;
			tinfo->mc2_param.res_id = mp->res_id;

			//kfree(tsk_rt(tsk)->plugin_state);
			//tsk_rt(tsk)->plugin_state = NULL;

			err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
			tinfo->cpu = task_cpu(tsk);
			tinfo->has_departed = true;
			tsk_rt(tsk)->plugin_state = tinfo;

			/* disable LITMUS^RT's per-thread budget enforcement */
			tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
		}

		raw_spin_unlock_irqrestore(&state->lock, flags);
	} else if (lv == CRIT_LEVEL_C) {
		raw_spin_lock_irqsave(&_global_env.lock, flags);

		res = gmp_find_by_id(&_global_env, mp->res_id);

		/* found the appropriate reservation (or vCPU) */
		if (res) {
			TRACE_TASK(tsk, "GMP FOUND RES ID\n");
			tinfo->mc2_param.crit = mp->crit;
			tinfo->mc2_param.res_id = mp->res_id;

			//kfree(tsk_rt(tsk)->plugin_state);
			//tsk_rt(tsk)->plugin_state = NULL;

			err = mc2_task_client_init(&tinfo->res_info, &tinfo->mc2_param, tsk, res);
			tinfo->cpu = -1;
			tinfo->has_departed = true;
			tsk_rt(tsk)->plugin_state = tinfo;

			/* disable LITMUS^RT's per-thread budget enforcement */
			tsk_rt(tsk)->task_params.budget_policy = NO_ENFORCEMENT;
		}

		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
	}

	preempt_enable();

	if (err)
		kfree(tinfo);

	return err;
}

static void mc2_task_new(struct task_struct *tsk, int on_runqueue,
			 int is_running)
{
	unsigned long flags;
	struct mc2_task_state* tinfo = get_mc2_state(tsk);
	struct mc2_cpu_state *state; // = cpu_state_for(tinfo->cpu);
	struct reservation *res;
	enum crit_level lv = get_task_crit_level(tsk);

	TRACE_TASK(tsk, "new RT task %llu (on_rq:%d, running:%d)\n",
		   litmus_clock(), on_runqueue, is_running);

	if (tinfo->cpu == -1)
		state = local_cpu_state();
	else
		state = cpu_state_for(tinfo->cpu);

	/* acquire the lock protecting the state and disable interrupts */
	raw_spin_lock_irqsave(&state->lock, flags);

	if (is_running) {
		state->scheduled = tsk;
		/* make sure this task should actually be running */
		litmus_reschedule_local();
	}

	if (on_runqueue || is_running) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */
		raw_spin_lock(&_global_env.lock);
		mc2_update_time(lv, state, litmus_clock());
		raw_spin_unlock(&_global_env.lock);
		mc2_update_ghost_state(state);
		task_arrives(state, tsk);
		/* NOTE: drops state->lock */
		TRACE("mc2_new()\n");
		mc2_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	res = res_find_by_id(state, tinfo->mc2_param.res_id);

	if (res) {
		TRACE_TASK(tsk, "mc2_task_new() next_replenishment = %llu\n",
			   res->next_replenishment);
		release_at(tsk, res->next_replenishment);
	} else
		TRACE_TASK(tsk, "mc2_task_new() next_replenishment = NULL\n");
}

static long mc2_reservation_destroy(unsigned int reservation_id, int cpu)
{
	long ret = -EINVAL;
	struct mc2_cpu_state *state;
	struct reservation *res = NULL, *next;
	struct sup_reservation_environment *sup_env;
	int found = 0;
	enum crit_level lv = get_task_crit_level(current);

	if (cpu == -1) {
		raw_spin_lock(&_global_env.lock);

		list_for_each_entry_safe(res, next, &_global_env.depleted_reservations, list) {
			if (res->id == reservation_id) {
				TRACE("DESTROY RES FOUND!!!\n");
				list_del(&res->list);
				kfree(res);
				found = 1;
				ret = 0;
			}
		}
		if (!found) {
			list_for_each_entry_safe(res, next, &_global_env.inactive_reservations, list) {
				if (res->id == reservation_id) {
					TRACE("DESTROY RES FOUND!!!\n");
					list_del(&res->list);
					kfree(res);
					found = 1;
					ret = 0;
				}
			}
		}
		if (!found) {
			list_for_each_entry_safe(res, next, &_global_env.active_reservations, list) {
				if (res->id == reservation_id) {
					TRACE("DESTROY RES FOUND!!!\n");
					list_del(&res->list);
					kfree(res);
					found = 1;
					ret = 0;
				}
			}
		}
		/*
		list_for_each_entry(res, &_global_env.depleted_reservations, list) {
			TRACE("DEPLETED LIST R%d\n", res->id);
		}
		list_for_each_entry(res, &_global_env.inactive_reservations, list) {
			TRACE("INACTIVE LIST R%d\n", res->id);
		}
		list_for_each_entry(res, &_global_env.active_reservations, list) {
			TRACE("ACTIVE LIST R%d\n", res->id);
		}
		*/
		if (list_empty(&_global_env.active_reservations))
			INIT_LIST_HEAD(&_global_env.active_reservations);
		if (list_empty(&_global_env.depleted_reservations))
			INIT_LIST_HEAD(&_global_env.depleted_reservations);
		if (list_empty(&_global_env.inactive_reservations))
			INIT_LIST_HEAD(&_global_env.inactive_reservations);
		if (list_empty(&_global_env.next_events))
			INIT_LIST_HEAD(&_global_env.next_events);

		raw_spin_unlock(&_global_env.lock);
	} else {
		state = cpu_state_for(cpu);
		raw_spin_lock(&state->lock);

		// res = sup_find_by_id(&state->sup_env, reservation_id);
		sup_env = &state->sup_env;
		//if (!res) {
		list_for_each_entry_safe(res, next, &sup_env->depleted_reservations, list) {
			if (res->id == reservation_id) {
				if (lv == CRIT_LEVEL_A) {
					struct table_driven_reservation *tdres;
					tdres = container_of(res, struct table_driven_reservation, res);
					kfree(tdres->intervals);
				}
				list_del(&res->list);
				kfree(res);
				found = 1;
				ret = 0;
			}
		}
		if (!found) {
			list_for_each_entry_safe(res, next, &sup_env->inactive_reservations, list) {
				if (res->id == reservation_id) {
					if (lv == CRIT_LEVEL_A) {
						struct table_driven_reservation *tdres;
						tdres = container_of(res, struct table_driven_reservation, res);
						kfree(tdres->intervals);
					}
					list_del(&res->list);
					kfree(res);
					found = 1;
					ret = 0;
				}
			}
		}
		if (!found) {
			list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
				if (res->id == reservation_id) {
					if (lv == CRIT_LEVEL_A) {
						struct table_driven_reservation *tdres;
						tdres = container_of(res, struct table_driven_reservation, res);
						kfree(tdres->intervals);
					}
					list_del(&res->list);
					kfree(res);
					found = 1;
					ret = 0;
				}
			}
		}
		//}

		raw_spin_unlock(&state->lock);
	}

	TRACE("RESERVATION_DESTROY ret = %d\n", ret);
	return ret;
}

static void mc2_task_exit(struct task_struct *tsk)
{
	unsigned long flags;
	struct mc2_task_state* tinfo = get_mc2_state(tsk);
	struct mc2_cpu_state *state;
	enum crit_level lv = tinfo->mc2_param.crit;
	struct crit_entry* ce;

	if (tinfo->cpu != -1)
		state = cpu_state_for(tinfo->cpu);
	else
		state = local_cpu_state();

	raw_spin_lock_irqsave(&state->lock, flags);

	if (state->scheduled == tsk)
		state->scheduled = NULL;

	ce = &state->crit_entries[lv];
	if (ce->running == tsk)
		ce->running = NULL;

	/* remove from queues */
	if (is_running(tsk)) {
		/* Assumption: litmus_clock() is synchronized across cores
		 * [see comment in pres_task_resume()] */

		//if (lv < CRIT_LEVEL_C)
		//	sup_update_time(&state->sup_env, litmus_clock());
		raw_spin_lock(&_global_env.lock);
		mc2_update_time(lv, state, litmus_clock());
		raw_spin_unlock(&_global_env.lock);
		mc2_update_ghost_state(state);
		task_departs(tsk, 0);

		/* NOTE: drops state->lock */
		TRACE("mc2_exit()\n");
		mc2_update_timer_and_unlock(state);
		local_irq_restore(flags);
	} else
		raw_spin_unlock_irqrestore(&state->lock, flags);

	/*
	if (tinfo->mc2_param.crit == CRIT_LEVEL_A) {
		struct table_driven_reservation *td_res;
		struct reservation *res;
		res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
		td_res = container_of(res, struct table_driven_reservation, res);
		kfree(td_res->intervals);
		//kfree(td_res);
	} else if (tinfo->mc2_param.crit == CRIT_LEVEL_B) {
		struct polling_reservation *pres;
		struct reservation *res;
		res = sup_find_by_id(&state->sup_env, tinfo->mc2_param.res_id);
		pres = container_of(res, struct polling_reservation, res);
		kfree(pres);
	}
	*/

	kfree(tsk_rt(tsk)->plugin_state);
	tsk_rt(tsk)->plugin_state = NULL;
	kfree(tsk_rt(tsk)->mc2_data);
	tsk_rt(tsk)->mc2_data = NULL;
}

static long create_polling_reservation(
	int res_type,
	struct reservation_config *config)
{
	struct mc2_cpu_state *state;
	struct reservation* res;
	struct polling_reservation *pres;
	unsigned long flags;
	int use_edf = config->priority == LITMUS_NO_PRIORITY;
	int periodic = res_type == PERIODIC_POLLING;
	long err = -EINVAL;

	if (config->polling_params.budget >
	    config->polling_params.period) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "budget > period\n", config->id);
		return -EINVAL;
	}
	if (config->polling_params.budget >
	    config->polling_params.relative_deadline
	    && config->polling_params.relative_deadline) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "budget > deadline\n", config->id);
		return -EINVAL;
	}
	if (config->polling_params.offset >
	    config->polling_params.period) {
		printk(KERN_ERR "invalid polling reservation (%u): "
		       "offset > period\n", config->id);
		return -EINVAL;
	}

	/* Allocate before we grab a spin lock.
	 * Todo: would be nice to use a core-local allocation.
	 */
	pres = kzalloc(sizeof(*pres), GFP_KERNEL);
	if (!pres)
		return -ENOMEM;

	if (config->cpu != -1) {
		state = cpu_state_for(config->cpu);
		raw_spin_lock_irqsave(&state->lock, flags);

		res = sup_find_by_id(&state->sup_env, config->id);
		if (!res) {
			polling_reservation_init(pres, use_edf, periodic,
				config->polling_params.budget,
				config->polling_params.period,
				config->polling_params.relative_deadline,
				config->polling_params.offset);
			pres->res.id = config->id;
			pres->res.blocked_by_ghost = 0;
			pres->res.is_ghost = 0;
			if (!use_edf)
				pres->res.priority = config->priority;
			sup_add_new_reservation(&state->sup_env, &pres->res);
			err = config->id;
		} else {
			err = -EEXIST;
		}

		raw_spin_unlock_irqrestore(&state->lock, flags);
	} else {
		raw_spin_lock_irqsave(&_global_env.lock, flags);

		res = gmp_find_by_id(&_global_env, config->id);
		if (!res) {
			polling_reservation_init(pres, use_edf, periodic,
				config->polling_params.budget,
				config->polling_params.period,
				config->polling_params.relative_deadline,
				config->polling_params.offset);
			pres->res.id = config->id;
			pres->res.blocked_by_ghost = 0;
			pres->res.scheduled_on = NO_CPU;
			pres->res.is_ghost = 0;
			if (!use_edf)
				pres->res.priority = config->priority;
			gmp_add_new_reservation(&_global_env, &pres->res);
			TRACE("GMP_ADD_NEW_RESERVATION R%d\n", pres->res.id);
			err = config->id;
		} else {
			err = -EEXIST;
		}

		raw_spin_unlock_irqrestore(&_global_env.lock, flags);
	}

	if (err < 0)
		kfree(pres);

	return err;
}

#define MAX_INTERVALS 1024

static long create_table_driven_reservation(
	struct reservation_config *config)
{
	struct mc2_cpu_state *state;
	struct reservation* res;
	struct table_driven_reservation *td_res = NULL;
	struct lt_interval *slots = NULL;
	size_t slots_size;
	unsigned int i, num_slots;
	unsigned long flags;
	long err = -EINVAL;

	if (!config->table_driven_params.num_intervals) {
		printk(KERN_ERR "invalid table-driven reservation (%u): "
		       "no intervals\n", config->id);
		return -EINVAL;
	}

	if (config->table_driven_params.num_intervals > MAX_INTERVALS) {
		printk(KERN_ERR "invalid table-driven reservation (%u): "
		       "too many intervals (max: %d)\n", config->id, MAX_INTERVALS);
		return -EINVAL;
	}

	num_slots = config->table_driven_params.num_intervals;
	slots_size = sizeof(slots[0]) * num_slots;
	slots = kzalloc(slots_size, GFP_KERNEL);
	if (!slots)
		return -ENOMEM;

	td_res = kzalloc(sizeof(*td_res), GFP_KERNEL);
	if (!td_res)
		err = -ENOMEM;
	else
		err = copy_from_user(slots,
				     config->table_driven_params.intervals, slots_size);

	if (!err) {
		/* sanity checks */
		for (i = 0; !err && i < num_slots; i++)
			if (slots[i].end <= slots[i].start) {
				printk(KERN_ERR
				       "invalid table-driven reservation (%u): "
				       "invalid interval %u => [%llu, %llu]\n",
				       config->id, i,
				       slots[i].start, slots[i].end);
				err = -EINVAL;
			}

		for (i = 0; !err && i + 1 < num_slots; i++)
			if (slots[i + 1].start <= slots[i].end) {
				printk(KERN_ERR
				       "invalid table-driven reservation (%u): "
				       "overlapping intervals %u, %u\n",
				       config->id, i, i + 1);
				err = -EINVAL;
			}

		if (slots[num_slots - 1].end >
		    config->table_driven_params.major_cycle_length) {
			printk(KERN_ERR
			       "invalid table-driven reservation (%u): last "
			       "interval ends past major cycle %llu > %llu\n",
			       config->id,
			       slots[num_slots - 1].end,
			       config->table_driven_params.major_cycle_length);
			err = -EINVAL;
		}
	}

	if (!err) {
		state = cpu_state_for(config->cpu);
		raw_spin_lock_irqsave(&state->lock, flags);

		res = sup_find_by_id(&state->sup_env, config->id);
		if (!res) {
			table_driven_reservation_init(td_res,
				config->table_driven_params.major_cycle_length,
				slots, num_slots);
			td_res->res.id = config->id;
			td_res->res.priority = config->priority;
			td_res->res.blocked_by_ghost = 0;
			sup_add_new_reservation(&state->sup_env, &td_res->res);
			err = config->id;
		} else {
			err = -EEXIST;
		}

		raw_spin_unlock_irqrestore(&state->lock, flags);
	}

	if (err < 0) {
		kfree(slots);
		kfree(td_res);
	}

	return err;
}

static long mc2_reservation_create(int res_type, void* __user _config)
{
	long ret = -EINVAL;
	struct reservation_config config;

	TRACE("Attempt to create reservation (%d)\n", res_type);

	if (copy_from_user(&config, _config, sizeof(config)))
		return -EFAULT;

	if (config.cpu != -1) {
		if (config.cpu < 0 || !cpu_online(config.cpu)) {
			printk(KERN_ERR "invalid polling reservation (%u): "
			       "CPU %d offline\n", config.id, config.cpu);
			return -EINVAL;
		}
	}

	switch (res_type) {
	case PERIODIC_POLLING:
	case SPORADIC_POLLING:
		ret = create_polling_reservation(res_type, &config);
		break;
	case TABLE_DRIVEN:
		ret = create_table_driven_reservation(&config);
		break;
	default:
		return -EINVAL;
	}

	return ret;
}

static struct domain_proc_info mc2_domain_proc_info;

static long mc2_get_domain_proc_info(struct domain_proc_info **ret)
{
	*ret = &mc2_domain_proc_info;
	return 0;
}

static void mc2_setup_domain_proc(void)
{
	int i, cpu;
	int num_rt_cpus = num_online_cpus();

	struct cd_mapping *cpu_map, *domain_map;

	memset(&mc2_domain_proc_info, 0, sizeof(mc2_domain_proc_info));
	init_domain_proc_info(&mc2_domain_proc_info, num_rt_cpus, num_rt_cpus);
	mc2_domain_proc_info.num_cpus = num_rt_cpus;
	mc2_domain_proc_info.num_domains = num_rt_cpus;

	i = 0;
	for_each_online_cpu(cpu) {
		cpu_map = &mc2_domain_proc_info.cpu_to_domains[i];
		domain_map = &mc2_domain_proc_info.domain_to_cpus[i];

		cpu_map->id = cpu;
		domain_map->id = i;
		cpumask_set_cpu(i, cpu_map->mask);
		cpumask_set_cpu(cpu, domain_map->mask);
		++i;
	}
}

static long mc2_activate_plugin(void)
{
	int cpu, lv;
	struct mc2_cpu_state *state;
	struct cpu_entry *ce;

	gmp_init(&_global_env);
	raw_spin_lock_init(&_lowest_prio_cpu.lock);

	for_each_online_cpu(cpu) {
		TRACE("Initializing CPU%d...\n", cpu);

		state = cpu_state_for(cpu);
		ce = &_lowest_prio_cpu.cpu_entries[cpu];

		ce->cpu = cpu;
		ce->scheduled = NULL;
		ce->deadline = ULLONG_MAX;
		ce->lv = NUM_CRIT_LEVELS;
		ce->will_schedule = false;

		raw_spin_lock_init(&state->lock);
		state->cpu = cpu;
		state->scheduled = NULL;

		for (lv = 0; lv < NUM_CRIT_LEVELS; lv++) {
			struct crit_entry *cr_entry = &state->crit_entries[lv];
			cr_entry->level = lv;
			cr_entry->running = NULL;
			//hrtimer_init(&ce->ghost_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
			//ce->ghost_timer.function = on_ghost_timer;
		}

		sup_init(&state->sup_env);

		hrtimer_init(&state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
		state->timer.function = on_scheduling_timer;

		hrtimer_init(&state->g_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
		state->g_timer.function = on_global_scheduling_timer;
	}

	mc2_setup_domain_proc();

	return 0;
}

static void mc2_finish_switch(struct task_struct *prev)
{
	struct mc2_cpu_state *state = local_cpu_state();

	state->scheduled = is_realtime(current) ? current : NULL;
}

static long mc2_deactivate_plugin(void)
{
	int cpu;
	struct mc2_cpu_state *state;
	struct reservation *res;
	struct next_timer_event *event;

	for_each_online_cpu(cpu) {
		state = cpu_state_for(cpu);
		raw_spin_lock(&state->lock);

		hrtimer_cancel(&state->timer);
		hrtimer_cancel(&state->g_timer);

		/* Delete all reservations --- assumes struct reservation
		 * is prefix of containing struct. */

		while (!list_empty(&state->sup_env.active_reservations)) {
			res = list_first_entry(
				&state->sup_env.active_reservations,
				struct reservation, list);
			list_del(&res->list);
			kfree(res);
		}

		while (!list_empty(&state->sup_env.inactive_reservations)) {
			res = list_first_entry(
				&state->sup_env.inactive_reservations,
				struct reservation, list);
			list_del(&res->list);
			kfree(res);
		}

		while (!list_empty(&state->sup_env.depleted_reservations)) {
			res = list_first_entry(
				&state->sup_env.depleted_reservations,
				struct reservation, list);
			list_del(&res->list);
			kfree(res);
		}

		raw_spin_unlock(&state->lock);
	}

	raw_spin_lock(&_global_env.lock);

	while (!list_empty(&_global_env.active_reservations)) {
		TRACE("RES FOUND!!!\n");
		res = list_first_entry(
			&_global_env.active_reservations,
			struct reservation, list);
		list_del(&res->list);
		kfree(res);
	}

	while (!list_empty(&_global_env.inactive_reservations)) {
		TRACE("RES FOUND!!!\n");
		res = list_first_entry(
			&_global_env.inactive_reservations,
			struct reservation, list);
		list_del(&res->list);
		kfree(res);
	}

	while (!list_empty(&_global_env.depleted_reservations)) {
		TRACE("RES FOUND!!!\n");
		res = list_first_entry(
			&_global_env.depleted_reservations,
			struct reservation, list);
		list_del(&res->list);
		kfree(res);
	}

	while (!list_empty(&_global_env.next_events)) {
		TRACE("EVENT FOUND!!!\n");
		event = list_first_entry(
			&_global_env.next_events,
			struct next_timer_event, list);
		list_del(&event->list);
		kfree(event);
	}

	raw_spin_unlock(&_global_env.lock);

	destroy_domain_proc_info(&mc2_domain_proc_info);
	return 0;
}

static struct sched_plugin mc2_plugin = {
	.plugin_name		= "MC2",
	.schedule		= mc2_schedule,
	.finish_switch		= mc2_finish_switch,
	.task_wake_up		= mc2_task_resume,
	.admit_task		= mc2_admit_task,
	.task_new		= mc2_task_new,
	.task_exit		= mc2_task_exit,
	.complete_job		= mc2_complete_job,
	.get_domain_proc_info	= mc2_get_domain_proc_info,
	.activate_plugin	= mc2_activate_plugin,
	.deactivate_plugin	= mc2_deactivate_plugin,
	.reservation_create	= mc2_reservation_create,
	.reservation_destroy	= mc2_reservation_destroy,
};

static int __init init_mc2(void)
{
	return register_sched_plugin(&mc2_plugin);
}

module_init(init_mc2);