/* * kernel/sched_part_edf.c * * Implementation of the partitioned EDF scheduler plugin. */ #include #include #include #include #include #include #include typedef struct { rt_domain_t domain; int cpu; struct task_struct* scheduled; /* only RT tasks */ spinlock_t lock; } part_edf_domain_t; #define local_edf (&__get_cpu_var(part_edf_domains).domain) #define local_pedf (&__get_cpu_var(part_edf_domains)) #define remote_edf(cpu) (&per_cpu(part_edf_domains, cpu).domain) #define remote_pedf(cpu) (&per_cpu(part_edf_domains, cpu)) #define task_edf(task) remote_edf(get_partition(task)) static void part_edf_domain_init(part_edf_domain_t* pedf, check_resched_needed_t check, int cpu) { edf_domain_init(&pedf->domain, check); pedf->cpu = cpu; pedf->lock = SPIN_LOCK_UNLOCKED; pedf->scheduled = NULL; } DEFINE_PER_CPU(part_edf_domain_t, part_edf_domains); /* This check is trivial in partioned systems as we only have to consider * the CPU of the partition. * */ static int part_edf_check_resched(rt_domain_t *edf) { part_edf_domain_t *pedf = container_of(edf, part_edf_domain_t, domain); int ret = 0; spin_lock(&pedf->lock); /* because this is a callback from rt_domain_t we already hold * the necessary lock for the ready queue */ if (edf_preemption_needed(edf, pedf->scheduled)) { if (pedf->cpu == smp_processor_id()) set_tsk_need_resched(current); else smp_send_reschedule(pedf->cpu); ret = 1; } spin_unlock(&pedf->lock); return ret; } static reschedule_check_t part_edf_scheduler_tick(void) { unsigned long flags; struct task_struct *t = current; reschedule_check_t want_resched = NO_RESCHED; rt_domain_t *edf = local_edf; part_edf_domain_t *pedf = local_pedf; /* Check for inconsistency. We don't need the lock for this since * ->scheduled is only changed in schedule, which obviously is not * executing in parallel on this CPU */ BUG_ON(is_realtime(t) && t != pedf->scheduled); /* expire tasks even if not in real-time mode * this makes sure that at the end of real-time mode * no tasks "run away forever". */ if (is_realtime(t) && (!--t->time_slice)) { /* this task has exhausted its budget in this period */ set_rt_flags(t, RT_F_SLEEP); want_resched = FORCE_RESCHED; } if (get_rt_mode() == MODE_RT_RUN) { /* check whether anything is waiting to be released * this could probably be moved to the global timer * interrupt handler since the state will only change * once per jiffie */ try_release_pending(edf); if (want_resched != FORCE_RESCHED) { read_lock_irqsave(&edf->ready_lock, flags); if (edf_preemption_needed(edf, t)) want_resched = FORCE_RESCHED; read_unlock_irqrestore(&edf->ready_lock, flags); } } return want_resched; } static int part_edf_schedule(struct task_struct * prev, struct task_struct ** next, runqueue_t * rq) { int need_deactivate = 1; part_edf_domain_t* pedf = local_pedf; rt_domain_t* edf = &pedf->domain; if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP) edf_prepare_for_next_period(prev); if (get_rt_mode() == MODE_RT_RUN) { write_lock(&edf->ready_lock); if (is_realtime(prev) && is_released(prev) && is_running(prev) && !edf_preemption_needed(edf, prev)) { /* this really should only happen if the task has * 100% utilization... */ TRACE("prev will be next, already released\n"); *next = prev; need_deactivate = 0; } else { /* either not yet released, preempted, or non-rt */ *next = __take_ready(edf); if (*next) { /* stick the task into the runqueue */ __activate_task(*next, rq); set_task_cpu(*next, smp_processor_id()); } } spin_lock(&pedf->lock); pedf->scheduled = *next; spin_unlock(&pedf->lock); if (*next) set_rt_flags(*next, RT_F_RUNNING); write_unlock(&edf->ready_lock); } if (is_realtime(prev) && need_deactivate && prev->array) { /* take it out of the run queue */ deactivate_task(prev, rq); } return 0; } static void part_edf_finish_switch(struct task_struct *prev) { rt_domain_t* edf = local_edf; if (!is_realtime(prev) || !is_running(prev)) return; if (get_rt_flags(prev) == RT_F_SLEEP || get_rt_mode() != MODE_RT_RUN) { /* this task has expired * _schedule has already taken care of updating * the release and * deadline. We just must check if has been released. */ if (is_released(prev) && get_rt_mode() == MODE_RT_RUN) { /* already released */ add_ready(edf, prev); TRACE("%d goes straight to ready queue\n", prev->pid); } else /* it has got to wait */ add_release(edf, prev); } else { /* this is a forced preemption * thus the task stays in the ready_queue * we only must make it available to others */ add_ready(edf, prev); } } /* Prepare a task for running in RT mode * Enqueues the task into master queue data structure * returns * -EPERM if task is not TASK_STOPPED */ static long part_edf_prepare_task(struct task_struct * t) { rt_domain_t* edf = task_edf(t); TRACE("[%d] part edf: prepare task %d on CPU %d\n", smp_processor_id(), t->pid, get_partition(t)); if (t->state == TASK_STOPPED) { __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1); if (get_rt_mode() == MODE_RT_RUN) /* The action is already on. * Prepare immediate release. */ edf_release_now(t); /* The task should be running in the queue, otherwise signal * code will try to wake it up with fatal consequences. */ t->state = TASK_RUNNING; add_release(edf, t); return 0; } else return -EPERM; } static void part_edf_wake_up_task(struct task_struct *task) { rt_domain_t* edf; edf = task_edf(task); /* We must determine whether task should go into the release * queue or into the ready queue. It may enter the ready queue * if it has credit left in its time slice and has not yet reached * its deadline. If it is now passed its deadline we assume this the * arrival of a new sporadic job and thus put it in the ready queue * anyway.If it has zero budget and the next release is in the future * it has to go to the release queue. */ TRACE("part edf: wake up %d with budget=%d for cpu %d\n", task->pid, task->time_slice, get_partition(task)); task->state = TASK_RUNNING; if (is_tardy(task)) { /* new sporadic release */ edf_release_now(task); add_ready(edf, task); } else if (task->time_slice) { /* Came back in time before deadline. This may cause * deadline overruns, but since we don't handle suspensions * in the analytical model, we don't care since we can't * guarantee anything at all if tasks block. */ set_rt_flags(task, RT_F_RUNNING); add_ready(edf, task); } else { add_release(edf, task); } } static void part_edf_task_blocks(struct task_struct *t) { BUG_ON(!is_realtime(t)); /* not really anything to do since it can only block if * it is running, and when it is not running it is not in any * queue anyway. * */ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice); BUG_ON(in_list(&t->rt_list)); } /* When _tear_down is called, the task should not be in any queue any more * as it must have blocked first. We don't have any internal state for the task, * it is all in the task_struct. */ static long part_edf_tear_down(struct task_struct * t) { BUG_ON(!is_realtime(t)); TRACE("part edf: tear down called for %d \n", t->pid); BUG_ON(t->array); BUG_ON(in_list(&t->rt_list)); return 0; } static int part_edf_mode_change(int new_mode) { int cpu; if (new_mode == MODE_RT_RUN) for_each_online_cpu(cpu) rerelease_all(remote_edf(cpu), edf_release_at); TRACE("[%d] part edf: mode changed to %d\n", smp_processor_id(), new_mode); return 0; } /* Plugin object */ static sched_plugin_t s_plugin __cacheline_aligned_in_smp = { .ready_to_use = 0 }; /* * Plugin initialization code. */ #define INIT_SCHED_PLUGIN (struct sched_plugin) {\ .plugin_name = "Partitioned EDF",\ .ready_to_use = 1,\ .scheduler_tick = part_edf_scheduler_tick,\ .prepare_task = part_edf_prepare_task,\ .sleep_next_period = edf_sleep_next_period,\ .tear_down = part_edf_tear_down,\ .schedule = part_edf_schedule,\ .finish_switch = part_edf_finish_switch,\ .mode_change = part_edf_mode_change,\ .wake_up_task = part_edf_wake_up_task,\ .task_blocks = part_edf_task_blocks \ } sched_plugin_t *__init init_part_edf_plugin(void) { int i; if (!s_plugin.ready_to_use) { for (i = 0; i < NR_CPUS; i++) { part_edf_domain_init(remote_pedf(i), part_edf_check_resched, i); printk("CPU partition %d initialized.", i); } s_plugin = INIT_SCHED_PLUGIN; } return &s_plugin; }