/*
 * kernel/sched-global-edf.c
 *
 * Re-implementation of the Global EDF scheduler.
 *
 * This version works without using the struct queue. It uses the
 * built-in kernel lists.
 */

/* NOTE: the header names were lost in the original listing; the following
 * set is what this file appears to need and is an assumption.
 */
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>

#include <linux/litmus.h>
#include <linux/sched_plugin.h>
#include <linux/sched_trace.h>
#include <linux/edf_common.h>

/* cpu_entry_t - maintain the state of the priority of each cpu's current
 * task; this is needed to check for priority inversions.
 */
typedef struct {
	int			cpu;
	int			executes_realtime;
	jiffie_t		cur_deadline;
	struct list_head	list;
	atomic_t		will_schedule;
} cpu_entry_t;
DEFINE_PER_CPU(cpu_entry_t, gedf_cpu_entries);

#define set_will_schedule() \
	(atomic_set(&__get_cpu_var(gedf_cpu_entries).will_schedule, 1))
#define clear_will_schedule() \
	(atomic_set(&__get_cpu_var(gedf_cpu_entries).will_schedule, 0))
#define test_will_schedule(cpu) \
	(atomic_read(&per_cpu(gedf_cpu_entries, cpu).will_schedule))

/* always acquire the cpu lock as the last lock to avoid deadlocks */
static spinlock_t gedf_cpu_lock = SPIN_LOCK_UNLOCKED;
/* the cpus queue themselves according to priority in here */
static LIST_HEAD(gedf_cpu_queue);

static rt_domain_t gedf;

#define DUMP(args...) TRACE(args)

/* adjust_cpu_queue - Move the cpu entry to the correct place to maintain
 *                    order in the cpu queue. Caller must hold the ready
 *                    write lock.
 */
static void adjust_cpu_queue(int exec_rt, jiffie_t deadline)
{
	struct list_head *pos;
	cpu_entry_t *other;
	cpu_entry_t *entry;

	spin_lock(&gedf_cpu_lock);

	entry = &__get_cpu_var(gedf_cpu_entries);
	entry->executes_realtime = exec_rt;
	entry->cur_deadline = deadline;

	/* TODO: move instead of del+reinsert */
	list_del(&entry->list);
	/* if we do not execute real-time jobs we just move
	 * to the end of the queue
	 */
	if (entry->executes_realtime)
		list_for_each(pos, &gedf_cpu_queue) {
			other = list_entry(pos, cpu_entry_t, list);
			if (!other->executes_realtime ||
			    time_before_eq(entry->cur_deadline,
					   other->cur_deadline)) {
				__list_add(&entry->list, pos->prev, pos);
				goto out;
			}
		}
	/* if we get this far we have the lowest priority task */
	list_add_tail(&entry->list, &gedf_cpu_queue);
out:
	spin_unlock(&gedf_cpu_lock);
}
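
/* In effect, adjust_cpu_queue() keeps gedf_cpu_queue sorted by the deadline
 * of each CPU's currently scheduled job, earliest deadline first, with CPUs
 * that run background (non-real-time) work at the tail. For example, with
 * deadlines given in jiffies:
 *
 *	head -> CPU1 (deadline 10) -> CPU0 (deadline 25) -> CPU2 (non-RT) <- tail
 *
 * gedf_check_resched() below relies on this invariant: it only has to compare
 * the head of the ready queue against the last entry of the CPU queue.
 */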

/* gedf_check_resched - Check whether another CPU needs to reschedule.
 *
 * The function only checks and kicks the last CPU. That CPU will reschedule
 * and kick the next one if necessary, and so on. The caller is responsible
 * for making sure that it is not the last entry or that a reschedule is not
 * necessary.
 *
 * TODO: This function is probably way too trigger-happy. It should only send
 *       IPIs if the other CPU is not going to reschedule anyway. But that is
 *       hard to detect reliably. Too many reschedules hurt performance but
 *       do not cause incorrect schedules.
 */
static int gedf_check_resched(rt_domain_t *edf)
{
	cpu_entry_t *last;
	int ret = 0;

	spin_lock(&gedf_cpu_lock);

	if (!list_empty(&edf->ready_queue)) {
		last = list_entry(gedf_cpu_queue.prev, cpu_entry_t, list);
		if (!last->executes_realtime ||
		    time_before(next_ready(edf)->rt_param.times.deadline,
				last->cur_deadline)) {
			if (smp_processor_id() == last->cpu)
				set_tsk_need_resched(current);
			else if (!test_will_schedule(last->cpu))
				smp_send_reschedule(last->cpu);
			ret = 1;
		}
	}

	spin_unlock(&gedf_cpu_lock);
	return ret;
}

/* gedf_scheduler_tick - this function is called for every local timer
 *                       interrupt.
 *
 * Checks whether the current task has expired and whether we need to
 * preempt it if it has not expired.
 */
static reschedule_check_t gedf_scheduler_tick(void)
{
	unsigned long flags;
	struct task_struct *t = current;
	reschedule_check_t want_resched = NO_RESCHED;

	/* expire tasks even if not in real-time mode;
	 * this makes sure that at the end of real-time mode
	 * no tasks "run away forever"
	 */
	BUG_ON(is_realtime(t) && t->time_slice > 100000);
	if (is_realtime(t) && (!--t->time_slice)) {
		/* this task has exhausted its budget in this period */
		set_rt_flags(t, RT_F_SLEEP);
		want_resched = FORCE_RESCHED;
		set_will_schedule();
		sched_trace_job_completion(t);
	}
	if (get_rt_mode() == MODE_RT_RUN) {
		/* check whether anything is waiting to be released;
		 * this could probably be moved to the global timer
		 * interrupt handler since the state will only change
		 * once per jiffie
		 */
		try_release_pending(&gedf);
		if (want_resched != FORCE_RESCHED) {
			read_lock_irqsave(&gedf.ready_lock, flags);
			if (edf_preemption_needed(&gedf, t)) {
				want_resched = FORCE_RESCHED;
				set_will_schedule();
			}
			read_unlock_irqrestore(&gedf.ready_lock, flags);
		}
	}
	return want_resched;
}

/* This is the main Global EDF schedule function.
 *
 * Assumes the caller holds the lock for rq and that irqs are disabled.
 * This function only works for indirect switching.
 */
static int gedf_schedule(struct task_struct *prev,
			 struct task_struct **next,
			 runqueue_t *rq)
{
	int need_deactivate = 1;
	int rt;
	jiffie_t deadline;
	unsigned long flags;

	if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP) {
		DUMP("preparing %d for next period\n", prev->pid);
		edf_prepare_for_next_period(prev);
	}

	if (get_rt_mode() == MODE_RT_RUN) {
		write_lock_irqsave(&gedf.ready_lock, flags);

		clear_will_schedule();

		if (is_realtime(prev) && is_released(prev) && is_running(prev)
		    && !edf_preemption_needed(&gedf, prev)) {
			/* Our current task's next job has already been
			 * released and has higher priority than the highest
			 * priority waiting task; in other words: it is tardy.
			 * We just keep it.
			 */
			DUMP("prev will be next, already released\n");
			*next = prev;
			rt = 1;
			deadline = prev->rt_param.times.deadline;
			need_deactivate = 0;
		} else {
			/* either not yet released, preempted, or non-rt */
			*next = __take_ready(&gedf);
			if (*next) {
				/* mark the task as executing on this cpu */
				set_task_cpu(*next, smp_processor_id());
				/* stick the task into the runqueue */
				__activate_task(*next, rq);
				rt = 1;
				deadline = (*next)->rt_param.times.deadline;
			} else
				rt = deadline = 0;
		}

		adjust_cpu_queue(rt, deadline);

		if (rt) {
			set_rt_flags(*next, RT_F_RUNNING);
			gedf.check_resched(&gedf);
		}
		write_unlock_irqrestore(&gedf.ready_lock, flags);
	}

	if (is_realtime(prev) && need_deactivate && prev->array) {
		/* take it out of the run queue */
		deactivate_task(prev, rq);
	}

	/* Don't put prev back into the release queue yet. We first need to
	 * actually switch stacks before it can execute on a different CPU.
	 */

	/* in the current implementation nobody cares about the return value */
	return 0;
}
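
/* Note on the gedf_schedule()/gedf_finish_switch() split: gedf_schedule()
 * deliberately does not requeue prev. Until the low-level context switch has
 * completed, prev's kernel stack is still in use on this CPU, so prev must
 * not yet be visible in the ready or release queue where another CPU could
 * pick it up and start executing it. gedf_finish_switch() below requeues
 * prev only once the switch away from it is done.
 */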

/* gedf_finish_switch - we just finished the switch away from prev;
 *                      it is now safe to requeue the task
 */
static void gedf_finish_switch(struct task_struct *prev)
{
	if (!is_realtime(prev) || !is_running(prev))
		return;

	/*printk(KERN_INFO "gedf finish switch for %d\n", prev->pid);*/
	if (get_rt_flags(prev) == RT_F_SLEEP ||
	    get_rt_mode() != MODE_RT_RUN) {
		/* this task has expired;
		 * _schedule has already taken care of updating the release
		 * and deadline. We just must check whether it has been
		 * released.
		 */
		if (time_before_eq(prev->rt_param.times.release, jiffies)
		    && get_rt_mode() == MODE_RT_RUN) {
			/* already released */
			add_ready(&gedf, prev);
			DUMP("%d goes straight to ready queue\n", prev->pid);
		} else
			/* it has got to wait */
			add_release(&gedf, prev);
	} else {
		/* this is a forced preemption,
		 * thus the task stays in the ready queue;
		 * we only must make it available to others
		 */
		add_ready(&gedf, prev);
	}
}

/* Prepare a task for running in RT mode.
 * Enqueues the task into the master queue data structure.
 * Returns
 *	-EPERM if the task is not TASK_STOPPED
 */
static long gedf_prepare_task(struct task_struct *t)
{
	TRACE("global edf: prepare task %d\n", t->pid);

	if (t->state == TASK_STOPPED) {
		__setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);

		if (get_rt_mode() == MODE_RT_RUN)
			/* The action is already on.
			 * Prepare immediate release.
			 */
			edf_release_now(t);
		/* The task should be running in the queue, otherwise signal
		 * code will try to wake it up with fatal consequences.
		 */
		t->state = TASK_RUNNING;
		add_release(&gedf, t);
		return 0;
	} else
		return -EPERM;
}

static void gedf_wake_up_task(struct task_struct *task)
{
	/* We must determine whether the task should go into the release
	 * queue or into the ready queue. It may enter the ready queue
	 * if it has credit left in its time slice and has not yet reached
	 * its deadline. If it is now past its deadline we assume this is
	 * the arrival of a new sporadic job and thus put it in the ready
	 * queue anyway. If it has zero budget and the next release is in
	 * the future it has to go to the release queue.
	 */
	TRACE("global edf: wake up %d with budget=%d\n",
	      task->pid, task->time_slice);
	task->state = TASK_RUNNING;
	if (is_tardy(task)) {
		/* new sporadic release */
		edf_release_now(task);
		sched_trace_job_release(task);
		add_ready(&gedf, task);
	} else if (task->time_slice) {
		/* came back in time before its deadline
		 * TODO: clip the budget to fit into the period, otherwise it
		 *	 could cause a deadline overrun in the next period,
		 *	 i.e. over-allocation in the next period.
		 */
		set_rt_flags(task, RT_F_RUNNING);
		add_ready(&gedf, task);
	} else {
		add_release(&gedf, task);
	}
}

static void gedf_task_blocks(struct task_struct *t)
{
	BUG_ON(!is_realtime(t));
	/* not really anything to do since it can only block if
	 * it is running, and when it is not running it is not in any
	 * queue anyway
	 *
	 * TODO: Check whether the assumption is correct for SIGKILL and
	 *	 SIGSTOP.
	 */
	TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);

	BUG_ON(t->rt_list.next != LIST_POISON1);
	BUG_ON(t->rt_list.prev != LIST_POISON2);
}

/* When _tear_down is called, the task should not be in any queue any more
 * as it must have blocked first. We don't have any internal state for the
 * task, it is all in the task_struct.
 */
static long gedf_tear_down(struct task_struct *t)
{
	BUG_ON(!is_realtime(t));
	TRACE("global edf: tear down called for %d\n", t->pid);
	BUG_ON(t->array);
	BUG_ON(t->rt_list.next != LIST_POISON1);
	BUG_ON(t->rt_list.prev != LIST_POISON2);
	return 0;
}

static int gedf_mode_change(int new_mode)
{
	int cpu;
	cpu_entry_t *entry;

	/*printk(KERN_INFO "[%d] global edf: mode changed to %d\n",
	  smp_processor_id(), new_mode);*/
	if (new_mode == MODE_RT_RUN) {
		rerelease_all(&gedf, edf_release_at);

		/* initialize per-CPU state;
		 * we can't do this at boot time because we don't know
		 * which CPUs will be online and we can't put non-existing
		 * cpus into the queue
		 */
		spin_lock(&gedf_cpu_lock);
		/* get old cruft out of the way in case we reenter real-time
		 * mode for a second time
		 */
		while (!list_empty(&gedf_cpu_queue))
			list_del(gedf_cpu_queue.next);
		/* reinitialize */
		for_each_online_cpu(cpu) {
			entry = &per_cpu(gedf_cpu_entries, cpu);
			atomic_set(&entry->will_schedule, 0);
			entry->executes_realtime = 0;
			entry->cur_deadline = 0;
			entry->cpu = cpu;
			list_add(&entry->list, &gedf_cpu_queue);
		}
		spin_unlock(&gedf_cpu_lock);
	}
	/*printk(KERN_INFO "[%d] global edf: mode change done\n",
	  smp_processor_id());*/
	return 0;
}

/*	Plugin object	*/
static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
	.ready_to_use = 0
};

/*
 *	Plugin initialization code.
 */
#define INIT_SCHED_PLUGIN (struct sched_plugin){		\
	.plugin_name		= "Global EDF",			\
	.ready_to_use		= 1,				\
	.scheduler_tick		= gedf_scheduler_tick,		\
	.prepare_task		= gedf_prepare_task,		\
	.sleep_next_period	= edf_sleep_next_period,	\
	.tear_down		= gedf_tear_down,		\
	.shutdown_hook		= 0,				\
	.schedule		= gedf_schedule,		\
	.finish_switch		= gedf_finish_switch,		\
	.mode_change		= gedf_mode_change,		\
	.wake_up_task		= gedf_wake_up_task,		\
	.task_blocks		= gedf_task_blocks		\
}

sched_plugin_t *__init init_global_edf_plugin(void)
{
	if (!s_plugin.ready_to_use) {
		edf_domain_init(&gedf, gedf_check_resched);
		s_plugin = INIT_SCHED_PLUGIN;
	}
	return &s_plugin;
}

/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
/*                        NON-PREEMPTIVE GLOBAL EDF                          */

/* gedf_np_scheduler_tick - this function is called for every local timer
 *                          interrupt.
 *
 * checks whether the current task has expired and checks
 * whether we need to preempt it if it has not expired
 */
static reschedule_check_t gedf_np_scheduler_tick(void)
{
	if (get_rt_mode() == MODE_RT_RUN) {
		/* check whether anything is waiting to be released;
		 * this could probably be moved to the global timer
		 * interrupt handler since the state will only change
		 * once per jiffie
		 */
		try_release_pending(&gedf);
	}

	/* expire tasks even if not in real-time mode;
	 * this makes sure that at the end of real-time mode
	 * no tasks "run away forever"
	 */
	BUG_ON(current->time_slice > 1000);
	if (is_realtime(current) && (!--current->time_slice)) {
		/* this task has exhausted its budget in this period */
		set_rt_flags(current, RT_F_SLEEP);
		return FORCE_RESCHED;
	} else
		return NO_RESCHED;
}
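
/* In the non-preemptive variant, the tick above only asks for a reschedule
 * when a real-time job has exhausted its budget, and gedf_np_check_resched()
 * below only ever kicks a CPU that is running background (non-real-time)
 * work. The intended combined effect is that a real-time job, once scheduled,
 * runs until its budget is exhausted and is never preempted by another
 * real-time job.
 */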

/* gedf_np_check_resched - Check whether another CPU needs to reschedule.
 *
 * The function only checks and kicks the last CPU. That CPU will reschedule
 * and kick the next one if necessary, and so on. The caller is responsible
 * for making sure that it is not the last entry or that a reschedule is not
 * necessary.
 */
static int gedf_np_check_resched(rt_domain_t *edf)
{
	cpu_entry_t *last;
	int ret = 0;

	spin_lock(&gedf_cpu_lock);

	if (!list_empty(&edf->ready_queue)) {
		last = list_entry(gedf_cpu_queue.prev, cpu_entry_t, list);
		/* preemption happens only for non-realtime tasks */
		if (!last->executes_realtime) {
			if (smp_processor_id() == last->cpu)
				set_tsk_need_resched(current);
			else
				smp_send_reschedule(last->cpu);
			ret = 1;
			goto out;
		}
	}
out:
	spin_unlock(&gedf_cpu_lock);
	return ret;
}

/* non-preemptive global EDF
 *
 * Non-preemptive EDF is almost the same as normal EDF. We only have to
 * adjust the scheduler tick and the resched function.
 */
#define INIT_SCHED_PLUGIN_NP (struct sched_plugin){		\
	.plugin_name		= "Non-Preemptive Global EDF",	\
	.ready_to_use		= 1,				\
	.scheduler_tick		= gedf_np_scheduler_tick,	\
	.prepare_task		= gedf_prepare_task,		\
	.sleep_next_period	= edf_sleep_next_period,	\
	.tear_down		= gedf_tear_down,		\
	.shutdown_hook		= 0,				\
	.schedule		= gedf_schedule,		\
	.finish_switch		= gedf_finish_switch,		\
	.mode_change		= gedf_mode_change,		\
	.wake_up_task		= gedf_wake_up_task,		\
	.task_blocks		= gedf_task_blocks		\
}

/* As we only set the plugin at boot time,
 * we use the same structure as preemptive EDF. This simplifies a lot
 * of the functions.
 */
sched_plugin_t *__init init_global_edf_np_plugin(void)
{
	if (!s_plugin.ready_to_use) {
		edf_domain_init(&gedf, gedf_np_check_resched);
		s_plugin = INIT_SCHED_PLUGIN_NP;
	}
	return &s_plugin;
}