author    Bjoern B. Brandenburg <bbb@cs.unc.edu>  2007-10-04 15:28:41 -0400
committer Bjoern B. Brandenburg <bbb@cs.unc.edu>  2007-10-04 15:28:41 -0400
commit    4be7a143bd5a08e8b8cee8539dea745a6107cfd3 (patch)
tree      fcd1bf22fa8a30fe6b7c8f44ff75fe674438f7de /kernel
parent    d74881ffa37434d2ce5455e9e2086292c6128d56 (diff)
Add adaptive scheduler based on GSN-EDF.
This only introduces the necessary source files.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile         |   2
-rw-r--r--  kernel/litmus.c         |   3
-rw-r--r--  kernel/sched_adaptive.c | 812
3 files changed, 815 insertions, 2 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 1b6957b160..55acc937b5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	edf_common.o fifo_common.o pfair_common.o\
 	sched_global_edf.o sched_part_edf.o sched_edf_hsb.o sched_pfair.o \
 	sched_gsn_edf.o sched_psn_edf.o litmus_sem.o \
-	trace.o ft_event.o rt_domain.o
+	trace.o ft_event.o rt_domain.o sched_adaptive.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/litmus.c b/kernel/litmus.c
index 1e4db2cd20..217b0a0357 100644
--- a/kernel/litmus.c
+++ b/kernel/litmus.c
@@ -592,6 +592,7 @@ sched_plugin_t *init_edf_hsb_plugin(void);
 sched_plugin_t *init_pfair_plugin(void);
 sched_plugin_t *init_gsn_edf_plugin(void);
 sched_plugin_t *init_psn_edf_plugin(void);
+sched_plugin_t *init_adaptive_plugin(void);
 
 /* keep everything needed to setup plugins in one place */
 
@@ -616,7 +617,7 @@ static struct {
 	PLUGIN(PFAIR, pfair),
 	PLUGIN(GSN_EDF, gsn_edf),
 	PLUGIN(PSN_EDF, psn_edf),
-
+	PLUGIN(ADAPTIVE, adaptive),
 	/*********************************************
 	 * Add your custom plugin here
 	 **********************************************/
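
The two hunks above register the new plugin: the forward declaration makes
init_adaptive_plugin() visible, and the PLUGIN() table entry pairs the plugin
identifier with that initializer. The macro body itself is outside this diff;
a plausible sketch of what an entry expands to (the field names here are
illustrative assumptions, not taken from litmus.c):

	/* hypothetical expansion of PLUGIN(ADAPTIVE, adaptive): */
	{ .name = "ADAPTIVE", .init = init_adaptive_plugin }

Either way, the visible effect is that init_adaptive_plugin(), defined at the
bottom of kernel/sched_adaptive.c below, becomes reachable from the plugin
setup table.
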
diff --git a/kernel/sched_adaptive.c b/kernel/sched_adaptive.c
new file mode 100644
index 0000000000..14a8db7638
--- /dev/null
+++ b/kernel/sched_adaptive.c
@@ -0,0 +1,812 @@
/*
 * kernel/sched_adaptive.c
 *
 * Implementation of Aaron's adaptive global EDF scheduling algorithm.
 *
 * This scheduler is based on the GSN-EDF scheduler for the sake of
 * simplicity.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>

#include <linux/queuelock.h>
#include <linux/litmus.h>
#include <linux/sched_plugin.h>
#include <linux/edf_common.h>
#include <linux/sched_trace.h>

/* Overview of GSN-EDF operations.
 *
 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
 * description only covers how the individual operations are implemented in
 * LITMUS.
 *
 * link_task_to_cpu(T, cpu)	- Low-level operation to update the linkage
 *				  structure (NOT the actually scheduled
 *				  task). If there is another linked task To
 *				  already it will set To->linked_on = NO_CPU
 *				  (thereby removing its association with this
 *				  CPU). However, it will not requeue the
 *				  previously linked task (if any). It will set
 *				  T's state to RT_F_RUNNING and check whether
 *				  it is already running somewhere else. If T
 *				  is scheduled somewhere else it will link
 *				  it to that CPU instead (and pull the linked
 *				  task to cpu). T may be NULL.
 *
 * unlink(T)			- Unlink removes T from all scheduler data
 *				  structures. If it is linked to some CPU it
 *				  will link NULL to that CPU. If it is
 *				  currently queued in the gsnedf queue it will
 *				  be removed from the T->rt_list. It is safe to
 *				  call unlink(T) if T is not linked. T may not
 *				  be NULL.
 *
 * requeue(T)			- Requeue will insert T into the appropriate
 *				  queue. If the system is in real-time mode and
 *				  T has already been released, it will go into
 *				  the ready queue. If the system is not in
 *				  real-time mode, T will go into the release
 *				  queue. If T's release time is in the future,
 *				  it will go into the release queue. That means
 *				  that T's release time/job no/etc. has to be
 *				  updated before requeue(T) is called. It is
 *				  not safe to call requeue(T) when T is already
 *				  queued. T may not be NULL.
 *
 * gsnedf_job_arrival(T)	- This is the catch-all function when T enters
 *				  the system after either a suspension or at a
 *				  job release. It will queue T (which means it
 *				  is not safe to call gsnedf_job_arrival(T) if
 *				  T is already queued) and then check whether a
 *				  preemption is necessary. If a preemption is
 *				  necessary it will update the linkage
 *				  accordingly and cause schedule() to be called
 *				  (either with an IPI or need_resched). It is
 *				  safe to call gsnedf_job_arrival(T) if T's
 *				  next job has not been actually released yet
 *				  (release time in the future). T will be put
 *				  on the release queue in that case.
 *
 * job_completion(T)		- Take care of everything that needs to be done
 *				  to prepare T for its next release and place
 *				  it in the right queue with
 *				  gsnedf_job_arrival().
 *
 *
 * When we know that T is linked to CPU then link_task_to_cpu(NULL, CPU) is
 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
 * the functions will automatically propagate pending tasks from the ready
 * queue to a linked task. This is the job of the calling function (by means
 * of __take_ready).
 */
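
/* A minimal sketch of the caller-side pattern implied above, assuming
 * adaptive_lock is held (the identifiers match the helpers defined below):
 *
 *	unlink(t);
 *	if (!entry->linked)
 *		link_task_to_cpu(__take_ready(&adaptive), entry);
 *	if (entry->linked != entry->scheduled)
 *		preempt(entry);
 *
 * Nothing pulls a pending job out of the ready queue automatically; the
 * caller has to do it via __take_ready(), as adaptive_schedule() does.
 */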


/* cpu_entry_t - maintain the linked and scheduled state
 */
typedef struct {
	int			cpu;
	struct task_struct*	linked;		/* only RT tasks */
	struct task_struct*	scheduled;	/* only RT tasks */
	struct list_head	list;
	atomic_t		will_schedule;	/* prevent unneeded IPIs */
} cpu_entry_t;
DEFINE_PER_CPU(cpu_entry_t, adaptive_cpu_entries);

#define set_will_schedule() \
	(atomic_set(&__get_cpu_var(adaptive_cpu_entries).will_schedule, 1))
#define clear_will_schedule() \
	(atomic_set(&__get_cpu_var(adaptive_cpu_entries).will_schedule, 0))
#define test_will_schedule(cpu) \
	(atomic_read(&per_cpu(adaptive_cpu_entries, cpu).will_schedule))


#define NO_CPU 0xffffffff

/* The adaptive_lock is used to serialize all scheduling events. It protects
 * the CPU queue, the per-CPU linkage state, and the adaptive rt domain.
 */
static queuelock_t adaptive_lock;
/* the cpus queue themselves according to priority in here */
static LIST_HEAD(adaptive_cpu_queue);

static rt_domain_t adaptive;


/* update_cpu_position - Move the cpu entry to the correct place to maintain
 *                       order in the cpu queue. Caller must hold
 *                       adaptive_lock.
 */
static void update_cpu_position(cpu_entry_t *entry)
{
	cpu_entry_t *other;
	struct list_head *pos;
	list_del(&entry->list);
	/* if we do not execute real-time jobs we just move
	 * to the end of the queue
	 */
	if (entry->linked) {
		list_for_each(pos, &adaptive_cpu_queue) {
			other = list_entry(pos, cpu_entry_t, list);
			if (edf_higher_prio(entry->linked, other->linked)) {
				__list_add(&entry->list, pos->prev, pos);
				return;
			}
		}
	}
	/* if we get this far we have the lowest priority job */
	list_add_tail(&entry->list, &adaptive_cpu_queue);
}
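
/* Resulting invariant (assuming every entry is repositioned through
 * update_cpu_position): adaptive_cpu_queue is sorted by the priority of each
 * CPU's linked job, highest first, with CPUs that have no linked job at the
 * end. A preemption check therefore only ever needs to inspect the tail, as
 * adaptive_job_arrival() below does:
 *
 *	last = list_entry(adaptive_cpu_queue.prev, cpu_entry_t, list);
 *	if (edf_preemption_needed(&adaptive, last->linked))
 *		... preempt the tail CPU ...
 */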

/* link_task_to_cpu - Update the link of a CPU.
 *                    Handles the case where the to-be-linked task is already
 *                    scheduled on a different CPU.
 */
static noinline void link_task_to_cpu(struct task_struct* linked,
				      cpu_entry_t *entry)
{
	cpu_entry_t *sched;
	struct task_struct* tmp;
	int on_cpu;

	BUG_ON(linked && !is_realtime(linked));

	/* Currently linked task is set to be unlinked. */
	if (entry->linked) {
		entry->linked->rt_param.linked_on = NO_CPU;
	}

	/* Link new task to CPU. */
	if (linked) {
		set_rt_flags(linked, RT_F_RUNNING);
		/* handle the case that the task is already scheduled
		 * somewhere else
		 */
		on_cpu = linked->rt_param.scheduled_on;
		if (on_cpu != NO_CPU) {
			sched = &per_cpu(adaptive_cpu_entries, on_cpu);
			/* this should only happen if not linked already */
			BUG_ON(sched->linked == linked);

			/* If we are already scheduled on the CPU to which we
			 * wanted to link, we don't need to do the swap --
			 * we just link ourselves to the CPU and depend on
			 * the caller to get things right.
			 */
			if (entry != sched) {
				tmp = sched->linked;
				linked->rt_param.linked_on = sched->cpu;
				sched->linked = linked;
				update_cpu_position(sched);
				linked = tmp;
			}
		}
		if (linked) /* might be NULL due to swap */
			linked->rt_param.linked_on = entry->cpu;
	}
	entry->linked = linked;
	update_cpu_position(entry);
}
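
/* Worked example of the swap (hypothetical tasks T0, T1, T2 on two CPUs):
 * T1 is currently scheduled on CPU 0 while CPU 1 requests
 * link_task_to_cpu(T1, entry of CPU 1). Because T1 is still running on
 * CPU 0, the links are swapped rather than migrating a running job:
 *
 *	before:	cpu0 = { scheduled: T1, linked: T0 }
 *		cpu1 = { linked: T2 }
 *	after:	cpu0 = { scheduled: T1, linked: T1 }
 *		cpu1 = { linked: T0 }
 *
 * T2 was unlinked at the top of the function (linked_on = NO_CPU); putting
 * it back into a queue is the caller's responsibility, typically via
 * requeue().
 */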

/* unlink - Make sure a task is not linked any longer to an entry
 *          where it was linked before. Must hold adaptive_lock.
 */
static noinline void unlink(struct task_struct* t)
{
	cpu_entry_t *entry;

	if (unlikely(!t)) {
		TRACE_BUG_ON(!t);
		return;
	}

	if (t->rt_param.linked_on != NO_CPU) {
		/* unlink */
		entry = &per_cpu(adaptive_cpu_entries, t->rt_param.linked_on);
		t->rt_param.linked_on = NO_CPU;
		link_task_to_cpu(NULL, entry);
	} else if (in_list(&t->rt_list)) {
		/* This is an interesting situation: t is scheduled,
		 * but was just recently unlinked. It cannot be
		 * linked anywhere else (because then it would have
		 * been relinked to this CPU), thus it must be in some
		 * queue. We must remove it from the list in this
		 * case.
		 */
		list_del(&t->rt_list);
	}
}


/* preempt - force a CPU to reschedule
 */
static noinline void preempt(cpu_entry_t *entry)
{
	/* We cannot make the is_np() decision here if it is a remote CPU
	 * because requesting exit_np() requires that we currently use the
	 * address space of the task. Thus, in the remote case we just send
	 * the IPI and let schedule() handle the problem.
	 */

	if (smp_processor_id() == entry->cpu) {
		if (entry->scheduled && is_np(entry->scheduled))
			request_exit_np(entry->scheduled);
		else
			set_tsk_need_resched(current);
	} else
		/* in case that it is a remote CPU we have to defer the
		 * decision to the remote CPU
		 * FIXME: We could save a few IPIs here if we leave the flag
		 * set when we are waiting for a np_exit().
		 */
		if (!test_will_schedule(entry->cpu))
			smp_send_reschedule(entry->cpu);
}
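
/* Sketch of the IPI-avoidance handshake implied by will_schedule (the flag
 * is set in adaptive_scheduler_tick() and cleared in adaptive_schedule(),
 * both below):
 *
 *	CPU 0: set_will_schedule();        (tick decided FORCE_RESCHED)
 *	CPU 1: preempt(entry of CPU 0);
 *	       test_will_schedule(0) == 1  =>  no smp_send_reschedule(0)
 *	CPU 0: adaptive_schedule() runs and calls clear_will_schedule()
 */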

/* requeue - Put an unlinked task into the adaptive domain.
 *           Caller must hold adaptive_lock.
 */
static noinline void requeue(struct task_struct* task)
{
	BUG_ON(!task);
	/* sanity check rt_list before insertion */
	BUG_ON(in_list(&task->rt_list));

	if (get_rt_flags(task) == RT_F_SLEEP ||
	    get_rt_mode() != MODE_RT_RUN) {
		/* this task has expired; _schedule has already taken care of
		 * updating the release and the deadline. We just need to
		 * check whether it has been released yet.
		 */
		if (is_released(task) && get_rt_mode() == MODE_RT_RUN)
			__add_ready(&adaptive, task);
		else {
			/* it has got to wait */
			__add_release(&adaptive, task);
		}

	} else
		/* this is a forced preemption,
		 * thus the task stays in the ready queue;
		 * we only need to make it available to other CPUs
		 */
		__add_ready(&adaptive, task);
}

/* adaptive_job_arrival: task is either resumed or released */
static noinline void adaptive_job_arrival(struct task_struct* task)
{
	cpu_entry_t* last;

	BUG_ON(list_empty(&adaptive_cpu_queue));
	BUG_ON(!task);

	/* first queue arriving job */
	requeue(task);

	/* then check for any necessary preemptions */
	last = list_entry(adaptive_cpu_queue.prev, cpu_entry_t, list);
	if (edf_preemption_needed(&adaptive, last->linked)) {
		/* preemption necessary */
		task = __take_ready(&adaptive);
		TRACE("job_arrival: task %d linked to %d\n",
		      task->pid, last->cpu);
		if (last->linked)
			requeue(last->linked);

		link_task_to_cpu(task, last);
		preempt(last);
	}
}

/* check for current job releases */
static noinline void adaptive_release_jobs(void)
{
	struct list_head *pos, *save;
	struct task_struct *queued;

	list_for_each_safe(pos, save, &adaptive.release_queue) {
		queued = list_entry(pos, struct task_struct, rt_list);
		if (likely(is_released(queued))) {
			/* this one is ready to go */
			list_del(pos);
			set_rt_flags(queued, RT_F_RUNNING);

			sched_trace_job_release(queued);
			adaptive_job_arrival(queued);
		}
		else
			/* the release queue is ordered */
			break;
	}
}

/* adaptive_scheduler_tick - this function is called for every local timer
 *                           interrupt.
 *
 * Checks whether the current task has expired and, if it has not,
 * whether we need to preempt it.
 */
static reschedule_check_t adaptive_scheduler_tick(void)
{
	unsigned long flags;
	struct task_struct* t = current;
	reschedule_check_t want_resched = NO_RESCHED;

	/* expire tasks even if not in real-time mode
	 * this makes sure that at the end of real-time mode
	 * no task "runs away forever".
	 */
	if (is_realtime(t))
		TRACE_CUR("before dec: time_slice == %u\n", t->time_slice);

	if (is_realtime(t) && t->time_slice && !--t->time_slice) {
		if (!is_np(t)) { /* np tasks will be preempted when they become
				    preemptable again */
			want_resched = FORCE_RESCHED;
			set_will_schedule();
			TRACE("adaptive_scheduler_tick: "
			      "%d is preemptable "
			      " => FORCE_RESCHED\n", t->pid);
		} else {
			TRACE("adaptive_scheduler_tick: "
			      "%d is non-preemptable, "
			      "preemption delayed.\n", t->pid);
			request_exit_np(t);
		}
	}

	/* only the first CPU needs to release jobs */
	if (get_rt_mode() == MODE_RT_RUN && smp_processor_id() == 0) {
		queue_lock_irqsave(&adaptive_lock, flags);

		/* (1) try to release pending jobs */
		adaptive_release_jobs();

		/* we don't need to check linked != scheduled since
		 * set_tsk_need_resched has been set by preempt() if necessary
		 */

		queue_unlock_irqrestore(&adaptive_lock, flags);
	}

	return want_resched;
}

/* caller holds adaptive_lock */
static noinline void job_completion(struct task_struct *t)
{
	BUG_ON(!t);

	sched_trace_job_completion(t);

	TRACE_TASK(t, "job_completion().\n");

	/* set flags */
	set_rt_flags(t, RT_F_SLEEP);
	/* prepare for next period */
	edf_prepare_for_next_period(t);
	/* unlink */
	unlink(t);
	/* requeue
	 * But don't requeue a blocking task. */
	if (is_running(t))
		adaptive_job_arrival(t);
}


/* Getting schedule() right is a bit tricky. schedule() may not make any
 * assumptions on the state of the current task since it may be called for a
 * number of reasons. The reasons include a scheduler_tick() determining that
 * it was necessary, because sys_exit_np() was called, because some Linux
 * subsystem determined so, or even (in the worst case) because there is a bug
 * hidden somewhere. Thus, we must take extreme care to determine what the
 * current state is.
 *
 * The CPU could currently be scheduling a task (or not) and be linked (or
 * not).
 *
 * The following assertions for the scheduled task could hold:
 *
 *	- !is_running(scheduled)	// the job blocks
 *	- scheduled->timeslice == 0	// the job completed (forcefully)
 *	- get_rt_flag() == RT_F_SLEEP	// the job completed (by syscall)
 *	- linked != scheduled		// we need to reschedule (for any reason)
 *	- is_np(scheduled)		// rescheduling must be delayed,
 *					   sys_exit_np must be requested
 *
 * Any of these can occur together.
 */
static int adaptive_schedule(struct task_struct * prev,
			     struct task_struct ** next,
			     runqueue_t * rq)
{
	cpu_entry_t* entry = &__get_cpu_var(adaptive_cpu_entries);
	int out_of_time, sleep, preempt, np, exists,
	    rt, blocks;
	struct task_struct* linked;

	/* Will be released in finish_switch. */
	queue_lock(&adaptive_lock);
	clear_will_schedule();

	/* sanity checking */
	BUG_ON(entry->scheduled && entry->scheduled != prev);
	BUG_ON(entry->scheduled && !is_realtime(prev));

	/* (0) Determine state */
	exists      = entry->scheduled != NULL;
	blocks      = exists && !is_running(entry->scheduled);
	out_of_time = exists && !entry->scheduled->time_slice;
	np          = exists && is_np(entry->scheduled);
	sleep       = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
	preempt     = entry->scheduled != entry->linked;
	rt          = get_rt_mode() == MODE_RT_RUN;

	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		unlink(entry->scheduled);

	/* Request a sys_exit_np() call if we would like to preempt but cannot.
	 * We need to make sure to update the link structure anyway in case
	 * that we are still linked. Multiple calls to request_exit_np() don't
	 * hurt.
	 */
	if (np && (out_of_time || preempt || sleep)) {
		unlink(entry->scheduled);
		request_exit_np(entry->scheduled);
	}

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this.
	 */
	if (!np && (out_of_time || sleep))
		job_completion(entry->scheduled);

	/* Stop real-time tasks when we leave real-time mode
	 */
	if (!rt && entry->linked) {
		/* task will be preempted once it is preemptable
		 * (which it may be already)
		 */
		linked = entry->linked;
		unlink(linked);
		requeue(linked);
	}

	/* Link pending task if we became unlinked.
	 */
	if (rt && !entry->linked)
		link_task_to_cpu(__take_ready(&adaptive), entry);

	/* The final scheduling decision. Do we need to switch for some reason?
	 * If linked is different from scheduled, select linked as next.
	 */
	if ((!np || blocks) &&
	    entry->linked != entry->scheduled) {
		/* Take care of a previously scheduled
		 * job by taking it out of the Linux runqueue.
		 */
		if (entry->scheduled) {
			if (prev->array)
				/* take it out of the run queue */
				deactivate_task(prev, rq);
		}

		/* Schedule a linked job? */
		if (entry->linked) {
			*next = entry->linked;
			/* mark the task as executing on this cpu */
			set_task_cpu(*next, smp_processor_id());
			/* stick the task into the runqueue */
			__activate_task(*next, rq);
		}
	} else
		/* Only override the Linux scheduler if we have a real-time
		 * task scheduled that needs to continue.
		 */
		if (exists)
			*next = prev;

	/* Unlock in case we don't affect real-time tasks or
	 * if nothing changed and finish_switch won't be called.
	 */
	if (prev == *next || (!is_realtime(prev) && !*next))
		queue_unlock(&adaptive_lock);

	return 0;
}
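
/* Compact summary of the decision logic above (derived from the code; each
 * row lists the dominant condition and its effect):
 *
 *	blocks				-> unlink; a pending job may be linked
 *	np && (out_of_time ||
 *	       preempt || sleep)	-> unlink + request_exit_np(); the np
 *					   job keeps running for now
 *	!np && (out_of_time || sleep)	-> job_completion()
 *	linked != scheduled (&& !np)	-> deactivate prev, activate linked
 */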


/* _finish_switch - we just finished the switch away from prev
 */
static void adaptive_finish_switch(struct task_struct *prev)
{
	cpu_entry_t* entry = &__get_cpu_var(adaptive_cpu_entries);

	if (is_realtime(current))
		entry->scheduled = current;
	else
		entry->scheduled = NULL;

	prev->rt_param.scheduled_on = NO_CPU;
	current->rt_param.scheduled_on = smp_processor_id();

	/* unlock in case schedule() left it locked */
	if (is_realtime(current) || is_realtime(prev))
		queue_unlock(&adaptive_lock);
}
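
/* The lock handoff between the two functions above, as a sketch (assuming a
 * real-time task is involved in the switch):
 *
 *	adaptive_schedule():		queue_lock(&adaptive_lock);
 *					... pick *next ...
 *					unlocks only if prev == *next or no
 *					real-time task is involved
 *	(context switch happens)
 *	adaptive_finish_switch():	queue_unlock(&adaptive_lock);
 *
 * The lock is thus held across the context switch whenever real-time state
 * changes, which keeps linked/scheduled updates atomic with the switch.
 */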


/* Prepare a task for running in RT mode
 * Enqueues the task into the master queue data structure
 * returns
 *	-EPERM if task is not TASK_STOPPED
 */
static long adaptive_prepare_task(struct task_struct * t)
{
	unsigned long flags;
	TRACE("adaptive: prepare task %d\n", t->pid);

	if (t->state == TASK_STOPPED) {
		__setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);

		t->rt_param.scheduled_on = NO_CPU;
		t->rt_param.linked_on    = NO_CPU;
		if (get_rt_mode() == MODE_RT_RUN)
			/* The action is already on.
			 * Prepare immediate release
			 */
			edf_release_now(t);
		/* The task should be running in the queue, otherwise signal
		 * code will try to wake it up with fatal consequences.
		 */
		t->state = TASK_RUNNING;

		queue_lock_irqsave(&adaptive_lock, flags);
		requeue(t);
		queue_unlock_irqrestore(&adaptive_lock, flags);
		return 0;
	}
	else
		return -EPERM;
}

static void adaptive_wake_up_task(struct task_struct *task)
{
	unsigned long flags;
	/* We must determine whether the task should go into the release
	 * queue or into the ready queue. It may enter the ready queue
	 * if it has credit left in its time slice and has not yet reached
	 * its deadline. If it is now past its deadline we assume this is the
	 * arrival of a new sporadic job and thus put it in the ready queue
	 * anyway. If it has zero budget and the next release is in the future
	 * it has to go to the release queue.
	 */
	TRACE("adaptive: %d unsuspends with budget=%d\n",
	      task->pid, task->time_slice);
	task->state = TASK_RUNNING;

	/* We need to take suspensions because of semaphores into
	 * account! If a job resumes after being suspended due to acquiring
	 * a semaphore, it should never be treated as a new job release.
	 */
	if (get_rt_flags(task) == RT_F_EXIT_SEM) {
		set_rt_flags(task, RT_F_RUNNING);
	} else {
		if (is_tardy(task)) {
			/* new sporadic release */
			edf_release_now(task);
			sched_trace_job_release(task);
		}
		else if (task->time_slice)
			/* came back in time before deadline
			 */
			set_rt_flags(task, RT_F_RUNNING);
	}

	queue_lock_irqsave(&adaptive_lock, flags);
	adaptive_job_arrival(task);
	queue_unlock_irqrestore(&adaptive_lock, flags);
}
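
/* The resume policy above, as a table (rows checked top to bottom, derived
 * from the branches in this function):
 *
 *	RT_F_EXIT_SEM		-> semaphore suspension ended: same job
 *				   continues (RT_F_RUNNING)
 *	is_tardy(task)		-> treated as a new sporadic release
 *				   (edf_release_now)
 *	task->time_slice > 0	-> budget left, deadline not reached: job
 *				   continues (RT_F_RUNNING)
 *	otherwise		-> flags stay unchanged; adaptive_job_arrival()
 *				   sends the task to the release queue
 */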

static void adaptive_task_blocks(struct task_struct *t)
{
	unsigned long flags;

	/* unlink if necessary */
	queue_lock_irqsave(&adaptive_lock, flags);
	unlink(t);
	queue_unlock_irqrestore(&adaptive_lock, flags);

	BUG_ON(!is_realtime(t));
	TRACE("task %d suspends with budget=%d\n", t->pid, t->time_slice);
	BUG_ON(t->rt_list.next != LIST_POISON1);
	BUG_ON(t->rt_list.prev != LIST_POISON2);
}


/* When _tear_down is called, the task should not be in any queue any more
 * as it must have blocked first. We don't have any internal state for the
 * task, it is all in the task_struct.
 */
static long adaptive_tear_down(struct task_struct * t)
{
	BUG_ON(!is_realtime(t));
	TRACE_TASK(t, "RIP\n");
	BUG_ON(t->array);
	BUG_ON(t->rt_list.next != LIST_POISON1);
	BUG_ON(t->rt_list.prev != LIST_POISON2);
	return 0;
}

static long adaptive_pi_block(struct pi_semaphore *sem,
			      struct task_struct *new_waiter)
{
	/* This callback has to handle the situation where a new waiter is
	 * added to the wait queue of the semaphore.
	 *
	 * We must check if it has a higher priority than the currently
	 * highest-priority task, and then potentially reschedule.
	 */

	BUG_ON(!new_waiter);

	if (edf_higher_prio(new_waiter, sem->hp.task)) {
		TRACE_TASK(new_waiter, " boosts priority\n");
		/* called with IRQs disabled */
		queue_lock(&adaptive_lock);
		/* store new highest-priority task */
		sem->hp.task = new_waiter;
		if (sem->holder) {
			/* let holder inherit */
			sem->holder->rt_param.inh_task = new_waiter;
			unlink(sem->holder);
			adaptive_job_arrival(sem->holder);
		}
		queue_unlock(&adaptive_lock);
	}

	return 0;
}

static long adaptive_inherit_priority(struct pi_semaphore *sem,
				      struct task_struct *new_owner)
{
	/* We don't need to acquire the adaptive_lock since at the time of this
	 * call new_owner isn't actually scheduled yet (it's still sleeping)
	 * and since the calling function already holds sem->wait.lock, which
	 * prevents concurrent sem->hp.task changes.
	 */

	if (sem->hp.task && sem->hp.task != new_owner) {
		new_owner->rt_param.inh_task = sem->hp.task;
		TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
			   sem->hp.task->comm, sem->hp.task->pid);
	} else
		TRACE_TASK(new_owner,
			   "cannot inherit priority, "
			   "no higher priority job waits.\n");
	return 0;
}

/* This function is called on a semaphore release, and assumes that
 * the current task is also the semaphore holder.
 */
static long adaptive_return_priority(struct pi_semaphore *sem)
{
	struct task_struct* t = current;
	int ret = 0;

	/* Find new highest-priority semaphore task
	 * if holder task is the current hp.task.
	 *
	 * Calling function holds sem->wait.lock.
	 */
	if (t == sem->hp.task)
		edf_set_hp_task(sem);

	TRACE_CUR("adaptive_return_priority for lock %p\n", sem);

	if (t->rt_param.inh_task) {
		/* interrupts already disabled by PI code */
		queue_lock(&adaptive_lock);

		/* Reset inh_task to NULL. */
		t->rt_param.inh_task = NULL;

		/* Check if rescheduling is necessary */
		unlink(t);
		adaptive_job_arrival(t);
		queue_unlock(&adaptive_lock);
	}

	return ret;
}
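
/* Taken together, the three callbacks above implement the priority
 * inheritance handshake for semaphores. A sketch of one contended
 * acquisition, with hypothetical tasks H (high priority) and L (low
 * priority), where L holds the semaphore:
 *
 *	H blocks on sem	-> adaptive_pi_block(): sem->hp.task = H and
 *			   L->rt_param.inh_task = H; L is relinked at
 *			   H's priority
 *	L releases sem	-> adaptive_return_priority(): L drops inh_task
 *			   and is relinked at its own priority
 *	H acquires sem	-> adaptive_inherit_priority(): H inherits nothing,
 *			   since it is sem->hp.task itself
 */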

static int adaptive_mode_change(int new_mode)
{
	unsigned long flags;
	int cpu;
	cpu_entry_t *entry;

	if (new_mode == MODE_RT_RUN) {
		queue_lock_irqsave(&adaptive_lock, flags);

		__rerelease_all(&adaptive, edf_release_at);

		/* get old cruft out of the way in case we reenter real-time
		 * mode for a second time
		 */
		while (!list_empty(&adaptive_cpu_queue))
			list_del(adaptive_cpu_queue.next);
		/* reinitialize */
		for_each_online_cpu(cpu) {
			entry = &per_cpu(adaptive_cpu_entries, cpu);
			atomic_set(&entry->will_schedule, 0);
			entry->linked    = NULL;
			entry->scheduled = NULL;
			list_add(&entry->list, &adaptive_cpu_queue);
		}

		queue_unlock_irqrestore(&adaptive_lock, flags);

	}
	return 0;
}


/* Plugin object */
static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
	.ready_to_use = 0
};


/*
 * Plugin initialization code.
 */
#define INIT_SCHED_PLUGIN (struct sched_plugin){ \
	.plugin_name		= "ADAPTIVE", \
	.ready_to_use		= 1, \
	.algo_scheduler_tick	= adaptive_scheduler_tick, \
	.scheduler_tick		= rt_scheduler_tick, \
	.prepare_task		= adaptive_prepare_task, \
	.sleep_next_period	= edf_sleep_next_period, \
	.tear_down		= adaptive_tear_down, \
	.schedule		= adaptive_schedule, \
	.finish_switch		= adaptive_finish_switch, \
	.mode_change		= adaptive_mode_change, \
	.wake_up_task		= adaptive_wake_up_task, \
	.task_blocks		= adaptive_task_blocks, \
	.inherit_priority	= adaptive_inherit_priority, \
	.return_priority	= adaptive_return_priority, \
	.pi_block		= adaptive_pi_block \
}


sched_plugin_t *__init init_adaptive_plugin(void)
{
	int cpu;
	cpu_entry_t *entry;

	if (!s_plugin.ready_to_use)
	{
		/* initialize CPU state */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			entry = &per_cpu(adaptive_cpu_entries, cpu);
			atomic_set(&entry->will_schedule, 0);
			entry->linked    = NULL;
			entry->scheduled = NULL;
			entry->cpu       = cpu;
		}

		queue_lock_init(&adaptive_lock);
		set_sched_options(SCHED_NONE);
		edf_domain_init(&adaptive, NULL);
		s_plugin = INIT_SCHED_PLUGIN;
	}
	return &s_plugin;
}
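
The initializer is written to be idempotent: ready_to_use guards the one-time
setup, so repeated calls hand back the same plugin object. A minimal usage
sketch (the caller here is hypothetical; in this patch the real caller is the
plugin table in kernel/litmus.c shown above):

	sched_plugin_t *p1 = init_adaptive_plugin();
	sched_plugin_t *p2 = init_adaptive_plugin();
	/* p1 == p2 == &s_plugin; the second call skips re-initialization
	 * because s_plugin.ready_to_use is already 1 */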