diff options
| author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-04-04 17:31:30 -0400 |
|---|---|---|
| committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-04-04 17:37:06 -0400 |
| commit | b379666c96805563ca61ddfbb38f0ec9809edf05 (patch) | |
| tree | 6f0b64b1aa70b76e4db0e6786629b985a61dbefe | |
| parent | 3324865fc5792b9d755d46cafa42c74b5037bba5 (diff) | |
Proper sobliv draining and many bug fixes. (branch: wip-2012.3-gpu-sobliv-budget-w-kshark)
Proper sobliv draining: Always and only drain budget
from a task if its BASE priority is among the top
m processors in the cluster. This required some
work with timers and tracking of consumed budget
while a task is suspended (since the Linux rq won't
track this for us).
Had to introduce a number of hacks and kludges to
make this work in the required timeframe:
1) C-EDF's ready queue lock becomes recursive (yuck!)
2) Extend bheap with a for_each visitor function.
This is needed to set a timer for each newly released
job.
3) Dual-binary heap structure in C-EDF to divide
jobs into top-m and not-top-m tasks.
4) Restructured the budget plugin API. Unfortunately,
there is not a lot of overlap between SIMPLE and
SOBLIV draining policies.
| -rw-r--r-- | include/litmus/bheap.h | 11 | ||||
| -rw-r--r-- | include/litmus/budget.h | 4 | ||||
| -rw-r--r-- | litmus/bheap.c | 20 | ||||
| -rw-r--r-- | litmus/fifo_lock.c | 13 | ||||
| -rw-r--r-- | litmus/sched_cedf.c | 187 |
5 files changed, 147 insertions, 88 deletions
diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h index 4fded5724b28..72dec0cc0240 100644 --- a/include/litmus/bheap.h +++ b/include/litmus/bheap.h | |||
| @@ -24,8 +24,6 @@ struct bheap { | |||
| 24 | * This speeds up repeated peek operations. | 24 | * This speeds up repeated peek operations. |
| 25 | */ | 25 | */ |
| 26 | struct bheap_node* min; | 26 | struct bheap_node* min; |
| 27 | |||
| 28 | // unsigned int size; | ||
| 29 | }; | 27 | }; |
| 30 | 28 | ||
| 31 | typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b); | 29 | typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b); |
| @@ -43,14 +41,9 @@ static inline int bheap_empty(struct bheap* heap) | |||
| 43 | return heap->head == NULL && heap->min == NULL; | 41 | return heap->head == NULL && heap->min == NULL; |
| 44 | } | 42 | } |
| 45 | 43 | ||
| 46 | //static inline unsigned int bheap_size(struct bheap* heap) | 44 | typedef void (*bheap_for_each_t)(struct bheap_node* node, void* args); |
| 47 | //{ | ||
| 48 | // return heap->size; | ||
| 49 | //} | ||
| 50 | |||
| 51 | typedef void (*bheap_for_all_t)(struct bheap_node* node, void* args); | ||
| 52 | 45 | ||
| 53 | void bheap_for_all(struct bheap* heap, bheap_for_all_t fn, void* args); | 46 | void bheap_for_each(struct bheap* heap, bheap_for_each_t fn, void* args); |
| 54 | 47 | ||
| 55 | /* insert (and reinitialize) a node into the heap */ | 48 | /* insert (and reinitialize) a node into the heap */ |
| 56 | void bheap_insert(bheap_prio_t higher_prio, | 49 | void bheap_insert(bheap_prio_t higher_prio, |
diff --git a/include/litmus/budget.h b/include/litmus/budget.h index 08d5e0970d1d..bcdbf3d82f7b 100644 --- a/include/litmus/budget.h +++ b/include/litmus/budget.h | |||
| @@ -80,12 +80,8 @@ void simple_on_exit(struct task_struct* t); | |||
| 80 | * | 80 | * |
| 81 | * Limitation: Quantum budget tracking is unsupported. | 81 | * Limitation: Quantum budget tracking is unsupported. |
| 82 | */ | 82 | */ |
| 83 | //void sobliv_on_scheduled(struct task_struct* t); | ||
| 84 | void sobliv_on_blocked(struct task_struct* t); | 83 | void sobliv_on_blocked(struct task_struct* t); |
| 85 | void sobliv_on_wakeup(struct task_struct* t); | 84 | void sobliv_on_wakeup(struct task_struct* t); |
| 86 | //void sobliv_on_sleep(struct task_struct* t); | ||
| 87 | //void sobliv_on_preempt(struct task_struct* t); | ||
| 88 | /* Use the DRAIN_SIMPLE implementations */ | ||
| 89 | #define sobliv_on_exit simple_on_exit | 85 | #define sobliv_on_exit simple_on_exit |
| 90 | void sobliv_on_inherit(struct task_struct* t, struct task_struct* prio_inh); | 86 | void sobliv_on_inherit(struct task_struct* t, struct task_struct* prio_inh); |
| 91 | void sobliv_on_disinherit(struct task_struct* t, struct task_struct* prio_inh); | 87 | void sobliv_on_disinherit(struct task_struct* t, struct task_struct* prio_inh); |
diff --git a/litmus/bheap.c b/litmus/bheap.c index 403c09cc9e81..c69d75c28aaf 100644 --- a/litmus/bheap.c +++ b/litmus/bheap.c | |||
| @@ -5,8 +5,6 @@ void bheap_init(struct bheap* heap) | |||
| 5 | { | 5 | { |
| 6 | heap->head = NULL; | 6 | heap->head = NULL; |
| 7 | heap->min = NULL; | 7 | heap->min = NULL; |
| 8 | |||
| 9 | // heap->size = 0; | ||
| 10 | } | 8 | } |
| 11 | 9 | ||
| 12 | void bheap_node_init(struct bheap_node** _h, void* value) | 10 | void bheap_node_init(struct bheap_node** _h, void* value) |
| @@ -21,19 +19,19 @@ void bheap_node_init(struct bheap_node** _h, void* value) | |||
| 21 | } | 19 | } |
| 22 | 20 | ||
| 23 | 21 | ||
| 24 | static void __bheap_for_all(struct bheap_node *h, bheap_for_all_t fn, void* args) | 22 | static void __bheap_for_each(struct bheap_node *h, bheap_for_each_t fn, void* args) |
| 25 | { | 23 | { |
| 26 | /* pre-order */ | 24 | /* pre-order */ |
| 27 | fn(h, args); | 25 | fn(h, args); |
| 28 | 26 | ||
| 29 | /* depth-first */ | 27 | /* depth-first */ |
| 30 | if (h->child) | 28 | if (h->child) |
| 31 | __bheap_for_all(h->child, fn, args); | 29 | __bheap_for_each(h->child, fn, args); |
| 32 | if (h->next) | 30 | if (h->next) |
| 33 | __bheap_for_all(h->next, fn, args); | 31 | __bheap_for_each(h->next, fn, args); |
| 34 | } | 32 | } |
| 35 | 33 | ||
| 36 | void bheap_for_all(struct bheap* heap, bheap_for_all_t fn, void* args) | 34 | void bheap_for_each(struct bheap* heap, bheap_for_each_t fn, void* args) |
| 37 | { | 35 | { |
| 38 | struct bheap_node *head; | 36 | struct bheap_node *head; |
| 39 | 37 | ||
| @@ -41,7 +39,7 @@ void bheap_for_all(struct bheap* heap, bheap_for_all_t fn, void* args) | |||
| 41 | BUG_ON(!fn); | 39 | BUG_ON(!fn); |
| 42 | 40 | ||
| 43 | head = heap->head; | 41 | head = heap->head; |
| 44 | __bheap_for_all(head, fn, args); | 42 | __bheap_for_each(head, fn, args); |
| 45 | } | 43 | } |
| 46 | 44 | ||
| 47 | /* make child a subtree of root */ | 45 | /* make child a subtree of root */ |
| @@ -198,8 +196,6 @@ void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, | |||
| 198 | heap->min = node; | 196 | heap->min = node; |
| 199 | } else | 197 | } else |
| 200 | __bheap_union(higher_prio, heap, node); | 198 | __bheap_union(higher_prio, heap, node); |
| 201 | |||
| 202 | // ++heap->size; | ||
| 203 | } | 199 | } |
| 204 | 200 | ||
| 205 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) | 201 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) |
| @@ -222,8 +218,6 @@ void bheap_union(bheap_prio_t higher_prio, | |||
| 222 | __bheap_union(higher_prio, target, addition->head); | 218 | __bheap_union(higher_prio, target, addition->head); |
| 223 | /* this is a destructive merge */ | 219 | /* this is a destructive merge */ |
| 224 | addition->head = NULL; | 220 | addition->head = NULL; |
| 225 | |||
| 226 | // target->size += addition->size; | ||
| 227 | } | 221 | } |
| 228 | 222 | ||
| 229 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | 223 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, |
| @@ -245,8 +239,6 @@ struct bheap_node* bheap_take(bheap_prio_t higher_prio, | |||
| 245 | if (node) | 239 | if (node) |
| 246 | node->degree = NOT_IN_HEAP; | 240 | node->degree = NOT_IN_HEAP; |
| 247 | 241 | ||
| 248 | // --heap->size; | ||
| 249 | |||
| 250 | return node; | 242 | return node; |
| 251 | } | 243 | } |
| 252 | 244 | ||
| @@ -320,8 +312,6 @@ void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, | |||
| 320 | heap->min = NULL; | 312 | heap->min = NULL; |
| 321 | 313 | ||
| 322 | node->degree = NOT_IN_HEAP; | 314 | node->degree = NOT_IN_HEAP; |
| 323 | |||
| 324 | // --heap->size; | ||
| 325 | } | 315 | } |
| 326 | 316 | ||
| 327 | /* allocate a heap node for value and insert into the heap */ | 317 | /* allocate a heap node for value and insert into the heap */ |
diff --git a/litmus/fifo_lock.c b/litmus/fifo_lock.c index ed637044c948..e3a4420851b2 100644 --- a/litmus/fifo_lock.c +++ b/litmus/fifo_lock.c | |||
| @@ -735,20 +735,11 @@ void fifo_mutex_budget_exhausted(struct litmus_lock* l, struct task_struct* t) | |||
| 735 | struct fifo_mutex *mutex = fifo_mutex_from_lock(l); | 735 | struct fifo_mutex *mutex = fifo_mutex_from_lock(l); |
| 736 | unsigned long flags = 0; | 736 | unsigned long flags = 0; |
| 737 | 737 | ||
| 738 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | 738 | /* DGL lock must already be held on this code path */ |
| 739 | unsigned long dglirqflags; | ||
| 740 | raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); | ||
| 741 | lock_global_irqsave(dgl_lock, dglirqflags); | ||
| 742 | #endif | ||
| 743 | |||
| 744 | lock_fine_irqsave(&mutex->lock, flags); | 739 | lock_fine_irqsave(&mutex->lock, flags); |
| 745 | 740 | ||
| 746 | // unlocks mutex->lock | 741 | /* unlocks mutex->lock */ |
| 747 | __fifo_mutex_propagate_decrease_inheritance(&mutex->litmus_lock, t, flags, 1); | 742 | __fifo_mutex_propagate_decrease_inheritance(&mutex->litmus_lock, t, flags, 1); |
| 748 | |||
| 749 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
| 750 | unlock_global_irqrestore(dgl_lock, dglirqflags); | ||
| 751 | #endif | ||
| 752 | } | 743 | } |
| 753 | 744 | ||
| 754 | 745 | ||
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index fc174c464a17..f6fa8a339d48 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c | |||
| @@ -87,6 +87,11 @@ | |||
| 87 | #include <litmus/gpu_affinity.h> | 87 | #include <litmus/gpu_affinity.h> |
| 88 | #endif | 88 | #endif |
| 89 | 89 | ||
| 90 | |||
| 91 | /* TODO: Move this to litmus/Kconfig */ | ||
| 92 | #define RECURSIVE_READY_QUEUE_LOCK | ||
| 93 | |||
| 94 | |||
| 90 | /* Reference configuration variable. Determines which cache level is used to | 95 | /* Reference configuration variable. Determines which cache level is used to |
| 91 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | 96 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that |
| 92 | * all CPUs form a single cluster (just like GSN-EDF). | 97 | * all CPUs form a single cluster (just like GSN-EDF). |
| @@ -133,8 +138,10 @@ typedef struct clusterdomain { | |||
| 133 | cpumask_var_t cpu_map; | 138 | cpumask_var_t cpu_map; |
| 134 | /* the cpus queue themselves according to priority in here */ | 139 | /* the cpus queue themselves according to priority in here */ |
| 135 | struct binheap cpu_heap; | 140 | struct binheap cpu_heap; |
| 136 | /* lock for this cluster */ | 141 | #ifdef RECURSIVE_READY_QUEUE_LOCK |
| 137 | #define cluster_lock domain.ready_lock | 142 | int recursive_depth; |
| 143 | atomic_t owner_cpu; | ||
| 144 | #endif | ||
| 138 | 145 | ||
| 139 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | 146 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD |
| 140 | struct tasklet_head pending_tasklets; | 147 | struct tasklet_head pending_tasklets; |
| @@ -150,6 +157,82 @@ typedef struct clusterdomain { | |||
| 150 | 157 | ||
| 151 | } cedf_domain_t; | 158 | } cedf_domain_t; |
| 152 | 159 | ||
| 160 | |||
| 161 | #ifdef RECURSIVE_READY_QUEUE_LOCK | ||
| 162 | #define lock_readyq_irqsave(cluster, flags) \ | ||
| 163 | do { \ | ||
| 164 | if (unlikely(irqs_disabled() && atomic_read(&cluster->owner_cpu) == smp_processor_id())) { \ | ||
| 165 | local_irq_save(flags); /* useless. makes compiler happy though */ \ | ||
| 166 | ++cluster->recursive_depth; \ | ||
| 167 | } \ | ||
| 168 | else { \ | ||
| 169 | raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags); \ | ||
| 170 | atomic_set(&cluster->owner_cpu, smp_processor_id()); \ | ||
| 171 | BUG_ON(cluster->recursive_depth != 0); \ | ||
| 172 | } \ | ||
| 173 | }while(0) | ||
| 174 | |||
| 175 | #define lock_readyq(cluster) \ | ||
| 176 | do { \ | ||
| 177 | if (unlikely(irqs_disabled() && atomic_read(&cluster->owner_cpu) == smp_processor_id())) \ | ||
| 178 | ++cluster->recursive_depth; \ | ||
| 179 | else { \ | ||
| 180 | raw_spin_lock(&cluster->domain.ready_lock); \ | ||
| 181 | atomic_set(&cluster->owner_cpu, smp_processor_id()); \ | ||
| 182 | BUG_ON(cluster->recursive_depth != 0); \ | ||
| 183 | } \ | ||
| 184 | }while(0) | ||
| 185 | |||
| 186 | #define unlock_readyq_irqrestore(cluster, flags) \ | ||
| 187 | do { \ | ||
| 188 | BUG_ON(!raw_spin_is_locked(&cluster->domain.ready_lock)); \ | ||
| 189 | BUG_ON(atomic_read(&cluster->owner_cpu) != smp_processor_id()); \ | ||
| 190 | if (unlikely(cluster->recursive_depth > 0)) { \ | ||
| 191 | --cluster->recursive_depth; \ | ||
| 192 | local_irq_restore(flags); /* useless. makes compiler happy though */ \ | ||
| 193 | } \ | ||
| 194 | else { \ | ||
| 195 | atomic_set(&cluster->owner_cpu, NO_CPU); \ | ||
| 196 | raw_spin_unlock_irqrestore(&cluster->domain.ready_lock, flags); \ | ||
| 197 | } \ | ||
| 198 | }while(0) | ||
| 199 | |||
| 200 | #define unlock_readyq(cluster) \ | ||
| 201 | do { \ | ||
| 202 | BUG_ON(!raw_spin_is_locked(&cluster->domain.ready_lock)); \ | ||
| 203 | if (unlikely(cluster->recursive_depth > 0)) { \ | ||
| 204 | BUG_ON(atomic_read(&cluster->owner_cpu) != smp_processor_id()); \ | ||
| 205 | --cluster->recursive_depth; \ | ||
| 206 | } \ | ||
| 207 | else { \ | ||
| 208 | atomic_set(&cluster->owner_cpu, NO_CPU); \ | ||
| 209 | raw_spin_unlock(&cluster->domain.ready_lock); \ | ||
| 210 | } \ | ||
| 211 | }while(0) | ||
| 212 | |||
| 213 | #else | ||
| 214 | #define lock_readyq_irqsave(cluster, flags) \ | ||
| 215 | do {\ | ||
| 216 | raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags); \ | ||
| 217 | }while(0) | ||
| 218 | |||
| 219 | #define lock_readyq(cluster) \ | ||
| 220 | do {\ | ||
| 221 | raw_spin_lock(&cluster->domain.ready_lock); \ | ||
| 222 | }while(0) | ||
| 223 | |||
| 224 | #define unlock_readyq_irqrestore(cluster, flags) \ | ||
| 225 | do {\ | ||
| 226 | raw_spin_unlock_irqrestore(&cluster->domain.ready_lock, flags); \ | ||
| 227 | }while(0) | ||
| 228 | |||
| 229 | #define unlock_readyq(cluster) \ | ||
| 230 | do {\ | ||
| 231 | raw_spin_unlock(&cluster->domain.ready_lock); \ | ||
| 232 | }while(0) | ||
| 233 | #endif | ||
| 234 | |||
| 235 | |||
| 153 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | 236 | /* a cedf_domain per cluster; allocation is done at init/activation time */ |
| 154 | cedf_domain_t *cedf; | 237 | cedf_domain_t *cedf; |
| 155 | 238 | ||
| @@ -292,7 +375,7 @@ static void cedf_untrack_in_top_m(struct task_struct *t) | |||
| 292 | &cluster->top_m, | 375 | &cluster->top_m, |
| 293 | struct budget_tracker, top_m_node); | 376 | struct budget_tracker, top_m_node); |
| 294 | bt_flag_set(to_move, BTF_IS_TOP_M); | 377 | bt_flag_set(to_move, BTF_IS_TOP_M); |
| 295 | budget_state_machine(t,on_enter_top_m); | 378 | budget_state_machine(to_move,on_enter_top_m); |
| 296 | } | 379 | } |
| 297 | else { | 380 | else { |
| 298 | --cluster->top_m_size; | 381 | --cluster->top_m_size; |
| @@ -561,14 +644,14 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | |||
| 561 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); | 644 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); |
| 562 | unsigned long flags; | 645 | unsigned long flags; |
| 563 | 646 | ||
| 564 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 647 | lock_readyq_irqsave(cluster, flags); |
| 565 | 648 | ||
| 566 | bheap_for_all(tasks, cedf_track_on_release, NULL); | 649 | bheap_for_each(tasks, cedf_track_on_release, NULL); |
| 567 | 650 | ||
| 568 | __merge_ready(&cluster->domain, tasks); | 651 | __merge_ready(&cluster->domain, tasks); |
| 569 | check_for_preemptions(cluster); | 652 | check_for_preemptions(cluster); |
| 570 | 653 | ||
| 571 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 654 | unlock_readyq_irqrestore(cluster, flags); |
| 572 | } | 655 | } |
| 573 | 656 | ||
| 574 | /* caller holds cluster_lock */ | 657 | /* caller holds cluster_lock */ |
| @@ -755,7 +838,19 @@ static enum hrtimer_restart cedf_sobliv_on_exhausted(struct task_struct *t) | |||
| 755 | /* force job completion */ | 838 | /* force job completion */ |
| 756 | TRACE_TASK(t, "blocked, postponing deadline\n"); | 839 | TRACE_TASK(t, "blocked, postponing deadline\n"); |
| 757 | 840 | ||
| 758 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 841 | /* Outermost lock of the cluster. Recursive lock calls are |
| 842 | * possible on this code path. This should be the _ONLY_ | ||
| 843 | * scenario where recursive calls are made. */ | ||
| 844 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
| 845 | /* Unfortunately, we _might_ need to grab the DGL lock, so we | ||
| 846 | * must grab it every time since it must be take before the | ||
| 847 | * cluster lock. */ | ||
| 848 | raw_spin_lock_irqsave(&cluster->dgl_lock, flags); | ||
| 849 | lock_readyq(cluster); | ||
| 850 | #else | ||
| 851 | lock_readyq_irqsave(cluster, flags); | ||
| 852 | #endif | ||
| 853 | |||
| 759 | job_completion(t, 1); /* refreshes budget and pushes out deadline */ | 854 | job_completion(t, 1); /* refreshes budget and pushes out deadline */ |
| 760 | 855 | ||
| 761 | #ifdef CONFIG_LITMUS_LOCKING | 856 | #ifdef CONFIG_LITMUS_LOCKING |
| @@ -792,7 +887,7 @@ static enum hrtimer_restart cedf_sobliv_on_exhausted(struct task_struct *t) | |||
| 792 | } | 887 | } |
| 793 | } | 888 | } |
| 794 | #endif | 889 | #endif |
| 795 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 890 | // unlock_readyq_irqrestore(cluster, flags); |
| 796 | 891 | ||
| 797 | #ifdef CONFIG_LITMUS_LOCKING | 892 | #ifdef CONFIG_LITMUS_LOCKING |
| 798 | /* Check our inheritance and propagate any changes forward. */ | 893 | /* Check our inheritance and propagate any changes forward. */ |
| @@ -810,9 +905,17 @@ static enum hrtimer_restart cedf_sobliv_on_exhausted(struct task_struct *t) | |||
| 810 | 905 | ||
| 811 | #ifdef CONFIG_LITMUS_LOCKING | 906 | #ifdef CONFIG_LITMUS_LOCKING |
| 812 | /* double-check that everything is okay */ | 907 | /* double-check that everything is okay */ |
| 813 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 908 | // lock_readyq_irqsave(cluster, flags); |
| 814 | check_for_preemptions(cluster); | 909 | check_for_preemptions(cluster); |
| 815 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 910 | // unlock_readyq_irqrestore(cluster, flags); |
| 911 | #endif | ||
| 912 | |||
| 913 | /* should be the outermost unlock call */ | ||
| 914 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
| 915 | unlock_readyq(cluster); | ||
| 916 | raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags); | ||
| 917 | #else | ||
| 918 | unlock_readyq_irqrestore(cluster, flags); | ||
| 816 | #endif | 919 | #endif |
| 817 | 920 | ||
| 818 | /* we need to set up the budget timer since we're within the callback. */ | 921 | /* we need to set up the budget timer since we're within the callback. */ |
| @@ -890,7 +993,7 @@ static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_ta | |||
| 890 | 993 | ||
| 891 | TS_NV_SCHED_BOTISR_START; | 994 | TS_NV_SCHED_BOTISR_START; |
| 892 | 995 | ||
| 893 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 996 | lock_readyq_irqsave(cluster, flags); |
| 894 | 997 | ||
| 895 | if(cluster->pending_tasklets.head != NULL) { | 998 | if(cluster->pending_tasklets.head != NULL) { |
| 896 | // remove tasklet at head. | 999 | // remove tasklet at head. |
| @@ -932,7 +1035,7 @@ static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_ta | |||
| 932 | TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); | 1035 | TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); |
| 933 | } | 1036 | } |
| 934 | 1037 | ||
| 935 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1038 | unlock_readyq_irqrestore(cluster, flags); |
| 936 | 1039 | ||
| 937 | if(tasklet) { | 1040 | if(tasklet) { |
| 938 | __do_lit_tasklet(tasklet, 0ul); | 1041 | __do_lit_tasklet(tasklet, 0ul); |
| @@ -1040,7 +1143,7 @@ static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) | |||
| 1040 | 1143 | ||
| 1041 | cluster = task_cpu_cluster(tasklet->owner); | 1144 | cluster = task_cpu_cluster(tasklet->owner); |
| 1042 | 1145 | ||
| 1043 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1146 | lock_readyq_irqsave(cluster, flags); |
| 1044 | 1147 | ||
| 1045 | thisCPU = smp_processor_id(); | 1148 | thisCPU = smp_processor_id(); |
| 1046 | 1149 | ||
| @@ -1091,7 +1194,7 @@ static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) | |||
| 1091 | __add_pai_tasklet(tasklet, cluster); | 1194 | __add_pai_tasklet(tasklet, cluster); |
| 1092 | } | 1195 | } |
| 1093 | 1196 | ||
| 1094 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1197 | unlock_readyq_irqrestore(cluster, flags); |
| 1095 | 1198 | ||
| 1096 | 1199 | ||
| 1097 | if (runLocal /*&& runNow */) { // runNow == 1 is implied | 1200 | if (runLocal /*&& runNow */) { // runNow == 1 is implied |
| @@ -1128,14 +1231,14 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio, | |||
| 1128 | cluster = task_cpu_cluster(probe); | 1231 | cluster = task_cpu_cluster(probe); |
| 1129 | 1232 | ||
| 1130 | if(cluster->pending_tasklets.head != NULL) { | 1233 | if(cluster->pending_tasklets.head != NULL) { |
| 1131 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1234 | lock_readyq_irqsave(cluster, flags); |
| 1132 | for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) { | 1235 | for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) { |
| 1133 | if(step->owner == old_prio) { | 1236 | if(step->owner == old_prio) { |
| 1134 | TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); | 1237 | TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); |
| 1135 | step->owner = new_prio; | 1238 | step->owner = new_prio; |
| 1136 | } | 1239 | } |
| 1137 | } | 1240 | } |
| 1138 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1241 | unlock_readyq_irqrestore(cluster, flags); |
| 1139 | } | 1242 | } |
| 1140 | } | 1243 | } |
| 1141 | else { | 1244 | else { |
| @@ -1184,7 +1287,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
| 1184 | } | 1287 | } |
| 1185 | #endif | 1288 | #endif |
| 1186 | 1289 | ||
| 1187 | raw_spin_lock(&cluster->cluster_lock); | 1290 | lock_readyq(cluster); |
| 1188 | clear_will_schedule(); | 1291 | clear_will_schedule(); |
| 1189 | 1292 | ||
| 1190 | /* sanity checking */ | 1293 | /* sanity checking */ |
| @@ -1307,27 +1410,8 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
| 1307 | out_set_state: | 1410 | out_set_state: |
| 1308 | #endif | 1411 | #endif |
| 1309 | 1412 | ||
| 1310 | //#ifdef CONFIG_LITMUS_LOCKING | ||
| 1311 | // /* Update priority inheritance linkbacks. | ||
| 1312 | // * A blocked task may have multiple tasks that inherit from it, but only | ||
| 1313 | // * one of those tasks should be runnable. Provide a link-back between the | ||
| 1314 | // * blocked task and the one that inherits from it. */ | ||
| 1315 | // | ||
| 1316 | // /* TODO: Support klmirqd and aux tasks */ | ||
| 1317 | // /* TODO: MOVE THESE CALLS TO __increase AND __decrease TO CATCH ALL CASES. | ||
| 1318 | // PAY ATTENTION TO RUN-STATE OF INHERITOR & INHERITEE */ | ||
| 1319 | // if (next != prev) { | ||
| 1320 | // if (prev && tsk_rt(prev)->inh_task) { | ||
| 1321 | // clear_inh_task_linkback(prev, tsk_rt(prev)->inh_task); | ||
| 1322 | // } | ||
| 1323 | // if (next && tsk_rt(next)->inh_task) { | ||
| 1324 | // set_inh_task_linkback(next, tsk_rt(next)->inh_task); | ||
| 1325 | // } | ||
| 1326 | // } | ||
| 1327 | //#endif | ||
| 1328 | |||
| 1329 | sched_state_task_picked(); | 1413 | sched_state_task_picked(); |
| 1330 | raw_spin_unlock(&cluster->cluster_lock); | 1414 | unlock_readyq(cluster); |
| 1331 | 1415 | ||
| 1332 | #ifdef WANT_ALL_SCHED_EVENTS | 1416 | #ifdef WANT_ALL_SCHED_EVENTS |
| 1333 | TRACE("cluster_lock released, next=0x%p\n", next); | 1417 | TRACE("cluster_lock released, next=0x%p\n", next); |
| @@ -1368,7 +1452,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) | |||
| 1368 | /* the cluster doesn't change even if t is running */ | 1452 | /* the cluster doesn't change even if t is running */ |
| 1369 | cluster = task_cpu_cluster(t); | 1453 | cluster = task_cpu_cluster(t); |
| 1370 | 1454 | ||
| 1371 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1455 | lock_readyq_irqsave(cluster, flags); |
| 1372 | 1456 | ||
| 1373 | /* setup job params */ | 1457 | /* setup job params */ |
| 1374 | release_at(t, litmus_clock()); | 1458 | release_at(t, litmus_clock()); |
| @@ -1400,7 +1484,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) | |||
| 1400 | cedf_job_arrival(t); | 1484 | cedf_job_arrival(t); |
| 1401 | } | 1485 | } |
| 1402 | 1486 | ||
| 1403 | raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags); | 1487 | unlock_readyq_irqrestore(cluster, flags); |
| 1404 | } | 1488 | } |
| 1405 | 1489 | ||
| 1406 | static void cedf_task_wake_up(struct task_struct *t) | 1490 | static void cedf_task_wake_up(struct task_struct *t) |
| @@ -1411,7 +1495,7 @@ static void cedf_task_wake_up(struct task_struct *t) | |||
| 1411 | 1495 | ||
| 1412 | cluster = task_cpu_cluster(t); | 1496 | cluster = task_cpu_cluster(t); |
| 1413 | 1497 | ||
| 1414 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1498 | lock_readyq_irqsave(cluster, flags); |
| 1415 | 1499 | ||
| 1416 | now = litmus_clock(); | 1500 | now = litmus_clock(); |
| 1417 | TRACE_TASK(t, "wake_up at %llu\n", now); | 1501 | TRACE_TASK(t, "wake_up at %llu\n", now); |
| @@ -1443,7 +1527,7 @@ static void cedf_task_wake_up(struct task_struct *t) | |||
| 1443 | budget_state_machine(t,on_wakeup); | 1527 | budget_state_machine(t,on_wakeup); |
| 1444 | cedf_job_arrival(t); | 1528 | cedf_job_arrival(t); |
| 1445 | 1529 | ||
| 1446 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1530 | unlock_readyq_irqrestore(cluster, flags); |
| 1447 | } | 1531 | } |
| 1448 | 1532 | ||
| 1449 | static void cedf_task_block(struct task_struct *t) | 1533 | static void cedf_task_block(struct task_struct *t) |
| @@ -1456,7 +1540,7 @@ static void cedf_task_block(struct task_struct *t) | |||
| 1456 | cluster = task_cpu_cluster(t); | 1540 | cluster = task_cpu_cluster(t); |
| 1457 | 1541 | ||
| 1458 | /* unlink if necessary */ | 1542 | /* unlink if necessary */ |
| 1459 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1543 | lock_readyq_irqsave(cluster, flags); |
| 1460 | 1544 | ||
| 1461 | unlink(t); | 1545 | unlink(t); |
| 1462 | 1546 | ||
| @@ -1476,7 +1560,7 @@ static void cedf_task_block(struct task_struct *t) | |||
| 1476 | } | 1560 | } |
| 1477 | #endif | 1561 | #endif |
| 1478 | 1562 | ||
| 1479 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1563 | unlock_readyq_irqrestore(cluster, flags); |
| 1480 | 1564 | ||
| 1481 | BUG_ON(!is_realtime(t)); | 1565 | BUG_ON(!is_realtime(t)); |
| 1482 | } | 1566 | } |
| @@ -1492,7 +1576,7 @@ static void cedf_task_exit(struct task_struct * t) | |||
| 1492 | #endif | 1576 | #endif |
| 1493 | 1577 | ||
| 1494 | /* unlink if necessary */ | 1578 | /* unlink if necessary */ |
| 1495 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1579 | lock_readyq_irqsave(cluster, flags); |
| 1496 | 1580 | ||
| 1497 | if (tsk_rt(t)->inh_task) { | 1581 | if (tsk_rt(t)->inh_task) { |
| 1498 | WARN_ON(1); | 1582 | WARN_ON(1); |
| @@ -1528,7 +1612,7 @@ static void cedf_task_exit(struct task_struct * t) | |||
| 1528 | cpu->scheduled = NULL; | 1612 | cpu->scheduled = NULL; |
| 1529 | tsk_rt(t)->scheduled_on = NO_CPU; | 1613 | tsk_rt(t)->scheduled_on = NO_CPU; |
| 1530 | } | 1614 | } |
| 1531 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1615 | unlock_readyq_irqrestore(cluster, flags); |
| 1532 | 1616 | ||
| 1533 | BUG_ON(!is_realtime(t)); | 1617 | BUG_ON(!is_realtime(t)); |
| 1534 | TRACE_TASK(t, "RIP\n"); | 1618 | TRACE_TASK(t, "RIP\n"); |
| @@ -1792,13 +1876,13 @@ static void increase_priority_inheritance(struct task_struct* t, struct task_str | |||
| 1792 | { | 1876 | { |
| 1793 | cedf_domain_t* cluster = task_cpu_cluster(t); | 1877 | cedf_domain_t* cluster = task_cpu_cluster(t); |
| 1794 | 1878 | ||
| 1795 | raw_spin_lock(&cluster->cluster_lock); | 1879 | lock_readyq(cluster); |
| 1796 | 1880 | ||
| 1797 | TRACE_TASK(t, "to inherit from %s/%d\n", prio_inh->comm, prio_inh->pid); | 1881 | TRACE_TASK(t, "to inherit from %s/%d\n", prio_inh->comm, prio_inh->pid); |
| 1798 | 1882 | ||
| 1799 | __increase_priority_inheritance(t, prio_inh); | 1883 | __increase_priority_inheritance(t, prio_inh); |
| 1800 | 1884 | ||
| 1801 | raw_spin_unlock(&cluster->cluster_lock); | 1885 | unlock_readyq(cluster); |
| 1802 | 1886 | ||
| 1803 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) | 1887 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) |
| 1804 | if(tsk_rt(t)->held_gpus) { | 1888 | if(tsk_rt(t)->held_gpus) { |
| @@ -1962,7 +2046,7 @@ static void decrease_priority_inheritance(struct task_struct* t, | |||
| 1962 | { | 2046 | { |
| 1963 | cedf_domain_t* cluster = task_cpu_cluster(t); | 2047 | cedf_domain_t* cluster = task_cpu_cluster(t); |
| 1964 | 2048 | ||
| 1965 | raw_spin_lock(&cluster->cluster_lock); | 2049 | lock_readyq(cluster); |
| 1966 | 2050 | ||
| 1967 | TRACE_TASK(t, "to inherit from %s/%d (decrease)\n", | 2051 | TRACE_TASK(t, "to inherit from %s/%d (decrease)\n", |
| 1968 | (prio_inh) ? prio_inh->comm : "null", | 2052 | (prio_inh) ? prio_inh->comm : "null", |
| @@ -1970,7 +2054,7 @@ static void decrease_priority_inheritance(struct task_struct* t, | |||
| 1970 | 2054 | ||
| 1971 | __decrease_priority_inheritance(t, prio_inh, budget_tiggered); | 2055 | __decrease_priority_inheritance(t, prio_inh, budget_tiggered); |
| 1972 | 2056 | ||
| 1973 | raw_spin_unlock(&cluster->cluster_lock); | 2057 | unlock_readyq(cluster); |
| 1974 | 2058 | ||
| 1975 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) | 2059 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) |
| 1976 | if(tsk_rt(t)->held_gpus) { | 2060 | if(tsk_rt(t)->held_gpus) { |
| @@ -2438,6 +2522,11 @@ static long cedf_activate_plugin(void) | |||
| 2438 | raw_spin_lock_init(&cedf[i].dgl_lock); | 2522 | raw_spin_lock_init(&cedf[i].dgl_lock); |
| 2439 | #endif | 2523 | #endif |
| 2440 | 2524 | ||
| 2525 | #ifdef RECURSIVE_READY_QUEUE_LOCK | ||
| 2526 | cedf[i].recursive_depth = 0; | ||
| 2527 | atomic_set(&cedf[i].owner_cpu, NO_CPU); | ||
| 2528 | #endif | ||
| 2529 | |||
| 2441 | cedf[i].top_m_size = 0; | 2530 | cedf[i].top_m_size = 0; |
| 2442 | INIT_BINHEAP_HANDLE(&cedf[i].top_m, cedf_min_heap_base_priority_order); | 2531 | INIT_BINHEAP_HANDLE(&cedf[i].top_m, cedf_min_heap_base_priority_order); |
| 2443 | INIT_BINHEAP_HANDLE(&cedf[i].not_top_m, cedf_max_heap_base_priority_order); | 2532 | INIT_BINHEAP_HANDLE(&cedf[i].not_top_m, cedf_max_heap_base_priority_order); |
