author     Zelin Tong <ztong@ludwig.cs.unc.edu>    2020-07-02 00:58:09 -0400
committer  Zelin Tong <ztong@ludwig.cs.unc.edu>    2020-07-02 00:58:09 -0400
commit     098a298ef73dd8dbacf0d697eef2a6f2daa2081c (patch)
tree       546de13acc94765ec9c116b8d8b42632139179a5
parent     e4c5fa6df346a78dfb683d601fd5ad34e6de3375 (diff)
FINAL BUG FIXED VERSION
-rw-r--r--  include/litmus/rt_param.h |   1
-rw-r--r--  include/litmus/trace.h    |   4
-rw-r--r--  kernel/sched/core.c       |   1
-rw-r--r--  litmus/sched_edfsc.c      | 886
4 files changed, 492 insertions, 400 deletions
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 22ce3da19245..fd8410aaabbf 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -122,7 +122,6 @@ struct edfsc_params {
122 cont_domain_t *domain; 122 cont_domain_t *domain;
123 struct task_struct *container_task; 123 struct task_struct *container_task;
124 int id; 124 int id;
125 int can_release; //only used for containers
126 // Moved these to the struct task_struct in include/linux/sched.h so that 125 // Moved these to the struct task_struct in include/linux/sched.h so that
127 // the runtime can build 126 // the runtime can build
128 //struct list_head qnode; 127 //struct list_head qnode;
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index 2646136e3881..dbbd817d2bd1 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -140,8 +140,8 @@ feather_callback void save_cpu_task_latency(unsigned long event, unsigned long w
140#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ 140#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
141#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ 141#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
142 142
143#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ 143#define TS_PLUGIN_TICK_START /* CPU_TIMESTAMP(130) */
144#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ 144#define TS_PLUGIN_TICK_END /* CPU_TIMESTAMP(131) */
145 145
146#define TS_ENTER_NP_START CPU_TIMESTAMP(140) 146#define TS_ENTER_NP_START CPU_TIMESTAMP(140)
147#define TS_ENTER_NP_END CPU_TIMESTAMP(141) 147#define TS_ENTER_NP_END CPU_TIMESTAMP(141)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 98ad911a0c58..fe986d548abf 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3479,6 +3479,7 @@ static void __sched notrace __schedule(bool preempt)
3479 } else { 3479 } else {
3480 lockdep_unpin_lock(&rq->lock, cookie); 3480 lockdep_unpin_lock(&rq->lock, cookie);
3481 TS_SCHED_END(prev); 3481 TS_SCHED_END(prev);
3482 litmus->finish_switch(prev);
3482 raw_spin_unlock_irq(&rq->lock); 3483 raw_spin_unlock_irq(&rq->lock);
3483 } 3484 }
3484 3485
diff --git a/litmus/sched_edfsc.c b/litmus/sched_edfsc.c
index fae6feeac76f..ea2dce57b337 100644
--- a/litmus/sched_edfsc.c
+++ b/litmus/sched_edfsc.c
@@ -29,7 +29,6 @@ typedef struct cont_domain {
29 struct task_struct *container; 29 struct task_struct *container;
30 struct task_struct *scheduled; //fixed task 30 struct task_struct *scheduled; //fixed task
31 lt_t scheduled_last_exec_time; //exec_time of the scheduled task when it was last scheduled 31 lt_t scheduled_last_exec_time; //exec_time of the scheduled task when it was last scheduled
32 lt_t changed_budget; //change to scheduled task's exec time due to container budget constraints
33 u64 f_util; 32 u64 f_util;
34 struct bheap_node *hn; 33 struct bheap_node *hn;
35 struct hrtimer idle_enforcement_timer; 34 struct hrtimer idle_enforcement_timer;
@@ -52,6 +51,8 @@ struct list_head pending_adds;
52 51
53struct list_head migrating_tasks; 52struct list_head migrating_tasks;
54 53
54struct list_head pending_removes;
55
55struct hrtimer container_release_timer; 56struct hrtimer container_release_timer;
56 57
57DEFINE_PER_CPU(cpu_entry_t, edfsc_cpu_entries); 58DEFINE_PER_CPU(cpu_entry_t, edfsc_cpu_entries);
@@ -72,7 +73,7 @@ u64 sys_util;
72int sys_changed; 73int sys_changed;
73 74
74#define is_container(task) ((task) && tsk_rt(task)->edfsc_params.domain != NULL && tsk_rt(task)->domain == &gsched_domain) 75#define is_container(task) ((task) && tsk_rt(task)->edfsc_params.domain != NULL && tsk_rt(task)->domain == &gsched_domain)
75#define is_fixed(task) ((task) && tsk_rt(task)->edfsc_params.container_task != NULL) 76#define is_fixed(task) ((task) && tsk_rt(task)->domain && tsk_rt(task)->domain != &gsched_domain)
76#define is_migrating(task) ((task) && tsk_rt(task)->edfsc_params.domain == NULL && tsk_rt(task)->domain == &gsched_domain) 77#define is_migrating(task) ((task) && tsk_rt(task)->edfsc_params.domain == NULL && tsk_rt(task)->domain == &gsched_domain)
77 78
78#define FP_SHIFT 20 79#define FP_SHIFT 20
@@ -105,16 +106,23 @@ int count_migrating_tasks(void)
105/* Do a backwards comparison based on f_util so that heavier containers 106/* Do a backwards comparison based on f_util so that heavier containers
106 * will come first 107 * will come first
107 */ 108 */
109// Used for best-fit
108static int container_lower_prio(const void *_a, const void *_b) 110static int container_lower_prio(const void *_a, const void *_b)
109{ 111{
110 const cont_domain_t *a = (const cont_domain_t *)(_a); 112 const cont_domain_t* a = *(const cont_domain_t**)(_a);
111 const cont_domain_t *b = (const cont_domain_t *)(_b); 113 const cont_domain_t* b = *(const cont_domain_t**)(_b);
112 if (a->f_util < b->f_util) return 1; 114 return (b->f_util - a->f_util);
113 if (a->f_util > b->f_util) return -1; 115}
114 return 0; 116
117// Used for worst-fit
118static int container_higher_prio(const void *_a, const void *_b)
119{
120 const cont_domain_t* a = *(const cont_domain_t**)(_a);
121 const cont_domain_t* b = *(const cont_domain_t**)(_b);
122 return (a->f_util - b->f_util);
115} 123}
116 124
117/* Finds the task_struct of the hrtimer set by task_exit 125/* Finds the task_struct of a list node
118 */ 126 */
119static struct task_struct* task_of_list_node(struct list_head *node) 127static struct task_struct* task_of_list_node(struct list_head *node)
120{ 128{
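The two comparators above are meant to be passed to sort() over the container_list array of cont_domain_t pointers: sorting ascending by f_util and scanning for the first container with enough room gives worst-fit placement, while the descending sort gives best fit. A minimal userspace sketch of that pattern, using illustrative stand-in types and plain doubles rather than the plugin's fixed-point utilization values:

/* Hedged sketch: worst-fit container selection via qsort, mirroring the
 * container_higher_prio comparator above. The cont_t type, the utilization
 * values, and the 0.02 leeway are illustrative stand-ins, not plugin state. */
#include <stdio.h>
#include <stdlib.h>

typedef struct { int id; double f_util; } cont_t;

/* Ascending utilization: least-loaded container first (worst fit). */
static int higher_prio(const void *_a, const void *_b)
{
	const cont_t *a = *(const cont_t **)_a;
	const cont_t *b = *(const cont_t **)_b;
	if (a->f_util < b->f_util) return -1;
	if (a->f_util > b->f_util) return 1;
	return 0;
}

int main(void)
{
	cont_t c[3] = { {0, 0.70}, {1, 0.20}, {2, 0.45} };
	cont_t *list[3] = { &c[0], &c[1], &c[2] };
	double task_util = 0.30, leeway = 0.02;
	int i;

	qsort(list, 3, sizeof(cont_t *), higher_prio);
	for (i = 0; i < 3; i++) {
		if (1.0 > task_util + list[i]->f_util + leeway) {
			/* Picks container 1, the emptiest one that still fits. */
			printf("worst fit: container %d (util %.2f)\n",
			       list[i]->id, list[i]->f_util);
			break;
		}
	}
	return 0;
}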
@@ -128,7 +136,11 @@ static noinline void requeue(struct task_struct* task)
128 BUG_ON(!task); 136 BUG_ON(!task);
129 /* sanity check before insertion */ 137 /* sanity check before insertion */
130 BUG_ON(is_queued(task)); 138 BUG_ON(is_queued(task));
131 BUG_ON(!is_realtime(task)); 139 BUG_ON(is_migrating(task) && task->rt_param.edfsc_params.container_task != NULL);
140 //BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
141 //BUG_ON(is_completed(task) || (budget_enforced(task) && budget_exhausted(task)));
142 //BUG_ON(is_container(task) && ((cont_domain_t*)task->rt_param.edfsc_params.domain)->timer_armed);
143 //BUG_ON(task && is_completed(task));
132 144
133 if (is_early_releasing(task) || is_released(task, litmus_clock())) { 145 if (is_early_releasing(task) || is_released(task, litmus_clock())) {
134 __add_ready((rt_domain_t *) tsk_rt(task)->domain, task); 146 __add_ready((rt_domain_t *) tsk_rt(task)->domain, task);
@@ -147,10 +159,12 @@ static noinline void requeue(struct task_struct* task)
147static void preempt(cpu_entry_t *entry) 159static void preempt(cpu_entry_t *entry)
148{ 160{
149 BUG_ON(!entry); 161 BUG_ON(!entry);
150 if (is_container(entry->scheduled)) 162 if (is_container(entry->scheduled)) {
151 preempt_if_preemptable(tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled, entry->cpu); 163 preempt_if_preemptable(tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled, entry->cpu);
152 else 164 }
165 else {
153 preempt_if_preemptable(entry->scheduled, entry->cpu); 166 preempt_if_preemptable(entry->scheduled, entry->cpu);
167 }
154} 168}
155 169
156///////////////////////////////////////////////////////////////////////////////////// 170/////////////////////////////////////////////////////////////////////////////////////
@@ -161,7 +175,7 @@ static void preempt(cpu_entry_t *entry)
161 */ 175 */
162 176
163static struct bheap_node* edfsc_cpu_heap_node; // Array of cpu heap nodes 177static struct bheap_node* edfsc_cpu_heap_node; // Array of cpu heap nodes
164static struct bheap edfsc_cpu_heap; 178static struct bheap edfsc_cpu_heap; // Cpu heap
165 179
166static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 180static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
167{ 181{
@@ -215,7 +229,15 @@ static void update_cpu_position(cpu_entry_t *entry)
215 * 229 *
216 */ 230 */
217 231
218//timeout for timer enforcing budget of empty container 232// updates exec_time for container budget tracking
233static void update_container_budget(struct task_struct* t) {
234 lt_t now = litmus_clock();
235 tsk_rt(t)->job_params.exec_time += now
236 - tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time;
237 tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = now;
238}
239
240// timeout for timer enforcing budget of empty container
219static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer) 241static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
220{ 242{
221 cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer); 243 cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer);
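update_container_budget() above charges a container for the wall-clock time elapsed since scheduled_last_exec_time and then advances that checkpoint. A small userspace sketch of the same checkpoint-style accounting, using CLOCK_MONOTONIC and illustrative names rather than LITMUS^RT state:

/* Hedged sketch: checkpoint-based budget accounting in the spirit of
 * update_container_budget(), but in userspace. budget_t and charge() are
 * made-up names for illustration only. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

typedef struct {
	uint64_t exec_time;       /* ns consumed so far in this job */
	uint64_t last_checkpoint; /* ns timestamp of the last charge */
} budget_t;

static uint64_t now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Charge all time elapsed since the last checkpoint to the budget. */
static void charge(budget_t *b)
{
	uint64_t now = now_ns();
	b->exec_time += now - b->last_checkpoint;
	b->last_checkpoint = now;
}

int main(void)
{
	budget_t b = { 0, now_ns() };
	usleep(2000);             /* "run" for about 2 ms */
	charge(&b);
	printf("consumed ~%llu us\n", (unsigned long long)(b.exec_time / 1000));
	return 0;
}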
@@ -223,7 +245,9 @@ static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
223 unsigned long flags; 245 unsigned long flags;
224 246
225 local_irq_save(flags); 247 local_irq_save(flags);
248 BUG_ON(tsk_rt(domain->container)->edfsc_params.id != this_cpu_ptr(&edfsc_cpu_entries)->cpu);
226 domain->timer_armed = 0; 249 domain->timer_armed = 0;
250 tsk_rt(domain->container)->completed = 1;
227 litmus_reschedule_local(); 251 litmus_reschedule_local();
228 local_irq_restore(flags); 252 local_irq_restore(flags);
229 253
@@ -236,21 +260,17 @@ void manage_idle_enforcement_timer(struct task_struct* t)
236 260
237 cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain; 261 cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;
238 now = litmus_clock(); 262 now = litmus_clock();
239 domain->scheduled_last_exec_time = now; 263 BUG_ON(is_completed(t));
240 if (budget_precisely_enforced(t)) { 264 BUG_ON(budget_exhausted(t) && !is_np(t));
241 BUG_ON(budget_exhausted(t) && !is_np(t)); 265
242 if (likely(!is_np(t))) { 266 if (!domain->timer_armed) {
243 //hrtimer_start cancels the timer so don't have to check 267 domain->scheduled_last_exec_time = now;
244 //if it is already armed 268 //hrtimer_start cancels the timer so don't have to check
245 hrtimer_start(&(domain->idle_enforcement_timer), 269 //if it is already armed
246 ns_to_ktime(now + budget_remaining(t)), 270 hrtimer_start(&(domain->idle_enforcement_timer),
247 HRTIMER_MODE_ABS_PINNED); 271 ns_to_ktime(now + budget_remaining(t)),
248 domain->timer_armed = 1; 272 HRTIMER_MODE_ABS_PINNED);
249 } 273 domain->timer_armed = 1;
250 }
251 else if (domain->timer_armed) {
252 hrtimer_try_to_cancel(&(domain->idle_enforcement_timer));
253 domain->timer_armed = 0;
254 } 274 }
255} 275}
256 276
@@ -263,18 +283,19 @@ void cancel_idle_enforcement_timer(struct task_struct* t)
263 283
264/* link_task_to_cpu - Links a migrating task or container to a CPU 284/* link_task_to_cpu - Links a migrating task or container to a CPU
265 * Update the link of a CPU. 285 * Update the link of a CPU.
266 * Handles the case where the to-be-linked task is already
267 * scheduled on a different CPU.
268 */ 286 */
269static noinline void link_task_to_cpu(struct task_struct* linked, 287static noinline void link_task_to_cpu(struct task_struct* linked,
270 cpu_entry_t *entry) 288 cpu_entry_t *entry)
271{ 289{
272 BUG_ON(linked && !is_realtime(linked));
273 BUG_ON(is_fixed(linked)); 290 BUG_ON(is_fixed(linked));
274 BUG_ON(is_container(linked) && tsk_rt(linked)->edfsc_params.id != entry->cpu); 291 BUG_ON(is_container(linked) && tsk_rt(linked)->edfsc_params.id != entry->cpu);
292 BUG_ON(linked && is_queued(linked));
293 //BUG_ON(linked && ((budget_enforced(linked) && budget_exhausted(linked)) || is_completed(linked)));
294 BUG_ON(linked && !is_released(linked, litmus_clock()));
295 //BUG_ON(is_container(linked) && linked->rt_param.edfsc_params.domain->timer_armed);
275 296
276 /* Currently linked task is set to be unlinked. */ 297 /* Currently linked task is set to be unlinked. */
277 if (entry->linked) 298 if (entry->linked && entry->linked->rt_param.linked_on == entry->cpu)
278 entry->linked->rt_param.linked_on = NO_CPU; 299 entry->linked->rt_param.linked_on = NO_CPU;
279 300
280 /* Link new task to CPU. */ 301 /* Link new task to CPU. */
@@ -282,6 +303,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
282 linked->rt_param.linked_on = entry->cpu; 303 linked->rt_param.linked_on = entry->cpu;
283 304
284 entry->linked = linked; 305 entry->linked = linked;
306 BUG_ON(entry->linked && entry->linked->rt_param.linked_on != entry->cpu);
285#ifdef WANT_ALL_SCHED_EVENTS 307#ifdef WANT_ALL_SCHED_EVENTS
286 if (linked) 308 if (linked)
287 TRACE_TASK(linked, "linked to %d.\n", entry->cpu); 309 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
@@ -302,6 +324,7 @@ static noinline void unlink(struct task_struct* t)
302 if (t->rt_param.linked_on != NO_CPU) { 324 if (t->rt_param.linked_on != NO_CPU) {
303 /* unlink */ 325 /* unlink */
304 entry = &per_cpu(edfsc_cpu_entries, t->rt_param.linked_on); 326 entry = &per_cpu(edfsc_cpu_entries, t->rt_param.linked_on);
327 BUG_ON(entry->cpu != t->rt_param.linked_on);
305 t->rt_param.linked_on = NO_CPU; 328 t->rt_param.linked_on = NO_CPU;
306 link_task_to_cpu(NULL, entry); 329 link_task_to_cpu(NULL, entry);
307 BUG_ON(entry->linked || t->rt_param.linked_on != NO_CPU); 330 BUG_ON(entry->linked || t->rt_param.linked_on != NO_CPU);
@@ -320,9 +343,12 @@ static noinline void unlink(struct task_struct* t)
320//TODO change local linking 343//TODO change local linking
321static void g_preempt_check(void) 344static void g_preempt_check(void)
322{ 345{
323 struct task_struct *task; 346 struct task_struct *task, *temp;
324 cpu_entry_t *last, *target; 347 cpu_entry_t *last, *target;
325 348
349 if (!bheap_peek(cpu_lower_prio, &edfsc_cpu_heap))
350 return;
351
326 // Loop through CPUs in priority order, checking if anything needs preemption 352 // Loop through CPUs in priority order, checking if anything needs preemption
327 for (last = lowest_prio_cpu(); 353 for (last = lowest_prio_cpu();
328 edf_preemption_needed(&gsched_domain, last->linked); 354 edf_preemption_needed(&gsched_domain, last->linked);
@@ -331,29 +357,33 @@ static void g_preempt_check(void)
331 357
332 /* preemption necessary */ 358 /* preemption necessary */
333 task = __take_ready(&gsched_domain); 359 task = __take_ready(&gsched_domain);
334 // Don't requeue if budget is exhausted or job is completed 360 // Preempt_check can be called before gschedule, and therefore g_job_completion.
335 if (requeue_preempted_job(last->linked)) 361 // So, a task can be temporarily added to the ready queue, but will quickly be rectified
336 requeue(last->linked); 362 // by either this, or g_job_completion
337 363 if (requeue_preempted_job(task)) {
338 // If we're dequeuing a container, put it on the appropriate core and 364 // Update container budget tracking
339 // move whatever was there before to `last` 365 if (is_container(task)) {
340 if (is_container(task)) { 366 last = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id);
341 target = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id); 367 }
342 TRACE("g_preempt_check: swapping tasks linked on %d and %d\n", 368 else if (is_container(last->linked)) {
343 last->cpu, target->cpu); 369 if (tsk_rt(last->linked)->edfsc_params.domain->timer_armed) {
344 link_task_to_cpu(target->linked, last); 370 update_container_budget(last->linked);
371 }
372 }
373 if (requeue_preempted_job(last->linked)) {
374 requeue(last->linked);
375 }
376 TRACE("g_preempt_check: attempting to link task %d to %d\n",
377 task->pid, target->cpu);
378 link_task_to_cpu(task, last);
345 preempt(last); 379 preempt(last);
346 } 380 }
347 TRACE("g_preempt_check: attempting to link task %d to %d\n",
348 task->pid, target->cpu);
349 link_task_to_cpu(task, target);
350 preempt(target);
351 } 381 }
352} 382}
353 383
354static int c_preempt_check(cont_domain_t *container) 384static int c_preempt_check(cont_domain_t *container)
355{ 385{
356 if (is_migrating(container->scheduled) 386 if ((is_migrating(container->scheduled) && __peek_ready(&container->domain))
357 || edf_preemption_needed(&container->domain, container->scheduled)) { 387 || edf_preemption_needed(&container->domain, container->scheduled)) {
358 preempt(&per_cpu(edfsc_cpu_entries, tsk_rt(container->container)->edfsc_params.id)); 388 preempt(&per_cpu(edfsc_cpu_entries, tsk_rt(container->container)->edfsc_params.id));
359 return 1; 389 return 1;
@@ -362,6 +392,7 @@ static int c_preempt_check(cont_domain_t *container)
362 } 392 }
363} 393}
364 394
395// Callback for new global job release
365static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks) 396static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks)
366{ 397{
367 unsigned long flags; 398 unsigned long flags;
@@ -374,6 +405,7 @@ static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks)
374 raw_spin_unlock_irqrestore(&g_lock, flags); 405 raw_spin_unlock_irqrestore(&g_lock, flags);
375} 406}
376 407
408// Callback for new container release
377static int c_check_resched(rt_domain_t *edf) 409static int c_check_resched(rt_domain_t *edf)
378{ 410{
379 cont_domain_t *cont_dom = container_of(edf, cont_domain_t, domain); 411 cont_domain_t *cont_dom = container_of(edf, cont_domain_t, domain);
@@ -386,6 +418,7 @@ static int c_check_resched(rt_domain_t *edf)
386static void g_remove_task(struct task_struct *t) 418static void g_remove_task(struct task_struct *t)
387{ 419{
388 BUG_ON(is_container(t)); 420 BUG_ON(is_container(t));
421 //BUG_ON(get_rt_utilization(t) > m_util);
389 m_util -= get_rt_utilization(t); 422 m_util -= get_rt_utilization(t);
390 sys_util -= get_rt_utilization(t); 423 sys_util -= get_rt_utilization(t);
391 sys_changed = 1; 424 sys_changed = 1;
@@ -393,9 +426,8 @@ static void g_remove_task(struct task_struct *t)
393 426
394static void c_remove_task(struct task_struct *t) 427static void c_remove_task(struct task_struct *t)
395{ 428{
396 struct task_struct* container_task = tsk_rt(t)->edfsc_params.container_task; 429 //BUG_ON(get_rt_utilization(t) > container_domains[tsk_rt(t)->task_params.cpu].f_util);
397 tsk_rt(container_task)->edfsc_params.domain->f_util -= 430 container_domains[tsk_rt(t)->task_params.cpu].f_util -= get_rt_utilization(t);
398 get_rt_utilization(t);
399 sys_util -= get_rt_utilization(t); 431 sys_util -= get_rt_utilization(t);
400 sys_changed = 1; 432 sys_changed = 1;
401} 433}
@@ -415,10 +447,11 @@ static void migrate_task(struct task_struct *t)
415 remove(tsk_rt(t)->domain, t); 447 remove(tsk_rt(t)->domain, t);
416 // Remove the util of the "fake reservation task"(specified by the paper) from the system 448 // Remove the util of the "fake reservation task"(specified by the paper) from the system
417 sys_util -= get_rt_utilization(t); 449 sys_util -= get_rt_utilization(t);
418 prepare_for_next_period(t); 450 m_util -= get_rt_utilization(t);
419 tsk_rt(t)->domain = (rt_domain_t*)tsk_rt(t)->edfsc_params.move_to; 451 tsk_rt(t)->domain = (rt_domain_t*)tsk_rt(t)->edfsc_params.move_to;
420 tsk_rt(t)->edfsc_params.container_task = tsk_rt(t)->edfsc_params.move_to->container; 452 tsk_rt(t)->edfsc_params.container_task = tsk_rt(t)->edfsc_params.move_to->container;
421 requeue(t); 453 requeue(t);
454 c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
422 tsk_rt(t)->edfsc_params.move_to = NULL; 455 tsk_rt(t)->edfsc_params.move_to = NULL;
423 sys_changed = 1; 456 sys_changed = 1;
424} 457}
@@ -429,11 +462,16 @@ static void migrate_task(struct task_struct *t)
429 * Note: This is shared by container_boundary() and g_task_completion(). 462 * Note: This is shared by container_boundary() and g_task_completion().
430 */ 463 */
431static void c_release(struct task_struct *t) { 464static void c_release(struct task_struct *t) {
432 cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id); 465 cpu_entry_t* entry;
466
467 BUG_ON(!is_container(t));
468 BUG_ON(t->rt_param.edfsc_params.domain->timer_armed);
469
470 entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
471 tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
433 prepare_for_next_period(t); 472 prepare_for_next_period(t);
434 if (is_early_releasing(t) || is_released(t, litmus_clock())) 473 if (is_early_releasing(t) || is_released(t, litmus_clock()))
435 sched_trace_task_release(t); 474 sched_trace_task_release(t);
436 tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
437 /* If this container is fully provisioned, remove it from gsched_domain, 475 /* If this container is fully provisioned, remove it from gsched_domain,
438 * edfsc_cpu_heap, and disable the idle enforcement timer. If not, restore. 476 * edfsc_cpu_heap, and disable the idle enforcement timer. If not, restore.
439 */ 477 */
@@ -441,14 +479,18 @@ static void c_release(struct task_struct *t) {
441 // Make this cpu unavailable to the global scheduler 479 // Make this cpu unavailable to the global scheduler
442 if (bheap_node_in_heap(entry->hn)) 480 if (bheap_node_in_heap(entry->hn))
443 remove_cpu_from_global(entry); 481 remove_cpu_from_global(entry);
444 // Fully provisioned containers always run, so just set this here
445 if (entry->linked != t)
446 link_task_to_cpu(t, entry);
447 // Note that we no longer need the global scheduler to schedule us 482 // Note that we no longer need the global scheduler to schedule us
448 if (is_queued(t)) 483 if (is_queued(t)) {
449 remove(&gsched_domain, t); 484 remove(&gsched_domain, t);
450 // Fully provisioned containers always run, so idle enforcement is superfluous 485 }
451 cancel_idle_enforcement_timer(t); 486 // Fully provisioned containers always run, so just set this here
487 if (entry->linked != t) {
488 BUG_ON(is_container(entry->linked));
489 if (requeue_preempted_job(entry->linked)) {
490 requeue(entry->linked);
491 }
492 link_task_to_cpu(t, entry);
493 }
452 tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = litmus_clock(); 494 tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = litmus_clock();
453 // Run schedule again to make sure that we're run 495 // Run schedule again to make sure that we're run
454 preempt(entry); 496 preempt(entry);
@@ -457,13 +499,9 @@ static void c_release(struct task_struct *t) {
457 if (!bheap_node_in_heap(entry->hn)) 499 if (!bheap_node_in_heap(entry->hn))
458 add_cpu_to_global(entry); 500 add_cpu_to_global(entry);
459 // Note that container's aren't real tasks and thus can't block 501 // Note that container's aren't real tasks and thus can't block
460 // Let g_preempt_check() decide what to run, don't impose
461 unlink(t); 502 unlink(t);
462 // Request to be scheduled globally again 503 // Request to be scheduled globally again
463 if (!is_queued(t)) 504 requeue(t);
464 requeue(t);
465 // Re-run our EDF scheduling to adjust for the added core
466 g_preempt_check();
467 } 505 }
468} 506}
469 507
@@ -476,15 +514,19 @@ static noinline void g_job_completion(struct task_struct* t, int forced)
476 514
477 TRACE_TASK(t, "g_job_completion(forced=%d).\n", forced); 515 TRACE_TASK(t, "g_job_completion(forced=%d).\n", forced);
478 516
479 tsk_rt(t)->completed = 0;
480 unlink(t); 517 unlink(t);
518 tsk_rt(t)->completed = 0;
481 519
482 // When a migrating task is being turned turned into a fixed task 520 // When a migrating task is being turned turned into a fixed task
483 if (is_migrating(t) && tsk_rt(t)->edfsc_params.move_to) { 521 if (is_migrating(t) && tsk_rt(t)->edfsc_params.move_to) {
484 if (t->rt_param.job_params.lateness > 0) { 522 prepare_for_next_period(t);
485 // Don't wait if late 523 if (is_early_releasing(t) || is_released(t, litmus_clock()))
524 sched_trace_task_release(t);
525 if (tsk_rt(t)->job_params.lateness > 0) {
526 // Don't wait if prev job was tardy
486 migrate_task(t); 527 migrate_task(t);
487 } else { 528 } else {
529 list_add(&t->edfsc_qnode, &pending_removes);
488 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(get_deadline(t)), 530 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(get_deadline(t)),
489 HRTIMER_MODE_ABS_PINNED); 531 HRTIMER_MODE_ABS_PINNED);
490 } 532 }
@@ -499,46 +541,17 @@ static noinline void g_job_completion(struct task_struct* t, int forced)
499 requeue(t); 541 requeue(t);
500 g_preempt_check(); 542 g_preempt_check();
501 } 543 }
502 /* A container may be in several different states when it finishes. It may:
503 * - Be scheduling a migrating task that is finished, blocked, or out of budget
504 * - Be scheduling a fixed task
505 * - Be scheduling nothing
506 * If there's a migrating task being scheduled, we can't unconditionally
507 * requeue it. Often, we may actually have to call g_job_completion() on
508 * that migrating task. If we finish while running a fixed task, we just
509 * "freeze" it in the container - edfsc_cschedule() will take care of
510 * processing its state when the container is rescheduled.
511 *
512 * If the container is tardy, we process its scheduled task as in the non-
513 * tardy case, then just immediately call c_release() on the container.
514 */
515 } else if (is_container(t)) { 544 } else if (is_container(t)) {
516 /*
517 struct task_struct** child = &tsk_rt(t)->edfsc_params.domain->scheduled;
518 // No need to handle fixed tasks, cschedule will do that when it runs next
519 if (*child && is_migrating(*child)) {
520 BUG_ON(is_queued(*child));
521 // If migrating and done
522 if (is_completed(*child) || (budget_enforced(*child) && budget_exhausted(*child))) {
523 g_job_completion(*child, budget_enforced(*child) && budget_exhausted(*child));
524 // If migrating and blocked
525 } else if (!is_current_running()) {
526 unlink(*child);
527 // Otherwise it can keep running globally
528 } else {
529 requeue(*child);
530 }
531 // Regardless, we never "freeze" a migrating task in a container
532 *child = NULL;
533 }
534 */
535 // When a container job finishes late, release it immediately 545 // When a container job finishes late, release it immediately
536 if (tsk_rt(t)->edfsc_params.can_release) { 546 if (get_deadline(t) < litmus_clock()) {
537 tsk_rt(t)->edfsc_params.can_release = 0;
538 c_release(t); 547 c_release(t);
548 g_preempt_check();
539 if (get_rt_utilization(t) == to_fp(1)) 549 if (get_rt_utilization(t) == to_fp(1))
540 manage_idle_enforcement_timer(t); 550 manage_idle_enforcement_timer(t);
541 } 551 }
552 else {
553 tsk_rt(t)->completed = 1;
554 }
542 } 555 }
543} 556}
544 557
@@ -557,33 +570,25 @@ static void c_job_completion(struct task_struct* t, int forced)
557// As long as this only touches CPU-local state, it shouldn't need g_lock: 570// As long as this only touches CPU-local state, it shouldn't need g_lock:
558static void g_finish_switch(struct task_struct *prev) 571static void g_finish_switch(struct task_struct *prev)
559{ 572{
573 unsigned long flags;
560 cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries); 574 cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
561 struct task_struct* container = &container_tasks[entry->cpu]; 575 struct task_struct* container = &container_tasks[entry->cpu];
562 unsigned long flags; 576 raw_spin_lock_irqsave(&g_lock, flags);
563 BUG_ON(is_realtime(current) && tsk_rt(current)->domain == NULL);
564
565 // FIXME: It's really expensive to put a lock in here, but since we touch
566 // members of entry multiple times, we have to lock. Otherwise we
567 // may make an if branch based off entry->linked, and then have it
568 // change before we can set entry->scheduled.
569 //raw_spin_lock_irqsave(&g_lock, flags);
570 preempt_disable();
571 entry->scheduled = is_realtime(current) ? current : NULL; 577 entry->scheduled = is_realtime(current) ? current : NULL;
572 // If we're scheduling a task in a container, set entry->scheduled to the container 578 // If we're scheduling a task in a container, set entry->scheduled to the container
573 if (entry->scheduled) { 579 if (entry->scheduled) {
574 if (tsk_rt(container)->edfsc_params.domain->scheduled == entry->scheduled) 580 if (entry->scheduled->rt_param.edfsc_params.container_task) {
575 entry->scheduled = container; 581 entry->scheduled = entry->scheduled->rt_param.edfsc_params.container_task;
582 }
576 } 583 }
577 // occurs when current is non-rt, and linked is a container 584 // occurs when current is non-rt, and linked is a container
578 // this happens when an empty container "task" is supposed to be current 585 // this happens when an empty container "task" is supposed to be current
579 // but because it's not a real task, a non-rt task is current instead 586 // but because it's not a real task, a non-rt task is current instead
580 else if (is_container(entry->linked)) { 587 else if (tsk_rt(container)->scheduled_on != NO_CPU){
581 entry->scheduled = entry->linked; 588 entry->scheduled = container;
582 } 589 }
583 590
584 BUG_ON(is_fixed(entry->scheduled)); 591 raw_spin_unlock_irqrestore(&g_lock, flags);
585 //raw_spin_unlock_irqrestore(&g_lock, flags);
586 preempt_enable();
587#ifdef WANT_ALL_SCHED_EVENTS 592#ifdef WANT_ALL_SCHED_EVENTS
588 TRACE_TASK(prev, "switched away from\n"); 593 TRACE_TASK(prev, "switched away from\n");
589#endif 594#endif
@@ -600,12 +605,14 @@ static int fifo_prio(struct bheap_node* _a, struct bheap_node* _b)
600 * @param cedf Pointer to tsk_rt(container)->edfsc_params->domain 605 * @param cedf Pointer to tsk_rt(container)->edfsc_params->domain
601 * @param prev Previous task running on this processor before schedule was called 606 * @param prev Previous task running on this processor before schedule was called
602 */ 607 */
603static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev) 608static noinline void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
604{ 609{
605 rt_domain_t *edf = &cedf->domain; 610 rt_domain_t *edf = &cedf->domain;
606 611
607 struct task_struct* next; 612 struct task_struct* next;
613 struct task_struct* other_t;
608 struct bheap temp; 614 struct bheap temp;
615 cpu_entry_t *this_entry, *other_entry;
609 int out_of_time, sleep, preempt, 616 int out_of_time, sleep, preempt,
610 np, exists, blocks, resched; 617 np, exists, blocks, resched;
611 // XXX: The scheduler we copied this from also used `cont_out_of_time`. Is 618 // XXX: The scheduler we copied this from also used `cont_out_of_time`. Is
@@ -620,13 +627,13 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
620 627
621 /* (0) Determine state */ 628 /* (0) Determine state */
622 exists = cedf->scheduled != NULL; 629 exists = cedf->scheduled != NULL;
623 blocks = exists && !is_current_running(); 630 blocks = exists && current == cedf->scheduled && !is_current_running();
624 out_of_time = exists && budget_enforced(cedf->scheduled) 631 out_of_time = exists && budget_enforced(cedf->scheduled)
625 && budget_exhausted(cedf->scheduled); 632 && budget_exhausted(cedf->scheduled);
626 np = exists && is_np(cedf->scheduled); 633 np = exists && is_np(cedf->scheduled);
627 sleep = exists && is_completed(cedf->scheduled); 634 sleep = exists && is_completed(cedf->scheduled);
628 preempt = (is_migrating(cedf->scheduled) && __peek_ready(edf)) || 635 preempt = (is_migrating(cedf->scheduled) && __peek_ready(edf)) ||
629 (exists && edf_preemption_needed(edf, cedf->scheduled)); 636 edf_preemption_needed(edf, cedf->scheduled);
630 637
631 /* If we need to preempt do so. 638 /* If we need to preempt do so.
632 * The following checks set resched to 1 in case of special 639 * The following checks set resched to 1 in case of special
@@ -647,15 +654,19 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
647 if (!np && (out_of_time || sleep)) { 654 if (!np && (out_of_time || sleep)) {
648 if (is_fixed(cedf->scheduled)) 655 if (is_fixed(cedf->scheduled))
649 c_job_completion(cedf->scheduled, !sleep); 656 c_job_completion(cedf->scheduled, !sleep);
650 else 657 else {
658 tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
651 g_job_completion(cedf->scheduled, !sleep); 659 g_job_completion(cedf->scheduled, !sleep);
660 }
652 resched = 1; 661 resched = 1;
653 } 662 }
654
655 // Deschedule any background jobs if a fixed task is ready 663 // Deschedule any background jobs if a fixed task is ready
656 if (is_migrating(cedf->scheduled) || preempt) { 664 else if (!np && preempt) {
657 if (!sleep && !out_of_time && !blocks && !is_queued(cedf->scheduled)) 665 if (!blocks && cedf->scheduled && !is_queued(cedf->scheduled)) {
666 if (is_migrating(cedf->scheduled))
667 tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
658 requeue(cedf->scheduled); 668 requeue(cedf->scheduled);
669 }
659 resched = 1; 670 resched = 1;
660 } 671 }
661 672
@@ -665,28 +676,40 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
665 */ 676 */
666 next = NULL; 677 next = NULL;
667 if (blocks || !exists || (!np && resched)) { 678 if (blocks || !exists || (!np && resched)) {
668 /*if (exists && !out_of_time && !sleep && !is_queued(cedf->scheduled)) { 679 BUG_ON(cedf->scheduled && !blocks && !out_of_time && !sleep && !is_migrating(cedf->scheduled) && !is_queued(cedf->scheduled));
669 requeue(cedf->scheduled);
670 }*/
671 next = __take_ready(edf); 680 next = __take_ready(edf);
681 // Check for direct swap (1->2, 2->1) scenarios, which can cause deadlock
682 /*if (next) {
683 other_entry = &per_cpu(edfsc_cpu_entries, next->cpu);
684 this_entry = this_cpu_ptr(&edfsc_cpu_entries);
685 if (other_entry != this_entry
686 && other_entry->cpu == this_entry->scheduled->cpu) {
687 requeue(next);
688 next = NULL;
689 }
690 }*/
672 } else if (exists) { 691 } else if (exists) {
673 // This is safe when background scheduling, as we can only get here if 692 // This is safe when background scheduling, as we can only get here if
674 // there were no other fixed tasks ready to run. 693 // there were no other fixed tasks ready to run.
694 BUG_ON(is_queued(cedf->scheduled));
675 next = cedf->scheduled; 695 next = cedf->scheduled;
676 } 696 }
677 697
698 this_entry = this_cpu_ptr(&edfsc_cpu_entries);
678 if (next) { 699 if (next) {
679 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 700 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
701 // Give the container a little breathing room, otherwise, the core will be pounded with work
702 // Will often trigger watchdog due to continous execution
680 } else { 703 } else {
681 // Find a task in gsched_domain that isn't a container to background schedule 704 // Find a task in gsched_domain that isn't a container to background schedule
682 bheap_init(&temp); // XXX this seems inefficient - maybe use a global temp? 705 bheap_init(&temp);
683 next = __take_ready(&gsched_domain); 706 next = __take_ready(&gsched_domain);
684 while (is_container(next)) { 707 while (is_container(next) || (is_migrating(next) && next->cpu != this_entry->cpu)) {
685 bheap_insert(fifo_prio, &temp, tsk_rt(next)->heap_node); 708 bheap_insert(fifo_prio, &temp, tsk_rt(next)->heap_node);
686 next = __take_ready(&gsched_domain); 709 next = __take_ready(&gsched_domain);
687 BUG_ON(next && is_queued(next));
688 } 710 }
689 if (next) { 711 if (next) {
712 tsk_rt(next)->edfsc_params.container_task = cedf->container;
690 TRACE_TASK(next, "background scheduling at %llu\n", litmus_clock()); 713 TRACE_TASK(next, "background scheduling at %llu\n", litmus_clock());
691 } else { 714 } else {
692 TRACE("container becomes idle at %llu\n", litmus_clock()); 715 TRACE("container becomes idle at %llu\n", litmus_clock());
@@ -696,6 +719,22 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
696 } 719 }
697 } 720 }
698 721
722 if (next && next->cpu != this_entry->cpu) {
723 other_entry = &per_cpu(edfsc_cpu_entries, next->cpu);
724 other_t = is_container(other_entry->linked) ?
725 other_entry->linked->rt_param.edfsc_params.domain->scheduled : other_entry->linked;
726 // If we detect a direct swap, and the other task has already gone through gschedule
727 // To prevent a deadlock, we let them go first and reschedule
728 if (other_t && other_t->cpu == this_entry->cpu) {
729 if (is_migrating(other_t) || other_entry->linked->rt_param.scheduled_on == other_entry->cpu) {
730 if (is_migrating(next))
731 next->rt_param.edfsc_params.container_task = NULL;
732 requeue(next);
733 next = NULL;
734 }
735 }
736 }
737
699 cedf->scheduled = next; 738 cedf->scheduled = next;
700} 739}
701 740
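The block added above guards against a direct swap: this CPU wants to pull the task currently placed on another CPU while that CPU is simultaneously pulling the task placed here, so one side backs off and requeues its candidate to avoid the two cores waiting on each other. A toy sketch of the detection idea, with a made-up array standing in for the per-CPU linked/scheduled state:

/* Hedged sketch of the direct-swap (1->2, 2->1) check. next_cpu[] is an
 * illustrative stand-in for "which CPU's current task this core wants". */
#include <stdio.h>

int main(void)
{
	int next_cpu[2] = { 1, 0 };   /* CPU 0 wants 1's task, CPU 1 wants 0's */
	int this_cpu = 0;
	int other = next_cpu[this_cpu];

	if (next_cpu[other] == this_cpu) {
		/* Direct swap detected: let the other core go first, requeue here. */
		printf("CPU %d backs off and requeues its candidate\n", this_cpu);
	} else {
		printf("CPU %d proceeds\n", this_cpu);
	}
	return 0;
}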
@@ -712,53 +751,49 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
712 751
713 /* sanity checking */ 752 /* sanity checking */
714 BUG_ON(entry->scheduled && entry->scheduled != prev && !is_container(entry->scheduled)); 753 BUG_ON(entry->scheduled && entry->scheduled != prev && !is_container(entry->scheduled));
715 //BUG_ON(entry->scheduled && entry->scheduled != prev && is_realtime(prev) &&
716 // (cont_domain_t*)tsk_rt(prev)->domain != tsk_rt(entry->scheduled)->edfsc_params.domain);
717 //BUG_ON(entry->scheduled && entry->scheduled != prev && is_realtime(prev) &&
718 // prev != tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled);
719 // It's okay for the previously scheduled task to not be rt if we think a 754 // It's okay for the previously scheduled task to not be rt if we think a
720 // container task is scheduled and the container doesn't have any pending 755 // container task is scheduled and the container doesn't have any pending
721 // jobs of fixed tasks. 756 // jobs of fixed tasks.
722 BUG_ON(entry->scheduled && !is_container(entry->scheduled) && !is_realtime(prev)); 757 BUG_ON(entry->scheduled && !is_container(entry->scheduled) && !is_realtime(prev));
723 // Bug if we didn't think anything was scheduled, but a realtime task was running on our CPU 758 // Bug if we didn't think anything was scheduled, but a realtime task was running on our CPU
724 BUG_ON(is_realtime(prev) && tsk_rt(prev)->linked_on != NO_CPU && !entry->scheduled); 759 //BUG_ON(is_realtime(prev) && tsk_rt(prev)->linked_on != NO_CPU && !entry->scheduled);
725
726 if (is_container(entry->scheduled)) {
727 lt_t now = litmus_clock();
728 tsk_rt(entry->scheduled)->job_params.exec_time += now
729 - tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled_last_exec_time;
730 tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled_last_exec_time = now;
731 }
732 760
733 /* (0) Determine state */ 761 /* (0) Determine state */
734 exists = entry->scheduled != NULL; 762 exists = entry->scheduled != NULL;
735 is_cont = is_container(entry->scheduled); 763 is_cont = is_container(entry->scheduled);
736 blocks = exists && !is_cont && !is_current_running(); 764 blocks = exists && !is_cont && !is_current_running();
737 out_of_time = exists && budget_enforced(entry->scheduled)
738 && budget_exhausted(entry->scheduled);
739 np = exists && !is_cont && is_np(entry->scheduled); 765 np = exists && !is_cont && is_np(entry->scheduled);
740 sleep = exists && !is_cont && is_completed(entry->scheduled); 766 sleep = exists && is_completed(entry->scheduled);
741 preempted = entry->scheduled != entry->linked; 767 preempted = entry->scheduled != entry->linked;
742 768
769 /* Manually track container budget */
770 if (is_cont && (tsk_rt(entry->scheduled)->edfsc_params.domain->timer_armed || sleep)) {
771 update_container_budget(entry->scheduled);
772 out_of_time = exists && budget_enforced(entry->scheduled)
773 && budget_exhausted(entry->scheduled);
774 /* Cancel container enforcement timer if container is fully provisioned and out of sync with
775 * container_boundary, or if it is currently being scheduled in gedf
776 */
777 if (bheap_node_in_heap(entry->hn) || (!bheap_node_in_heap(entry->hn) && out_of_time))
778 cancel_idle_enforcement_timer(entry->scheduled);
779 }
780 else {
781 out_of_time = exists && budget_enforced(entry->scheduled)
782 && budget_exhausted(entry->scheduled);
783 }
743 784
744 if (exists) 785 if (exists)
745 TRACE_TASK(prev, 786 TRACE_TASK(prev,
746 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " 787 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
747 "state:%d sig:%d is_cont:%d\n", 788 "state:%d sig:%d is_cont:%d\n",
748 blocks, out_of_time, np, sleep, preempt, 789 blocks, out_of_time, np, sleep, preempt,
749 prev->state, signal_pending(prev), is_cont); 790 prev->state, signal_pending(prev), is_cont);
750 791
751 if (entry->linked && preempted) 792 if (entry->linked && preempted)
752 TRACE_TASK(prev, "will be preempted by %s/%d\n", 793 TRACE_TASK(prev, "will be preempted by %s/%d\n",
753 entry->linked->comm, entry->linked->pid); 794 entry->linked->comm, entry->linked->pid);
754
755 if (exists && preempted && !is_queued(entry->scheduled))
756 requeue(entry->scheduled);
757 795
758 /* If a task blocks we have no choice but to reschedule. 796 // If a task blocks we have no choice but to reschedule.
759 * Note: containers never block, so if blocks is true and we're background
760 * scheduling, we want to unlink `prev` NOT `entry->scheduled`.
761 */
762 if (blocks) 797 if (blocks)
763 unlink(prev); 798 unlink(prev);
764 799
@@ -778,76 +813,49 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
778 * for blocked jobs). 813 * for blocked jobs).
779 */ 814 */
780 if (!np && (out_of_time || sleep)) { 815 if (!np && (out_of_time || sleep)) {
781 // This is not a global job completion if we're in a fully provisioned container 816 g_job_completion(entry->scheduled, !sleep);
782 if (bheap_node_in_heap(entry->hn))
783 g_job_completion(entry->scheduled, !sleep);
784 else
785 unlink(entry->scheduled);
786 } 817 }
787 818
788 // We should have descheduled globally scheduled tasks without budget by now 819 BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked && !is_container(entry->linked));
789 BUG_ON(entry->linked && budget_enforced(entry->linked) && budget_exhausted(entry->linked)); 820
790 821 if (!entry->linked && bheap_node_in_heap(entry->hn)) {
791 // Determine what to run next (set entry->linked) 822 g_preempt_check();
792 if (!entry->linked) {
793 struct task_struct* task = __take_ready(&gsched_domain);
794 // Make sure that containers are only scheduled on cores with same id
795 if (is_container(task) && entry->cpu != tsk_rt(task)->edfsc_params.id) {
796 // Get cpu_entry for task's core assignment
797 cpu_entry_t* target = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id);
798 // Make sure that someone didn't requeue `task` without unlinking it
799 BUG_ON(target->linked && target->linked == task);
800 // Move their linked task to us
801 link_task_to_cpu(target->linked, entry);
802 // Setup the container to run next on the remote core
803 link_task_to_cpu(task, target);
804 // Alert the remote core that it now needs to reschedule
805 preempt(target);
806 } else if (task) {
807 // We'll now schedule the ready task here
808 link_task_to_cpu(task, entry);
809 // Tasks on the ready queue should never be out of budget, so it's safe
810 // to continue the scheduling process from this point on.
811 }
812 } 823 }
813 824
814 BUG_ON(entry->linked && budget_enforced(entry->linked) && budget_exhausted(entry->linked)); 825 BUG_ON(entry->linked && is_queued(entry->linked));
815 BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu); 826 BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked
827 && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);
816 BUG_ON(is_container(entry->linked) && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu); 828 BUG_ON(is_container(entry->linked) && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);
817 829
818 /* The final scheduling decision. Do we need to switch for some reason? 830 /* The final scheduling decision. Do we need to switch for some reason?
819 * If linked is different from scheduled, then select linked as next. 831 * If linked is different from scheduled, then select linked as next.
820 */ 832 */
821 if ((!np || blocks) && entry->linked != entry->scheduled) { 833 if ((!np || blocks) && entry->linked != entry->scheduled) {
822 /* Schedule a linked job? */ 834 // Set the newly linked job to be scheduled
823 if (entry->linked) { 835 if (entry->linked) {
824 next = entry->linked; 836 next = entry->linked;
837 tsk_rt(entry->linked)->scheduled_on = entry->cpu;
838 BUG_ON(is_queued(entry->linked));
825 TRACE_TASK(next, "scheduled on P%d\n", smp_processor_id()); 839 TRACE_TASK(next, "scheduled on P%d\n", smp_processor_id());
826 } 840 }
827 // Note what was running before 841 // Set the previously linked to to be unscheduled
828 if (entry->scheduled) { 842 if (entry->scheduled) {
843 /* When a scheduled is linked to another cpu, from this cpu, there's no guarantee on the order
844 * in which gschedule is called on both cpus. If it has already have scheduled_on set to the other
845 * cpu, then we have to preserve it and can't just set it to NO_CPU
846 */
847 if (tsk_rt(entry->scheduled)->scheduled_on == entry->cpu) {
848 tsk_rt(entry->scheduled)->scheduled_on = NO_CPU;
849 }
829 TRACE_TASK(entry->scheduled, "descheduled\n"); 850 TRACE_TASK(entry->scheduled, "descheduled\n");
830 } 851 }
831 } else if (entry->scheduled) { 852 } else if (entry->scheduled) {
832 // If we've been running a container, make sure that it has nothing new to schedule 853 next = entry->scheduled;
833 if (is_container(entry->scheduled)) 854 tsk_rt(next)->scheduled_on = entry->cpu;
834 next = entry->scheduled;
835 // Otherwise we can keep running any tasks we previously scheduled
836 else if (is_realtime(prev))
837 next = prev;
838 } 855 }
856 BUG_ON(next && get_exec_time(next) > get_exec_cost(next));
839 857
840 // Tell LITMUS^RT that we choose a task and are done scheduling after return 858 // If next is a container, then perform cschedule to determine the fixed task to schedule
841 sched_state_task_picked();
842
843 // When we transition from doing background scheduling to doing normal
844 // scheduling, we may schedule the same task. Unfortunately, when this
845 // happens, g_finish_switch() will /not/ be called. Fix the state manually.
846 temp = entry->scheduled;
847 entry->scheduled = next;
848
849 // if no fixed tasks to be scheduled by the container, then container->scheduled
850 // should be the previous non-rt task if any
851 if (is_container(next)) { 859 if (is_container(next)) {
852 edfsc_cschedule(tsk_rt(next)->edfsc_params.domain, prev); 860 edfsc_cschedule(tsk_rt(next)->edfsc_params.domain, prev);
853 if (bheap_node_in_heap(entry->hn)) 861 if (bheap_node_in_heap(entry->hn))
@@ -855,26 +863,30 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
855 next = tsk_rt(next)->edfsc_params.domain->scheduled; 863 next = tsk_rt(next)->edfsc_params.domain->scheduled;
856 } 864 }
857 // When next is migrating, but previously scheduled realtime task is a container 865 // When next is migrating, but previously scheduled realtime task is a container
858 // must properly restore background scheduled task to its correct queue/heap 866 // must properly restore background scheduled task(if any) to its correct queue/heap
859 else if (is_container(temp) && next != temp) { 867 else if (is_container(entry->scheduled) && next != entry->scheduled) {
860 struct task_struct** child = &tsk_rt(temp)->edfsc_params.domain->scheduled; 868 struct task_struct** child = &tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled;
861 // No need to handle fixed tasks, cschedule will do that when it runs next 869 // No need to handle fixed tasks, cschedule will do that when it runs next
862 if (*child && is_migrating(*child)) { 870 if (*child && is_migrating(*child)) {
871 int background_out_of_time = budget_enforced(*child) && budget_exhausted(*child);
863 BUG_ON(is_queued(*child)); 872 BUG_ON(is_queued(*child));
873 BUG_ON(tsk_rt(*child)->linked_on != NO_CPU);
874 tsk_rt(*child)->edfsc_params.container_task = NULL;
864 // If migrating and done 875 // If migrating and done
865 if (is_completed(*child) || (budget_enforced(*child) && budget_exhausted(*child))) { 876 if (is_completed(*child) || background_out_of_time) {
866 g_job_completion(*child, budget_enforced(*child) && budget_exhausted(*child)); 877 g_job_completion(*child, background_out_of_time);
867 // If migrating and blocked 878 // If migrating and not blocked
868 } else if (!is_current_running()) { 879 } else if (is_current_running()) {
869 unlink(*child);
870 // Otherwise it can keep running globally
871 } else {
872 requeue(*child); 880 requeue(*child);
873 } 881 }
874 // Regardless, we never "freeze" a migrating task in a container 882 // Regardless, we never "freeze" a migrating task in a container
875 *child = NULL; 883 *child = NULL;
876 } 884 }
877 } 885 }
886 BUG_ON(is_migrating(entry->scheduled) && !tsk_rt(entry->scheduled)->edfsc_params.container_task
887 && !blocks && tsk_rt(entry->scheduled)->linked_on == NO_CPU && !is_queued(entry->scheduled));
888 // Tell LITMUS^RT that we choose a task and are done scheduling after return
889 sched_state_task_picked();
878 890
879 raw_spin_unlock_irqrestore(&g_lock, flags); 891 raw_spin_unlock_irqrestore(&g_lock, flags);
880 892
@@ -899,39 +911,32 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
899 int i; 911 int i;
900 struct list_head *it; 912 struct list_head *it;
901 struct list_head *temp; 913 struct list_head *temp;
902 u64 u_extra; 914 u64 u_extra, leeway;
903 cont_domain_t *container; 915 cont_domain_t *container;
904 struct task_struct *t; 916 struct task_struct *t;
905 lt_t now;
906 int num_cpus = num_online_cpus(); 917 int num_cpus = num_online_cpus();
907 unsigned long flags; 918 unsigned long flags;
908 919
909 raw_spin_lock_irqsave(&g_lock, flags); 920 TS_SCHED_TIMER_START
910
911 now = litmus_clock();
912 921
913 // Update budget tracking for containers 922 raw_spin_lock_irqsave(&g_lock, flags);
914 for (i = 0; i < num_cpus; i++) {
915 t = container_list[i]->container;
916 if (container_list[i]->timer_armed)
917 tsk_rt(t)->job_params.exec_time += now - container_list[i]->scheduled_last_exec_time;
918 else
919 tsk_rt(t)->job_params.exec_time = get_exec_cost(t);
920 }
921 923
922 t = NULL; 924 t = NULL;
925 leeway = fp_div(1, 50);
923 926
924 // Try to add tasks from the queue 927 // Try to add tasks from the queue
925 list_for_each_safe(it, temp, &pending_adds) { 928 list_for_each_safe(it, temp, &pending_adds) {
926 u_extra = to_fp(num_cpus) - sys_util;
927 container = NULL; 929 container = NULL;
928 t = task_of_list_node(it); 930 t = task_of_list_node(it);
929 list_del_init(it); 931 list_del_init(it);
930 if (u_extra >= get_rt_utilization(t)) { 932 //sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
933 sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
934 if (to_fp(num_cpus) > get_rt_utilization(t) + sys_util + leeway) {
931 for (i = 0; i < num_cpus; i++) { 935 for (i = 0; i < num_cpus; i++) {
932 u64 leftover = to_fp(1) - container_domains[i].f_util; 936 if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util + leeway) {
933 if (leftover >= get_rt_utilization(t)) { 937 //if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util + leeway) {
934 container = &(container_domains[i]); 938 //container = &(container_domains[i]);
939 container = container_list[i]; // Used for best/worst fit
935 break; 940 break;
936 } 941 }
937 } 942 }
@@ -944,13 +949,10 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
944 tsk_rt(t)->domain = &gsched_domain; 949 tsk_rt(t)->domain = &gsched_domain;
945 tsk_rt(t)->edfsc_params.container_task = NULL; 950 tsk_rt(t)->edfsc_params.container_task = NULL;
946 m_util += get_rt_utilization(t); 951 m_util += get_rt_utilization(t);
947 //list_add(&tsk_rt(t)->edfsc_params.qnode, &migrating_tasks);
948 list_add(&t->edfsc_qnode, &migrating_tasks); 952 list_add(&t->edfsc_qnode, &migrating_tasks);
949 } 953 }
950 sys_util += get_rt_utilization(t); 954 sys_util += get_rt_utilization(t);
951 sys_changed = 1; 955 sys_changed = 1;
952 // Setup the release time for the first job to be now
953 release_at(t, litmus_clock());
954 } 956 }
955 /* Unblock the task waiting on our admission decision. They will detect 957 /* Unblock the task waiting on our admission decision. They will detect
956 * if they have been admitted by examining if tsk_rt(t)->domain != NULL 958 * if they have been admitted by examining if tsk_rt(t)->domain != NULL
@@ -962,7 +964,7 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
962 * longer than we should. 964 * longer than we should.
963 */ 965 */
964 raw_spin_unlock_irqrestore(&g_lock, flags); 966 raw_spin_unlock_irqrestore(&g_lock, flags);
965 wake_up_new_task(t); 967 BUG_ON(!wake_up_process(t));
966 raw_spin_lock_irqsave(&g_lock, flags); 968 raw_spin_lock_irqsave(&g_lock, flags);
967 } 969 }
968 970
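The admission test above works in the plugin's fixed-point utilization format (FP_SHIFT is 20 earlier in this file): a task is admitted only if its utilization plus sys_util plus a leeway of fp_div(1, 50) (about 0.02) still fits within to_fp(num_cpus). A standalone sketch of that arithmetic, assuming the usual to_fp(x) = x << FP_SHIFT and fp_div(a, b) = to_fp(a) / b style definitions; the utilization values below are made up:

/* Hedged sketch of the fixed-point admission check in container_boundary().
 * The macro definitions and numbers here are illustrative assumptions. */
#include <stdint.h>
#include <stdio.h>

#define FP_SHIFT 20
#define to_fp(x)     ((uint64_t)(x) << FP_SHIFT)
#define fp_div(a, b) (to_fp(a) / (uint64_t)(b))

int main(void)
{
	uint64_t num_cpus = 4;
	uint64_t sys_util = 3 * to_fp(1) + to_fp(1) / 2; /* 3.5 CPUs in use */
	uint64_t leeway   = fp_div(1, 50);               /* ~0.02 slack margin */
	uint64_t t_util   = fp_div(2, 5);                /* candidate task: 0.4 */

	/* Same shape as the admission test: task + current load + leeway
	 * must fit on the available CPUs. */
	if (to_fp(num_cpus) > t_util + sys_util + leeway)
		printf("admit (headroom %.3f CPUs)\n",
		       (double)(to_fp(num_cpus) - sys_util) / to_fp(1));
	else
		printf("reject\n");
	return 0;
}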
@@ -971,34 +973,38 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
971 // migrating tasks and potentially all the containers every period for a 973 // migrating tasks and potentially all the containers every period for a
972 // best-case Omega(m) and worst-case O(m^2) work---only once the scheduler 974 // best-case Omega(m) and worst-case O(m^2) work---only once the scheduler
973 // is actually working 975 // is actually working
976 // Done, only does stabilization when stuff changes in the system
974 // According to the paper, when we migrate, we must reserve space in the container 977 // According to the paper, when we migrate, we must reserve space in the container
975 // We do this by adding a fake task that ultimately doesn't release any jobs 978 // We do this by adding a fake task that ultimately doesn't release any jobs
976 // This is represented here by adding the utilization to sys_util 979 // This is represented here by adding the utilization to sys_util
977 // which will be subtracted when the migrating task is actually changed to fixed 980 // which will be subtracted when the migrating task is actually changed to fixed
978 if (sys_changed) { 981 if (sys_changed) { // change this to false to disable stabilization
979 list_for_each(it, &migrating_tasks) { 982 list_for_each_safe(it, temp, &migrating_tasks) {
980 struct task_struct* t = task_of_list_node(it); 983 struct task_struct* t = task_of_list_node(it);
981 // Although technically selecting the migrating tasks to be moved into containers 984 // Although technically selecting the migrating tasks to be moved into containers
982 // doesn't change m_util and the container's f_util until after the move, 985 // doesn't change m_util and the container's f_util until after the move,
983 // but since the move is guaranteed to happen before the next container_boundary 986 // but since the move is guaranteed to happen before the next container_boundary
984 // where we check all the utilization stuff, it's fine to account for it now 987 // where we check all the utilization stuff, it's fine to account for it now
985 if (!(tsk_rt(t)->edfsc_params.move_to) && !is_released(t, now) 988 if (!(tsk_rt(t)->edfsc_params.move_to)) {
986 && get_deadline(t) < get_deadline(&container_tasks[0]) + get_rt_period(&container_tasks[0])) {
987 tsk_rt(t)->edfsc_params.move_to = NULL; 989 tsk_rt(t)->edfsc_params.move_to = NULL;
988 990
989 container = NULL; 991 container = NULL;
992 //sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
993 //sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
990 for (i = 0; i < num_cpus; i++) { 994 for (i = 0; i < num_cpus; i++) {
991 u64 leftover = to_fp(1) - container_domains[i].f_util; 995 u64 leftover = to_fp(1) - container_domains[i].f_util - leeway;
992 if (leftover>=get_rt_utilization(t) && to_fp(num_cpus)>=get_rt_utilization(t)+sys_util) { 996 //if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util + leeway &&
997 if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util + leeway &&
998 to_fp(num_cpus) > get_rt_utilization(t) + sys_util + leeway) {
993 container = &(container_domains[i]); 999 container = &(container_domains[i]);
1000 //container = container_list[i]; // Used for best/worst fit
994 break; 1001 break;
995 } 1002 }
996 } 1003 }
997 1004
998 if (container) { 1005 if (container) {
999 list_del_init(&t->edfsc_qnode); 1006 list_del_init(it);
1000 container->f_util += get_rt_utilization(t); 1007 container->f_util += get_rt_utilization(t);
1001 m_util -= get_rt_utilization(t);
1002 sys_util += get_rt_utilization(t); 1008 sys_util += get_rt_utilization(t);
1003 tsk_rt(t)->edfsc_params.move_to = container; 1009 tsk_rt(t)->edfsc_params.move_to = container;
1004 sys_changed = 1; 1010 sys_changed = 1;
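The new admission check above is a first-fit scan over the containers using fixed-point utilizations padded by a leeway term: a migrating task is moved only if both the per-container and the whole-system capacity tests pass. A minimal user-space sketch of that test follows; the 20.12 fixed-point format, the helper macros, and the example values are illustrative assumptions, not the LITMUS-RT definitions.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical fixed-point helpers; LITMUS-RT has its own fpmath definitions. */
#define FP_SHIFT 12
#define to_fp(x) ((uint64_t)(x) << FP_SHIFT)

/* First-fit: return the index of the first container with enough spare
 * capacity for task_util, or -1 if none fits or the system as a whole
 * would exceed its capacity. Mirrors the two conditions in the hunk above. */
static int first_fit(uint64_t task_util, const uint64_t *f_util, int num_cpus,
                     uint64_t sys_util, uint64_t leeway)
{
    int i;
    for (i = 0; i < num_cpus; i++) {
        if (to_fp(1) > task_util + f_util[i] + leeway &&
            to_fp(num_cpus) > task_util + sys_util + leeway)
            return i;
    }
    return -1;
}

int main(void)
{
    uint64_t f_util[2] = { to_fp(1) / 2, to_fp(1) / 4 };  /* 0.50, 0.25 */
    uint64_t sys_util = f_util[0] + f_util[1];
    uint64_t task_util = to_fp(1) / 3;                    /* ~0.33 */

    printf("first fit: container %d\n",
           first_fit(task_util, f_util, 2, sys_util, to_fp(1) / 64));
    return 0;
}

The commented-out sort() calls in the hunk show the same loop could also implement best or worst fit simply by pre-ordering container_list before the scan.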
@@ -1012,7 +1018,7 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
1012 int remaining; 1018 int remaining;
1013 // Sort containers by the utilization of their fixed tasks 1019 // Sort containers by the utilization of their fixed tasks
1014 sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); 1020 sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL);
1015 u_extra = to_fp(num_cpus) - sys_util; 1021 u_extra = to_fp(num_cpus) - sys_util - leeway;
1016 // Fully provision all the container tasks we can 1022 // Fully provision all the container tasks we can
1017 for (i = 0; i < num_cpus && u_extra >= to_fp(1) - container_list[i]->f_util; i++) { 1023 for (i = 0; i < num_cpus && u_extra >= to_fp(1) - container_list[i]->f_util; i++) {
1018 struct task_struct* t = container_list[i]->container; 1024 struct task_struct* t = container_list[i]->container;
@@ -1034,7 +1040,9 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
1034 remaining = num_cpus - i; 1040 remaining = num_cpus - i;
1035 for (; i < num_cpus; i++) { 1041 for (; i < num_cpus; i++) {
1036 struct task_struct* t = container_list[i]->container; 1042 struct task_struct* t = container_list[i]->container;
1037 tsk_rt(t)->task_params.utilization = container_list[i]->f_util + u_extra / remaining; 1043 u64 temp_val = container_list[i]->f_util + u_extra / remaining;
1044 tsk_rt(t)->task_params.utilization = (temp_val < to_fp(1)) ? temp_val : to_fp(1);
1045 BUG_ON(tsk_rt(t)->task_params.utilization > to_fp(1));
1038 } 1046 }
1039 } 1047 }
1040 sys_changed = 0; 1048 sys_changed = 0;
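The other functional change in this hunk is the clamp on container utilization: after fully provisioning as many containers as the slack allows, the leftover u_extra is split evenly across the rest, and each result is capped at a utilization of 1 (with a BUG_ON as a safety net). A small self-contained sketch of that split, again using an illustrative fixed-point format rather than the LITMUS-RT one:

#include <stdint.h>
#include <stdio.h>

#define FP_SHIFT 12
#define to_fp(x) ((uint64_t)(x) << FP_SHIFT)

/* Split the spare capacity u_extra evenly over the containers that were not
 * fully provisioned (indices first_partial..num_cpus-1), clamping each
 * assigned utilization at 1 as the patch now does. The caller guarantees
 * first_partial < num_cpus, so the division is safe. */
static void split_spare(uint64_t *assigned, const uint64_t *f_util,
                        int first_partial, int num_cpus, uint64_t u_extra)
{
    int remaining = num_cpus - first_partial;
    int i;

    for (i = first_partial; i < num_cpus; i++) {
        uint64_t val = f_util[i] + u_extra / remaining;
        assigned[i] = val < to_fp(1) ? val : to_fp(1);
    }
}

int main(void)
{
    uint64_t f_util[4]   = { to_fp(1), to_fp(1), (to_fp(1) * 3) / 4, to_fp(1) / 2 };
    uint64_t assigned[4] = { to_fp(1), to_fp(1), 0, 0 };

    /* Containers 0 and 1 are fully provisioned; split 0.6 of slack over the rest. */
    split_spare(assigned, f_util, 2, 4, (to_fp(1) * 3) / 5);
    printf("container 2: %llu/4096, container 3: %llu/4096\n",
           (unsigned long long)assigned[2], (unsigned long long)assigned[3]);
    return 0;
}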
@@ -1043,51 +1051,73 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
1043 1051
1044 // Re-release container tasks, or let them release themselves if they're tardy 1052 // Re-release container tasks, or let them release themselves if they're tardy
1045 for (i = 0; i < num_cpus; i++) { 1053 for (i = 0; i < num_cpus; i++) {
1046 // will first iterate through fully provisioned containers, then not fully provisioned ones 1054 t = container_list[i]->container;
1047 struct task_struct* t = container_list[i]->container; 1055 int armed = container_list[i]->timer_armed;
1048 // If the last job completed on time, release it now 1056 // If the container is currently scheduled (its enforcement timer is armed), update its budget
1049 if (budget_exhausted(t)) { 1057 if (armed) {
1058 update_container_budget(t);
1059 }
1060
1061 /* Either container has completed, or it is fully provisioned and in sync
1062 * (thus not requiring a budget enforcement timer).
1063 */
1064 if ((!armed && get_rt_period(t) == get_exec_cost(t)) || budget_exhausted(t) || is_completed(t)) {
1050 BUG_ON(is_queued(t)); 1065 BUG_ON(is_queued(t));
1066 sched_trace_task_completion(t, 0);
1067 if (armed)
1068 cancel_idle_enforcement_timer(t);
1069 tsk_rt(t)->completed = 0;
1051 c_release(t); 1070 c_release(t);
1052 // Otherwise let it release itself when it completes
1053 } else {
1054 tsk_rt(t)->edfsc_params.can_release = 1;
1055 manage_idle_enforcement_timer(t);
1056 } 1071 }
1057 } 1072 }
1073 g_preempt_check();
1058 1074
1059 raw_spin_unlock_irqrestore(&g_lock, flags); 1075 raw_spin_unlock_irqrestore(&g_lock, flags);
1060 1076
1077 TS_SCHED_TIMER_END
1078
1061 hrtimer_add_expires_ns(timer, LITMUS_QUANTUM_LENGTH_NS); 1079 hrtimer_add_expires_ns(timer, LITMUS_QUANTUM_LENGTH_NS);
1062 return HRTIMER_RESTART; 1080 return HRTIMER_RESTART;
1063} 1081}
1064 1082
1083/*
1084 * When preempt checks have scheduled a task on multiple cores (due to swapping and
1085 * repeated invocations of preempt_check), we should not wait for its stack; reschedule instead.
1086 */
1087static bool edfsc_should_wait_for_stack(struct task_struct* t) {
1088 cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
1089 struct task_struct* tsk = tsk_rt(t)->edfsc_params.container_task;
1090 tsk = tsk ? tsk : t;
1091 return tsk_rt(tsk)->linked_on == tsk_rt(tsk)->scheduled_on && tsk_rt(tsk)->linked_on == entry->cpu;
1092}
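The helper above answers one question: is this task (or, for a fixed task, its enclosing container) both linked and scheduled on the local CPU? Only then is it safe to wait for its stack; otherwise the core should just reschedule. A user-space model of that predicate, where the struct and field names are illustrative stand-ins rather than the LITMUS-RT types:

#include <stdbool.h>
#include <stdio.h>

#define NO_CPU (-1)

/* Illustrative stand-in for the scheduler state consulted above. */
struct model_task {
    int linked_on;                /* CPU the task is linked to, or NO_CPU */
    int scheduled_on;             /* CPU the task last ran on, or NO_CPU */
    struct model_task *container; /* enclosing container, or NULL */
};

/* Wait for the task's stack only if the relevant task (the container, when
 * the task is running inside one) is linked and scheduled on this CPU. */
static bool should_wait_for_stack(const struct model_task *t, int this_cpu)
{
    const struct model_task *tsk = t->container ? t->container : t;

    return tsk->linked_on == tsk->scheduled_on && tsk->linked_on == this_cpu;
}

int main(void)
{
    struct model_task cont = { .linked_on = 1, .scheduled_on = 0, .container = NULL };
    struct model_task task = { .linked_on = 1, .scheduled_on = 1, .container = &cont };

    /* The container is linked to CPU 1 but still scheduled on CPU 0: don't wait. */
    printf("wait: %d\n", should_wait_for_stack(&task, 1));
    return 0;
}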
1093
1065/** 1094/**
1066 * Fired when a task reaches its deadline and is pending deletion or migration 1095 * Fired when a task reaches its deadline and is pending deletion or migration
1067 */ 1096 */
1068static enum hrtimer_restart task_deadline_callback(struct hrtimer* timer) { 1097static enum hrtimer_restart task_deadline_callback(struct hrtimer* timer) {
1098 unsigned long flags;
1069 struct task_struct *t = container_of(timer, struct task_struct, edfsc_deadline_timer); 1099 struct task_struct *t = container_of(timer, struct task_struct, edfsc_deadline_timer);
1070 1100
1101 raw_spin_lock_irqsave(&g_lock, flags);
1071 BUG_ON(is_container(t)); 1102 BUG_ON(is_container(t));
1072 printk("util: %d\n", sys_util);
1073 // This is true only if set to be migrating from container_boundary 1103 // This is true only if set to be migrating from container_boundary
1074 if (tsk_rt(t)->edfsc_params.move_to) { 1104 if (tsk_rt(t)->edfsc_params.move_to) {
1075 // Migrate here if the task is not late, otherwise migrate in job_complete 1105 // Can only be here when called from g_job_completion
1076 if (!is_released(t, litmus_clock()) 1106 migrate_task(t);
1077 || (budget_enforced(t) && budget_exhausted(t)) 1107 // In the else case, only task_params is guaranteed to be valid
1078 || is_completed(t)) 1108 // However, in task_exit, we stored information in task_params.cpu
1079 migrate_task(t); 1109 // to help us perform the remove operations
1080 } else { 1110 } else {
1081 // A move to NULL means deletion 1111 // A move to NULL means deletion
1082 // HACK: See comment in edfsc_task_exit() 1112 if (tsk_rt(t)->task_params.cpu == NO_CPU)
1083 tsk_rt(t)->edfsc_params.container_task = (struct task_struct*)tsk_rt(t)->task_params.phase;
1084 if (is_fixed(t))
1085 c_remove_task(t);
1086 else
1087 g_remove_task(t); 1113 g_remove_task(t);
1114 else
1115 c_remove_task(t);
1088 // Release our reference to the task struct 1116 // Release our reference to the task struct
1089 put_task_struct(t); 1117 put_task_struct(t);
1090 } 1118 }
1119 list_del_init(&t->edfsc_qnode);
1120 raw_spin_unlock_irqrestore(&g_lock, flags);
1091 return HRTIMER_NORESTART; 1121 return HRTIMER_NORESTART;
1092} 1122}
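With the old hijack of task_params.phase gone, the callback's else-branch now keys off task_params.cpu: edfsc_task_exit() stores either the owning container's id (fixed task) or NO_CPU (migrating task) there, and the callback dispatches to c_remove_task() or g_remove_task() accordingly. A condensed model of that decision, with hypothetical names standing in for the real helpers and state:

#include <stdbool.h>
#include <stdio.h>

#define NO_CPU (-1)

/* Hypothetical flattened view of the state the callback inspects. */
struct exiting_task {
    bool has_move_to;   /* set by container_boundary: task is mid-migration */
    int  saved_cpu;     /* container id for fixed tasks, NO_CPU for migrating */
};

enum action { MIGRATE, REMOVE_GLOBAL, REMOVE_CONTAINER };

static enum action deadline_action(const struct exiting_task *t)
{
    if (t->has_move_to)
        return MIGRATE;            /* reached here via g_job_completion */
    if (t->saved_cpu == NO_CPU)
        return REMOVE_GLOBAL;      /* was migrating: unaccount from sys_util */
    return REMOVE_CONTAINER;       /* was fixed: unaccount from its container */
}

int main(void)
{
    struct exiting_task fixed = { .has_move_to = false, .saved_cpu = 2 };

    printf("action: %d\n", deadline_action(&fixed)); /* prints 2 (REMOVE_CONTAINER) */
    return 0;
}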
1093 1123
@@ -1103,20 +1133,30 @@ static void edfsc_task_new(struct task_struct* t, int on_rq, int is_scheduled)
1103 1133
1104 tsk_rt(t)->sporadic_release = 0; 1134 tsk_rt(t)->sporadic_release = 0;
1105 1135
1136 TRACE("EDF-sc: task new %d\n", t->pid);
1137
1106 // Create a timer that we'll use to delay accounting during migrations 1138 // Create a timer that we'll use to delay accounting during migrations
1107 hrtimer_init(&t->edfsc_deadline_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1139 hrtimer_init(&t->edfsc_deadline_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1108 t->edfsc_deadline_timer.function = task_deadline_callback; 1140 t->edfsc_deadline_timer.function = task_deadline_callback;
1109 1141
1110 raw_spin_lock_irqsave(&g_lock, flags); 1142 raw_spin_lock_irqsave(&g_lock, flags);
1111 // Queue this task and request a reschedule
1112 requeue(t);
1113 preempt(entry);
1114 1143
1115 // Since `t` is not going to run again until we schedule, harmonize state 1144 release_at(t, litmus_clock());
1116 t->rt_param.linked_on = NO_CPU; 1145 sched_trace_task_release(t);
1146 tsk_rt(t)->linked_on = NO_CPU;
1147 tsk_rt(t)->scheduled_on = NO_CPU;
1148 // Queue this task and request a reschedule
1149 if (on_rq || is_scheduled) {
1150 requeue(t);
1151 if (is_migrating(t)) {
1152 g_preempt_check();
1153 }
1154 else if (is_fixed(t)) {
1155 c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
1156 }
1157 preempt(entry);
1158 }
1117 raw_spin_unlock_irqrestore(&g_lock, flags); 1159 raw_spin_unlock_irqrestore(&g_lock, flags);
1118
1119 TRACE("EDF-sc: task new %d\n", t->pid);
1120} 1160}
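Task arrival now mirrors wake-up: the job is released immediately, and only a runnable task is requeued, followed by a preemption check in whichever domain it belongs to (global for migrating tasks, its container's for fixed ones) and a local preempt(). A compact illustration of that per-domain dispatch, where the types and hook names are hypothetical stand-ins for g_preempt_check() and c_preempt_check():

#include <stdio.h>

/* Hypothetical per-domain preemption hooks; in the plugin these inspect
 * very different state (the global heap vs. one container's ready queue). */
struct sched_domain_model {
    const char *name;
    void (*preempt_check)(struct sched_domain_model *dom);
};

static void global_check(struct sched_domain_model *dom)
{
    printf("checking global preemptions (%s)\n", dom->name);
}

static void container_check(struct sched_domain_model *dom)
{
    printf("checking container-local preemptions (%s)\n", dom->name);
}

/* The arrival/wake-up pattern: requeue (not modeled here), then run the
 * check that matches the task's current domain. */
static void after_requeue(struct sched_domain_model *dom)
{
    dom->preempt_check(dom);
}

int main(void)
{
    struct sched_domain_model g = { "gsched_domain", global_check };
    struct sched_domain_model c = { "container 0",   container_check };

    after_requeue(&g);  /* migrating task */
    after_requeue(&c);  /* fixed task */
    return 0;
}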
1121 1161
1122/** 1162/**
@@ -1127,18 +1167,32 @@ static void edfsc_task_new(struct task_struct* t, int on_rq, int is_scheduled)
1127static void edfsc_task_wake_up(struct task_struct *task) 1167static void edfsc_task_wake_up(struct task_struct *task)
1128{ 1168{
1129 unsigned long flags; 1169 unsigned long flags;
1130 1170 lt_t now = litmus_clock();
1131 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 1171 cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(task));
1172 TRACE_TASK(task, "wake_up at %llu\n", now);
1132 raw_spin_lock_irqsave(&g_lock, flags); 1173 raw_spin_lock_irqsave(&g_lock, flags);
1133 // TODO: Look into handling sporadic tasks as sched_gsnedf.c does 1174 now = litmus_clock();
1134 requeue(task); 1175 if (is_sporadic(task) && is_tardy(task, now)) {
1135 // TODO: Look into queuing preemption as sched_gsnedf.c does? 1176 inferred_sporadic_job_release_at(task, now);
1177 }
1178 if (!is_queued(task) && tsk_rt(task)->domain)
1179 requeue(task);
1180 if (is_migrating(task))
1181 g_preempt_check();
1182 else if (is_fixed(task))
1183 c_preempt_check((cont_domain_t*)tsk_rt(task)->domain);
1184 preempt(entry);
1136 raw_spin_unlock_irqrestore(&g_lock, flags); 1185 raw_spin_unlock_irqrestore(&g_lock, flags);
1137} 1186}
1138 1187
1139static void edfsc_task_block(struct task_struct *t) 1188static void edfsc_task_block(struct task_struct *t)
1140{ 1189{
1141 // TODO 1190 unsigned long flags;
1191 raw_spin_lock_irqsave(&g_lock, flags);
1192 if (is_migrating(t) && tsk_rt(t)->edfsc_params.container_task) {
1193 tsk_rt(t)->edfsc_params.container_task = NULL;
1194 }
1195 raw_spin_unlock_irqrestore(&g_lock, flags);
1142} 1196}
1143 1197
1144/** 1198/**
@@ -1152,35 +1206,37 @@ static void edfsc_task_exit(struct task_struct* t)
1152{ 1206{
1153 unsigned long flags; 1207 unsigned long flags;
1154 lt_t now, unaccount_time = 0; 1208 lt_t now, unaccount_time = 0;
1155 cpu_entry_t* entry; 1209 cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(t));
1156 1210
1157 BUG_ON(is_container(t)); 1211 BUG_ON(is_container(t));
1158 raw_spin_lock_irqsave(&g_lock, flags); 1212 raw_spin_lock_irqsave(&g_lock, flags);
1159 TRACE_TASK(t, "called edfsc_task_exit\n"); 1213 TRACE_TASK(t, "called edfsc_task_exit\n");
1160 1214
1161 // Remove this task from all members of its scheduling domain 1215 // Remove this task from all members of its scheduling domain
1162 unlink(t); 1216 if (is_fixed(t)) {
1163 if (is_queued(t)) { 1217 tsk_rt(t)->task_params.cpu = ((cont_domain_t*)tsk_rt(t)->domain)->container->rt_param.edfsc_params.id;
1164 remove(tsk_rt(t)->domain, t); 1218 if (is_queued(t))
1165 } else if (is_fixed(t)) { 1219 remove(tsk_rt(t)->domain, t);
1166 // If we're fixed and not on the ready queues, we should be currently running 1220 else {
1167 BUG_ON(((cont_domain_t*)tsk_rt(t)->domain)->scheduled != t); 1221 // If we're fixed and not on the ready queues, we should be currently running
1168 BUG_ON(t != current); 1222 BUG_ON(((cont_domain_t*)tsk_rt(t)->domain)->scheduled != t);
1169 ((cont_domain_t*)tsk_rt(t)->domain)->scheduled = NULL; 1223 BUG_ON(t != current);
1224 ((cont_domain_t*)tsk_rt(t)->domain)->scheduled = NULL;
1225 }
1170 } else { 1226 } else {
1171 // We're in the global domain and not on the ready queues, so we must be running 1227 tsk_rt(t)->task_params.cpu = NO_CPU;
1172 BUG_ON(t != current); 1228 list_del_init(&t->edfsc_qnode);
1173 list_del(&t->edfsc_qnode); 1229 if (tsk_rt(t)->edfsc_params.container_task != NULL) {
1174 entry = &per_cpu(edfsc_cpu_entries, task_cpu(t)); 1230 BUG_ON(tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled != t);
1175 // Handle the case where we exit while being background scheduled 1231 tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled = NULL;
1176 if (is_container(entry->scheduled)) { 1232 }
1177 BUG_ON(entry->scheduled->rt_param.edfsc_params.domain->scheduled != t); 1233 else {
1178 entry->scheduled->rt_param.edfsc_params.domain->scheduled = NULL; 1234 unlink(t);
1179 } else {
1180 BUG_ON(entry->scheduled != t);
1181 entry->scheduled = NULL; 1235 entry->scheduled = NULL;
1182 } 1236 }
1183 } 1237 }
1238 tsk_rt(t)->domain = NULL;
1239 BUG_ON(is_queued(t));
1184 1240
1185 /* To preserve EDF-sc scheduling invariants, we can only release a task's 1241 /* To preserve EDF-sc scheduling invariants, we can only release a task's
1186 * utilization at the greater of completion or deadline boundary. Thus, here 1242 * utilization at the greater of completion or deadline boundary. Thus, here
@@ -1209,20 +1265,19 @@ static void edfsc_task_exit(struct task_struct* t)
1209 get_task_struct(t); 1265 get_task_struct(t);
1210 // Make it clear that this task is going away 1266 // Make it clear that this task is going away
1211 tsk_rt(t)->edfsc_params.move_to = NULL; 1267 tsk_rt(t)->edfsc_params.move_to = NULL;
1212 /* HACK: Unfortunately, even though we hold a reference to the task struct,
1213 * LITMUS clears edfsc_params before our timer expires. `task_params` seems
1214 * untouched, so hijack `task_params.phase` to link to the container task
1215 */
1216 tsk_rt(t)->task_params.phase = (lt_t)tsk_rt(t)->edfsc_params.container_task;
1217 1268
1218 if (unaccount_time == 0) 1269
1270 if (unaccount_time == 0) {
1271 raw_spin_unlock_irqrestore(&g_lock, flags);
1219 // Don't bother setting a zero-length timer - just skip straight to the callback 1272 // Don't bother setting a zero-length timer - just skip straight to the callback
1220 task_deadline_callback(&t->edfsc_deadline_timer); 1273 task_deadline_callback(&t->edfsc_deadline_timer);
1221 else 1274 }
1275 else {
1276 list_add(&t->edfsc_qnode, &pending_removes);
1277 raw_spin_unlock_irqrestore(&g_lock, flags);
1222 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(unaccount_time), 1278 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(unaccount_time),
1223 HRTIMER_MODE_ABS_PINNED); 1279 HRTIMER_MODE_ABS_PINNED);
1224 1280 }
1225 raw_spin_unlock_irqrestore(&g_lock, flags);
1226} 1281}
1227 1282
1228static struct domain_proc_info edfsc_domain_proc_info; 1283static struct domain_proc_info edfsc_domain_proc_info;
@@ -1274,23 +1329,130 @@ static long edfsc_activate_plugin(void)
1274 * be reusable if we don't destroy them when the plugin is deactivated) 1329 * be reusable if we don't destroy them when the plugin is deactivated)
1275 * - ... 1330 * - ...
1276 */ 1331 */
1332 int i;
1333 lt_t now;
1334 cpu_entry_t* entry;
1335 struct task_struct* t;
1336
1337 edfsc_setup_domain_proc();
1338
1339 INIT_LIST_HEAD(&pending_adds);
1340 INIT_LIST_HEAD(&migrating_tasks);
1341 INIT_LIST_HEAD(&pending_removes);
1342 bheap_init(&edfsc_cpu_heap);
1343
1344 // Set up the container boundary timer
1345 hrtimer_init(&container_release_timer, CLOCK_MONOTONIC,
1346 HRTIMER_MODE_ABS_PINNED);
1347 container_release_timer.function = container_boundary;
1348
1349 edf_domain_init(&gsched_domain, NULL, g_release_jobs);
1350
1351 container_tasks = kmalloc(sizeof(struct task_struct) * num_online_cpus(), GFP_KERNEL);
1352 container_domains = kmalloc(sizeof(cont_domain_t) * num_online_cpus(), GFP_KERNEL);
1353 container_list = kmalloc(sizeof(cont_domain_t*) * num_online_cpus(), GFP_KERNEL);
1354 edfsc_cpu_heap_node = kmalloc(sizeof(struct bheap_node) * num_online_cpus(), GFP_KERNEL);
1355
1356 sys_util = to_fp(0);
1357 m_util = to_fp(0);
1358 sys_changed = 1;
1359
1360 memset(container_tasks, 0, sizeof(struct task_struct) * num_online_cpus());
1361 memset(container_domains, 0, sizeof(cont_domain_t) * num_online_cpus());
1362
1363 // Initialize container domains
1364 for (i = 0; i < num_online_cpus(); i++) {
1365 edf_domain_init(&container_domains[i].domain, c_check_resched, NULL);
1366 container_domains[i].scheduled = NULL;
1367 container_domains[i].container = &container_tasks[i];
1368 container_domains[i].f_util = to_fp(0);
1369 hrtimer_init(&(container_domains[i].idle_enforcement_timer), CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1370 container_domains[i].idle_enforcement_timer.function = on_idle_enforcement_timeout;
1371
1372
1373 // Name the task its container ID mapped to ASCII
1374 snprintf(container_tasks[i].comm, TASK_COMM_LEN, "%d", i);
1375 container_tasks[i].pid = -i;
1376 tsk_rt(&container_tasks[i])->task_params.exec_cost = LITMUS_QUANTUM_LENGTH_NS;
1377 tsk_rt(&container_tasks[i])->task_params.period =
1378 LITMUS_QUANTUM_LENGTH_NS;
1379 tsk_rt(&container_tasks[i])->task_params.utilization = to_fp(1);
1380 tsk_rt(&container_tasks[i])->task_params.relative_deadline =
1381 LITMUS_QUANTUM_LENGTH_NS;
1382 tsk_rt(&container_tasks[i])->task_params.budget_policy = PRECISE_ENFORCEMENT;
1383 tsk_rt(&container_tasks[i])->edfsc_params.container_task = NULL;
1384 tsk_rt(&container_tasks[i])->domain = &gsched_domain;
1385 tsk_rt(&container_tasks[i])->edfsc_params.domain = &container_domains[i];
1386 tsk_rt(&container_tasks[i])->sporadic_release = 0;
1387 tsk_rt(&container_tasks[i])->edfsc_params.id = i;
1388 tsk_rt(&container_tasks[i])->heap_node = bheap_node_alloc(GFP_ATOMIC);
1389 tsk_rt(&container_tasks[i])->rel_heap = release_heap_alloc(GFP_ATOMIC);
1390 tsk_rt(&container_tasks[i])->linked_on = NO_CPU;
1391 tsk_rt(&container_tasks[i])->scheduled_on = NO_CPU;
1392
1393 if (!tsk_rt(&container_tasks[i])->heap_node || !tsk_rt(&container_tasks[i])->rel_heap) {
1394 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
1395 return -ENOMEM;
1396 } else {
1397 bheap_node_init(&tsk_rt(&container_tasks[i])->heap_node, &container_tasks[i]);
1398 }
1399
1400 container_tasks[i].policy = SCHED_LITMUS;
1401
1402 // Populate the container_list while we're at it.
1403 container_list[i] = &container_domains[i];
1404
1405 // Link heap nodes to CPU structures
1406 entry = &per_cpu(edfsc_cpu_entries, i);
1407 entry->cpu = i;
1408 entry->scheduled = NULL;
1409 entry->linked = NULL;
1410 entry->hn = &edfsc_cpu_heap_node[i];
1411 bheap_node_init(&entry->hn, entry);
1412 }
1413
1414 now = litmus_clock();
1415 for (i = 0; i < num_online_cpus(); i++) {
1416 t = &container_tasks[i];
1417 entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
1418 ((cont_domain_t*)tsk_rt(t)->edfsc_params.domain)->scheduled_last_exec_time = now;
1419 release_at(t, now);
1420 link_task_to_cpu(t, entry);
1421 }
1277 1422
1278 // Start the container boundary timer 1423 // Start the container boundary timer
1279 hrtimer_start(&container_release_timer, 1424 hrtimer_start(&container_release_timer,
1280 ns_to_ktime(litmus_clock() + LITMUS_QUANTUM_LENGTH_NS), 1425 ns_to_ktime(now + LITMUS_QUANTUM_LENGTH_NS),
1281 HRTIMER_MODE_ABS_PINNED); 1426 HRTIMER_MODE_ABS_PINNED);
1282 1427
1283 edfsc_setup_domain_proc();
1284
1285 return 0; 1428 return 0;
1286} 1429}
1287 1430
1288static long edfsc_deactivate_plugin(void) 1431static long edfsc_deactivate_plugin(void)
1289{ 1432{
1290 // TODO: Reset our internal state 1433 int i;
1434 struct list_head *l, *temp;
1435 struct task_struct* t;
1291 1436
1292 // Stop the container boundary timer 1437 // Stop the container boundary timer
1293 hrtimer_cancel(&container_release_timer); 1438 hrtimer_try_to_cancel(&container_release_timer);
1439
1440 list_for_each_safe(l, temp, &pending_removes) {
1441 t = task_of_list_node(l);
1442 list_del_init(l);
1443 hrtimer_try_to_cancel(&t->edfsc_deadline_timer);
1444 }
1445
1446 for (i = 0; i < num_online_cpus(); i++) {
1447 bheap_node_free(tsk_rt(&container_tasks[i])->heap_node);
1448 release_heap_free(tsk_rt(&container_tasks[i])->rel_heap);
1449 hrtimer_try_to_cancel(&container_domains[i].idle_enforcement_timer);
1450 }
1451
1452 kfree(container_tasks);
1453 kfree(container_domains);
1454 kfree(container_list);
1455 kfree(edfsc_cpu_heap_node);
1294 1456
1295 destroy_domain_proc_info(&edfsc_domain_proc_info); 1457 destroy_domain_proc_info(&edfsc_domain_proc_info);
1296 return 0; 1458 return 0;
@@ -1303,9 +1465,11 @@ static long edfsc_deactivate_plugin(void)
1303 */ 1465 */
1304static long edfsc_admit_task(struct task_struct* tsk) 1466static long edfsc_admit_task(struct task_struct* tsk)
1305{ 1467{
1468 unsigned long flags;
1306 // We assume that we're running in the context of `tsk` 1469 // We assume that we're running in the context of `tsk`
1307 BUG_ON(tsk != current); 1470 BUG_ON(tsk != current);
1308 1471
1472 raw_spin_lock_irqsave(&g_lock, flags);
1309 // Make sure that edfsc_params doesn't contain garbage 1473 // Make sure that edfsc_params doesn't contain garbage
1310 // Note that edfsc_params->domain will always be NULL for non-container tasks 1474 // Note that edfsc_params->domain will always be NULL for non-container tasks
1311 memset(&tsk_rt(tsk)->edfsc_params, 0, sizeof(struct edfsc_params)); 1475 memset(&tsk_rt(tsk)->edfsc_params, 0, sizeof(struct edfsc_params));
@@ -1317,6 +1481,7 @@ static long edfsc_admit_task(struct task_struct* tsk)
1317 tsk_rt(tsk)->task_params.utilization = fp_div(get_exec_cost(tsk), get_rt_period(tsk)); 1481 tsk_rt(tsk)->task_params.utilization = fp_div(get_exec_cost(tsk), get_rt_period(tsk));
1318 // Add us to the queue of tasks waiting on admission 1482 // Add us to the queue of tasks waiting on admission
1319 list_add_tail(&tsk->edfsc_qnode, &pending_adds); 1483 list_add_tail(&tsk->edfsc_qnode, &pending_adds);
1484 raw_spin_unlock_irqrestore(&g_lock, flags);
1320 // We don't know if we can be admitted until a container job boundary is reached, 1485 // We don't know if we can be admitted until a container job boundary is reached,
1321 // so block until the scheduler can make that decision 1486 // so block until the scheduler can make that decision
1322 set_current_state(TASK_INTERRUPTIBLE); // Changed from TASK_RUNNING 1487 set_current_state(TASK_INTERRUPTIBLE); // Changed from TASK_RUNNING
@@ -1329,14 +1494,18 @@ static long edfsc_admit_task(struct task_struct* tsk)
1329 if (tsk_rt(tsk)->domain != NULL) 1494 if (tsk_rt(tsk)->domain != NULL)
1330 return 0; // Successfully admitted 1495 return 0; // Successfully admitted
1331 else { 1496 else {
1497 raw_spin_lock_irqsave(&g_lock, flags);
1332 // We'll still be on pending_adds if interrupted by a signal 1498 // We'll still be on pending_adds if interrupted by a signal
1333 struct list_head* l; 1499 struct list_head* l;
1334 list_for_each(l, &pending_adds) { 1500 struct list_head *temp;
1335 if (l == &tsk->edfsc_qnode) { 1501 list_for_each_safe(l, temp, &pending_adds) {
1336 list_del(l); 1502 if (task_of_list_node(l) == tsk) {
1503 list_del_init(l);
1504 raw_spin_unlock_irqrestore(&g_lock, flags);
1337 return -EINTR; // Interrupted 1505 return -EINTR; // Interrupted
1338 } 1506 }
1339 } 1507 }
1508 raw_spin_unlock_irqrestore(&g_lock, flags);
1340 return -ENOSPC; // Rejected 1509 return -ENOSPC; // Rejected
1341 } 1510 }
1342} 1511}
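Admission itself stays asynchronous: the task is appended to pending_adds under g_lock and then sleeps until the next container boundary decides its fate, with tsk_rt(tsk)->domain doubling as the verdict once it is woken. The shape of that handshake, modeled in user space with a pthread condition variable (all names are illustrative; the kernel code uses the scheduler's own sleep/wake machinery, not pthreads):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* One pending admission request: the "scheduler" fills in admitted and
 * flips decided at its next boundary. */
struct admission_request {
    pthread_mutex_t lock;
    pthread_cond_t  decided_cv;
    bool decided;
    bool admitted;
};

static void *boundary_thread(void *arg)
{
    struct admission_request *req = arg;

    pthread_mutex_lock(&req->lock);
    req->admitted = true;          /* pretend the utilization check passed */
    req->decided = true;
    pthread_cond_signal(&req->decided_cv);
    pthread_mutex_unlock(&req->lock);
    return NULL;
}

int main(void)
{
    struct admission_request req = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .decided_cv = PTHREAD_COND_INITIALIZER,
    };
    pthread_t boundary;

    pthread_create(&boundary, NULL, boundary_thread, &req);

    /* The admitting task blocks until the boundary makes its decision. */
    pthread_mutex_lock(&req.lock);
    while (!req.decided)
        pthread_cond_wait(&req.decided_cv, &req.lock);
    pthread_mutex_unlock(&req.lock);

    printf("%s\n", req.admitted ? "admitted" : "rejected");
    pthread_join(boundary, NULL);
    return 0;
}

Build with -lpthread; the interruptible-sleep and signal-cleanup path handled by list_for_each_safe above has no analogue in this toy model.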
@@ -1354,90 +1523,13 @@ static struct sched_plugin edfsc_plugin __cacheline_aligned_in_smp = {
1354 .admit_task = edfsc_admit_task, 1523 .admit_task = edfsc_admit_task,
1355 .activate_plugin = edfsc_activate_plugin, 1524 .activate_plugin = edfsc_activate_plugin,
1356 .deactivate_plugin = edfsc_deactivate_plugin, 1525 .deactivate_plugin = edfsc_deactivate_plugin,
1526 .should_wait_for_stack = edfsc_should_wait_for_stack,
1357 .get_domain_proc_info = edfsc_get_domain_proc_info, 1527 .get_domain_proc_info = edfsc_get_domain_proc_info,
1358}; 1528};
1359 1529
1360 1530
1361static int __init init_edfsc(void) 1531static int __init init_edfsc(void)
1362{ 1532{
1363 int i;
1364 cpu_entry_t *entry;
1365
1366 INIT_LIST_HEAD(&pending_adds);
1367 INIT_LIST_HEAD(&migrating_tasks);
1368
1369 bheap_init(&edfsc_cpu_heap);
1370
1371 edf_domain_init(&gsched_domain, NULL, g_release_jobs);
1372
1373 // Set up the container boundary timer
1374 hrtimer_init(&container_release_timer, CLOCK_MONOTONIC,
1375 HRTIMER_MODE_ABS_PINNED);
1376 container_release_timer.function = container_boundary;
1377
1378 container_tasks = kmalloc(sizeof(struct task_struct) * num_online_cpus(), GFP_KERNEL);
1379 container_domains = kmalloc(sizeof(cont_domain_t) * num_online_cpus(), GFP_KERNEL);
1380 container_list = kmalloc(sizeof(cont_domain_t*) * num_online_cpus(), GFP_KERNEL);
1381 edfsc_cpu_heap_node = kmalloc(sizeof(struct bheap_node) * num_online_cpus(), GFP_KERNEL);
1382
1383 sys_util = to_fp(0);
1384 m_util = to_fp(0);
1385 sys_changed = 1;
1386
1387 memset(container_tasks, 0, sizeof(struct task_struct) * num_online_cpus());
1388 memset(container_domains, 0, sizeof(cont_domain_t) * num_online_cpus());
1389
1390 // Initialize container domains
1391 for (i = 0; i < num_online_cpus(); i++) {
1392 edf_domain_init(&container_domains[i].domain, c_check_resched, NULL);
1393 container_domains[i].scheduled = NULL;
1394 container_domains[i].container = &container_tasks[i];
1395 container_domains[i].f_util = to_fp(0);
1396 hrtimer_init(&(container_domains[i].idle_enforcement_timer), CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1397 container_domains[i].idle_enforcement_timer.function = on_idle_enforcement_timeout;
1398
1399
1400 // Name the task its container ID mapped to ASCII
1401 snprintf(container_tasks[i].comm, TASK_COMM_LEN, "%d", i);
1402 tsk_rt(&container_tasks[i])->task_params.exec_cost = LITMUS_QUANTUM_LENGTH_NS / 2;
1403 tsk_rt(&container_tasks[i])->task_params.period =
1404 LITMUS_QUANTUM_LENGTH_NS;
1405 tsk_rt(&container_tasks[i])->task_params.relative_deadline =
1406 LITMUS_QUANTUM_LENGTH_NS;
1407 tsk_rt(&container_tasks[i])->task_params.budget_policy = PRECISE_ENFORCEMENT;
1408 tsk_rt(&container_tasks[i])->edfsc_params.container_task = NULL;
1409 tsk_rt(&container_tasks[i])->domain = &gsched_domain;
1410 tsk_rt(&container_tasks[i])->edfsc_params.domain = &container_domains[i];
1411 tsk_rt(&container_tasks[i])->edfsc_params.can_release = 0;
1412 tsk_rt(&container_tasks[i])->sporadic_release = 0;
1413 tsk_rt(&container_tasks[i])->edfsc_params.id = i;
1414 tsk_rt(&container_tasks[i])->heap_node = bheap_node_alloc(GFP_ATOMIC);
1415 tsk_rt(&container_tasks[i])->rel_heap = release_heap_alloc(GFP_ATOMIC);
1416
1417 if (!tsk_rt(&container_tasks[i])->heap_node || !tsk_rt(&container_tasks[i])->rel_heap) {
1418 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
1419 return -ENOMEM;
1420 } else {
1421 bheap_node_init(&tsk_rt(&container_tasks[i])->heap_node, &container_tasks[i]);
1422 }
1423
1424 container_tasks[i].policy = SCHED_LITMUS;
1425 release_at(&container_tasks[i], litmus_clock());
1426 requeue(&container_tasks[i]);
1427
1428 // Populate the container_list while we're at it.
1429 container_list[i] = &container_domains[i];
1430
1431 // Link heap nodes to CPU structures
1432 entry = &per_cpu(edfsc_cpu_entries, i);
1433 entry->cpu = i;
1434 entry->scheduled = NULL;
1435 entry->linked = NULL;
1436 entry->hn = &edfsc_cpu_heap_node[i];
1437 bheap_node_init(&entry->hn, entry);
1438 entry->scheduled = NULL;
1439 }
1440
1441 return register_sched_plugin(&edfsc_plugin); 1533 return register_sched_plugin(&edfsc_plugin);
1442} 1534}
1443 1535