Fixed obscure edge case regarding unlink

unlink() in gedf_reservation would try to unlink a task that is neither linked nor in the ready queue; it is in fact in the release queue. This happens when multiple calls to check_for_preemptions occur simultaneously, which causes a task to be "scheduled" on 2 cpu_entries at the same time while it is out of budget. As a result, both CPUs, in update_time, replenish the budget and try to unlink the task. The first one succeeds, while the second fails. For more details on how this occurs, see the comment in update_time in gedf_reservation.c
author: Zelin Tong <ztong@ludwig.cs.unc.edu> 2020-10-14 06:14:28 -0400
committer: Tanya Amert <tamert@cs.unc.edu> 2020-10-14 20:27:08 -0400
commit: 2ea98c6d1c38df18555b96afda6607fa9edad72e (patch)
tree: 937c1ec49a1c238e501a1c7a0639531600fb1ef3
parent: b58595723675d016e7a8e06afcad9be8fd85de3a (diff)
1 files changed, 22 insertions, 5 deletions
diff --git a/litmus/reservations/gedf_reservation.c b/litmus/reservations/gedf_reservation.c
index 157cd16d7ebb..2a228b4c8894 100644
--- a/litmus/reservations/gedf_reservation.c
+++ b/litmus/reservations/gedf_reservation.c
@@ -93,7 +93,6 @@ static void requeue(
                __add_ready_res(&gedf_env->domain, &gedf_res->res);
        else
                __add_release_res(&gedf_env->domain, &gedf_res->res);
 }
 static void link_task_to_cpu(
@@ -651,7 +650,11 @@ static void gedf_env_update_time(
        if (!entry->scheduled)
                return;
-        /* tells scheduled res to drain its budget */
+        /* tells scheduled res to drain its budget.
+         * In the situation of 2 cores having the same scheduled(detailed in comment below), the task will be
+         * out of budget. This means drain_budget just atomically sets cur_budget to 0 on drain.
+         * Therefore, no lock is needed for this operation
+         */
        entry->scheduled->res.ops->drain_budget(&entry->scheduled->res, how_much, cpu);
        /* if flagged for removal from environment, invoke shutdown callback */
@@ -659,18 +662,32 @@ static void gedf_env_update_time(
                /* assumed to already been unlinked by whatever set will_remove */
                entry->scheduled->res.ops->shutdown(&entry->scheduled->res);
                entry->scheduled = NULL;
-        } else if (!entry->scheduled->res.cur_budget) {
+        }
+        /* We need to lock this whole section due to how budget draining works.
+         * check_for_preemption can be called before budget is properly updated, which,
+         * through multiple parallel calls to check_for_preemption may end up linking
+         * a task that's out of budget(but not when it is ran through check_for_preemption) to
+         * a core other than this one.
+         * That core can then experience multiple reschedule calls due to the multiple calls to
+         * check_for_preemption, which will make the linked out of budget task into scheduled.
+         * Now we have an interesting dilemma. This core and the other core both sees that its
+         * scheduling the same out of budget task. So we need a way to break symmetry and let
+         * one core do nothing. By checking for !cur_budget and replenishing budget under a lock,
+         * we can achieve this.
+         */
+        raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
+        if (entry->scheduled && !entry->scheduled->res.cur_budget) {
                entry->scheduled->res.ops->replenish_budget(&entry->scheduled->res, cpu);
                /* unlink and requeue if not blocked and not np*/
                if (!entry->scheduled->blocked &&
                                !entry->scheduled->res.ops->is_np(&entry->scheduled->res, cpu)) {
-                        raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
                        unlink(gedf_env, entry->scheduled);
                        requeue(gedf_env, entry->scheduled);
                        check_for_preemptions(gedf_env);
-                        raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
                }
        }
+        raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
 }
 /* callback for how the domain will release jobs */
author	Zelin Tong <ztong@ludwig.cs.unc.edu>	2020-10-14 06:14:28 -0400
committer	Tanya Amert <tamert@cs.unc.edu>	2020-10-14 20:27:08 -0400
commit	2ea98c6d1c38df18555b96afda6607fa9edad72e (patch)
tree	937c1ec49a1c238e501a1c7a0639531600fb1ef3
parent	b58595723675d016e7a8e06afcad9be8fd85de3a (diff)