diff options
author | Zelin Tong <ztong@ludwig.cs.unc.edu> | 2020-10-14 06:14:28 -0400 |
---|---|---|
committer | Tanya Amert <tamert@cs.unc.edu> | 2020-10-14 20:27:08 -0400 |
commit | 2ea98c6d1c38df18555b96afda6607fa9edad72e (patch) | |
tree | 937c1ec49a1c238e501a1c7a0639531600fb1ef3 | |
parent | b58595723675d016e7a8e06afcad9be8fd85de3a (diff) |
Fixed obscure edge case regarding unlink
Unlink in gedf_reservation would try to unlink a task that is neither linked
nor in the ready queue; it is in fact in the release queue. This happens
when multiple check_for_preemptions calls occur simultaneously, causing a
task to be "scheduled" on 2 cpu_entries simultaneously while out of
budget. Thus, both CPUs in update_time replenish the budget and try
to unlink it. The first one succeeds, while the second fails.
For more details on how this occurs, see the comment in update_time in
gedf_reservation.c
-rw-r--r-- | litmus/reservations/gedf_reservation.c | 27 |
1 files changed, 22 insertions, 5 deletions
diff --git a/litmus/reservations/gedf_reservation.c b/litmus/reservations/gedf_reservation.c index 157cd16d7ebb..2a228b4c8894 100644 --- a/litmus/reservations/gedf_reservation.c +++ b/litmus/reservations/gedf_reservation.c | |||
@@ -93,7 +93,6 @@ static void requeue( | |||
93 | __add_ready_res(&gedf_env->domain, &gedf_res->res); | 93 | __add_ready_res(&gedf_env->domain, &gedf_res->res); |
94 | else | 94 | else |
95 | __add_release_res(&gedf_env->domain, &gedf_res->res); | 95 | __add_release_res(&gedf_env->domain, &gedf_res->res); |
96 | |||
97 | } | 96 | } |
98 | 97 | ||
99 | static void link_task_to_cpu( | 98 | static void link_task_to_cpu( |
@@ -651,7 +650,11 @@ static void gedf_env_update_time( | |||
651 | if (!entry->scheduled) | 650 | if (!entry->scheduled) |
652 | return; | 651 | return; |
653 | 652 | ||
654 | /* tells scheduled res to drain its budget */ | 653 | /* tells scheduled res to drain its budget. |
654 | * In the situation of 2 cores having the same scheduled(detailed in comment below), the task will be | ||
655 | * out of budget. This means drain_budget just atomically sets cur_budget to 0 on drain. | ||
656 | * Therefore, no lock is needed for this operation | ||
657 | */ | ||
655 | entry->scheduled->res.ops->drain_budget(&entry->scheduled->res, how_much, cpu); | 658 | entry->scheduled->res.ops->drain_budget(&entry->scheduled->res, how_much, cpu); |
656 | 659 | ||
657 | /* if flagged for removal from environment, invoke shutdown callback */ | 660 | /* if flagged for removal from environment, invoke shutdown callback */ |
@@ -659,18 +662,32 @@ static void gedf_env_update_time( | |||
659 | /* assumed to already been unlinked by whatever set will_remove */ | 662 | /* assumed to already been unlinked by whatever set will_remove */ |
660 | entry->scheduled->res.ops->shutdown(&entry->scheduled->res); | 663 | entry->scheduled->res.ops->shutdown(&entry->scheduled->res); |
661 | entry->scheduled = NULL; | 664 | entry->scheduled = NULL; |
662 | } else if (!entry->scheduled->res.cur_budget) { | 665 | } |
666 | |||
667 | /* We need to lock this whole section due to how budget draining works. | ||
668 | * check_for_preemption can be called before budget is properly updated, which, | ||
669 | * through multiple parallel calls to check_for_preemption may end up linking | ||
670 | * a task that's out of budget(but not when it is ran through check_for_preemption) to | ||
671 | * a core other than this one. | ||
672 | * That core can then experience multiple reschedule calls due to the multiple calls to | ||
673 | * check_for_preemption, which will make the linked out of budget task into scheduled. | ||
674 | * Now we have an interesting dilemma. This core and the other core both sees that its | ||
675 | * scheduling the same out of budget task. So we need a way to break symmetry and let | ||
676 | * one core do nothing. By checking for !cur_budget and replenishing budget under a lock, | ||
677 | * we can achieve this. | ||
678 | */ | ||
679 | raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags); | ||
680 | if (entry->scheduled && !entry->scheduled->res.cur_budget) { | ||
663 | entry->scheduled->res.ops->replenish_budget(&entry->scheduled->res, cpu); | 681 | entry->scheduled->res.ops->replenish_budget(&entry->scheduled->res, cpu); |
664 | /* unlink and requeue if not blocked and not np*/ | 682 | /* unlink and requeue if not blocked and not np*/ |
665 | if (!entry->scheduled->blocked && | 683 | if (!entry->scheduled->blocked && |
666 | !entry->scheduled->res.ops->is_np(&entry->scheduled->res, cpu)) { | 684 | !entry->scheduled->res.ops->is_np(&entry->scheduled->res, cpu)) { |
667 | raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags); | ||
668 | unlink(gedf_env, entry->scheduled); | 685 | unlink(gedf_env, entry->scheduled); |
669 | requeue(gedf_env, entry->scheduled); | 686 | requeue(gedf_env, entry->scheduled); |
670 | check_for_preemptions(gedf_env); | 687 | check_for_preemptions(gedf_env); |
671 | raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags); | ||
672 | } | 688 | } |
673 | } | 689 | } |
690 | raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags); | ||
674 | } | 691 | } |
675 | 692 | ||
676 | /* callback for how the domain will release jobs */ | 693 | /* callback for how the domain will release jobs */ |