author		Zelin Tong <ztong@ludwig.cs.unc.edu>	2020-03-04 17:39:50 -0500
committer	Zelin Tong <ztong@ludwig.cs.unc.edu>	2020-03-04 17:39:50 -0500
commit		6cd4356d6003c8f57ee518575cf91526fe1ed9f0 (patch)
tree		3d8bad6bba3bd10d114f2e136163b8e7fde982d9
parent		ade4ca95d6619e92b348259395a30d978842a77a (diff)
Fixed a link_task_to_cpu call that triggered a BUG_ON

This happened because we were not properly scheduling containers on
their associated cores, and we were not properly keeping track of CPUs
whose containers are fully provisioned. I fixed the logic for that.

Also, I moved some functions around to group them by similar usage and
fixed some minor bugs.
-rw-r--r--	litmus/sched_edfsc.c	206

1 file changed, 140 insertions(+), 66 deletions(-)
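The gist of the fix to the linking logic: when a core has nothing linked, edfsc_gschedule() now checks whether the task it pulled off the global ready queue is a container and, if so, links it to the core whose number matches edfsc_params.id rather than the local core, handing that core's previously linked task back to the local core. Fully provisioned containers (utilization equal to to_fp(1)) additionally get their core removed from the global heap so no migrating work can be linked there. Below is a minimal user-space sketch of just the redirect-and-swap rule; task_t, cpu_entry_t, and link_ready_task() here are simplified stand-ins for the kernel structures in sched_edfsc.c, not the actual implementation.

/* Simplified model of the fixed dispatch rule (plain user-space C,
 * not kernel code). Build with: cc -std=c99 sketch.c && ./a.out */
#include <stdio.h>

#define NUM_CPUS 4

typedef struct task {
	int is_container;	/* containers are pinned to the core with their id */
	int id;			/* for containers: index of the associated core */
	const char *name;
} task_t;

typedef struct cpu_entry {
	int cpu;
	task_t *linked;
} cpu_entry_t;

static cpu_entry_t cpu_entries[NUM_CPUS];

/* Link a task taken from the global ready queue to the right core.
 * A container may only run on the core matching its id, so if it
 * surfaces on some other core, forward it there and hand that core's
 * previously linked task back to the local core (the real code then
 * calls preempt(target) so the remote core reschedules). */
static void link_ready_task(task_t *task, cpu_entry_t *entry)
{
	cpu_entry_t *target = entry;

	if (task && task->is_container)
		target = &cpu_entries[task->id];
	if (target != entry)
		entry->linked = target->linked;	/* swap links */
	target->linked = task;
}

int main(void)
{
	task_t c2 = { .is_container = 1, .id = 2, .name = "container-2" };
	int i;

	for (i = 0; i < NUM_CPUS; i++)
		cpu_entries[i].cpu = i;

	/* CPU 0 went idle and pulled container-2 off the ready queue:
	 * it must end up linked on CPU 2, not CPU 0. */
	link_ready_task(&c2, &cpu_entries[0]);
	for (i = 0; i < NUM_CPUS; i++)
		printf("cpu %d -> %s\n", i,
		       cpu_entries[i].linked ? cpu_entries[i].linked->name : "idle");
	return 0;
}

In the patch itself, the same pattern appears both in edfsc_gschedule() and in g_job_completion()/container_boundary(), where a fully provisioned container is linked directly after remove_cpu_from_global(entry) takes its core out of the heap that g_preempt_check() scans.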
diff --git a/litmus/sched_edfsc.c b/litmus/sched_edfsc.c
index ea1b308667c7..a787fd5a040d 100644
--- a/litmus/sched_edfsc.c
+++ b/litmus/sched_edfsc.c
@@ -156,14 +156,77 @@ static cpu_entry_t* lowest_prio_cpu(void)
 	return hn->value;
 }
 
+static void remove_cpu_from_global(cpu_entry_t *entry)
+{
+	bheap_delete(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
+}
+
+static void add_cpu_to_global(cpu_entry_t *entry)
+{
+	bheap_insert(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
+}
+
 /* update_cpu_position - Move the cpu entry to the correct place to maintain
  *                       order in the cpu queue. Caller must hold g_lock.
  */
 static void update_cpu_position(cpu_entry_t *entry)
 {
 	if (likely(bheap_node_in_heap(entry->hn)))
-		bheap_delete(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
-	bheap_insert(cpu_lower_prio, &edfsc_cpu_heap, entry->hn);
+		remove_cpu_from_global(entry);
+	add_cpu_to_global(entry);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////
+/*
+ *
+ * IDLE CONTAINER BUDGET ENFORCEMENT
+ *
+ */
+
+//timeout for timer enforcing budget of empty container
+static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
+{
+	cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer);
+
+	unsigned long flags;
+
+	local_irq_save(flags);
+	domain->timer_armed = 0;
+	litmus_reschedule_local();
+	local_irq_restore(flags);
+
+	return HRTIMER_NORESTART;
+}
+
+void manage_idle_enforcement_timer(struct task_struct* t)
+{
+	lt_t now;
+
+	cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;
+	now = litmus_clock();
+	domain->scheduled_last_exec_time = now;
+	if (budget_precisely_enforced(t)) {
+		BUG_ON(budget_exhausted(t) && !is_np(t));
+		if (likely(!is_np(t))) {
+			//hrtimer_start cancels the timer so don't have to check
+			//if it is already armed
+			hrtimer_start(&(domain->idle_enforcement_timer),
+					ns_to_ktime(now + budget_remaining(t)),
+					HRTIMER_MODE_ABS_PINNED);
+			domain->timer_armed = 1;
+		}
+	}
+	else if (domain->timer_armed) {
+		hrtimer_try_to_cancel(&(domain->idle_enforcement_timer));
+		domain->timer_armed = 0;
+	}
+}
+
+void cancel_idle_enforcement_timer(struct task_struct* t)
+{
+	cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;
+	hrtimer_try_to_cancel(&(domain->idle_enforcement_timer));
+	domain->timer_armed = 0;
 }
 
 /* link_task_to_cpu - Links a migrating task or container to a CPU
@@ -277,9 +340,8 @@ static void g_preempt_check(void)
 			edf_preemption_needed(&gsched_domain, last->linked);
 	     last = lowest_prio_cpu()) {
 		target = last;
-		if (is_container(last->linked) && get_rt_utilization(last->linked) == to_fp(1)) {
-			break;
-		}
+		BUG_ON(is_container(last->linked) && get_rt_utilization(last->linked) == to_fp(1));
+
 		/* preemption necessary */
 		task = __take_ready(&gsched_domain);
 		if (is_container(task)) {
@@ -340,6 +402,7 @@ static void g_remove_task(struct task_struct *t)
 	future_m_util -= get_rt_utilization(t);
 	future_sys_util -= get_rt_utilization(t);
 	if (tsk_rt(t)->edfsc_params.move_to) {
+		prepare_for_next_period(t);
 		tsk_rt(t)->domain = (rt_domain_t*)tsk_rt(t)->edfsc_params.move_to;
 		tsk_rt(t)->edfsc_params.container_task = tsk_rt(t)->edfsc_params.move_to->container;
 		requeue(t);
@@ -376,6 +439,8 @@ static noinline void g_job_completion(struct task_struct* t, int forced)
 	BUG_ON(!t);
 	sched_trace_task_completion(t, forced);
 
+	cpu_entry_t* entry;
+
 	TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
 
 	/* set flags */
@@ -384,31 +449,55 @@ static noinline void g_job_completion(struct task_struct* t, int forced)
 	/* unlink */
 	unlink(t);
 
+	// When migrating is being removed, or turned into a fixed task
 	if (is_migrating(t) && tsk_rt(t)->edfsc_params.will_remove) {
 		if (t->rt_param.job_params.lateness > 0) {
 			// remove the task now
 			if (is_queued(t))
 				remove(tsk_rt(t)->domain, t);
+			//g_remove_task will properly add t to the move_to container
 			g_remove_task(t);
 		}
-	} else if (is_migrating(t) || (is_container(t) && tsk_rt(t)->edfsc_params.can_release)) {
-		tsk_rt(t)->edfsc_params.can_release = 0; //only matter for containers
+		else {
+			//TODO: deadline timer to call g_remove_task
+		}
+	// When a migrating job finishes
+	} else if (is_migrating(t)) {
 		/* prepare for next period */
-		tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
 		prepare_for_next_period(t);
 		if (is_early_releasing(t) || is_released(t, litmus_clock()))
 			sched_trace_task_release(t);
-		//TODO
 		/* requeue
 		 * But don't requeue a blocking task. */
 		if (is_current_running()) { //since we don't support blocking, this should always be true
-			if (is_container(t) && tsk_rt(t)->edfsc_params.domain->scheduled) {
-				requeue(tsk_rt(t)->edfsc_params.domain->scheduled);
-				tsk_rt(t)->edfsc_params.domain->scheduled = NULL;
-			}
 			requeue(t);
 			g_preempt_check();
 		}
+	// When a container job finishes late
+	} else if (is_container(t) && tsk_rt(t)->edfsc_params.can_release) {
+		tsk_rt(t)->edfsc_params.can_release = 0;
+		tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
+		prepare_for_next_period(t);
+		if (is_early_releasing(t) || is_released(t, litmus_clock()))
+			sched_trace_task_release(t);
+		if (get_rt_utilization(t) == to_fp(1)) {
+			cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
+			remove_cpu_from_global(entry);
+			entry->linked = t;
+			tsk_rt(t)->linked_on = entry->cpu;
+			cancel_idle_enforcement_timer(t);
+			preempt(entry);
+		}
+		else {
+			if (is_current_running()) {
+				if (tsk_rt(t)->edfsc_params.domain->scheduled) {
+					requeue(tsk_rt(t)->edfsc_params.domain->scheduled);
+					tsk_rt(t)->edfsc_params.domain->scheduled = NULL;
+				}
+				requeue(t);
+				g_preempt_check();
+			}
+		}
 	}
 }
 
@@ -573,35 +662,12 @@ static struct task_struct* edfsc_cschedule(cont_domain_t* cedf, struct task_stru
 	return (next) ? next : cedf->container;
 }
 
-void manage_idle_enforcement_timer(struct task_struct* t)
-{
-	lt_t now;
-
-	cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;
-	now = litmus_clock();
-	domain->scheduled_last_exec_time = now;
-	if (budget_precisely_enforced(t)) {
-		BUG_ON(budget_exhausted(t) && !is_np(t));
-		if (likely(!is_np(t))) {
-			//hrtimer_start cancels the timer so don't have to check
-			//if it is already armed
-			hrtimer_start(&(domain->idle_enforcement_timer),
-					ns_to_ktime(now + budget_remaining(t)),
-					HRTIMER_MODE_ABS_PINNED);
-			domain->timer_armed = 1;
-		}
-	}
-	else if (domain->timer_armed) {
-		hrtimer_try_to_cancel(&(domain->idle_enforcement_timer));
-		domain->timer_armed = 0;
-	}
-}
 
 //assuming prev is previous task running on the processor before calling schedule
 static struct task_struct *edfsc_gschedule(struct task_struct *prev)
 {
 	cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
-	int out_of_time, sleep, preempt, np, exists, blocks, is_cont;
+	int out_of_time, sleep, preempted, np, exists, blocks, is_cont;
 	struct task_struct* next = NULL;
 	unsigned long flags;
 
@@ -650,7 +716,7 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
 			&& budget_exhausted(entry->scheduled);
 	np = exists && is_np(entry->scheduled);
 	sleep = exists && is_completed(entry->scheduled);
-	preempt = entry->scheduled != entry->linked;
+	preempted = entry->scheduled != entry->linked;
 
 #ifdef WANT_ALL_SCHED_EVENTS
 	TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
@@ -663,7 +729,7 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
 			blocks, out_of_time, np, sleep, preempt,
 			prev->state, signal_pending(prev), is_cont);
 
-	if (entry->linked && preempt)
+	if (entry->linked && preempted)
 		TRACE_TASK(prev, "will be preempted by %s/%d\n",
 			entry->linked->comm, entry->linked->pid);
 
@@ -679,7 +745,7 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
 	 * that we are still linked. Multiple calls to request_exit_np() don't
 	 * hurt.
 	 */
-	if (np && (out_of_time || preempt || sleep)) {
+	if (np && (out_of_time || preempted || sleep)) {
 		unlink(entry->scheduled);
 		request_exit_np(entry->scheduled);
 	}
@@ -695,10 +761,21 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
 
 	/* Link pending task if we became unlinked.
 	 */
-	// TODO make container only be scheduled on cores with same id
-	if (!entry->linked)
-		link_task_to_cpu(__take_ready(&gsched_domain), entry);
-	BUG_ON(entry->linked && budget_exhausted(entry->linked));
+	// make container only be scheduled on cores with same id
+	if (!entry->linked) {
+		struct task_struct* task = __take_ready(&gsched_domain);
+		cpu_entry_t* target = entry;
+		if (is_container(task)) {
+			target = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id);
+		}
+		if (target != entry) {
+			link_task_to_cpu(target->linked, entry);
+		}
+		link_task_to_cpu(task, target);
+		preempt(target);
+
+		BUG_ON(entry->linked && budget_exhausted(entry->linked));
+	}
 
 	BUG_ON(entry->linked && budget_exhausted(entry->linked));
 
@@ -900,24 +977,35 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
 
 	// Re-release container tasks, or tell them they can if they're tardy
 	for (i = 0; i < num_cpus; i++) {
+		// will first iterate through fully provisioned containers, then not fully provisioned ones
 		struct task_struct* t = container_list[i]->container;
 		printk("container id: %d\n", tsk_rt(t)->edfsc_params.id);
 		printk("container budget: %lld\n", budget_remaining(t));
 		// If the last job completed on time, release it now
 		if (budget_exhausted(t)) {
+			BUG_ON(is_queued(t));
 			prepare_for_next_period(t);
-			if (is_early_releasing(t) || is_released(t, now)) {
+			if (is_early_releasing(t) || is_released(t, now))
 				sched_trace_task_release(t);
+			if (get_rt_utilization(t) == to_fp(1)) {
+				cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
+				remove_cpu_from_global(entry);
+				entry->linked = t;
+				tsk_rt(t)->linked_on = entry->cpu;
+				cancel_idle_enforcement_timer(t);
+				preempt(entry);
 			}
-			/* requeue
-			 * But don't requeue a blocking task. */
-			if (is_current_running()) { //since we don't support blocking, this should always be true
-				if (tsk_rt(t)->edfsc_params.domain->scheduled) {
-					requeue(tsk_rt(t)->edfsc_params.domain->scheduled);
-					tsk_rt(t)->edfsc_params.domain->scheduled = NULL;
+			else {
+				cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
+				add_cpu_to_global(entry);
+				if (is_current_running()) { //since we don't support blocking, this should always be true
+					if (tsk_rt(t)->edfsc_params.domain->scheduled) {
+						requeue(tsk_rt(t)->edfsc_params.domain->scheduled);
+						tsk_rt(t)->edfsc_params.domain->scheduled = NULL;
+					}
+					requeue(t);
+					g_preempt_check();
 				}
-				requeue(t);
-				g_preempt_check();
 			}
 		// Otherwise let it release itself when it completes
 		} else {
@@ -1196,20 +1284,6 @@ static struct sched_plugin edfsc_plugin __cacheline_aligned_in_smp = {
 	.get_domain_proc_info = edfsc_get_domain_proc_info,
 };
 
-//timeout for timer enforcing budget of empty container
-static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
-{
-	cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer);
-
-	unsigned long flags;
-
-	local_irq_save(flags);
-	domain->timer_armed = 0;
-	litmus_reschedule_local();
-	local_irq_restore(flags);
-
-	return HRTIMER_NORESTART;
-}
 
 static int __init init_edfsc(void)
 {