author     Joshua Bakita <jbakita@cs.unc.edu>    2020-10-10 22:52:15 -0400
committer  Joshua Bakita <jbakita@cs.unc.edu>    2020-10-11 19:01:12 -0400
commit     a13573378cc34327d5af9d2af88e12ccf1ff4fc3
tree       71c0bfd350edaf8a5c7fc23fc948e42ad394bc1a
parent     6d7a3f278d85fb19c742435fff0b757559cba7eb
Improve portability and fix dynamic CPU entry/exit in gedf_env
Dynamic CPU entry/exit changes:
- Fix build with CONFIG_RELEASE_MASTER
- Migrate gedf_env release timer when a core is suspended (see the
sketch after this list)
- Fix race condition in gedf_env core resume
- Add documentation
- Allow gedf_env_suspend() and gedf_env_resume() to be called
on CPUs that have already been suspended or resumed. (They do
nothing in those cases.)
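
To make the timer-migration bullet concrete, here is a simplified sketch
(not the patch itself): when a core leaves the environment while other
cores remain, an asynchronous IPI is sent to the lowest-priority remaining
core, which re-arms the release timer locally. `migrate_release_timer` is a
made-up helper name; the wrapper struct, the callback, and the helpers
(`lowest_prio_cpu`, `domain_resume_releases`, `bheap_node_in_heap`) follow
the patch, and locking plus error handling are elided.

	/* Sketch only. Assumes kernel headers <linux/smp.h> and <linux/slab.h>
	 * and the patch's struct call_single_data usage. */
	struct csd_wrapper {
		struct call_single_data csd;
		struct gedf_reservation_environment* gedf_env;
	};

	/* Runs on the target CPU as an IPI callback. */
	static void try_resume_timer(void *csd_info)
	{
		struct csd_wrapper* w = csd_info;
		int cpu = smp_processor_id();

		/* Re-arm releases only if this CPU is still part of the environment. */
		if (bheap_node_in_heap(w->gedf_env->cpu_entries[cpu].hn))
			domain_resume_releases(&w->gedf_env->domain);
		kfree(w);
	}

	/* Hypothetical helper: hand the release timer to another active core. */
	static void migrate_release_timer(struct gedf_reservation_environment* gedf_env)
	{
		/* GFP_ATOMIC: called from scheduling context, so we must not sleep. */
		struct csd_wrapper* w = kzalloc(sizeof(*w), GFP_ATOMIC);

		w->gedf_env = gedf_env;
		w->csd.func = &try_resume_timer;
		w->csd.info = w;
		/* Fire-and-forget IPI to the lowest-priority CPU still in the environment. */
		smp_call_function_single_async(lowest_prio_cpu(&gedf_env->cpu_heap)->id,
					       &w->csd);
	}
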
Portability:
- Allocate space in `gedf_reservation_environment` using
`num_online_cpus()` rather than `NR_CPUS` (see the sketch after this
list). Otherwise the stack frame can overflow when `NR_CPUS` is large.
- Assign `plugin_state` from the plugin rather than the extended
reservations code to support other uses of `plugin_state`.
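
As a condensed sketch of the allocation change (the real code, including
the error paths, is in `alloc_gedf_reservation_environment()` in the diff
below), the per-CPU arrays become pointers sized by the number of online
CPUs instead of fixed `NR_CPUS`-sized members:

	/* Sketch: cpu_entries and cpu_node are now pointers (see the header diff). */
	int total_cpus = num_online_cpus();

	gedf_env->cpu_entries = kzalloc(sizeof(struct gedf_cpu_entry) * total_cpus,
					GFP_ATOMIC);
	gedf_env->cpu_node = kzalloc(sizeof(struct bheap_node) * total_cpus,
				     GFP_ATOMIC);
	/* (NULL checks and cleanup omitted here; see the diff.) */
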
Misc:
- Improve robustness of `gedf_env_is_np()`
- Don't memset() to zero memory that `kzalloc()` has already zeroed
- Use GFP_ATOMIC for allocations made in a scheduling context (see the
sketch after this list)
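
The last two points in one small illustration (a sketch, not code from the
patch):

	/* kzalloc() already returns zeroed memory, so a follow-up memset()
	 * is redundant and is dropped by this patch: */
	gedf_env = kzalloc(sizeof(*gedf_env), GFP_KERNEL);
	memset(gedf_env, 0, sizeof(*gedf_env));	/* <- removed */

	/* In scheduling context (spinlock held, interrupts off) an allocation
	 * must not sleep, hence GFP_ATOMIC rather than GFP_KERNEL: */
	csd_wrapper = kzalloc(sizeof(*csd_wrapper), GFP_ATOMIC);
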
-rw-r--r--   include/litmus/reservations/gedf_reservation.h |   4
-rw-r--r--   include/litmus/rt_domain.h                      |   2
-rw-r--r--   litmus/reservations/gedf_reservation.c          | 124
-rw-r--r--   litmus/rt_domain.c                              |   6
-rw-r--r--   litmus/sched_ext_res.c                          |   1
-rw-r--r--   litmus/sched_ext_res_c1.c                       |   1
6 files changed, 107 insertions, 31 deletions
diff --git a/include/litmus/reservations/gedf_reservation.h b/include/litmus/reservations/gedf_reservation.h
index e39d632262a3..08961e7931a2 100644
--- a/include/litmus/reservations/gedf_reservation.h
+++ b/include/litmus/reservations/gedf_reservation.h
@@ -59,11 +59,11 @@ struct gedf_reservation_environment {
 	volatile int num_cpus;
 
 	/* array of gedf cpu entries */
-	struct gedf_cpu_entry cpu_entries[NR_CPUS];
+	struct gedf_cpu_entry* cpu_entries;
 
 	/* used to order cpus for gedf purposes */
 	struct bheap cpu_heap;
-	struct bheap_node cpu_node[NR_CPUS];
+	struct bheap_node* cpu_node;
 
 	rt_domain_t domain;
 };
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
index 45dfb1e2b76f..691e2c15556c 100644
--- a/include/litmus/rt_domain.h
+++ b/include/litmus/rt_domain.h
@@ -251,7 +251,7 @@ static inline void add_release_on(rt_domain_t* rt,
 	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
 }
 
-static inline void add_release_res_on(rt_domain_rt* rt,
+static inline void add_release_res_on(rt_domain_t* rt,
 		struct reservation* res,
 		int target_cpu)
 {
diff --git a/litmus/reservations/gedf_reservation.c b/litmus/reservations/gedf_reservation.c
index dca51a23386e..2ed16575144e 100644
--- a/litmus/reservations/gedf_reservation.c
+++ b/litmus/reservations/gedf_reservation.c
@@ -13,6 +13,12 @@
 #include <litmus/debug_trace.h>
 #include <litmus/reservations/gedf_reservation.h>
 
+// Needed to store context during cross-CPU function calls
+struct csd_wrapper {
+	struct call_single_data csd;
+	struct gedf_reservation_environment* gedf_env;
+};
+
 /* ******************************************************************************* */
 /* returns 1 if res of a has earlier deadline than res of b */
 static int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
@@ -300,11 +306,9 @@ long alloc_gedf_task_reservation(
 	if (!gedf_task_res)
 		return -ENOMEM;
 
-	memset(gedf_task_res, 0, sizeof(struct gedf_task_reservation));
 	init_ext_reservation(&gedf_task_res->gedf_res.res, task->pid, &gedf_task_ops);
 
 	gedf_task_res->task = task;
-	tsk_rt(task)->plugin_state = gedf_task_res;
 
 	*_res = gedf_task_res;
 	return 0;
@@ -322,7 +326,6 @@ long alloc_gedf_container_reservation(
 	if (!gedf_cont_res)
 		return -ENOMEM;
 
-	memset(gedf_cont_res, 0, sizeof(struct gedf_container_reservation));
 	init_ext_reservation(&gedf_cont_res->gedf_res.res, id, &gedf_cont_ops);
 
 	gedf_cont_res->max_budget = max_budget;
@@ -357,7 +360,9 @@ static void gedf_env_shutdown(
 	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
 
 	/* free memory */
-	kfree(env);
+	kfree(gedf_env->cpu_entries);
+	kfree(gedf_env->cpu_node);
+	kfree(gedf_env);
 }
 
 static int gedf_env_is_np(
@@ -366,12 +371,9 @@ static int gedf_env_is_np(
 {
 	struct gedf_reservation_environment* gedf_env =
 		container_of(env, struct gedf_reservation_environment, env);
-	struct reservation* res =
-		&gedf_env->cpu_entries[cpu].scheduled->res;
-	if (res)
-		return res->ops->is_np(res, cpu);
-	else
-		return 0;
+	struct gedf_reservation* scheduled =
+		gedf_env->cpu_entries[cpu].scheduled;
+	return scheduled && scheduled->res.ops->is_np(&scheduled->res, cpu);
 }
 
 static struct reservation* gedf_find_res_by_id(
@@ -452,9 +454,28 @@ static void gedf_env_add_res(
 	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
 }
 
-/* TODO: currently does not fully support dynamic cores count in environment
- * when a core is suspended, if the release timer is on that core, it will not be
- * properly suspended. Only the last core to suspend stops the release timer
+/* try_resume_timer: Attempt to resume the release timer locally.
+ * @param csd_info Pointer to `info` field of struct call_single_data
+ * @note Used as IPI callback, do not call directly. Lockless.
+ */
+static void try_resume_timer(void *csd_info)
+{
+	struct csd_wrapper* csd_wrapper = csd_info;
+	struct gedf_reservation_environment* gedf_env = csd_wrapper->gedf_env;
+	int cpu = smp_processor_id();
+	struct gedf_cpu_entry* entry = &gedf_env->cpu_entries[cpu];
+	// Abort if this CPU was suspended before we could process the IPI
+	if (!bheap_node_in_heap(entry->hn))
+		goto out;
+	domain_resume_releases(&gedf_env->domain);
+out:
+	kfree(csd_wrapper);
+}
+
+/* gedf_env_suspend: Remove the specified core from scheduling consideration
+ * @param env Environment to modify
+ * @param cpu CPU to remove if present.
+ * @note Safe to call if core already removed. Skips lock in that case.
  */
 static void gedf_env_suspend(
 	struct reservation_environment* env,
@@ -468,7 +489,19 @@ static void gedf_env_suspend(
 	gedf_env = container_of(env, struct gedf_reservation_environment, env);
 	entry = &gedf_env->cpu_entries[cpu];
 
+	/* Ignore suspension requests on inactive cores
+	 * This will not errantly fail, as the first thing resume() does is re-add the node
+	 * This will only errantly pass if another core is simultaneously inside
+	 * our critical section. The second check catches that.
+	 * In all cases this will avoid taking the lock if we were never part of the container.
+	 */
+	if (!bheap_node_in_heap(entry->hn))
+		return;
+
 	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
+	// Do not remove! See above comment.
+	if (!bheap_node_in_heap(entry->hn))
+		goto unlock;
 
 	//TODO: More Graceful way to handle forbidden zone violation?
 	BUG_ON(env->ops->is_np(env, cpu));
@@ -484,19 +517,34 @@
 		entry->scheduled->res.ops->on_preempt(&entry->scheduled->res, cpu);
 	entry->scheduled = NULL;
 
-	BUG_ON(!bheap_node_in_heap(entry->hn));
 	/* this essentially removes the cpu from scheduling consideration */
 	bheap_delete(cpu_lower_prio, &gedf_env->cpu_heap, entry->hn);
 
 	check_for_preemptions(gedf_env);
 
-	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
-
-	/* suspends rt_domain releases when the last core of env is preempted */
+	/* suspends rt_domain releases when the last core of env is preempted
+	 * OR re-arm release timer on a different CPU */
 	if (!gedf_env->num_cpus)
 		domain_suspend_releases(&gedf_env->domain);
+	else {
+		struct csd_wrapper* csd_wrapper =
+			kzalloc(sizeof(struct csd_wrapper), GFP_ATOMIC);
+		csd_wrapper->gedf_env = gedf_env;
+		csd_wrapper->csd.func = &try_resume_timer;
+		csd_wrapper->csd.info = csd_wrapper;
+		smp_call_function_single_async(
+			lowest_prio_cpu(&gedf_env->cpu_heap)->id,
+			&csd_wrapper->csd);
+	}
+unlock:
+	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
 }
 
+/* gedf_env_resume: Add the specified core to scheduling consideration
+ * @param env Environment to modify
+ * @param cpu CPU to add if not yet added.
+ * @note Safe to call if core already added.
+ */
 static void gedf_env_resume(
 	struct reservation_environment* env,
 	int cpu)
@@ -504,21 +552,35 @@
 	struct gedf_reservation_environment* gedf_env;
 	struct gedf_cpu_entry* entry;
 	unsigned long flags;
+	// Needs to be volatile or it may be optimized to gedf_env->num_cpus
+	volatile int tmp_cpus;
 
 	gedf_env = container_of(env, struct gedf_reservation_environment, env);
 	entry = &gedf_env->cpu_entries[cpu];
 
-	/* resumes rt_domain releases when the first core of env resumes execution */
-	if (!gedf_env->num_cpus)
-		domain_resume_releases(&gedf_env->domain);
+	// If we've already been resumed, do nothing
+	if (bheap_node_in_heap(entry->hn))
+		return;
 
 	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
-	BUG_ON(bheap_node_in_heap(entry->hn));
-	gedf_env->num_cpus++;
+	// Check again. Our earlier check may have raced with this critical section
+	if (bheap_node_in_heap(entry->hn)) {
+		raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
+		return;
+	}
+
+	// Save how many cpus were resumed before us (if none, we need to restart the timer)
+	tmp_cpus = gedf_env->num_cpus;
+
 	/* adds cpu back to scheduling consideration */
 	bheap_insert(cpu_lower_prio, &gedf_env->cpu_heap, entry->hn);
+	gedf_env->num_cpus++;
 
 	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
+
+	// Keep this outside the lock. Resuming the timer may have side-effects.
+	if (!tmp_cpus)
+		domain_resume_releases(&gedf_env->domain);
 }
 
 static struct task_struct* gedf_env_dispatch(
@@ -644,12 +706,24 @@ long alloc_gedf_reservation_environment(
 {
 	struct gedf_reservation_environment* gedf_env;
 	int i;
+	int total_cpus = num_online_cpus();
 
-	gedf_env = kzalloc(sizeof(struct gedf_reservation_environment), GFP_KERNEL);
+	gedf_env = kzalloc(sizeof(struct gedf_reservation_environment), GFP_ATOMIC);
 	if (!gedf_env)
 		return -ENOMEM;
-
-	memset(gedf_env, 0, sizeof(struct gedf_reservation_environment));
+	/* We don't know which subset of CPUs we'll run on, so we must keep state
+	 * for all of them */
+	gedf_env->cpu_entries = kzalloc(sizeof(struct gedf_cpu_entry)*total_cpus, GFP_ATOMIC);
+	if (!gedf_env->cpu_entries) {
+		kfree(gedf_env);
+		return -ENOMEM;
+	}
+	gedf_env->cpu_node = kzalloc(sizeof(struct bheap_node)*total_cpus, GFP_ATOMIC);
+	if (!gedf_env->cpu_node) {
+		kfree(gedf_env->cpu_entries);
+		kfree(gedf_env);
+		return -ENOMEM;
+	}
 
 	/* set environment callback actions */
 	gedf_env->env.ops = &gedf_env_ops;
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 1a15e2491a65..db3a48bff0d0 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -123,6 +123,7 @@ void domain_suspend_releases(rt_domain_t* rt)
 	hrtimer_cancel(&rt->timer);
 }
 
+// Resume the release timer on the current CPU
 void domain_resume_releases(rt_domain_t* rt)
 {
 	release_jobs_before_now(rt);
@@ -131,7 +132,6 @@ void domain_resume_releases(rt_domain_t* rt)
 			ns_to_ktime(rt->release_queue.earliest_release),
 			HRTIMER_MODE_ABS_PINNED);
 	}
-
 }
 
 /* allocated in litmus.c */
@@ -343,7 +343,7 @@ static void arm_release_timer(rt_domain_t *_rt)
  * TODO: find some way to combine this with the task version of this fuction
  */
 #ifdef CONFIG_RELEASE_MASTER
-#define arm_release_timer_res(t) arm_release_timer_res_on((t), NO_CPU)
+#define arm_release_timer_res(t, i) arm_release_timer_res_on((t), (i), NO_CPU)
 static void arm_release_timer_res_on(rt_domain_t *_rt, int interrupt_release, int target_cpu)
 #else
 static void arm_release_timer_res(rt_domain_t *_rt, int interrupt_release)
@@ -526,7 +526,7 @@ void __add_release_res_on(rt_domain_t* rt, struct reservation *res,
 {
 	list_add(&res->ln, &rt->tobe_released);
 
-	arm_release_timer_res_on(rt, target_cpu);
+	arm_release_timer_res_on(rt, 1, target_cpu);
 }
 #endif
 
diff --git a/litmus/sched_ext_res.c b/litmus/sched_ext_res.c
index 492e2dd8db09..583a2ed9aef0 100644
--- a/litmus/sched_ext_res.c
+++ b/litmus/sched_ext_res.c
@@ -105,6 +105,7 @@ static long ext_res_admit_task(struct task_struct *tsk)
 	err = alloc_gedf_task_reservation(&gedf_task_res, tsk);
 	if (err)
 		return err;
+	tsk_rt(tsk)->plugin_state = gedf_task_res;
 
 	gedf_task_res->gedf_res.res.par_env = mtd_res->res[0].env;
 
diff --git a/litmus/sched_ext_res_c1.c b/litmus/sched_ext_res_c1.c
index 63f6d821d2d4..559708c4234f 100644
--- a/litmus/sched_ext_res_c1.c
+++ b/litmus/sched_ext_res_c1.c
@@ -135,6 +135,7 @@ static long ext_res_admit_task(struct task_struct *tsk)
 	err = alloc_gedf_task_reservation(&gedf_task_res, tsk);
 	if (err)
 		return err;
+	tsk_rt(tsk)->plugin_state = gedf_task_res;
 
 	gedf_task_res->gedf_res.res.par_env = &gedf_env->env;
 