author	Joshua Bakita <jbakita@cs.unc.edu>	2020-10-10 22:52:15 -0400
committer	Joshua Bakita <jbakita@cs.unc.edu>	2020-10-11 19:01:12 -0400
commit	a13573378cc34327d5af9d2af88e12ccf1ff4fc3 (patch)
tree	71c0bfd350edaf8a5c7fc23fc948e42ad394bc1a
parent	6d7a3f278d85fb19c742435fff0b757559cba7eb (diff)
Improve portability and fix dynamic CPU entry/exit in gedf_env
Dynamic CPU entry/exit changes:
- Fix build with CONFIG_RELEASE_MASTER
- Migrate gedf_env release timer when a core is suspended
- Fix race condition in gedf_env core resume
- Add documentation
- Allow gedf_env_suspend() and gedf_env_resume() to be called on CPUs that
  have already been suspended or resumed. (They do nothing in those cases.)

Portability:
- Allocate space in `gedf_reservation_environment` using `num_online_cpus()`
  rather than `NR_CPUS`. Otherwise the stack frame can overflow when
  `NR_CPUS` is large.
- Assign `plugin_state` from the plugin rather than the extended reservations
  code, to support other uses of `plugin_state`.

Misc:
- Improve robustness of `gedf_env_is_np()`
- Don't memset memory already zeroed out by `kzalloc()`
- Use GFP_ATOMIC for allocations when in a scheduling context
-rw-r--r--	include/litmus/reservations/gedf_reservation.h	|   4
-rw-r--r--	include/litmus/rt_domain.h	|   2
-rw-r--r--	litmus/reservations/gedf_reservation.c	| 124
-rw-r--r--	litmus/rt_domain.c	|   6
-rw-r--r--	litmus/sched_ext_res.c	|   1
-rw-r--r--	litmus/sched_ext_res_c1.c	|   1
6 files changed, 107 insertions(+), 31 deletions(-)
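The timer-migration change described above leans on the kernel's asynchronous cross-CPU call machinery. The sketch below isolates that pattern, roughly as the patch applies it in gedf_env_suspend()/try_resume_timer(). It is an illustrative sketch only: the names my_ctx, do_rearm(), and rearm_timer_on() are made up here, and the struct call_single_data field layout (.func/.info) assumes the ~4.x-era kernels that LITMUS^RT targets.

/* Illustrative sketch, not patch code. */
#include <linux/smp.h>
#include <linux/slab.h>

struct my_ctx {
	struct call_single_data csd;	/* must outlive the IPI; freed by the callback */
	void *payload;			/* whatever context the callback needs */
};

/* Runs on the target CPU in IPI context; keep it short and lock-free. */
static void do_rearm(void *info)
{
	struct my_ctx *ctx = info;
	/* ... re-arm the pinned release timer on smp_processor_id() here ... */
	kfree(ctx);			/* the callback owns and frees the wrapper */
}

static int rearm_timer_on(int target_cpu, void *payload)
{
	/* GFP_ATOMIC: this may run from scheduling code with a raw spinlock held */
	struct my_ctx *ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);

	if (!ctx)
		return -ENOMEM;
	ctx->payload = payload;
	ctx->csd.func = do_rearm;
	ctx->csd.info = ctx;
	/* Queue the call and return immediately; nothing here sleeps or waits */
	return smp_call_function_single_async(target_cpu, &ctx->csd);
}

Allocating the wrapper per call, rather than embedding one csd per CPU, keeps the caller lock-free at the cost of a small GFP_ATOMIC allocation, which mirrors the trade-off the patch makes.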
diff --git a/include/litmus/reservations/gedf_reservation.h b/include/litmus/reservations/gedf_reservation.h
index e39d632262a3..08961e7931a2 100644
--- a/include/litmus/reservations/gedf_reservation.h
+++ b/include/litmus/reservations/gedf_reservation.h
@@ -59,11 +59,11 @@ struct gedf_reservation_environment {
 	volatile int num_cpus;
 
 	/* array of gedf cpu entries */
-	struct gedf_cpu_entry cpu_entries[NR_CPUS];
+	struct gedf_cpu_entry* cpu_entries;
 
 	/* used to order cpus for gedf purposes */
 	struct bheap cpu_heap;
-	struct bheap_node cpu_node[NR_CPUS];
+	struct bheap_node* cpu_node;
 
 	rt_domain_t domain;
 };
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
index 45dfb1e2b76f..691e2c15556c 100644
--- a/include/litmus/rt_domain.h
+++ b/include/litmus/rt_domain.h
@@ -251,7 +251,7 @@ static inline void add_release_on(rt_domain_t* rt,
 	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
 }
 
-static inline void add_release_res_on(rt_domain_rt* rt,
+static inline void add_release_res_on(rt_domain_t* rt,
 	struct reservation* res,
 	int target_cpu)
 {
diff --git a/litmus/reservations/gedf_reservation.c b/litmus/reservations/gedf_reservation.c
index dca51a23386e..2ed16575144e 100644
--- a/litmus/reservations/gedf_reservation.c
+++ b/litmus/reservations/gedf_reservation.c
@@ -13,6 +13,12 @@
 #include <litmus/debug_trace.h>
 #include <litmus/reservations/gedf_reservation.h>
 
+// Needed to store context during cross-CPU function calls
+struct csd_wrapper {
+	struct call_single_data csd;
+	struct gedf_reservation_environment* gedf_env;
+};
+
 /* ******************************************************************************* */
 /* returns 1 if res of a has earlier deadline than res of b */
 static int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
@@ -300,11 +306,9 @@ long alloc_gedf_task_reservation(
 	if (!gedf_task_res)
 		return -ENOMEM;
 
-	memset(gedf_task_res, 0, sizeof(struct gedf_task_reservation));
 	init_ext_reservation(&gedf_task_res->gedf_res.res, task->pid, &gedf_task_ops);
 
 	gedf_task_res->task = task;
-	tsk_rt(task)->plugin_state = gedf_task_res;
 
 	*_res = gedf_task_res;
 	return 0;
@@ -322,7 +326,6 @@ long alloc_gedf_container_reservation(
 	if (!gedf_cont_res)
 		return -ENOMEM;
 
-	memset(gedf_cont_res, 0, sizeof(struct gedf_container_reservation));
 	init_ext_reservation(&gedf_cont_res->gedf_res.res, id, &gedf_cont_ops);
 
 	gedf_cont_res->max_budget = max_budget;
@@ -357,7 +360,9 @@ static void gedf_env_shutdown(
 	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
 
 	/* free memory */
-	kfree(env);
+	kfree(gedf_env->cpu_entries);
+	kfree(gedf_env->cpu_node);
+	kfree(gedf_env);
 }
 
 static int gedf_env_is_np(
@@ -366,12 +371,9 @@ static int gedf_env_is_np(
 {
 	struct gedf_reservation_environment* gedf_env =
 		container_of(env, struct gedf_reservation_environment, env);
-	struct reservation* res =
-		&gedf_env->cpu_entries[cpu].scheduled->res;
-	if (res)
-		return res->ops->is_np(res, cpu);
-	else
-		return 0;
+	struct gedf_reservation* scheduled =
+		gedf_env->cpu_entries[cpu].scheduled;
+	return scheduled && scheduled->res.ops->is_np(&scheduled->res, cpu);
 }
 
 static struct reservation* gedf_find_res_by_id(
@@ -452,9 +454,28 @@ static void gedf_env_add_res(
 	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
 }
 
-/* TODO: currently does not fully support dynamic cores count in environment
- * when a core is suspended, if the release timer is on that core, it will not be
- * properly suspended. Only the last core to suspend stops the release timer
+/* try_resume_timer: Attempt to resume the release timer locally.
+ * @param csd_info Pointer to `info` field of struct call_single_data
+ * @note Used as IPI callback, do not call directly. Lockless.
+ */
+static void try_resume_timer(void *csd_info)
+{
+	struct csd_wrapper* csd_wrapper = csd_info;
+	struct gedf_reservation_environment* gedf_env = csd_wrapper->gedf_env;
+	int cpu = smp_processor_id();
+	struct gedf_cpu_entry* entry = &gedf_env->cpu_entries[cpu];
+	// Abort if this CPU was suspended before we could process the IPI
+	if (!bheap_node_in_heap(entry->hn))
+		goto out;
+	domain_resume_releases(&gedf_env->domain);
+out:
+	kfree(csd_wrapper);
+}
+
+/* gedf_env_suspend: Remove the specified core from scheduling consideration
+ * @param env Environment to modify
+ * @param cpu CPU to remove if present.
+ * @note Safe to call if core already removed. Skips lock in that case.
  */
 static void gedf_env_suspend(
 	struct reservation_environment* env,
@@ -468,7 +489,19 @@ static void gedf_env_suspend(
 	gedf_env = container_of(env, struct gedf_reservation_environment, env);
 	entry = &gedf_env->cpu_entries[cpu];
 
+	/* Ignore suspension requests on inactive cores
+	 * This will not errantly fail, as the first thing resume() does is re-add the node
+	 * This will only errantly pass if another core is simultaneously inside
+	 * our critical section. The second check catches that.
+	 * In all cases this will avoid taking the lock if we were never part of the container.
+	 */
+	if (!bheap_node_in_heap(entry->hn))
+		return;
+
 	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
+	// Do not remove! See above comment.
+	if (!bheap_node_in_heap(entry->hn))
+		goto unlock;
 
 	//TODO: More Graceful way to handle forbidden zone violation?
 	BUG_ON(env->ops->is_np(env, cpu));
@@ -484,19 +517,34 @@ static void gedf_env_suspend(
 		entry->scheduled->res.ops->on_preempt(&entry->scheduled->res, cpu);
 	entry->scheduled = NULL;
 
-	BUG_ON(!bheap_node_in_heap(entry->hn));
 	/* this essentially removes the cpu from scheduling consideration */
 	bheap_delete(cpu_lower_prio, &gedf_env->cpu_heap, entry->hn);
 
 	check_for_preemptions(gedf_env);
 
-	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
-
-	/* suspends rt_domain releases when the last core of env is preempted */
+	/* suspends rt_domain releases when the last core of env is preempted
+	 * OR re-arm release timer on a different CPU */
 	if (!gedf_env->num_cpus)
 		domain_suspend_releases(&gedf_env->domain);
+	else {
+		struct csd_wrapper* csd_wrapper =
+			kzalloc(sizeof(struct csd_wrapper), GFP_ATOMIC);
+		csd_wrapper->gedf_env = gedf_env;
+		csd_wrapper->csd.func = &try_resume_timer;
+		csd_wrapper->csd.info = csd_wrapper;
+		smp_call_function_single_async(
+			lowest_prio_cpu(&gedf_env->cpu_heap)->id,
+			&csd_wrapper->csd);
+	}
+unlock:
+	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
 }
 
+/* gedf_env_resume: Add the specified core to scheduling consideration
+ * @param env Environment to modify
+ * @param cpu CPU to add if not yet added.
+ * @note Safe to call if core already added.
+ */
 static void gedf_env_resume(
 	struct reservation_environment* env,
 	int cpu)
@@ -504,21 +552,35 @@ static void gedf_env_resume(
 	struct gedf_reservation_environment* gedf_env;
 	struct gedf_cpu_entry* entry;
 	unsigned long flags;
+	// Needs to be volatile or it may be optimized to gedf_env->num_cpus
+	volatile int tmp_cpus;
 
 	gedf_env = container_of(env, struct gedf_reservation_environment, env);
 	entry = &gedf_env->cpu_entries[cpu];
 
-	/* resumes rt_domain releases when the first core of env resumes execution */
-	if (!gedf_env->num_cpus)
-		domain_resume_releases(&gedf_env->domain);
+	// If we've already been resumed, do nothing
+	if (bheap_node_in_heap(entry->hn))
+		return;
 
 	raw_spin_lock_irqsave(&gedf_env->domain.ready_lock, flags);
-	BUG_ON(bheap_node_in_heap(entry->hn));
-	gedf_env->num_cpus++;
+	// Check again. Our earlier check may have raced with this critical section
+	if (bheap_node_in_heap(entry->hn)) {
+		raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
+		return;
+	}
+
+	// Save how many cpus were resumed before us (if none, we need to restart the timer)
+	tmp_cpus = gedf_env->num_cpus;
+
 	/* adds cpu back to scheduling consideration */
 	bheap_insert(cpu_lower_prio, &gedf_env->cpu_heap, entry->hn);
+	gedf_env->num_cpus++;
 
 	raw_spin_unlock_irqrestore(&gedf_env->domain.ready_lock, flags);
+
+	// Keep this outside the lock. Resuming the timer may have side-effects.
+	if (!tmp_cpus)
+		domain_resume_releases(&gedf_env->domain);
 }
 
 static struct task_struct* gedf_env_dispatch(
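For reference, the suspend/resume idempotency added in the two hunks above follows a standard check-lock-recheck shape. Below is a minimal sketch of that idiom; entry_active() and mark_entry_active() are hypothetical stand-ins for the bheap_node_in_heap() test and bheap_insert() on the ready-lock-protected heap, not functions from this tree.

/* Illustrative sketch, not patch code. */
static void resume_once(struct gedf_cpu_entry *e, raw_spinlock_t *lock)
{
	unsigned long flags;

	if (entry_active(e))		/* cheap unlocked early-out */
		return;

	raw_spin_lock_irqsave(lock, flags);
	if (entry_active(e)) {		/* lost the race: someone resumed us already */
		raw_spin_unlock_irqrestore(lock, flags);
		return;
	}
	mark_entry_active(e);		/* the real state change happens exactly once */
	raw_spin_unlock_irqrestore(lock, flags);
}

The unlocked first check is only an optimization; correctness comes from repeating the test under the lock, which is why the patch's in-code comment warns not to remove the second check.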
@@ -644,12 +706,24 @@ long alloc_gedf_reservation_environment(
 {
 	struct gedf_reservation_environment* gedf_env;
 	int i;
+	int total_cpus = num_online_cpus();
 
-	gedf_env = kzalloc(sizeof(struct gedf_reservation_environment), GFP_KERNEL);
+	gedf_env = kzalloc(sizeof(struct gedf_reservation_environment), GFP_ATOMIC);
 	if (!gedf_env)
 		return -ENOMEM;
-
-	memset(gedf_env, 0, sizeof(struct gedf_reservation_environment));
+	/* We don't know which subset of CPUs we'll run on, so we must keep state
+	 * for all of them */
+	gedf_env->cpu_entries = kzalloc(sizeof(struct gedf_cpu_entry)*total_cpus, GFP_ATOMIC);
+	if (!gedf_env->cpu_entries) {
+		kfree(gedf_env);
+		return -ENOMEM;
+	}
+	gedf_env->cpu_node = kzalloc(sizeof(struct bheap_node)*total_cpus, GFP_ATOMIC);
+	if (!gedf_env->cpu_node) {
+		kfree(gedf_env->cpu_entries);
+		kfree(gedf_env);
+		return -ENOMEM;
+	}
 
 	/* set environment callback actions */
 	gedf_env->env.ops = &gedf_env_ops;
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 1a15e2491a65..db3a48bff0d0 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -123,6 +123,7 @@ void domain_suspend_releases(rt_domain_t* rt)
 	hrtimer_cancel(&rt->timer);
 }
 
+// Resume the release timer on the current CPU
 void domain_resume_releases(rt_domain_t* rt)
 {
 	release_jobs_before_now(rt);
@@ -131,7 +132,6 @@ void domain_resume_releases(rt_domain_t* rt)
 			ns_to_ktime(rt->release_queue.earliest_release),
 			HRTIMER_MODE_ABS_PINNED);
 	}
-
 }
 
 /* allocated in litmus.c */
@@ -343,7 +343,7 @@ static void arm_release_timer(rt_domain_t *_rt)
  * TODO: find some way to combine this with the task version of this fuction
  */
 #ifdef CONFIG_RELEASE_MASTER
-#define arm_release_timer_res(t) arm_release_timer_res_on((t), NO_CPU)
+#define arm_release_timer_res(t, i) arm_release_timer_res_on((t), (i), NO_CPU)
 static void arm_release_timer_res_on(rt_domain_t *_rt, int interrupt_release, int target_cpu)
 #else
 static void arm_release_timer_res(rt_domain_t *_rt, int interrupt_release)
@@ -526,7 +526,7 @@ void __add_release_res_on(rt_domain_t* rt, struct reservation *res,
 {
 	list_add(&res->ln, &rt->tobe_released);
 
-	arm_release_timer_res_on(rt, target_cpu);
+	arm_release_timer_res_on(rt, 1, target_cpu);
 }
 #endif
 
diff --git a/litmus/sched_ext_res.c b/litmus/sched_ext_res.c
index 492e2dd8db09..583a2ed9aef0 100644
--- a/litmus/sched_ext_res.c
+++ b/litmus/sched_ext_res.c
@@ -105,6 +105,7 @@ static long ext_res_admit_task(struct task_struct *tsk)
 	err = alloc_gedf_task_reservation(&gedf_task_res, tsk);
 	if (err)
 		return err;
+	tsk_rt(tsk)->plugin_state = gedf_task_res;
 
 	gedf_task_res->gedf_res.res.par_env = mtd_res->res[0].env;
 
diff --git a/litmus/sched_ext_res_c1.c b/litmus/sched_ext_res_c1.c
index 63f6d821d2d4..559708c4234f 100644
--- a/litmus/sched_ext_res_c1.c
+++ b/litmus/sched_ext_res_c1.c
@@ -135,6 +135,7 @@ static long ext_res_admit_task(struct task_struct *tsk)
 	err = alloc_gedf_task_reservation(&gedf_task_res, tsk);
 	if (err)
 		return err;
+	tsk_rt(tsk)->plugin_state = gedf_task_res;
 
 	gedf_task_res->gedf_res.res.par_env = &gedf_env->env;
 