author     Zelin Tong <ztong@ludwig.cs.unc.edu>    2020-07-02 00:58:09 -0400
committer  Zelin Tong <ztong@ludwig.cs.unc.edu>    2020-07-02 00:58:09 -0400
commit     098a298ef73dd8dbacf0d697eef2a6f2daa2081c (patch)
tree       546de13acc94765ec9c116b8d8b42632139179a5
parent     e4c5fa6df346a78dfb683d601fd5ad34e6de3375 (diff)
FINAL BUG FIXED VERSION
-rw-r--r--  include/litmus/rt_param.h |   1
-rw-r--r--  include/litmus/trace.h    |   4
-rw-r--r--  kernel/sched/core.c       |   1
-rw-r--r--  litmus/sched_edfsc.c      | 886
4 files changed, 492 insertions, 400 deletions
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index 22ce3da19245..fd8410aaabbf 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -122,7 +122,6 @@ struct edfsc_params {
122 cont_domain_t *domain; 122 cont_domain_t *domain;
123 struct task_struct *container_task; 123 struct task_struct *container_task;
124 int id; 124 int id;
125 int can_release; //only used for containers
126 // Moved these to the struct task_struct in include/linux/sched.h so that 125 // Moved these to the struct task_struct in include/linux/sched.h so that
127 // the runtime can build 126 // the runtime can build
128 //struct list_head qnode; 127 //struct list_head qnode;
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index 2646136e3881..dbbd817d2bd1 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -140,8 +140,8 @@ feather_callback void save_cpu_task_latency(unsigned long event, unsigned long w
140#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */ 140#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
141#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */ 141#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
142 142
143#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */ 143#define TS_PLUGIN_TICK_START /* CPU_TIMESTAMP(130) */
144#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */ 144#define TS_PLUGIN_TICK_END /* CPU_TIMESTAMP(131) */
145 145
146#define TS_ENTER_NP_START CPU_TIMESTAMP(140) 146#define TS_ENTER_NP_START CPU_TIMESTAMP(140)
147#define TS_ENTER_NP_END CPU_TIMESTAMP(141) 147#define TS_ENTER_NP_END CPU_TIMESTAMP(141)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 98ad911a0c58..fe986d548abf 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3479,6 +3479,7 @@ static void __sched notrace __schedule(bool preempt)
3479 } else { 3479 } else {
3480 lockdep_unpin_lock(&rq->lock, cookie); 3480 lockdep_unpin_lock(&rq->lock, cookie);
3481 TS_SCHED_END(prev); 3481 TS_SCHED_END(prev);
3482 litmus->finish_switch(prev);
3482 raw_spin_unlock_irq(&rq->lock); 3483 raw_spin_unlock_irq(&rq->lock);
3483 } 3484 }
3484 3485
diff --git a/litmus/sched_edfsc.c b/litmus/sched_edfsc.c
index fae6feeac76f..ea2dce57b337 100644
--- a/litmus/sched_edfsc.c
+++ b/litmus/sched_edfsc.c
@@ -29,7 +29,6 @@ typedef struct cont_domain {
29 struct task_struct *container; 29 struct task_struct *container;
30 struct task_struct *scheduled; //fixed task 30 struct task_struct *scheduled; //fixed task
31 lt_t scheduled_last_exec_time; //exec_time of the scheduled task when it was last scheduled 31 lt_t scheduled_last_exec_time; //exec_time of the scheduled task when it was last scheduled
32 lt_t changed_budget; //change to scheduled task's exec time due to container budget constraints
33 u64 f_util; 32 u64 f_util;
34 struct bheap_node *hn; 33 struct bheap_node *hn;
35 struct hrtimer idle_enforcement_timer; 34 struct hrtimer idle_enforcement_timer;
@@ -52,6 +51,8 @@ struct list_head pending_adds;
52 51
53struct list_head migrating_tasks; 52struct list_head migrating_tasks;
54 53
54struct list_head pending_removes;
55
55struct hrtimer container_release_timer; 56struct hrtimer container_release_timer;
56 57
57DEFINE_PER_CPU(cpu_entry_t, edfsc_cpu_entries); 58DEFINE_PER_CPU(cpu_entry_t, edfsc_cpu_entries);
@@ -72,7 +73,7 @@ u64 sys_util;
72int sys_changed; 73int sys_changed;
73 74
74#define is_container(task) ((task) && tsk_rt(task)->edfsc_params.domain != NULL && tsk_rt(task)->domain == &gsched_domain) 75#define is_container(task) ((task) && tsk_rt(task)->edfsc_params.domain != NULL && tsk_rt(task)->domain == &gsched_domain)
75#define is_fixed(task) ((task) && tsk_rt(task)->edfsc_params.container_task != NULL) 76#define is_fixed(task) ((task) && tsk_rt(task)->domain && tsk_rt(task)->domain != &gsched_domain)
76#define is_migrating(task) ((task) && tsk_rt(task)->edfsc_params.domain == NULL && tsk_rt(task)->domain == &gsched_domain) 77#define is_migrating(task) ((task) && tsk_rt(task)->edfsc_params.domain == NULL && tsk_rt(task)->domain == &gsched_domain)
77 78
78#define FP_SHIFT 20 79#define FP_SHIFT 20
@@ -105,16 +106,23 @@ int count_migrating_tasks(void)
105/* Do a backwards comparison based on f_util so that heavier containers 106/* Do a backwards comparison based on f_util so that heavier containers
106 * will come first 107 * will come first
107 */ 108 */
109// Used for best-fit
108static int container_lower_prio(const void *_a, const void *_b) 110static int container_lower_prio(const void *_a, const void *_b)
109{ 111{
110 const cont_domain_t *a = (const cont_domain_t *)(_a); 112 const cont_domain_t* a = *(const cont_domain_t**)(_a);
111 const cont_domain_t *b = (const cont_domain_t *)(_b); 113 const cont_domain_t* b = *(const cont_domain_t**)(_b);
112 if (a->f_util < b->f_util) return 1; 114 return (b->f_util - a->f_util);
113 if (a->f_util > b->f_util) return -1; 115}
114 return 0; 116
117// Used for worst-fit
118static int container_higher_prio(const void *_a, const void *_b)
119{
120 const cont_domain_t* a = *(const cont_domain_t**)(_a);
121 const cont_domain_t* b = *(const cont_domain_t**)(_b);
122 return (a->f_util - b->f_util);
115} 123}
116 124
117/* Finds the task_struct of the hrtimer set by task_exit 125/* Finds the task_struct of a list node
118 */ 126 */
119static struct task_struct* task_of_list_node(struct list_head *node) 127static struct task_struct* task_of_list_node(struct list_head *node)
120{ 128{
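The two comparators above are meant to be passed to sort() over the container_list array of cont_domain_t pointers: sorting ascending by f_util and scanning for the first container with enough room gives worst-fit placement, while the descending sort gives best fit. A minimal userspace sketch of that pattern, using illustrative stand-in types and plain doubles rather than the plugin's fixed-point utilization values:

/* Hedged sketch: worst-fit container selection via qsort, mirroring the
 * container_higher_prio comparator above. The cont_t type, the utilization
 * values, and the 0.02 leeway are illustrative stand-ins, not plugin state. */
#include <stdio.h>
#include <stdlib.h>

typedef struct { int id; double f_util; } cont_t;

/* Ascending utilization: least-loaded container first (worst fit). */
static int higher_prio(const void *_a, const void *_b)
{
	const cont_t *a = *(const cont_t **)_a;
	const cont_t *b = *(const cont_t **)_b;
	if (a->f_util < b->f_util) return -1;
	if (a->f_util > b->f_util) return 1;
	return 0;
}

int main(void)
{
	cont_t c[3] = { {0, 0.70}, {1, 0.20}, {2, 0.45} };
	cont_t *list[3] = { &c[0], &c[1], &c[2] };
	double task_util = 0.30, leeway = 0.02;
	int i;

	qsort(list, 3, sizeof(cont_t *), higher_prio);
	for (i = 0; i < 3; i++) {
		if (1.0 > task_util + list[i]->f_util + leeway) {
			/* Picks container 1, the emptiest one that still fits. */
			printf("worst fit: container %d (util %.2f)\n",
			       list[i]->id, list[i]->f_util);
			break;
		}
	}
	return 0;
}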
@@ -128,7 +136,11 @@ static noinline void requeue(struct task_struct* task)
128 BUG_ON(!task); 136 BUG_ON(!task);
129 /* sanity check before insertion */ 137 /* sanity check before insertion */
130 BUG_ON(is_queued(task)); 138 BUG_ON(is_queued(task));
131 BUG_ON(!is_realtime(task)); 139 BUG_ON(is_migrating(task) && task->rt_param.edfsc_params.container_task != NULL);
140 //BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
141 //BUG_ON(is_completed(task) || (budget_enforced(task) && budget_exhausted(task)));
142 //BUG_ON(is_container(task) && ((cont_domain_t*)task->rt_param.edfsc_params.domain)->timer_armed);
143 //BUG_ON(task && is_completed(task));
132 144
133 if (is_early_releasing(task) || is_released(task, litmus_clock())) { 145 if (is_early_releasing(task) || is_released(task, litmus_clock())) {
134 __add_ready((rt_domain_t *) tsk_rt(task)->domain, task); 146 __add_ready((rt_domain_t *) tsk_rt(task)->domain, task);
@@ -147,10 +159,12 @@ static noinline void requeue(struct task_struct* task)
147static void preempt(cpu_entry_t *entry) 159static void preempt(cpu_entry_t *entry)
148{ 160{
149 BUG_ON(!entry); 161 BUG_ON(!entry);
150 if (is_container(entry->scheduled)) 162 if (is_container(entry->scheduled)) {
151 preempt_if_preemptable(tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled, entry->cpu); 163 preempt_if_preemptable(tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled, entry->cpu);
152 else 164 }
165 else {
153 preempt_if_preemptable(entry->scheduled, entry->cpu); 166 preempt_if_preemptable(entry->scheduled, entry->cpu);
167 }
154} 168}
155 169
156///////////////////////////////////////////////////////////////////////////////////// 170/////////////////////////////////////////////////////////////////////////////////////
@@ -161,7 +175,7 @@ static void preempt(cpu_entry_t *entry)
161 */ 175 */
162 176
163static struct bheap_node* edfsc_cpu_heap_node; // Array of cpu heap nodes 177static struct bheap_node* edfsc_cpu_heap_node; // Array of cpu heap nodes
164static struct bheap edfsc_cpu_heap; 178static struct bheap edfsc_cpu_heap; // Cpu heap
165 179
166static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 180static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
167{ 181{
@@ -215,7 +229,15 @@ static void update_cpu_position(cpu_entry_t *entry)
215 * 229 *
216 */ 230 */
217 231
218//timeout for timer enforcing budget of empty container 232// updates exec_time for container budget tracking
233static void update_container_budget(struct task_struct* t) {
234 lt_t now = litmus_clock();
235 tsk_rt(t)->job_params.exec_time += now
236 - tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time;
237 tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = now;
238}
239
240// timeout for timer enforcing budget of empty container
219static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer) 241static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
220{ 242{
221 cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer); 243 cont_domain_t* domain = container_of(timer, cont_domain_t, idle_enforcement_timer);
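update_container_budget() above charges a container for the wall-clock time elapsed since scheduled_last_exec_time and then advances that checkpoint. A small userspace sketch of the same checkpoint-style accounting, using CLOCK_MONOTONIC and illustrative names rather than LITMUS^RT state:

/* Hedged sketch: checkpoint-based budget accounting in the spirit of
 * update_container_budget(), but in userspace. budget_t and charge() are
 * made-up names for illustration only. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

typedef struct {
	uint64_t exec_time;       /* ns consumed so far in this job */
	uint64_t last_checkpoint; /* ns timestamp of the last charge */
} budget_t;

static uint64_t now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Charge all time elapsed since the last checkpoint to the budget. */
static void charge(budget_t *b)
{
	uint64_t now = now_ns();
	b->exec_time += now - b->last_checkpoint;
	b->last_checkpoint = now;
}

int main(void)
{
	budget_t b = { 0, now_ns() };
	usleep(2000);             /* "run" for about 2 ms */
	charge(&b);
	printf("consumed ~%llu us\n", (unsigned long long)(b.exec_time / 1000));
	return 0;
}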
@@ -223,7 +245,9 @@ static enum hrtimer_restart on_idle_enforcement_timeout(struct hrtimer *timer)
223 unsigned long flags; 245 unsigned long flags;
224 246
225 local_irq_save(flags); 247 local_irq_save(flags);
248 BUG_ON(tsk_rt(domain->container)->edfsc_params.id != this_cpu_ptr(&edfsc_cpu_entries)->cpu);
226 domain->timer_armed = 0; 249 domain->timer_armed = 0;
250 tsk_rt(domain->container)->completed = 1;
227 litmus_reschedule_local(); 251 litmus_reschedule_local();
228 local_irq_restore(flags); 252 local_irq_restore(flags);
229 253
@@ -236,21 +260,17 @@ void manage_idle_enforcement_timer(struct task_struct* t)
236 260
237 cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain; 261 cont_domain_t* domain = tsk_rt(t)->edfsc_params.domain;
238 now = litmus_clock(); 262 now = litmus_clock();
239 domain->scheduled_last_exec_time = now; 263 BUG_ON(is_completed(t));
240 if (budget_precisely_enforced(t)) { 264 BUG_ON(budget_exhausted(t) && !is_np(t));
241 BUG_ON(budget_exhausted(t) && !is_np(t)); 265
242 if (likely(!is_np(t))) { 266 if (!domain->timer_armed) {
243 //hrtimer_start cancels the timer so don't have to check 267 domain->scheduled_last_exec_time = now;
244 //if it is already armed 268 //hrtimer_start cancels the timer so don't have to check
245 hrtimer_start(&(domain->idle_enforcement_timer), 269 //if it is already armed
246 ns_to_ktime(now + budget_remaining(t)), 270 hrtimer_start(&(domain->idle_enforcement_timer),
247 HRTIMER_MODE_ABS_PINNED); 271 ns_to_ktime(now + budget_remaining(t)),
248 domain->timer_armed = 1; 272 HRTIMER_MODE_ABS_PINNED);
249 } 273 domain->timer_armed = 1;
250 }
251 else if (domain->timer_armed) {
252 hrtimer_try_to_cancel(&(domain->idle_enforcement_timer));
253 domain->timer_armed = 0;
254 } 274 }
255} 275}
256 276
@@ -263,18 +283,19 @@ void cancel_idle_enforcement_timer(struct task_struct* t)
263 283
264/* link_task_to_cpu - Links a migrating task or container to a CPU 284/* link_task_to_cpu - Links a migrating task or container to a CPU
265 * Update the link of a CPU. 285 * Update the link of a CPU.
266 * Handles the case where the to-be-linked task is already
267 * scheduled on a different CPU.
268 */ 286 */
269static noinline void link_task_to_cpu(struct task_struct* linked, 287static noinline void link_task_to_cpu(struct task_struct* linked,
270 cpu_entry_t *entry) 288 cpu_entry_t *entry)
271{ 289{
272 BUG_ON(linked && !is_realtime(linked));
273 BUG_ON(is_fixed(linked)); 290 BUG_ON(is_fixed(linked));
274 BUG_ON(is_container(linked) && tsk_rt(linked)->edfsc_params.id != entry->cpu); 291 BUG_ON(is_container(linked) && tsk_rt(linked)->edfsc_params.id != entry->cpu);
292 BUG_ON(linked && is_queued(linked));
293 //BUG_ON(linked && ((budget_enforced(linked) && budget_exhausted(linked)) || is_completed(linked)));
294 BUG_ON(linked && !is_released(linked, litmus_clock()));
295 //BUG_ON(is_container(linked) && linked->rt_param.edfsc_params.domain->timer_armed);
275 296
276 /* Currently linked task is set to be unlinked. */ 297 /* Currently linked task is set to be unlinked. */
277 if (entry->linked) 298 if (entry->linked && entry->linked->rt_param.linked_on == entry->cpu)
278 entry->linked->rt_param.linked_on = NO_CPU; 299 entry->linked->rt_param.linked_on = NO_CPU;
279 300
280 /* Link new task to CPU. */ 301 /* Link new task to CPU. */
@@ -282,6 +303,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
282 linked->rt_param.linked_on = entry->cpu; 303 linked->rt_param.linked_on = entry->cpu;
283 304
284 entry->linked = linked; 305 entry->linked = linked;
306 BUG_ON(entry->linked && entry->linked->rt_param.linked_on != entry->cpu);
285#ifdef WANT_ALL_SCHED_EVENTS 307#ifdef WANT_ALL_SCHED_EVENTS
286 if (linked) 308 if (linked)
287 TRACE_TASK(linked, "linked to %d.\n", entry->cpu); 309 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
@@ -302,6 +324,7 @@ static noinline void unlink(struct task_struct* t)
302 if (t->rt_param.linked_on != NO_CPU) { 324 if (t->rt_param.linked_on != NO_CPU) {
303 /* unlink */ 325 /* unlink */
304 entry = &per_cpu(edfsc_cpu_entries, t->rt_param.linked_on); 326 entry = &per_cpu(edfsc_cpu_entries, t->rt_param.linked_on);
327 BUG_ON(entry->cpu != t->rt_param.linked_on);
305 t->rt_param.linked_on = NO_CPU; 328 t->rt_param.linked_on = NO_CPU;
306 link_task_to_cpu(NULL, entry); 329 link_task_to_cpu(NULL, entry);
307 BUG_ON(entry->linked || t->rt_param.linked_on != NO_CPU); 330 BUG_ON(entry->linked || t->rt_param.linked_on != NO_CPU);
@@ -320,9 +343,12 @@ static noinline void unlink(struct task_struct* t)
320//TODO change local linking 343//TODO change local linking
321static void g_preempt_check(void) 344static void g_preempt_check(void)
322{ 345{
323 struct task_struct *task; 346 struct task_struct *task, *temp;
324 cpu_entry_t *last, *target; 347 cpu_entry_t *last, *target;
325 348
349 if (!bheap_peek(cpu_lower_prio, &edfsc_cpu_heap))
350 return;
351
326 // Loop through CPUs in priority order, checking if anything needs preemption 352 // Loop through CPUs in priority order, checking if anything needs preemption
327 for (last = lowest_prio_cpu(); 353 for (last = lowest_prio_cpu();
328 edf_preemption_needed(&gsched_domain, last->linked); 354 edf_preemption_needed(&gsched_domain, last->linked);
@@ -331,29 +357,33 @@ static void g_preempt_check(void)
331 357
332 /* preemption necessary */ 358 /* preemption necessary */
333 task = __take_ready(&gsched_domain); 359 task = __take_ready(&gsched_domain);
334 // Don't requeue if budget is exhausted or job is completed 360 // Preempt_check can be called before gschedule, and therefore g_job_completion.
335 if (requeue_preempted_job(last->linked)) 361 // So, a task can be temporarily added to the ready queue, but will quickly be rectified
336 requeue(last->linked); 362 // by either this, or g_job_completion
337 363 if (requeue_preempted_job(task)) {
338 // If we're dequeuing a container, put it on the appropriate core and 364 // Update container budget tracking
339 // move whatever was there before to `last` 365 if (is_container(task)) {
340 if (is_container(task)) { 366 last = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id);
341 target = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id); 367 }
342 TRACE("g_preempt_check: swapping tasks linked on %d and %d\n", 368 else if (is_container(last->linked)) {
343 last->cpu, target->cpu); 369 if (tsk_rt(last->linked)->edfsc_params.domain->timer_armed) {
344 link_task_to_cpu(target->linked, last); 370 update_container_budget(last->linked);
371 }
372 }
373 if (requeue_preempted_job(last->linked)) {
374 requeue(last->linked);
375 }
376 TRACE("g_preempt_check: attempting to link task %d to %d\n",
377 task->pid, target->cpu);
378 link_task_to_cpu(task, last);
345 preempt(last); 379 preempt(last);
346 } 380 }
347 TRACE("g_preempt_check: attempting to link task %d to %d\n",
348 task->pid, target->cpu);
349 link_task_to_cpu(task, target);
350 preempt(target);
351 } 381 }
352} 382}
353 383
354static int c_preempt_check(cont_domain_t *container) 384static int c_preempt_check(cont_domain_t *container)
355{ 385{
356 if (is_migrating(container->scheduled) 386 if ((is_migrating(container->scheduled) && __peek_ready(&container->domain))
357 || edf_preemption_needed(&container->domain, container->scheduled)) { 387 || edf_preemption_needed(&container->domain, container->scheduled)) {
358 preempt(&per_cpu(edfsc_cpu_entries, tsk_rt(container->container)->edfsc_params.id)); 388 preempt(&per_cpu(edfsc_cpu_entries, tsk_rt(container->container)->edfsc_params.id));
359 return 1; 389 return 1;
@@ -362,6 +392,7 @@ static int c_preempt_check(cont_domain_t *container)
362 } 392 }
363} 393}
364 394
395// Callback for new global job release
365static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks) 396static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks)
366{ 397{
367 unsigned long flags; 398 unsigned long flags;
@@ -374,6 +405,7 @@ static void g_release_jobs(rt_domain_t* rt, struct bheap* tasks)
374 raw_spin_unlock_irqrestore(&g_lock, flags); 405 raw_spin_unlock_irqrestore(&g_lock, flags);
375} 406}
376 407
408// Callback for new container release
377static int c_check_resched(rt_domain_t *edf) 409static int c_check_resched(rt_domain_t *edf)
378{ 410{
379 cont_domain_t *cont_dom = container_of(edf, cont_domain_t, domain); 411 cont_domain_t *cont_dom = container_of(edf, cont_domain_t, domain);
@@ -386,6 +418,7 @@ static int c_check_resched(rt_domain_t *edf)
386static void g_remove_task(struct task_struct *t) 418static void g_remove_task(struct task_struct *t)
387{ 419{
388 BUG_ON(is_container(t)); 420 BUG_ON(is_container(t));
421 //BUG_ON(get_rt_utilization(t) > m_util);
389 m_util -= get_rt_utilization(t); 422 m_util -= get_rt_utilization(t);
390 sys_util -= get_rt_utilization(t); 423 sys_util -= get_rt_utilization(t);
391 sys_changed = 1; 424 sys_changed = 1;
@@ -393,9 +426,8 @@ static void g_remove_task(struct task_struct *t)
393 426
394static void c_remove_task(struct task_struct *t) 427static void c_remove_task(struct task_struct *t)
395{ 428{
396 struct task_struct* container_task = tsk_rt(t)->edfsc_params.container_task; 429 //BUG_ON(get_rt_utilization(t) > container_domains[tsk_rt(t)->task_params.cpu].f_util);
397 tsk_rt(container_task)->edfsc_params.domain->f_util -= 430 container_domains[tsk_rt(t)->task_params.cpu].f_util -= get_rt_utilization(t);
398 get_rt_utilization(t);
399 sys_util -= get_rt_utilization(t); 431 sys_util -= get_rt_utilization(t);
400 sys_changed = 1; 432 sys_changed = 1;
401} 433}
@@ -415,10 +447,11 @@ static void migrate_task(struct task_struct *t)
415 remove(tsk_rt(t)->domain, t); 447 remove(tsk_rt(t)->domain, t);
416 // Remove the util of the "fake reservation task"(specified by the paper) from the system 448 // Remove the util of the "fake reservation task"(specified by the paper) from the system
417 sys_util -= get_rt_utilization(t); 449 sys_util -= get_rt_utilization(t);
418 prepare_for_next_period(t); 450 m_util -= get_rt_utilization(t);
419 tsk_rt(t)->domain = (rt_domain_t*)tsk_rt(t)->edfsc_params.move_to; 451 tsk_rt(t)->domain = (rt_domain_t*)tsk_rt(t)->edfsc_params.move_to;
420 tsk_rt(t)->edfsc_params.container_task = tsk_rt(t)->edfsc_params.move_to->container; 452 tsk_rt(t)->edfsc_params.container_task = tsk_rt(t)->edfsc_params.move_to->container;
421 requeue(t); 453 requeue(t);
454 c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
422 tsk_rt(t)->edfsc_params.move_to = NULL; 455 tsk_rt(t)->edfsc_params.move_to = NULL;
423 sys_changed = 1; 456 sys_changed = 1;
424} 457}
@@ -429,11 +462,16 @@ static void migrate_task(struct task_struct *t)
429 * Note: This is shared by container_boundary() and g_task_completion(). 462 * Note: This is shared by container_boundary() and g_task_completion().
430 */ 463 */
431static void c_release(struct task_struct *t) { 464static void c_release(struct task_struct *t) {
432 cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id); 465 cpu_entry_t* entry;
466
467 BUG_ON(!is_container(t));
468 BUG_ON(t->rt_param.edfsc_params.domain->timer_armed);
469
470 entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
471 tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
433 prepare_for_next_period(t); 472 prepare_for_next_period(t);
434 if (is_early_releasing(t) || is_released(t, litmus_clock())) 473 if (is_early_releasing(t) || is_released(t, litmus_clock()))
435 sched_trace_task_release(t); 474 sched_trace_task_release(t);
436 tsk_rt(t)->task_params.exec_cost = from_fp(get_rt_utilization(t) * get_rt_period(t));
437 /* If this container is fully provisioned, remove it from gsched_domain, 475 /* If this container is fully provisioned, remove it from gsched_domain,
438 * edfsc_cpu_heap, and disable the idle enforcement timer. If not, restore. 476 * edfsc_cpu_heap, and disable the idle enforcement timer. If not, restore.
439 */ 477 */
@@ -441,14 +479,18 @@ static void c_release(struct task_struct *t) {
441 // Make this cpu unavailable to the global scheduler 479 // Make this cpu unavailable to the global scheduler
442 if (bheap_node_in_heap(entry->hn)) 480 if (bheap_node_in_heap(entry->hn))
443 remove_cpu_from_global(entry); 481 remove_cpu_from_global(entry);
444 // Fully provisioned containers always run, so just set this here
445 if (entry->linked != t)
446 link_task_to_cpu(t, entry);
447 // Note that we no longer need the global scheduler to schedule us 482 // Note that we no longer need the global scheduler to schedule us
448 if (is_queued(t)) 483 if (is_queued(t)) {
449 remove(&gsched_domain, t); 484 remove(&gsched_domain, t);
450 // Fully provisioned containers always run, so idle enforcement is superfluous 485 }
451 cancel_idle_enforcement_timer(t); 486 // Fully provisioned containers always run, so just set this here
487 if (entry->linked != t) {
488 BUG_ON(is_container(entry->linked));
489 if (requeue_preempted_job(entry->linked)) {
490 requeue(entry->linked);
491 }
492 link_task_to_cpu(t, entry);
493 }
452 tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = litmus_clock(); 494 tsk_rt(t)->edfsc_params.domain->scheduled_last_exec_time = litmus_clock();
453 // Run schedule again to make sure that we're run 495 // Run schedule again to make sure that we're run
454 preempt(entry); 496 preempt(entry);
@@ -457,13 +499,9 @@ static void c_release(struct task_struct *t) {
457 if (!bheap_node_in_heap(entry->hn)) 499 if (!bheap_node_in_heap(entry->hn))
458 add_cpu_to_global(entry); 500 add_cpu_to_global(entry);
459 // Note that container's aren't real tasks and thus can't block 501 // Note that container's aren't real tasks and thus can't block
460 // Let g_preempt_check() decide what to run, don't impose
461 unlink(t); 502 unlink(t);
462 // Request to be scheduled globally again 503 // Request to be scheduled globally again
463 if (!is_queued(t)) 504 requeue(t);
464 requeue(t);
465 // Re-run our EDF scheduling to adjust for the added core
466 g_preempt_check();
467 } 505 }
468} 506}
469 507
@@ -476,15 +514,19 @@ static noinline void g_job_completion(struct task_struct* t, int forced)
476 514
477 TRACE_TASK(t, "g_job_completion(forced=%d).\n", forced); 515 TRACE_TASK(t, "g_job_completion(forced=%d).\n", forced);
478 516
479 tsk_rt(t)->completed = 0;
480 unlink(t); 517 unlink(t);
518 tsk_rt(t)->completed = 0;
481 519
482 // When a migrating task is being turned turned into a fixed task 520 // When a migrating task is being turned turned into a fixed task
483 if (is_migrating(t) && tsk_rt(t)->edfsc_params.move_to) { 521 if (is_migrating(t) && tsk_rt(t)->edfsc_params.move_to) {
484 if (t->rt_param.job_params.lateness > 0) { 522 prepare_for_next_period(t);
485 // Don't wait if late 523 if (is_early_releasing(t) || is_released(t, litmus_clock()))
524 sched_trace_task_release(t);
525 if (tsk_rt(t)->job_params.lateness > 0) {
526 // Don't wait if prev job was tardy
486 migrate_task(t); 527 migrate_task(t);
487 } else { 528 } else {
529 list_add(&t->edfsc_qnode, &pending_removes);
488 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(get_deadline(t)), 530 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(get_deadline(t)),
489 HRTIMER_MODE_ABS_PINNED); 531 HRTIMER_MODE_ABS_PINNED);
490 } 532 }
@@ -499,46 +541,17 @@ static noinline void g_job_completion(struct task_struct* t, int forced)
499 requeue(t); 541 requeue(t);
500 g_preempt_check(); 542 g_preempt_check();
501 } 543 }
502 /* A container may be in several different states when it finishes. It may:
503 * - Be scheduling a migrating task that is finished, blocked, or out of budget
504 * - Be scheduling a fixed task
505 * - Be scheduling nothing
506 * If there's a migrating task being scheduled, we can't unconditionally
507 * requeue it. Often, we may actually have to call g_job_completion() on
508 * that migrating task. If we finish while running a fixed task, we just
509 * "freeze" it in the container - edfsc_cschedule() will take care of
510 * processing its state when the container is rescheduled.
511 *
512 * If the container is tardy, we process its scheduled task as in the non-
513 * tardy case, then just immediately call c_release() on the container.
514 */
515 } else if (is_container(t)) { 544 } else if (is_container(t)) {
516 /*
517 struct task_struct** child = &tsk_rt(t)->edfsc_params.domain->scheduled;
518 // No need to handle fixed tasks, cschedule will do that when it runs next
519 if (*child && is_migrating(*child)) {
520 BUG_ON(is_queued(*child));
521 // If migrating and done
522 if (is_completed(*child) || (budget_enforced(*child) && budget_exhausted(*child))) {
523 g_job_completion(*child, budget_enforced(*child) && budget_exhausted(*child));
524 // If migrating and blocked
525 } else if (!is_current_running()) {
526 unlink(*child);
527 // Otherwise it can keep running globally
528 } else {
529 requeue(*child);
530 }
531 // Regardless, we never "freeze" a migrating task in a container
532 *child = NULL;
533 }
534 */
535 // When a container job finishes late, release it immediately 545 // When a container job finishes late, release it immediately
536 if (tsk_rt(t)->edfsc_params.can_release) { 546 if (get_deadline(t) < litmus_clock()) {
537 tsk_rt(t)->edfsc_params.can_release = 0;
538 c_release(t); 547 c_release(t);
548 g_preempt_check();
539 if (get_rt_utilization(t) == to_fp(1)) 549 if (get_rt_utilization(t) == to_fp(1))
540 manage_idle_enforcement_timer(t); 550 manage_idle_enforcement_timer(t);
541 } 551 }
552 else {
553 tsk_rt(t)->completed = 1;
554 }
542 } 555 }
543} 556}
544 557
@@ -557,33 +570,25 @@ static void c_job_completion(struct task_struct* t, int forced)
557// As long as this only touches CPU-local state, it shouldn't need g_lock: 570// As long as this only touches CPU-local state, it shouldn't need g_lock:
558static void g_finish_switch(struct task_struct *prev) 571static void g_finish_switch(struct task_struct *prev)
559{ 572{
573 unsigned long flags;
560 cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries); 574 cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
561 struct task_struct* container = &container_tasks[entry->cpu]; 575 struct task_struct* container = &container_tasks[entry->cpu];
562 unsigned long flags; 576 raw_spin_lock_irqsave(&g_lock, flags);
563 BUG_ON(is_realtime(current) && tsk_rt(current)->domain == NULL);
564
565 // FIXME: It's really expensive to put a lock in here, but since we touch
566 // members of entry multiple times, we have to lock. Otherwise we
567 // may make an if branch based off entry->linked, and then have it
568 // change before we can set entry->scheduled.
569 //raw_spin_lock_irqsave(&g_lock, flags);
570 preempt_disable();
571 entry->scheduled = is_realtime(current) ? current : NULL; 577 entry->scheduled = is_realtime(current) ? current : NULL;
572 // If we're scheduling a task in a container, set entry->scheduled to the container 578 // If we're scheduling a task in a container, set entry->scheduled to the container
573 if (entry->scheduled) { 579 if (entry->scheduled) {
574 if (tsk_rt(container)->edfsc_params.domain->scheduled == entry->scheduled) 580 if (entry->scheduled->rt_param.edfsc_params.container_task) {
575 entry->scheduled = container; 581 entry->scheduled = entry->scheduled->rt_param.edfsc_params.container_task;
582 }
576 } 583 }
577 // occurs when current is non-rt, and linked is a container 584 // occurs when current is non-rt, and linked is a container
578 // this happens when an empty container "task" is supposed to be current 585 // this happens when an empty container "task" is supposed to be current
579 // but because it's not a real task, a non-rt task is current instead 586 // but because it's not a real task, a non-rt task is current instead
580 else if (is_container(entry->linked)) { 587 else if (tsk_rt(container)->scheduled_on != NO_CPU){
581 entry->scheduled = entry->linked; 588 entry->scheduled = container;
582 } 589 }
583 590
584 BUG_ON(is_fixed(entry->scheduled)); 591 raw_spin_unlock_irqrestore(&g_lock, flags);
585 //raw_spin_unlock_irqrestore(&g_lock, flags);
586 preempt_enable();
587#ifdef WANT_ALL_SCHED_EVENTS 592#ifdef WANT_ALL_SCHED_EVENTS
588 TRACE_TASK(prev, "switched away from\n"); 593 TRACE_TASK(prev, "switched away from\n");
589#endif 594#endif
@@ -600,12 +605,14 @@ static int fifo_prio(struct bheap_node* _a, struct bheap_node* _b)
600 * @param cedf Pointer to tsk_rt(container)->edfsc_params->domain 605 * @param cedf Pointer to tsk_rt(container)->edfsc_params->domain
601 * @param prev Previous task running on this processor before schedule was called 606 * @param prev Previous task running on this processor before schedule was called
602 */ 607 */
603static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev) 608static noinline void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
604{ 609{
605 rt_domain_t *edf = &cedf->domain; 610 rt_domain_t *edf = &cedf->domain;
606 611
607 struct task_struct* next; 612 struct task_struct* next;
613 struct task_struct* other_t;
608 struct bheap temp; 614 struct bheap temp;
615 cpu_entry_t *this_entry, *other_entry;
609 int out_of_time, sleep, preempt, 616 int out_of_time, sleep, preempt,
610 np, exists, blocks, resched; 617 np, exists, blocks, resched;
611 // XXX: The scheduler we copied this from also used `cont_out_of_time`. Is 618 // XXX: The scheduler we copied this from also used `cont_out_of_time`. Is
@@ -620,13 +627,13 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
620 627
621 /* (0) Determine state */ 628 /* (0) Determine state */
622 exists = cedf->scheduled != NULL; 629 exists = cedf->scheduled != NULL;
623 blocks = exists && !is_current_running(); 630 blocks = exists && current == cedf->scheduled && !is_current_running();
624 out_of_time = exists && budget_enforced(cedf->scheduled) 631 out_of_time = exists && budget_enforced(cedf->scheduled)
625 && budget_exhausted(cedf->scheduled); 632 && budget_exhausted(cedf->scheduled);
626 np = exists && is_np(cedf->scheduled); 633 np = exists && is_np(cedf->scheduled);
627 sleep = exists && is_completed(cedf->scheduled); 634 sleep = exists && is_completed(cedf->scheduled);
628 preempt = (is_migrating(cedf->scheduled) && __peek_ready(edf)) || 635 preempt = (is_migrating(cedf->scheduled) && __peek_ready(edf)) ||
629 (exists && edf_preemption_needed(edf, cedf->scheduled)); 636 edf_preemption_needed(edf, cedf->scheduled);
630 637
631 /* If we need to preempt do so. 638 /* If we need to preempt do so.
632 * The following checks set resched to 1 in case of special 639 * The following checks set resched to 1 in case of special
@@ -647,15 +654,19 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
647 if (!np && (out_of_time || sleep)) { 654 if (!np && (out_of_time || sleep)) {
648 if (is_fixed(cedf->scheduled)) 655 if (is_fixed(cedf->scheduled))
649 c_job_completion(cedf->scheduled, !sleep); 656 c_job_completion(cedf->scheduled, !sleep);
650 else 657 else {
658 tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
651 g_job_completion(cedf->scheduled, !sleep); 659 g_job_completion(cedf->scheduled, !sleep);
660 }
652 resched = 1; 661 resched = 1;
653 } 662 }
654
655 // Deschedule any background jobs if a fixed task is ready 663 // Deschedule any background jobs if a fixed task is ready
656 if (is_migrating(cedf->scheduled) || preempt) { 664 else if (!np && preempt) {
657 if (!sleep && !out_of_time && !blocks && !is_queued(cedf->scheduled)) 665 if (!blocks && cedf->scheduled && !is_queued(cedf->scheduled)) {
666 if (is_migrating(cedf->scheduled))
667 tsk_rt(cedf->scheduled)->edfsc_params.container_task = NULL;
658 requeue(cedf->scheduled); 668 requeue(cedf->scheduled);
669 }
659 resched = 1; 670 resched = 1;
660 } 671 }
661 672
@@ -665,28 +676,40 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
665 */ 676 */
666 next = NULL; 677 next = NULL;
667 if (blocks || !exists || (!np && resched)) { 678 if (blocks || !exists || (!np && resched)) {
668 /*if (exists && !out_of_time && !sleep && !is_queued(cedf->scheduled)) { 679 BUG_ON(cedf->scheduled && !blocks && !out_of_time && !sleep && !is_migrating(cedf->scheduled) && !is_queued(cedf->scheduled));
669 requeue(cedf->scheduled);
670 }*/
671 next = __take_ready(edf); 680 next = __take_ready(edf);
681 // Check for direct swap (1->2, 2->1) scenarios, which can cause deadlock
682 /*if (next) {
683 other_entry = &per_cpu(edfsc_cpu_entries, next->cpu);
684 this_entry = this_cpu_ptr(&edfsc_cpu_entries);
685 if (other_entry != this_entry
686 && other_entry->cpu == this_entry->scheduled->cpu) {
687 requeue(next);
688 next = NULL;
689 }
690 }*/
672 } else if (exists) { 691 } else if (exists) {
673 // This is safe when background scheduling, as we can only get here if 692 // This is safe when background scheduling, as we can only get here if
674 // there were no other fixed tasks ready to run. 693 // there were no other fixed tasks ready to run.
694 BUG_ON(is_queued(cedf->scheduled));
675 next = cedf->scheduled; 695 next = cedf->scheduled;
676 } 696 }
677 697
698 this_entry = this_cpu_ptr(&edfsc_cpu_entries);
678 if (next) { 699 if (next) {
679 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 700 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
701 // Give the container a little breathing room, otherwise, the core will be pounded with work
702 // Will often trigger watchdog due to continous execution
680 } else { 703 } else {
681 // Find a task in gsched_domain that isn't a container to background schedule 704 // Find a task in gsched_domain that isn't a container to background schedule
682 bheap_init(&temp); // XXX this seems inefficient - maybe use a global temp? 705 bheap_init(&temp);
683 next = __take_ready(&gsched_domain); 706 next = __take_ready(&gsched_domain);
684 while (is_container(next)) { 707 while (is_container(next) || (is_migrating(next) && next->cpu != this_entry->cpu)) {
685 bheap_insert(fifo_prio, &temp, tsk_rt(next)->heap_node); 708 bheap_insert(fifo_prio, &temp, tsk_rt(next)->heap_node);
686 next = __take_ready(&gsched_domain); 709 next = __take_ready(&gsched_domain);
687 BUG_ON(next && is_queued(next));
688 } 710 }
689 if (next) { 711 if (next) {
712 tsk_rt(next)->edfsc_params.container_task = cedf->container;
690 TRACE_TASK(next, "background scheduling at %llu\n", litmus_clock()); 713 TRACE_TASK(next, "background scheduling at %llu\n", litmus_clock());
691 } else { 714 } else {
692 TRACE("container becomes idle at %llu\n", litmus_clock()); 715 TRACE("container becomes idle at %llu\n", litmus_clock());
@@ -696,6 +719,22 @@ static void edfsc_cschedule(cont_domain_t* cedf, struct task_struct * prev)
696 } 719 }
697 } 720 }
698 721
722 if (next && next->cpu != this_entry->cpu) {
723 other_entry = &per_cpu(edfsc_cpu_entries, next->cpu);
724 other_t = is_container(other_entry->linked) ?
725 other_entry->linked->rt_param.edfsc_params.domain->scheduled : other_entry->linked;
726 // If we detect a direct swap, and the other task has already gone through gschedule
727 // To prevent a deadlock, we let them go first and reschedule
728 if (other_t && other_t->cpu == this_entry->cpu) {
729 if (is_migrating(other_t) || other_entry->linked->rt_param.scheduled_on == other_entry->cpu) {
730 if (is_migrating(next))
731 next->rt_param.edfsc_params.container_task = NULL;
732 requeue(next);
733 next = NULL;
734 }
735 }
736 }
737
699 cedf->scheduled = next; 738 cedf->scheduled = next;
700} 739}
701 740
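The block added above guards against a direct swap: this CPU wants to pull the task currently placed on another CPU while that CPU is simultaneously pulling the task placed here, so one side backs off and requeues its candidate to avoid the two cores waiting on each other. A toy sketch of the detection idea, with a made-up array standing in for the per-CPU linked/scheduled state:

/* Hedged sketch of the direct-swap (1->2, 2->1) check. next_cpu[] is an
 * illustrative stand-in for "which CPU's current task this core wants". */
#include <stdio.h>

int main(void)
{
	int next_cpu[2] = { 1, 0 };   /* CPU 0 wants 1's task, CPU 1 wants 0's */
	int this_cpu = 0;
	int other = next_cpu[this_cpu];

	if (next_cpu[other] == this_cpu) {
		/* Direct swap detected: let the other core go first, requeue here. */
		printf("CPU %d backs off and requeues its candidate\n", this_cpu);
	} else {
		printf("CPU %d proceeds\n", this_cpu);
	}
	return 0;
}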
@@ -712,53 +751,49 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
712 751
713 /* sanity checking */ 752 /* sanity checking */
714 BUG_ON(entry->scheduled && entry->scheduled != prev && !is_container(entry->scheduled)); 753 BUG_ON(entry->scheduled && entry->scheduled != prev && !is_container(entry->scheduled));
715 //BUG_ON(entry->scheduled && entry->scheduled != prev && is_realtime(prev) &&
716 // (cont_domain_t*)tsk_rt(prev)->domain != tsk_rt(entry->scheduled)->edfsc_params.domain);
717 //BUG_ON(entry->scheduled && entry->scheduled != prev && is_realtime(prev) &&
718 // prev != tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled);
719 // It's okay for the previously scheduled task to not be rt if we think a 754 // It's okay for the previously scheduled task to not be rt if we think a
720 // container task is scheduled and the container doesn't have any pending 755 // container task is scheduled and the container doesn't have any pending
721 // jobs of fixed tasks. 756 // jobs of fixed tasks.
722 BUG_ON(entry->scheduled && !is_container(entry->scheduled) && !is_realtime(prev)); 757 BUG_ON(entry->scheduled && !is_container(entry->scheduled) && !is_realtime(prev));
723 // Bug if we didn't think anything was scheduled, but a realtime task was running on our CPU 758 // Bug if we didn't think anything was scheduled, but a realtime task was running on our CPU
724 BUG_ON(is_realtime(prev) && tsk_rt(prev)->linked_on != NO_CPU && !entry->scheduled); 759 //BUG_ON(is_realtime(prev) && tsk_rt(prev)->linked_on != NO_CPU && !entry->scheduled);
725
726 if (is_container(entry->scheduled)) {
727 lt_t now = litmus_clock();
728 tsk_rt(entry->scheduled)->job_params.exec_time += now
729 - tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled_last_exec_time;
730 tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled_last_exec_time = now;
731 }
732 760
733 /* (0) Determine state */ 761 /* (0) Determine state */
734 exists = entry->scheduled != NULL; 762 exists = entry->scheduled != NULL;
735 is_cont = is_container(entry->scheduled); 763 is_cont = is_container(entry->scheduled);
736 blocks = exists && !is_cont && !is_current_running(); 764 blocks = exists && !is_cont && !is_current_running();
737 out_of_time = exists && budget_enforced(entry->scheduled)
738 && budget_exhausted(entry->scheduled);
739 np = exists && !is_cont && is_np(entry->scheduled); 765 np = exists && !is_cont && is_np(entry->scheduled);
740 sleep = exists && !is_cont && is_completed(entry->scheduled); 766 sleep = exists && is_completed(entry->scheduled);
741 preempted = entry->scheduled != entry->linked; 767 preempted = entry->scheduled != entry->linked;
742 768
769 /* Manually track container budget */
770 if (is_cont && (tsk_rt(entry->scheduled)->edfsc_params.domain->timer_armed || sleep)) {
771 update_container_budget(entry->scheduled);
772 out_of_time = exists && budget_enforced(entry->scheduled)
773 && budget_exhausted(entry->scheduled);
774 /* Cancel container enforcement timer if container is fully provisioned and out of sync with
775 * container_boundary, or if it is currently being scheduled in gedf
776 */
777 if (bheap_node_in_heap(entry->hn) || (!bheap_node_in_heap(entry->hn) && out_of_time))
778 cancel_idle_enforcement_timer(entry->scheduled);
779 }
780 else {
781 out_of_time = exists && budget_enforced(entry->scheduled)
782 && budget_exhausted(entry->scheduled);
783 }
743 784
744 if (exists) 785 if (exists)
745 TRACE_TASK(prev, 786 TRACE_TASK(prev,
746 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " 787 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
747 "state:%d sig:%d is_cont:%d\n", 788 "state:%d sig:%d is_cont:%d\n",
748 blocks, out_of_time, np, sleep, preempt, 789 blocks, out_of_time, np, sleep, preempt,
749 prev->state, signal_pending(prev), is_cont); 790 prev->state, signal_pending(prev), is_cont);
750 791
751 if (entry->linked && preempted) 792 if (entry->linked && preempted)
752 TRACE_TASK(prev, "will be preempted by %s/%d\n", 793 TRACE_TASK(prev, "will be preempted by %s/%d\n",
753 entry->linked->comm, entry->linked->pid); 794 entry->linked->comm, entry->linked->pid);
754
755 if (exists && preempted && !is_queued(entry->scheduled))
756 requeue(entry->scheduled);
757 795
758 /* If a task blocks we have no choice but to reschedule. 796 // If a task blocks we have no choice but to reschedule.
759 * Note: containers never block, so if blocks is true and we're background
760 * scheduling, we want to unlink `prev` NOT `entry->scheduled`.
761 */
762 if (blocks) 797 if (blocks)
763 unlink(prev); 798 unlink(prev);
764 799
@@ -778,76 +813,49 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
778 * for blocked jobs). 813 * for blocked jobs).
779 */ 814 */
780 if (!np && (out_of_time || sleep)) { 815 if (!np && (out_of_time || sleep)) {
781 // This is not a global job completion if we're in a fully provisioned container 816 g_job_completion(entry->scheduled, !sleep);
782 if (bheap_node_in_heap(entry->hn))
783 g_job_completion(entry->scheduled, !sleep);
784 else
785 unlink(entry->scheduled);
786 } 817 }
787 818
788 // We should have descheduled globally scheduled tasks without budget by now 819 BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked && !is_container(entry->linked));
789 BUG_ON(entry->linked && budget_enforced(entry->linked) && budget_exhausted(entry->linked)); 820
790 821 if (!entry->linked && bheap_node_in_heap(entry->hn)) {
791 // Determine what to run next (set entry->linked) 822 g_preempt_check();
792 if (!entry->linked) {
793 struct task_struct* task = __take_ready(&gsched_domain);
794 // Make sure that containers are only scheduled on cores with same id
795 if (is_container(task) && entry->cpu != tsk_rt(task)->edfsc_params.id) {
796 // Get cpu_entry for task's core assignment
797 cpu_entry_t* target = &per_cpu(edfsc_cpu_entries, tsk_rt(task)->edfsc_params.id);
798 // Make sure that someone didn't requeue `task` without unlinking it
799 BUG_ON(target->linked && target->linked == task);
800 // Move their linked task to us
801 link_task_to_cpu(target->linked, entry);
802 // Setup the container to run next on the remote core
803 link_task_to_cpu(task, target);
804 // Alert the remote core that it now needs to reschedule
805 preempt(target);
806 } else if (task) {
807 // We'll now schedule the ready task here
808 link_task_to_cpu(task, entry);
809 // Tasks on the ready queue should never be out of budget, so it's safe
810 // to continue the scheduling process from this point on.
811 }
812 } 823 }
813 824
814 BUG_ON(entry->linked && budget_enforced(entry->linked) && budget_exhausted(entry->linked)); 825 BUG_ON(entry->linked && is_queued(entry->linked));
815 BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu); 826 BUG_ON(!bheap_node_in_heap(entry->hn) && entry->linked
827 && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);
816 BUG_ON(is_container(entry->linked) && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu); 828 BUG_ON(is_container(entry->linked) && tsk_rt(entry->linked)->edfsc_params.id != entry->cpu);
817 829
818 /* The final scheduling decision. Do we need to switch for some reason? 830 /* The final scheduling decision. Do we need to switch for some reason?
819 * If linked is different from scheduled, then select linked as next. 831 * If linked is different from scheduled, then select linked as next.
820 */ 832 */
821 if ((!np || blocks) && entry->linked != entry->scheduled) { 833 if ((!np || blocks) && entry->linked != entry->scheduled) {
822 /* Schedule a linked job? */ 834 // Set the newly linked job to be scheduled
823 if (entry->linked) { 835 if (entry->linked) {
824 next = entry->linked; 836 next = entry->linked;
837 tsk_rt(entry->linked)->scheduled_on = entry->cpu;
838 BUG_ON(is_queued(entry->linked));
825 TRACE_TASK(next, "scheduled on P%d\n", smp_processor_id()); 839 TRACE_TASK(next, "scheduled on P%d\n", smp_processor_id());
826 } 840 }
827 // Note what was running before 841 // Set the previously linked to to be unscheduled
828 if (entry->scheduled) { 842 if (entry->scheduled) {
843 /* When a scheduled is linked to another cpu, from this cpu, there's no guarantee on the order
844 * in which gschedule is called on both cpus. If it has already have scheduled_on set to the other
845 * cpu, then we have to preserve it and can't just set it to NO_CPU
846 */
847 if (tsk_rt(entry->scheduled)->scheduled_on == entry->cpu) {
848 tsk_rt(entry->scheduled)->scheduled_on = NO_CPU;
849 }
829 TRACE_TASK(entry->scheduled, "descheduled\n"); 850 TRACE_TASK(entry->scheduled, "descheduled\n");
830 } 851 }
831 } else if (entry->scheduled) { 852 } else if (entry->scheduled) {
832 // If we've been running a container, make sure that it has nothing new to schedule 853 next = entry->scheduled;
833 if (is_container(entry->scheduled)) 854 tsk_rt(next)->scheduled_on = entry->cpu;
834 next = entry->scheduled;
835 // Otherwise we can keep running any tasks we previously scheduled
836 else if (is_realtime(prev))
837 next = prev;
838 } 855 }
856 BUG_ON(next && get_exec_time(next) > get_exec_cost(next));
839 857
840 // Tell LITMUS^RT that we choose a task and are done scheduling after return 858 // If next is a container, then perform cschedule to determine the fixed task to schedule
841 sched_state_task_picked();
842
843 // When we transition from doing background scheduling to doing normal
844 // scheduling, we may schedule the same task. Unfortunately, when this
845 // happens, g_finish_switch() will /not/ be called. Fix the state manually.
846 temp = entry->scheduled;
847 entry->scheduled = next;
848
849 // if no fixed tasks to be scheduled by the container, then container->scheduled
850 // should be the previous non-rt task if any
851 if (is_container(next)) { 859 if (is_container(next)) {
852 edfsc_cschedule(tsk_rt(next)->edfsc_params.domain, prev); 860 edfsc_cschedule(tsk_rt(next)->edfsc_params.domain, prev);
853 if (bheap_node_in_heap(entry->hn)) 861 if (bheap_node_in_heap(entry->hn))
@@ -855,26 +863,30 @@ static struct task_struct *edfsc_gschedule(struct task_struct *prev)
855 next = tsk_rt(next)->edfsc_params.domain->scheduled; 863 next = tsk_rt(next)->edfsc_params.domain->scheduled;
856 } 864 }
857 // When next is migrating, but previously scheduled realtime task is a container 865 // When next is migrating, but previously scheduled realtime task is a container
858 // must properly restore background scheduled task to its correct queue/heap 866 // must properly restore background scheduled task(if any) to its correct queue/heap
859 else if (is_container(temp) && next != temp) { 867 else if (is_container(entry->scheduled) && next != entry->scheduled) {
860 struct task_struct** child = &tsk_rt(temp)->edfsc_params.domain->scheduled; 868 struct task_struct** child = &tsk_rt(entry->scheduled)->edfsc_params.domain->scheduled;
861 // No need to handle fixed tasks, cschedule will do that when it runs next 869 // No need to handle fixed tasks, cschedule will do that when it runs next
862 if (*child && is_migrating(*child)) { 870 if (*child && is_migrating(*child)) {
871 int background_out_of_time = budget_enforced(*child) && budget_exhausted(*child);
863 BUG_ON(is_queued(*child)); 872 BUG_ON(is_queued(*child));
873 BUG_ON(tsk_rt(*child)->linked_on != NO_CPU);
874 tsk_rt(*child)->edfsc_params.container_task = NULL;
864 // If migrating and done 875 // If migrating and done
865 if (is_completed(*child) || (budget_enforced(*child) && budget_exhausted(*child))) { 876 if (is_completed(*child) || background_out_of_time) {
866 g_job_completion(*child, budget_enforced(*child) && budget_exhausted(*child)); 877 g_job_completion(*child, background_out_of_time);
867 // If migrating and blocked 878 // If migrating and not blocked
868 } else if (!is_current_running()) { 879 } else if (is_current_running()) {
869 unlink(*child);
870 // Otherwise it can keep running globally
871 } else {
872 requeue(*child); 880 requeue(*child);
873 } 881 }
874 // Regardless, we never "freeze" a migrating task in a container 882 // Regardless, we never "freeze" a migrating task in a container
875 *child = NULL; 883 *child = NULL;
876 } 884 }
877 } 885 }
886 BUG_ON(is_migrating(entry->scheduled) && !tsk_rt(entry->scheduled)->edfsc_params.container_task
887 && !blocks && tsk_rt(entry->scheduled)->linked_on == NO_CPU && !is_queued(entry->scheduled));
888 // Tell LITMUS^RT that we choose a task and are done scheduling after return
889 sched_state_task_picked();
878 890
879 raw_spin_unlock_irqrestore(&g_lock, flags); 891 raw_spin_unlock_irqrestore(&g_lock, flags);
880 892
@@ -899,39 +911,32 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
899 int i; 911 int i;
900 struct list_head *it; 912 struct list_head *it;
901 struct list_head *temp; 913 struct list_head *temp;
902 u64 u_extra; 914 u64 u_extra, leeway;
903 cont_domain_t *container; 915 cont_domain_t *container;
904 struct task_struct *t; 916 struct task_struct *t;
905 lt_t now;
906 int num_cpus = num_online_cpus(); 917 int num_cpus = num_online_cpus();
907 unsigned long flags; 918 unsigned long flags;
908 919
909 raw_spin_lock_irqsave(&g_lock, flags); 920 TS_SCHED_TIMER_START
910
911 now = litmus_clock();
912 921
913 // Update budget tracking for containers 922 raw_spin_lock_irqsave(&g_lock, flags);
914 for (i = 0; i < num_cpus; i++) {
915 t = container_list[i]->container;
916 if (container_list[i]->timer_armed)
917 tsk_rt(t)->job_params.exec_time += now - container_list[i]->scheduled_last_exec_time;
918 else
919 tsk_rt(t)->job_params.exec_time = get_exec_cost(t);
920 }
921 923
922 t = NULL; 924 t = NULL;
925 leeway = fp_div(1, 50);
923 926
924 // Try to add tasks from the queue 927 // Try to add tasks from the queue
925 list_for_each_safe(it, temp, &pending_adds) { 928 list_for_each_safe(it, temp, &pending_adds) {
926 u_extra = to_fp(num_cpus) - sys_util;
927 container = NULL; 929 container = NULL;
928 t = task_of_list_node(it); 930 t = task_of_list_node(it);
929 list_del_init(it); 931 list_del_init(it);
930 if (u_extra >= get_rt_utilization(t)) { 932 //sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
933 sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
934 if (to_fp(num_cpus) > get_rt_utilization(t) + sys_util + leeway) {
931 for (i = 0; i < num_cpus; i++) { 935 for (i = 0; i < num_cpus; i++) {
932 u64 leftover = to_fp(1) - container_domains[i].f_util; 936 if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util + leeway) {
933 if (leftover >= get_rt_utilization(t)) { 937 //if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util + leeway) {
934 container = &(container_domains[i]); 938 //container = &(container_domains[i]);
939 container = container_list[i]; // Used for best/worst fit
935 break; 940 break;
936 } 941 }
937 } 942 }
@@ -944,13 +949,10 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
944 tsk_rt(t)->domain = &gsched_domain; 949 tsk_rt(t)->domain = &gsched_domain;
945 tsk_rt(t)->edfsc_params.container_task = NULL; 950 tsk_rt(t)->edfsc_params.container_task = NULL;
946 m_util += get_rt_utilization(t); 951 m_util += get_rt_utilization(t);
947 //list_add(&tsk_rt(t)->edfsc_params.qnode, &migrating_tasks);
948 list_add(&t->edfsc_qnode, &migrating_tasks); 952 list_add(&t->edfsc_qnode, &migrating_tasks);
949 } 953 }
950 sys_util += get_rt_utilization(t); 954 sys_util += get_rt_utilization(t);
951 sys_changed = 1; 955 sys_changed = 1;
952 // Setup the release time for the first job to be now
953 release_at(t, litmus_clock());
954 } 956 }
955 /* Unblock the task waiting on our admission decision. They will detect 957 /* Unblock the task waiting on our admission decision. They will detect
956 * if they have been admitted by examining if tsk_rt(t)->domain != NULL 958 * if they have been admitted by examining if tsk_rt(t)->domain != NULL
@@ -962,7 +964,7 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
962 * longer than we should. 964 * longer than we should.
963 */ 965 */
964 raw_spin_unlock_irqrestore(&g_lock, flags); 966 raw_spin_unlock_irqrestore(&g_lock, flags);
965 wake_up_new_task(t); 967 BUG_ON(!wake_up_process(t));
966 raw_spin_lock_irqsave(&g_lock, flags); 968 raw_spin_lock_irqsave(&g_lock, flags);
967 } 969 }
968 970
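The admission test above works in the plugin's fixed-point utilization format (FP_SHIFT is 20 earlier in this file): a task is admitted only if its utilization plus sys_util plus a leeway of fp_div(1, 50) (about 0.02) still fits within to_fp(num_cpus). A standalone sketch of that arithmetic, assuming the usual to_fp(x) = x << FP_SHIFT and fp_div(a, b) = to_fp(a) / b style definitions; the utilization values below are made up:

/* Hedged sketch of the fixed-point admission check in container_boundary().
 * The macro definitions and numbers here are illustrative assumptions. */
#include <stdint.h>
#include <stdio.h>

#define FP_SHIFT 20
#define to_fp(x)     ((uint64_t)(x) << FP_SHIFT)
#define fp_div(a, b) (to_fp(a) / (uint64_t)(b))

int main(void)
{
	uint64_t num_cpus = 4;
	uint64_t sys_util = 3 * to_fp(1) + to_fp(1) / 2; /* 3.5 CPUs in use */
	uint64_t leeway   = fp_div(1, 50);               /* ~0.02 slack margin */
	uint64_t t_util   = fp_div(2, 5);                /* candidate task: 0.4 */

	/* Same shape as the admission test: task + current load + leeway
	 * must fit on the available CPUs. */
	if (to_fp(num_cpus) > t_util + sys_util + leeway)
		printf("admit (headroom %.3f CPUs)\n",
		       (double)(to_fp(num_cpus) - sys_util) / to_fp(1));
	else
		printf("reject\n");
	return 0;
}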
@@ -971,34 +973,38 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
971 // migrating tasks and potentially all the containers every period for a 973 // migrating tasks and potentially all the containers every period for a
972 // best-case Omega(m) and worst-case O(m^2) work---only once the scheduler 974 // best-case Omega(m) and worst-case O(m^2) work---only once the scheduler
973 // is actually working 975 // is actually working
976 // Done, only does stabilization when stuff changes in the system
974 // According to the paper, when we migrate, we must reserve space in the container 977 // According to the paper, when we migrate, we must reserve space in the container
975 // We do this by adding a fake task that ultimately doesn't release any jobs 978 // We do this by adding a fake task that ultimately doesn't release any jobs
976 // This is represented here by adding the utilization to sys_util 979 // This is represented here by adding the utilization to sys_util
977 // which will be subtracted when the migrating task is actually changed to fixed 980 // which will be subtracted when the migrating task is actually changed to fixed
978 if (sys_changed) { 981 if (sys_changed) { // change this to false to disable stabilization
979 list_for_each(it, &migrating_tasks) { 982 list_for_each_safe(it, temp, &migrating_tasks) {
980 struct task_struct* t = task_of_list_node(it); 983 struct task_struct* t = task_of_list_node(it);
981 // Although technically selecting the migrating tasks to be moved into containers 984 // Although technically selecting the migrating tasks to be moved into containers
982 // doesn't change m_util and the container's f_util until after the move, 985 // doesn't change m_util and the container's f_util until after the move,
983 // but since the move is guaranteed to happen before the next container_boundary 986 // but since the move is guaranteed to happen before the next container_boundary
984 // where we check all the utilization stuff, it's fine to account for it now 987 // where we check all the utilization stuff, it's fine to account for it now
985 if (!(tsk_rt(t)->edfsc_params.move_to) && !is_released(t, now) 988 if (!(tsk_rt(t)->edfsc_params.move_to)) {
986 && get_deadline(t) < get_deadline(&container_tasks[0]) + get_rt_period(&container_tasks[0])) {
987 tsk_rt(t)->edfsc_params.move_to = NULL; 989 tsk_rt(t)->edfsc_params.move_to = NULL;
988 990
989 container = NULL; 991 container = NULL;
992 //sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); // Best fit
993 //sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_higher_prio, NULL); // Worst fit
990 for (i = 0; i < num_cpus; i++) { 994 for (i = 0; i < num_cpus; i++) {
991 u64 leftover = to_fp(1) - container_domains[i].f_util; 995 u64 leftover = to_fp(1) - container_domains[i].f_util - leeway;
992 if (leftover>=get_rt_utilization(t) && to_fp(num_cpus)>=get_rt_utilization(t)+sys_util) { 996 //if (to_fp(1) > get_rt_utilization(t) + container_list[i]->f_util + leeway &&
997 if (to_fp(1) > get_rt_utilization(t) + container_domains[i].f_util + leeway &&
998 to_fp(num_cpus) > get_rt_utilization(t) + sys_util + leeway) {
993 container = &(container_domains[i]); 999 container = &(container_domains[i]);
1000 //container = container_list[i]; // Used for best/worst fit
994 break; 1001 break;
995 } 1002 }
996 } 1003 }
997 1004
998 if (container) { 1005 if (container) {
999 list_del_init(&t->edfsc_qnode); 1006 list_del_init(it);
1000 container->f_util += get_rt_utilization(t); 1007 container->f_util += get_rt_utilization(t);
1001 m_util -= get_rt_utilization(t);
1002 sys_util += get_rt_utilization(t); 1008 sys_util += get_rt_utilization(t);
1003 tsk_rt(t)->edfsc_params.move_to = container; 1009 tsk_rt(t)->edfsc_params.move_to = container;
1004 sys_changed = 1; 1010 sys_changed = 1;
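The new admission check above is a first-fit scan over the containers using fixed-point utilizations padded by a leeway term: a migrating task is moved only if both the per-container and the whole-system capacity tests pass. A minimal user-space sketch of that test follows; the 20.12 fixed-point format, the helper macros, and the example values are illustrative assumptions, not the LITMUS-RT definitions.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical fixed-point helpers; LITMUS-RT has its own fpmath definitions. */
#define FP_SHIFT 12
#define to_fp(x) ((uint64_t)(x) << FP_SHIFT)

/* First-fit: return the index of the first container with enough spare
 * capacity for task_util, or -1 if none fits or the system as a whole
 * would exceed its capacity. Mirrors the two conditions in the hunk above. */
static int first_fit(uint64_t task_util, const uint64_t *f_util, int num_cpus,
                     uint64_t sys_util, uint64_t leeway)
{
    int i;
    for (i = 0; i < num_cpus; i++) {
        if (to_fp(1) > task_util + f_util[i] + leeway &&
            to_fp(num_cpus) > task_util + sys_util + leeway)
            return i;
    }
    return -1;
}

int main(void)
{
    uint64_t f_util[2] = { to_fp(1) / 2, to_fp(1) / 4 };  /* 0.50, 0.25 */
    uint64_t sys_util = f_util[0] + f_util[1];
    uint64_t task_util = to_fp(1) / 3;                    /* ~0.33 */

    printf("first fit: container %d\n",
           first_fit(task_util, f_util, 2, sys_util, to_fp(1) / 64));
    return 0;
}

The commented-out sort() calls in the hunk show the same loop could also implement best or worst fit simply by pre-ordering container_list before the scan.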
@@ -1012,7 +1018,7 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
1012 int remaining; 1018 int remaining;
1013 // Sort containers by the utilization of their fixed tasks 1019 // Sort containers by the utilization of their fixed tasks
1014 sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL); 1020 sort(container_list, num_cpus, sizeof(cont_domain_t *), &container_lower_prio, NULL);
1015 u_extra = to_fp(num_cpus) - sys_util; 1021 u_extra = to_fp(num_cpus) - sys_util - leeway;
1016 // Fully provision all the container tasks we can 1022 // Fully provision all the container tasks we can
1017 for (i = 0; i < num_cpus && u_extra >= to_fp(1) - container_list[i]->f_util; i++) { 1023 for (i = 0; i < num_cpus && u_extra >= to_fp(1) - container_list[i]->f_util; i++) {
1018 struct task_struct* t = container_list[i]->container; 1024 struct task_struct* t = container_list[i]->container;
@@ -1034,7 +1040,9 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
1034 remaining = num_cpus - i; 1040 remaining = num_cpus - i;
1035 for (; i < num_cpus; i++) { 1041 for (; i < num_cpus; i++) {
1036 struct task_struct* t = container_list[i]->container; 1042 struct task_struct* t = container_list[i]->container;
1037 tsk_rt(t)->task_params.utilization = container_list[i]->f_util + u_extra / remaining; 1043 u64 temp_val = container_list[i]->f_util + u_extra / remaining;
1044 tsk_rt(t)->task_params.utilization = (temp_val < to_fp(1)) ? temp_val : to_fp(1);
1045 BUG_ON(tsk_rt(t)->task_params.utilization > to_fp(1));
1038 } 1046 }
1039 } 1047 }
1040 sys_changed = 0; 1048 sys_changed = 0;
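The other functional change in this hunk is the clamp on container utilization: after fully provisioning as many containers as the slack allows, the leftover u_extra is split evenly across the rest, and each result is capped at a utilization of 1 (with a BUG_ON as a safety net). A small self-contained sketch of that split, again using an illustrative fixed-point format rather than the LITMUS-RT one:

#include <stdint.h>
#include <stdio.h>

#define FP_SHIFT 12
#define to_fp(x) ((uint64_t)(x) << FP_SHIFT)

/* Split the spare capacity u_extra evenly over the containers that were not
 * fully provisioned (indices first_partial..num_cpus-1), clamping each
 * assigned utilization at 1 as the patch now does. The caller guarantees
 * first_partial < num_cpus, so the division is safe. */
static void split_spare(uint64_t *assigned, const uint64_t *f_util,
                        int first_partial, int num_cpus, uint64_t u_extra)
{
    int remaining = num_cpus - first_partial;
    int i;

    for (i = first_partial; i < num_cpus; i++) {
        uint64_t val = f_util[i] + u_extra / remaining;
        assigned[i] = val < to_fp(1) ? val : to_fp(1);
    }
}

int main(void)
{
    uint64_t f_util[4]   = { to_fp(1), to_fp(1), (to_fp(1) * 3) / 4, to_fp(1) / 2 };
    uint64_t assigned[4] = { to_fp(1), to_fp(1), 0, 0 };

    /* Containers 0 and 1 are fully provisioned; split 0.6 of slack over the rest. */
    split_spare(assigned, f_util, 2, 4, (to_fp(1) * 3) / 5);
    printf("container 2: %llu/4096, container 3: %llu/4096\n",
           (unsigned long long)assigned[2], (unsigned long long)assigned[3]);
    return 0;
}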
@@ -1043,51 +1051,73 @@ static enum hrtimer_restart container_boundary(struct hrtimer *timer)
1043 1051
1044 // Re-release container tasks, or let them release themselves if they're tardy 1052 // Re-release container tasks, or let them release themselves if they're tardy
1045 for (i = 0; i < num_cpus; i++) { 1053 for (i = 0; i < num_cpus; i++) {
1046 // will first iterate through fully provisioned containers, then not fully provisioned ones 1054 t = container_list[i]->container;
1047 struct task_struct* t = container_list[i]->container; 1055 int armed = container_list[i]->timer_armed;
1048 // If the last job completed on time, release it now 1056 // If the container is currently scheduled (its enforcement timer is armed), update its budget
1049 if (budget_exhausted(t)) { 1057 if (armed) {
1058 update_container_budget(t);
1059 }
1060
1061 /* Either container has completed, or it is fully provisioned and in sync
1062 * (thus not requiring a budget enforcement timer).
1063 */
1064 if ((!armed && get_rt_period(t) == get_exec_cost(t)) || budget_exhausted(t) || is_completed(t)) {
1050 BUG_ON(is_queued(t)); 1065 BUG_ON(is_queued(t));
1066 sched_trace_task_completion(t, 0);
1067 if (armed)
1068 cancel_idle_enforcement_timer(t);
1069 tsk_rt(t)->completed = 0;
1051 c_release(t); 1070 c_release(t);
1052 // Otherwise let it release itself when it completes
1053 } else {
1054 tsk_rt(t)->edfsc_params.can_release = 1;
1055 manage_idle_enforcement_timer(t);
1056 } 1071 }
1057 } 1072 }
1073 g_preempt_check();
1058 1074
1059 raw_spin_unlock_irqrestore(&g_lock, flags); 1075 raw_spin_unlock_irqrestore(&g_lock, flags);
1060 1076
1077 TS_SCHED_TIMER_END
1078
1061 hrtimer_add_expires_ns(timer, LITMUS_QUANTUM_LENGTH_NS); 1079 hrtimer_add_expires_ns(timer, LITMUS_QUANTUM_LENGTH_NS);
1062 return HRTIMER_RESTART; 1080 return HRTIMER_RESTART;
1063} 1081}
1064 1082
1083/*
1084 * When preempt checks have scheduled a task on multiple cores (due to swapping and
1085 * repeated invocations of preempt_check), we should not wait for its stack; reschedule instead.
1086 */
1087static bool edfsc_should_wait_for_stack(struct task_struct* t) {
1088 cpu_entry_t* entry = this_cpu_ptr(&edfsc_cpu_entries);
1089 struct task_struct* tsk = tsk_rt(t)->edfsc_params.container_task;
1090 tsk = tsk ? tsk : t;
1091 return tsk_rt(tsk)->linked_on == tsk_rt(tsk)->scheduled_on && tsk_rt(tsk)->linked_on == entry->cpu;
1092}
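The helper above answers one question: is this task (or, for a fixed task, its enclosing container) both linked and scheduled on the local CPU? Only then is it safe to wait for its stack; otherwise the core should just reschedule. A user-space model of that predicate, where the struct and field names are illustrative stand-ins rather than the LITMUS-RT types:

#include <stdbool.h>
#include <stdio.h>

#define NO_CPU (-1)

/* Illustrative stand-in for the scheduler state consulted above. */
struct model_task {
    int linked_on;                /* CPU the task is linked to, or NO_CPU */
    int scheduled_on;             /* CPU the task last ran on, or NO_CPU */
    struct model_task *container; /* enclosing container, or NULL */
};

/* Wait for the task's stack only if the relevant task (the container, when
 * the task is running inside one) is linked and scheduled on this CPU. */
static bool should_wait_for_stack(const struct model_task *t, int this_cpu)
{
    const struct model_task *tsk = t->container ? t->container : t;

    return tsk->linked_on == tsk->scheduled_on && tsk->linked_on == this_cpu;
}

int main(void)
{
    struct model_task cont = { .linked_on = 1, .scheduled_on = 0, .container = NULL };
    struct model_task task = { .linked_on = 1, .scheduled_on = 1, .container = &cont };

    /* The container is linked to CPU 1 but still scheduled on CPU 0: don't wait. */
    printf("wait: %d\n", should_wait_for_stack(&task, 1));
    return 0;
}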
1093
1065/** 1094/**
1066 * Fired when a task reaches its deadline and is pending deletion or migration 1095 * Fired when a task reaches its deadline and is pending deletion or migration
1067 */ 1096 */
1068static enum hrtimer_restart task_deadline_callback(struct hrtimer* timer) { 1097static enum hrtimer_restart task_deadline_callback(struct hrtimer* timer) {
1098 unsigned long flags;
1069 struct task_struct *t = container_of(timer, struct task_struct, edfsc_deadline_timer); 1099 struct task_struct *t = container_of(timer, struct task_struct, edfsc_deadline_timer);
1070 1100
1101 raw_spin_lock_irqsave(&g_lock, flags);
1071 BUG_ON(is_container(t)); 1102 BUG_ON(is_container(t));
1072 printk("util: %d\n", sys_util);
1073 // This is true only if set to be migrating from container_boundary 1103 // This is true only if set to be migrating from container_boundary
1074 if (tsk_rt(t)->edfsc_params.move_to) { 1104 if (tsk_rt(t)->edfsc_params.move_to) {
1075 // Migrate here if the task is not late, otherwise migrate in job_complete 1105 // Can only be here when called from g_job_completion
1076 if (!is_released(t, litmus_clock()) 1106 migrate_task(t);
1077 || (budget_enforced(t) && budget_exhausted(t)) 1107 // In the else case, only task_params is guaranteed to be valid
1078 || is_completed(t)) 1108 // However, in task_exit, we stored information in task_params.cpu
1079 migrate_task(t); 1109 // to help us perform the remove operations
1080 } else { 1110 } else {
1081 // A move to NULL means deletion 1111 // A move to NULL means deletion
1082 // HACK: See comment in edfsc_task_exit() 1112 if (tsk_rt(t)->task_params.cpu == NO_CPU)
1083 tsk_rt(t)->edfsc_params.container_task = (struct task_struct*)tsk_rt(t)->task_params.phase;
1084 if (is_fixed(t))
1085 c_remove_task(t);
1086 else
1087 g_remove_task(t); 1113 g_remove_task(t);
1114 else
1115 c_remove_task(t);
1088 // Release our reference to the task struct 1116 // Release our reference to the task struct
1089 put_task_struct(t); 1117 put_task_struct(t);
1090 } 1118 }
1119 list_del_init(&t->edfsc_qnode);
1120 raw_spin_unlock_irqrestore(&g_lock, flags);
1091 return HRTIMER_NORESTART; 1121 return HRTIMER_NORESTART;
1092} 1122}
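With the old hijack of task_params.phase gone, the callback's else-branch now keys off task_params.cpu: edfsc_task_exit() stores either the owning container's id (fixed task) or NO_CPU (migrating task) there, and the callback dispatches to c_remove_task() or g_remove_task() accordingly. A condensed model of that decision, with hypothetical names standing in for the real helpers and state:

#include <stdbool.h>
#include <stdio.h>

#define NO_CPU (-1)

/* Hypothetical flattened view of the state the callback inspects. */
struct exiting_task {
    bool has_move_to;   /* set by container_boundary: task is mid-migration */
    int  saved_cpu;     /* container id for fixed tasks, NO_CPU for migrating */
};

enum action { MIGRATE, REMOVE_GLOBAL, REMOVE_CONTAINER };

static enum action deadline_action(const struct exiting_task *t)
{
    if (t->has_move_to)
        return MIGRATE;            /* reached here via g_job_completion */
    if (t->saved_cpu == NO_CPU)
        return REMOVE_GLOBAL;      /* was migrating: unaccount from sys_util */
    return REMOVE_CONTAINER;       /* was fixed: unaccount from its container */
}

int main(void)
{
    struct exiting_task fixed = { .has_move_to = false, .saved_cpu = 2 };

    printf("action: %d\n", deadline_action(&fixed)); /* prints 2 (REMOVE_CONTAINER) */
    return 0;
}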
1093 1123
@@ -1103,20 +1133,30 @@ static void edfsc_task_new(struct task_struct* t, int on_rq, int is_scheduled)
1103 1133
1104 tsk_rt(t)->sporadic_release = 0; 1134 tsk_rt(t)->sporadic_release = 0;
1105 1135
1136 TRACE("EDF-sc: task new %d\n", t->pid);
1137
1106 // Create a timer that we'll use to delay accounting during migrations 1138 // Create a timer that we'll use to delay accounting during migrations
1107 hrtimer_init(&t->edfsc_deadline_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1139 hrtimer_init(&t->edfsc_deadline_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1108 t->edfsc_deadline_timer.function = task_deadline_callback; 1140 t->edfsc_deadline_timer.function = task_deadline_callback;
1109 1141
1110 raw_spin_lock_irqsave(&g_lock, flags); 1142 raw_spin_lock_irqsave(&g_lock, flags);
1111 // Queue this task and request a reschedule
1112 requeue(t);
1113 preempt(entry);
1114 1143
1115 // Since `t` is not going to run again until we schedule, harmonize state 1144 release_at(t, litmus_clock());
1116 t->rt_param.linked_on = NO_CPU; 1145 sched_trace_task_release(t);
1146 tsk_rt(t)->linked_on = NO_CPU;
1147 tsk_rt(t)->scheduled_on = NO_CPU;
1148 // Queue this task and request a reschedule
1149 if (on_rq || is_scheduled) {
1150 requeue(t);
1151 if (is_migrating(t)) {
1152 g_preempt_check();
1153 }
1154 else if (is_fixed(t)) {
1155 c_preempt_check((cont_domain_t*)tsk_rt(t)->domain);
1156 }
1157 preempt(entry);
1158 }
1117 raw_spin_unlock_irqrestore(&g_lock, flags); 1159 raw_spin_unlock_irqrestore(&g_lock, flags);
1118
1119 TRACE("EDF-sc: task new %d\n", t->pid);
1120} 1160}
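Task arrival now mirrors wake-up: the job is released immediately, and only a runnable task is requeued, followed by a preemption check in whichever domain it belongs to (global for migrating tasks, its container's for fixed ones) and a local preempt(). A compact illustration of that per-domain dispatch, where the types and hook names are hypothetical stand-ins for g_preempt_check() and c_preempt_check():

#include <stdio.h>

/* Hypothetical per-domain preemption hooks; in the plugin these inspect
 * very different state (the global heap vs. one container's ready queue). */
struct sched_domain_model {
    const char *name;
    void (*preempt_check)(struct sched_domain_model *dom);
};

static void global_check(struct sched_domain_model *dom)
{
    printf("checking global preemptions (%s)\n", dom->name);
}

static void container_check(struct sched_domain_model *dom)
{
    printf("checking container-local preemptions (%s)\n", dom->name);
}

/* The arrival/wake-up pattern: requeue (not modeled here), then run the
 * check that matches the task's current domain. */
static void after_requeue(struct sched_domain_model *dom)
{
    dom->preempt_check(dom);
}

int main(void)
{
    struct sched_domain_model g = { "gsched_domain", global_check };
    struct sched_domain_model c = { "container 0",   container_check };

    after_requeue(&g);  /* migrating task */
    after_requeue(&c);  /* fixed task */
    return 0;
}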
1121 1161
1122/** 1162/**
@@ -1127,18 +1167,32 @@ static void edfsc_task_new(struct task_struct* t, int on_rq, int is_scheduled)
1127static void edfsc_task_wake_up(struct task_struct *task) 1167static void edfsc_task_wake_up(struct task_struct *task)
1128{ 1168{
1129 unsigned long flags; 1169 unsigned long flags;
1130 1170 lt_t now = litmus_clock();
1131 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 1171 cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(task));
1172 TRACE_TASK(task, "wake_up at %llu\n", now);
1132 raw_spin_lock_irqsave(&g_lock, flags); 1173 raw_spin_lock_irqsave(&g_lock, flags);
1133 // TODO: Look into handling sporadic tasks as sched_gsnedf.c does 1174 now = litmus_clock();
1134 requeue(task); 1175 if (is_sporadic(task) && is_tardy(task, now)) {
1135 // TODO: Look into queuing preemption as sched_gsnedf.c does? 1176 inferred_sporadic_job_release_at(task, now);
1177 }
1178 if (!is_queued(task) && tsk_rt(task)->domain)
1179 requeue(task);
1180 if (is_migrating(task))
1181 g_preempt_check();
1182 else if (is_fixed(task))
1183 c_preempt_check((cont_domain_t*)tsk_rt(task)->domain);
1184 preempt(entry);
1136 raw_spin_unlock_irqrestore(&g_lock, flags); 1185 raw_spin_unlock_irqrestore(&g_lock, flags);
1137} 1186}
1138 1187
1139static void edfsc_task_block(struct task_struct *t) 1188static void edfsc_task_block(struct task_struct *t)
1140{ 1189{
1141 // TODO 1190 unsigned long flags;
1191 raw_spin_lock_irqsave(&g_lock, flags);
1192 if (is_migrating(t) && tsk_rt(t)->edfsc_params.container_task) {
1193 tsk_rt(t)->edfsc_params.container_task = NULL;
1194 }
1195 raw_spin_unlock_irqrestore(&g_lock, flags);
1142} 1196}
1143 1197
1144/** 1198/**
@@ -1152,35 +1206,37 @@ static void edfsc_task_exit(struct task_struct* t)
1152{ 1206{
1153 unsigned long flags; 1207 unsigned long flags;
1154 lt_t now, unaccount_time = 0; 1208 lt_t now, unaccount_time = 0;
1155 cpu_entry_t* entry; 1209 cpu_entry_t* entry = &per_cpu(edfsc_cpu_entries, task_cpu(t));
1156 1210
1157 BUG_ON(is_container(t)); 1211 BUG_ON(is_container(t));
1158 raw_spin_lock_irqsave(&g_lock, flags); 1212 raw_spin_lock_irqsave(&g_lock, flags);
1159 TRACE_TASK(t, "called edfsc_task_exit\n"); 1213 TRACE_TASK(t, "called edfsc_task_exit\n");
1160 1214
1161 // Remove this task from all members of its scheduling domain 1215 // Remove this task from all members of its scheduling domain
1162 unlink(t); 1216 if (is_fixed(t)) {
1163 if (is_queued(t)) { 1217 tsk_rt(t)->task_params.cpu = ((cont_domain_t*)tsk_rt(t)->domain)->container->rt_param.edfsc_params.id;
1164 remove(tsk_rt(t)->domain, t); 1218 if (is_queued(t))
1165 } else if (is_fixed(t)) { 1219 remove(tsk_rt(t)->domain, t);
1166 // If we're fixed and not on the ready queues, we should be currently running 1220 else {
1167 BUG_ON(((cont_domain_t*)tsk_rt(t)->domain)->scheduled != t); 1221 // If we're fixed and not on the ready queues, we should be currently running
1168 BUG_ON(t != current); 1222 BUG_ON(((cont_domain_t*)tsk_rt(t)->domain)->scheduled != t);
1169 ((cont_domain_t*)tsk_rt(t)->domain)->scheduled = NULL; 1223 BUG_ON(t != current);
1224 ((cont_domain_t*)tsk_rt(t)->domain)->scheduled = NULL;
1225 }
1170 } else { 1226 } else {
1171 // We're in the global domain and not on the ready queues, so we must be running 1227 tsk_rt(t)->task_params.cpu = NO_CPU;
1172 BUG_ON(t != current); 1228 list_del_init(&t->edfsc_qnode);
1173 list_del(&t->edfsc_qnode); 1229 if (tsk_rt(t)->edfsc_params.container_task != NULL) {
1174 entry = &per_cpu(edfsc_cpu_entries, task_cpu(t)); 1230 BUG_ON(tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled != t);
1175 // Handle the case where we exit while being background scheduled 1231 tsk_rt(t)->edfsc_params.container_task->rt_param.edfsc_params.domain->scheduled = NULL;
1176 if (is_container(entry->scheduled)) { 1232 }
1177 BUG_ON(entry->scheduled->rt_param.edfsc_params.domain->scheduled != t); 1233 else {
1178 entry->scheduled->rt_param.edfsc_params.domain->scheduled = NULL; 1234 unlink(t);
1179 } else {
1180 BUG_ON(entry->scheduled != t);
1181 entry->scheduled = NULL; 1235 entry->scheduled = NULL;
1182 } 1236 }
1183 } 1237 }
1238 tsk_rt(t)->domain = NULL;
1239 BUG_ON(is_queued(t));
1184 1240
1185 /* To preserve EDF-sc scheduling invariants, we can only release a task's 1241 /* To preserve EDF-sc scheduling invariants, we can only release a task's
1186 * utilization at the greater of completion or deadline boundary. Thus, here 1242 * utilization at the greater of completion or deadline boundary. Thus, here
@@ -1209,20 +1265,19 @@ static void edfsc_task_exit(struct task_struct* t)
1209 get_task_struct(t); 1265 get_task_struct(t);
1210 // Make it clear that this task is going away 1266 // Make it clear that this task is going away
1211 tsk_rt(t)->edfsc_params.move_to = NULL; 1267 tsk_rt(t)->edfsc_params.move_to = NULL;
1212 /* HACK: Unfortunately, even though we hold a reference to the task struct,
1213 * LITMUS clears edfsc_params before our timer expires. `task_params` seems
1214 * untouched, so hijack `task_params.phase` to link to the container task
1215 */
1216 tsk_rt(t)->task_params.phase = (lt_t)tsk_rt(t)->edfsc_params.container_task;
1217 1268
1218 if (unaccount_time == 0) 1269
1270 if (unaccount_time == 0) {
1271 raw_spin_unlock_irqrestore(&g_lock, flags);
1219 // Don't bother setting a zero-length timer - just skip straight to the callback 1272 // Don't bother setting a zero-length timer - just skip straight to the callback
1220 task_deadline_callback(&t->edfsc_deadline_timer); 1273 task_deadline_callback(&t->edfsc_deadline_timer);
1221 else 1274 }
1275 else {
1276 list_add(&t->edfsc_qnode, &pending_removes);
1277 raw_spin_unlock_irqrestore(&g_lock, flags);
1222 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(unaccount_time), 1278 hrtimer_start(&t->edfsc_deadline_timer, ns_to_ktime(unaccount_time),
1223 HRTIMER_MODE_ABS_PINNED); 1279 HRTIMER_MODE_ABS_PINNED);
1224 1280 }
1225 raw_spin_unlock_irqrestore(&g_lock, flags);
1226} 1281}
1227 1282
1228static struct domain_proc_info edfsc_domain_proc_info; 1283static struct domain_proc_info edfsc_domain_proc_info;
@@ -1274,23 +1329,130 @@ static long edfsc_activate_plugin(void)
1274 * be reusable if we don't destroy them when the plugin is deactivated) 1329 * be reusable if we don't destroy them when the plugin is deactivated)
1275 * - ... 1330 * - ...
1276 */ 1331 */
1332 int i;
1333 lt_t now;
1334 cpu_entry_t* entry;
1335 struct task_struct* t;
1336
1337 edfsc_setup_domain_proc();
1338
1339 INIT_LIST_HEAD(&pending_adds);
1340 INIT_LIST_HEAD(&migrating_tasks);
1341 INIT_LIST_HEAD(&pending_removes);
1342 bheap_init(&edfsc_cpu_heap);
1343
1344 // Set up the container boundary timer
1345 hrtimer_init(&container_release_timer, CLOCK_MONOTONIC,
1346 HRTIMER_MODE_ABS_PINNED);
1347 container_release_timer.function = container_boundary;
1348
1349 edf_domain_init(&gsched_domain, NULL, g_release_jobs);
1350
1351 container_tasks = kmalloc(sizeof(struct task_struct) * num_online_cpus(), GFP_KERNEL);
1352 container_domains = kmalloc(sizeof(cont_domain_t) * num_online_cpus(), GFP_KERNEL);
1353 container_list = kmalloc(sizeof(cont_domain_t*) * num_online_cpus(), GFP_KERNEL);
1354 edfsc_cpu_heap_node = kmalloc(sizeof(struct bheap_node) * num_online_cpus(), GFP_KERNEL);
1355
1356 sys_util = to_fp(0);
1357 m_util = to_fp(0);
1358 sys_changed = 1;
1359
1360 memset(container_tasks, 0, sizeof(struct task_struct) * num_online_cpus());
1361 memset(container_domains, 0, sizeof(cont_domain_t) * num_online_cpus());
1362
1363 // Initialize container domains
1364 for (i = 0; i < num_online_cpus(); i++) {
1365 edf_domain_init(&container_domains[i].domain, c_check_resched, NULL);
1366 container_domains[i].scheduled = NULL;
1367 container_domains[i].container = &container_tasks[i];
1368 container_domains[i].f_util = to_fp(0);
1369 hrtimer_init(&(container_domains[i].idle_enforcement_timer), CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1370 container_domains[i].idle_enforcement_timer.function = on_idle_enforcement_timeout;
1371
1372
1373 // Name the task its container ID mapped to ASCII
1374 snprintf(container_tasks[i].comm, TASK_COMM_LEN, "%d", i);
1375 container_tasks[i].pid = -i;
1376 tsk_rt(&container_tasks[i])->task_params.exec_cost = LITMUS_QUANTUM_LENGTH_NS;
1377 tsk_rt(&container_tasks[i])->task_params.period =
1378 LITMUS_QUANTUM_LENGTH_NS;
1379 tsk_rt(&container_tasks[i])->task_params.utilization = to_fp(1);
1380 tsk_rt(&container_tasks[i])->task_params.relative_deadline =
1381 LITMUS_QUANTUM_LENGTH_NS;
1382 tsk_rt(&container_tasks[i])->task_params.budget_policy = PRECISE_ENFORCEMENT;
1383 tsk_rt(&container_tasks[i])->edfsc_params.container_task = NULL;
1384 tsk_rt(&container_tasks[i])->domain = &gsched_domain;
1385 tsk_rt(&container_tasks[i])->edfsc_params.domain = &container_domains[i];
1386 tsk_rt(&container_tasks[i])->sporadic_release = 0;
1387 tsk_rt(&container_tasks[i])->edfsc_params.id = i;
1388 tsk_rt(&container_tasks[i])->heap_node = bheap_node_alloc(GFP_ATOMIC);
1389 tsk_rt(&container_tasks[i])->rel_heap = release_heap_alloc(GFP_ATOMIC);
1390 tsk_rt(&container_tasks[i])->linked_on = NO_CPU;
1391 tsk_rt(&container_tasks[i])->scheduled_on = NO_CPU;
1392
1393 if (!tsk_rt(&container_tasks[i])->heap_node || !tsk_rt(&container_tasks[i])->rel_heap) {
1394 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
1395 return -ENOMEM;
1396 } else {
1397 bheap_node_init(&tsk_rt(&container_tasks[i])->heap_node, &container_tasks[i]);
1398 }
1399
1400 container_tasks[i].policy = SCHED_LITMUS;
1401
1402 // Populate the container_list while we're at it.
1403 container_list[i] = &container_domains[i];
1404
1405 // Link heap nodes to CPU structures
1406 entry = &per_cpu(edfsc_cpu_entries, i);
1407 entry->cpu = i;
1408 entry->scheduled = NULL;
1409 entry->linked = NULL;
1410 entry->hn = &edfsc_cpu_heap_node[i];
1411 bheap_node_init(&entry->hn, entry);
1412 }
1413
1414 now = litmus_clock();
1415 for (i = 0; i < num_online_cpus(); i++) {
1416 t = &container_tasks[i];
1417 entry = &per_cpu(edfsc_cpu_entries, tsk_rt(t)->edfsc_params.id);
1418 ((cont_domain_t*)tsk_rt(t)->edfsc_params.domain)->scheduled_last_exec_time = now;
1419 release_at(t, now);
1420 link_task_to_cpu(t, entry);
1421 }
1277 1422
1278 // Start the container boundary timer 1423 // Start the container boundary timer
1279 hrtimer_start(&container_release_timer, 1424 hrtimer_start(&container_release_timer,
1280 ns_to_ktime(litmus_clock() + LITMUS_QUANTUM_LENGTH_NS), 1425 ns_to_ktime(now + LITMUS_QUANTUM_LENGTH_NS),
1281 HRTIMER_MODE_ABS_PINNED); 1426 HRTIMER_MODE_ABS_PINNED);
1282 1427
1283 edfsc_setup_domain_proc();
1284
1285 return 0; 1428 return 0;
1286} 1429}
1287 1430
1288static long edfsc_deactivate_plugin(void) 1431static long edfsc_deactivate_plugin(void)
1289{ 1432{
1290 // TODO: Reset our internal state 1433 int i;
1434 struct list_head *l, *temp;
1435 struct task_struct* t;
1291 1436
1292 // Stop the container boundary timer 1437 // Stop the container boundary timer
1293 hrtimer_cancel(&container_release_timer); 1438 hrtimer_try_to_cancel(&container_release_timer);
1439
1440 list_for_each_safe(l, temp, &pending_removes) {
1441 t = task_of_list_node(l);
1442 list_del_init(l);
1443 hrtimer_try_to_cancel(&t->edfsc_deadline_timer);
1444 }
1445
1446 for (i = 0; i < num_online_cpus(); i++) {
1447 bheap_node_free(tsk_rt(&container_tasks[i])->heap_node);
1448 release_heap_free(tsk_rt(&container_tasks[i])->rel_heap);
1449 hrtimer_try_to_cancel(&container_domains[i].idle_enforcement_timer);
1450 }
1451
1452 kfree(container_tasks);
1453 kfree(container_domains);
1454 kfree(container_list);
1455 kfree(edfsc_cpu_heap_node);
1294 1456
1295 destroy_domain_proc_info(&edfsc_domain_proc_info); 1457 destroy_domain_proc_info(&edfsc_domain_proc_info);
1296 return 0; 1458 return 0;
@@ -1303,9 +1465,11 @@ static long edfsc_deactivate_plugin(void)
1303 */ 1465 */
1304static long edfsc_admit_task(struct task_struct* tsk) 1466static long edfsc_admit_task(struct task_struct* tsk)
1305{ 1467{
1468 unsigned long flags;
1306 // We assume that we're running in the context of `tsk` 1469 // We assume that we're running in the context of `tsk`
1307 BUG_ON(tsk != current); 1470 BUG_ON(tsk != current);
1308 1471
1472 raw_spin_lock_irqsave(&g_lock, flags);
1309 // Make sure that edfsc_params doesn't contain garbage 1473 // Make sure that edfsc_params doesn't contain garbage
1310 // Note that edfsc_params->domain will always be NULL for non-container tasks 1474 // Note that edfsc_params->domain will always be NULL for non-container tasks
1311 memset(&tsk_rt(tsk)->edfsc_params, 0, sizeof(struct edfsc_params)); 1475 memset(&tsk_rt(tsk)->edfsc_params, 0, sizeof(struct edfsc_params));
@@ -1317,6 +1481,7 @@ static long edfsc_admit_task(struct task_struct* tsk)
1317 tsk_rt(tsk)->task_params.utilization = fp_div(get_exec_cost(tsk), get_rt_period(tsk)); 1481 tsk_rt(tsk)->task_params.utilization = fp_div(get_exec_cost(tsk), get_rt_period(tsk));
1318 // Add us to the queue of tasks waiting on admission 1482 // Add us to the queue of tasks waiting on admission
1319 list_add_tail(&tsk->edfsc_qnode, &pending_adds); 1483 list_add_tail(&tsk->edfsc_qnode, &pending_adds);
1484 raw_spin_unlock_irqrestore(&g_lock, flags);
1320 // We don't know if we can be admitted until a container job boundary is reached, 1485 // We don't know if we can be admitted until a container job boundary is reached,
1321 // so block until the scheduler can make that decision 1486 // so block until the scheduler can make that decision
1322 set_current_state(TASK_INTERRUPTIBLE); // Changed from TASK_RUNNING 1487 set_current_state(TASK_INTERRUPTIBLE); // Changed from TASK_RUNNING
@@ -1329,14 +1494,18 @@ static long edfsc_admit_task(struct task_struct* tsk)
1329 if (tsk_rt(tsk)->domain != NULL) 1494 if (tsk_rt(tsk)->domain != NULL)
1330 return 0; // Successfully admitted 1495 return 0; // Successfully admitted
1331 else { 1496 else {
1497 raw_spin_lock_irqsave(&g_lock, flags);
1332 // We'll still be on pending_adds if interrupted by a signal 1498 // We'll still be on pending_adds if interrupted by a signal
1333 struct list_head* l; 1499 struct list_head* l;
1334 list_for_each(l, &pending_adds) { 1500 struct list_head *temp;
1335 if (l == &tsk->edfsc_qnode) { 1501 list_for_each_safe(l, temp, &pending_adds) {
1336 list_del(l); 1502 if (task_of_list_node(l) == tsk) {
1503 list_del_init(l);
1504 raw_spin_unlock_irqrestore(&g_lock, flags);
1337 return -EINTR; // Interrupted 1505 return -EINTR; // Interrupted
1338 } 1506 }
1339 } 1507 }
1508 raw_spin_unlock_irqrestore(&g_lock, flags);
1340 return -ENOSPC; // Rejected 1509 return -ENOSPC; // Rejected
1341 } 1510 }
1342} 1511}
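Admission itself stays asynchronous: the task is appended to pending_adds under g_lock and then sleeps until the next container boundary decides its fate, with tsk_rt(tsk)->domain doubling as the verdict once it is woken. The shape of that handshake, modeled in user space with a pthread condition variable (all names are illustrative; the kernel code uses the scheduler's own sleep/wake machinery, not pthreads):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* One pending admission request: the "scheduler" fills in admitted and
 * flips decided at its next boundary. */
struct admission_request {
    pthread_mutex_t lock;
    pthread_cond_t  decided_cv;
    bool decided;
    bool admitted;
};

static void *boundary_thread(void *arg)
{
    struct admission_request *req = arg;

    pthread_mutex_lock(&req->lock);
    req->admitted = true;          /* pretend the utilization check passed */
    req->decided = true;
    pthread_cond_signal(&req->decided_cv);
    pthread_mutex_unlock(&req->lock);
    return NULL;
}

int main(void)
{
    struct admission_request req = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .decided_cv = PTHREAD_COND_INITIALIZER,
    };
    pthread_t boundary;

    pthread_create(&boundary, NULL, boundary_thread, &req);

    /* The admitting task blocks until the boundary makes its decision. */
    pthread_mutex_lock(&req.lock);
    while (!req.decided)
        pthread_cond_wait(&req.decided_cv, &req.lock);
    pthread_mutex_unlock(&req.lock);

    printf("%s\n", req.admitted ? "admitted" : "rejected");
    pthread_join(boundary, NULL);
    return 0;
}

Build with -lpthread; the interruptible-sleep and signal-cleanup path handled by list_for_each_safe above has no analogue in this toy model.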
@@ -1354,90 +1523,13 @@ static struct sched_plugin edfsc_plugin __cacheline_aligned_in_smp = {
1354 .admit_task = edfsc_admit_task, 1523 .admit_task = edfsc_admit_task,
1355 .activate_plugin = edfsc_activate_plugin, 1524 .activate_plugin = edfsc_activate_plugin,
1356 .deactivate_plugin = edfsc_deactivate_plugin, 1525 .deactivate_plugin = edfsc_deactivate_plugin,
1526 .should_wait_for_stack = edfsc_should_wait_for_stack,
1357 .get_domain_proc_info = edfsc_get_domain_proc_info, 1527 .get_domain_proc_info = edfsc_get_domain_proc_info,
1358}; 1528};
1359 1529
1360 1530
1361static int __init init_edfsc(void) 1531static int __init init_edfsc(void)
1362{ 1532{
1363 int i;
1364 cpu_entry_t *entry;
1365
1366 INIT_LIST_HEAD(&pending_adds);
1367 INIT_LIST_HEAD(&migrating_tasks);
1368
1369 bheap_init(&edfsc_cpu_heap);
1370
1371 edf_domain_init(&gsched_domain, NULL, g_release_jobs);
1372
1373 // Set up the container boundary timer
1374 hrtimer_init(&container_release_timer, CLOCK_MONOTONIC,
1375 HRTIMER_MODE_ABS_PINNED);
1376 container_release_timer.function = container_boundary;
1377
1378 container_tasks = kmalloc(sizeof(struct task_struct) * num_online_cpus(), GFP_KERNEL);
1379 container_domains = kmalloc(sizeof(cont_domain_t) * num_online_cpus(), GFP_KERNEL);
1380 container_list = kmalloc(sizeof(cont_domain_t*) * num_online_cpus(), GFP_KERNEL);
1381 edfsc_cpu_heap_node = kmalloc(sizeof(struct bheap_node) * num_online_cpus(), GFP_KERNEL);
1382
1383 sys_util = to_fp(0);
1384 m_util = to_fp(0);
1385 sys_changed = 1;
1386
1387 memset(container_tasks, 0, sizeof(struct task_struct) * num_online_cpus());
1388 memset(container_domains, 0, sizeof(cont_domain_t) * num_online_cpus());
1389
1390 // Initialize container domains
1391 for (i = 0; i < num_online_cpus(); i++) {
1392 edf_domain_init(&container_domains[i].domain, c_check_resched, NULL);
1393 container_domains[i].scheduled = NULL;
1394 container_domains[i].container = &container_tasks[i];
1395 container_domains[i].f_util = to_fp(0);
1396 hrtimer_init(&(container_domains[i].idle_enforcement_timer), CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1397 container_domains[i].idle_enforcement_timer.function = on_idle_enforcement_timeout;
1398
1399
1400 // Name the task its container ID mapped to ASCII
1401 snprintf(container_tasks[i].comm, TASK_COMM_LEN, "%d", i);
1402 tsk_rt(&container_tasks[i])->task_params.exec_cost = LITMUS_QUANTUM_LENGTH_NS / 2;
1403 tsk_rt(&container_tasks[i])->task_params.period =
1404 LITMUS_QUANTUM_LENGTH_NS;
1405 tsk_rt(&container_tasks[i])->task_params.relative_deadline =
1406 LITMUS_QUANTUM_LENGTH_NS;
1407 tsk_rt(&container_tasks[i])->task_params.budget_policy = PRECISE_ENFORCEMENT;
1408 tsk_rt(&container_tasks[i])->edfsc_params.container_task = NULL;
1409 tsk_rt(&container_tasks[i])->domain = &gsched_domain;
1410 tsk_rt(&container_tasks[i])->edfsc_params.domain = &container_domains[i];
1411 tsk_rt(&container_tasks[i])->edfsc_params.can_release = 0;
1412 tsk_rt(&container_tasks[i])->sporadic_release = 0;
1413 tsk_rt(&container_tasks[i])->edfsc_params.id = i;
1414 tsk_rt(&container_tasks[i])->heap_node = bheap_node_alloc(GFP_ATOMIC);
1415 tsk_rt(&container_tasks[i])->rel_heap = release_heap_alloc(GFP_ATOMIC);
1416
1417 if (!tsk_rt(&container_tasks[i])->heap_node || !tsk_rt(&container_tasks[i])->rel_heap) {
1418 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
1419 return -ENOMEM;
1420 } else {
1421 bheap_node_init(&tsk_rt(&container_tasks[i])->heap_node, &container_tasks[i]);
1422 }
1423
1424 container_tasks[i].policy = SCHED_LITMUS;
1425 release_at(&container_tasks[i], litmus_clock());
1426 requeue(&container_tasks[i]);
1427
1428 // Populate the container_list while we're at it.
1429 container_list[i] = &container_domains[i];
1430
1431 // Link heap nodes to CPU structures
1432 entry = &per_cpu(edfsc_cpu_entries, i);
1433 entry->cpu = i;
1434 entry->scheduled = NULL;
1435 entry->linked = NULL;
1436 entry->hn = &edfsc_cpu_heap_node[i];
1437 bheap_node_init(&entry->hn, entry);
1438 entry->scheduled = NULL;
1439 }
1440
1441 return register_sched_plugin(&edfsc_plugin); 1533 return register_sched_plugin(&edfsc_plugin);
1442} 1534}
1443 1535