diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-05-14 16:51:05 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-05-19 22:46:49 -0400 |
commit | 44326648c2ea81b9a32619644fe9c665ed0d9e0b (patch) | |
tree | ff1e00cf3cbc0e06f511a90c4f28aa8f7b40b12e | |
parent | af6eeb156c7da47ff5df03a3da04432c8ac4460c (diff) |
Final GPUSync implementation.gpusync-rtss12
-rw-r--r-- | include/linux/interrupt.h | 6 | ||||
-rw-r--r-- | include/litmus/gpu_affinity.h | 7 | ||||
-rw-r--r-- | include/litmus/nvidia_info.h | 1 | ||||
-rw-r--r-- | include/litmus/rt_param.h | 5 | ||||
-rw-r--r-- | include/litmus/sched_trace.h | 44 | ||||
-rw-r--r-- | kernel/mutex.c | 48 | ||||
-rw-r--r-- | kernel/sched.c | 4 | ||||
-rw-r--r-- | kernel/softirq.c | 66 | ||||
-rw-r--r-- | litmus/Kconfig | 16 | ||||
-rw-r--r-- | litmus/gpu_affinity.c | 38 | ||||
-rw-r--r-- | litmus/ikglp_lock.c | 39 | ||||
-rw-r--r-- | litmus/jobs.c | 17 | ||||
-rw-r--r-- | litmus/kfmlp_lock.c | 2 | ||||
-rw-r--r-- | litmus/litmus.c | 20 | ||||
-rw-r--r-- | litmus/locking.c | 8 | ||||
-rw-r--r-- | litmus/nvidia_info.c | 13 | ||||
-rw-r--r-- | litmus/rsm_lock.c | 7 | ||||
-rw-r--r-- | litmus/sched_cedf.c | 13 | ||||
-rw-r--r-- | litmus/sched_plugin.c | 7 | ||||
-rw-r--r-- | litmus/sched_task_trace.c | 50 |
20 files changed, 313 insertions, 98 deletions
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 57a7bc8807be..8fb3dad55f19 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -528,7 +528,7 @@ static inline int tasklet_trylock(struct tasklet_struct *t) | |||
528 | 528 | ||
529 | static inline void tasklet_unlock(struct tasklet_struct *t) | 529 | static inline void tasklet_unlock(struct tasklet_struct *t) |
530 | { | 530 | { |
531 | smp_mb__before_clear_bit(); | 531 | smp_mb__before_clear_bit(); |
532 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | 532 | clear_bit(TASKLET_STATE_RUN, &(t)->state); |
533 | } | 533 | } |
534 | 534 | ||
@@ -590,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) | |||
590 | } | 590 | } |
591 | 591 | ||
592 | static inline void tasklet_enable(struct tasklet_struct *t) | 592 | static inline void tasklet_enable(struct tasklet_struct *t) |
593 | { | 593 | { |
594 | smp_mb__before_atomic_dec(); | 594 | smp_mb__before_atomic_dec(); |
595 | atomic_dec(&t->count); | 595 | atomic_dec(&t->count); |
596 | } | 596 | } |
@@ -659,7 +659,7 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) | |||
659 | * if more than one irq occurred. | 659 | * if more than one irq occurred. |
660 | */ | 660 | */ |
661 | 661 | ||
662 | #if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) | 662 | #if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) |
663 | static inline unsigned long probe_irq_on(void) | 663 | static inline unsigned long probe_irq_on(void) |
664 | { | 664 | { |
665 | return 0; | 665 | return 0; |
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index d4db2003ad86..6b3fb8b28745 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h | |||
@@ -43,10 +43,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t | |||
43 | val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); | 43 | val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); |
44 | } | 44 | } |
45 | 45 | ||
46 | // minimum value is 1 (val is 0 if we haven't run with local affinity yet) | 46 | return ((val > 0) ? val : dist+1); |
47 | // TODO: pick a better default min-value. 1 is too small. perhaps | ||
48 | // task execution time? | ||
49 | return ((val > 0) ? val : 1); | ||
50 | } | 47 | } |
51 | 48 | ||
52 | #endif \ No newline at end of file | 49 | #endif |
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 580728051d4e..97c9577141db 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS | 12 | #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS |
13 | 13 | ||
14 | int init_nvidia_info(void); | 14 | int init_nvidia_info(void); |
15 | void shutdown_nvidia_info(void); | ||
15 | 16 | ||
16 | int is_nvidia_func(void* func_addr); | 17 | int is_nvidia_func(void* func_addr); |
17 | 18 | ||
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 4553521146cc..0198884eab86 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h | |||
@@ -26,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b) | |||
26 | typedef enum { | 26 | typedef enum { |
27 | RT_CLASS_HARD, | 27 | RT_CLASS_HARD, |
28 | RT_CLASS_SOFT, | 28 | RT_CLASS_SOFT, |
29 | RT_CLASS_SOFT_W_SLIP, | ||
29 | RT_CLASS_BEST_EFFORT | 30 | RT_CLASS_BEST_EFFORT |
30 | } task_class_t; | 31 | } task_class_t; |
31 | 32 | ||
@@ -189,8 +190,8 @@ struct rt_param { | |||
189 | long unsigned int held_gpus; // bitmap of held GPUs. | 190 | long unsigned int held_gpus; // bitmap of held GPUs. |
190 | 191 | ||
191 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 192 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
192 | fp_t gpu_fb_param_a; | 193 | fp_t gpu_fb_param_a[MIG_LAST+1]; |
193 | fp_t gpu_fb_param_b; | 194 | fp_t gpu_fb_param_b[MIG_LAST+1]; |
194 | 195 | ||
195 | gpu_migration_dist_t gpu_migration; | 196 | gpu_migration_dist_t gpu_migration; |
196 | int last_gpu; | 197 | int last_gpu; |
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 232c7588d103..b1b71f6c5f0c 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h | |||
@@ -10,7 +10,8 @@ struct st_trace_header { | |||
10 | u8 type; /* Of what type is this record? */ | 10 | u8 type; /* Of what type is this record? */ |
11 | u8 cpu; /* On which CPU was it recorded? */ | 11 | u8 cpu; /* On which CPU was it recorded? */ |
12 | u16 pid; /* PID of the task. */ | 12 | u16 pid; /* PID of the task. */ |
13 | u32 job; /* The job sequence number. */ | 13 | u32 job:24; /* The job sequence number. */ |
14 | u8 extra; | ||
14 | } __attribute__((packed)); | 15 | } __attribute__((packed)); |
15 | 16 | ||
16 | #define ST_NAME_LEN 16 | 17 | #define ST_NAME_LEN 16 |
@@ -136,6 +137,22 @@ struct st_nv_interrupt_end_data { | |||
136 | u32 serialNumber; | 137 | u32 serialNumber; |
137 | } __attribute__((packed)); | 138 | } __attribute__((packed)); |
138 | 139 | ||
140 | struct st_prediction_err_data { | ||
141 | u64 distance; | ||
142 | u64 rel_err; | ||
143 | } __attribute__((packed)); | ||
144 | |||
145 | struct st_migration_data { | ||
146 | u64 observed; | ||
147 | u64 estimated; | ||
148 | } __attribute__((packed)); | ||
149 | |||
150 | struct migration_info { | ||
151 | u64 observed; | ||
152 | u64 estimated; | ||
153 | u8 distance; | ||
154 | } __attribute__((packed)); | ||
155 | |||
139 | #define DATA(x) struct st_ ## x ## _data x; | 156 | #define DATA(x) struct st_ ## x ## _data x; |
140 | 157 | ||
141 | typedef enum { | 158 | typedef enum { |
@@ -160,6 +177,9 @@ typedef enum { | |||
160 | ST_EFF_PRIO_CHANGE, | 177 | ST_EFF_PRIO_CHANGE, |
161 | ST_NV_INTERRUPT_BEGIN, | 178 | ST_NV_INTERRUPT_BEGIN, |
162 | ST_NV_INTERRUPT_END, | 179 | ST_NV_INTERRUPT_END, |
180 | |||
181 | ST_PREDICTION_ERR, | ||
182 | ST_MIGRATION, | ||
163 | } st_event_record_type_t; | 183 | } st_event_record_type_t; |
164 | 184 | ||
165 | struct st_event_record { | 185 | struct st_event_record { |
@@ -187,6 +207,9 @@ struct st_event_record { | |||
187 | DATA(effective_priority_change); | 207 | DATA(effective_priority_change); |
188 | DATA(nv_interrupt_begin); | 208 | DATA(nv_interrupt_begin); |
189 | DATA(nv_interrupt_end); | 209 | DATA(nv_interrupt_end); |
210 | |||
211 | DATA(prediction_err); | ||
212 | DATA(migration); | ||
190 | } data; | 213 | } data; |
191 | } __attribute__((packed)); | 214 | } __attribute__((packed)); |
192 | 215 | ||
@@ -259,6 +282,19 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, | |||
259 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, | 282 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, |
260 | unsigned long unused); | 283 | unsigned long unused); |
261 | 284 | ||
285 | feather_callback void do_sched_trace_prediction_err(unsigned long id, | ||
286 | struct task_struct* task, | ||
287 | gpu_migration_dist_t* distance, | ||
288 | fp_t* rel_err); | ||
289 | |||
290 | |||
291 | |||
292 | |||
293 | |||
294 | feather_callback void do_sched_trace_migration(unsigned long id, | ||
295 | struct task_struct* task, | ||
296 | struct migration_info* mig_info); | ||
297 | |||
262 | 298 | ||
263 | /* returns true if we're tracing an interrupt on current CPU */ | 299 | /* returns true if we're tracing an interrupt on current CPU */ |
264 | /* int is_interrupt_tracing_active(void); */ | 300 | /* int is_interrupt_tracing_active(void); */ |
@@ -331,6 +367,12 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, | |||
331 | #define sched_trace_nv_interrupt_end(d) \ | 367 | #define sched_trace_nv_interrupt_end(d) \ |
332 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) | 368 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) |
333 | 369 | ||
370 | #define sched_trace_prediction_err(t, dist, rel_err) \ | ||
371 | SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err) | ||
372 | |||
373 | #define sched_trace_migration(t, mig_info) \ | ||
374 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info) | ||
375 | |||
334 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ | 376 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ |
335 | 377 | ||
336 | #endif /* __KERNEL__ */ | 378 | #endif /* __KERNEL__ */ |
diff --git a/kernel/mutex.c b/kernel/mutex.c index 2f363b9bfc1f..96bcecd385d3 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -511,12 +511,12 @@ void mutex_lock_sfx(struct mutex *lock, | |||
511 | struct task_struct *task = current; | 511 | struct task_struct *task = current; |
512 | struct mutex_waiter waiter; | 512 | struct mutex_waiter waiter; |
513 | unsigned long flags; | 513 | unsigned long flags; |
514 | 514 | ||
515 | preempt_disable(); | 515 | preempt_disable(); |
516 | mutex_acquire(&lock->dep_map, subclass, 0, ip); | 516 | mutex_acquire(&lock->dep_map, subclass, 0, ip); |
517 | 517 | ||
518 | spin_lock_mutex(&lock->wait_lock, flags); | 518 | spin_lock_mutex(&lock->wait_lock, flags); |
519 | 519 | ||
520 | if(pre) | 520 | if(pre) |
521 | { | 521 | { |
522 | if(unlikely(pre(pre_arg))) | 522 | if(unlikely(pre(pre_arg))) |
@@ -530,16 +530,16 @@ void mutex_lock_sfx(struct mutex *lock, | |||
530 | 530 | ||
531 | debug_mutex_lock_common(lock, &waiter); | 531 | debug_mutex_lock_common(lock, &waiter); |
532 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); | 532 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); |
533 | 533 | ||
534 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | 534 | /* add waiting tasks to the end of the waitqueue (FIFO): */ |
535 | list_add_tail(&waiter.list, &lock->wait_list); | 535 | list_add_tail(&waiter.list, &lock->wait_list); |
536 | waiter.task = task; | 536 | waiter.task = task; |
537 | 537 | ||
538 | if (atomic_xchg(&lock->count, -1) == 1) | 538 | if (atomic_xchg(&lock->count, -1) == 1) |
539 | goto done; | 539 | goto done; |
540 | 540 | ||
541 | lock_contended(&lock->dep_map, ip); | 541 | lock_contended(&lock->dep_map, ip); |
542 | 542 | ||
543 | for (;;) { | 543 | for (;;) { |
544 | /* | 544 | /* |
545 | * Lets try to take the lock again - this is needed even if | 545 | * Lets try to take the lock again - this is needed even if |
@@ -552,9 +552,9 @@ void mutex_lock_sfx(struct mutex *lock, | |||
552 | */ | 552 | */ |
553 | if (atomic_xchg(&lock->count, -1) == 1) | 553 | if (atomic_xchg(&lock->count, -1) == 1) |
554 | break; | 554 | break; |
555 | 555 | ||
556 | __set_task_state(task, state); | 556 | __set_task_state(task, state); |
557 | 557 | ||
558 | /* didnt get the lock, go to sleep: */ | 558 | /* didnt get the lock, go to sleep: */ |
559 | spin_unlock_mutex(&lock->wait_lock, flags); | 559 | spin_unlock_mutex(&lock->wait_lock, flags); |
560 | preempt_enable_no_resched(); | 560 | preempt_enable_no_resched(); |
@@ -562,22 +562,22 @@ void mutex_lock_sfx(struct mutex *lock, | |||
562 | preempt_disable(); | 562 | preempt_disable(); |
563 | spin_lock_mutex(&lock->wait_lock, flags); | 563 | spin_lock_mutex(&lock->wait_lock, flags); |
564 | } | 564 | } |
565 | 565 | ||
566 | done: | 566 | done: |
567 | lock_acquired(&lock->dep_map, ip); | 567 | lock_acquired(&lock->dep_map, ip); |
568 | /* got the lock - rejoice! */ | 568 | /* got the lock - rejoice! */ |
569 | mutex_remove_waiter(lock, &waiter, current_thread_info()); | 569 | mutex_remove_waiter(lock, &waiter, current_thread_info()); |
570 | mutex_set_owner(lock); | 570 | mutex_set_owner(lock); |
571 | 571 | ||
572 | /* set it to 0 if there are no waiters left: */ | 572 | /* set it to 0 if there are no waiters left: */ |
573 | if (likely(list_empty(&lock->wait_list))) | 573 | if (likely(list_empty(&lock->wait_list))) |
574 | atomic_set(&lock->count, 0); | 574 | atomic_set(&lock->count, 0); |
575 | 575 | ||
576 | if(post) | 576 | if(post) |
577 | post(post_arg); | 577 | post(post_arg); |
578 | 578 | ||
579 | spin_unlock_mutex(&lock->wait_lock, flags); | 579 | spin_unlock_mutex(&lock->wait_lock, flags); |
580 | 580 | ||
581 | debug_mutex_free_waiter(&waiter); | 581 | debug_mutex_free_waiter(&waiter); |
582 | preempt_enable(); | 582 | preempt_enable(); |
583 | } | 583 | } |
@@ -588,16 +588,16 @@ void mutex_unlock_sfx(struct mutex *lock, | |||
588 | side_effect_t post, unsigned long post_arg) | 588 | side_effect_t post, unsigned long post_arg) |
589 | { | 589 | { |
590 | unsigned long flags; | 590 | unsigned long flags; |
591 | 591 | ||
592 | spin_lock_mutex(&lock->wait_lock, flags); | 592 | spin_lock_mutex(&lock->wait_lock, flags); |
593 | 593 | ||
594 | if(pre) | 594 | if(pre) |
595 | pre(pre_arg); | 595 | pre(pre_arg); |
596 | 596 | ||
597 | //mutex_release(&lock->dep_map, nested, _RET_IP_); | 597 | //mutex_release(&lock->dep_map, nested, _RET_IP_); |
598 | mutex_release(&lock->dep_map, 1, _RET_IP_); | 598 | mutex_release(&lock->dep_map, 1, _RET_IP_); |
599 | debug_mutex_unlock(lock); | 599 | debug_mutex_unlock(lock); |
600 | 600 | ||
601 | /* | 601 | /* |
602 | * some architectures leave the lock unlocked in the fastpath failure | 602 | * some architectures leave the lock unlocked in the fastpath failure |
603 | * case, others need to leave it locked. In the later case we have to | 603 | * case, others need to leave it locked. In the later case we have to |
@@ -605,21 +605,21 @@ void mutex_unlock_sfx(struct mutex *lock, | |||
605 | */ | 605 | */ |
606 | if (__mutex_slowpath_needs_to_unlock()) | 606 | if (__mutex_slowpath_needs_to_unlock()) |
607 | atomic_set(&lock->count, 1); | 607 | atomic_set(&lock->count, 1); |
608 | 608 | ||
609 | if (!list_empty(&lock->wait_list)) { | 609 | if (!list_empty(&lock->wait_list)) { |
610 | /* get the first entry from the wait-list: */ | 610 | /* get the first entry from the wait-list: */ |
611 | struct mutex_waiter *waiter = | 611 | struct mutex_waiter *waiter = |
612 | list_entry(lock->wait_list.next, | 612 | list_entry(lock->wait_list.next, |
613 | struct mutex_waiter, list); | 613 | struct mutex_waiter, list); |
614 | 614 | ||
615 | debug_mutex_wake_waiter(lock, waiter); | 615 | debug_mutex_wake_waiter(lock, waiter); |
616 | 616 | ||
617 | wake_up_process(waiter->task); | 617 | wake_up_process(waiter->task); |
618 | } | 618 | } |
619 | 619 | ||
620 | if(post) | 620 | if(post) |
621 | post(post_arg); | 621 | post(post_arg); |
622 | 622 | ||
623 | spin_unlock_mutex(&lock->wait_lock, flags); | 623 | spin_unlock_mutex(&lock->wait_lock, flags); |
624 | } | 624 | } |
625 | EXPORT_SYMBOL(mutex_unlock_sfx); | 625 | EXPORT_SYMBOL(mutex_unlock_sfx); |
diff --git a/kernel/sched.c b/kernel/sched.c index f3d9a69a3777..2f990b4b24f9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -4430,8 +4430,8 @@ litmus_need_resched_nonpreemptible: | |||
4430 | 4430 | ||
4431 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | 4431 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD |
4432 | litmus->run_tasklets(prev); | 4432 | litmus->run_tasklets(prev); |
4433 | #endif | 4433 | #endif |
4434 | 4434 | ||
4435 | srp_ceiling_block(); | 4435 | srp_ceiling_block(); |
4436 | } | 4436 | } |
4437 | EXPORT_SYMBOL(schedule); | 4437 | EXPORT_SYMBOL(schedule); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 1c42e08fdfaa..4d7b1a3e4d01 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -216,7 +216,7 @@ EXPORT_SYMBOL(local_bh_enable_ip); | |||
216 | 216 | ||
217 | asmlinkage void __do_softirq(void) | 217 | asmlinkage void __do_softirq(void) |
218 | { | 218 | { |
219 | struct softirq_action *h; | 219 | struct softirq_action *h; |
220 | __u32 pending; | 220 | __u32 pending; |
221 | int max_restart = MAX_SOFTIRQ_RESTART; | 221 | int max_restart = MAX_SOFTIRQ_RESTART; |
222 | int cpu; | 222 | int cpu; |
@@ -254,10 +254,10 @@ restart: | |||
254 | softirq_to_name[vec_nr], h->action, | 254 | softirq_to_name[vec_nr], h->action, |
255 | prev_count, preempt_count()); | 255 | prev_count, preempt_count()); |
256 | preempt_count() = prev_count; | 256 | preempt_count() = prev_count; |
257 | } | 257 | } |
258 | 258 | ||
259 | rcu_bh_qs(cpu); | 259 | rcu_bh_qs(cpu); |
260 | } | 260 | } |
261 | h++; | 261 | h++; |
262 | pending >>= 1; | 262 | pending >>= 1; |
263 | } while (pending); | 263 | } while (pending); |
@@ -412,13 +412,45 @@ struct tasklet_head | |||
412 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); | 412 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); |
413 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | 413 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); |
414 | 414 | ||
415 | #ifdef CONFIG_LITMUS_NVIDIA | ||
416 | static int __do_nv_now(struct tasklet_struct* tasklet) | ||
417 | { | ||
418 | int success = 1; | ||
419 | |||
420 | if(tasklet_trylock(tasklet)) { | ||
421 | if (!atomic_read(&tasklet->count)) { | ||
422 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { | ||
423 | BUG(); | ||
424 | } | ||
425 | tasklet->func(tasklet->data); | ||
426 | tasklet_unlock(tasklet); | ||
427 | } | ||
428 | else { | ||
429 | success = 0; | ||
430 | } | ||
431 | |||
432 | tasklet_unlock(tasklet); | ||
433 | } | ||
434 | else { | ||
435 | success = 0; | ||
436 | } | ||
437 | |||
438 | return success; | ||
439 | } | ||
440 | #endif | ||
441 | |||
415 | 442 | ||
416 | void __tasklet_schedule(struct tasklet_struct *t) | 443 | void __tasklet_schedule(struct tasklet_struct *t) |
417 | { | 444 | { |
418 | #ifdef CONFIG_LITMUS_NVIDIA | 445 | #ifdef CONFIG_LITMUS_NVIDIA |
419 | if(is_nvidia_func(t->func)) | 446 | if(is_nvidia_func(t->func)) |
420 | { | 447 | { |
421 | u32 nvidia_device = get_tasklet_nv_device_num(t); | 448 | #if 0 |
449 | // do nvidia tasklets right away and return | ||
450 | if(__do_nv_now(t)) | ||
451 | return; | ||
452 | #else | ||
453 | u32 nvidia_device = get_tasklet_nv_device_num(t); | ||
422 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | 454 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", |
423 | // __FUNCTION__, nvidia_device,litmus_clock()); | 455 | // __FUNCTION__, nvidia_device,litmus_clock()); |
424 | 456 | ||
@@ -438,7 +470,7 @@ void __tasklet_schedule(struct tasklet_struct *t) | |||
438 | if(is_realtime(device_owner)) | 470 | if(is_realtime(device_owner)) |
439 | { | 471 | { |
440 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", | 472 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", |
441 | __FUNCTION__, nvidia_device,litmus_clock()); | 473 | __FUNCTION__, nvidia_device,litmus_clock()); |
442 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | 474 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", |
443 | __FUNCTION__,device_owner->pid,nvidia_device); | 475 | __FUNCTION__,device_owner->pid,nvidia_device); |
444 | 476 | ||
@@ -461,7 +493,9 @@ void __tasklet_schedule(struct tasklet_struct *t) | |||
461 | } | 493 | } |
462 | } | 494 | } |
463 | unlock_nv_registry(nvidia_device, &flags); | 495 | unlock_nv_registry(nvidia_device, &flags); |
496 | #endif | ||
464 | } | 497 | } |
498 | |||
465 | #endif | 499 | #endif |
466 | 500 | ||
467 | ___tasklet_schedule(t); | 501 | ___tasklet_schedule(t); |
@@ -487,19 +521,19 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
487 | { | 521 | { |
488 | #ifdef CONFIG_LITMUS_NVIDIA | 522 | #ifdef CONFIG_LITMUS_NVIDIA |
489 | if(is_nvidia_func(t->func)) | 523 | if(is_nvidia_func(t->func)) |
490 | { | 524 | { |
491 | u32 nvidia_device = get_tasklet_nv_device_num(t); | 525 | u32 nvidia_device = get_tasklet_nv_device_num(t); |
492 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | 526 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", |
493 | // __FUNCTION__, nvidia_device,litmus_clock()); | 527 | // __FUNCTION__, nvidia_device,litmus_clock()); |
494 | 528 | ||
495 | unsigned long flags; | 529 | unsigned long flags; |
496 | struct task_struct* device_owner; | 530 | struct task_struct* device_owner; |
497 | 531 | ||
498 | lock_nv_registry(nvidia_device, &flags); | 532 | lock_nv_registry(nvidia_device, &flags); |
499 | 533 | ||
500 | device_owner = get_nv_max_device_owner(nvidia_device); | 534 | device_owner = get_nv_max_device_owner(nvidia_device); |
501 | 535 | ||
502 | if(device_owner==NULL) | 536 | if(device_owner==NULL) |
503 | { | 537 | { |
504 | t->owner = NULL; | 538 | t->owner = NULL; |
505 | } | 539 | } |
@@ -508,10 +542,10 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
508 | if( is_realtime(device_owner)) | 542 | if( is_realtime(device_owner)) |
509 | { | 543 | { |
510 | TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", | 544 | TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", |
511 | __FUNCTION__, nvidia_device,litmus_clock()); | 545 | __FUNCTION__, nvidia_device,litmus_clock()); |
512 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | 546 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", |
513 | __FUNCTION__,device_owner->pid,nvidia_device); | 547 | __FUNCTION__,device_owner->pid,nvidia_device); |
514 | 548 | ||
515 | t->owner = device_owner; | 549 | t->owner = device_owner; |
516 | sched_trace_tasklet_release(t->owner); | 550 | sched_trace_tasklet_release(t->owner); |
517 | if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) | 551 | if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) |
@@ -553,15 +587,15 @@ EXPORT_SYMBOL(___tasklet_hi_schedule); | |||
553 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) | 587 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) |
554 | { | 588 | { |
555 | BUG_ON(!irqs_disabled()); | 589 | BUG_ON(!irqs_disabled()); |
556 | #ifdef CONFIG_LITMUS_NVIDIA | 590 | #ifdef CONFIG_LITMUS_NVIDIA |
557 | if(is_nvidia_func(t->func)) | 591 | if(is_nvidia_func(t->func)) |
558 | { | 592 | { |
559 | u32 nvidia_device = get_tasklet_nv_device_num(t); | 593 | u32 nvidia_device = get_tasklet_nv_device_num(t); |
560 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | 594 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", |
561 | // __FUNCTION__, nvidia_device,litmus_clock()); | 595 | // __FUNCTION__, nvidia_device,litmus_clock()); |
562 | unsigned long flags; | 596 | unsigned long flags; |
563 | struct task_struct* device_owner; | 597 | struct task_struct* device_owner; |
564 | 598 | ||
565 | lock_nv_registry(nvidia_device, &flags); | 599 | lock_nv_registry(nvidia_device, &flags); |
566 | 600 | ||
567 | device_owner = get_nv_max_device_owner(nvidia_device); | 601 | device_owner = get_nv_max_device_owner(nvidia_device); |
@@ -576,10 +610,10 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) | |||
576 | { | 610 | { |
577 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", | 611 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", |
578 | __FUNCTION__, nvidia_device,litmus_clock()); | 612 | __FUNCTION__, nvidia_device,litmus_clock()); |
579 | 613 | ||
580 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | 614 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", |
581 | __FUNCTION__,device_owner->pid,nvidia_device); | 615 | __FUNCTION__,device_owner->pid,nvidia_device); |
582 | 616 | ||
583 | t->owner = device_owner; | 617 | t->owner = device_owner; |
584 | sched_trace_tasklet_release(t->owner); | 618 | sched_trace_tasklet_release(t->owner); |
585 | if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) | 619 | if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) |
diff --git a/litmus/Kconfig b/litmus/Kconfig index a34440f3d8bc..03cc92c50eb9 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig | |||
@@ -157,7 +157,7 @@ config SCHED_TASK_TRACE | |||
157 | config SCHED_TASK_TRACE_SHIFT | 157 | config SCHED_TASK_TRACE_SHIFT |
158 | int "Buffer size for sched_trace_xxx() events" | 158 | int "Buffer size for sched_trace_xxx() events" |
159 | depends on SCHED_TASK_TRACE | 159 | depends on SCHED_TASK_TRACE |
160 | range 8 13 | 160 | range 8 15 |
161 | default 9 | 161 | default 9 |
162 | help | 162 | help |
163 | 163 | ||
@@ -253,7 +253,7 @@ endmenu | |||
253 | 253 | ||
254 | menu "Interrupt Handling" | 254 | menu "Interrupt Handling" |
255 | 255 | ||
256 | choice | 256 | choice |
257 | prompt "Scheduling of interrupt bottom-halves in Litmus." | 257 | prompt "Scheduling of interrupt bottom-halves in Litmus." |
258 | default LITMUS_SOFTIRQD_NONE | 258 | default LITMUS_SOFTIRQD_NONE |
259 | depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ | 259 | depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ |
@@ -282,11 +282,11 @@ config LITMUS_PAI_SOFTIRQD | |||
282 | scheduling points. Trades context switch overhead | 282 | scheduling points. Trades context switch overhead |
283 | at the cost of non-preemptive durations of bottom half | 283 | at the cost of non-preemptive durations of bottom half |
284 | processing. | 284 | processing. |
285 | 285 | ||
286 | G-EDF/RM, C-EDF/RM ONLY for now! | 286 | G-EDF/RM, C-EDF/RM ONLY for now! |
287 | 287 | ||
288 | endchoice | 288 | endchoice |
289 | 289 | ||
290 | 290 | ||
291 | config NR_LITMUS_SOFTIRQD | 291 | config NR_LITMUS_SOFTIRQD |
292 | int "Number of klitirqd." | 292 | int "Number of klitirqd." |
@@ -344,7 +344,7 @@ choice | |||
344 | depends on LITMUS_NVIDIA | 344 | depends on LITMUS_NVIDIA |
345 | help | 345 | help |
346 | Select the version of CUDA/driver to support. | 346 | Select the version of CUDA/driver to support. |
347 | 347 | ||
348 | config CUDA_4_0 | 348 | config CUDA_4_0 |
349 | bool "CUDA 4.0" | 349 | bool "CUDA 4.0" |
350 | depends on LITMUS_NVIDIA | 350 | depends on LITMUS_NVIDIA |
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 70a86bdd9aec..9762be1a085e 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c | |||
@@ -5,25 +5,32 @@ | |||
5 | #include <litmus/litmus.h> | 5 | #include <litmus/litmus.h> |
6 | #include <litmus/gpu_affinity.h> | 6 | #include <litmus/gpu_affinity.h> |
7 | 7 | ||
8 | #include <litmus/sched_trace.h> | ||
9 | |||
8 | #define OBSERVATION_CAP 2*1e9 | 10 | #define OBSERVATION_CAP 2*1e9 |
9 | 11 | ||
10 | static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) | 12 | static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) |
11 | { | 13 | { |
14 | fp_t relative_err; | ||
12 | fp_t err, new; | 15 | fp_t err, new; |
13 | fp_t actual = _integer_to_fp(observed); | 16 | fp_t actual = _integer_to_fp(observed); |
14 | 17 | ||
15 | err = _sub(actual, fb->est); | 18 | err = _sub(actual, fb->est); |
16 | new = _add(_mul(a, err), _mul(b, fb->accum_err)); | 19 | new = _add(_mul(a, err), _mul(b, fb->accum_err)); |
17 | 20 | ||
21 | relative_err = _div(err, actual); | ||
22 | |||
18 | fb->est = new; | 23 | fb->est = new; |
19 | fb->accum_err = _add(fb->accum_err, err); | 24 | fb->accum_err = _add(fb->accum_err, err); |
25 | |||
26 | return relative_err; | ||
20 | } | 27 | } |
21 | 28 | ||
22 | void update_gpu_estimate(struct task_struct *t, lt_t observed) | 29 | void update_gpu_estimate(struct task_struct *t, lt_t observed) |
23 | { | 30 | { |
24 | feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | 31 | feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); |
25 | 32 | ||
26 | WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST); | 33 | BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); |
27 | 34 | ||
28 | if(unlikely(fb->est.val == 0)) { | 35 | if(unlikely(fb->est.val == 0)) { |
29 | // kludge-- cap observed values to prevent whacky estimations. | 36 | // kludge-- cap observed values to prevent whacky estimations. |
@@ -40,18 +47,29 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) | |||
40 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. | 47 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. |
41 | } | 48 | } |
42 | else { | 49 | else { |
43 | update_estimate(fb, | 50 | fp_t rel_err = update_estimate(fb, |
44 | tsk_rt(t)->gpu_fb_param_a, | 51 | tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration], |
45 | tsk_rt(t)->gpu_fb_param_b, | 52 | tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration], |
46 | observed); | 53 | observed); |
47 | 54 | ||
48 | if(_fp_to_integer(fb->est) <= 0) { | 55 | if(unlikely(_fp_to_integer(fb->est) <= 0)) { |
49 | // TODO: talk to Jonathan about how well this works. | ||
50 | // Maybe we should average the observed and est instead? | ||
51 | TRACE_TASK(t, "Invalid estimate. Patching.\n"); | 56 | TRACE_TASK(t, "Invalid estimate. Patching.\n"); |
52 | fb->est = _integer_to_fp(observed); | 57 | fb->est = _integer_to_fp(observed); |
53 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. | 58 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. |
54 | } | 59 | } |
60 | else { | ||
61 | // struct migration_info mig_info; | ||
62 | |||
63 | sched_trace_prediction_err(t, | ||
64 | &(tsk_rt(t)->gpu_migration), | ||
65 | &rel_err); | ||
66 | |||
67 | // mig_info.observed = observed; | ||
68 | // mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); | ||
69 | // mig_info.distance = tsk_rt(t)->gpu_migration; | ||
70 | // | ||
71 | // sched_trace_migration(t, &mig_info); | ||
72 | } | ||
55 | } | 73 | } |
56 | 74 | ||
57 | TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", | 75 | TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", |
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 023443014d4b..83b708ab85cb 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c | |||
@@ -1346,6 +1346,10 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1346 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1346 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1347 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | 1347 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { |
1348 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1348 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1349 | if(fq_of_new_on_fq->count == 0) { | ||
1350 | // ignore it? | ||
1351 | // fq_of_new_on_fq = fq; | ||
1352 | } | ||
1349 | } | 1353 | } |
1350 | else { | 1354 | else { |
1351 | fq_of_new_on_fq = fq; | 1355 | fq_of_new_on_fq = fq; |
@@ -1383,6 +1387,10 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1383 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1387 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1384 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | 1388 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { |
1385 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1389 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1390 | if(fq_of_new_on_fq->count == 0) { | ||
1391 | // ignore it? | ||
1392 | // fq_of_new_on_fq = fq; | ||
1393 | } | ||
1386 | } | 1394 | } |
1387 | else { | 1395 | else { |
1388 | fq_of_new_on_fq = fq; | 1396 | fq_of_new_on_fq = fq; |
@@ -1409,6 +1417,10 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1409 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1417 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
1410 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | 1418 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { |
1411 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1419 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
1420 | if(fq_of_new_on_fq->count == 0) { | ||
1421 | // ignore it? | ||
1422 | // fq_of_new_on_fq = fq; | ||
1423 | } | ||
1412 | } | 1424 | } |
1413 | else { | 1425 | else { |
1414 | fq_of_new_on_fq = fq; | 1426 | fq_of_new_on_fq = fq; |
@@ -1569,7 +1581,7 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1569 | } | 1581 | } |
1570 | } | 1582 | } |
1571 | 1583 | ||
1572 | 1584 | wake_kludge: | |
1573 | if(waitqueue_active(&fq->wait)) | 1585 | if(waitqueue_active(&fq->wait)) |
1574 | { | 1586 | { |
1575 | wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); | 1587 | wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); |
@@ -1672,6 +1684,16 @@ int ikglp_unlock(struct litmus_lock* l) | |||
1672 | // wake up the new resource holder! | 1684 | // wake up the new resource holder! |
1673 | wake_up_process(next); | 1685 | wake_up_process(next); |
1674 | } | 1686 | } |
1687 | if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) { | ||
1688 | // The guy we promoted went to an empty FQ. (Why didn't stealing pick this up?) | ||
1689 | // Wake up the new guy too. | ||
1690 | |||
1691 | BUG_ON(fq_of_new_on_fq->owner != NULL); | ||
1692 | |||
1693 | fq = fq_of_new_on_fq; | ||
1694 | fq_of_new_on_fq = NULL; | ||
1695 | goto wake_kludge; | ||
1696 | } | ||
1675 | 1697 | ||
1676 | unlock_fine_irqrestore(&sem->lock, flags); | 1698 | unlock_fine_irqrestore(&sem->lock, flags); |
1677 | unlock_global_irqrestore(dgl_lock, flags); | 1699 | unlock_global_irqrestore(dgl_lock, flags); |
@@ -1917,7 +1939,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
1917 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | 1939 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { |
1918 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | 1940 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", |
1919 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | 1941 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); |
1920 | return(NULL); | 1942 | // return(NULL); |
1921 | } | 1943 | } |
1922 | 1944 | ||
1923 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | 1945 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); |
@@ -2600,8 +2622,8 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | |||
2600 | 2622 | ||
2601 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration | 2623 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration |
2602 | 2624 | ||
2603 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", | 2625 | TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", |
2604 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); | 2626 | t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration); |
2605 | 2627 | ||
2606 | // count the number of resource holders | 2628 | // count the number of resource holders |
2607 | ++(*(aff->q_info[replica].nr_cur_users)); | 2629 | ++(*(aff->q_info[replica].nr_cur_users)); |
@@ -2626,8 +2648,6 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, | |||
2626 | 2648 | ||
2627 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | 2649 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); |
2628 | 2650 | ||
2629 | tsk_rt(t)->last_gpu = gpu; | ||
2630 | |||
2631 | // count the number of resource holders | 2651 | // count the number of resource holders |
2632 | --(*(aff->q_info[replica].nr_cur_users)); | 2652 | --(*(aff->q_info[replica].nr_cur_users)); |
2633 | 2653 | ||
@@ -2636,12 +2656,15 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, | |||
2636 | // update estimates | 2656 | // update estimates |
2637 | update_gpu_estimate(t, get_gpu_time(t)); | 2657 | update_gpu_estimate(t, get_gpu_time(t)); |
2638 | 2658 | ||
2639 | TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. " | 2659 | TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. " |
2640 | "estimated was %llu. diff is %d\n", | 2660 | "estimated was %llu. diff is %d\n", |
2641 | t->comm, t->pid, gpu, | 2661 | t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, |
2662 | tsk_rt(t)->gpu_migration, | ||
2642 | get_gpu_time(t), | 2663 | get_gpu_time(t), |
2643 | est_time, | 2664 | est_time, |
2644 | (long long)get_gpu_time(t) - (long long)est_time); | 2665 | (long long)get_gpu_time(t) - (long long)est_time); |
2666 | |||
2667 | tsk_rt(t)->last_gpu = gpu; | ||
2645 | } | 2668 | } |
2646 | 2669 | ||
2647 | struct ikglp_affinity_ops gpu_ikglp_affinity = | 2670 | struct ikglp_affinity_ops gpu_ikglp_affinity = |
diff --git a/litmus/jobs.c b/litmus/jobs.c index 36e314625d86..1d97462cc128 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c | |||
@@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t) | |||
10 | { | 10 | { |
11 | BUG_ON(!t); | 11 | BUG_ON(!t); |
12 | /* prepare next release */ | 12 | /* prepare next release */ |
13 | t->rt_param.job_params.release = t->rt_param.job_params.deadline; | 13 | |
14 | t->rt_param.job_params.deadline += get_rt_period(t); | 14 | if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) { |
15 | /* allow the release point to slip if we've passed our deadline. */ | ||
16 | lt_t now = litmus_clock(); | ||
17 | t->rt_param.job_params.release = | ||
18 | (t->rt_param.job_params.deadline < now) ? | ||
19 | now : t->rt_param.job_params.deadline; | ||
20 | t->rt_param.job_params.deadline = | ||
21 | t->rt_param.job_params.release + get_rt_period(t); | ||
22 | } | ||
23 | else { | ||
24 | t->rt_param.job_params.release = t->rt_param.job_params.deadline; | ||
25 | t->rt_param.job_params.deadline += get_rt_period(t); | ||
26 | } | ||
27 | |||
15 | t->rt_param.job_params.exec_time = 0; | 28 | t->rt_param.job_params.exec_time = 0; |
16 | /* update job sequence number */ | 29 | /* update job sequence number */ |
17 | t->rt_param.job_params.job_no++; | 30 | t->rt_param.job_params.job_no++; |
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index 0b64977789a6..bff857ed8d4e 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c | |||
@@ -590,7 +590,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
590 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | 590 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { |
591 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | 591 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", |
592 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | 592 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); |
593 | return(NULL); | 593 | // return(NULL); |
594 | } | 594 | } |
595 | 595 | ||
596 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); | 596 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); |
diff --git a/litmus/litmus.c b/litmus/litmus.c index 5b301c418b96..d1f836c8af6e 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c | |||
@@ -318,11 +318,21 @@ asmlinkage long sys_null_call(cycles_t __user *ts) | |||
318 | void init_gpu_affinity_state(struct task_struct* p) | 318 | void init_gpu_affinity_state(struct task_struct* p) |
319 | { | 319 | { |
320 | // under-damped | 320 | // under-damped |
321 | p->rt_param.gpu_fb_param_a = _frac(14008, 10000); | 321 | //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); |
322 | p->rt_param.gpu_fb_param_b = _frac(16024, 10000); | 322 | //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); |
323 | // critically-damped | 323 | |
324 | // p->rt_param.gpu_fb_param_a = _frac(102, 1000); | 324 | // empirical; |
325 | // p->rt_param.gpu_fb_param_b = _frac(303, 1000); | 325 | p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); |
326 | p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); | ||
327 | |||
328 | p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000); | ||
329 | p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000); | ||
330 | |||
331 | p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000); | ||
332 | p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000); | ||
333 | |||
334 | p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); | ||
335 | p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); | ||
326 | 336 | ||
327 | p->rt_param.gpu_migration = MIG_NONE; | 337 | p->rt_param.gpu_migration = MIG_NONE; |
328 | p->rt_param.last_gpu = -1; | 338 | p->rt_param.last_gpu = -1; |
diff --git a/litmus/locking.c b/litmus/locking.c index cb11c04ed0d4..718a5a3281d7 100644 --- a/litmus/locking.c +++ b/litmus/locking.c | |||
@@ -349,10 +349,10 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) | |||
349 | all_acquired: | 349 | all_acquired: |
350 | 350 | ||
351 | // FOR SANITY CHECK FOR TESTING | 351 | // FOR SANITY CHECK FOR TESTING |
352 | for(i = 0; i < dgl_wait->size; ++i) { | 352 | // for(i = 0; i < dgl_wait->size; ++i) { |
353 | struct litmus_lock *l = dgl_wait->locks[i]; | 353 | // struct litmus_lock *l = dgl_wait->locks[i]; |
354 | BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 354 | // BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); |
355 | } | 355 | // } |
356 | 356 | ||
357 | TRACE_CUR("Acquired entire DGL\n"); | 357 | TRACE_CUR("Acquired entire DGL\n"); |
358 | 358 | ||
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 3d8c50882066..4b86a50d3bd1 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c | |||
@@ -265,6 +265,11 @@ int init_nvidia_info(void) | |||
265 | } | 265 | } |
266 | } | 266 | } |
267 | 267 | ||
268 | void shutdown_nvidia_info(void) | ||
269 | { | ||
270 | nvidia_mod = NULL; | ||
271 | mb(); | ||
272 | } | ||
268 | 273 | ||
269 | /* works with pointers to static data inside the module too. */ | 274 | /* works with pointers to static data inside the module too. */ |
270 | int is_nvidia_func(void* func_addr) | 275 | int is_nvidia_func(void* func_addr) |
@@ -319,14 +324,11 @@ u32 get_work_nv_device_num(const struct work_struct *t) | |||
319 | } | 324 | } |
320 | 325 | ||
321 | 326 | ||
322 | |||
323 | #define MAX_NR_OWNERS 3 | ||
324 | |||
325 | typedef struct { | 327 | typedef struct { |
326 | raw_spinlock_t lock; | 328 | raw_spinlock_t lock; |
327 | int nr_owners; | 329 | int nr_owners; |
328 | struct task_struct* max_prio_owner; | 330 | struct task_struct* max_prio_owner; |
329 | struct task_struct* owners[MAX_NR_OWNERS]; | 331 | struct task_struct* owners[NV_MAX_SIMULT_USERS]; |
330 | }nv_device_registry_t; | 332 | }nv_device_registry_t; |
331 | 333 | ||
332 | static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; | 334 | static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; |
@@ -431,6 +433,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) | |||
431 | return ret; // assume already registered. | 433 | return ret; // assume already registered. |
432 | } | 434 | } |
433 | 435 | ||
436 | |||
434 | raw_spin_lock_irqsave(®->lock, flags); | 437 | raw_spin_lock_irqsave(®->lock, flags); |
435 | 438 | ||
436 | if(reg->nr_owners < NV_MAX_SIMULT_USERS) { | 439 | if(reg->nr_owners < NV_MAX_SIMULT_USERS) { |
@@ -461,7 +464,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) | |||
461 | else | 464 | else |
462 | { | 465 | { |
463 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); | 466 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); |
464 | ret = -EBUSY; | 467 | //ret = -EBUSY; |
465 | } | 468 | } |
466 | 469 | ||
467 | raw_spin_unlock_irqrestore(®->lock, flags); | 470 | raw_spin_unlock_irqrestore(®->lock, flags); |
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c index 965164c43537..75ed87c5ed48 100644 --- a/litmus/rsm_lock.c +++ b/litmus/rsm_lock.c | |||
@@ -502,6 +502,13 @@ int rsm_mutex_unlock(struct litmus_lock* l) | |||
502 | tsk_rt(next)->blocked_lock = NULL; | 502 | tsk_rt(next)->blocked_lock = NULL; |
503 | mb(); | 503 | mb(); |
504 | 504 | ||
505 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
506 | // re-enable tracking | ||
507 | if(tsk_rt(next)->held_gpus) { | ||
508 | tsk_rt(next)->suspend_gpu_tracker_on_block = 0; | ||
509 | } | ||
510 | #endif | ||
511 | |||
505 | wake_up_process(next); | 512 | wake_up_process(next); |
506 | } | 513 | } |
507 | else { | 514 | else { |
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 99f7620925ba..be14dbec6ed2 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c | |||
@@ -733,11 +733,11 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio, | |||
733 | unsigned long flags; | 733 | unsigned long flags; |
734 | cedf_domain_t *cluster; | 734 | cedf_domain_t *cluster; |
735 | struct task_struct *probe; | 735 | struct task_struct *probe; |
736 | 736 | ||
737 | // identify the cluster by the assignment of these tasks. one should | 737 | // identify the cluster by the assignment of these tasks. one should |
738 | // be non-NULL. | 738 | // be non-NULL. |
739 | probe = (old_prio) ? old_prio : new_prio; | 739 | probe = (old_prio) ? old_prio : new_prio; |
740 | 740 | ||
741 | if(probe) { | 741 | if(probe) { |
742 | cluster = task_cpu_cluster(probe); | 742 | cluster = task_cpu_cluster(probe); |
743 | 743 | ||
@@ -838,8 +838,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
838 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | 838 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) |
839 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { | 839 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { |
840 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | 840 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { |
841 | // don't track preemptions or locking protocol suspensions. | ||
842 | TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); | ||
841 | stop_gpu_tracker(entry->scheduled); | 843 | stop_gpu_tracker(entry->scheduled); |
842 | } | 844 | } |
845 | else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
846 | TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); | ||
847 | } | ||
843 | } | 848 | } |
844 | #endif | 849 | #endif |
845 | 850 | ||
@@ -1596,6 +1601,10 @@ static void cleanup_cedf(void) | |||
1596 | { | 1601 | { |
1597 | int i; | 1602 | int i; |
1598 | 1603 | ||
1604 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1605 | shutdown_nvidia_info(); | ||
1606 | #endif | ||
1607 | |||
1599 | if (clusters_allocated) { | 1608 | if (clusters_allocated) { |
1600 | for (i = 0; i < num_clusters; i++) { | 1609 | for (i = 0; i < num_clusters; i++) { |
1601 | kfree(cedf[i].cpus); | 1610 | kfree(cedf[i].cpus); |
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 2433297b7482..245e41c25a5d 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c | |||
@@ -13,6 +13,10 @@ | |||
13 | #include <litmus/preempt.h> | 13 | #include <litmus/preempt.h> |
14 | #include <litmus/jobs.h> | 14 | #include <litmus/jobs.h> |
15 | 15 | ||
16 | #ifdef CONFIG_LITMUS_NVIDIA | ||
17 | #include <litmus/nvidia_info.h> | ||
18 | #endif | ||
19 | |||
16 | /* | 20 | /* |
17 | * Generic function to trigger preemption on either local or remote cpu | 21 | * Generic function to trigger preemption on either local or remote cpu |
18 | * from scheduler plugins. The key feature is that this function is | 22 | * from scheduler plugins. The key feature is that this function is |
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void) | |||
102 | 106 | ||
103 | static long litmus_dummy_activate_plugin(void) | 107 | static long litmus_dummy_activate_plugin(void) |
104 | { | 108 | { |
109 | #ifdef CONFIG_LITMUS_NVIDIA | ||
110 | shutdown_nvidia_info(); | ||
111 | #endif | ||
105 | return 0; | 112 | return 0; |
106 | } | 113 | } |
107 | 114 | ||
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 2bd3a787611b..f7f575346b54 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c | |||
@@ -247,6 +247,53 @@ feather_callback void do_sched_trace_action(unsigned long id, | |||
247 | } | 247 | } |
248 | 248 | ||
249 | 249 | ||
250 | |||
251 | |||
252 | feather_callback void do_sched_trace_prediction_err(unsigned long id, | ||
253 | unsigned long _task, | ||
254 | unsigned long _distance, | ||
255 | unsigned long _rel_err) | ||
256 | { | ||
257 | struct task_struct *t = (struct task_struct*) _task; | ||
258 | struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t); | ||
259 | |||
260 | if (rec) { | ||
261 | gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance; | ||
262 | fp_t* rel_err = (fp_t*) _rel_err; | ||
263 | |||
264 | rec->data.prediction_err.distance = *distance; | ||
265 | rec->data.prediction_err.rel_err = rel_err->val; | ||
266 | put_record(rec); | ||
267 | } | ||
268 | } | ||
269 | |||
270 | |||
271 | feather_callback void do_sched_trace_migration(unsigned long id, | ||
272 | unsigned long _task, | ||
273 | unsigned long _mig_info) | ||
274 | { | ||
275 | struct task_struct *t = (struct task_struct*) _task; | ||
276 | struct st_event_record *rec = get_record(ST_MIGRATION, t); | ||
277 | |||
278 | if (rec) { | ||
279 | struct migration_info* mig_info = (struct migration_info*) _mig_info; | ||
280 | |||
281 | rec->hdr.extra = mig_info->distance; | ||
282 | rec->data.migration.observed = mig_info->observed; | ||
283 | rec->data.migration.estimated = mig_info->estimated; | ||
284 | |||
285 | put_record(rec); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | |||
290 | |||
291 | |||
292 | |||
293 | |||
294 | |||
295 | |||
296 | |||
250 | feather_callback void do_sched_trace_tasklet_release(unsigned long id, | 297 | feather_callback void do_sched_trace_tasklet_release(unsigned long id, |
251 | unsigned long _owner) | 298 | unsigned long _owner) |
252 | { | 299 | { |
@@ -457,3 +504,6 @@ EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); | |||
457 | 504 | ||
458 | 505 | ||
459 | 506 | ||
507 | |||
508 | |||
509 | |||