| author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-05-14 16:51:05 -0400 |
|---|---|---|
| committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-05-19 22:46:49 -0400 |
| commit | 44326648c2ea81b9a32619644fe9c665ed0d9e0b (patch) | |
| tree | ff1e00cf3cbc0e06f511a90c4f28aa8f7b40b12e | |
| parent | af6eeb156c7da47ff5df03a3da04432c8ac4460c (diff) | |
Final GPUSync implementation. (gpusync-rtss12)
| -rw-r--r-- | include/linux/interrupt.h | 6 | ||||
| -rw-r--r-- | include/litmus/gpu_affinity.h | 7 | ||||
| -rw-r--r-- | include/litmus/nvidia_info.h | 1 | ||||
| -rw-r--r-- | include/litmus/rt_param.h | 5 | ||||
| -rw-r--r-- | include/litmus/sched_trace.h | 44 | ||||
| -rw-r--r-- | kernel/mutex.c | 48 | ||||
| -rw-r--r-- | kernel/sched.c | 4 | ||||
| -rw-r--r-- | kernel/softirq.c | 66 | ||||
| -rw-r--r-- | litmus/Kconfig | 16 | ||||
| -rw-r--r-- | litmus/gpu_affinity.c | 38 | ||||
| -rw-r--r-- | litmus/ikglp_lock.c | 39 | ||||
| -rw-r--r-- | litmus/jobs.c | 17 | ||||
| -rw-r--r-- | litmus/kfmlp_lock.c | 2 | ||||
| -rw-r--r-- | litmus/litmus.c | 20 | ||||
| -rw-r--r-- | litmus/locking.c | 8 | ||||
| -rw-r--r-- | litmus/nvidia_info.c | 13 | ||||
| -rw-r--r-- | litmus/rsm_lock.c | 7 | ||||
| -rw-r--r-- | litmus/sched_cedf.c | 13 | ||||
| -rw-r--r-- | litmus/sched_plugin.c | 7 | ||||
| -rw-r--r-- | litmus/sched_task_trace.c | 50 |
20 files changed, 313 insertions, 98 deletions
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 57a7bc8807be..8fb3dad55f19 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
| @@ -528,7 +528,7 @@ static inline int tasklet_trylock(struct tasklet_struct *t) | |||
| 528 | 528 | ||
| 529 | static inline void tasklet_unlock(struct tasklet_struct *t) | 529 | static inline void tasklet_unlock(struct tasklet_struct *t) |
| 530 | { | 530 | { |
| 531 | smp_mb__before_clear_bit(); | 531 | smp_mb__before_clear_bit(); |
| 532 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | 532 | clear_bit(TASKLET_STATE_RUN, &(t)->state); |
| 533 | } | 533 | } |
| 534 | 534 | ||
| @@ -590,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) | |||
| 590 | } | 590 | } |
| 591 | 591 | ||
| 592 | static inline void tasklet_enable(struct tasklet_struct *t) | 592 | static inline void tasklet_enable(struct tasklet_struct *t) |
| 593 | { | 593 | { |
| 594 | smp_mb__before_atomic_dec(); | 594 | smp_mb__before_atomic_dec(); |
| 595 | atomic_dec(&t->count); | 595 | atomic_dec(&t->count); |
| 596 | } | 596 | } |
| @@ -659,7 +659,7 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) | |||
| 659 | * if more than one irq occurred. | 659 | * if more than one irq occurred. |
| 660 | */ | 660 | */ |
| 661 | 661 | ||
| 662 | #if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) | 662 | #if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) |
| 663 | static inline unsigned long probe_irq_on(void) | 663 | static inline unsigned long probe_irq_on(void) |
| 664 | { | 664 | { |
| 665 | return 0; | 665 | return 0; |
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h index d4db2003ad86..6b3fb8b28745 100644 --- a/include/litmus/gpu_affinity.h +++ b/include/litmus/gpu_affinity.h | |||
| @@ -43,10 +43,7 @@ static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t | |||
| 43 | val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); | 43 | val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | // minimum value is 1 (val is 0 if we haven't run with local affinity yet) | 46 | return ((val > 0) ? val : dist+1); |
| 47 | // TODO: pick a better default min-value. 1 is too small. perhaps | ||
| 48 | // task execution time? | ||
| 49 | return ((val > 0) ? val : 1); | ||
| 50 | } | 47 | } |
| 51 | 48 | ||
| 52 | #endif \ No newline at end of file | 49 | #endif |
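The change above replaces the hard-coded minimum of 1 returned by get_gpu_estimate() when a task has no observation yet for a given migration distance: the fallback is now dist+1, so a farther migration never looks cheaper than a nearer one before any data exists. A minimal user-space sketch of that lookup, assuming a plain est_ns[] array and illustrative distance constants in place of the per-task fixed-point state:

```c
#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t;

/* Illustrative distance values; the kernel's gpu_migration_dist_t enum
 * (MIG_NONE .. MIG_LAST) plays this role. */
enum { DIST_LOCAL = 0, DIST_NEAR = 1, DIST_MED = 2, DIST_FAR = 3, DIST_MAX = 3 };

/* Stand-in for tsk_rt(t)->gpu_migration_est[dist].est after conversion
 * from fixed point; zero means "never observed". */
static lt_t est_ns[DIST_MAX + 1];

/* Mirrors the patched fallback: an unobserved distance reports dist+1
 * instead of a flat 1, preserving the "closer is cheaper" ordering. */
static lt_t get_gpu_estimate(int dist)
{
	lt_t val = est_ns[dist];
	return (val > 0) ? val : (lt_t)(dist + 1);
}

int main(void)
{
	est_ns[DIST_NEAR] = 5000;  /* only one distance has been observed */
	for (int d = DIST_LOCAL; d <= DIST_MAX; ++d)
		printf("dist %d -> %llu\n", d,
		       (unsigned long long)get_gpu_estimate(d));
	return 0;
}
```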
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h index 580728051d4e..97c9577141db 100644 --- a/include/litmus/nvidia_info.h +++ b/include/litmus/nvidia_info.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS | 12 | #define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS |
| 13 | 13 | ||
| 14 | int init_nvidia_info(void); | 14 | int init_nvidia_info(void); |
| 15 | void shutdown_nvidia_info(void); | ||
| 15 | 16 | ||
| 16 | int is_nvidia_func(void* func_addr); | 17 | int is_nvidia_func(void* func_addr); |
| 17 | 18 | ||
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 4553521146cc..0198884eab86 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h | |||
| @@ -26,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b) | |||
| 26 | typedef enum { | 26 | typedef enum { |
| 27 | RT_CLASS_HARD, | 27 | RT_CLASS_HARD, |
| 28 | RT_CLASS_SOFT, | 28 | RT_CLASS_SOFT, |
| 29 | RT_CLASS_SOFT_W_SLIP, | ||
| 29 | RT_CLASS_BEST_EFFORT | 30 | RT_CLASS_BEST_EFFORT |
| 30 | } task_class_t; | 31 | } task_class_t; |
| 31 | 32 | ||
| @@ -189,8 +190,8 @@ struct rt_param { | |||
| 189 | long unsigned int held_gpus; // bitmap of held GPUs. | 190 | long unsigned int held_gpus; // bitmap of held GPUs. |
| 190 | 191 | ||
| 191 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 192 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
| 192 | fp_t gpu_fb_param_a; | 193 | fp_t gpu_fb_param_a[MIG_LAST+1]; |
| 193 | fp_t gpu_fb_param_b; | 194 | fp_t gpu_fb_param_b[MIG_LAST+1]; |
| 194 | 195 | ||
| 195 | gpu_migration_dist_t gpu_migration; | 196 | gpu_migration_dist_t gpu_migration; |
| 196 | int last_gpu; | 197 | int last_gpu; |
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 232c7588d103..b1b71f6c5f0c 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h | |||
| @@ -10,7 +10,8 @@ struct st_trace_header { | |||
| 10 | u8 type; /* Of what type is this record? */ | 10 | u8 type; /* Of what type is this record? */ |
| 11 | u8 cpu; /* On which CPU was it recorded? */ | 11 | u8 cpu; /* On which CPU was it recorded? */ |
| 12 | u16 pid; /* PID of the task. */ | 12 | u16 pid; /* PID of the task. */ |
| 13 | u32 job; /* The job sequence number. */ | 13 | u32 job:24; /* The job sequence number. */ |
| 14 | u8 extra; | ||
| 14 | } __attribute__((packed)); | 15 | } __attribute__((packed)); |
| 15 | 16 | ||
| 16 | #define ST_NAME_LEN 16 | 17 | #define ST_NAME_LEN 16 |
| @@ -136,6 +137,22 @@ struct st_nv_interrupt_end_data { | |||
| 136 | u32 serialNumber; | 137 | u32 serialNumber; |
| 137 | } __attribute__((packed)); | 138 | } __attribute__((packed)); |
| 138 | 139 | ||
| 140 | struct st_prediction_err_data { | ||
| 141 | u64 distance; | ||
| 142 | u64 rel_err; | ||
| 143 | } __attribute__((packed)); | ||
| 144 | |||
| 145 | struct st_migration_data { | ||
| 146 | u64 observed; | ||
| 147 | u64 estimated; | ||
| 148 | } __attribute__((packed)); | ||
| 149 | |||
| 150 | struct migration_info { | ||
| 151 | u64 observed; | ||
| 152 | u64 estimated; | ||
| 153 | u8 distance; | ||
| 154 | } __attribute__((packed)); | ||
| 155 | |||
| 139 | #define DATA(x) struct st_ ## x ## _data x; | 156 | #define DATA(x) struct st_ ## x ## _data x; |
| 140 | 157 | ||
| 141 | typedef enum { | 158 | typedef enum { |
| @@ -160,6 +177,9 @@ typedef enum { | |||
| 160 | ST_EFF_PRIO_CHANGE, | 177 | ST_EFF_PRIO_CHANGE, |
| 161 | ST_NV_INTERRUPT_BEGIN, | 178 | ST_NV_INTERRUPT_BEGIN, |
| 162 | ST_NV_INTERRUPT_END, | 179 | ST_NV_INTERRUPT_END, |
| 180 | |||
| 181 | ST_PREDICTION_ERR, | ||
| 182 | ST_MIGRATION, | ||
| 163 | } st_event_record_type_t; | 183 | } st_event_record_type_t; |
| 164 | 184 | ||
| 165 | struct st_event_record { | 185 | struct st_event_record { |
| @@ -187,6 +207,9 @@ struct st_event_record { | |||
| 187 | DATA(effective_priority_change); | 207 | DATA(effective_priority_change); |
| 188 | DATA(nv_interrupt_begin); | 208 | DATA(nv_interrupt_begin); |
| 189 | DATA(nv_interrupt_end); | 209 | DATA(nv_interrupt_end); |
| 210 | |||
| 211 | DATA(prediction_err); | ||
| 212 | DATA(migration); | ||
| 190 | } data; | 213 | } data; |
| 191 | } __attribute__((packed)); | 214 | } __attribute__((packed)); |
| 192 | 215 | ||
| @@ -259,6 +282,19 @@ feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, | |||
| 259 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, | 282 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, |
| 260 | unsigned long unused); | 283 | unsigned long unused); |
| 261 | 284 | ||
| 285 | feather_callback void do_sched_trace_prediction_err(unsigned long id, | ||
| 286 | struct task_struct* task, | ||
| 287 | gpu_migration_dist_t* distance, | ||
| 288 | fp_t* rel_err); | ||
| 289 | |||
| 290 | |||
| 291 | |||
| 292 | |||
| 293 | |||
| 294 | feather_callback void do_sched_trace_migration(unsigned long id, | ||
| 295 | struct task_struct* task, | ||
| 296 | struct migration_info* mig_info); | ||
| 297 | |||
| 262 | 298 | ||
| 263 | /* returns true if we're tracing an interrupt on current CPU */ | 299 | /* returns true if we're tracing an interrupt on current CPU */ |
| 264 | /* int is_interrupt_tracing_active(void); */ | 300 | /* int is_interrupt_tracing_active(void); */ |
| @@ -331,6 +367,12 @@ feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, | |||
| 331 | #define sched_trace_nv_interrupt_end(d) \ | 367 | #define sched_trace_nv_interrupt_end(d) \ |
| 332 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) | 368 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) |
| 333 | 369 | ||
| 370 | #define sched_trace_prediction_err(t, dist, rel_err) \ | ||
| 371 | SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err) | ||
| 372 | |||
| 373 | #define sched_trace_migration(t, mig_info) \ | ||
| 374 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info) | ||
| 375 | |||
| 334 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ | 376 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ |
| 335 | 377 | ||
| 336 | #endif /* __KERNEL__ */ | 378 | #endif /* __KERNEL__ */ |
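The header change above steals the top byte of the 32-bit job counter to add an extra field while keeping the packed record header the same size; the new migration record uses that byte for the migration distance (see sched_task_trace.c later in this patch). A small sketch of the layout, assuming GCC-style packed bitfields as used throughout these headers:

```c
#include <stdint.h>
#include <assert.h>
#include <stdio.h>

/* Reshaped trace header: the job sequence number shrinks to 24 bits and
 * the freed byte becomes 'extra', so the header stays 8 bytes and existing
 * trace readers keep their record size. */
struct st_trace_header {
	uint8_t  type;      /* record type */
	uint8_t  cpu;       /* CPU the record was written on */
	uint16_t pid;       /* task PID */
	uint32_t job:24;    /* job sequence number, now 24 bits */
	uint8_t  extra;     /* per-record scratch byte (e.g. migration distance) */
} __attribute__((packed));

static_assert(sizeof(struct st_trace_header) == 8,
	      "header size must not change under existing trace readers");

int main(void)
{
	struct st_trace_header h = { .type = 1, .cpu = 2, .pid = 1234,
				     .job = 99, .extra = 3 };
	printf("header is %zu bytes, job=%u extra=%u\n",
	       sizeof h, (unsigned)h.job, h.extra);
	return 0;
}
```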
diff --git a/kernel/mutex.c b/kernel/mutex.c index 2f363b9bfc1f..96bcecd385d3 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
| @@ -511,12 +511,12 @@ void mutex_lock_sfx(struct mutex *lock, | |||
| 511 | struct task_struct *task = current; | 511 | struct task_struct *task = current; |
| 512 | struct mutex_waiter waiter; | 512 | struct mutex_waiter waiter; |
| 513 | unsigned long flags; | 513 | unsigned long flags; |
| 514 | 514 | ||
| 515 | preempt_disable(); | 515 | preempt_disable(); |
| 516 | mutex_acquire(&lock->dep_map, subclass, 0, ip); | 516 | mutex_acquire(&lock->dep_map, subclass, 0, ip); |
| 517 | 517 | ||
| 518 | spin_lock_mutex(&lock->wait_lock, flags); | 518 | spin_lock_mutex(&lock->wait_lock, flags); |
| 519 | 519 | ||
| 520 | if(pre) | 520 | if(pre) |
| 521 | { | 521 | { |
| 522 | if(unlikely(pre(pre_arg))) | 522 | if(unlikely(pre(pre_arg))) |
| @@ -530,16 +530,16 @@ void mutex_lock_sfx(struct mutex *lock, | |||
| 530 | 530 | ||
| 531 | debug_mutex_lock_common(lock, &waiter); | 531 | debug_mutex_lock_common(lock, &waiter); |
| 532 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); | 532 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); |
| 533 | 533 | ||
| 534 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | 534 | /* add waiting tasks to the end of the waitqueue (FIFO): */ |
| 535 | list_add_tail(&waiter.list, &lock->wait_list); | 535 | list_add_tail(&waiter.list, &lock->wait_list); |
| 536 | waiter.task = task; | 536 | waiter.task = task; |
| 537 | 537 | ||
| 538 | if (atomic_xchg(&lock->count, -1) == 1) | 538 | if (atomic_xchg(&lock->count, -1) == 1) |
| 539 | goto done; | 539 | goto done; |
| 540 | 540 | ||
| 541 | lock_contended(&lock->dep_map, ip); | 541 | lock_contended(&lock->dep_map, ip); |
| 542 | 542 | ||
| 543 | for (;;) { | 543 | for (;;) { |
| 544 | /* | 544 | /* |
| 545 | * Lets try to take the lock again - this is needed even if | 545 | * Lets try to take the lock again - this is needed even if |
| @@ -552,9 +552,9 @@ void mutex_lock_sfx(struct mutex *lock, | |||
| 552 | */ | 552 | */ |
| 553 | if (atomic_xchg(&lock->count, -1) == 1) | 553 | if (atomic_xchg(&lock->count, -1) == 1) |
| 554 | break; | 554 | break; |
| 555 | 555 | ||
| 556 | __set_task_state(task, state); | 556 | __set_task_state(task, state); |
| 557 | 557 | ||
| 558 | /* didnt get the lock, go to sleep: */ | 558 | /* didnt get the lock, go to sleep: */ |
| 559 | spin_unlock_mutex(&lock->wait_lock, flags); | 559 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 560 | preempt_enable_no_resched(); | 560 | preempt_enable_no_resched(); |
| @@ -562,22 +562,22 @@ void mutex_lock_sfx(struct mutex *lock, | |||
| 562 | preempt_disable(); | 562 | preempt_disable(); |
| 563 | spin_lock_mutex(&lock->wait_lock, flags); | 563 | spin_lock_mutex(&lock->wait_lock, flags); |
| 564 | } | 564 | } |
| 565 | 565 | ||
| 566 | done: | 566 | done: |
| 567 | lock_acquired(&lock->dep_map, ip); | 567 | lock_acquired(&lock->dep_map, ip); |
| 568 | /* got the lock - rejoice! */ | 568 | /* got the lock - rejoice! */ |
| 569 | mutex_remove_waiter(lock, &waiter, current_thread_info()); | 569 | mutex_remove_waiter(lock, &waiter, current_thread_info()); |
| 570 | mutex_set_owner(lock); | 570 | mutex_set_owner(lock); |
| 571 | 571 | ||
| 572 | /* set it to 0 if there are no waiters left: */ | 572 | /* set it to 0 if there are no waiters left: */ |
| 573 | if (likely(list_empty(&lock->wait_list))) | 573 | if (likely(list_empty(&lock->wait_list))) |
| 574 | atomic_set(&lock->count, 0); | 574 | atomic_set(&lock->count, 0); |
| 575 | 575 | ||
| 576 | if(post) | 576 | if(post) |
| 577 | post(post_arg); | 577 | post(post_arg); |
| 578 | 578 | ||
| 579 | spin_unlock_mutex(&lock->wait_lock, flags); | 579 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 580 | 580 | ||
| 581 | debug_mutex_free_waiter(&waiter); | 581 | debug_mutex_free_waiter(&waiter); |
| 582 | preempt_enable(); | 582 | preempt_enable(); |
| 583 | } | 583 | } |
| @@ -588,16 +588,16 @@ void mutex_unlock_sfx(struct mutex *lock, | |||
| 588 | side_effect_t post, unsigned long post_arg) | 588 | side_effect_t post, unsigned long post_arg) |
| 589 | { | 589 | { |
| 590 | unsigned long flags; | 590 | unsigned long flags; |
| 591 | 591 | ||
| 592 | spin_lock_mutex(&lock->wait_lock, flags); | 592 | spin_lock_mutex(&lock->wait_lock, flags); |
| 593 | 593 | ||
| 594 | if(pre) | 594 | if(pre) |
| 595 | pre(pre_arg); | 595 | pre(pre_arg); |
| 596 | 596 | ||
| 597 | //mutex_release(&lock->dep_map, nested, _RET_IP_); | 597 | //mutex_release(&lock->dep_map, nested, _RET_IP_); |
| 598 | mutex_release(&lock->dep_map, 1, _RET_IP_); | 598 | mutex_release(&lock->dep_map, 1, _RET_IP_); |
| 599 | debug_mutex_unlock(lock); | 599 | debug_mutex_unlock(lock); |
| 600 | 600 | ||
| 601 | /* | 601 | /* |
| 602 | * some architectures leave the lock unlocked in the fastpath failure | 602 | * some architectures leave the lock unlocked in the fastpath failure |
| 603 | * case, others need to leave it locked. In the later case we have to | 603 | * case, others need to leave it locked. In the later case we have to |
| @@ -605,21 +605,21 @@ void mutex_unlock_sfx(struct mutex *lock, | |||
| 605 | */ | 605 | */ |
| 606 | if (__mutex_slowpath_needs_to_unlock()) | 606 | if (__mutex_slowpath_needs_to_unlock()) |
| 607 | atomic_set(&lock->count, 1); | 607 | atomic_set(&lock->count, 1); |
| 608 | 608 | ||
| 609 | if (!list_empty(&lock->wait_list)) { | 609 | if (!list_empty(&lock->wait_list)) { |
| 610 | /* get the first entry from the wait-list: */ | 610 | /* get the first entry from the wait-list: */ |
| 611 | struct mutex_waiter *waiter = | 611 | struct mutex_waiter *waiter = |
| 612 | list_entry(lock->wait_list.next, | 612 | list_entry(lock->wait_list.next, |
| 613 | struct mutex_waiter, list); | 613 | struct mutex_waiter, list); |
| 614 | 614 | ||
| 615 | debug_mutex_wake_waiter(lock, waiter); | 615 | debug_mutex_wake_waiter(lock, waiter); |
| 616 | 616 | ||
| 617 | wake_up_process(waiter->task); | 617 | wake_up_process(waiter->task); |
| 618 | } | 618 | } |
| 619 | 619 | ||
| 620 | if(post) | 620 | if(post) |
| 621 | post(post_arg); | 621 | post(post_arg); |
| 622 | 622 | ||
| 623 | spin_unlock_mutex(&lock->wait_lock, flags); | 623 | spin_unlock_mutex(&lock->wait_lock, flags); |
| 624 | } | 624 | } |
| 625 | EXPORT_SYMBOL(mutex_unlock_sfx); | 625 | EXPORT_SYMBOL(mutex_unlock_sfx); |
diff --git a/kernel/sched.c b/kernel/sched.c index f3d9a69a3777..2f990b4b24f9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -4430,8 +4430,8 @@ litmus_need_resched_nonpreemptible: | |||
| 4430 | 4430 | ||
| 4431 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | 4431 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD |
| 4432 | litmus->run_tasklets(prev); | 4432 | litmus->run_tasklets(prev); |
| 4433 | #endif | 4433 | #endif |
| 4434 | 4434 | ||
| 4435 | srp_ceiling_block(); | 4435 | srp_ceiling_block(); |
| 4436 | } | 4436 | } |
| 4437 | EXPORT_SYMBOL(schedule); | 4437 | EXPORT_SYMBOL(schedule); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 1c42e08fdfaa..4d7b1a3e4d01 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -216,7 +216,7 @@ EXPORT_SYMBOL(local_bh_enable_ip); | |||
| 216 | 216 | ||
| 217 | asmlinkage void __do_softirq(void) | 217 | asmlinkage void __do_softirq(void) |
| 218 | { | 218 | { |
| 219 | struct softirq_action *h; | 219 | struct softirq_action *h; |
| 220 | __u32 pending; | 220 | __u32 pending; |
| 221 | int max_restart = MAX_SOFTIRQ_RESTART; | 221 | int max_restart = MAX_SOFTIRQ_RESTART; |
| 222 | int cpu; | 222 | int cpu; |
| @@ -254,10 +254,10 @@ restart: | |||
| 254 | softirq_to_name[vec_nr], h->action, | 254 | softirq_to_name[vec_nr], h->action, |
| 255 | prev_count, preempt_count()); | 255 | prev_count, preempt_count()); |
| 256 | preempt_count() = prev_count; | 256 | preempt_count() = prev_count; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | rcu_bh_qs(cpu); | 259 | rcu_bh_qs(cpu); |
| 260 | } | 260 | } |
| 261 | h++; | 261 | h++; |
| 262 | pending >>= 1; | 262 | pending >>= 1; |
| 263 | } while (pending); | 263 | } while (pending); |
| @@ -412,13 +412,45 @@ struct tasklet_head | |||
| 412 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); | 412 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); |
| 413 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | 413 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); |
| 414 | 414 | ||
| 415 | #ifdef CONFIG_LITMUS_NVIDIA | ||
| 416 | static int __do_nv_now(struct tasklet_struct* tasklet) | ||
| 417 | { | ||
| 418 | int success = 1; | ||
| 419 | |||
| 420 | if(tasklet_trylock(tasklet)) { | ||
| 421 | if (!atomic_read(&tasklet->count)) { | ||
| 422 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { | ||
| 423 | BUG(); | ||
| 424 | } | ||
| 425 | tasklet->func(tasklet->data); | ||
| 426 | tasklet_unlock(tasklet); | ||
| 427 | } | ||
| 428 | else { | ||
| 429 | success = 0; | ||
| 430 | } | ||
| 431 | |||
| 432 | tasklet_unlock(tasklet); | ||
| 433 | } | ||
| 434 | else { | ||
| 435 | success = 0; | ||
| 436 | } | ||
| 437 | |||
| 438 | return success; | ||
| 439 | } | ||
| 440 | #endif | ||
| 441 | |||
| 415 | 442 | ||
| 416 | void __tasklet_schedule(struct tasklet_struct *t) | 443 | void __tasklet_schedule(struct tasklet_struct *t) |
| 417 | { | 444 | { |
| 418 | #ifdef CONFIG_LITMUS_NVIDIA | 445 | #ifdef CONFIG_LITMUS_NVIDIA |
| 419 | if(is_nvidia_func(t->func)) | 446 | if(is_nvidia_func(t->func)) |
| 420 | { | 447 | { |
| 421 | u32 nvidia_device = get_tasklet_nv_device_num(t); | 448 | #if 0 |
| 449 | // do nvidia tasklets right away and return | ||
| 450 | if(__do_nv_now(t)) | ||
| 451 | return; | ||
| 452 | #else | ||
| 453 | u32 nvidia_device = get_tasklet_nv_device_num(t); | ||
| 422 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | 454 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", |
| 423 | // __FUNCTION__, nvidia_device,litmus_clock()); | 455 | // __FUNCTION__, nvidia_device,litmus_clock()); |
| 424 | 456 | ||
| @@ -438,7 +470,7 @@ void __tasklet_schedule(struct tasklet_struct *t) | |||
| 438 | if(is_realtime(device_owner)) | 470 | if(is_realtime(device_owner)) |
| 439 | { | 471 | { |
| 440 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", | 472 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", |
| 441 | __FUNCTION__, nvidia_device,litmus_clock()); | 473 | __FUNCTION__, nvidia_device,litmus_clock()); |
| 442 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | 474 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", |
| 443 | __FUNCTION__,device_owner->pid,nvidia_device); | 475 | __FUNCTION__,device_owner->pid,nvidia_device); |
| 444 | 476 | ||
| @@ -461,7 +493,9 @@ void __tasklet_schedule(struct tasklet_struct *t) | |||
| 461 | } | 493 | } |
| 462 | } | 494 | } |
| 463 | unlock_nv_registry(nvidia_device, &flags); | 495 | unlock_nv_registry(nvidia_device, &flags); |
| 496 | #endif | ||
| 464 | } | 497 | } |
| 498 | |||
| 465 | #endif | 499 | #endif |
| 466 | 500 | ||
| 467 | ___tasklet_schedule(t); | 501 | ___tasklet_schedule(t); |
| @@ -487,19 +521,19 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
| 487 | { | 521 | { |
| 488 | #ifdef CONFIG_LITMUS_NVIDIA | 522 | #ifdef CONFIG_LITMUS_NVIDIA |
| 489 | if(is_nvidia_func(t->func)) | 523 | if(is_nvidia_func(t->func)) |
| 490 | { | 524 | { |
| 491 | u32 nvidia_device = get_tasklet_nv_device_num(t); | 525 | u32 nvidia_device = get_tasklet_nv_device_num(t); |
| 492 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | 526 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", |
| 493 | // __FUNCTION__, nvidia_device,litmus_clock()); | 527 | // __FUNCTION__, nvidia_device,litmus_clock()); |
| 494 | 528 | ||
| 495 | unsigned long flags; | 529 | unsigned long flags; |
| 496 | struct task_struct* device_owner; | 530 | struct task_struct* device_owner; |
| 497 | 531 | ||
| 498 | lock_nv_registry(nvidia_device, &flags); | 532 | lock_nv_registry(nvidia_device, &flags); |
| 499 | 533 | ||
| 500 | device_owner = get_nv_max_device_owner(nvidia_device); | 534 | device_owner = get_nv_max_device_owner(nvidia_device); |
| 501 | 535 | ||
| 502 | if(device_owner==NULL) | 536 | if(device_owner==NULL) |
| 503 | { | 537 | { |
| 504 | t->owner = NULL; | 538 | t->owner = NULL; |
| 505 | } | 539 | } |
| @@ -508,10 +542,10 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
| 508 | if( is_realtime(device_owner)) | 542 | if( is_realtime(device_owner)) |
| 509 | { | 543 | { |
| 510 | TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", | 544 | TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", |
| 511 | __FUNCTION__, nvidia_device,litmus_clock()); | 545 | __FUNCTION__, nvidia_device,litmus_clock()); |
| 512 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | 546 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", |
| 513 | __FUNCTION__,device_owner->pid,nvidia_device); | 547 | __FUNCTION__,device_owner->pid,nvidia_device); |
| 514 | 548 | ||
| 515 | t->owner = device_owner; | 549 | t->owner = device_owner; |
| 516 | sched_trace_tasklet_release(t->owner); | 550 | sched_trace_tasklet_release(t->owner); |
| 517 | if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) | 551 | if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) |
| @@ -553,15 +587,15 @@ EXPORT_SYMBOL(___tasklet_hi_schedule); | |||
| 553 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) | 587 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) |
| 554 | { | 588 | { |
| 555 | BUG_ON(!irqs_disabled()); | 589 | BUG_ON(!irqs_disabled()); |
| 556 | #ifdef CONFIG_LITMUS_NVIDIA | 590 | #ifdef CONFIG_LITMUS_NVIDIA |
| 557 | if(is_nvidia_func(t->func)) | 591 | if(is_nvidia_func(t->func)) |
| 558 | { | 592 | { |
| 559 | u32 nvidia_device = get_tasklet_nv_device_num(t); | 593 | u32 nvidia_device = get_tasklet_nv_device_num(t); |
| 560 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | 594 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", |
| 561 | // __FUNCTION__, nvidia_device,litmus_clock()); | 595 | // __FUNCTION__, nvidia_device,litmus_clock()); |
| 562 | unsigned long flags; | 596 | unsigned long flags; |
| 563 | struct task_struct* device_owner; | 597 | struct task_struct* device_owner; |
| 564 | 598 | ||
| 565 | lock_nv_registry(nvidia_device, &flags); | 599 | lock_nv_registry(nvidia_device, &flags); |
| 566 | 600 | ||
| 567 | device_owner = get_nv_max_device_owner(nvidia_device); | 601 | device_owner = get_nv_max_device_owner(nvidia_device); |
| @@ -576,10 +610,10 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) | |||
| 576 | { | 610 | { |
| 577 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", | 611 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", |
| 578 | __FUNCTION__, nvidia_device,litmus_clock()); | 612 | __FUNCTION__, nvidia_device,litmus_clock()); |
| 579 | 613 | ||
| 580 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | 614 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", |
| 581 | __FUNCTION__,device_owner->pid,nvidia_device); | 615 | __FUNCTION__,device_owner->pid,nvidia_device); |
| 582 | 616 | ||
| 583 | t->owner = device_owner; | 617 | t->owner = device_owner; |
| 584 | sched_trace_tasklet_release(t->owner); | 618 | sched_trace_tasklet_release(t->owner); |
| 585 | if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) | 619 | if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) |
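Besides the PAI bookkeeping, the softirq hunks add __do_nv_now(), a helper (left compiled out behind #if 0 at the call site) that would run an NVIDIA tasklet in place rather than deferring it. A user-space analogue of that claim-run-or-give-up pattern, using C11 atomics as stand-ins for the tasklet RUN/SCHED state bits and disable count; all names here are illustrative, not the kernel API:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Analogue of struct tasklet_struct's state for this sketch. */
struct mini_tasklet {
	atomic_flag running;        /* TASKLET_STATE_RUN   */
	atomic_bool scheduled;      /* TASKLET_STATE_SCHED */
	atomic_int  disabled;       /* tasklet->count      */
	void (*func)(unsigned long);
	unsigned long data;
};

/* Try to execute the tasklet immediately: claim the RUN flag, make sure
 * it is not disabled, consume the SCHED flag, then run the body. Returns
 * false when it could not run, in which case the caller would fall back
 * to the normal deferred path (as __tasklet_schedule() does). */
static bool run_now(struct mini_tasklet *t)
{
	if (atomic_flag_test_and_set(&t->running))
		return false;                    /* busy on another CPU */

	bool ran = false;
	if (atomic_load(&t->disabled) == 0 &&
	    atomic_exchange(&t->scheduled, false)) {
		t->func(t->data);
		ran = true;
	}
	atomic_flag_clear(&t->running);
	return ran;
}

static void body(unsigned long d) { printf("tasklet body ran, data=%lu\n", d); }

int main(void)
{
	struct mini_tasklet t = { .running = ATOMIC_FLAG_INIT,
				  .scheduled = true, .func = body, .data = 7 };
	printf("ran immediately: %s\n", run_now(&t) ? "yes" : "no");
	return 0;
}
```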
diff --git a/litmus/Kconfig b/litmus/Kconfig index a34440f3d8bc..03cc92c50eb9 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig | |||
| @@ -157,7 +157,7 @@ config SCHED_TASK_TRACE | |||
| 157 | config SCHED_TASK_TRACE_SHIFT | 157 | config SCHED_TASK_TRACE_SHIFT |
| 158 | int "Buffer size for sched_trace_xxx() events" | 158 | int "Buffer size for sched_trace_xxx() events" |
| 159 | depends on SCHED_TASK_TRACE | 159 | depends on SCHED_TASK_TRACE |
| 160 | range 8 13 | 160 | range 8 15 |
| 161 | default 9 | 161 | default 9 |
| 162 | help | 162 | help |
| 163 | 163 | ||
| @@ -253,7 +253,7 @@ endmenu | |||
| 253 | 253 | ||
| 254 | menu "Interrupt Handling" | 254 | menu "Interrupt Handling" |
| 255 | 255 | ||
| 256 | choice | 256 | choice |
| 257 | prompt "Scheduling of interrupt bottom-halves in Litmus." | 257 | prompt "Scheduling of interrupt bottom-halves in Litmus." |
| 258 | default LITMUS_SOFTIRQD_NONE | 258 | default LITMUS_SOFTIRQD_NONE |
| 259 | depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ | 259 | depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ |
| @@ -282,11 +282,11 @@ config LITMUS_PAI_SOFTIRQD | |||
| 282 | scheduling points. Trades context switch overhead | 282 | scheduling points. Trades context switch overhead |
| 283 | at the cost of non-preemptive durations of bottom half | 283 | at the cost of non-preemptive durations of bottom half |
| 284 | processing. | 284 | processing. |
| 285 | 285 | ||
| 286 | G-EDF/RM, C-EDF/RM ONLY for now! | 286 | G-EDF/RM, C-EDF/RM ONLY for now! |
| 287 | 287 | ||
| 288 | endchoice | 288 | endchoice |
| 289 | 289 | ||
| 290 | 290 | ||
| 291 | config NR_LITMUS_SOFTIRQD | 291 | config NR_LITMUS_SOFTIRQD |
| 292 | int "Number of klitirqd." | 292 | int "Number of klitirqd." |
| @@ -344,7 +344,7 @@ choice | |||
| 344 | depends on LITMUS_NVIDIA | 344 | depends on LITMUS_NVIDIA |
| 345 | help | 345 | help |
| 346 | Select the version of CUDA/driver to support. | 346 | Select the version of CUDA/driver to support. |
| 347 | 347 | ||
| 348 | config CUDA_4_0 | 348 | config CUDA_4_0 |
| 349 | bool "CUDA 4.0" | 349 | bool "CUDA 4.0" |
| 350 | depends on LITMUS_NVIDIA | 350 | depends on LITMUS_NVIDIA |
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c index 70a86bdd9aec..9762be1a085e 100644 --- a/litmus/gpu_affinity.c +++ b/litmus/gpu_affinity.c | |||
| @@ -5,25 +5,32 @@ | |||
| 5 | #include <litmus/litmus.h> | 5 | #include <litmus/litmus.h> |
| 6 | #include <litmus/gpu_affinity.h> | 6 | #include <litmus/gpu_affinity.h> |
| 7 | 7 | ||
| 8 | #include <litmus/sched_trace.h> | ||
| 9 | |||
| 8 | #define OBSERVATION_CAP 2*1e9 | 10 | #define OBSERVATION_CAP 2*1e9 |
| 9 | 11 | ||
| 10 | static void update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) | 12 | static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) |
| 11 | { | 13 | { |
| 14 | fp_t relative_err; | ||
| 12 | fp_t err, new; | 15 | fp_t err, new; |
| 13 | fp_t actual = _integer_to_fp(observed); | 16 | fp_t actual = _integer_to_fp(observed); |
| 14 | 17 | ||
| 15 | err = _sub(actual, fb->est); | 18 | err = _sub(actual, fb->est); |
| 16 | new = _add(_mul(a, err), _mul(b, fb->accum_err)); | 19 | new = _add(_mul(a, err), _mul(b, fb->accum_err)); |
| 17 | 20 | ||
| 21 | relative_err = _div(err, actual); | ||
| 22 | |||
| 18 | fb->est = new; | 23 | fb->est = new; |
| 19 | fb->accum_err = _add(fb->accum_err, err); | 24 | fb->accum_err = _add(fb->accum_err, err); |
| 25 | |||
| 26 | return relative_err; | ||
| 20 | } | 27 | } |
| 21 | 28 | ||
| 22 | void update_gpu_estimate(struct task_struct *t, lt_t observed) | 29 | void update_gpu_estimate(struct task_struct *t, lt_t observed) |
| 23 | { | 30 | { |
| 24 | feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | 31 | feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); |
| 25 | 32 | ||
| 26 | WARN_ON(tsk_rt(t)->gpu_migration > MIG_LAST); | 33 | BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); |
| 27 | 34 | ||
| 28 | if(unlikely(fb->est.val == 0)) { | 35 | if(unlikely(fb->est.val == 0)) { |
| 29 | // kludge-- cap observed values to prevent whacky estimations. | 36 | // kludge-- cap observed values to prevent whacky estimations. |
| @@ -40,18 +47,29 @@ void update_gpu_estimate(struct task_struct *t, lt_t observed) | |||
| 40 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. | 47 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. |
| 41 | } | 48 | } |
| 42 | else { | 49 | else { |
| 43 | update_estimate(fb, | 50 | fp_t rel_err = update_estimate(fb, |
| 44 | tsk_rt(t)->gpu_fb_param_a, | 51 | tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration], |
| 45 | tsk_rt(t)->gpu_fb_param_b, | 52 | tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration], |
| 46 | observed); | 53 | observed); |
| 47 | 54 | ||
| 48 | if(_fp_to_integer(fb->est) <= 0) { | 55 | if(unlikely(_fp_to_integer(fb->est) <= 0)) { |
| 49 | // TODO: talk to Jonathan about how well this works. | ||
| 50 | // Maybe we should average the observed and est instead? | ||
| 51 | TRACE_TASK(t, "Invalid estimate. Patching.\n"); | 56 | TRACE_TASK(t, "Invalid estimate. Patching.\n"); |
| 52 | fb->est = _integer_to_fp(observed); | 57 | fb->est = _integer_to_fp(observed); |
| 53 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. | 58 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. |
| 54 | } | 59 | } |
| 60 | else { | ||
| 61 | // struct migration_info mig_info; | ||
| 62 | |||
| 63 | sched_trace_prediction_err(t, | ||
| 64 | &(tsk_rt(t)->gpu_migration), | ||
| 65 | &rel_err); | ||
| 66 | |||
| 67 | // mig_info.observed = observed; | ||
| 68 | // mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); | ||
| 69 | // mig_info.distance = tsk_rt(t)->gpu_migration; | ||
| 70 | // | ||
| 71 | // sched_trace_migration(t, &mig_info); | ||
| 72 | } | ||
| 55 | } | 73 | } |
| 56 | 74 | ||
| 57 | TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", | 75 | TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n", |
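update_estimate() above now returns the relative prediction error so the caller can emit the new ST_PREDICTION_ERR trace record, and the a/b gains are indexed by migration distance. A floating-point sketch of the filter follows; the kernel uses the fixed-point fp_t type, and the constants below are simply the decimal equivalents of the _frac() values installed in init_gpu_affinity_state():

```c
#include <stdio.h>

/* Double-precision analogue of feedback_est_t and update_estimate():
 * the next estimate is rebuilt from the current error and the accumulated
 * error, and the relative error is returned for tracing. */
struct feedback_est {
	double est;        /* current estimate of GPU-related execution time */
	double accum_err;  /* running sum of past errors */
};

static double update_estimate(struct feedback_est *fb, double a, double b,
			      double observed)
{
	double err = observed - fb->est;
	double rel_err = err / observed;

	fb->est = a * err + b * fb->accum_err;
	fb->accum_err += err;
	return rel_err;
}

int main(void)
{
	/* Seeded the way the kernel does on first observation:
	 * est = observed, accum_err = est / 2. */
	struct feedback_est fb = { .est = 1000.0, .accum_err = 500.0 };
	/* a/b correspond to _frac(7550,10000) and _frac(45800,10000),
	 * the empirical gains installed for migration distance 0. */
	const double a = 0.755, b = 4.58;
	const double observed[] = { 1200.0, 1100.0, 1300.0 };

	for (int i = 0; i < 3; ++i) {
		double rel = update_estimate(&fb, a, b, observed[i]);
		printf("obs=%.0f  new est=%.1f  rel_err=%.3f\n",
		       observed[i], fb.est, rel);
	}
	return 0;
}
```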
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c index 023443014d4b..83b708ab85cb 100644 --- a/litmus/ikglp_lock.c +++ b/litmus/ikglp_lock.c | |||
| @@ -1346,6 +1346,10 @@ int ikglp_unlock(struct litmus_lock* l) | |||
| 1346 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1346 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
| 1347 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | 1347 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { |
| 1348 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1348 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
| 1349 | if(fq_of_new_on_fq->count == 0) { | ||
| 1350 | // ignore it? | ||
| 1351 | // fq_of_new_on_fq = fq; | ||
| 1352 | } | ||
| 1349 | } | 1353 | } |
| 1350 | else { | 1354 | else { |
| 1351 | fq_of_new_on_fq = fq; | 1355 | fq_of_new_on_fq = fq; |
| @@ -1383,6 +1387,10 @@ int ikglp_unlock(struct litmus_lock* l) | |||
| 1383 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1387 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
| 1384 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | 1388 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { |
| 1385 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1389 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
| 1390 | if(fq_of_new_on_fq->count == 0) { | ||
| 1391 | // ignore it? | ||
| 1392 | // fq_of_new_on_fq = fq; | ||
| 1393 | } | ||
| 1386 | } | 1394 | } |
| 1387 | else { | 1395 | else { |
| 1388 | fq_of_new_on_fq = fq; | 1396 | fq_of_new_on_fq = fq; |
| @@ -1409,6 +1417,10 @@ int ikglp_unlock(struct litmus_lock* l) | |||
| 1409 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | 1417 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING |
| 1410 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | 1418 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { |
| 1411 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | 1419 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); |
| 1420 | if(fq_of_new_on_fq->count == 0) { | ||
| 1421 | // ignore it? | ||
| 1422 | // fq_of_new_on_fq = fq; | ||
| 1423 | } | ||
| 1412 | } | 1424 | } |
| 1413 | else { | 1425 | else { |
| 1414 | fq_of_new_on_fq = fq; | 1426 | fq_of_new_on_fq = fq; |
| @@ -1569,7 +1581,7 @@ int ikglp_unlock(struct litmus_lock* l) | |||
| 1569 | } | 1581 | } |
| 1570 | } | 1582 | } |
| 1571 | 1583 | ||
| 1572 | 1584 | wake_kludge: | |
| 1573 | if(waitqueue_active(&fq->wait)) | 1585 | if(waitqueue_active(&fq->wait)) |
| 1574 | { | 1586 | { |
| 1575 | wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); | 1587 | wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); |
| @@ -1672,6 +1684,16 @@ int ikglp_unlock(struct litmus_lock* l) | |||
| 1672 | // wake up the new resource holder! | 1684 | // wake up the new resource holder! |
| 1673 | wake_up_process(next); | 1685 | wake_up_process(next); |
| 1674 | } | 1686 | } |
| 1687 | if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) { | ||
| 1688 | // The guy we promoted when to an empty FQ. (Why didn't stealing pick this up?) | ||
| 1689 | // Wake up the new guy too. | ||
| 1690 | |||
| 1691 | BUG_ON(fq_of_new_on_fq->owner != NULL); | ||
| 1692 | |||
| 1693 | fq = fq_of_new_on_fq; | ||
| 1694 | fq_of_new_on_fq = NULL; | ||
| 1695 | goto wake_kludge; | ||
| 1696 | } | ||
| 1675 | 1697 | ||
| 1676 | unlock_fine_irqrestore(&sem->lock, flags); | 1698 | unlock_fine_irqrestore(&sem->lock, flags); |
| 1677 | unlock_global_irqrestore(dgl_lock, flags); | 1699 | unlock_global_irqrestore(dgl_lock, flags); |
| @@ -1917,7 +1939,7 @@ static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* | |||
| 1917 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | 1939 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { |
| 1918 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | 1940 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", |
| 1919 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | 1941 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); |
| 1920 | return(NULL); | 1942 | // return(NULL); |
| 1921 | } | 1943 | } |
| 1922 | 1944 | ||
| 1923 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | 1945 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); |
| @@ -2600,8 +2622,8 @@ void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | |||
| 2600 | 2622 | ||
| 2601 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration | 2623 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration |
| 2602 | 2624 | ||
| 2603 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", | 2625 | TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", |
| 2604 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); | 2626 | t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration); |
| 2605 | 2627 | ||
| 2606 | // count the number or resource holders | 2628 | // count the number or resource holders |
| 2607 | ++(*(aff->q_info[replica].nr_cur_users)); | 2629 | ++(*(aff->q_info[replica].nr_cur_users)); |
| @@ -2626,8 +2648,6 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, | |||
| 2626 | 2648 | ||
| 2627 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | 2649 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); |
| 2628 | 2650 | ||
| 2629 | tsk_rt(t)->last_gpu = gpu; | ||
| 2630 | |||
| 2631 | // count the number or resource holders | 2651 | // count the number or resource holders |
| 2632 | --(*(aff->q_info[replica].nr_cur_users)); | 2652 | --(*(aff->q_info[replica].nr_cur_users)); |
| 2633 | 2653 | ||
| @@ -2636,12 +2656,15 @@ void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, | |||
| 2636 | // update estimates | 2656 | // update estimates |
| 2637 | update_gpu_estimate(t, get_gpu_time(t)); | 2657 | update_gpu_estimate(t, get_gpu_time(t)); |
| 2638 | 2658 | ||
| 2639 | TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. " | 2659 | TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. " |
| 2640 | "estimated was %llu. diff is %d\n", | 2660 | "estimated was %llu. diff is %d\n", |
| 2641 | t->comm, t->pid, gpu, | 2661 | t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, |
| 2662 | tsk_rt(t)->gpu_migration, | ||
| 2642 | get_gpu_time(t), | 2663 | get_gpu_time(t), |
| 2643 | est_time, | 2664 | est_time, |
| 2644 | (long long)get_gpu_time(t) - (long long)est_time); | 2665 | (long long)get_gpu_time(t) - (long long)est_time); |
| 2666 | |||
| 2667 | tsk_rt(t)->last_gpu = gpu; | ||
| 2645 | } | 2668 | } |
| 2646 | 2669 | ||
| 2647 | struct ikglp_affinity_ops gpu_ikglp_affinity = | 2670 | struct ikglp_affinity_ops gpu_ikglp_affinity = |
diff --git a/litmus/jobs.c b/litmus/jobs.c index 36e314625d86..1d97462cc128 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c | |||
| @@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t) | |||
| 10 | { | 10 | { |
| 11 | BUG_ON(!t); | 11 | BUG_ON(!t); |
| 12 | /* prepare next release */ | 12 | /* prepare next release */ |
| 13 | t->rt_param.job_params.release = t->rt_param.job_params.deadline; | 13 | |
| 14 | t->rt_param.job_params.deadline += get_rt_period(t); | 14 | if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) { |
| 15 | /* allow the release point to slip if we've passed our deadline. */ | ||
| 16 | lt_t now = litmus_clock(); | ||
| 17 | t->rt_param.job_params.release = | ||
| 18 | (t->rt_param.job_params.deadline < now) ? | ||
| 19 | now : t->rt_param.job_params.deadline; | ||
| 20 | t->rt_param.job_params.deadline = | ||
| 21 | t->rt_param.job_params.release + get_rt_period(t); | ||
| 22 | } | ||
| 23 | else { | ||
| 24 | t->rt_param.job_params.release = t->rt_param.job_params.deadline; | ||
| 25 | t->rt_param.job_params.deadline += get_rt_period(t); | ||
| 26 | } | ||
| 27 | |||
| 15 | t->rt_param.job_params.exec_time = 0; | 28 | t->rt_param.job_params.exec_time = 0; |
| 16 | /* update job sequence number */ | 29 | /* update job sequence number */ |
| 17 | t->rt_param.job_params.job_no++; | 30 | t->rt_param.job_params.job_no++; |
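The jobs.c change above carries the behavior behind the new RT_CLASS_SOFT_W_SLIP task class: when a job completes past its deadline, its next release slips forward to the current time instead of being back-dated, so late tasks do not accumulate a backlog of immediately-released jobs. A small sketch of both policies side by side, in plain C with litmus_clock() replaced by a caller-supplied 'now':

```c
#include <stdint.h>
#include <stdio.h>

typedef uint64_t lt_t;

struct job_params { lt_t release, deadline; };

/* prepare_for_next_period() analogue: 'slip' selects the new
 * RT_CLASS_SOFT_W_SLIP behavior, otherwise the classic periodic rule. */
static void prepare_next(struct job_params *j, lt_t period, lt_t now, int slip)
{
	if (slip)
		j->release = (j->deadline < now) ? now : j->deadline;
	else
		j->release = j->deadline;
	j->deadline = j->release + period;
}

int main(void)
{
	struct job_params slipping = { .release = 0, .deadline = 100 };
	struct job_params classic  = slipping;

	/* The job finishes late, at time 180, having missed its deadline. */
	prepare_next(&slipping, 100, 180, 1);  /* release=180, deadline=280 */
	prepare_next(&classic,  100, 180, 0);  /* release=100, deadline=200 */

	printf("slip:    release=%llu deadline=%llu\n",
	       (unsigned long long)slipping.release,
	       (unsigned long long)slipping.deadline);
	printf("classic: release=%llu deadline=%llu\n",
	       (unsigned long long)classic.release,
	       (unsigned long long)classic.deadline);
	return 0;
}
```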
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c index 0b64977789a6..bff857ed8d4e 100644 --- a/litmus/kfmlp_lock.c +++ b/litmus/kfmlp_lock.c | |||
| @@ -590,7 +590,7 @@ static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* | |||
| 590 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | 590 | if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { |
| 591 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | 591 | TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", |
| 592 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | 592 | NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); |
| 593 | return(NULL); | 593 | // return(NULL); |
| 594 | } | 594 | } |
| 595 | 595 | ||
| 596 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); | 596 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); |
diff --git a/litmus/litmus.c b/litmus/litmus.c index 5b301c418b96..d1f836c8af6e 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c | |||
| @@ -318,11 +318,21 @@ asmlinkage long sys_null_call(cycles_t __user *ts) | |||
| 318 | void init_gpu_affinity_state(struct task_struct* p) | 318 | void init_gpu_affinity_state(struct task_struct* p) |
| 319 | { | 319 | { |
| 320 | // under-damped | 320 | // under-damped |
| 321 | p->rt_param.gpu_fb_param_a = _frac(14008, 10000); | 321 | //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); |
| 322 | p->rt_param.gpu_fb_param_b = _frac(16024, 10000); | 322 | //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); |
| 323 | // critically-damped | 323 | |
| 324 | // p->rt_param.gpu_fb_param_a = _frac(102, 1000); | 324 | // emperical; |
| 325 | // p->rt_param.gpu_fb_param_b = _frac(303, 1000); | 325 | p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); |
| 326 | p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); | ||
| 327 | |||
| 328 | p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000); | ||
| 329 | p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000); | ||
| 330 | |||
| 331 | p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000); | ||
| 332 | p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000); | ||
| 333 | |||
| 334 | p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); | ||
| 335 | p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); | ||
| 326 | 336 | ||
| 327 | p->rt_param.gpu_migration = MIG_NONE; | 337 | p->rt_param.gpu_migration = MIG_NONE; |
| 328 | p->rt_param.last_gpu = -1; | 338 | p->rt_param.last_gpu = -1; |
diff --git a/litmus/locking.c b/litmus/locking.c index cb11c04ed0d4..718a5a3281d7 100644 --- a/litmus/locking.c +++ b/litmus/locking.c | |||
| @@ -349,10 +349,10 @@ static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) | |||
| 349 | all_acquired: | 349 | all_acquired: |
| 350 | 350 | ||
| 351 | // FOR SANITY CHECK FOR TESTING | 351 | // FOR SANITY CHECK FOR TESTING |
| 352 | for(i = 0; i < dgl_wait->size; ++i) { | 352 | // for(i = 0; i < dgl_wait->size; ++i) { |
| 353 | struct litmus_lock *l = dgl_wait->locks[i]; | 353 | // struct litmus_lock *l = dgl_wait->locks[i]; |
| 354 | BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | 354 | // BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); |
| 355 | } | 355 | // } |
| 356 | 356 | ||
| 357 | TRACE_CUR("Acquired entire DGL\n"); | 357 | TRACE_CUR("Acquired entire DGL\n"); |
| 358 | 358 | ||
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c index 3d8c50882066..4b86a50d3bd1 100644 --- a/litmus/nvidia_info.c +++ b/litmus/nvidia_info.c | |||
| @@ -265,6 +265,11 @@ int init_nvidia_info(void) | |||
| 265 | } | 265 | } |
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | void shutdown_nvidia_info(void) | ||
| 269 | { | ||
| 270 | nvidia_mod = NULL; | ||
| 271 | mb(); | ||
| 272 | } | ||
| 268 | 273 | ||
| 269 | /* works with pointers to static data inside the module too. */ | 274 | /* works with pointers to static data inside the module too. */ |
| 270 | int is_nvidia_func(void* func_addr) | 275 | int is_nvidia_func(void* func_addr) |
| @@ -319,14 +324,11 @@ u32 get_work_nv_device_num(const struct work_struct *t) | |||
| 319 | } | 324 | } |
| 320 | 325 | ||
| 321 | 326 | ||
| 322 | |||
| 323 | #define MAX_NR_OWNERS 3 | ||
| 324 | |||
| 325 | typedef struct { | 327 | typedef struct { |
| 326 | raw_spinlock_t lock; | 328 | raw_spinlock_t lock; |
| 327 | int nr_owners; | 329 | int nr_owners; |
| 328 | struct task_struct* max_prio_owner; | 330 | struct task_struct* max_prio_owner; |
| 329 | struct task_struct* owners[MAX_NR_OWNERS]; | 331 | struct task_struct* owners[NV_MAX_SIMULT_USERS]; |
| 330 | }nv_device_registry_t; | 332 | }nv_device_registry_t; |
| 331 | 333 | ||
| 332 | static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; | 334 | static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; |
| @@ -431,6 +433,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) | |||
| 431 | return ret; // assume already registered. | 433 | return ret; // assume already registered. |
| 432 | } | 434 | } |
| 433 | 435 | ||
| 436 | |||
| 434 | raw_spin_lock_irqsave(®->lock, flags); | 437 | raw_spin_lock_irqsave(®->lock, flags); |
| 435 | 438 | ||
| 436 | if(reg->nr_owners < NV_MAX_SIMULT_USERS) { | 439 | if(reg->nr_owners < NV_MAX_SIMULT_USERS) { |
| @@ -461,7 +464,7 @@ static int __reg_nv_device(int reg_device_id, struct task_struct *t) | |||
| 461 | else | 464 | else |
| 462 | { | 465 | { |
| 463 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); | 466 | TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); |
| 464 | ret = -EBUSY; | 467 | //ret = -EBUSY; |
| 465 | } | 468 | } |
| 466 | 469 | ||
| 467 | raw_spin_unlock_irqrestore(®->lock, flags); | 470 | raw_spin_unlock_irqrestore(®->lock, flags); |
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c index 965164c43537..75ed87c5ed48 100644 --- a/litmus/rsm_lock.c +++ b/litmus/rsm_lock.c | |||
| @@ -502,6 +502,13 @@ int rsm_mutex_unlock(struct litmus_lock* l) | |||
| 502 | tsk_rt(next)->blocked_lock = NULL; | 502 | tsk_rt(next)->blocked_lock = NULL; |
| 503 | mb(); | 503 | mb(); |
| 504 | 504 | ||
| 505 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
| 506 | // re-enable tracking | ||
| 507 | if(tsk_rt(next)->held_gpus) { | ||
| 508 | tsk_rt(next)->suspend_gpu_tracker_on_block = 0; | ||
| 509 | } | ||
| 510 | #endif | ||
| 511 | |||
| 505 | wake_up_process(next); | 512 | wake_up_process(next); |
| 506 | } | 513 | } |
| 507 | else { | 514 | else { |
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index 99f7620925ba..be14dbec6ed2 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c | |||
| @@ -733,11 +733,11 @@ static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio, | |||
| 733 | unsigned long flags; | 733 | unsigned long flags; |
| 734 | cedf_domain_t *cluster; | 734 | cedf_domain_t *cluster; |
| 735 | struct task_struct *probe; | 735 | struct task_struct *probe; |
| 736 | 736 | ||
| 737 | // identify the cluster by the assignment of these tasks. one should | 737 | // identify the cluster by the assignment of these tasks. one should |
| 738 | // be non-NULL. | 738 | // be non-NULL. |
| 739 | probe = (old_prio) ? old_prio : new_prio; | 739 | probe = (old_prio) ? old_prio : new_prio; |
| 740 | 740 | ||
| 741 | if(probe) { | 741 | if(probe) { |
| 742 | cluster = task_cpu_cluster(probe); | 742 | cluster = task_cpu_cluster(probe); |
| 743 | 743 | ||
| @@ -838,8 +838,13 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
| 838 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | 838 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) |
| 839 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { | 839 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { |
| 840 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | 840 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { |
| 841 | // don't track preemptions or locking protocol suspensions. | ||
| 842 | TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); | ||
| 841 | stop_gpu_tracker(entry->scheduled); | 843 | stop_gpu_tracker(entry->scheduled); |
| 842 | } | 844 | } |
| 845 | else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
| 846 | TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); | ||
| 847 | } | ||
| 843 | } | 848 | } |
| 844 | #endif | 849 | #endif |
| 845 | 850 | ||
| @@ -1596,6 +1601,10 @@ static void cleanup_cedf(void) | |||
| 1596 | { | 1601 | { |
| 1597 | int i; | 1602 | int i; |
| 1598 | 1603 | ||
| 1604 | #ifdef CONFIG_LITMUS_NVIDIA | ||
| 1605 | shutdown_nvidia_info(); | ||
| 1606 | #endif | ||
| 1607 | |||
| 1599 | if (clusters_allocated) { | 1608 | if (clusters_allocated) { |
| 1600 | for (i = 0; i < num_clusters; i++) { | 1609 | for (i = 0; i < num_clusters; i++) { |
| 1601 | kfree(cedf[i].cpus); | 1610 | kfree(cedf[i].cpus); |
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 2433297b7482..245e41c25a5d 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c | |||
| @@ -13,6 +13,10 @@ | |||
| 13 | #include <litmus/preempt.h> | 13 | #include <litmus/preempt.h> |
| 14 | #include <litmus/jobs.h> | 14 | #include <litmus/jobs.h> |
| 15 | 15 | ||
| 16 | #ifdef CONFIG_LITMUS_NVIDIA | ||
| 17 | #include <litmus/nvidia_info.h> | ||
| 18 | #endif | ||
| 19 | |||
| 16 | /* | 20 | /* |
| 17 | * Generic function to trigger preemption on either local or remote cpu | 21 | * Generic function to trigger preemption on either local or remote cpu |
| 18 | * from scheduler plugins. The key feature is that this function is | 22 | * from scheduler plugins. The key feature is that this function is |
| @@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void) | |||
| 102 | 106 | ||
| 103 | static long litmus_dummy_activate_plugin(void) | 107 | static long litmus_dummy_activate_plugin(void) |
| 104 | { | 108 | { |
| 109 | #ifdef CONFIG_LITMUS_NVIDIA | ||
| 110 | shutdown_nvidia_info(); | ||
| 111 | #endif | ||
| 105 | return 0; | 112 | return 0; |
| 106 | } | 113 | } |
| 107 | 114 | ||
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 2bd3a787611b..f7f575346b54 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c | |||
| @@ -247,6 +247,53 @@ feather_callback void do_sched_trace_action(unsigned long id, | |||
| 247 | } | 247 | } |
| 248 | 248 | ||
| 249 | 249 | ||
| 250 | |||
| 251 | |||
| 252 | feather_callback void do_sched_trace_prediction_err(unsigned long id, | ||
| 253 | unsigned long _task, | ||
| 254 | unsigned long _distance, | ||
| 255 | unsigned long _rel_err) | ||
| 256 | { | ||
| 257 | struct task_struct *t = (struct task_struct*) _task; | ||
| 258 | struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t); | ||
| 259 | |||
| 260 | if (rec) { | ||
| 261 | gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance; | ||
| 262 | fp_t* rel_err = (fp_t*) _rel_err; | ||
| 263 | |||
| 264 | rec->data.prediction_err.distance = *distance; | ||
| 265 | rec->data.prediction_err.rel_err = rel_err->val; | ||
| 266 | put_record(rec); | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | |||
| 271 | feather_callback void do_sched_trace_migration(unsigned long id, | ||
| 272 | unsigned long _task, | ||
| 273 | unsigned long _mig_info) | ||
| 274 | { | ||
| 275 | struct task_struct *t = (struct task_struct*) _task; | ||
| 276 | struct st_event_record *rec = get_record(ST_MIGRATION, t); | ||
| 277 | |||
| 278 | if (rec) { | ||
| 279 | struct migration_info* mig_info = (struct migration_info*) _mig_info; | ||
| 280 | |||
| 281 | rec->hdr.extra = mig_info->distance; | ||
| 282 | rec->data.migration.observed = mig_info->observed; | ||
| 283 | rec->data.migration.estimated = mig_info->estimated; | ||
| 284 | |||
| 285 | put_record(rec); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | |||
| 290 | |||
| 291 | |||
| 292 | |||
| 293 | |||
| 294 | |||
| 295 | |||
| 296 | |||
| 250 | feather_callback void do_sched_trace_tasklet_release(unsigned long id, | 297 | feather_callback void do_sched_trace_tasklet_release(unsigned long id, |
| 251 | unsigned long _owner) | 298 | unsigned long _owner) |
| 252 | { | 299 | { |
| @@ -457,3 +504,6 @@ EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); | |||
| 457 | 504 | ||
| 458 | 505 | ||
| 459 | 506 | ||
| 507 | |||
| 508 | |||
| 509 | |||
