67 files changed, 14061 insertions, 403 deletions
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6c0802eb2f7f..680a5cb4b585 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -10,6 +10,10 @@ | |||
10 | #include <linux/ftrace.h> | 10 | #include <linux/ftrace.h> |
11 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
12 | 12 | ||
13 | #ifdef CONFIG_LITMUS_NVIDIA | ||
14 | #include <litmus/sched_trace.h> | ||
15 | #endif | ||
16 | |||
13 | #include <asm/apic.h> | 17 | #include <asm/apic.h> |
14 | #include <asm/io_apic.h> | 18 | #include <asm/io_apic.h> |
15 | #include <asm/irq.h> | 19 | #include <asm/irq.h> |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d0126222b394..bd91e647228d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -358,3 +358,7 @@ ENTRY(sys_call_table) | |||
358 | .long sys_wait_for_ts_release | 358 | .long sys_wait_for_ts_release |
359 | .long sys_release_ts /* +10 */ | 359 | .long sys_release_ts /* +10 */ |
360 | .long sys_null_call | 360 | .long sys_null_call |
361 | .long sys_litmus_dgl_lock | ||
362 | .long sys_litmus_dgl_unlock | ||
363 | .long sys_set_aux_tasks | ||
364 | .long sys_sched_trace_event /* +15 */ | ||
diff --git a/include/linux/completion.h b/include/linux/completion.h index 51494e6b5548..a64fb5680400 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h | |||
@@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x) | |||
76 | init_waitqueue_head(&x->wait); | 76 | init_waitqueue_head(&x->wait); |
77 | } | 77 | } |
78 | 78 | ||
79 | extern void __wait_for_completion_locked(struct completion *); | ||
79 | extern void wait_for_completion(struct completion *); | 80 | extern void wait_for_completion(struct completion *); |
80 | extern int wait_for_completion_interruptible(struct completion *x); | 81 | extern int wait_for_completion_interruptible(struct completion *x); |
81 | extern int wait_for_completion_killable(struct completion *x); | 82 | extern int wait_for_completion_killable(struct completion *x); |
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f6efed0039ed..9fc31289a1bb 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h | |||
@@ -445,6 +445,7 @@ static inline void __raise_softirq_irqoff(unsigned int nr) | |||
445 | 445 | ||
446 | extern void raise_softirq_irqoff(unsigned int nr); | 446 | extern void raise_softirq_irqoff(unsigned int nr); |
447 | extern void raise_softirq(unsigned int nr); | 447 | extern void raise_softirq(unsigned int nr); |
448 | extern void wakeup_softirqd(void); | ||
448 | 449 | ||
449 | /* This is the worklist that queues up per-cpu softirq work. | 450 | /* This is the worklist that queues up per-cpu softirq work. |
450 | * | 451 | * |
@@ -500,6 +501,16 @@ struct tasklet_struct | |||
500 | atomic_t count; | 501 | atomic_t count; |
501 | void (*func)(unsigned long); | 502 | void (*func)(unsigned long); |
502 | unsigned long data; | 503 | unsigned long data; |
504 | |||
505 | #if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD) | ||
506 | struct task_struct *owner; | ||
507 | #endif | ||
508 | }; | ||
509 | |||
510 | struct tasklet_head | ||
511 | { | ||
512 | struct tasklet_struct *head; | ||
513 | struct tasklet_struct **tail; | ||
503 | }; | 514 | }; |
504 | 515 | ||
505 | #define DECLARE_TASKLET(name, func, data) \ | 516 | #define DECLARE_TASKLET(name, func, data) \ |
@@ -537,6 +548,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) | |||
537 | #define tasklet_unlock(t) do { } while (0) | 548 | #define tasklet_unlock(t) do { } while (0) |
538 | #endif | 549 | #endif |
539 | 550 | ||
551 | extern void ___tasklet_schedule(struct tasklet_struct *t); | ||
540 | extern void __tasklet_schedule(struct tasklet_struct *t); | 552 | extern void __tasklet_schedule(struct tasklet_struct *t); |
541 | 553 | ||
542 | static inline void tasklet_schedule(struct tasklet_struct *t) | 554 | static inline void tasklet_schedule(struct tasklet_struct *t) |
@@ -545,6 +557,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t) | |||
545 | __tasklet_schedule(t); | 557 | __tasklet_schedule(t); |
546 | } | 558 | } |
547 | 559 | ||
560 | extern void ___tasklet_hi_schedule(struct tasklet_struct *t); | ||
548 | extern void __tasklet_hi_schedule(struct tasklet_struct *t); | 561 | extern void __tasklet_hi_schedule(struct tasklet_struct *t); |
549 | 562 | ||
550 | static inline void tasklet_hi_schedule(struct tasklet_struct *t) | 563 | static inline void tasklet_hi_schedule(struct tasklet_struct *t) |
@@ -553,6 +566,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) | |||
553 | __tasklet_hi_schedule(t); | 566 | __tasklet_hi_schedule(t); |
554 | } | 567 | } |
555 | 568 | ||
569 | extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t); | ||
556 | extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); | 570 | extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); |
557 | 571 | ||
558 | /* | 572 | /* |
@@ -582,7 +596,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) | |||
582 | } | 596 | } |
583 | 597 | ||
584 | static inline void tasklet_enable(struct tasklet_struct *t) | 598 | static inline void tasklet_enable(struct tasklet_struct *t) |
585 | { | 599 | { |
586 | smp_mb__before_atomic_dec(); | 600 | smp_mb__before_atomic_dec(); |
587 | atomic_dec(&t->count); | 601 | atomic_dec(&t->count); |
588 | } | 602 | } |
diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a940fe435aca..cb47debbf24d 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h | |||
@@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock) | |||
126 | return atomic_read(&lock->count) != 1; | 126 | return atomic_read(&lock->count) != 1; |
127 | } | 127 | } |
128 | 128 | ||
129 | /* Return non-zero to abort; only the pre-side-effect may abort. */ | ||
130 | typedef int (*side_effect_t)(unsigned long); | ||
131 | extern void mutex_lock_sfx(struct mutex *lock, | ||
132 | side_effect_t pre, unsigned long pre_arg, | ||
133 | side_effect_t post, unsigned long post_arg); | ||
134 | extern void mutex_unlock_sfx(struct mutex *lock, | ||
135 | side_effect_t pre, unsigned long pre_arg, | ||
136 | side_effect_t post, unsigned long post_arg); | ||
137 | |||
129 | /* | 138 | /* |
130 | * See kernel/mutex.c for detailed documentation of these APIs. | 139 | * See kernel/mutex.c for detailed documentation of these APIs. |
131 | * Also see Documentation/mutex-design.txt. | 140 | * Also see Documentation/mutex-design.txt. |
@@ -153,6 +162,7 @@ extern void mutex_lock(struct mutex *lock); | |||
153 | extern int __must_check mutex_lock_interruptible(struct mutex *lock); | 162 | extern int __must_check mutex_lock_interruptible(struct mutex *lock); |
154 | extern int __must_check mutex_lock_killable(struct mutex *lock); | 163 | extern int __must_check mutex_lock_killable(struct mutex *lock); |
155 | 164 | ||
165 | |||
156 | # define mutex_lock_nested(lock, subclass) mutex_lock(lock) | 166 | # define mutex_lock_nested(lock, subclass) mutex_lock(lock) |
157 | # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) | 167 | # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) |
158 | # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) | 168 | # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) |
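The mutex_lock_sfx()/mutex_unlock_sfx() declarations above accept side_effect_t callbacks so a caller can run bookkeeping atomically with the lock transition; per the comment, only the pre-callback may abort by returning non-zero. A minimal sketch of the calling convention follows, assuming NULL is accepted for an unused callback and using a hypothetical device structure (kernel/mutex.c holds the authoritative semantics):

#include <linux/mutex.h>

struct my_dev {                          /* hypothetical user of the API */
    struct mutex lock;
    unsigned long nr_acquisitions;
};

/* pre-side-effect: runs with the lock operation; return 0 to proceed */
static int count_acquisition(unsigned long arg)
{
    struct my_dev *d = (struct my_dev *)arg;
    d->nr_acquisitions++;
    return 0;
}

static void my_dev_lock(struct my_dev *d)
{
    /* assumption: a NULL post-callback means "no post side-effect" */
    mutex_lock_sfx(&d->lock, count_acquisition, (unsigned long)d, NULL, 0);
}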
diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c990d13ae35..d580959f9f5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -1532,8 +1532,12 @@ struct task_struct { | |||
1532 | #endif | 1532 | #endif |
1533 | struct prop_local_single dirties; | 1533 | struct prop_local_single dirties; |
1534 | 1534 | ||
1535 | /* LITMUS RT parameters and state */ | 1535 | /*** LITMUS RT parameters and state ***/ |
1536 | struct rt_param rt_param; | 1536 | struct rt_param rt_param; |
1537 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1538 | struct aux_data aux_data; | ||
1539 | #endif | ||
1540 | /*****/ | ||
1537 | 1541 | ||
1538 | /* references to PI semaphores, etc. */ | 1542 | /* references to PI semaphores, etc. */ |
1539 | struct od_table_entry *od_table; | 1543 | struct od_table_entry *od_table; |
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 39fa04966aa8..c83fc2b65f01 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h | |||
@@ -43,4 +43,13 @@ extern int __must_check down_trylock(struct semaphore *sem); | |||
43 | extern int __must_check down_timeout(struct semaphore *sem, long jiffies); | 43 | extern int __must_check down_timeout(struct semaphore *sem, long jiffies); |
44 | extern void up(struct semaphore *sem); | 44 | extern void up(struct semaphore *sem); |
45 | 45 | ||
46 | extern void __down(struct semaphore *sem); | ||
47 | extern void __up(struct semaphore *sem); | ||
48 | |||
49 | struct semaphore_waiter { | ||
50 | struct list_head list; | ||
51 | struct task_struct *task; | ||
52 | int up; | ||
53 | }; | ||
54 | |||
46 | #endif /* __LINUX_SEMAPHORE_H */ | 55 | #endif /* __LINUX_SEMAPHORE_H */ |
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f584aba78ca9..1ec2ec7d4e3b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h | |||
@@ -83,6 +83,9 @@ struct work_struct { | |||
83 | #ifdef CONFIG_LOCKDEP | 83 | #ifdef CONFIG_LOCKDEP |
84 | struct lockdep_map lockdep_map; | 84 | struct lockdep_map lockdep_map; |
85 | #endif | 85 | #endif |
86 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
87 | struct task_struct *owner; | ||
88 | #endif | ||
86 | }; | 89 | }; |
87 | 90 | ||
88 | #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) | 91 | #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) |
@@ -115,11 +118,25 @@ struct execute_work { | |||
115 | #define __WORK_INIT_LOCKDEP_MAP(n, k) | 118 | #define __WORK_INIT_LOCKDEP_MAP(n, k) |
116 | #endif | 119 | #endif |
117 | 120 | ||
121 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
122 | #define __WORK_INIT_OWNER() \ | ||
123 | .owner = NULL, | ||
124 | |||
125 | #define PREPARE_OWNER(_work, _owner) \ | ||
126 | do { \ | ||
127 | (_work)->owner = (_owner); \ | ||
128 | } while(0) | ||
129 | #else | ||
130 | #define __WORK_INIT_OWNER() | ||
131 | #define PREPARE_OWNER(_work, _owner) | ||
132 | #endif | ||
133 | |||
118 | #define __WORK_INITIALIZER(n, f) { \ | 134 | #define __WORK_INITIALIZER(n, f) { \ |
119 | .data = WORK_DATA_STATIC_INIT(), \ | 135 | .data = WORK_DATA_STATIC_INIT(), \ |
120 | .entry = { &(n).entry, &(n).entry }, \ | 136 | .entry = { &(n).entry, &(n).entry }, \ |
121 | .func = (f), \ | 137 | .func = (f), \ |
122 | __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ | 138 | __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ |
139 | __WORK_INIT_OWNER() \ | ||
123 | } | 140 | } |
124 | 141 | ||
125 | #define __DELAYED_WORK_INITIALIZER(n, f) { \ | 142 | #define __DELAYED_WORK_INITIALIZER(n, f) { \ |
@@ -357,6 +374,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | |||
357 | extern void flush_workqueue(struct workqueue_struct *wq); | 374 | extern void flush_workqueue(struct workqueue_struct *wq); |
358 | extern void flush_scheduled_work(void); | 375 | extern void flush_scheduled_work(void); |
359 | 376 | ||
377 | extern int __schedule_work(struct work_struct *work); | ||
360 | extern int schedule_work(struct work_struct *work); | 378 | extern int schedule_work(struct work_struct *work); |
361 | extern int schedule_work_on(int cpu, struct work_struct *work); | 379 | extern int schedule_work_on(int cpu, struct work_struct *work); |
362 | extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); | 380 | extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); |
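With CONFIG_LITMUS_SOFTIRQD, a work item records an owner task so the thread that executes it can inherit that task's priority; PREPARE_OWNER() compiles away otherwise. A hedged sketch of tagging a statically declared work item before queueing it (the work function and names are illustrative):

#include <linux/workqueue.h>
#include <linux/sched.h>

static void my_work_fn(struct work_struct *w)
{
    /* deferred processing */
}

static DECLARE_WORK(my_work, my_work_fn);

static void queue_for(struct task_struct *owner)
{
    PREPARE_OWNER(&my_work, owner);   /* no-op without CONFIG_LITMUS_SOFTIRQD */
    schedule_work(&my_work);
}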
diff --git a/include/litmus/aux_tasks.h b/include/litmus/aux_tasks.h new file mode 100644 index 000000000000..255bbafcc6b7 --- /dev/null +++ b/include/litmus/aux_tasks.h | |||
@@ -0,0 +1,23 @@ | |||
1 | #ifndef LITMUS_AUX_taskS | ||
2 | #define LITMUS_AUX_taskS | ||
3 | |||
4 | struct task_struct; | ||
5 | |||
6 | int make_aux_task_if_required(struct task_struct *t); | ||
7 | |||
8 | /* call on an aux task when it exits real-time */ | ||
9 | int exit_aux_task(struct task_struct *t); | ||
10 | |||
11 | /* call when an aux_owner becomes real-time */ | ||
12 | long enable_aux_task_owner(struct task_struct *t); | ||
13 | |||
14 | /* call when an aux_owner exits real-time */ | ||
15 | long disable_aux_task_owner(struct task_struct *t); | ||
16 | |||
17 | /* call when an aux_owner increases its priority */ | ||
18 | int aux_task_owner_increase_priority(struct task_struct *t); | ||
19 | |||
20 | /* call when an aux_owner decreases its priority */ | ||
21 | int aux_task_owner_decrease_priority(struct task_struct *t); | ||
22 | |||
23 | #endif | ||
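The comments above state when each hook should fire; a hedged sketch of how a scheduler plugin might wire the owner hooks into its task lifecycle callbacks (the plugin callback names are illustrative, not part of this header):

#include <litmus/aux_tasks.h>

/* Sketch: a plugin's admit/exit paths for a task that owns aux threads. */
static void plugin_task_new(struct task_struct *t)
{
    /* t has just become real-time */
    enable_aux_task_owner(t);
}

static void plugin_task_exit(struct task_struct *t)
{
    /* t is leaving real-time mode */
    disable_aux_task_owner(t);
}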
diff --git a/include/litmus/budget.h b/include/litmus/budget.h index 33344ee8d5f9..763b31c0e9f6 100644 --- a/include/litmus/budget.h +++ b/include/litmus/budget.h | |||
@@ -5,6 +5,9 @@ | |||
5 | * the next task. */ | 5 | * the next task. */ |
6 | void update_enforcement_timer(struct task_struct* t); | 6 | void update_enforcement_timer(struct task_struct* t); |
7 | 7 | ||
8 | /* Send SIG_BUDGET to a real-time task. */ | ||
9 | void send_sigbudget(struct task_struct* t); | ||
10 | |||
8 | inline static int budget_exhausted(struct task_struct* t) | 11 | inline static int budget_exhausted(struct task_struct* t) |
9 | { | 12 | { |
10 | return get_exec_time(t) >= get_exec_cost(t); | 13 | return get_exec_time(t) >= get_exec_cost(t); |
@@ -19,10 +22,21 @@ inline static lt_t budget_remaining(struct task_struct* t) | |||
19 | return 0; | 22 | return 0; |
20 | } | 23 | } |
21 | 24 | ||
22 | #define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) | 25 | #define budget_enforced(t) (\ |
26 | tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT) | ||
27 | |||
28 | #define budget_precisely_tracked(t) (\ | ||
29 | tsk_rt(t)->task_params.budget_policy == PRECISE_ENFORCEMENT || \ | ||
30 | tsk_rt(t)->task_params.budget_signal_policy == PRECISE_SIGNALS) | ||
31 | |||
32 | #define budget_signalled(t) (\ | ||
33 | tsk_rt(t)->task_params.budget_signal_policy != NO_SIGNALS) | ||
34 | |||
35 | #define budget_precisely_signalled(t) (\ | ||
36 | tsk_rt(t)->task_params.budget_policy == PRECISE_SIGNALS) | ||
23 | 37 | ||
24 | #define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \ | 38 | #define sigbudget_sent(t) (\ |
25 | == PRECISE_ENFORCEMENT) | 39 | test_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) |
26 | 40 | ||
27 | static inline int requeue_preempted_job(struct task_struct* t) | 41 | static inline int requeue_preempted_job(struct task_struct* t) |
28 | { | 42 | { |
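budget_signalled() and sigbudget_sent() are meant to let a plugin deliver SIG_BUDGET at most once per job. A hedged sketch of a budget-check path combining them (where this check is invoked from is plugin-specific, and the flag handling inside send_sigbudget() is assumed, not shown in this header):

#include <litmus/litmus.h>
#include <litmus/budget.h>

/* Sketch: signal budget exhaustion at most once per job. */
static void check_budget_signal(struct task_struct *t)
{
    if (budget_signalled(t) && budget_exhausted(t) && !sigbudget_sent(t))
        send_sigbudget(t);   /* assumed to mark RT_JOB_SIG_BUDGET_SENT */
}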
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h index bbaf22ea7f12..63dff7efe8fb 100644 --- a/include/litmus/edf_common.h +++ b/include/litmus/edf_common.h | |||
@@ -20,6 +20,18 @@ int edf_higher_prio(struct task_struct* first, | |||
20 | 20 | ||
21 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b); | 21 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b); |
22 | 22 | ||
23 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
24 | /* binheap_nodes must be embedded within 'struct litmus_lock' */ | ||
25 | int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b); | ||
26 | int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b); | ||
27 | int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); | ||
28 | int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); | ||
29 | |||
30 | int __edf_higher_prio(struct task_struct* first, comparison_mode_t first_mode, | ||
31 | struct task_struct* second, comparison_mode_t second_mode); | ||
32 | |||
33 | #endif | ||
34 | |||
23 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); | 35 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t); |
24 | 36 | ||
25 | #endif | 37 | #endif |
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h index f2115b83f1e4..1469c0fd0460 100644 --- a/include/litmus/fdso.h +++ b/include/litmus/fdso.h | |||
@@ -24,9 +24,18 @@ typedef enum { | |||
24 | MPCP_VS_SEM = 3, | 24 | MPCP_VS_SEM = 3, |
25 | DPCP_SEM = 4, | 25 | DPCP_SEM = 4, |
26 | 26 | ||
27 | PCP_SEM = 5, | 27 | PCP_SEM = 5, |
28 | 28 | ||
29 | MAX_OBJ_TYPE = 5 | 29 | RSM_MUTEX = 6, |
30 | IKGLP_SEM = 7, | ||
31 | KFMLP_SEM = 8, | ||
32 | |||
33 | IKGLP_SIMPLE_GPU_AFF_OBS = 9, | ||
34 | IKGLP_GPU_AFF_OBS = 10, | ||
35 | KFMLP_SIMPLE_GPU_AFF_OBS = 11, | ||
36 | KFMLP_GPU_AFF_OBS = 12, | ||
37 | |||
38 | MAX_OBJ_TYPE = 12 | ||
30 | } obj_type_t; | 39 | } obj_type_t; |
31 | 40 | ||
32 | struct inode_obj_id { | 41 | struct inode_obj_id { |
@@ -70,8 +79,11 @@ static inline void* od_lookup(int od, obj_type_t type) | |||
70 | } | 79 | } |
71 | 80 | ||
72 | #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) | 81 | #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM)) |
82 | #define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM)) | ||
73 | #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) | 83 | #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM)) |
74 | #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) | 84 | #define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID)) |
75 | 85 | ||
86 | #define lookup_rsm_mutex(od)((struct litmus_lock*) od_lookup(od, FMLP_SEM)) | ||
87 | |||
76 | 88 | ||
77 | #endif | 89 | #endif |
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h index 642de98542c8..3d545fd2f5c5 100644 --- a/include/litmus/fpmath.h +++ b/include/litmus/fpmath.h | |||
@@ -1,11 +1,12 @@ | |||
1 | #ifndef __FP_MATH_H__ | 1 | #ifndef __FP_MATH_H__ |
2 | #define __FP_MATH_H__ | 2 | #define __FP_MATH_H__ |
3 | 3 | ||
4 | #ifdef __KERNEL__ | ||
4 | #include <linux/math64.h> | 5 | #include <linux/math64.h> |
5 | 6 | #else | |
6 | #ifndef __KERNEL__ | ||
7 | #include <stdint.h> | 7 | #include <stdint.h> |
8 | #define abs(x) (((x) < 0) ? -(x) : x) | 8 | #define abs(x) (((x) < 0) ? -(x) : x) |
9 | #define div64_s64(a, b) (a)/(b) | ||
9 | #endif | 10 | #endif |
10 | 11 | ||
11 | // Use 64-bit because we want to track things at the nanosecond scale. | 12 | // Use 64-bit because we want to track things at the nanosecond scale. |
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h new file mode 100644 index 000000000000..47da725717b0 --- /dev/null +++ b/include/litmus/gpu_affinity.h | |||
@@ -0,0 +1,66 @@ | |||
1 | #ifndef LITMUS_GPU_AFFINITY_H | ||
2 | #define LITMUS_GPU_AFFINITY_H | ||
3 | |||
4 | #include <litmus/rt_param.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/litmus.h> | ||
7 | |||
8 | void update_gpu_estimate(struct task_struct* t, lt_t observed); | ||
9 | gpu_migration_dist_t gpu_migration_distance(int a, int b); | ||
10 | |||
11 | static inline void reset_gpu_tracker(struct task_struct* t) | ||
12 | { | ||
13 | t->rt_param.accum_gpu_time = 0; | ||
14 | } | ||
15 | |||
16 | static inline void start_gpu_tracker(struct task_struct* t) | ||
17 | { | ||
18 | t->rt_param.gpu_time_stamp = litmus_clock(); | ||
19 | } | ||
20 | |||
21 | static inline void stop_gpu_tracker(struct task_struct* t) | ||
22 | { | ||
23 | lt_t now = litmus_clock(); | ||
24 | t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp); | ||
25 | } | ||
26 | |||
27 | static inline lt_t get_gpu_time(struct task_struct* t) | ||
28 | { | ||
29 | return t->rt_param.accum_gpu_time; | ||
30 | } | ||
31 | |||
32 | static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist) | ||
33 | { | ||
34 | int i; | ||
35 | lt_t val; | ||
36 | |||
37 | if(dist == MIG_NONE) { | ||
38 | dist = MIG_LOCAL; | ||
39 | } | ||
40 | |||
41 | val = t->rt_param.gpu_migration_est[dist].avg; | ||
42 | for(i = dist-1; i >= 0; --i) { | ||
43 | if(t->rt_param.gpu_migration_est[i].avg > val) { | ||
44 | val = t->rt_param.gpu_migration_est[i].avg; | ||
45 | } | ||
46 | } | ||
47 | |||
48 | #if 0 | ||
49 | // int i; | ||
50 | // fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est); | ||
51 | // lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates... | ||
52 | lt_t val = t->rt_param.gpu_migration_est[dist].avg; | ||
53 | |||
54 | // WARN_ON(temp < 0); | ||
55 | |||
56 | // lower-bound a distant migration to be at least equal to the level | ||
57 | // below it. | ||
58 | // for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) { | ||
59 | // val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est); | ||
60 | // } | ||
61 | #endif | ||
62 | |||
63 | return ((val > 0) ? val : dist+1); | ||
64 | } | ||
65 | |||
66 | #endif | ||
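The inline helpers above implement a simple stopwatch over a task's GPU use; the accumulated time is then fed to update_gpu_estimate(). A hedged sketch of the intended call pattern around one GPU critical section (the GPU work itself is elided):

#include <litmus/gpu_affinity.h>

/* Sketch: time one GPU access and fold the observation into the
 * task's migration-distance estimates. */
static void timed_gpu_access(struct task_struct *t)
{
    reset_gpu_tracker(t);
    start_gpu_tracker(t);

    /* ... submit GPU work and wait for completion ... */

    stop_gpu_tracker(t);
    update_gpu_estimate(t, get_gpu_time(t));
}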
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h new file mode 100644 index 000000000000..af155eadbb35 --- /dev/null +++ b/include/litmus/ikglp_lock.h | |||
@@ -0,0 +1,164 @@ | |||
1 | #ifndef LITMUS_IKGLP_H | ||
2 | #define LITMUS_IKGLP_H | ||
3 | |||
4 | #include <litmus/litmus.h> | ||
5 | #include <litmus/binheap.h> | ||
6 | #include <litmus/locking.h> | ||
7 | |||
8 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
9 | #include <litmus/kexclu_affinity.h> | ||
10 | |||
11 | struct ikglp_affinity; | ||
12 | #endif | ||
13 | |||
14 | typedef struct ikglp_heap_node | ||
15 | { | ||
16 | struct task_struct *task; | ||
17 | struct binheap_node node; | ||
18 | } ikglp_heap_node_t; | ||
19 | |||
20 | struct fifo_queue; | ||
21 | struct ikglp_wait_state; | ||
22 | |||
23 | typedef struct ikglp_donee_heap_node | ||
24 | { | ||
25 | struct task_struct *task; | ||
26 | struct fifo_queue *fq; | ||
27 | struct ikglp_wait_state *donor_info; // cross-linked with ikglp_wait_state_t of donor | ||
28 | |||
29 | struct binheap_node node; | ||
30 | } ikglp_donee_heap_node_t; | ||
31 | |||
32 | // Maintains the state of a request as it goes through the IKGLP | ||
33 | typedef struct ikglp_wait_state { | ||
34 | struct task_struct *task; // pointer back to the requesting task | ||
35 | |||
36 | // Data for while waiting in FIFO Queue | ||
37 | wait_queue_t fq_node; | ||
38 | ikglp_heap_node_t global_heap_node; | ||
39 | ikglp_donee_heap_node_t donee_heap_node; | ||
40 | |||
41 | // Data for while waiting in PQ | ||
42 | ikglp_heap_node_t pq_node; | ||
43 | |||
44 | // Data for while waiting as a donor | ||
45 | ikglp_donee_heap_node_t *donee_info; // cross-linked with donee's ikglp_donee_heap_node_t | ||
46 | struct nested_info prio_donation; | ||
47 | struct binheap_node node; | ||
48 | } ikglp_wait_state_t; | ||
49 | |||
50 | /* struct for semaphore with priority inheritance */ | ||
51 | struct fifo_queue | ||
52 | { | ||
53 | wait_queue_head_t wait; | ||
54 | struct task_struct* owner; | ||
55 | |||
56 | // used for bookkeeping | ||
57 | ikglp_heap_node_t global_heap_node; | ||
58 | ikglp_donee_heap_node_t donee_heap_node; | ||
59 | |||
60 | struct task_struct* hp_waiter; | ||
61 | int count; /* number of waiters + holder */ | ||
62 | |||
63 | struct nested_info nest; | ||
64 | }; | ||
65 | |||
66 | struct ikglp_semaphore | ||
67 | { | ||
68 | struct litmus_lock litmus_lock; | ||
69 | |||
70 | raw_spinlock_t lock; | ||
71 | raw_spinlock_t real_lock; | ||
72 | |||
73 | int nr_replicas; // AKA k | ||
74 | int m; | ||
75 | |||
76 | int max_fifo_len; // max len of a fifo queue | ||
77 | int nr_in_fifos; | ||
78 | |||
79 | struct binheap top_m; // min heap, base prio | ||
80 | int top_m_size; // number of nodes in top_m | ||
81 | |||
82 | struct binheap not_top_m; // max heap, base prio | ||
83 | |||
84 | struct binheap donees; // min-heap, base prio | ||
85 | struct fifo_queue *shortest_fifo_queue; // pointer to shortest fifo queue | ||
86 | |||
87 | /* data structures for holding requests */ | ||
88 | struct fifo_queue *fifo_queues; // array nr_replicas in length | ||
89 | struct binheap priority_queue; // max-heap, base prio | ||
90 | struct binheap donors; // max-heap, base prio | ||
91 | |||
92 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
93 | struct ikglp_affinity *aff_obs; | ||
94 | #endif | ||
95 | }; | ||
96 | |||
97 | static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock) | ||
98 | { | ||
99 | return container_of(lock, struct ikglp_semaphore, litmus_lock); | ||
100 | } | ||
101 | |||
102 | int ikglp_lock(struct litmus_lock* l); | ||
103 | int ikglp_unlock(struct litmus_lock* l); | ||
104 | int ikglp_close(struct litmus_lock* l); | ||
105 | void ikglp_free(struct litmus_lock* l); | ||
106 | struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg); | ||
107 | |||
108 | |||
109 | |||
110 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
111 | |||
112 | struct ikglp_queue_info | ||
113 | { | ||
114 | struct fifo_queue* q; | ||
115 | lt_t estimated_len; | ||
116 | int *nr_cur_users; | ||
117 | int64_t *nr_aff_users; | ||
118 | }; | ||
119 | |||
120 | struct ikglp_affinity_ops | ||
121 | { | ||
122 | struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO | ||
123 | ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select steal from FIFO | ||
124 | ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff, struct task_struct* t); // select a donee | ||
125 | ikglp_wait_state_t* (*advise_donor_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ | ||
126 | |||
127 | void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue | ||
128 | void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue | ||
129 | void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired | ||
130 | void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed | ||
131 | int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding) | ||
132 | |||
133 | int (*notify_exit)(struct ikglp_affinity* aff, struct task_struct* t); | ||
134 | }; | ||
135 | |||
136 | struct ikglp_affinity | ||
137 | { | ||
138 | struct affinity_observer obs; | ||
139 | struct ikglp_affinity_ops *ops; | ||
140 | struct ikglp_queue_info *q_info; | ||
141 | int *nr_cur_users_on_rsrc; | ||
142 | int64_t *nr_aff_on_rsrc; | ||
143 | int offset; | ||
144 | int nr_simult; | ||
145 | int nr_rsrc; | ||
146 | int relax_max_fifo_len; | ||
147 | }; | ||
148 | |||
149 | static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) | ||
150 | { | ||
151 | return container_of(aff_obs, struct ikglp_affinity, obs); | ||
152 | } | ||
153 | |||
154 | int ikglp_aff_obs_close(struct affinity_observer*); | ||
155 | void ikglp_aff_obs_free(struct affinity_observer*); | ||
156 | struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
157 | void* __user arg); | ||
158 | struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
159 | void* __user arg); | ||
160 | #endif | ||
161 | |||
162 | |||
163 | |||
164 | #endif | ||
diff --git a/include/litmus/kexclu_affinity.h b/include/litmus/kexclu_affinity.h new file mode 100644 index 000000000000..f6355de49074 --- /dev/null +++ b/include/litmus/kexclu_affinity.h | |||
@@ -0,0 +1,35 @@ | |||
1 | #ifndef LITMUS_AFF_OBS_H | ||
2 | #define LITMUS_AFF_OBS_H | ||
3 | |||
4 | #include <litmus/locking.h> | ||
5 | |||
6 | struct affinity_observer_ops; | ||
7 | |||
8 | struct affinity_observer | ||
9 | { | ||
10 | struct affinity_observer_ops* ops; | ||
11 | int type; | ||
12 | int ident; | ||
13 | |||
14 | struct litmus_lock* lock; // the lock under observation | ||
15 | }; | ||
16 | |||
17 | typedef int (*aff_obs_open_t)(struct affinity_observer* aff_obs, | ||
18 | void* __user arg); | ||
19 | typedef int (*aff_obs_close_t)(struct affinity_observer* aff_obs); | ||
20 | typedef void (*aff_obs_free_t)(struct affinity_observer* aff_obs); | ||
21 | |||
22 | struct affinity_observer_ops | ||
23 | { | ||
24 | aff_obs_open_t open; | ||
25 | aff_obs_close_t close; | ||
26 | aff_obs_free_t deallocate; | ||
27 | }; | ||
28 | |||
29 | struct litmus_lock* get_lock_from_od(int od); | ||
30 | |||
31 | void affinity_observer_new(struct affinity_observer* aff, | ||
32 | struct affinity_observer_ops* ops, | ||
33 | struct affinity_observer_args* args); | ||
34 | |||
35 | #endif | ||
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h new file mode 100644 index 000000000000..5f0aae6e6f42 --- /dev/null +++ b/include/litmus/kfmlp_lock.h | |||
@@ -0,0 +1,97 @@ | |||
1 | #ifndef LITMUS_KFMLP_H | ||
2 | #define LITMUS_KFMLP_H | ||
3 | |||
4 | #include <litmus/litmus.h> | ||
5 | #include <litmus/locking.h> | ||
6 | |||
7 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
8 | #include <litmus/kexclu_affinity.h> | ||
9 | |||
10 | struct kfmlp_affinity; | ||
11 | #endif | ||
12 | |||
13 | /* struct for semaphore with priority inheritance */ | ||
14 | struct kfmlp_queue | ||
15 | { | ||
16 | wait_queue_head_t wait; | ||
17 | struct task_struct* owner; | ||
18 | struct task_struct* hp_waiter; | ||
19 | int count; /* number of waiters + holder */ | ||
20 | }; | ||
21 | |||
22 | struct kfmlp_semaphore | ||
23 | { | ||
24 | struct litmus_lock litmus_lock; | ||
25 | |||
26 | spinlock_t lock; | ||
27 | |||
28 | int num_resources; /* aka k */ | ||
29 | |||
30 | struct kfmlp_queue *queues; /* array */ | ||
31 | struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */ | ||
32 | |||
33 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
34 | struct kfmlp_affinity *aff_obs; | ||
35 | #endif | ||
36 | }; | ||
37 | |||
38 | static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock) | ||
39 | { | ||
40 | return container_of(lock, struct kfmlp_semaphore, litmus_lock); | ||
41 | } | ||
42 | |||
43 | int kfmlp_lock(struct litmus_lock* l); | ||
44 | int kfmlp_unlock(struct litmus_lock* l); | ||
45 | int kfmlp_close(struct litmus_lock* l); | ||
46 | void kfmlp_free(struct litmus_lock* l); | ||
47 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg); | ||
48 | |||
49 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
50 | |||
51 | struct kfmlp_queue_info | ||
52 | { | ||
53 | struct kfmlp_queue* q; | ||
54 | lt_t estimated_len; | ||
55 | int *nr_cur_users; | ||
56 | }; | ||
57 | |||
58 | struct kfmlp_affinity_ops | ||
59 | { | ||
60 | struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t); | ||
61 | struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from); | ||
62 | void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
63 | void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
64 | void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
65 | void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t); | ||
66 | int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq); | ||
67 | }; | ||
68 | |||
69 | struct kfmlp_affinity | ||
70 | { | ||
71 | struct affinity_observer obs; | ||
72 | struct kfmlp_affinity_ops *ops; | ||
73 | struct kfmlp_queue_info *q_info; | ||
74 | int *nr_cur_users_on_rsrc; | ||
75 | int offset; | ||
76 | int nr_simult; | ||
77 | int nr_rsrc; | ||
78 | }; | ||
79 | |||
80 | static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs) | ||
81 | { | ||
82 | return container_of(aff_obs, struct kfmlp_affinity, obs); | ||
83 | } | ||
84 | |||
85 | int kfmlp_aff_obs_close(struct affinity_observer*); | ||
86 | void kfmlp_aff_obs_free(struct affinity_observer*); | ||
87 | struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
88 | void* __user arg); | ||
89 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*, | ||
90 | void* __user arg); | ||
91 | |||
92 | |||
93 | #endif | ||
94 | |||
95 | #endif | ||
96 | |||
97 | |||
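A hedged sketch of how a plugin's lock-allocation path might create a k-FMLP semaphore and recover the protocol-specific structure ('my_kfmlp_ops' stands in for a plugin-provided operations table; it is not defined in this header):

#include <linux/kernel.h>
#include <litmus/kfmlp_lock.h>

static struct litmus_lock* create_kfmlp(struct litmus_lock_ops *my_kfmlp_ops,
                                        void __user *arg)
{
    struct litmus_lock *l = kfmlp_new(my_kfmlp_ops, arg);
    if (l) {
        struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
        printk(KERN_INFO "k-FMLP created with %d replicas\n",
               sem->num_resources);
    }
    return l;
}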
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h index 875783e6a67b..2da61fa58bdc 100644 --- a/include/litmus/litmus.h +++ b/include/litmus/litmus.h | |||
@@ -26,16 +26,20 @@ static inline int in_list(struct list_head* list) | |||
26 | ); | 26 | ); |
27 | } | 27 | } |
28 | 28 | ||
29 | |||
29 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); | 30 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq); |
30 | 31 | ||
31 | #define NO_CPU 0xffffffff | 32 | #define NO_CPU 0xffffffff |
32 | 33 | ||
33 | void litmus_fork(struct task_struct *tsk); | 34 | void litmus_fork(struct task_struct *tsk); |
35 | void litmus_post_fork_thread(struct task_struct *tsk); | ||
34 | void litmus_exec(void); | 36 | void litmus_exec(void); |
35 | /* clean up real-time state of a task */ | 37 | /* clean up real-time state of a task */ |
36 | void exit_litmus(struct task_struct *dead_tsk); | 38 | void exit_litmus(struct task_struct *dead_tsk); |
37 | 39 | ||
38 | long litmus_admit_task(struct task_struct *tsk); | 40 | long litmus_admit_task(struct task_struct *tsk); |
41 | |||
42 | void litmus_pre_exit_task(struct task_struct *tsk); // called before litmus_exit_task, but without run queue locks held | ||
39 | void litmus_exit_task(struct task_struct *tsk); | 43 | void litmus_exit_task(struct task_struct *tsk); |
40 | 44 | ||
41 | #define is_realtime(t) ((t)->policy == SCHED_LITMUS) | 45 | #define is_realtime(t) ((t)->policy == SCHED_LITMUS) |
@@ -43,6 +47,7 @@ void litmus_exit_task(struct task_struct *tsk); | |||
43 | ((t)->rt_param.transition_pending) | 47 | ((t)->rt_param.transition_pending) |
44 | 48 | ||
45 | #define tsk_rt(t) (&(t)->rt_param) | 49 | #define tsk_rt(t) (&(t)->rt_param) |
50 | #define tsk_aux(t) (&(t)->aux_data) | ||
46 | 51 | ||
47 | /* Realtime utility macros */ | 52 | /* Realtime utility macros */ |
48 | #define is_priority_boosted(t) (tsk_rt(t)->priority_boosted) | 53 | #define is_priority_boosted(t) (tsk_rt(t)->priority_boosted) |
@@ -60,9 +65,13 @@ void litmus_exit_task(struct task_struct *tsk); | |||
60 | /* job_param macros */ | 65 | /* job_param macros */ |
61 | #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) | 66 | #define get_exec_time(t) (tsk_rt(t)->job_params.exec_time) |
62 | #define get_deadline(t) (tsk_rt(t)->job_params.deadline) | 67 | #define get_deadline(t) (tsk_rt(t)->job_params.deadline) |
68 | #define get_period(t) (tsk_rt(t)->task_params.period) | ||
63 | #define get_release(t) (tsk_rt(t)->job_params.release) | 69 | #define get_release(t) (tsk_rt(t)->job_params.release) |
64 | #define get_lateness(t) (tsk_rt(t)->job_params.lateness) | 70 | #define get_lateness(t) (tsk_rt(t)->job_params.lateness) |
65 | 71 | ||
72 | #define effective_priority(t) ((!(tsk_rt(t)->inh_task)) ? t : tsk_rt(t)->inh_task) | ||
73 | #define base_priority(t) (t) | ||
74 | |||
66 | #define is_hrt(t) \ | 75 | #define is_hrt(t) \ |
67 | (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) | 76 | (tsk_rt(t)->task_params.cls == RT_CLASS_HARD) |
68 | #define is_srt(t) \ | 77 | #define is_srt(t) \ |
@@ -99,10 +108,12 @@ static inline lt_t litmus_clock(void) | |||
99 | #define earlier_deadline(a, b) (lt_before(\ | 108 | #define earlier_deadline(a, b) (lt_before(\ |
100 | (a)->rt_param.job_params.deadline,\ | 109 | (a)->rt_param.job_params.deadline,\ |
101 | (b)->rt_param.job_params.deadline)) | 110 | (b)->rt_param.job_params.deadline)) |
111 | #define shorter_period(a, b) (lt_before(\ | ||
112 | (a)->rt_param.task_params.period,\ | ||
113 | (b)->rt_param.task_params.period)) | ||
102 | #define earlier_release(a, b) (lt_before(\ | 114 | #define earlier_release(a, b) (lt_before(\ |
103 | (a)->rt_param.job_params.release,\ | 115 | (a)->rt_param.job_params.release,\ |
104 | (b)->rt_param.job_params.release)) | 116 | (b)->rt_param.job_params.release)) |
105 | |||
106 | void preempt_if_preemptable(struct task_struct* t, int on_cpu); | 117 | void preempt_if_preemptable(struct task_struct* t, int on_cpu); |
107 | 118 | ||
108 | #ifdef CONFIG_LITMUS_LOCKING | 119 | #ifdef CONFIG_LITMUS_LOCKING |
@@ -174,8 +185,10 @@ static inline int request_exit_np_atomic(struct task_struct *t) | |||
174 | * retry loop here since tasks might exploit that to | 185 | * retry loop here since tasks might exploit that to |
175 | * keep the kernel busy indefinitely. */ | 186 | * keep the kernel busy indefinitely. */ |
176 | } | 187 | } |
177 | } else | 188 | } |
189 | else { | ||
178 | return 0; | 190 | return 0; |
191 | } | ||
179 | } | 192 | } |
180 | 193 | ||
181 | #else | 194 | #else |
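effective_priority() resolves to the inheritance target (inh_task) when one is set and to the task itself otherwise, so priority comparisons in a plugin would normally go through it. A hedged sketch using the earlier_deadline() macro from this header (the helper name is illustrative):

#include <litmus/litmus.h>

/* Sketch: EDF-style comparison on effective (possibly inherited) priorities. */
static int higher_effective_prio(struct task_struct *a, struct task_struct *b)
{
    return earlier_deadline(effective_priority(a), effective_priority(b));
}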
diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h new file mode 100644 index 000000000000..cfef08187464 --- /dev/null +++ b/include/litmus/litmus_softirq.h | |||
@@ -0,0 +1,166 @@ | |||
1 | #ifndef __LITMUS_SOFTIRQ_H | ||
2 | #define __LITMUS_SOFTIRQ_H | ||
3 | |||
4 | #include <linux/interrupt.h> | ||
5 | #include <linux/workqueue.h> | ||
6 | |||
7 | /* | ||
8 | Threaded tasklet/workqueue handling for Litmus. | ||
9 | Items are scheduled in the following order: hi-tasklet, | ||
10 | lo-tasklet, workqueue. Items are scheduled in FIFO order | ||
11 | within each of these classes. | ||
12 | |||
13 | klmirqd assumes the priority of the owner of the | ||
14 | tasklet when the tasklet is next to execute. | ||
15 | |||
16 | The base-priority of a klmirqd thread is below all regular | ||
17 | real-time tasks, but above all other Linux scheduling | ||
18 | classes (klmirqd threads are within the SCHED_LITMUS class). | ||
19 | Regular real-time tasks may increase the priority of | ||
20 | a klmirqd thread, but klmirqd is unaware of this | ||
21 | (this was not the case in prior incarnations of klmirqd). | ||
22 | */ | ||
23 | |||
24 | |||
25 | /* Initialize klmirqd */ | ||
26 | void init_klmirqd(void); | ||
27 | |||
28 | /* Raises a flag to tell klmirqd threads to terminate. | ||
29 | Termination is asynchronous, so some threads may still be | ||
30 | running after this function returns. */ | ||
31 | void kill_klmirqd(void); | ||
32 | |||
33 | void kill_klmirqd_thread(struct task_struct* klmirqd_thread); | ||
34 | |||
35 | /* Returns 1 if all NR_LITMUS_SOFTIRQD klmirqd threads are ready | ||
36 | to handle tasklets; 0 otherwise. */ | ||
37 | int klmirqd_is_ready(void); | ||
38 | |||
39 | /* Returns 1 if no NR_LITMUS_SOFTIRQD klmirqd threads are ready | ||
40 | to handle tasklets; 0 otherwise. */ | ||
41 | int klmirqd_is_dead(void); | ||
42 | |||
43 | |||
44 | typedef int (*klmirqd_cb_t) (void *arg); | ||
45 | |||
46 | typedef struct | ||
47 | { | ||
48 | klmirqd_cb_t func; | ||
49 | void* arg; | ||
50 | } klmirqd_callback_t; | ||
51 | |||
52 | /* Launches a klmirqd thread with the provided affinity. | ||
53 | |||
54 | Actual launch of threads is deferred to kworker's | ||
55 | workqueue, so daemons will likely not be immediately | ||
56 | running when this function returns, though the required | ||
57 | data will be initialized. | ||
58 | |||
59 | cpu == -1 for no affinity | ||
60 | |||
61 | Provide a name at most 31 characters long (32 bytes with the null terminator). | ||
62 | name == NULL for a default name. (all names are appended with | ||
63 | base-CPU affinity) | ||
64 | */ | ||
65 | #define MAX_KLMIRQD_NAME_LEN 31 | ||
66 | int launch_klmirqd_thread(char* name, int cpu, klmirqd_callback_t* cb); | ||
67 | |||
68 | |||
69 | /* Flushes all pending work out to the OS for regular | ||
70 | * tasklet/work processing. | ||
71 | */ | ||
72 | void flush_pending(struct task_struct* klmirqd_thread); | ||
73 | |||
74 | extern int __litmus_tasklet_schedule( | ||
75 | struct tasklet_struct *t, | ||
76 | struct task_struct *klmirqd_thread); | ||
77 | |||
78 | /* schedule a tasklet on klmirqd #k_id */ | ||
79 | static inline int litmus_tasklet_schedule( | ||
80 | struct tasklet_struct *t, | ||
81 | struct task_struct *klmirqd_thread) | ||
82 | { | ||
83 | int ret = 0; | ||
84 | if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | ||
85 | ret = __litmus_tasklet_schedule(t, klmirqd_thread); | ||
86 | } | ||
87 | return(ret); | ||
88 | } | ||
89 | |||
90 | /* for use by __tasklet_schedule() */ | ||
91 | static inline int _litmus_tasklet_schedule( | ||
92 | struct tasklet_struct *t, | ||
93 | struct task_struct *klmirqd_thread) | ||
94 | { | ||
95 | return(__litmus_tasklet_schedule(t, klmirqd_thread)); | ||
96 | } | ||
97 | |||
98 | |||
99 | |||
100 | |||
101 | extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, | ||
102 | struct task_struct *klmirqd_thread); | ||
103 | |||
104 | /* schedule a hi tasklet on klmirqd #k_id */ | ||
105 | static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t, | ||
106 | struct task_struct *klmirqd_thread) | ||
107 | { | ||
108 | int ret = 0; | ||
109 | if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | ||
110 | ret = __litmus_tasklet_hi_schedule(t, klmirqd_thread); | ||
111 | } | ||
112 | return(ret); | ||
113 | } | ||
114 | |||
115 | /* for use by __tasklet_hi_schedule() */ | ||
116 | static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t, | ||
117 | struct task_struct *klmirqd_thread) | ||
118 | { | ||
119 | return(__litmus_tasklet_hi_schedule(t, klmirqd_thread)); | ||
120 | } | ||
121 | |||
122 | |||
123 | |||
124 | |||
125 | |||
126 | extern int __litmus_tasklet_hi_schedule_first( | ||
127 | struct tasklet_struct *t, | ||
128 | struct task_struct *klmirqd_thread); | ||
129 | |||
130 | /* schedule a hi tasklet on klmirqd #k_id on next go-around */ | ||
131 | /* PRECONDITION: Interrupts must be disabled. */ | ||
132 | static inline int litmus_tasklet_hi_schedule_first( | ||
133 | struct tasklet_struct *t, | ||
134 | struct task_struct *klmirqd_thread) | ||
135 | { | ||
136 | int ret = 0; | ||
137 | if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | ||
138 | ret = __litmus_tasklet_hi_schedule_first(t, klmirqd_thread); | ||
139 | } | ||
140 | return(ret); | ||
141 | } | ||
142 | |||
143 | /* for use by __tasklet_hi_schedule_first() */ | ||
144 | static inline int _litmus_tasklet_hi_schedule_first( | ||
145 | struct tasklet_struct *t, | ||
146 | struct task_struct *klmirqd_thread) | ||
147 | { | ||
148 | return(__litmus_tasklet_hi_schedule_first(t, klmirqd_thread)); | ||
149 | } | ||
150 | |||
151 | |||
152 | |||
153 | ////////////// | ||
154 | |||
155 | extern int __litmus_schedule_work( | ||
156 | struct work_struct* w, | ||
157 | struct task_struct *klmirqd_thread); | ||
158 | |||
159 | static inline int litmus_schedule_work( | ||
160 | struct work_struct* w, | ||
161 | struct task_struct *klmirqd_thread) | ||
162 | { | ||
163 | return(__litmus_schedule_work(w, klmirqd_thread)); | ||
164 | } | ||
165 | |||
166 | #endif | ||
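A hedged sketch of the intended flow: launch a klmirqd daemon bound to a CPU, then redirect a tasklet to it; the tasklet's owner field (present only with CONFIG_LITMUS_SOFTIRQD or PAI support) names the task whose priority the daemon inherits. How the caller obtains the daemon's task_struct is left abstract, since the header does not prescribe it:

#include <litmus/litmus_softirq.h>

static klmirqd_callback_t my_cb;       /* .func/.arg filled in by the caller */

static void my_tasklet_fn(unsigned long data)
{
    /* deferred work, executed by the klmirqd thread */
}
static DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 0);

static void start_daemon(void)
{
    launch_klmirqd_thread("my-klmirqd", 0 /* CPU */, &my_cb);
}

/* Sketch: 'daemon' is however the caller tracked the launched thread
 * (an assumption; e.g. recorded from the launch callback). */
static void redirect_tasklet(struct task_struct *daemon)
{
    my_tasklet.owner = current;          /* priority source to inherit */
    litmus_tasklet_schedule(&my_tasklet, daemon);
}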
diff --git a/include/litmus/locking.h b/include/litmus/locking.h index 4d7b870cb443..4a5f198a0407 100644 --- a/include/litmus/locking.h +++ b/include/litmus/locking.h | |||
@@ -1,28 +1,163 @@ | |||
1 | #ifndef LITMUS_LOCKING_H | 1 | #ifndef LITMUS_LOCKING_H |
2 | #define LITMUS_LOCKING_H | 2 | #define LITMUS_LOCKING_H |
3 | 3 | ||
4 | #include <linux/list.h> | ||
5 | |||
4 | struct litmus_lock_ops; | 6 | struct litmus_lock_ops; |
5 | 7 | ||
8 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
9 | struct nested_info | ||
10 | { | ||
11 | struct litmus_lock *lock; | ||
12 | struct task_struct *hp_waiter_eff_prio; | ||
13 | struct task_struct **hp_waiter_ptr; | ||
14 | struct binheap_node hp_binheap_node; | ||
15 | }; | ||
16 | |||
17 | static inline struct task_struct* top_priority(struct binheap* handle) { | ||
18 | if(!binheap_empty(handle)) { | ||
19 | return (struct task_struct*)(binheap_top_entry(handle, struct nested_info, hp_binheap_node)->hp_waiter_eff_prio); | ||
20 | } | ||
21 | return NULL; | ||
22 | } | ||
23 | |||
24 | void print_hp_waiters(struct binheap_node* n, int depth); | ||
25 | #endif | ||
26 | |||
27 | |||
6 | /* Generic base struct for LITMUS^RT userspace semaphores. | 28 | /* Generic base struct for LITMUS^RT userspace semaphores. |
7 | * This structure should be embedded in protocol-specific semaphores. | 29 | * This structure should be embedded in protocol-specific semaphores. |
8 | */ | 30 | */ |
9 | struct litmus_lock { | 31 | struct litmus_lock { |
10 | struct litmus_lock_ops *ops; | 32 | struct litmus_lock_ops *ops; |
11 | int type; | 33 | int type; |
34 | |||
35 | int ident; | ||
36 | |||
37 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
38 | struct nested_info nest; | ||
39 | //#ifdef CONFIG_DEBUG_SPINLOCK | ||
40 | char cheat_lockdep[2]; | ||
41 | struct lock_class_key key; | ||
42 | //#endif | ||
43 | #endif | ||
12 | }; | 44 | }; |
13 | 45 | ||
46 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
47 | |||
48 | #define MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE | ||
49 | |||
50 | typedef struct dgl_wait_state { | ||
51 | struct task_struct *task; /* task waiting on DGL */ | ||
52 | struct litmus_lock *locks[MAX_DGL_SIZE]; /* requested locks in DGL */ | ||
53 | int size; /* size of the DGL */ | ||
54 | int nr_remaining; /* nr locks remaining before DGL is complete */ | ||
55 | int last_primary; /* index lock in locks[] that has active priority */ | ||
56 | wait_queue_t wq_nodes[MAX_DGL_SIZE]; | ||
57 | } dgl_wait_state_t; | ||
58 | |||
59 | void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait); | ||
60 | void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/); | ||
61 | |||
62 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait); | ||
63 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key); | ||
64 | void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait, struct task_struct **task); | ||
65 | #endif | ||
66 | |||
67 | typedef int (*lock_op_t)(struct litmus_lock *l); | ||
68 | typedef lock_op_t lock_close_t; | ||
69 | typedef lock_op_t lock_lock_t; | ||
70 | typedef lock_op_t lock_unlock_t; | ||
71 | |||
72 | typedef int (*lock_open_t)(struct litmus_lock *l, void* __user arg); | ||
73 | typedef void (*lock_free_t)(struct litmus_lock *l); | ||
74 | |||
14 | struct litmus_lock_ops { | 75 | struct litmus_lock_ops { |
15 | /* Current task tries to obtain / drop a reference to a lock. | 76 | /* Current task tries to obtain / drop a reference to a lock. |
16 | * Optional methods, allowed by default. */ | 77 | * Optional methods, allowed by default. */ |
17 | int (*open)(struct litmus_lock*, void* __user); | 78 | lock_open_t open; |
18 | int (*close)(struct litmus_lock*); | 79 | lock_close_t close; |
19 | 80 | ||
20 | /* Current tries to lock/unlock this lock (mandatory methods). */ | 81 | /* Current tries to lock/unlock this lock (mandatory methods). */ |
21 | int (*lock)(struct litmus_lock*); | 82 | lock_lock_t lock; |
22 | int (*unlock)(struct litmus_lock*); | 83 | lock_unlock_t unlock; |
23 | 84 | ||
24 | /* The lock is no longer being referenced (mandatory method). */ | 85 | /* The lock is no longer being referenced (mandatory method). */ |
25 | void (*deallocate)(struct litmus_lock*); | 86 | lock_free_t deallocate; |
87 | |||
88 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
89 | void (*propagate_increase_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags); | ||
90 | void (*propagate_decrease_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags); | ||
91 | #endif | ||
92 | |||
93 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
94 | raw_spinlock_t* (*get_dgl_spin_lock)(struct litmus_lock *l); | ||
95 | int (*dgl_lock)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node); | ||
96 | int (*is_owner)(struct litmus_lock *l, struct task_struct *t); | ||
97 | void (*enable_priority)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait); | ||
98 | #endif | ||
26 | }; | 99 | }; |
27 | 100 | ||
101 | |||
102 | /* | ||
103 | Nested inheritance can be achieved with fine-grain locking when there is | ||
104 | no need for DGL support, presuming locks are acquired in a partial order | ||
105 | (no cycles!). However, DGLs allow locks to be acquired in any order. This | ||
106 | makes nested inheritance very difficult to realize with fine-grain | ||
107 | locks (we do not yet know a solution), so we use a big lock instead. | ||
108 | |||
109 | The code contains both fine-grain and coarse-grain methods together, side by side. | ||
110 | Lock operations are intentionally *NOT* wrapped in ifdef/endif, to keep the code | ||
111 | readable. However, this leads to the odd situation where both code paths | ||
112 | appear together in the code as if they were both active. | ||
113 | |||
114 | THIS IS NOT REALLY THE CASE! ONLY ONE CODE PATH IS ACTUALLY ACTIVE! | ||
115 | |||
116 | Example: | ||
117 | lock_global_irqsave(coarseLock, flags); | ||
118 | lock_fine_irqsave(fineLock, flags); | ||
119 | |||
120 | Reality (coarse): | ||
121 | lock_global_irqsave(coarseLock, flags); | ||
122 | //lock_fine_irqsave(fineLock, flags); | ||
123 | |||
124 | Reality (fine): | ||
125 | //lock_global_irqsave(coarseLock, flags); | ||
126 | lock_fine_irqsave(fineLock, flags); | ||
127 | |||
128 | Be careful when you read code involving nested inheritance. | ||
129 | */ | ||
130 | #if defined(CONFIG_LITMUS_DGL_SUPPORT) | ||
131 | /* DGL requires a big lock to implement nested inheritance */ | ||
132 | #define lock_global_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags)) | ||
133 | #define lock_global(lock) raw_spin_lock((lock)) | ||
134 | #define unlock_global_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags)) | ||
135 | #define unlock_global(lock) raw_spin_unlock((lock)) | ||
136 | |||
137 | /* fine-grain locking are no-ops with DGL support */ | ||
138 | #define lock_fine_irqsave(lock, flags) | ||
139 | #define lock_fine(lock) | ||
140 | #define unlock_fine_irqrestore(lock, flags) | ||
141 | #define unlock_fine(lock) | ||
142 | |||
143 | #elif defined(CONFIG_LITMUS_NESTED_LOCKING) | ||
144 | /* Use fine-grain locking when DGLs are disabled. */ | ||
145 | /* global locking are no-ops without DGL support */ | ||
146 | #define lock_global_irqsave(lock, flags) | ||
147 | #define lock_global(lock) | ||
148 | #define unlock_global_irqrestore(lock, flags) | ||
149 | #define unlock_global(lock) | ||
150 | |||
151 | #define lock_fine_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags)) | ||
152 | #define lock_fine(lock) raw_spin_lock((lock)) | ||
153 | #define unlock_fine_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags)) | ||
154 | #define unlock_fine(lock) raw_spin_unlock((lock)) | ||
155 | |||
156 | #endif | ||
157 | |||
158 | |||
159 | void suspend_for_lock(void); | ||
160 | |||
161 | |||
28 | #endif | 162 | #endif |
163 | |||
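Following the comment above, a hedged sketch of the dual-path pattern inside a lock operation; exactly one of the two lock/unlock pairs expands to real code, depending on whether CONFIG_LITMUS_DGL_SUPPORT or plain CONFIG_LITMUS_NESTED_LOCKING is active (the function and the rsm_mutex use are illustrative):

#include <litmus/locking.h>
#include <litmus/rsm_lock.h>

/* Sketch: both paths written side by side, as the comment describes. */
static void example_inheritance_update(raw_spinlock_t *dgl_lock,
                                       struct rsm_mutex *mutex)
{
    unsigned long flags;

    lock_global_irqsave(dgl_lock, flags);     /* real only with DGL support */
    lock_fine_irqsave(&mutex->lock, flags);   /* real only with fine-grain nesting */

    /* ... inspect hp_waiter, propagate inheritance, etc. ... */

    unlock_fine_irqrestore(&mutex->lock, flags);
    unlock_global_irqrestore(dgl_lock, flags);
}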
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h new file mode 100644 index 000000000000..8c2a5524512e --- /dev/null +++ b/include/litmus/nvidia_info.h | |||
@@ -0,0 +1,51 @@ | |||
1 | #ifndef __LITMUS_NVIDIA_H | ||
2 | #define __LITMUS_NVIDIA_H | ||
3 | |||
4 | #include <linux/interrupt.h> | ||
5 | |||
6 | |||
7 | #include <litmus/litmus_softirq.h> | ||
8 | |||
9 | #define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM | ||
10 | |||
11 | /* TODO: Make this a function that checks the PCIe bus or maybe proc settings */ | ||
12 | #define num_online_gpus() (NV_DEVICE_NUM) | ||
13 | |||
14 | |||
15 | /* Functions used for decoding NVIDIA blobs. */ | ||
16 | |||
17 | int init_nvidia_info(void); | ||
18 | void shutdown_nvidia_info(void); | ||
19 | |||
20 | int is_nvidia_func(void* func_addr); | ||
21 | |||
22 | void dump_nvidia_info(const struct tasklet_struct *t); | ||
23 | |||
24 | // Returns the NVIDIA device # associated with the provided tasklet or work_struct. | ||
25 | u32 get_tasklet_nv_device_num(const struct tasklet_struct *t); | ||
26 | u32 get_work_nv_device_num(const struct work_struct *t); | ||
27 | |||
28 | /* Functions for figuring out the priority of GPU-using tasks */ | ||
29 | |||
30 | struct task_struct* get_nv_max_device_owner(u32 target_device_id); | ||
31 | |||
32 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
33 | struct task_struct* get_nv_klmirqd_thread(u32 target_device_id); | ||
34 | #endif | ||
35 | |||
36 | /* call when the GPU-holding task, t, blocks */ | ||
37 | long enable_gpu_owner(struct task_struct *t); | ||
38 | |||
39 | /* call when the GPU-holding task, t, resumes */ | ||
40 | long disable_gpu_owner(struct task_struct *t); | ||
41 | |||
42 | /* call when the GPU-holding task, t, increases its priority */ | ||
43 | int gpu_owner_increase_priority(struct task_struct *t); | ||
44 | |||
45 | /* call when the GPU-holding task, t, decreases its priority */ | ||
46 | int gpu_owner_decrease_priority(struct task_struct *t); | ||
47 | |||
48 | |||
49 | int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t); | ||
50 | |||
51 | #endif | ||
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h index 380b886d78ff..8f3a9ca2d4e3 100644 --- a/include/litmus/preempt.h +++ b/include/litmus/preempt.h | |||
@@ -26,12 +26,12 @@ const char* sched_state_name(int s); | |||
26 | (x), #x, __FUNCTION__); \ | 26 | (x), #x, __FUNCTION__); \ |
27 | } while (0); | 27 | } while (0); |
28 | 28 | ||
29 | //#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */ | ||
29 | #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ | 30 | #define TRACE_SCHED_STATE_CHANGE(x, y, cpu) \ |
30 | TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ | 31 | TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \ |
31 | cpu, (x), sched_state_name(x), \ | 32 | cpu, (x), sched_state_name(x), \ |
32 | (y), sched_state_name(y)) | 33 | (y), sched_state_name(y)) |
33 | 34 | ||
34 | |||
35 | typedef enum scheduling_state { | 35 | typedef enum scheduling_state { |
36 | TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that | 36 | TASK_SCHEDULED = (1 << 0), /* The currently scheduled task is the one that |
37 | * should be scheduled, and the processor does not | 37 | * should be scheduled, and the processor does not |
diff --git a/include/litmus/rsm_lock.h b/include/litmus/rsm_lock.h new file mode 100644 index 000000000000..a15189683de4 --- /dev/null +++ b/include/litmus/rsm_lock.h | |||
@@ -0,0 +1,54 @@ | |||
1 | #ifndef LITMUS_RSM_H | ||
2 | #define LITMUS_RSM_H | ||
3 | |||
4 | #include <litmus/litmus.h> | ||
5 | #include <litmus/binheap.h> | ||
6 | #include <litmus/locking.h> | ||
7 | |||
8 | /* struct for semaphore with priority inheritance */ | ||
9 | struct rsm_mutex { | ||
10 | struct litmus_lock litmus_lock; | ||
11 | |||
12 | /* current resource holder */ | ||
13 | struct task_struct *owner; | ||
14 | |||
15 | /* highest-priority waiter */ | ||
16 | struct task_struct *hp_waiter; | ||
17 | |||
18 | /* FIFO queue of waiting tasks -- for now. time stamp in the future. */ | ||
19 | wait_queue_head_t wait; | ||
20 | |||
21 | /* we do some nesting within spinlocks, so we can't use the normal | ||
22 | sleeplocks found in wait_queue_head_t. */ | ||
23 | raw_spinlock_t lock; | ||
24 | }; | ||
25 | |||
26 | static inline struct rsm_mutex* rsm_mutex_from_lock(struct litmus_lock* lock) | ||
27 | { | ||
28 | return container_of(lock, struct rsm_mutex, litmus_lock); | ||
29 | } | ||
30 | |||
31 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
32 | int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t); | ||
33 | int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node); | ||
34 | void rsm_mutex_enable_priority(struct litmus_lock *l, dgl_wait_state_t* dgl_wait); | ||
35 | #endif | ||
36 | |||
37 | void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l, | ||
38 | struct task_struct* t, | ||
39 | raw_spinlock_t* to_unlock, | ||
40 | unsigned long irqflags); | ||
41 | |||
42 | void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l, | ||
43 | struct task_struct* t, | ||
44 | raw_spinlock_t* to_unlock, | ||
45 | unsigned long irqflags); | ||
46 | |||
47 | int rsm_mutex_lock(struct litmus_lock* l); | ||
48 | int rsm_mutex_unlock(struct litmus_lock* l); | ||
49 | int rsm_mutex_close(struct litmus_lock* l); | ||
50 | void rsm_mutex_free(struct litmus_lock* l); | ||
51 | struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops*); | ||
52 | |||
53 | |||
54 | #endif \ No newline at end of file | ||
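The header above only declares the RSM mutex operations; the implementation and the litmus_lock_ops wiring presumably live in a litmus/rsm_lock.c elsewhere in this patch. As a rough sketch of how a plugin would hand these out (the litmus_lock_ops field names are assumed from the roles of the functions above and are not shown in this hunk):

    /* Sketch only: wire the rsm_mutex entry points into an ops table and
     * allocate one instance.  The litmus_lock_ops field names are assumed. */
    static struct litmus_lock_ops example_rsm_ops = {
        .lock       = rsm_mutex_lock,
        .unlock     = rsm_mutex_unlock,
        .close      = rsm_mutex_close,
        .deallocate = rsm_mutex_free,
    };

    static struct litmus_lock *example_alloc_rsm_mutex(void)
    {
        /* rsm_mutex_new() allocates the semaphore and installs the ops. */
        return rsm_mutex_new(&example_rsm_ops);
    }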
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index 4cd06dd32906..39685a351cb1 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h | |||
@@ -1,9 +1,11 @@ | |||
1 | #ifndef _LINUX_RT_PARAM_H_ | ||
2 | #define _LINUX_RT_PARAM_H_ | ||
1 | /* | 3 | /* |
2 | * Definition of the scheduler plugin interface. | 4 | * Definition of the scheduler plugin interface. |
3 | * | 5 | * |
4 | */ | 6 | */ |
5 | #ifndef _LINUX_RT_PARAM_H_ | 7 | |
6 | #define _LINUX_RT_PARAM_H_ | 8 | #include <litmus/fpmath.h> |
7 | 9 | ||
8 | /* Litmus time type. */ | 10 | /* Litmus time type. */ |
9 | typedef unsigned long long lt_t; | 11 | typedef unsigned long long lt_t; |
@@ -30,9 +32,43 @@ typedef enum { | |||
30 | typedef enum { | 32 | typedef enum { |
31 | NO_ENFORCEMENT, /* job may overrun unhindered */ | 33 | NO_ENFORCEMENT, /* job may overrun unhindered */ |
32 | QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ | 34 | QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */ |
33 | PRECISE_ENFORCEMENT /* budgets are enforced with hrtimers */ | 35 | PRECISE_ENFORCEMENT, /* budgets are enforced with hrtimers */ |
34 | } budget_policy_t; | 36 | } budget_policy_t; |
35 | 37 | ||
38 | typedef enum { | ||
39 | NO_SIGNALS, /* job receives no signals when it exhausts its budget */ | ||
40 | QUANTUM_SIGNALS, /* budget signals are only sent on quantum boundaries */ | ||
41 | PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */ | ||
42 | } budget_signal_policy_t; | ||
43 | |||
44 | typedef enum { | ||
45 | AUX_ENABLE = 0x1, | ||
46 | AUX_CURRENT = (AUX_ENABLE<<1), | ||
47 | AUX_FUTURE = (AUX_CURRENT<<2) | ||
48 | } aux_flags_t; | ||
49 | |||
50 | /* mirror of st_event_record_type_t | ||
51 | * Assume all are UNsupported, unless otherwise stated. */ | ||
52 | typedef enum { | ||
53 | ST_INJECT_NAME = 1, /* supported */ | ||
54 | ST_INJECT_PARAM, /* supported */ | ||
55 | ST_INJECT_RELEASE, /* supported */ | ||
56 | ST_INJECT_ASSIGNED, | ||
57 | ST_INJECT_SWITCH_TO, | ||
58 | ST_INJECT_SWITCH_AWAY, | ||
59 | ST_INJECT_COMPLETION, /* supported */ | ||
60 | ST_INJECT_BLOCK, | ||
61 | ST_INJECT_RESUME, | ||
62 | ST_INJECT_ACTION, | ||
63 | ST_INJECT_SYS_RELEASE, /* supported */ | ||
64 | } sched_trace_injection_events_t; | ||
65 | |||
66 | struct st_inject_args { | ||
67 | lt_t release; | ||
68 | lt_t deadline; | ||
69 | unsigned int job_no; | ||
70 | }; | ||
71 | |||
36 | /* We use the common priority interpretation "lower index == higher priority", | 72 | /* We use the common priority interpretation "lower index == higher priority", |
37 | * which is commonly used in fixed-priority schedulability analysis papers. | 73 | * which is commonly used in fixed-priority schedulability analysis papers. |
38 | * So, a numerically lower priority value implies higher scheduling priority, | 74 | * So, a numerically lower priority value implies higher scheduling priority, |
@@ -62,6 +98,7 @@ struct rt_task { | |||
62 | unsigned int priority; | 98 | unsigned int priority; |
63 | task_class_t cls; | 99 | task_class_t cls; |
64 | budget_policy_t budget_policy; /* ignored by pfair */ | 100 | budget_policy_t budget_policy; /* ignored by pfair */ |
101 | budget_signal_policy_t budget_signal_policy; /* currently ignored by pfair */ | ||
65 | }; | 102 | }; |
66 | 103 | ||
67 | union np_flag { | 104 | union np_flag { |
@@ -74,6 +111,19 @@ union np_flag { | |||
74 | } np; | 111 | } np; |
75 | }; | 112 | }; |
76 | 113 | ||
114 | struct affinity_observer_args | ||
115 | { | ||
116 | int lock_od; | ||
117 | }; | ||
118 | |||
119 | struct gpu_affinity_observer_args | ||
120 | { | ||
121 | struct affinity_observer_args obs; | ||
122 | int replica_to_gpu_offset; | ||
123 | int nr_simult_users; | ||
124 | int relaxed_rules; | ||
125 | }; | ||
126 | |||
77 | /* The definition of the data that is shared between the kernel and real-time | 127 | /* The definition of the data that is shared between the kernel and real-time |
78 | * tasks via a shared page (see litmus/ctrldev.c). | 128 | * tasks via a shared page (see litmus/ctrldev.c). |
79 | * | 129 | * |
@@ -115,6 +165,13 @@ struct control_page { | |||
115 | /* don't export internal data structures to user space (liblitmus) */ | 165 | /* don't export internal data structures to user space (liblitmus) */ |
116 | #ifdef __KERNEL__ | 166 | #ifdef __KERNEL__ |
117 | 167 | ||
168 | #include <litmus/binheap.h> | ||
169 | #include <linux/semaphore.h> | ||
170 | |||
171 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
172 | #include <linux/interrupt.h> | ||
173 | #endif | ||
174 | |||
118 | struct _rt_domain; | 175 | struct _rt_domain; |
119 | struct bheap_node; | 176 | struct bheap_node; |
120 | struct release_heap; | 177 | struct release_heap; |
@@ -142,10 +199,82 @@ struct rt_job { | |||
142 | * Increase this sequence number when a job is released. | 199 | * Increase this sequence number when a job is released. |
143 | */ | 200 | */ |
144 | unsigned int job_no; | 201 | unsigned int job_no; |
202 | |||
203 | /* bits: | ||
204 | * 0th: Set if a budget exhaustion signal has already been sent for | ||
205 | * the current job. */ | ||
206 | unsigned long flags; | ||
145 | }; | 207 | }; |
146 | 208 | ||
209 | #define RT_JOB_SIG_BUDGET_SENT 0 | ||
210 | |||
147 | struct pfair_param; | 211 | struct pfair_param; |
148 | 212 | ||
213 | enum klmirqd_sem_status | ||
214 | { | ||
215 | NEED_TO_REACQUIRE, | ||
216 | REACQUIRING, | ||
217 | NOT_HELD, | ||
218 | HELD | ||
219 | }; | ||
220 | |||
221 | typedef enum gpu_migration_dist | ||
222 | { | ||
223 | // TODO: Make this variable against NR_NVIDIA_GPUS | ||
224 | MIG_LOCAL = 0, | ||
225 | MIG_NEAR = 1, | ||
226 | MIG_MED = 2, | ||
227 | MIG_FAR = 3, // 8 GPUs in a binary tree hierarchy | ||
228 | MIG_NONE = 4, | ||
229 | |||
230 | MIG_LAST = MIG_NONE | ||
231 | } gpu_migration_dist_t; | ||
232 | |||
233 | typedef struct feedback_est{ | ||
234 | fp_t est; | ||
235 | fp_t accum_err; | ||
236 | } feedback_est_t; | ||
237 | |||
238 | |||
239 | #define AVG_EST_WINDOW_SIZE 20 | ||
240 | |||
241 | typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk); | ||
242 | |||
243 | typedef struct avg_est{ | ||
244 | lt_t history[AVG_EST_WINDOW_SIZE]; | ||
245 | uint16_t count; | ||
246 | uint16_t idx; | ||
247 | lt_t sum; | ||
248 | lt_t std; | ||
249 | lt_t avg; | ||
250 | } avg_est_t; | ||
251 | |||
252 | |||
253 | |||
254 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
255 | struct klmirqd_info | ||
256 | { | ||
257 | struct task_struct* klmirqd; | ||
258 | struct task_struct* current_owner; | ||
259 | unsigned int terminating:1; | ||
260 | |||
261 | raw_spinlock_t lock; | ||
262 | |||
263 | u32 pending; | ||
264 | atomic_t num_hi_pending; | ||
265 | atomic_t num_low_pending; | ||
266 | atomic_t num_work_pending; | ||
267 | |||
268 | /* in order of priority */ | ||
269 | struct tasklet_head pending_tasklets_hi; | ||
270 | struct tasklet_head pending_tasklets; | ||
271 | struct list_head worklist; | ||
272 | |||
273 | struct list_head klmirqd_reg; | ||
274 | }; | ||
275 | #endif | ||
276 | |||
277 | |||
149 | /* RT task parameters for scheduling extensions | 278 | /* RT task parameters for scheduling extensions |
150 | * These parameters are inherited during clone and therefore must | 279 | * These parameters are inherited during clone and therefore must |
151 | * be explicitly set up before the task set is launched. | 280 | * be explicitly set up before the task set is launched. |
@@ -163,6 +292,40 @@ struct rt_param { | |||
163 | /* has the task completed? */ | 292 | /* has the task completed? */ |
164 | unsigned int completed:1; | 293 | unsigned int completed:1; |
165 | 294 | ||
295 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
296 | /* proxy threads have minimum priority by default */ | ||
297 | unsigned int is_interrupt_thread:1; | ||
298 | |||
299 | /* pointer to data used by klmirqd thread. | ||
300 | * | ||
301 | * ptr only valid if is_interrupt_thread == 1 | ||
302 | */ | ||
303 | struct klmirqd_info* klmirqd_info; | ||
304 | #endif | ||
305 | |||
306 | #ifdef CONFIG_LITMUS_NVIDIA | ||
307 | /* number of top-half interrupts handled on behalf of current job */ | ||
308 | atomic_t nv_int_count; | ||
309 | long unsigned int held_gpus; // bitmap of held GPUs. | ||
310 | struct binheap_node gpu_owner_node; // just one GPU for now... | ||
311 | unsigned int hide_from_gpu:1; | ||
312 | |||
313 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
314 | avg_est_t gpu_migration_est[MIG_LAST+1]; | ||
315 | |||
316 | gpu_migration_dist_t gpu_migration; | ||
317 | int last_gpu; | ||
318 | |||
319 | notify_rsrc_exit_t rsrc_exit_cb; | ||
320 | void* rsrc_exit_cb_args; | ||
321 | |||
322 | lt_t accum_gpu_time; | ||
323 | lt_t gpu_time_stamp; | ||
324 | |||
325 | unsigned int suspend_gpu_tracker_on_block:1; | ||
326 | #endif | ||
327 | #endif | ||
328 | |||
166 | #ifdef CONFIG_LITMUS_LOCKING | 329 | #ifdef CONFIG_LITMUS_LOCKING |
167 | /* Is the task being priority-boosted by a locking protocol? */ | 330 | /* Is the task being priority-boosted by a locking protocol? */ |
168 | unsigned int priority_boosted:1; | 331 | unsigned int priority_boosted:1; |
@@ -182,7 +345,26 @@ struct rt_param { | |||
182 | * could point to self if PI does not result in | 345 | * could point to self if PI does not result in |
183 | * an increased task priority. | 346 | * an increased task priority. |
184 | */ | 347 | */ |
185 | struct task_struct* inh_task; | 348 | struct task_struct* inh_task; |
349 | |||
350 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
351 | raw_spinlock_t hp_blocked_tasks_lock; | ||
352 | struct binheap hp_blocked_tasks; | ||
353 | |||
354 | /* pointer to lock upon which is currently blocked */ | ||
355 | struct litmus_lock* blocked_lock; | ||
356 | #endif | ||
357 | |||
358 | |||
359 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
360 | unsigned int is_aux_task:1; | ||
361 | unsigned int has_aux_tasks:1; | ||
362 | unsigned int hide_from_aux_tasks:1; | ||
363 | |||
364 | struct list_head aux_task_node; | ||
365 | struct binheap_node aux_task_owner_node; | ||
366 | #endif | ||
367 | |||
186 | 368 | ||
187 | #ifdef CONFIG_NP_SECTION | 369 | #ifdef CONFIG_NP_SECTION |
188 | /* For the FMLP under PSN-EDF, it is required to make the task | 370 | /* For the FMLP under PSN-EDF, it is required to make the task |
@@ -248,6 +430,16 @@ struct rt_param { | |||
248 | struct control_page * ctrl_page; | 430 | struct control_page * ctrl_page; |
249 | }; | 431 | }; |
250 | 432 | ||
433 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
434 | struct aux_data | ||
435 | { | ||
436 | struct list_head aux_tasks; | ||
437 | struct binheap aux_task_owners; | ||
438 | unsigned int initialized:1; | ||
439 | unsigned int aux_future:1; | ||
440 | }; | ||
251 | #endif | 441 | #endif |
252 | 442 | ||
443 | #endif /* __KERNEL */ | ||
444 | |||
253 | #endif | 445 | #endif |
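The avg_est_t structure added above implies a fixed-size sliding window over the last AVG_EST_WINDOW_SIZE migration-cost samples. Purely to illustrate how its fields fit together (the real update code is elsewhere in the patch and may maintain std differently), an update step would look roughly like:

    /* Illustrative ring-buffer update for avg_est_t; not the actual code. */
    static void example_update_avg_est(avg_est_t *est, lt_t observed)
    {
        if (est->count < AVG_EST_WINDOW_SIZE)
            est->count++;
        else
            est->sum -= est->history[est->idx];  /* evict the oldest sample */

        est->history[est->idx] = observed;
        est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;

        est->sum += observed;
        est->avg  = est->sum / est->count;       /* integer mean over the window */
    }

gpu_migration_est[] in rt_param above keeps one such window per gpu_migration_dist_t value, which is why it is sized MIG_LAST+1.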
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h index 1546ab7f1d66..d0e7d74bb45e 100644 --- a/include/litmus/sched_plugin.h +++ b/include/litmus/sched_plugin.h | |||
@@ -11,6 +11,12 @@ | |||
11 | #include <litmus/locking.h> | 11 | #include <litmus/locking.h> |
12 | #endif | 12 | #endif |
13 | 13 | ||
14 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
15 | #include <litmus/kexclu_affinity.h> | ||
16 | #endif | ||
17 | |||
18 | #include <linux/interrupt.h> | ||
19 | |||
14 | /************************ setup/tear down ********************/ | 20 | /************************ setup/tear down ********************/ |
15 | 21 | ||
16 | typedef long (*activate_plugin_t) (void); | 22 | typedef long (*activate_plugin_t) (void); |
@@ -29,7 +35,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev); | |||
29 | */ | 35 | */ |
30 | typedef void (*finish_switch_t)(struct task_struct *prev); | 36 | typedef void (*finish_switch_t)(struct task_struct *prev); |
31 | 37 | ||
32 | |||
33 | /********************* task state changes ********************/ | 38 | /********************* task state changes ********************/ |
34 | 39 | ||
35 | /* Called to setup a new real-time task. | 40 | /* Called to setup a new real-time task. |
@@ -60,6 +65,49 @@ typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, | |||
60 | void* __user config); | 65 | void* __user config); |
61 | #endif | 66 | #endif |
62 | 67 | ||
68 | struct affinity_observer; | ||
69 | typedef long (*allocate_affinity_observer_t) ( | ||
70 | struct affinity_observer **aff_obs, int type, | ||
71 | void* __user config); | ||
72 | |||
73 | typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); | ||
74 | typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); | ||
75 | |||
76 | typedef int (*__increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh); | ||
77 | typedef int (*__decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh); | ||
78 | |||
79 | typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh, | ||
80 | raw_spinlock_t *to_unlock, unsigned long irqflags); | ||
81 | typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh, | ||
82 | raw_spinlock_t *to_unlock, unsigned long irqflags); | ||
83 | |||
84 | |||
85 | typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet); | ||
86 | typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio, | ||
87 | struct task_struct *new_prio); | ||
88 | typedef void (*run_tasklets_t)(struct task_struct* next); | ||
89 | |||
90 | typedef raw_spinlock_t* (*get_dgl_spinlock_t) (struct task_struct *t); | ||
91 | |||
92 | |||
93 | typedef int (*higher_prio_t)(struct task_struct* a, struct task_struct* b); | ||
94 | |||
95 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
96 | |||
97 | typedef enum | ||
98 | { | ||
99 | BASE, | ||
100 | EFFECTIVE | ||
101 | } comparison_mode_t; | ||
102 | |||
103 | typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod, | ||
104 | struct task_struct* b, comparison_mode_t b_mod); | ||
105 | #endif | ||
106 | |||
107 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
108 | typedef int (*default_cpu_for_gpu_t)(int gpu); | ||
109 | #endif | ||
110 | |||
63 | 111 | ||
64 | /********************* sys call backends ********************/ | 112 | /********************* sys call backends ********************/ |
65 | /* This function causes the caller to sleep until the next release */ | 113 | /* This function causes the caller to sleep until the next release */ |
@@ -90,14 +138,42 @@ struct sched_plugin { | |||
90 | /* task state changes */ | 138 | /* task state changes */ |
91 | admit_task_t admit_task; | 139 | admit_task_t admit_task; |
92 | 140 | ||
93 | task_new_t task_new; | 141 | task_new_t task_new; |
94 | task_wake_up_t task_wake_up; | 142 | task_wake_up_t task_wake_up; |
95 | task_block_t task_block; | 143 | task_block_t task_block; |
96 | task_exit_t task_exit; | 144 | task_exit_t task_exit; |
97 | 145 | ||
146 | higher_prio_t compare; | ||
147 | |||
98 | #ifdef CONFIG_LITMUS_LOCKING | 148 | #ifdef CONFIG_LITMUS_LOCKING |
99 | /* locking protocols */ | 149 | /* locking protocols */ |
100 | allocate_lock_t allocate_lock; | 150 | allocate_lock_t allocate_lock; |
151 | increase_prio_t increase_prio; | ||
152 | decrease_prio_t decrease_prio; | ||
153 | |||
154 | __increase_prio_t __increase_prio; | ||
155 | __decrease_prio_t __decrease_prio; | ||
156 | #endif | ||
157 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
158 | nested_increase_prio_t nested_increase_prio; | ||
159 | nested_decrease_prio_t nested_decrease_prio; | ||
160 | __higher_prio_t __compare; | ||
161 | #endif | ||
162 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
163 | get_dgl_spinlock_t get_dgl_spinlock; | ||
164 | #endif | ||
165 | |||
166 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
167 | allocate_affinity_observer_t allocate_aff_obs; | ||
168 | #endif | ||
169 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
170 | enqueue_pai_tasklet_t enqueue_pai_tasklet; | ||
171 | change_prio_pai_tasklet_t change_prio_pai_tasklet; | ||
172 | run_tasklets_t run_tasklets; | ||
173 | #endif | ||
174 | |||
175 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
176 | default_cpu_for_gpu_t map_gpu_to_cpu; | ||
101 | #endif | 177 | #endif |
102 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); | 178 | } __attribute__ ((__aligned__(SMP_CACHE_BYTES))); |
103 | 179 | ||
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h index 82bde8241298..7af12f49c600 100644 --- a/include/litmus/sched_trace.h +++ b/include/litmus/sched_trace.h | |||
@@ -10,13 +10,14 @@ struct st_trace_header { | |||
10 | u8 type; /* Of what type is this record? */ | 10 | u8 type; /* Of what type is this record? */ |
11 | u8 cpu; /* On which CPU was it recorded? */ | 11 | u8 cpu; /* On which CPU was it recorded? */ |
12 | u16 pid; /* PID of the task. */ | 12 | u16 pid; /* PID of the task. */ |
13 | u32 job; /* The job sequence number. */ | 13 | u32 job:24; /* The job sequence number. */ |
14 | }; | 14 | u8 extra; |
15 | } __attribute__((packed)); | ||
15 | 16 | ||
16 | #define ST_NAME_LEN 16 | 17 | #define ST_NAME_LEN 16 |
17 | struct st_name_data { | 18 | struct st_name_data { |
18 | char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ | 19 | char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ |
19 | }; | 20 | } __attribute__((packed)); |
20 | 21 | ||
21 | struct st_param_data { /* regular params */ | 22 | struct st_param_data { /* regular params */ |
22 | u32 wcet; | 23 | u32 wcet; |
@@ -25,30 +26,29 @@ struct st_param_data { /* regular params */ | |||
25 | u8 partition; | 26 | u8 partition; |
26 | u8 class; | 27 | u8 class; |
27 | u8 __unused[2]; | 28 | u8 __unused[2]; |
28 | }; | 29 | } __attribute__((packed)); |
29 | 30 | ||
30 | struct st_release_data { /* A job is was/is going to be released. */ | 31 | struct st_release_data { /* A job is was/is going to be released. */ |
31 | u64 release; /* What's the release time? */ | 32 | u64 release; /* What's the release time? */ |
32 | u64 deadline; /* By when must it finish? */ | 33 | u64 deadline; /* By when must it finish? */ |
33 | }; | 34 | } __attribute__((packed)); |
34 | 35 | ||
35 | struct st_assigned_data { /* A job was asigned to a CPU. */ | 36 | struct st_assigned_data { /* A job was asigned to a CPU. */ |
36 | u64 when; | 37 | u64 when; |
37 | u8 target; /* Where should it execute? */ | 38 | u8 target; /* Where should it execute? */ |
38 | u8 __unused[7]; | 39 | u8 __unused[7]; |
39 | }; | 40 | } __attribute__((packed)); |
40 | 41 | ||
41 | struct st_switch_to_data { /* A process was switched to on a given CPU. */ | 42 | struct st_switch_to_data { /* A process was switched to on a given CPU. */ |
42 | u64 when; /* When did this occur? */ | 43 | u64 when; /* When did this occur? */ |
43 | u32 exec_time; /* Time the current job has executed. */ | 44 | u32 exec_time; /* Time the current job has executed. */ |
44 | u8 __unused[4]; | 45 | u8 __unused[4]; |
45 | 46 | } __attribute__((packed)); | |
46 | }; | ||
47 | 47 | ||
48 | struct st_switch_away_data { /* A process was switched away from on a given CPU. */ | 48 | struct st_switch_away_data { /* A process was switched away from on a given CPU. */ |
49 | u64 when; | 49 | u64 when; |
50 | u64 exec_time; | 50 | u64 exec_time; |
51 | }; | 51 | } __attribute__((packed)); |
52 | 52 | ||
53 | struct st_completion_data { /* A job completed. */ | 53 | struct st_completion_data { /* A job completed. */ |
54 | u64 when; | 54 | u64 when; |
@@ -56,35 +56,108 @@ struct st_completion_data { /* A job completed. */ | |||
56 | * next task automatically; set to 0 otherwise. | 56 | * next task automatically; set to 0 otherwise. |
57 | */ | 57 | */ |
58 | u8 __uflags:7; | 58 | u8 __uflags:7; |
59 | u8 __unused[7]; | 59 | u16 nv_int_count; |
60 | }; | 60 | u8 __unused[5]; |
61 | } __attribute__((packed)); | ||
61 | 62 | ||
62 | struct st_block_data { /* A task blocks. */ | 63 | struct st_block_data { /* A task blocks. */ |
63 | u64 when; | 64 | u64 when; |
64 | u64 __unused; | 65 | u64 __unused; |
65 | }; | 66 | } __attribute__((packed)); |
66 | 67 | ||
67 | struct st_resume_data { /* A task resumes. */ | 68 | struct st_resume_data { /* A task resumes. */ |
68 | u64 when; | 69 | u64 when; |
69 | u64 __unused; | 70 | u64 __unused; |
70 | }; | 71 | } __attribute__((packed)); |
71 | 72 | ||
72 | struct st_action_data { | 73 | struct st_action_data { |
73 | u64 when; | 74 | u64 when; |
74 | u8 action; | 75 | u8 action; |
75 | u8 __unused[7]; | 76 | u8 __unused[7]; |
76 | }; | 77 | } __attribute__((packed)); |
77 | 78 | ||
78 | struct st_sys_release_data { | 79 | struct st_sys_release_data { |
79 | u64 when; | 80 | u64 when; |
80 | u64 release; | 81 | u64 release; |
81 | }; | 82 | } __attribute__((packed)); |
83 | |||
84 | |||
85 | struct st_tasklet_release_data { | ||
86 | u64 when; | ||
87 | u64 __unused; | ||
88 | } __attribute__((packed)); | ||
89 | |||
90 | struct st_tasklet_begin_data { | ||
91 | u64 when; | ||
92 | u16 exe_pid; | ||
93 | u8 __unused[6]; | ||
94 | } __attribute__((packed)); | ||
95 | |||
96 | struct st_tasklet_end_data { | ||
97 | u64 when; | ||
98 | u16 exe_pid; | ||
99 | u8 flushed; | ||
100 | u8 __unused[5]; | ||
101 | } __attribute__((packed)); | ||
102 | |||
103 | |||
104 | struct st_work_release_data { | ||
105 | u64 when; | ||
106 | u64 __unused; | ||
107 | } __attribute__((packed)); | ||
108 | |||
109 | struct st_work_begin_data { | ||
110 | u64 when; | ||
111 | u16 exe_pid; | ||
112 | u8 __unused[6]; | ||
113 | } __attribute__((packed)); | ||
114 | |||
115 | struct st_work_end_data { | ||
116 | u64 when; | ||
117 | u16 exe_pid; | ||
118 | u8 flushed; | ||
119 | u8 __unused[5]; | ||
120 | } __attribute__((packed)); | ||
121 | |||
122 | struct st_effective_priority_change_data { | ||
123 | u64 when; | ||
124 | u16 inh_pid; | ||
125 | u8 __unused[6]; | ||
126 | } __attribute__((packed)); | ||
127 | |||
128 | struct st_nv_interrupt_begin_data { | ||
129 | u64 when; | ||
130 | u32 device; | ||
131 | u32 serialNumber; | ||
132 | } __attribute__((packed)); | ||
133 | |||
134 | struct st_nv_interrupt_end_data { | ||
135 | u64 when; | ||
136 | u32 device; | ||
137 | u32 serialNumber; | ||
138 | } __attribute__((packed)); | ||
139 | |||
140 | struct st_prediction_err_data { | ||
141 | u64 distance; | ||
142 | u64 rel_err; | ||
143 | } __attribute__((packed)); | ||
144 | |||
145 | struct st_migration_data { | ||
146 | u64 observed; | ||
147 | u64 estimated; | ||
148 | } __attribute__((packed)); | ||
149 | |||
150 | struct migration_info { | ||
151 | u64 observed; | ||
152 | u64 estimated; | ||
153 | u8 distance; | ||
154 | } __attribute__((packed)); | ||
82 | 155 | ||
83 | #define DATA(x) struct st_ ## x ## _data x; | 156 | #define DATA(x) struct st_ ## x ## _data x; |
84 | 157 | ||
85 | typedef enum { | 158 | typedef enum { |
86 | ST_NAME = 1, /* Start at one, so that we can spot | 159 | ST_NAME = 1, /* Start at one, so that we can spot |
87 | * uninitialized records. */ | 160 | * uninitialized records. */ |
88 | ST_PARAM, | 161 | ST_PARAM, |
89 | ST_RELEASE, | 162 | ST_RELEASE, |
90 | ST_ASSIGNED, | 163 | ST_ASSIGNED, |
@@ -94,7 +167,19 @@ typedef enum { | |||
94 | ST_BLOCK, | 167 | ST_BLOCK, |
95 | ST_RESUME, | 168 | ST_RESUME, |
96 | ST_ACTION, | 169 | ST_ACTION, |
97 | ST_SYS_RELEASE | 170 | ST_SYS_RELEASE, |
171 | ST_TASKLET_RELEASE, | ||
172 | ST_TASKLET_BEGIN, | ||
173 | ST_TASKLET_END, | ||
174 | ST_WORK_RELEASE, | ||
175 | ST_WORK_BEGIN, | ||
176 | ST_WORK_END, | ||
177 | ST_EFF_PRIO_CHANGE, | ||
178 | ST_NV_INTERRUPT_BEGIN, | ||
179 | ST_NV_INTERRUPT_END, | ||
180 | |||
181 | ST_PREDICTION_ERR, | ||
182 | ST_MIGRATION, | ||
98 | } st_event_record_type_t; | 183 | } st_event_record_type_t; |
99 | 184 | ||
100 | struct st_event_record { | 185 | struct st_event_record { |
@@ -113,8 +198,20 @@ struct st_event_record { | |||
113 | DATA(resume); | 198 | DATA(resume); |
114 | DATA(action); | 199 | DATA(action); |
115 | DATA(sys_release); | 200 | DATA(sys_release); |
201 | DATA(tasklet_release); | ||
202 | DATA(tasklet_begin); | ||
203 | DATA(tasklet_end); | ||
204 | DATA(work_release); | ||
205 | DATA(work_begin); | ||
206 | DATA(work_end); | ||
207 | DATA(effective_priority_change); | ||
208 | DATA(nv_interrupt_begin); | ||
209 | DATA(nv_interrupt_end); | ||
210 | |||
211 | DATA(prediction_err); | ||
212 | DATA(migration); | ||
116 | } data; | 213 | } data; |
117 | }; | 214 | } __attribute__((packed)); |
118 | 215 | ||
119 | #undef DATA | 216 | #undef DATA |
120 | 217 | ||
@@ -129,6 +226,8 @@ struct st_event_record { | |||
129 | ft_event1(id, callback, task) | 226 | ft_event1(id, callback, task) |
130 | #define SCHED_TRACE2(id, callback, task, xtra) \ | 227 | #define SCHED_TRACE2(id, callback, task, xtra) \ |
131 | ft_event2(id, callback, task, xtra) | 228 | ft_event2(id, callback, task, xtra) |
229 | #define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \ | ||
230 | ft_event3(id, callback, task, xtra1, xtra2) | ||
132 | 231 | ||
133 | /* provide prototypes; needed on sparc64 */ | 232 | /* provide prototypes; needed on sparc64 */ |
134 | #ifndef NO_TASK_TRACE_DECLS | 233 | #ifndef NO_TASK_TRACE_DECLS |
@@ -155,12 +254,58 @@ feather_callback void do_sched_trace_action(unsigned long id, | |||
155 | feather_callback void do_sched_trace_sys_release(unsigned long id, | 254 | feather_callback void do_sched_trace_sys_release(unsigned long id, |
156 | lt_t* start); | 255 | lt_t* start); |
157 | 256 | ||
257 | |||
258 | feather_callback void do_sched_trace_tasklet_release(unsigned long id, | ||
259 | struct task_struct* owner); | ||
260 | feather_callback void do_sched_trace_tasklet_begin(unsigned long id, | ||
261 | struct task_struct* owner); | ||
262 | feather_callback void do_sched_trace_tasklet_end(unsigned long id, | ||
263 | struct task_struct* owner, | ||
264 | unsigned long flushed); | ||
265 | |||
266 | feather_callback void do_sched_trace_work_release(unsigned long id, | ||
267 | struct task_struct* owner); | ||
268 | feather_callback void do_sched_trace_work_begin(unsigned long id, | ||
269 | struct task_struct* owner, | ||
270 | struct task_struct* exe); | ||
271 | feather_callback void do_sched_trace_work_end(unsigned long id, | ||
272 | struct task_struct* owner, | ||
273 | struct task_struct* exe, | ||
274 | unsigned long flushed); | ||
275 | |||
276 | feather_callback void do_sched_trace_eff_prio_change(unsigned long id, | ||
277 | struct task_struct* task, | ||
278 | struct task_struct* inh); | ||
279 | |||
280 | feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, | ||
281 | u32 device); | ||
282 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, | ||
283 | unsigned long unused); | ||
284 | |||
285 | feather_callback void do_sched_trace_prediction_err(unsigned long id, | ||
286 | struct task_struct* task, | ||
287 | gpu_migration_dist_t* distance, | ||
288 | fp_t* rel_err); | ||
289 | |||
290 | |||
291 | |||
292 | |||
293 | |||
294 | feather_callback void do_sched_trace_migration(unsigned long id, | ||
295 | struct task_struct* task, | ||
296 | struct migration_info* mig_info); | ||
297 | |||
298 | |||
299 | /* returns true if we're tracing an interrupt on current CPU */ | ||
300 | /* int is_interrupt_tracing_active(void); */ | ||
301 | |||
158 | #endif | 302 | #endif |
159 | 303 | ||
160 | #else | 304 | #else |
161 | 305 | ||
162 | #define SCHED_TRACE(id, callback, task) /* no tracing */ | 306 | #define SCHED_TRACE(id, callback, task) /* no tracing */ |
163 | #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ | 307 | #define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ |
308 | #define SCHED_TRACE3(id, callback, task, xtra1, xtra2) | ||
164 | 309 | ||
165 | #endif | 310 | #endif |
166 | 311 | ||
@@ -252,6 +397,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id, | |||
252 | trace_litmus_sys_release(when); \ | 397 | trace_litmus_sys_release(when); \ |
253 | } while (0) | 398 | } while (0) |
254 | 399 | ||
400 | #define sched_trace_tasklet_release(t) \ | ||
401 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t) | ||
402 | |||
403 | #define sched_trace_tasklet_begin(t) \ | ||
404 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t) | ||
405 | |||
406 | #define sched_trace_tasklet_end(t, flushed) \ | ||
407 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed) | ||
408 | |||
409 | |||
410 | #define sched_trace_work_release(t) \ | ||
411 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t) | ||
412 | |||
413 | #define sched_trace_work_begin(t, e) \ | ||
414 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e) | ||
415 | |||
416 | #define sched_trace_work_end(t, e, flushed) \ | ||
417 | SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed) | ||
418 | |||
419 | |||
420 | #define sched_trace_eff_prio_change(t, inh) \ | ||
421 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh) | ||
422 | |||
423 | |||
424 | #define sched_trace_nv_interrupt_begin(d) \ | ||
425 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d) | ||
426 | #define sched_trace_nv_interrupt_end(d) \ | ||
427 | SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d) | ||
428 | |||
429 | #define sched_trace_prediction_err(t, dist, rel_err) \ | ||
430 | SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err) | ||
431 | |||
432 | #define sched_trace_migration(t, mig_info) \ | ||
433 | SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info) | ||
434 | |||
255 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ | 435 | #define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ |
256 | 436 | ||
257 | #endif /* __KERNEL__ */ | 437 | #endif /* __KERNEL__ */ |
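Now that every record type is packed, the record layout is fixed: an 8-byte header (type, cpu, pid, 24-bit job, extra) followed by a payload union that is 16 bytes wide for every type shown here, 24 bytes per record. A user-space consumer can therefore walk a raw dump directly; the sketch below mirrors the header layout locally rather than including kernel headers, assumes records are stored back to back, and uses a made-up dump file name:

    /* User-space sketch: count records per event type in a raw trace dump. */
    #include <stdio.h>
    #include <stdint.h>

    struct __attribute__((packed)) trace_hdr {   /* mirrors st_trace_header */
        uint8_t  type;
        uint8_t  cpu;
        uint16_t pid;
        uint32_t job:24;
        uint8_t  extra;
    };

    int main(void)
    {
        FILE *f = fopen("st-cpu0.bin", "rb");    /* hypothetical dump file */
        struct trace_hdr hdr;
        uint8_t payload[16];                     /* union of the *_data structs */
        unsigned long counts[256] = { 0 };

        if (!f)
            return 1;
        while (fread(&hdr, sizeof(hdr), 1, f) == 1 &&
               fread(payload, sizeof(payload), 1, f) == 1)
            counts[hdr.type]++;
        fclose(f);

        for (int i = 0; i < 256; i++)
            if (counts[i])
                printf("type %d: %lu records\n", i, counts[i]);
        return 0;
    }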
diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h new file mode 100644 index 000000000000..e70e45e4cf51 --- /dev/null +++ b/include/litmus/sched_trace_external.h | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * sched_trace_external.h -- record scheduler events to a byte stream for offline analysis. | ||
3 | */ | ||
4 | #ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_ | ||
5 | #define _LINUX_SCHED_TRACE_EXTERNAL_H_ | ||
6 | |||
7 | |||
8 | #ifdef CONFIG_SCHED_TASK_TRACE | ||
9 | extern void __sched_trace_tasklet_begin_external(struct task_struct* t); | ||
10 | static inline void sched_trace_tasklet_begin_external(struct task_struct* t) | ||
11 | { | ||
12 | __sched_trace_tasklet_begin_external(t); | ||
13 | } | ||
14 | |||
15 | extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed); | ||
16 | static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) | ||
17 | { | ||
18 | __sched_trace_tasklet_end_external(t, flushed); | ||
19 | } | ||
20 | |||
21 | extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e); | ||
22 | static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) | ||
23 | { | ||
24 | __sched_trace_work_begin_external(t, e); | ||
25 | } | ||
26 | |||
27 | extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f); | ||
28 | static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) | ||
29 | { | ||
30 | __sched_trace_work_end_external(t, e, f); | ||
31 | } | ||
32 | |||
33 | #ifdef CONFIG_LITMUS_NVIDIA | ||
34 | extern void __sched_trace_nv_interrupt_begin_external(u32 device); | ||
35 | static inline void sched_trace_nv_interrupt_begin_external(u32 device) | ||
36 | { | ||
37 | __sched_trace_nv_interrupt_begin_external(device); | ||
38 | } | ||
39 | |||
40 | extern void __sched_trace_nv_interrupt_end_external(u32 device); | ||
41 | static inline void sched_trace_nv_interrupt_end_external(u32 device) | ||
42 | { | ||
43 | __sched_trace_nv_interrupt_end_external(device); | ||
44 | } | ||
45 | #endif | ||
46 | |||
47 | #else | ||
48 | |||
49 | // no tracing. | ||
50 | static inline void sched_trace_tasklet_begin_external(struct task_struct* t){} | ||
51 | static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){} | ||
52 | static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){} | ||
53 | static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){} | ||
54 | |||
55 | #ifdef CONFIG_LITMUS_NVIDIA | ||
56 | static inline void sched_trace_nv_interrupt_begin_external(u32 device){} | ||
57 | static inline void sched_trace_nv_interrupt_end_external(u32 device){} | ||
58 | #endif | ||
59 | |||
60 | #endif | ||
61 | |||
62 | |||
63 | #ifdef CONFIG_LITMUS_NVIDIA | ||
64 | |||
65 | #define EX_TS(evt) \ | ||
66 | extern void __##evt(void); \ | ||
67 | static inline void EX_##evt(void) { __##evt(); } | ||
68 | |||
69 | EX_TS(TS_NV_TOPISR_START) | ||
70 | EX_TS(TS_NV_TOPISR_END) | ||
71 | EX_TS(TS_NV_BOTISR_START) | ||
72 | EX_TS(TS_NV_BOTISR_END) | ||
73 | EX_TS(TS_NV_RELEASE_BOTISR_START) | ||
74 | EX_TS(TS_NV_RELEASE_BOTISR_END) | ||
75 | |||
76 | #endif | ||
77 | |||
78 | #endif | ||
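The EX_TS() macro at the end of this header generates, per timestamp, an extern declaration plus a tiny inline forwarder, presumably so code that cannot pull in the Feather-Trace machinery directly (such as the NVIDIA interface glue) can still emit these probes. For instance, EX_TS(TS_NV_TOPISR_START) expands to:

    extern void __TS_NV_TOPISR_START(void);
    static inline void EX_TS_NV_TOPISR_START(void) { __TS_NV_TOPISR_START(); }

Callers then invoke EX_TS_NV_TOPISR_START(), and a translation unit that does include litmus/trace.h is expected to define __TS_NV_TOPISR_START() around the real TS_NV_TOPISR_START macro.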
diff --git a/include/litmus/signal.h b/include/litmus/signal.h new file mode 100644 index 000000000000..38c3207951e0 --- /dev/null +++ b/include/litmus/signal.h | |||
@@ -0,0 +1,47 @@ | |||
1 | #ifndef LITMUS_SIGNAL_H | ||
2 | #define LITMUS_SIGNAL_H | ||
3 | |||
4 | #ifdef __KERNEL__ | ||
5 | #include <linux/signal.h> | ||
6 | #else | ||
7 | #include <signal.h> | ||
8 | #endif | ||
9 | |||
10 | /* Signals used by Litmus to asynchronously communicate events | ||
11 | * to real-time tasks. | ||
12 | * | ||
13 | * Signal values overlap with [SIGRTMIN, SIGRTMAX], so beware of | ||
14 | * application-level conflicts when dealing with COTS user-level | ||
15 | * code. | ||
16 | */ | ||
17 | |||
18 | /* Sent to a Litmus task when all of the following conditions are true: | ||
19 | * (1) The task has exhausted its budget. | ||
20 | * (2) budget_signal_policy is QUANTUM_SIGNALS or PRECISE_SIGNALS. | ||
21 | * | ||
22 | * Note: If a task does not have a registered handler for SIG_BUDGET, | ||
23 | * the signal will cause the task to terminate (default action). | ||
24 | */ | ||
25 | |||
26 | /* Assigned values start at SIGRTMAX and decrease, hopefully reducing | ||
27 | * likelihood of user-level conflicts. | ||
28 | */ | ||
29 | #define SIG_BUDGET (SIGRTMAX - 0) | ||
30 | |||
31 | /* | ||
32 | Future signals could include: | ||
33 | |||
34 | #define SIG_DEADLINE_MISS (SIGRTMAX - 1) | ||
35 | #define SIG_CRIT_LEVEL_CHANGE (SIGRTMAX - 2) | ||
36 | */ | ||
37 | |||
38 | #define SIGLITMUSMIN SIG_BUDGET | ||
39 | |||
40 | #ifdef __KERNEL__ | ||
41 | #if (SIGLITMUSMIN < SIGRTMIN) | ||
42 | /* no compile-time check in user-space since SIGRTMIN may be a variable. */ | ||
43 | #error "Too many LITMUS^RT signals!" | ||
44 | #endif | ||
45 | #endif | ||
46 | |||
47 | #endif | ||
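From user space, a task that requests QUANTUM_SIGNALS or PRECISE_SIGNALS should install a handler before its first budget overrun, since (as noted above) an unhandled SIG_BUDGET terminates the task. A minimal sketch, without any liblitmus helpers:

    /* Minimal user-space sketch: catch SIG_BUDGET instead of dying on it. */
    #include <signal.h>
    #include <string.h>
    #include <litmus/signal.h>          /* SIG_BUDGET == SIGRTMAX */

    static volatile sig_atomic_t budget_exhausted;

    static void on_budget(int sig)
    {
        (void) sig;
        budget_exhausted = 1;           /* poll this at a convenient point */
    }

    static int install_budget_handler(void)
    {
        struct sigaction sa;
        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_budget;
        sigemptyset(&sa.sa_mask);
        return sigaction(SIG_BUDGET, &sa, NULL);
    }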
diff --git a/include/litmus/trace.h b/include/litmus/trace.h index 8ad4966c602e..15bd645d2466 100644 --- a/include/litmus/trace.h +++ b/include/litmus/trace.h | |||
@@ -137,9 +137,34 @@ feather_callback void save_timestamp_hide_irq(unsigned long event); | |||
137 | #define TS_EXIT_NP_START TIMESTAMP(150) | 137 | #define TS_EXIT_NP_START TIMESTAMP(150) |
138 | #define TS_EXIT_NP_END TIMESTAMP(151) | 138 | #define TS_EXIT_NP_END TIMESTAMP(151) |
139 | 139 | ||
140 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
141 | #define TS_DGL_LOCK_START TIMESTAMP(175) | ||
142 | #define TS_DGL_LOCK_SUSPEND TIMESTAMP(176) | ||
143 | #define TS_DGL_LOCK_RESUME TIMESTAMP(177) | ||
144 | #define TS_DGL_LOCK_END TIMESTAMP(178) | ||
145 | #define TS_DGL_UNLOCK_START TIMESTAMP(185) | ||
146 | #define TS_DGL_UNLOCK_END TIMESTAMP(186) | ||
147 | #endif | ||
148 | |||
140 | #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) | 149 | #define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) |
141 | #define TS_SEND_RESCHED_END TIMESTAMP_IN_IRQ(191) | 150 | #define TS_SEND_RESCHED_END TIMESTAMP_IN_IRQ(191) |
142 | 151 | ||
152 | #ifdef CONFIG_LITMUS_NVIDIA | ||
153 | #define TS_NV_TOPISR_START TIMESTAMP(200) | ||
154 | #define TS_NV_TOPISR_END TIMESTAMP(201) | ||
155 | |||
156 | #define TS_NV_BOTISR_START TIMESTAMP(202) | ||
157 | #define TS_NV_BOTISR_END TIMESTAMP(203) | ||
158 | |||
159 | #define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204) | ||
160 | #define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205) | ||
161 | #endif | ||
162 | |||
163 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
164 | #define TS_NV_SCHED_BOTISR_START TIMESTAMP(206) | ||
165 | #define TS_NV_SCHED_BOTISR_END TIMESTAMP(207) | ||
166 | #endif | ||
167 | |||
143 | #define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) | 168 | #define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) |
144 | 169 | ||
145 | #endif /* !_SYS_TRACE_H_ */ | 170 | #endif /* !_SYS_TRACE_H_ */ |
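The new TS_DGL_* probes follow the same START/SUSPEND/RESUME/END convention as the existing overhead timestamps, so the DGL acquisition path is presumably instrumented along these lines (illustrative placement only; the real call sites are in the DGL code elsewhere in this patch):

    /* Illustrative placement of the DGL lock probes, not the real call sites. */
    static void example_dgl_lock_path(int contended)
    {
        TS_DGL_LOCK_START;          /* entering the group-lock acquisition */

        if (contended) {            /* placeholder for the real wait logic */
            TS_DGL_LOCK_SUSPEND;    /* about to block on a held resource */
            schedule();
            TS_DGL_LOCK_RESUME;     /* woken up holding the whole group */
        }

        TS_DGL_LOCK_END;            /* acquisition complete */
    }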
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h index 94264c27d9ac..d1fe84a5d574 100644 --- a/include/litmus/unistd_32.h +++ b/include/litmus/unistd_32.h | |||
@@ -17,5 +17,11 @@ | |||
17 | #define __NR_wait_for_ts_release __LSC(9) | 17 | #define __NR_wait_for_ts_release __LSC(9) |
18 | #define __NR_release_ts __LSC(10) | 18 | #define __NR_release_ts __LSC(10) |
19 | #define __NR_null_call __LSC(11) | 19 | #define __NR_null_call __LSC(11) |
20 | #define __NR_litmus_dgl_lock __LSC(12) | ||
21 | #define __NR_litmus_dgl_unlock __LSC(13) | ||
20 | 22 | ||
21 | #define NR_litmus_syscalls 12 | 23 | #define __NR_set_aux_tasks __LSC(14) |
24 | |||
25 | #define __NR_sched_trace_event __LSC(15) | ||
26 | |||
27 | #define NR_litmus_syscalls 16 | ||
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h index d5ced0d2642c..75f9fcb897f5 100644 --- a/include/litmus/unistd_64.h +++ b/include/litmus/unistd_64.h | |||
@@ -29,5 +29,15 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release) | |||
29 | __SYSCALL(__NR_release_ts, sys_release_ts) | 29 | __SYSCALL(__NR_release_ts, sys_release_ts) |
30 | #define __NR_null_call __LSC(11) | 30 | #define __NR_null_call __LSC(11) |
31 | __SYSCALL(__NR_null_call, sys_null_call) | 31 | __SYSCALL(__NR_null_call, sys_null_call) |
32 | #define __NR_litmus_dgl_lock __LSC(12) | ||
33 | __SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock) | ||
34 | #define __NR_litmus_dgl_unlock __LSC(13) | ||
35 | __SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock) | ||
32 | 36 | ||
33 | #define NR_litmus_syscalls 12 | 37 | #define __NR_set_aux_tasks __LSC(14) |
38 | __SYSCALL(__NR_set_aux_tasks, sys_set_aux_tasks) | ||
39 | |||
40 | #define __NR_sched_trace_event __LSC(15) | ||
41 | __SYSCALL(__NR_sched_trace_event, sys_sched_trace_event) | ||
42 | |||
43 | #define NR_litmus_syscalls 16 | ||
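Both tables now end at __LSC(15), which is why NR_litmus_syscalls goes from 12 to 16 on each architecture. From user space the new entries are plain syscalls; the sketch below shows the general pattern for the trace-injection call, where the (event type, struct st_inject_args *) argument pair is only an assumption based on rt_param.h above, since the sys_sched_trace_event() signature is not part of this hunk:

    /* User-space sketch; the argument list is assumed, not a documented ABI. */
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <litmus/unistd_64.h>   /* __NR_sched_trace_event */
    #include <litmus/rt_param.h>    /* ST_INJECT_COMPLETION, struct st_inject_args */

    static long inject_completion(unsigned int job_no)
    {
        struct st_inject_args args = {
            .release  = 0,
            .deadline = 0,
            .job_no   = job_no,
        };
        return syscall(__NR_sched_trace_event, ST_INJECT_COMPLETION, &args);
    }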
diff --git a/kernel/fork.c b/kernel/fork.c index 25c6111fe3a6..7491c4f5e78c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1370,8 +1370,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1370 | write_unlock_irq(&tasklist_lock); | 1370 | write_unlock_irq(&tasklist_lock); |
1371 | proc_fork_connector(p); | 1371 | proc_fork_connector(p); |
1372 | cgroup_post_fork(p); | 1372 | cgroup_post_fork(p); |
1373 | if (clone_flags & CLONE_THREAD) | 1373 | if (clone_flags & CLONE_THREAD) { |
1374 | threadgroup_fork_read_unlock(current); | 1374 | threadgroup_fork_read_unlock(current); |
1375 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1376 | litmus_post_fork_thread(p); | ||
1377 | #endif | ||
1378 | } | ||
1375 | perf_event_fork(p); | 1379 | perf_event_fork(p); |
1376 | return p; | 1380 | return p; |
1377 | 1381 | ||
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 298c9276dfdb..2bdcdc3691e5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -542,7 +542,7 @@ static void print_lock(struct held_lock *hlock) | |||
542 | print_ip_sym(hlock->acquire_ip); | 542 | print_ip_sym(hlock->acquire_ip); |
543 | } | 543 | } |
544 | 544 | ||
545 | static void lockdep_print_held_locks(struct task_struct *curr) | 545 | void lockdep_print_held_locks(struct task_struct *curr) |
546 | { | 546 | { |
547 | int i, depth = curr->lockdep_depth; | 547 | int i, depth = curr->lockdep_depth; |
548 | 548 | ||
@@ -558,6 +558,7 @@ static void lockdep_print_held_locks(struct task_struct *curr) | |||
558 | print_lock(curr->held_locks + i); | 558 | print_lock(curr->held_locks + i); |
559 | } | 559 | } |
560 | } | 560 | } |
561 | EXPORT_SYMBOL(lockdep_print_held_locks); | ||
561 | 562 | ||
562 | static void print_kernel_version(void) | 563 | static void print_kernel_version(void) |
563 | { | 564 | { |
@@ -583,6 +584,10 @@ static int static_obj(void *obj) | |||
583 | end = (unsigned long) &_end, | 584 | end = (unsigned long) &_end, |
584 | addr = (unsigned long) obj; | 585 | addr = (unsigned long) obj; |
585 | 586 | ||
587 | // GLENN | ||
588 | return 1; | ||
589 | |||
590 | |||
586 | /* | 591 | /* |
587 | * static variable? | 592 | * static variable? |
588 | */ | 593 | */ |
diff --git a/kernel/mutex.c b/kernel/mutex.c index d607ed5dd441..2f363b9bfc1f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -498,3 +498,128 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |||
498 | return 1; | 498 | return 1; |
499 | } | 499 | } |
500 | EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | 500 | EXPORT_SYMBOL(atomic_dec_and_mutex_lock); |
501 | |||
502 | |||
503 | |||
504 | |||
505 | void mutex_lock_sfx(struct mutex *lock, | ||
506 | side_effect_t pre, unsigned long pre_arg, | ||
507 | side_effect_t post, unsigned long post_arg) | ||
508 | { | ||
509 | long state = TASK_UNINTERRUPTIBLE; | ||
510 | |||
511 | struct task_struct *task = current; | ||
512 | struct mutex_waiter waiter; | ||
513 | unsigned long flags; | ||
514 | |||
515 | preempt_disable(); | ||
516 | mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | ||
517 | |||
518 | spin_lock_mutex(&lock->wait_lock, flags); | ||
519 | |||
520 | if(pre) | ||
521 | { | ||
522 | if(unlikely(pre(pre_arg))) | ||
523 | { | ||
524 | // this will confuse lockdep's CONFIG_PROVE_LOCKING... | ||
525 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
526 | preempt_enable(); | ||
527 | return; | ||
528 | } | ||
529 | } | ||
530 | |||
531 | debug_mutex_lock_common(lock, &waiter); | ||
532 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); | ||
533 | |||
534 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | ||
535 | list_add_tail(&waiter.list, &lock->wait_list); | ||
536 | waiter.task = task; | ||
537 | |||
538 | if (atomic_xchg(&lock->count, -1) == 1) | ||
539 | goto done; | ||
540 | |||
541 | lock_contended(&lock->dep_map, _RET_IP_); | ||
542 | |||
543 | for (;;) { | ||
544 | /* | ||
545 | * Lets try to take the lock again - this is needed even if | ||
546 | * we get here for the first time (shortly after failing to | ||
547 | * acquire the lock), to make sure that we get a wakeup once | ||
548 | * it's unlocked. Later on, if we sleep, this is the | ||
549 | * operation that gives us the lock. We xchg it to -1, so | ||
550 | * that when we release the lock, we properly wake up the | ||
551 | * other waiters: | ||
552 | */ | ||
553 | if (atomic_xchg(&lock->count, -1) == 1) | ||
554 | break; | ||
555 | |||
556 | __set_task_state(task, state); | ||
557 | |||
558 | /* didnt get the lock, go to sleep: */ | ||
559 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
560 | preempt_enable_no_resched(); | ||
561 | schedule(); | ||
562 | preempt_disable(); | ||
563 | spin_lock_mutex(&lock->wait_lock, flags); | ||
564 | } | ||
565 | |||
566 | done: | ||
567 | lock_acquired(&lock->dep_map, _RET_IP_); | ||
568 | /* got the lock - rejoice! */ | ||
569 | mutex_remove_waiter(lock, &waiter, current_thread_info()); | ||
570 | mutex_set_owner(lock); | ||
571 | |||
572 | /* set it to 0 if there are no waiters left: */ | ||
573 | if (likely(list_empty(&lock->wait_list))) | ||
574 | atomic_set(&lock->count, 0); | ||
575 | |||
576 | if(post) | ||
577 | post(post_arg); | ||
578 | |||
579 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
580 | |||
581 | debug_mutex_free_waiter(&waiter); | ||
582 | preempt_enable(); | ||
583 | } | ||
584 | EXPORT_SYMBOL(mutex_lock_sfx); | ||
585 | |||
586 | void mutex_unlock_sfx(struct mutex *lock, | ||
587 | side_effect_t pre, unsigned long pre_arg, | ||
588 | side_effect_t post, unsigned long post_arg) | ||
589 | { | ||
590 | unsigned long flags; | ||
591 | |||
592 | spin_lock_mutex(&lock->wait_lock, flags); | ||
593 | |||
594 | if(pre) | ||
595 | pre(pre_arg); | ||
596 | |||
597 | //mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
598 | mutex_release(&lock->dep_map, 1, _RET_IP_); | ||
599 | debug_mutex_unlock(lock); | ||
600 | |||
601 | /* | ||
602 | * some architectures leave the lock unlocked in the fastpath failure | ||
603 | * case, others need to leave it locked. In the later case we have to | ||
604 | * unlock it here | ||
605 | */ | ||
606 | if (__mutex_slowpath_needs_to_unlock()) | ||
607 | atomic_set(&lock->count, 1); | ||
608 | |||
609 | if (!list_empty(&lock->wait_list)) { | ||
610 | /* get the first entry from the wait-list: */ | ||
611 | struct mutex_waiter *waiter = | ||
612 | list_entry(lock->wait_list.next, | ||
613 | struct mutex_waiter, list); | ||
614 | |||
615 | debug_mutex_wake_waiter(lock, waiter); | ||
616 | |||
617 | wake_up_process(waiter->task); | ||
618 | } | ||
619 | |||
620 | if(post) | ||
621 | post(post_arg); | ||
622 | |||
623 | spin_unlock_mutex(&lock->wait_lock, flags); | ||
624 | } | ||
625 | EXPORT_SYMBOL(mutex_unlock_sfx); | ||
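mutex_lock_sfx() and mutex_unlock_sfx() let the caller run a side effect while wait_lock is still held: pre runs before the acquire/release (and, in the lock case, a non-zero return aborts the acquisition), while post runs just before wait_lock is dropped. The sketch below shows only the calling convention; side_effect_t is assumed to be a function pointer taking and returning unsigned long (its typedef is not in this hunk), and the functions are hypothetical:

    /* Hypothetical side effects demonstrating the mutex_*_sfx calling
     * convention.  Assumed: typedef unsigned long (*side_effect_t)(unsigned long). */
    static unsigned long mark_busy(unsigned long arg)
    {
        int *busy = (int *) arg;
        if (*busy)
            return 1;   /* non-zero: mutex_lock_sfx() gives up before queuing */
        *busy = 1;
        return 0;       /* zero: proceed (and possibly block) as usual */
    }

    static unsigned long clear_busy(unsigned long arg)
    {
        *(int *) arg = 0;
        return 0;
    }

    static void example(struct mutex *m, int *busy_flag)
    {
        mutex_lock_sfx(m, mark_busy, (unsigned long) busy_flag, NULL, 0);
        /* ... critical section ... */
        mutex_unlock_sfx(m, NULL, 0, clear_busy, (unsigned long) busy_flag);
    }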
diff --git a/kernel/sched.c b/kernel/sched.c index c4b6bd5151ff..e29a97235f26 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -86,6 +86,10 @@ | |||
86 | #include <litmus/sched_trace.h> | 86 | #include <litmus/sched_trace.h> |
87 | #include <litmus/trace.h> | 87 | #include <litmus/trace.h> |
88 | 88 | ||
89 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
90 | #include <litmus/litmus_softirq.h> | ||
91 | #endif | ||
92 | |||
89 | static void litmus_tick(struct rq*, struct task_struct*); | 93 | static void litmus_tick(struct rq*, struct task_struct*); |
90 | 94 | ||
91 | /* | 95 | /* |
@@ -2703,8 +2707,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
2703 | unsigned long flags; | 2707 | unsigned long flags; |
2704 | int cpu, success = 0; | 2708 | int cpu, success = 0; |
2705 | 2709 | ||
2706 | if (is_realtime(p)) | 2710 | if (is_realtime(p)) { |
2711 | //WARN_ON(1); | ||
2707 | TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); | 2712 | TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state); |
2713 | } | ||
2708 | 2714 | ||
2709 | smp_wmb(); | 2715 | smp_wmb(); |
2710 | raw_spin_lock_irqsave(&p->pi_lock, flags); | 2716 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
@@ -4319,6 +4325,7 @@ pick_next_task(struct rq *rq) | |||
4319 | BUG(); /* the idle class will always have a runnable task */ | 4325 | BUG(); /* the idle class will always have a runnable task */ |
4320 | } | 4326 | } |
4321 | 4327 | ||
4328 | |||
4322 | /* | 4329 | /* |
4323 | * schedule() is the main scheduler function. | 4330 | * schedule() is the main scheduler function. |
4324 | */ | 4331 | */ |
@@ -4434,10 +4441,16 @@ litmus_need_resched_nonpreemptible: | |||
4434 | if (need_resched()) | 4441 | if (need_resched()) |
4435 | goto need_resched; | 4442 | goto need_resched; |
4436 | 4443 | ||
4444 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
4445 | litmus->run_tasklets(prev); | ||
4446 | #endif | ||
4447 | |||
4437 | srp_ceiling_block(); | 4448 | srp_ceiling_block(); |
4438 | } | 4449 | } |
4439 | EXPORT_SYMBOL(schedule); | 4450 | EXPORT_SYMBOL(schedule); |
4440 | 4451 | ||
4452 | |||
4453 | |||
4441 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 4454 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
4442 | 4455 | ||
4443 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | 4456 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
@@ -4581,6 +4594,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | |||
4581 | } | 4594 | } |
4582 | } | 4595 | } |
4583 | 4596 | ||
4597 | |||
4584 | /** | 4598 | /** |
4585 | * __wake_up - wake up threads blocked on a waitqueue. | 4599 | * __wake_up - wake up threads blocked on a waitqueue. |
4586 | * @q: the waitqueue | 4600 | * @q: the waitqueue |
@@ -4756,6 +4770,12 @@ void __sched wait_for_completion(struct completion *x) | |||
4756 | } | 4770 | } |
4757 | EXPORT_SYMBOL(wait_for_completion); | 4771 | EXPORT_SYMBOL(wait_for_completion); |
4758 | 4772 | ||
4773 | void __sched __wait_for_completion_locked(struct completion *x) | ||
4774 | { | ||
4775 | do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); | ||
4776 | } | ||
4777 | EXPORT_SYMBOL(__wait_for_completion_locked); | ||
4778 | |||
4759 | /** | 4779 | /** |
4760 | * wait_for_completion_timeout: - waits for completion of a task (w/timeout) | 4780 | * wait_for_completion_timeout: - waits for completion of a task (w/timeout) |
4761 | * @x: holds the state of this particular completion | 4781 | * @x: holds the state of this particular completion |
@@ -5258,6 +5278,9 @@ recheck: | |||
5258 | if (retval) | 5278 | if (retval) |
5259 | return retval; | 5279 | return retval; |
5260 | } | 5280 | } |
5281 | else if (p->policy == SCHED_LITMUS) { | ||
5282 | litmus_pre_exit_task(p); | ||
5283 | } | ||
5261 | 5284 | ||
5262 | /* | 5285 | /* |
5263 | * make sure no PI-waiters arrive (or leave) while we are | 5286 | * make sure no PI-waiters arrive (or leave) while we are |
diff --git a/kernel/semaphore.c b/kernel/semaphore.c index 94a62c0d4ade..c947a046a6d7 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c | |||
@@ -33,11 +33,11 @@ | |||
33 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
34 | #include <linux/ftrace.h> | 34 | #include <linux/ftrace.h> |
35 | 35 | ||
36 | static noinline void __down(struct semaphore *sem); | 36 | noinline void __down(struct semaphore *sem); |
37 | static noinline int __down_interruptible(struct semaphore *sem); | 37 | static noinline int __down_interruptible(struct semaphore *sem); |
38 | static noinline int __down_killable(struct semaphore *sem); | 38 | static noinline int __down_killable(struct semaphore *sem); |
39 | static noinline int __down_timeout(struct semaphore *sem, long jiffies); | 39 | static noinline int __down_timeout(struct semaphore *sem, long jiffies); |
40 | static noinline void __up(struct semaphore *sem); | 40 | noinline void __up(struct semaphore *sem); |
41 | 41 | ||
42 | /** | 42 | /** |
43 | * down - acquire the semaphore | 43 | * down - acquire the semaphore |
@@ -190,11 +190,13 @@ EXPORT_SYMBOL(up); | |||
190 | 190 | ||
191 | /* Functions for the contended case */ | 191 | /* Functions for the contended case */ |
192 | 192 | ||
193 | /* | ||
193 | struct semaphore_waiter { | 194 | struct semaphore_waiter { |
194 | struct list_head list; | 195 | struct list_head list; |
195 | struct task_struct *task; | 196 | struct task_struct *task; |
196 | int up; | 197 | int up; |
197 | }; | 198 | }; |
199 | */ | ||
198 | 200 | ||
199 | /* | 201 | /* |
200 | * Because this function is inlined, the 'state' parameter will be | 202 | * Because this function is inlined, the 'state' parameter will be |
@@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state, | |||
233 | return -EINTR; | 235 | return -EINTR; |
234 | } | 236 | } |
235 | 237 | ||
236 | static noinline void __sched __down(struct semaphore *sem) | 238 | noinline void __sched __down(struct semaphore *sem) |
237 | { | 239 | { |
238 | __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | 240 | __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); |
239 | } | 241 | } |
242 | EXPORT_SYMBOL(__down); | ||
243 | |||
240 | 244 | ||
241 | static noinline int __sched __down_interruptible(struct semaphore *sem) | 245 | static noinline int __sched __down_interruptible(struct semaphore *sem) |
242 | { | 246 | { |
@@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) | |||
253 | return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); | 257 | return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); |
254 | } | 258 | } |
255 | 259 | ||
256 | static noinline void __sched __up(struct semaphore *sem) | 260 | noinline void __sched __up(struct semaphore *sem) |
257 | { | 261 | { |
258 | struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, | 262 | struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, |
259 | struct semaphore_waiter, list); | 263 | struct semaphore_waiter, list); |
@@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem) | |||
261 | waiter->up = 1; | 265 | waiter->up = 1; |
262 | wake_up_process(waiter->task); | 266 | wake_up_process(waiter->task); |
263 | } | 267 | } |
268 | EXPORT_SYMBOL(__up); \ No newline at end of file | ||
diff --git a/kernel/softirq.c b/kernel/softirq.c index 2f2df08df395..ea438a8635d0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -29,6 +29,15 @@ | |||
29 | #include <trace/events/irq.h> | 29 | #include <trace/events/irq.h> |
30 | 30 | ||
31 | #include <asm/irq.h> | 31 | #include <asm/irq.h> |
32 | |||
33 | #include <litmus/litmus.h> | ||
34 | #include <litmus/sched_trace.h> | ||
35 | |||
36 | #ifdef CONFIG_LITMUS_NVIDIA | ||
37 | #include <litmus/nvidia_info.h> | ||
38 | #include <litmus/trace.h> | ||
39 | #endif | ||
40 | |||
32 | /* | 41 | /* |
33 | - No shared variables, all the data are CPU local. | 42 | - No shared variables, all the data are CPU local. |
34 | - If a softirq needs serialization, let it serialize itself | 43 | - If a softirq needs serialization, let it serialize itself |
@@ -67,7 +76,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { | |||
67 | * to the pending events, so lets the scheduler to balance | 76 | * to the pending events, so lets the scheduler to balance |
68 | * the softirq load for us. | 77 | * the softirq load for us. |
69 | */ | 78 | */ |
70 | static void wakeup_softirqd(void) | 79 | void wakeup_softirqd(void) |
71 | { | 80 | { |
72 | /* Interrupts are disabled: no need to stop preemption */ | 81 | /* Interrupts are disabled: no need to stop preemption */ |
73 | struct task_struct *tsk = __this_cpu_read(ksoftirqd); | 82 | struct task_struct *tsk = __this_cpu_read(ksoftirqd); |
@@ -193,6 +202,7 @@ void local_bh_enable_ip(unsigned long ip) | |||
193 | } | 202 | } |
194 | EXPORT_SYMBOL(local_bh_enable_ip); | 203 | EXPORT_SYMBOL(local_bh_enable_ip); |
195 | 204 | ||
205 | |||
196 | /* | 206 | /* |
197 | * We restart softirq processing MAX_SOFTIRQ_RESTART times, | 207 | * We restart softirq processing MAX_SOFTIRQ_RESTART times, |
198 | * and we fall back to softirqd after that. | 208 | * and we fall back to softirqd after that. |
@@ -206,10 +216,10 @@ EXPORT_SYMBOL(local_bh_enable_ip); | |||
206 | 216 | ||
207 | asmlinkage void __do_softirq(void) | 217 | asmlinkage void __do_softirq(void) |
208 | { | 218 | { |
209 | struct softirq_action *h; | 219 | struct softirq_action *h; |
210 | __u32 pending; | 220 | __u32 pending; |
211 | int max_restart = MAX_SOFTIRQ_RESTART; | 221 | int max_restart = MAX_SOFTIRQ_RESTART; |
212 | int cpu; | 222 | int cpu; |
213 | 223 | ||
214 | /* Mark Feather-Trace samples as "disturbed". */ | 224 | /* Mark Feather-Trace samples as "disturbed". */ |
215 | ft_irq_fired(); | 225 | ft_irq_fired(); |
@@ -217,57 +227,57 @@ asmlinkage void __do_softirq(void) | |||
217 | pending = local_softirq_pending(); | 227 | pending = local_softirq_pending(); |
218 | account_system_vtime(current); | 228 | account_system_vtime(current); |
219 | 229 | ||
220 | __local_bh_disable((unsigned long)__builtin_return_address(0), | 230 | __local_bh_disable((unsigned long)__builtin_return_address(0), |
221 | SOFTIRQ_OFFSET); | 231 | SOFTIRQ_OFFSET); |
222 | lockdep_softirq_enter(); | 232 | lockdep_softirq_enter(); |
223 | 233 | ||
224 | cpu = smp_processor_id(); | 234 | cpu = smp_processor_id(); |
225 | restart: | 235 | restart: |
226 | /* Reset the pending bitmask before enabling irqs */ | 236 | /* Reset the pending bitmask before enabling irqs */ |
227 | set_softirq_pending(0); | 237 | set_softirq_pending(0); |
228 | 238 | ||
229 | local_irq_enable(); | 239 | local_irq_enable(); |
230 | 240 | ||
231 | h = softirq_vec; | 241 | h = softirq_vec; |
232 | |||
233 | do { | ||
234 | if (pending & 1) { | ||
235 | unsigned int vec_nr = h - softirq_vec; | ||
236 | int prev_count = preempt_count(); | ||
237 | |||
238 | kstat_incr_softirqs_this_cpu(vec_nr); | ||
239 | |||
240 | trace_softirq_entry(vec_nr); | ||
241 | h->action(h); | ||
242 | trace_softirq_exit(vec_nr); | ||
243 | if (unlikely(prev_count != preempt_count())) { | ||
244 | printk(KERN_ERR "huh, entered softirq %u %s %p" | ||
245 | "with preempt_count %08x," | ||
246 | " exited with %08x?\n", vec_nr, | ||
247 | softirq_to_name[vec_nr], h->action, | ||
248 | prev_count, preempt_count()); | ||
249 | preempt_count() = prev_count; | ||
250 | } | ||
251 | 242 | ||
252 | rcu_bh_qs(cpu); | 243 | do { |
253 | } | 244 | if (pending & 1) { |
254 | h++; | 245 | unsigned int vec_nr = h - softirq_vec; |
255 | pending >>= 1; | 246 | int prev_count = preempt_count(); |
256 | } while (pending); | ||
257 | 247 | ||
258 | local_irq_disable(); | 248 | kstat_incr_softirqs_this_cpu(vec_nr); |
259 | 249 | ||
260 | pending = local_softirq_pending(); | 250 | trace_softirq_entry(vec_nr); |
261 | if (pending && --max_restart) | 251 | h->action(h); |
262 | goto restart; | 252 | trace_softirq_exit(vec_nr); |
253 | if (unlikely(prev_count != preempt_count())) { | ||
254 | printk(KERN_ERR "huh, entered softirq %u %s %p" | ||
255 | "with preempt_count %08x," | ||
256 | " exited with %08x?\n", vec_nr, | ||
257 | softirq_to_name[vec_nr], h->action, | ||
258 | prev_count, preempt_count()); | ||
259 | preempt_count() = prev_count; | ||
260 | } | ||
263 | 261 | ||
264 | if (pending) | 262 | rcu_bh_qs(cpu); |
265 | wakeup_softirqd(); | 263 | } |
264 | h++; | ||
265 | pending >>= 1; | ||
266 | } while (pending); | ||
266 | 267 | ||
267 | lockdep_softirq_exit(); | 268 | local_irq_disable(); |
268 | 269 | ||
269 | account_system_vtime(current); | 270 | pending = local_softirq_pending(); |
270 | __local_bh_enable(SOFTIRQ_OFFSET); | 271 | if (pending && --max_restart) |
272 | goto restart; | ||
273 | |||
274 | if (pending) | ||
275 | wakeup_softirqd(); | ||
276 | |||
277 | lockdep_softirq_exit(); | ||
278 | |||
279 | account_system_vtime(current); | ||
280 | __local_bh_enable(SOFTIRQ_OFFSET); | ||
271 | } | 281 | } |
272 | 282 | ||
273 | #ifndef __ARCH_HAS_DO_SOFTIRQ | 283 | #ifndef __ARCH_HAS_DO_SOFTIRQ |
@@ -396,17 +406,103 @@ void open_softirq(int nr, void (*action)(struct softirq_action *)) | |||
396 | /* | 406 | /* |
397 | * Tasklets | 407 | * Tasklets |
398 | */ | 408 | */ |
399 | struct tasklet_head | ||
400 | { | ||
401 | struct tasklet_struct *head; | ||
402 | struct tasklet_struct **tail; | ||
403 | }; | ||
404 | 409 | ||
405 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); | 410 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); |
406 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | 411 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); |
407 | 412 | ||
413 | #ifdef CONFIG_LITMUS_NVIDIA | ||
414 | static int __do_nv_now(struct tasklet_struct* tasklet) | ||
415 | { | ||
416 | int success = 1; | ||
417 | |||
418 | if(tasklet_trylock(tasklet)) { | ||
419 | if (!atomic_read(&tasklet->count)) { | ||
420 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) { | ||
421 | BUG(); | ||
422 | } | ||
423 | tasklet->func(tasklet->data); | ||
424 | tasklet_unlock(tasklet); | ||
425 | } | ||
426 | else { | ||
427 | success = 0; | ||
428 | } | ||
429 | |||
430 | tasklet_unlock(tasklet); | ||
431 | } | ||
432 | else { | ||
433 | success = 0; | ||
434 | } | ||
435 | |||
436 | return success; | ||
437 | } | ||
438 | #endif | ||
439 | |||
440 | |||
408 | void __tasklet_schedule(struct tasklet_struct *t) | 441 | void __tasklet_schedule(struct tasklet_struct *t) |
409 | { | 442 | { |
443 | #ifdef CONFIG_LITMUS_NVIDIA | ||
444 | if(is_nvidia_func(t->func)) | ||
445 | { | ||
446 | #if 1 | ||
447 | // do nvidia tasklets right away and return | ||
448 | if(__do_nv_now(t)) | ||
449 | return; | ||
450 | #else | ||
451 | u32 nvidia_device = get_tasklet_nv_device_num(t); | ||
452 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | ||
453 | // __FUNCTION__, nvidia_device,litmus_clock()); | ||
454 | |||
455 | unsigned long flags; | ||
456 | struct task_struct* device_owner; | ||
457 | |||
458 | lock_nv_registry(nvidia_device, &flags); | ||
459 | |||
460 | device_owner = get_nv_max_device_owner(nvidia_device); | ||
461 | |||
462 | if(device_owner==NULL) | ||
463 | { | ||
464 | t->owner = NULL; | ||
465 | } | ||
466 | else | ||
467 | { | ||
468 | if(is_realtime(device_owner)) | ||
469 | { | ||
470 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", | ||
471 | __FUNCTION__, nvidia_device,litmus_clock()); | ||
472 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | ||
473 | __FUNCTION__,device_owner->pid,nvidia_device); | ||
474 | |||
475 | t->owner = device_owner; | ||
476 | sched_trace_tasklet_release(t->owner); | ||
477 | |||
478 | if(likely(_litmus_tasklet_schedule(t,nvidia_device))) | ||
479 | { | ||
480 | unlock_nv_registry(nvidia_device, &flags); | ||
481 | return; | ||
482 | } | ||
483 | else | ||
484 | { | ||
485 | t->owner = NULL; /* fall through to normal scheduling */ | ||
486 | } | ||
487 | } | ||
488 | else | ||
489 | { | ||
490 | t->owner = NULL; | ||
491 | } | ||
492 | } | ||
493 | unlock_nv_registry(nvidia_device, &flags); | ||
494 | #endif | ||
495 | } | ||
496 | |||
497 | #endif | ||
498 | |||
499 | ___tasklet_schedule(t); | ||
500 | } | ||
501 | EXPORT_SYMBOL(__tasklet_schedule); | ||
502 | |||
503 | |||
504 | void ___tasklet_schedule(struct tasklet_struct *t) | ||
505 | { | ||
410 | unsigned long flags; | 506 | unsigned long flags; |
411 | 507 | ||
412 | local_irq_save(flags); | 508 | local_irq_save(flags); |
@@ -416,11 +512,71 @@ void __tasklet_schedule(struct tasklet_struct *t) | |||
416 | raise_softirq_irqoff(TASKLET_SOFTIRQ); | 512 | raise_softirq_irqoff(TASKLET_SOFTIRQ); |
417 | local_irq_restore(flags); | 513 | local_irq_restore(flags); |
418 | } | 514 | } |
515 | EXPORT_SYMBOL(___tasklet_schedule); | ||
419 | 516 | ||
420 | EXPORT_SYMBOL(__tasklet_schedule); | ||
421 | 517 | ||
422 | void __tasklet_hi_schedule(struct tasklet_struct *t) | 518 | void __tasklet_hi_schedule(struct tasklet_struct *t) |
423 | { | 519 | { |
520 | #ifdef CONFIG_LITMUS_NVIDIA | ||
521 | if(is_nvidia_func(t->func)) | ||
522 | { | ||
523 | #if 1 | ||
524 | // do nvidia tasklets right away and return | ||
525 | if(__do_nv_now(t)) | ||
526 | return; | ||
527 | #else | ||
528 | u32 nvidia_device = get_tasklet_nv_device_num(t); | ||
529 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | ||
530 | // __FUNCTION__, nvidia_device,litmus_clock()); | ||
531 | |||
532 | unsigned long flags; | ||
533 | struct task_struct* device_owner; | ||
534 | |||
535 | lock_nv_registry(nvidia_device, &flags); | ||
536 | |||
537 | device_owner = get_nv_max_device_owner(nvidia_device); | ||
538 | |||
539 | if(device_owner==NULL) | ||
540 | { | ||
541 | t->owner = NULL; | ||
542 | } | ||
543 | else | ||
544 | { | ||
545 | if( is_realtime(device_owner)) | ||
546 | { | ||
547 | TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n", | ||
548 | __FUNCTION__, nvidia_device,litmus_clock()); | ||
549 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | ||
550 | __FUNCTION__,device_owner->pid,nvidia_device); | ||
551 | |||
552 | t->owner = device_owner; | ||
553 | sched_trace_tasklet_release(t->owner); | ||
554 | if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device))) | ||
555 | { | ||
556 | unlock_nv_registry(nvidia_device, &flags); | ||
557 | return; | ||
558 | } | ||
559 | else | ||
560 | { | ||
561 | t->owner = NULL; /* fall through to normal scheduling */ | ||
562 | } | ||
563 | } | ||
564 | else | ||
565 | { | ||
566 | t->owner = NULL; | ||
567 | } | ||
568 | } | ||
569 | unlock_nv_registry(nvidia_device, &flags); | ||
570 | #endif | ||
571 | } | ||
572 | #endif | ||
573 | |||
574 | ___tasklet_hi_schedule(t); | ||
575 | } | ||
576 | EXPORT_SYMBOL(__tasklet_hi_schedule); | ||
577 | |||
578 | void ___tasklet_hi_schedule(struct tasklet_struct* t) | ||
579 | { | ||
424 | unsigned long flags; | 580 | unsigned long flags; |
425 | 581 | ||
426 | local_irq_save(flags); | 582 | local_irq_save(flags); |
@@ -430,19 +586,78 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |||
430 | raise_softirq_irqoff(HI_SOFTIRQ); | 586 | raise_softirq_irqoff(HI_SOFTIRQ); |
431 | local_irq_restore(flags); | 587 | local_irq_restore(flags); |
432 | } | 588 | } |
433 | 589 | EXPORT_SYMBOL(___tasklet_hi_schedule); | |
434 | EXPORT_SYMBOL(__tasklet_hi_schedule); | ||
435 | 590 | ||
436 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) | 591 | void __tasklet_hi_schedule_first(struct tasklet_struct *t) |
437 | { | 592 | { |
438 | BUG_ON(!irqs_disabled()); | 593 | BUG_ON(!irqs_disabled()); |
594 | #ifdef CONFIG_LITMUS_NVIDIA | ||
595 | if(is_nvidia_func(t->func)) | ||
596 | { | ||
597 | #if 1 | ||
598 | // do nvidia tasklets right away and return | ||
599 | if(__do_nv_now(t)) | ||
600 | return; | ||
601 | #else | ||
602 | u32 nvidia_device = get_tasklet_nv_device_num(t); | ||
603 | // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n", | ||
604 | // __FUNCTION__, nvidia_device,litmus_clock()); | ||
605 | unsigned long flags; | ||
606 | struct task_struct* device_owner; | ||
607 | |||
608 | lock_nv_registry(nvidia_device, &flags); | ||
609 | |||
610 | device_owner = get_nv_max_device_owner(nvidia_device); | ||
611 | |||
612 | if(device_owner==NULL) | ||
613 | { | ||
614 | t->owner = NULL; | ||
615 | } | ||
616 | else | ||
617 | { | ||
618 | if(is_realtime(device_owner)) | ||
619 | { | ||
620 | TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n", | ||
621 | __FUNCTION__, nvidia_device,litmus_clock()); | ||
622 | |||
623 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | ||
624 | __FUNCTION__,device_owner->pid,nvidia_device); | ||
625 | |||
626 | t->owner = device_owner; | ||
627 | sched_trace_tasklet_release(t->owner); | ||
628 | if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device))) | ||
629 | { | ||
630 | unlock_nv_registry(nvidia_device, &flags); | ||
631 | return; | ||
632 | } | ||
633 | else | ||
634 | { | ||
635 | t->owner = NULL; /* fall through to normal scheduling */ | ||
636 | } | ||
637 | } | ||
638 | else | ||
639 | { | ||
640 | t->owner = NULL; | ||
641 | } | ||
642 | } | ||
643 | unlock_nv_registry(nvidia_device, &flags); | ||
644 | #endif | ||
645 | } | ||
646 | #endif | ||
647 | |||
648 | ___tasklet_hi_schedule_first(t); | ||
649 | } | ||
650 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | ||
651 | |||
652 | void ___tasklet_hi_schedule_first(struct tasklet_struct* t) | ||
653 | { | ||
654 | BUG_ON(!irqs_disabled()); | ||
439 | 655 | ||
440 | t->next = __this_cpu_read(tasklet_hi_vec.head); | 656 | t->next = __this_cpu_read(tasklet_hi_vec.head); |
441 | __this_cpu_write(tasklet_hi_vec.head, t); | 657 | __this_cpu_write(tasklet_hi_vec.head, t); |
442 | __raise_softirq_irqoff(HI_SOFTIRQ); | 658 | __raise_softirq_irqoff(HI_SOFTIRQ); |
443 | } | 659 | } |
444 | 660 | EXPORT_SYMBOL(___tasklet_hi_schedule_first); | |
445 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | ||
446 | 661 | ||
447 | static void tasklet_action(struct softirq_action *a) | 662 | static void tasklet_action(struct softirq_action *a) |
448 | { | 663 | { |
@@ -498,6 +713,7 @@ static void tasklet_hi_action(struct softirq_action *a) | |||
498 | if (!atomic_read(&t->count)) { | 713 | if (!atomic_read(&t->count)) { |
499 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | 714 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) |
500 | BUG(); | 715 | BUG(); |
716 | |||
501 | t->func(t->data); | 717 | t->func(t->data); |
502 | tasklet_unlock(t); | 718 | tasklet_unlock(t); |
503 | continue; | 719 | continue; |
@@ -521,8 +737,13 @@ void tasklet_init(struct tasklet_struct *t, | |||
521 | t->next = NULL; | 737 | t->next = NULL; |
522 | t->state = 0; | 738 | t->state = 0; |
523 | atomic_set(&t->count, 0); | 739 | atomic_set(&t->count, 0); |
740 | |||
524 | t->func = func; | 741 | t->func = func; |
525 | t->data = data; | 742 | t->data = data; |
743 | |||
744 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
745 | t->owner = NULL; | ||
746 | #endif | ||
526 | } | 747 | } |
527 | 748 | ||
528 | EXPORT_SYMBOL(tasklet_init); | 749 | EXPORT_SYMBOL(tasklet_init); |
@@ -537,6 +758,7 @@ void tasklet_kill(struct tasklet_struct *t) | |||
537 | yield(); | 758 | yield(); |
538 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); | 759 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); |
539 | } | 760 | } |
761 | |||
540 | tasklet_unlock_wait(t); | 762 | tasklet_unlock_wait(t); |
541 | clear_bit(TASKLET_STATE_SCHED, &t->state); | 763 | clear_bit(TASKLET_STATE_SCHED, &t->state); |
542 | } | 764 | } |
@@ -811,6 +1033,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu) | |||
811 | for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { | 1033 | for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { |
812 | if (*i == t) { | 1034 | if (*i == t) { |
813 | *i = t->next; | 1035 | *i = t->next; |
1036 | |||
814 | /* If this was the tail element, move the tail ptr */ | 1037 | /* If this was the tail element, move the tail ptr */ |
815 | if (*i == NULL) | 1038 | if (*i == NULL) |
816 | per_cpu(tasklet_vec, cpu).tail = i; | 1039 | per_cpu(tasklet_vec, cpu).tail = i; |
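The three tasklet-scheduling wrappers above (__tasklet_schedule(), __tasklet_hi_schedule(), __tasklet_hi_schedule_first()) share one short-circuit shape: an NVIDIA tasklet is run inline when possible, and everything else falls through to the stock per-CPU queueing path that now lives in the triple-underscore variants. A condensed sketch of that shape, using only helpers that appear in this patch; it is an illustration, not a drop-in replacement for the code above:

/* Condensed sketch of the wrapper pattern used above. */
void __tasklet_schedule(struct tasklet_struct *t)
{
#ifdef CONFIG_LITMUS_NVIDIA
	/* Run an NVIDIA tasklet immediately when it can be locked and is
	 * enabled; __do_nv_now() returns 0 otherwise, and we fall back to
	 * normal queueing. */
	if (is_nvidia_func(t->func) && __do_nv_now(t))
		return;
#endif
	___tasklet_schedule(t);		/* stock per-CPU tasklet_vec path */
}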
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0400553f0d04..6b59d59ce3cf 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -44,6 +44,13 @@ | |||
44 | 44 | ||
45 | #include "workqueue_sched.h" | 45 | #include "workqueue_sched.h" |
46 | 46 | ||
47 | #ifdef CONFIG_LITMUS_NVIDIA | ||
48 | #include <litmus/litmus.h> | ||
49 | #include <litmus/sched_trace.h> | ||
50 | #include <litmus/nvidia_info.h> | ||
51 | #endif | ||
52 | |||
53 | |||
47 | enum { | 54 | enum { |
48 | /* global_cwq flags */ | 55 | /* global_cwq flags */ |
49 | GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | 56 | GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ |
@@ -1047,9 +1054,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
1047 | work_flags |= WORK_STRUCT_DELAYED; | 1054 | work_flags |= WORK_STRUCT_DELAYED; |
1048 | worklist = &cwq->delayed_works; | 1055 | worklist = &cwq->delayed_works; |
1049 | } | 1056 | } |
1050 | |||
1051 | insert_work(cwq, work, worklist, work_flags); | 1057 | insert_work(cwq, work, worklist, work_flags); |
1052 | |||
1053 | spin_unlock_irqrestore(&gcwq->lock, flags); | 1058 | spin_unlock_irqrestore(&gcwq->lock, flags); |
1054 | } | 1059 | } |
1055 | 1060 | ||
@@ -2687,10 +2692,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); | |||
2687 | */ | 2692 | */ |
2688 | int schedule_work(struct work_struct *work) | 2693 | int schedule_work(struct work_struct *work) |
2689 | { | 2694 | { |
2690 | return queue_work(system_wq, work); | 2695 | #if 0 |
2696 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
2697 | if(is_nvidia_func(work->func)) | ||
2698 | { | ||
2699 | u32 nvidiaDevice = get_work_nv_device_num(work); | ||
2700 | |||
2701 | //1) Ask Litmus which task owns GPU <nvidiaDevice>. (API to be defined.) | ||
2702 | unsigned long flags; | ||
2703 | struct task_struct* device_owner; | ||
2704 | |||
2705 | lock_nv_registry(nvidiaDevice, &flags); | ||
2706 | |||
2707 | device_owner = get_nv_max_device_owner(nvidiaDevice); | ||
2708 | |||
2709 | //2) If there is an owner, set work->owner to the owner's task struct. | ||
2710 | if(device_owner==NULL) | ||
2711 | { | ||
2712 | work->owner = NULL; | ||
2713 | //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice); | ||
2714 | } | ||
2715 | else | ||
2716 | { | ||
2717 | if( is_realtime(device_owner)) | ||
2718 | { | ||
2719 | TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n", | ||
2720 | __FUNCTION__, nvidiaDevice,litmus_clock()); | ||
2721 | TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n", | ||
2722 | __FUNCTION__, | ||
2723 | device_owner->pid, | ||
2724 | nvidiaDevice); | ||
2725 | |||
2726 | //3) Call litmus_schedule_work() and return (don't execute the rest | ||
2727 | // of schedule_work()). | ||
2728 | work->owner = device_owner; | ||
2729 | sched_trace_work_release(work->owner); | ||
2730 | if(likely(litmus_schedule_work(work, nvidiaDevice))) | ||
2731 | { | ||
2732 | unlock_nv_registry(nvidiaDevice, &flags); | ||
2733 | return 1; | ||
2734 | } | ||
2735 | else | ||
2736 | { | ||
2737 | work->owner = NULL; /* fall through to normal work scheduling */ | ||
2738 | } | ||
2739 | } | ||
2740 | else | ||
2741 | { | ||
2742 | work->owner = NULL; | ||
2743 | } | ||
2744 | } | ||
2745 | unlock_nv_registry(nvidiaDevice, &flags); | ||
2746 | } | ||
2747 | #endif | ||
2748 | #endif | ||
2749 | return(__schedule_work(work)); | ||
2691 | } | 2750 | } |
2692 | EXPORT_SYMBOL(schedule_work); | 2751 | EXPORT_SYMBOL(schedule_work); |
2693 | 2752 | ||
2753 | int __schedule_work(struct work_struct* work) | ||
2754 | { | ||
2755 | return queue_work(system_wq, work); | ||
2756 | } | ||
2757 | EXPORT_SYMBOL(__schedule_work); | ||
2758 | |||
2694 | /* | 2759 | /* |
2695 | * schedule_work_on - put work task on a specific cpu | 2760 | * schedule_work_on - put work task on a specific cpu |
2696 | * @cpu: cpu to put the work task on | 2761 | * @cpu: cpu to put the work task on |
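The schedule_work() change follows the same split as the tasklet hooks: the public entry point keeps its name so existing callers are untouched, while the original body moves into the newly exported __schedule_work(). A hedged usage sketch (my_work and my_deferred_fn are made-up names; note the NVIDIA redirection in schedule_work() is currently compiled out under #if 0):

/* Callers that must always take the stock workqueue path can use the new
 * __schedule_work() export; ordinary callers keep calling schedule_work()
 * and would pick up any LITMUS redirection if it were enabled. */
static void my_deferred_fn(struct work_struct *unused)
{
	/* deferred, non-real-time bookkeeping */
}
static DECLARE_WORK(my_work, my_deferred_fn);

static void queue_examples(void)
{
	schedule_work(&my_work);	/* public path, may be redirected */
	__schedule_work(&my_work);	/* always queues on system_wq */
}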
diff --git a/litmus/Kconfig b/litmus/Kconfig index bd6635c8de08..594c54342bdc 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig | |||
@@ -34,8 +34,70 @@ config RELEASE_MASTER | |||
34 | (http://www.cs.unc.edu/~anderson/papers.html). | 34 | (http://www.cs.unc.edu/~anderson/papers.html). |
35 | Currently only supported by GSN-EDF. | 35 | Currently only supported by GSN-EDF. |
36 | 36 | ||
37 | config REALTIME_AUX_TASKS | ||
38 | bool "Real-Time Auxillary Tasks" | ||
39 | depends on LITMUS_LOCKING | ||
40 | default n | ||
41 | help | ||
42 | Adds a system call that forces all non-real-time threads in a process | ||
43 | to become auxiliary real-time tasks. These tasks inherit the priority of | ||
44 | the highest-priority *BLOCKED* (but NOT blocked on a Litmus lock) real-time | ||
45 | task (non-auxiliary) in the process. This allows the integration of COTS | ||
46 | code that has background helper threads used primarily for message passing | ||
47 | and synchronization. If these background threads are NOT real-time scheduled, | ||
48 | then unbounded priority inversions may occur if a real-time task blocks on | ||
49 | a non-real-time thread. (See the usage sketch below.) | ||
50 | |||
51 | Beware of the following pitfalls: | ||
52 | 1) Auxiliary threads should not be CPU intensive. They should mostly | ||
53 | block on mutexes and condition variables. Violating this will | ||
54 | likely prevent meaningful analysis. | ||
55 | 2) Since there may be more than one auxiliary thread per process, | ||
56 | priority inversions may occur with respect to single-threaded | ||
57 | task models if/when one of the threads is scheduled simultaneously | ||
58 | with another of the same identity. | ||
59 | |||
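For orientation, a hypothetical user-space sketch of the system call this option adds (sys_set_aux_tasks(), implemented in litmus/aux_tasks.c later in this patch). The syscall number and the numeric flag values below are assumptions; the real definitions come from the LITMUS^RT headers and liblitmus:

#include <unistd.h>
#include <sys/syscall.h>

/* Assumed placeholder values -- see the LITMUS^RT headers for the real ones. */
#define __NR_set_aux_tasks	400		/* placeholder syscall number */
#define AUX_CURRENT		(1 << 0)	/* convert existing non-RT threads */
#define AUX_FUTURE		(1 << 1)	/* convert threads created later */
#define AUX_ENABLE		(1 << 2)	/* enable (vs. disable) aux tasks */

int main(void)
{
	/* Make every current and future non-real-time thread of this
	 * process an auxiliary real-time task. */
	long ret = syscall(__NR_set_aux_tasks,
			   AUX_ENABLE | AUX_CURRENT | AUX_FUTURE);
	return ret == 0 ? 0 : 1;
}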
60 | choice | ||
61 | prompt "Scheduling prioritization of AUX tasks." | ||
62 | default REALTIME_AUX_TASK_PRIORITY_BOOSTED | ||
63 | help | ||
64 | Select the prioritization method for auxiliary tasks. | ||
65 | |||
66 | config REALTIME_AUX_TASK_PRIORITY_BOOSTED | ||
67 | bool "Boosted" | ||
68 | help | ||
69 | Run all auxiliary task threads at maximum priority. Useful for | ||
70 | temporarily working around bugs during development. | ||
71 | |||
72 | config REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
73 | bool "Inheritance" | ||
74 | help | ||
75 | Auxiliary tasks inherit the maximum priority from blocked real-time | ||
76 | threads within the same process. | ||
77 | |||
78 | Additional pitfall: | ||
79 | 3) Busy-wait deadlock is likely between normal real-time tasks and | ||
80 | auxiliary tasks if they synchronize using _preemptive_ spinlocks that | ||
81 | do not use priority inheritance. | ||
82 | |||
83 | These pitfalls are mitigated by the fact that auxiliary tasks only | ||
84 | inherit priorities from blocked tasks (blocking signifies that the | ||
85 | blocked task _may_ be waiting on an auxiliary task to perform some | ||
86 | work). Further, auxiliary tasks without an inherited priority are | ||
87 | _always_ scheduled with a priority less than any normal real-time task! | ||
88 | |||
89 | NOTE: Aux tasks do not _directly_ inherit a priority from rt tasks that | ||
90 | are blocked on Litmus locks. Aux tasks should be COTS code that knows nothing | ||
91 | of Litmus, so they won't hold Litmus locks. Nothing an aux task can do can | ||
92 | _directly_ unblock the rt task blocked on a Litmus lock. However, the lock | ||
93 | holder that blocks the rt task CAN block on I/O and contribute its priority | ||
94 | to the aux tasks. Aux tasks may still _indirectly_ inherit the priority of | ||
95 | the blocked rt task via the lock holder. | ||
96 | endchoice | ||
97 | |||
37 | endmenu | 98 | endmenu |
38 | 99 | ||
100 | |||
39 | menu "Real-Time Synchronization" | 101 | menu "Real-Time Synchronization" |
40 | 102 | ||
41 | config NP_SECTION | 103 | config NP_SECTION |
@@ -60,6 +122,42 @@ config LITMUS_LOCKING | |||
60 | Say Yes if you want to include locking protocols such as the FMLP and | 122 | Say Yes if you want to include locking protocols such as the FMLP and |
61 | Baker's SRP. | 123 | Baker's SRP. |
62 | 124 | ||
125 | config LITMUS_AFFINITY_LOCKING | ||
126 | bool "Enable affinity infrastructure in k-exclusion locking protocols." | ||
127 | depends on LITMUS_LOCKING | ||
128 | default n | ||
129 | help | ||
130 | Enable affinity tracking infrastructure in k-exclusion locking protocols. | ||
131 | This only enabled the *infrastructure* not actual affinity algorithms. | ||
132 | |||
133 | If unsure, say No. | ||
134 | |||
135 | config LITMUS_NESTED_LOCKING | ||
136 | bool "Support for nested inheritance in locking protocols" | ||
137 | depends on LITMUS_LOCKING | ||
138 | default n | ||
139 | help | ||
140 | Enable nested priority inheritance. | ||
141 | |||
142 | config LITMUS_DGL_SUPPORT | ||
143 | bool "Support for dynamic group locks" | ||
144 | depends on LITMUS_NESTED_LOCKING | ||
145 | default n | ||
146 | help | ||
147 | Enable dynamic group lock support. | ||
148 | |||
149 | config LITMUS_MAX_DGL_SIZE | ||
150 | int "Maximum size of a dynamic group lock." | ||
151 | depends on LITMUS_DGL_SUPPORT | ||
152 | range 1 128 | ||
153 | default "10" | ||
154 | help | ||
155 | Dynamic group lock data structures are allocated on the process | ||
156 | stack when a group is requested. We set a maximum number of | ||
157 | locks in a dynamic group lock to avoid dynamic memory allocation. | ||
158 | |||
159 | TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE. | ||
160 | |||
63 | endmenu | 161 | endmenu |
64 | 162 | ||
65 | menu "Performance Enhancements" | 163 | menu "Performance Enhancements" |
@@ -112,6 +210,14 @@ choice | |||
112 | Break ties between two jobs, A and B, with equal deadlines by using a | 210 | Break ties between two jobs, A and B, with equal deadlines by using a |
113 | uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job | 211 | uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job |
114 | A has ~50% of winning a given tie-break. | 212 | A has ~50% of winning a given tie-break. |
213 | |||
214 | NOTES: | ||
215 | * This method does not work well if the job numbers of tied jobs never | ||
216 | increment: the hashes are then never re-drawn, so the comparison always | ||
217 | goes the same way and the same job loses nearly every tie-break | ||
218 | (illustrated below). This is usually not a problem unless you are doing | ||
219 | something unusual in Litmus (e.g., worker threads that do not increment | ||
220 | job numbers). | ||
115 | 221 | ||
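A small user-space toy that illustrates the note above; the hash below is a stand-in, not the kernel's edf_hash():

#include <stdio.h>

/* Stand-in hash mixing pid and job number, as in the tie-break described. */
static unsigned long toy_hash(unsigned long pid, unsigned long job)
{
	return (pid * 2654435761UL) ^ (job * 2246822519UL);
}

int main(void)
{
	int i;
	/* Neither task increments its job number, so the "random" draw is
	 * made exactly once and the same task loses every tie. */
	for (i = 0; i < 4; i++)
		printf("tie %d: %s wins\n", i,
		       toy_hash(100, 3) < toy_hash(200, 3) ? "A" : "B");
	return 0;
}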
116 | config EDF_PID_TIE_BREAK | 222 | config EDF_PID_TIE_BREAK |
117 | bool "PID-based Tie Breaks" | 223 | bool "PID-based Tie Breaks" |
@@ -167,7 +273,7 @@ config SCHED_TASK_TRACE | |||
167 | config SCHED_TASK_TRACE_SHIFT | 273 | config SCHED_TASK_TRACE_SHIFT |
168 | int "Buffer size for sched_trace_xxx() events" | 274 | int "Buffer size for sched_trace_xxx() events" |
169 | depends on SCHED_TASK_TRACE | 275 | depends on SCHED_TASK_TRACE |
170 | range 8 13 | 276 | range 8 15 |
171 | default 9 | 277 | default 9 |
172 | help | 278 | help |
173 | 279 | ||
@@ -279,4 +385,108 @@ config PREEMPT_STATE_TRACE | |||
279 | 385 | ||
280 | endmenu | 386 | endmenu |
281 | 387 | ||
388 | menu "Interrupt Handling" | ||
389 | |||
390 | choice | ||
391 | prompt "Scheduling of interrupt bottom-halves in Litmus." | ||
392 | default LITMUS_SOFTIRQD_NONE | ||
393 | depends on LITMUS_LOCKING | ||
394 | help | ||
395 | Schedule tasklets with known priorities in Litmus. | ||
396 | |||
397 | config LITMUS_SOFTIRQD_NONE | ||
398 | bool "No tasklet scheduling in Litmus." | ||
399 | help | ||
400 | Don't schedule tasklets in Litmus. Default. | ||
401 | |||
402 | config LITMUS_SOFTIRQD | ||
403 | bool "Spawn klmirqd interrupt handling threads." | ||
404 | help | ||
405 | Create klmirqd interrupt handling threads. Work must be | ||
406 | specifically dispatched to these workers. (Softirqs for | ||
407 | Litmus tasks are not magically redirected to klmirqd.) | ||
408 | |||
409 | G-EDF, C-EDF ONLY for now! | ||
410 | |||
411 | |||
412 | config LITMUS_PAI_SOFTIRQD | ||
413 | bool "Defer tasklets to context switch points." | ||
414 | help | ||
415 | Only execute scheduled tasklet bottom halves at | ||
416 | scheduling points. Trades reduced context-switch overhead | ||
417 | for non-preemptive durations of bottom-half | ||
418 | processing. | ||
419 | |||
420 | G-EDF, C-EDF ONLY for now! | ||
421 | |||
422 | endchoice | ||
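The LITMUS_SOFTIRQD option refers to the dispatch hooks added to kernel/softirq.c earlier in this patch. A hedged sketch of how an NVIDIA tasklet would be handed to a klmirqd thread instead of being run inline; helper names are those used in that code, and the fallback policy here is an assumption:

#ifdef CONFIG_LITMUS_SOFTIRQD
/* Sketch only: tag the tasklet with the GPU owner (its priority source) and
 * let a klmirqd thread run it; fall back to the stock path on failure. */
static void dispatch_to_klmirqd(struct tasklet_struct *t, u32 device)
{
	t->owner = get_nv_max_device_owner(device);
	if (!t->owner || !_litmus_tasklet_schedule(t, device)) {
		t->owner = NULL;
		___tasklet_schedule(t);		/* normal Linux softirq path */
	}
}
#endif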
423 | |||
424 | |||
425 | config LITMUS_NVIDIA | ||
426 | bool "Litmus handling of NVIDIA interrupts." | ||
427 | default n | ||
428 | help | ||
429 | Direct tasklets from NVIDIA devices to Litmus's klmirqd | ||
430 | or PAI interrupt handling routines. | ||
431 | |||
432 | If unsure, say No. | ||
433 | |||
434 | config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT | ||
435 | bool "Enable affinity-aware heuristics to improve GPU assignment." | ||
436 | depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING | ||
437 | default n | ||
438 | help | ||
439 | Enable several heuristics to improve the assignment | ||
440 | of GPUs to real-time tasks to reduce the overheads | ||
441 | of memory migrations. | ||
442 | |||
443 | If unsure, say No. | ||
444 | |||
445 | config NV_DEVICE_NUM | ||
446 | int "Number of NVIDIA GPUs." | ||
447 | depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD | ||
448 | range 1 16 | ||
449 | default "1" | ||
450 | help | ||
451 | Should be no greater than the number of CPUs and | ||
452 | no greater than the number of GPUs in your system. | ||
453 | |||
454 | choice | ||
455 | prompt "CUDA/Driver Version Support" | ||
456 | default CUDA_5_0 | ||
457 | depends on LITMUS_NVIDIA | ||
458 | help | ||
459 | Select the version of CUDA/driver to support. | ||
460 | |||
461 | config CUDA_5_0 | ||
462 | bool "CUDA 5.0" | ||
463 | depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS | ||
464 | help | ||
465 | Support CUDA 5.0 RCx (dev. driver version: x86_64-304.33) | ||
466 | |||
467 | config CUDA_4_0 | ||
468 | bool "CUDA 4.0" | ||
469 | depends on LITMUS_NVIDIA | ||
470 | help | ||
471 | Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40) | ||
472 | |||
473 | config CUDA_3_2 | ||
474 | bool "CUDA 3.2" | ||
475 | depends on LITMUS_NVIDIA | ||
476 | help | ||
477 | Support CUDA 3.2 (dev. driver version: x86_64-260.24) | ||
478 | |||
479 | endchoice | ||
480 | |||
481 | config LITMUS_NV_KLMIRQD_DEBUG | ||
482 | bool "Raise fake sporadic tasklets to test nv klmirqd threads." | ||
483 | depends on LITMUS_NVIDIA && LITMUS_SOFTIRQD | ||
484 | default n | ||
485 | help | ||
486 | Causes tasklets to be sporadically dispatched to waiting klmirqd | ||
487 | threads. WARNING! Kernel panic may occur if you switch between | ||
488 | LITMUS plugins! | ||
489 | |||
490 | endmenu | ||
491 | |||
282 | endmenu | 492 | endmenu |
diff --git a/litmus/Makefile b/litmus/Makefile index d26ca7076b62..67d8b8ee72bc 100644 --- a/litmus/Makefile +++ b/litmus/Makefile | |||
@@ -18,6 +18,7 @@ obj-y = sched_plugin.o litmus.o \ | |||
18 | bheap.o \ | 18 | bheap.o \ |
19 | binheap.o \ | 19 | binheap.o \ |
20 | ctrldev.o \ | 20 | ctrldev.o \ |
21 | aux_tasks.o \ | ||
21 | sched_gsn_edf.o \ | 22 | sched_gsn_edf.o \ |
22 | sched_psn_edf.o \ | 23 | sched_psn_edf.o \ |
23 | sched_pfp.o | 24 | sched_pfp.o |
@@ -30,3 +31,11 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o | |||
30 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o | 31 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o |
31 | obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o | 32 | obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o |
32 | obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o | 33 | obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o |
34 | |||
35 | obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o | ||
36 | obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o | ||
37 | obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o | ||
38 | obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o | ||
39 | obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o | ||
40 | |||
41 | obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o | ||
diff --git a/litmus/affinity.c b/litmus/affinity.c index 3fa6dd789400..cd93249b5506 100644 --- a/litmus/affinity.c +++ b/litmus/affinity.c | |||
@@ -26,7 +26,7 @@ void init_topology(void) { | |||
26 | cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); | 26 | cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); |
27 | } | 27 | } |
28 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", | 28 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", |
29 | cpu, neigh_info[cpu].size[i], i, | 29 | cpu, neigh_info[cpu].size[i], i, |
30 | *cpumask_bits(neigh_info[cpu].neighbors[i])); | 30 | *cpumask_bits(neigh_info[cpu].neighbors[i])); |
31 | } | 31 | } |
32 | 32 | ||
diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c new file mode 100644 index 000000000000..ef26bba3be77 --- /dev/null +++ b/litmus/aux_tasks.c | |||
@@ -0,0 +1,529 @@ | |||
1 | #include <litmus/sched_plugin.h> | ||
2 | #include <litmus/trace.h> | ||
3 | #include <litmus/litmus.h> | ||
4 | |||
5 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
6 | #include <litmus/rt_param.h> | ||
7 | #include <litmus/aux_tasks.h> | ||
8 | |||
9 | #include <linux/time.h> | ||
10 | |||
11 | #define AUX_SLICE_NR_JIFFIES 1 | ||
12 | #define AUX_SLICE_NS ((NSEC_PER_SEC / HZ) * AUX_SLICE_NR_JIFFIES) | ||
13 | |||
14 | static int admit_aux_task(struct task_struct *t) | ||
15 | { | ||
16 | int retval = 0; | ||
17 | struct task_struct *leader = t->group_leader; | ||
18 | |||
19 | /* Budget enforcement increments job numbers. Job numbers are used in | ||
20 | * tie-breaking of aux_tasks. This helps ensure that: | ||
21 | * 1) aux threads with no inherited priority cannot starve one another | ||
22 | * (they share the CPUs equally); | ||
23 | * 2) aux threads that inherit the same priority cannot starve each other. | ||
24 | * | ||
25 | * Assuming aux threads are well-behaved (they do very little work and | ||
26 | * suspend), risk of starvation should not be an issue, but this is a | ||
27 | * fail-safe. | ||
28 | */ | ||
29 | struct rt_task tp = { | ||
30 | .period = AUX_SLICE_NS, | ||
31 | .relative_deadline = AUX_SLICE_NS, | ||
32 | .exec_cost = AUX_SLICE_NS, /* allow full utilization with budget tracking */ | ||
33 | .phase = 0, | ||
34 | .cpu = task_cpu(leader), /* take CPU of group leader */ | ||
35 | .budget_policy = QUANTUM_ENFORCEMENT, | ||
36 | .budget_signal_policy = NO_SIGNALS, | ||
37 | .cls = RT_CLASS_BEST_EFFORT | ||
38 | }; | ||
39 | |||
40 | struct sched_param param = { .sched_priority = 0}; | ||
41 | |||
42 | tsk_rt(t)->task_params = tp; | ||
43 | retval = sched_setscheduler_nocheck(t, SCHED_LITMUS, ¶m); | ||
44 | |||
45 | return retval; | ||
46 | } | ||
47 | |||
48 | int exit_aux_task(struct task_struct *t) | ||
49 | { | ||
50 | int retval = 0; | ||
51 | |||
52 | BUG_ON(!tsk_rt(t)->is_aux_task); | ||
53 | |||
54 | TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid); | ||
55 | |||
56 | tsk_rt(t)->is_aux_task = 0; | ||
57 | |||
58 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
59 | list_del(&tsk_rt(t)->aux_task_node); | ||
60 | if (tsk_rt(t)->inh_task) { | ||
61 | litmus->__decrease_prio(t, NULL); | ||
62 | } | ||
63 | #endif | ||
64 | |||
65 | return retval; | ||
66 | } | ||
67 | |||
68 | static int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp) | ||
69 | { | ||
70 | int retval = 0; | ||
71 | |||
72 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
73 | struct list_head *pos; | ||
74 | |||
75 | TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); | ||
76 | |||
77 | list_for_each(pos, &tsk_aux(leader)->aux_tasks) { | ||
78 | struct task_struct *aux = | ||
79 | container_of(list_entry(pos, struct rt_param, aux_task_node), | ||
80 | struct task_struct, rt_param); | ||
81 | |||
82 | if (!is_realtime(aux)) { | ||
83 | TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); | ||
84 | } | ||
85 | else if(tsk_rt(aux)->inh_task == hp) { | ||
86 | TRACE_CUR("skipping real-time aux task %s/%d that already inherits from %s/%d\n", aux->comm, aux->pid, hp->comm, hp->pid); | ||
87 | } | ||
88 | else { | ||
89 | // aux tasks don't touch rt locks, so no nested call needed. | ||
90 | TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid); | ||
91 | retval = litmus->__increase_prio(aux, hp); | ||
92 | } | ||
93 | } | ||
94 | #endif | ||
95 | |||
96 | return retval; | ||
97 | } | ||
98 | |||
99 | static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp) | ||
100 | { | ||
101 | int retval = 0; | ||
102 | |||
103 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
104 | struct list_head *pos; | ||
105 | |||
106 | TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid); | ||
107 | |||
108 | list_for_each(pos, &tsk_aux(leader)->aux_tasks) { | ||
109 | struct task_struct *aux = | ||
110 | container_of(list_entry(pos, struct rt_param, aux_task_node), | ||
111 | struct task_struct, rt_param); | ||
112 | |||
113 | if (!is_realtime(aux)) { | ||
114 | TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid); | ||
115 | } | ||
116 | else { | ||
117 | TRACE_CUR("decreasing %s/%d.\n", aux->comm, aux->pid); | ||
118 | retval = litmus->__decrease_prio(aux, hp); | ||
119 | } | ||
120 | } | ||
121 | #endif | ||
122 | |||
123 | return retval; | ||
124 | } | ||
125 | |||
126 | int aux_task_owner_increase_priority(struct task_struct *t) | ||
127 | { | ||
128 | int retval = 0; | ||
129 | |||
130 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
131 | struct task_struct *leader; | ||
132 | struct task_struct *hp = NULL; | ||
133 | struct task_struct *hp_eff = NULL; | ||
134 | |||
135 | BUG_ON(!is_realtime(t)); | ||
136 | BUG_ON(!tsk_rt(t)->has_aux_tasks); | ||
137 | |||
138 | leader = t->group_leader; | ||
139 | |||
140 | if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { | ||
141 | WARN_ON(!is_running(t)); | ||
142 | TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n", | ||
143 | t->comm, t->pid, leader->comm, leader->pid); | ||
144 | goto out; | ||
145 | } | ||
146 | |||
147 | TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
148 | |||
149 | hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
150 | struct task_struct, rt_param); | ||
151 | hp_eff = effective_priority(hp); | ||
152 | |||
153 | if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */ | ||
154 | binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); | ||
155 | } | ||
156 | |||
157 | hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
158 | struct task_struct, rt_param); | ||
159 | |||
160 | if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ | ||
161 | hp_eff = effective_priority(hp); | ||
162 | TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
163 | retval = aux_tasks_increase_priority(leader, hp_eff); | ||
164 | } | ||
165 | #endif | ||
166 | |||
167 | out: | ||
168 | return retval; | ||
169 | } | ||
170 | |||
171 | int aux_task_owner_decrease_priority(struct task_struct *t) | ||
172 | { | ||
173 | int retval = 0; | ||
174 | |||
175 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
176 | struct task_struct *leader; | ||
177 | struct task_struct *hp = NULL; | ||
178 | struct task_struct *hp_eff = NULL; | ||
179 | |||
180 | BUG_ON(!is_realtime(t)); | ||
181 | BUG_ON(!tsk_rt(t)->has_aux_tasks); | ||
182 | |||
183 | leader = t->group_leader; | ||
184 | |||
185 | if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { | ||
186 | WARN_ON(!is_running(t)); | ||
187 | TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n", | ||
188 | t->comm, t->pid, leader->comm, leader->pid); | ||
189 | goto out; | ||
190 | } | ||
191 | |||
192 | TRACE_CUR("task %s/%d in group %s/%d decreasing priority.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
193 | |||
194 | hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
195 | struct task_struct, rt_param); | ||
196 | hp_eff = effective_priority(hp); | ||
197 | binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); | ||
198 | binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, | ||
199 | struct rt_param, aux_task_owner_node); | ||
200 | |||
201 | if (hp == t) { /* t was originally the hp */ | ||
202 | struct task_struct *new_hp = | ||
203 | container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
204 | struct task_struct, rt_param); | ||
205 | if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */ | ||
206 | hp_eff = effective_priority(new_hp); | ||
207 | TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
208 | retval = aux_tasks_decrease_priority(leader, hp_eff); | ||
209 | } | ||
210 | } | ||
211 | #endif | ||
212 | |||
213 | out: | ||
214 | return retval; | ||
215 | } | ||
216 | |||
217 | int make_aux_task_if_required(struct task_struct *t) | ||
218 | { | ||
219 | struct task_struct *leader; | ||
220 | int retval = 0; | ||
221 | |||
222 | read_lock_irq(&tasklist_lock); | ||
223 | |||
224 | leader = t->group_leader; | ||
225 | |||
226 | if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) { | ||
227 | goto out; | ||
228 | } | ||
229 | |||
230 | TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
231 | |||
232 | INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); | ||
233 | INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); | ||
234 | |||
235 | retval = admit_aux_task(t); | ||
236 | if (retval == 0) { | ||
237 | tsk_rt(t)->is_aux_task = 1; | ||
238 | |||
239 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
240 | list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); | ||
241 | |||
242 | if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { | ||
243 | struct task_struct *hp = | ||
244 | container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
245 | struct task_struct, rt_param); | ||
246 | |||
247 | TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); | ||
248 | |||
249 | retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); | ||
250 | |||
251 | if (retval != 0) { | ||
252 | /* don't know how to recover from bugs with prio inheritance. better just crash. */ | ||
253 | read_unlock_irq(&tasklist_lock); | ||
254 | BUG(); | ||
255 | } | ||
256 | } | ||
257 | #endif | ||
258 | } | ||
259 | |||
260 | out: | ||
261 | read_unlock_irq(&tasklist_lock); | ||
262 | |||
263 | return retval; | ||
264 | } | ||
265 | |||
266 | |||
267 | long enable_aux_task_owner(struct task_struct *t) | ||
268 | { | ||
269 | long retval = 0; | ||
270 | |||
271 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
272 | struct task_struct *leader = t->group_leader; | ||
273 | struct task_struct *hp; | ||
274 | |||
275 | if (!tsk_rt(t)->has_aux_tasks) { | ||
276 | TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); | ||
277 | return -1; | ||
278 | } | ||
279 | |||
280 | BUG_ON(!is_realtime(t)); | ||
281 | |||
282 | if (binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { | ||
283 | TRACE_CUR("task %s/%d is already active\n", t->comm, t->pid); | ||
284 | goto out; | ||
285 | } | ||
286 | |||
287 | binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners, | ||
288 | struct rt_param, aux_task_owner_node); | ||
289 | |||
290 | hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
291 | struct task_struct, rt_param); | ||
292 | if (hp == t) { | ||
293 | /* we're the new hp */ | ||
294 | TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
295 | |||
296 | retval = aux_tasks_increase_priority(leader, | ||
297 | (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); | ||
298 | } | ||
299 | #endif | ||
300 | |||
301 | out: | ||
302 | return retval; | ||
303 | } | ||
304 | |||
305 | long disable_aux_task_owner(struct task_struct *t) | ||
306 | { | ||
307 | long retval = 0; | ||
308 | |||
309 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
310 | struct task_struct *leader = t->group_leader; | ||
311 | struct task_struct *hp; | ||
312 | struct task_struct *new_hp = NULL; | ||
313 | |||
314 | if (!tsk_rt(t)->has_aux_tasks) { | ||
315 | TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid); | ||
316 | return -1; | ||
317 | } | ||
318 | |||
319 | BUG_ON(!is_realtime(t)); | ||
320 | |||
321 | if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) { | ||
322 | TRACE_CUR("task %s/%d is already not active\n", t->comm, t->pid); | ||
323 | goto out; | ||
324 | } | ||
325 | |||
326 | TRACE_CUR("task %s/%d exiting from group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
327 | |||
328 | hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
329 | struct task_struct, rt_param); | ||
330 | binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners); | ||
331 | |||
332 | if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) { | ||
333 | new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
334 | struct task_struct, rt_param); | ||
335 | } | ||
336 | |||
337 | if (hp == t && new_hp != t) { | ||
338 | struct task_struct *to_inh = NULL; | ||
339 | |||
340 | TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid); | ||
341 | |||
342 | if (new_hp) { | ||
343 | to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp; | ||
344 | } | ||
345 | |||
346 | retval = aux_tasks_decrease_priority(leader, to_inh); | ||
347 | } | ||
348 | #endif | ||
349 | |||
350 | out: | ||
351 | return retval; | ||
352 | } | ||
353 | |||
354 | |||
355 | static int aux_task_owner_max_priority_order(struct binheap_node *a, | ||
356 | struct binheap_node *b) | ||
357 | { | ||
358 | struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, aux_task_owner_node), | ||
359 | struct task_struct, rt_param); | ||
360 | struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, aux_task_owner_node), | ||
361 | struct task_struct, rt_param); | ||
362 | |||
363 | BUG_ON(!d_a); | ||
364 | BUG_ON(!d_b); | ||
365 | |||
366 | return litmus->compare(d_a, d_b); | ||
367 | } | ||
368 | |||
369 | |||
370 | static long __do_enable_aux_tasks(int flags) | ||
371 | { | ||
372 | long retval = 0; | ||
373 | struct task_struct *leader; | ||
374 | struct task_struct *t; | ||
375 | int aux_tasks_added = 0; | ||
376 | |||
377 | leader = current->group_leader; | ||
378 | |||
379 | if (!tsk_aux(leader)->initialized) { | ||
380 | INIT_LIST_HEAD(&tsk_aux(leader)->aux_tasks); | ||
381 | INIT_BINHEAP_HANDLE(&tsk_aux(leader)->aux_task_owners, aux_task_owner_max_priority_order); | ||
382 | tsk_aux(leader)->initialized = 1; | ||
383 | } | ||
384 | |||
385 | if (flags & AUX_FUTURE) { | ||
386 | tsk_aux(leader)->aux_future = 1; | ||
387 | } | ||
388 | |||
389 | t = leader; | ||
390 | do { | ||
391 | if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) { | ||
392 | /* This may harmlessly reinit unused nodes. TODO: Don't reinit already init nodes. */ | ||
393 | /* doesn't hurt to initialize both nodes */ | ||
394 | INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node); | ||
395 | INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node); | ||
396 | } | ||
397 | |||
398 | TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n", | ||
399 | leader->comm, leader->pid, t->comm, t->pid, | ||
400 | tsk_rt(t)->task_params.period); | ||
401 | |||
402 | /* inspect period to see if it is an rt task */ | ||
403 | if (tsk_rt(t)->task_params.period == 0) { | ||
404 | if (flags & AUX_CURRENT) { /* bitwise flag test */ | ||
405 | if (!tsk_rt(t)->is_aux_task) { | ||
406 | int admit_ret; | ||
407 | |||
408 | TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); | ||
409 | |||
410 | admit_ret = admit_aux_task(t); | ||
411 | |||
412 | if (admit_ret == 0) { | ||
413 | /* not yet admitted as a real-time task; make it an aux task. */ | ||
414 | tsk_rt(t)->is_aux_task = 1; | ||
415 | aux_tasks_added = 1; | ||
416 | |||
417 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
418 | list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks); | ||
419 | #endif | ||
420 | } | ||
421 | } | ||
422 | else { | ||
423 | TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); | ||
424 | } | ||
425 | } | ||
426 | else { | ||
427 | TRACE_CUR("Not changing thread in %s/%d to AUX task: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); | ||
428 | } | ||
429 | } | ||
430 | else if (!tsk_rt(t)->is_aux_task) { /* don't let aux tasks get aux tasks of their own */ | ||
431 | if (!tsk_rt(t)->has_aux_tasks) { | ||
432 | TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid); | ||
433 | tsk_rt(t)->has_aux_tasks = 1; | ||
434 | } | ||
435 | else { | ||
436 | TRACE_CUR("task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid); | ||
437 | } | ||
438 | } | ||
439 | |||
440 | t = next_thread(t); | ||
441 | } while(t != leader); | ||
442 | |||
443 | |||
444 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
445 | if (aux_tasks_added && !binheap_empty(&tsk_aux(leader)->aux_task_owners)) { | ||
446 | struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node), | ||
447 | struct task_struct, rt_param); | ||
448 | TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid); | ||
449 | retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); | ||
450 | } | ||
451 | #endif | ||
452 | |||
453 | return retval; | ||
454 | } | ||
455 | |||
456 | static long __do_disable_aux_tasks(int flags) | ||
457 | { | ||
458 | long retval = 0; | ||
459 | struct task_struct *leader; | ||
460 | struct task_struct *t; | ||
461 | |||
462 | leader = current->group_leader; | ||
463 | |||
464 | if (flags & AUX_FUTURE) { | ||
465 | tsk_aux(leader)->aux_future = 0; | ||
466 | } | ||
467 | |||
468 | if (flags & AUX_CURRENT) { | ||
469 | t = leader; | ||
470 | do { | ||
471 | if (tsk_rt(t)->is_aux_task) { | ||
472 | |||
473 | TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid); | ||
474 | |||
475 | if (is_realtime(t)) { | ||
476 | long temp_retval; | ||
477 | struct sched_param param = { .sched_priority = 0}; | ||
478 | |||
479 | TRACE_CUR("%s/%d is real-time. Changing policy to SCHED_NORMAL.\n", t->comm, t->pid); | ||
480 | |||
481 | temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, ¶m); | ||
482 | |||
483 | if (temp_retval != 0) { | ||
484 | TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid); | ||
485 | if (retval == 0) { | ||
486 | retval = temp_retval; | ||
487 | } | ||
488 | else { | ||
489 | TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval); | ||
490 | } | ||
491 | } | ||
492 | } | ||
493 | |||
494 | tsk_rt(t)->is_aux_task = 0; | ||
495 | } | ||
496 | t = next_thread(t); | ||
497 | } while(t != leader); | ||
498 | } | ||
499 | |||
500 | return retval; | ||
501 | } | ||
502 | |||
503 | asmlinkage long sys_set_aux_tasks(int flags) | ||
504 | { | ||
505 | long retval; | ||
506 | |||
507 | read_lock_irq(&tasklist_lock); | ||
508 | |||
509 | if (flags & AUX_ENABLE) { | ||
510 | retval = __do_enable_aux_tasks(flags); | ||
511 | } | ||
512 | else { | ||
513 | retval = __do_disable_aux_tasks(flags); | ||
514 | } | ||
515 | |||
516 | read_unlock_irq(&tasklist_lock); | ||
517 | |||
518 | return retval; | ||
519 | } | ||
520 | |||
521 | #else | ||
522 | |||
523 | asmlinkage long sys_set_aux_tasks(int flags) | ||
524 | { | ||
525 | printk("Unsupported. Recompile with CONFIG_REALTIME_AUX_TASKS.\n"); | ||
526 | return -EINVAL; | ||
527 | } | ||
528 | |||
529 | #endif | ||
diff --git a/litmus/budget.c b/litmus/budget.c index f7712be29adb..518174a37a3b 100644 --- a/litmus/budget.c +++ b/litmus/budget.c | |||
@@ -1,11 +1,13 @@ | |||
1 | #include <linux/sched.h> | 1 | #include <linux/sched.h> |
2 | #include <linux/percpu.h> | 2 | #include <linux/percpu.h> |
3 | #include <linux/hrtimer.h> | 3 | #include <linux/hrtimer.h> |
4 | #include <linux/signal.h> | ||
4 | 5 | ||
5 | #include <litmus/litmus.h> | 6 | #include <litmus/litmus.h> |
6 | #include <litmus/preempt.h> | 7 | #include <litmus/preempt.h> |
7 | 8 | ||
8 | #include <litmus/budget.h> | 9 | #include <litmus/budget.h> |
10 | #include <litmus/signal.h> | ||
9 | 11 | ||
10 | struct enforcement_timer { | 12 | struct enforcement_timer { |
11 | /* The enforcement timer is used to accurately police | 13 | /* The enforcement timer is used to accurately police |
@@ -64,7 +66,7 @@ static void arm_enforcement_timer(struct enforcement_timer* et, | |||
64 | 66 | ||
65 | /* Calling this when there is no budget left for the task | 67 | /* Calling this when there is no budget left for the task |
66 | * makes no sense, unless the task is non-preemptive. */ | 68 | * makes no sense, unless the task is non-preemptive. */ |
67 | BUG_ON(budget_exhausted(t) && (!is_np(t))); | 69 | BUG_ON(budget_exhausted(t) && !is_np(t)); |
68 | 70 | ||
69 | /* __hrtimer_start_range_ns() cancels the timer | 71 | /* __hrtimer_start_range_ns() cancels the timer |
70 | * anyway, so we don't have to check whether it is still armed */ | 72 | * anyway, so we don't have to check whether it is still armed */ |
@@ -86,7 +88,7 @@ void update_enforcement_timer(struct task_struct* t) | |||
86 | { | 88 | { |
87 | struct enforcement_timer* et = &__get_cpu_var(budget_timer); | 89 | struct enforcement_timer* et = &__get_cpu_var(budget_timer); |
88 | 90 | ||
89 | if (t && budget_precisely_enforced(t)) { | 91 | if (t && budget_precisely_tracked(t) && !sigbudget_sent(t)) { |
90 | /* Make sure we call into the scheduler when this budget | 92 | /* Make sure we call into the scheduler when this budget |
91 | * expires. */ | 93 | * expires. */ |
92 | arm_enforcement_timer(et, t); | 94 | arm_enforcement_timer(et, t); |
@@ -96,6 +98,16 @@ void update_enforcement_timer(struct task_struct* t) | |||
96 | } | 98 | } |
97 | } | 99 | } |
98 | 100 | ||
101 | void send_sigbudget(struct task_struct* t) | ||
102 | { | ||
103 | if (!test_and_set_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) { | ||
104 | /* signal has not yet been sent and we are responsible for sending | ||
105 | * since we just set the sent-bit when it was previously 0. */ | ||
106 | |||
107 | TRACE_TASK(t, "SIG_BUDGET being sent!\n"); | ||
108 | send_sig(SIG_BUDGET, t, 1); /* '1' denotes signal sent from kernel */ | ||
109 | } | ||
110 | } | ||
99 | 111 | ||
100 | static int __init init_budget_enforcement(void) | 112 | static int __init init_budget_enforcement(void) |
101 | { | 113 | { |
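The send_sigbudget() addition delivers a LITMUS-specific SIG_BUDGET signal when a tracked budget is exhausted. A hypothetical user-space sketch of consuming it; the numeric value of SIG_BUDGET is defined in litmus/signal.h and is stubbed here with an assumed placeholder:

#include <signal.h>
#include <unistd.h>

#define SIG_BUDGET (SIGRTMIN + 3)	/* placeholder; see litmus/signal.h */

static volatile sig_atomic_t budget_exhausted;

static void on_budget(int sig)
{
	budget_exhausted = 1;		/* async-signal-safe flag only */
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_budget };
	sigemptyset(&sa.sa_mask);
	sigaction(SIG_BUDGET, &sa, NULL);

	while (!budget_exhausted) {
		/* ... do one job's worth of work, then wait for the next
		 * release; SIG_BUDGET interrupts the wait on an overrun ... */
		pause();
	}
	return 0;
}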
diff --git a/litmus/edf_common.c b/litmus/edf_common.c index 5aca2934a7b5..441fbfddf0c2 100644 --- a/litmus/edf_common.c +++ b/litmus/edf_common.c | |||
@@ -12,6 +12,10 @@ | |||
12 | #include <litmus/sched_plugin.h> | 12 | #include <litmus/sched_plugin.h> |
13 | #include <litmus/sched_trace.h> | 13 | #include <litmus/sched_trace.h> |
14 | 14 | ||
15 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
16 | #include <litmus/locking.h> | ||
17 | #endif | ||
18 | |||
15 | #include <litmus/edf_common.h> | 19 | #include <litmus/edf_common.h> |
16 | 20 | ||
17 | #ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM | 21 | #ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM |
@@ -45,33 +49,158 @@ static inline long edf_hash(struct task_struct *t) | |||
45 | * | 49 | * |
46 | * both first and second may be NULL | 50 | * both first and second may be NULL |
47 | */ | 51 | */ |
48 | int edf_higher_prio(struct task_struct* first, | 52 | #ifdef CONFIG_LITMUS_NESTED_LOCKING |
49 | struct task_struct* second) | 53 | int __edf_higher_prio( |
54 | struct task_struct* first, comparison_mode_t first_mode, | ||
55 | struct task_struct* second, comparison_mode_t second_mode) | ||
56 | #else | ||
57 | int edf_higher_prio(struct task_struct* first, struct task_struct* second) | ||
58 | #endif | ||
50 | { | 59 | { |
51 | struct task_struct *first_task = first; | 60 | struct task_struct *first_task = first; |
52 | struct task_struct *second_task = second; | 61 | struct task_struct *second_task = second; |
53 | 62 | ||
54 | /* There is no point in comparing a task to itself. */ | 63 | /* There is no point in comparing a task to itself. */ |
55 | if (first && first == second) { | 64 | if (first && first == second) { |
56 | TRACE_TASK(first, | 65 | TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid); |
57 | "WARNING: pointless edf priority comparison.\n"); | 66 | WARN_ON(1); |
58 | return 0; | 67 | return 0; |
59 | } | 68 | } |
60 | 69 | ||
61 | 70 | ||
62 | /* check for NULL tasks */ | 71 | /* check for NULL tasks */ |
63 | if (!first || !second) | 72 | if (!first || !second) { |
64 | return first && !second; | 73 | return first && !second; |
74 | } | ||
65 | 75 | ||
66 | #ifdef CONFIG_LITMUS_LOCKING | 76 | /* There is some goofy stuff in this code. There are three subclasses |
77 | * within the SCHED_LITMUS scheduling class: | ||
78 | * 1) Auxiliary tasks: COTS helper threads from the application level that | ||
79 | * are forced to be real-time. | ||
80 | * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers. | ||
81 | * 3) Normal Litmus tasks. | ||
82 | * | ||
83 | * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might | ||
84 | * inherit a priority from a task of #3. | ||
85 | * | ||
86 | * The code proceeds in the following manner: | ||
87 | * 1) Make aux and klmirqd threads with base-priorities have low priorities. | ||
88 | * 2) Determine effective priorities. | ||
89 | * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie. | ||
90 | */ | ||
91 | |||
92 | |||
93 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) | ||
94 | /* run aux tasks at max priority */ | ||
95 | /* TODO: Actually use prio-boosting. */ | ||
96 | if (first->rt_param.is_aux_task != second->rt_param.is_aux_task) | ||
97 | { | ||
98 | return (first->rt_param.is_aux_task > second->rt_param.is_aux_task); | ||
99 | } | ||
100 | else if(first->rt_param.is_aux_task && second->rt_param.is_aux_task) | ||
101 | { | ||
102 | if(first->group_leader == second->group_leader) { | ||
103 | TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks | ||
104 | goto aux_tie_break; | ||
105 | } | ||
106 | first = first->group_leader; | ||
107 | second = second->group_leader; | ||
108 | } | ||
109 | #elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
110 | { | ||
111 | int first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task; | ||
112 | int second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task; | ||
113 | |||
114 | /* prioritize aux tasks without inheritance below real-time tasks */ | ||
115 | if (first_lo_aux || second_lo_aux) { | ||
116 | // one of these is an aux task without inheritance. | ||
117 | if(first_lo_aux && second_lo_aux) { | ||
118 | TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks | ||
119 | goto aux_tie_break; | ||
120 | } | ||
121 | else { | ||
122 | |||
123 | // make the aux thread the lowest-priority real-time task | ||
124 | int temp = 0; | ||
125 | if (first_lo_aux && is_realtime(second)) { | ||
126 | // temp = 0; | ||
127 | } | ||
128 | else if(second_lo_aux && is_realtime(first)) { | ||
129 | temp = 1; | ||
130 | } | ||
131 | TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); | ||
132 | return temp; | ||
133 | } | ||
134 | } | ||
135 | |||
136 | if (first->rt_param.is_aux_task && second->rt_param.is_aux_task && | ||
137 | first->rt_param.inh_task == second->rt_param.inh_task) { | ||
138 | // inh_task is !NULL for both tasks since neither was a lo_aux task. | ||
139 | // Both aux tasks inherit from the same task, so tie-break | ||
140 | // by base priority of the aux tasks. | ||
141 | TRACE_CUR("aux tie break!\n"); | ||
142 | goto aux_tie_break; | ||
143 | } | ||
144 | } | ||
145 | #endif | ||
146 | |||
147 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
148 | { | ||
149 | int first_lo_klmirqd = first->rt_param.is_interrupt_thread && !first->rt_param.inh_task; | ||
150 | int second_lo_klmirqd = second->rt_param.is_interrupt_thread && !second->rt_param.inh_task; | ||
151 | |||
152 | /* prioritize klmirqd threads without inheritance below real-time tasks */ | ||
153 | if (first_lo_klmirqd || second_lo_klmirqd) { | ||
154 | // one of these is an klmirqd thread without inheritance. | ||
155 | if(first_lo_klmirqd && second_lo_klmirqd) { | ||
156 | TRACE_CUR("klmirqd tie break!\n"); // tie-break by BASE priority of the aux tasks | ||
157 | goto klmirqd_tie_break; | ||
158 | } | ||
159 | else { | ||
160 | // make the klmirqd thread the lowest-priority real-time task | ||
161 | // (but above low-prio aux tasks and Linux tasks) | ||
162 | int temp = 0; | ||
163 | if (first_lo_klmirqd && is_realtime(second)) { | ||
164 | // temp = 0; | ||
165 | } | ||
166 | else if(second_lo_klmirqd && is_realtime(first)) { | ||
167 | temp = 1; | ||
168 | } | ||
169 | TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp); | ||
170 | return temp; | ||
171 | } | ||
172 | } | ||
173 | |||
174 | if (first->rt_param.is_interrupt_thread && second->rt_param.is_interrupt_thread && | ||
175 | first->rt_param.inh_task == second->rt_param.inh_task) { | ||
176 | // inh_task is !NULL for both tasks since neither was a lo_klmirqd task. | ||
177 | // Both klmirqd tasks inherit from the same task, so tie-break | ||
178 | // by base priority of the klmirqd tasks. | ||
179 | TRACE_CUR("klmirqd tie break!\n"); | ||
180 | goto klmirqd_tie_break; | ||
181 | } | ||
182 | } | ||
183 | #endif | ||
67 | 184 | ||
68 | /* Check for inherited priorities. Change task | 185 | |
186 | #ifdef CONFIG_LITMUS_LOCKING | ||
187 | /* Check for EFFECTIVE priorities. Change task | ||
69 | * used for comparison in such a case. | 188 | * used for comparison in such a case. |
70 | */ | 189 | */ |
71 | if (unlikely(first->rt_param.inh_task)) | 190 | if (unlikely(first->rt_param.inh_task) |
191 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
192 | && (first_mode == EFFECTIVE) | ||
193 | #endif | ||
194 | ) { | ||
72 | first_task = first->rt_param.inh_task; | 195 | first_task = first->rt_param.inh_task; |
73 | if (unlikely(second->rt_param.inh_task)) | 196 | } |
197 | if (unlikely(second->rt_param.inh_task) | ||
198 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
199 | && (second_mode == EFFECTIVE) | ||
200 | #endif | ||
201 | ) { | ||
74 | second_task = second->rt_param.inh_task; | 202 | second_task = second->rt_param.inh_task; |
203 | } | ||
75 | 204 | ||
76 | /* Check for priority boosting. Tie-break by start of boosting. | 205 | /* Check for priority boosting. Tie-break by start of boosting. |
77 | */ | 206 | */ |
@@ -79,17 +208,31 @@ int edf_higher_prio(struct task_struct* first, | |||
79 | /* first_task is boosted, how about second_task? */ | 208 | /* first_task is boosted, how about second_task? */ |
80 | if (!is_priority_boosted(second_task) || | 209 | if (!is_priority_boosted(second_task) || |
81 | lt_before(get_boost_start(first_task), | 210 | lt_before(get_boost_start(first_task), |
82 | get_boost_start(second_task))) | 211 | get_boost_start(second_task))) { |
83 | return 1; | 212 | return 1; |
84 | else | 213 | } |
214 | else { | ||
85 | return 0; | 215 | return 0; |
86 | } else if (unlikely(is_priority_boosted(second_task))) | 216 | } |
217 | } | ||
218 | else if (unlikely(is_priority_boosted(second_task))) { | ||
87 | /* second_task is boosted, first is not*/ | 219 | /* second_task is boosted, first is not*/ |
88 | return 0; | 220 | return 0; |
221 | } | ||
222 | |||
223 | #endif | ||
89 | 224 | ||
225 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
226 | aux_tie_break: | ||
227 | #endif | ||
228 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
229 | klmirqd_tie_break: | ||
90 | #endif | 230 | #endif |
91 | 231 | ||
92 | if (earlier_deadline(first_task, second_task)) { | 232 | if (!is_realtime(second_task)) { |
233 | return 1; | ||
234 | } | ||
235 | else if (earlier_deadline(first_task, second_task)) { | ||
93 | return 1; | 236 | return 1; |
94 | } | 237 | } |
95 | else if (get_deadline(first_task) == get_deadline(second_task)) { | 238 | else if (get_deadline(first_task) == get_deadline(second_task)) { |
@@ -98,7 +241,6 @@ int edf_higher_prio(struct task_struct* first, | |||
98 | */ | 241 | */ |
99 | int pid_break; | 242 | int pid_break; |
100 | 243 | ||
101 | |||
102 | #if defined(CONFIG_EDF_TIE_BREAK_LATENESS) | 244 | #if defined(CONFIG_EDF_TIE_BREAK_LATENESS) |
103 | /* Tie break by lateness. Jobs with greater lateness get | 245 | /* Tie break by lateness. Jobs with greater lateness get |
104 | * priority. This should spread tardiness across all tasks, | 246 | * priority. This should spread tardiness across all tasks, |
@@ -154,18 +296,104 @@ int edf_higher_prio(struct task_struct* first, | |||
154 | return 1; | 296 | return 1; |
155 | } | 297 | } |
156 | else if (first_task->pid == second_task->pid) { | 298 | else if (first_task->pid == second_task->pid) { |
157 | /* If the PIDs are the same then the task with the | 299 | #ifdef CONFIG_LITMUS_SOFTIRQD |
158 | * inherited priority wins. | 300 | if (first_task->rt_param.is_interrupt_thread < second_task->rt_param.is_interrupt_thread) { |
159 | */ | 301 | return 1; |
160 | if (!second->rt_param.inh_task) { | 302 | } |
303 | else if (first_task->rt_param.is_interrupt_thread == second_task->rt_param.is_interrupt_thread) { | ||
304 | #endif | ||
305 | |||
306 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
307 | if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) { | ||
161 | return 1; | 308 | return 1; |
162 | } | 309 | } |
310 | else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) { | ||
311 | #endif | ||
312 | |||
313 | /* Something could be wrong if you get this far. */ | ||
314 | if (unlikely(first->rt_param.inh_task == second->rt_param.inh_task)) { | ||
315 | /* Both tasks have the same inherited priority. | ||
316 | * Likely a bug condition. | ||
317 | */ | ||
318 | if (first->pid < second->pid) { | ||
319 | return 1; | ||
320 | } | ||
321 | else if (first->pid == second->pid) { | ||
322 | //WARN_ON(1); | ||
323 | } | ||
324 | } | ||
325 | else { | ||
326 | /* At least one task must inherit */ | ||
327 | BUG_ON(!first->rt_param.inh_task && | ||
328 | !second->rt_param.inh_task); | ||
329 | |||
330 | /* The task withOUT the inherited priority wins. */ | ||
331 | if (second->rt_param.inh_task) { | ||
332 | /* | ||
333 | * common with aux tasks. | ||
334 | TRACE_CUR("unusual comparison: " | ||
335 | "first = %s/%d first_task = %s/%d " | ||
336 | "second = %s/%d second_task = %s/%d\n", | ||
337 | first->comm, first->pid, | ||
338 | (first->rt_param.inh_task) ? first->rt_param.inh_task->comm : "(nil)", | ||
339 | (first->rt_param.inh_task) ? first->rt_param.inh_task->pid : 0, | ||
340 | second->comm, second->pid, | ||
341 | (second->rt_param.inh_task) ? second->rt_param.inh_task->comm : "(nil)", | ||
342 | (second->rt_param.inh_task) ? second->rt_param.inh_task->pid : 0); | ||
343 | */ | ||
344 | return 1; | ||
345 | } | ||
346 | } | ||
347 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
348 | } | ||
349 | #endif | ||
350 | |||
351 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
352 | } | ||
353 | #endif | ||
354 | |||
163 | } | 355 | } |
164 | } | 356 | } |
165 | } | 357 | } |
358 | |||
166 | return 0; /* fall-through. prio(second_task) > prio(first_task) */ | 359 | return 0; /* fall-through. prio(second_task) > prio(first_task) */ |
167 | } | 360 | } |
168 | 361 | ||
362 | |||
363 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
364 | int edf_higher_prio(struct task_struct* first, struct task_struct* second) | ||
365 | { | ||
366 | return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE); | ||
367 | } | ||
368 | |||
369 | int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b) | ||
370 | { | ||
371 | struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); | ||
372 | struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); | ||
373 | |||
374 | return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE); | ||
375 | } | ||
376 | |||
377 | int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b) | ||
378 | { | ||
379 | return edf_max_heap_order(b, a); // swap comparison | ||
380 | } | ||
381 | |||
382 | int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) | ||
383 | { | ||
384 | struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); | ||
385 | struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); | ||
386 | |||
387 | return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE); | ||
388 | } | ||
389 | |||
390 | int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) | ||
391 | { | ||
392 | return edf_max_heap_base_priority_order(b, a); // swap comparison | ||
393 | } | ||
394 | #endif | ||
395 | |||
396 | |||
169 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b) | 397 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b) |
170 | { | 398 | { |
171 | return edf_higher_prio(bheap2task(a), bheap2task(b)); | 399 | return edf_higher_prio(bheap2task(a), bheap2task(b)); |
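After the aux/klmirqd special cases, the comparison above boils down to: priority-boosted tasks first, then any real-time task over a non-real-time one, then earlier deadline, then the configured tie-break (lateness, normalized lateness, hash, or PID), and finally, on a PID tie, the task without an inherited priority. Below is a minimal userspace sketch of just the deadline/PID/inheritance ordering; it deliberately ignores boosting, aux-task and klmirqd handling, and the BASE/EFFECTIVE comparison modes, and its field names (deadline, pid, has_inh) are illustrative stand-ins, not the kernel's rt_param layout.

#include <stdio.h>

typedef unsigned long long lt_t;   /* stand-in for the kernel's lt_t */

struct model_task {
    lt_t deadline;   /* absolute deadline */
    int  pid;        /* used for the PID tie-break */
    int  has_inh;    /* nonzero if the task currently inherits a priority */
};

/* Returns 1 iff a has higher priority than b: earlier deadline first,
 * then lower PID, and on a PID tie the task WITHOUT an inherited
 * priority wins, mirroring the final tie-break above. */
static int model_edf_higher_prio(const struct model_task *a,
                                 const struct model_task *b)
{
    if (a->deadline != b->deadline)
        return a->deadline < b->deadline;
    if (a->pid != b->pid)
        return a->pid < b->pid;
    return !a->has_inh && b->has_inh;
}

int main(void)
{
    struct model_task base      = { 100, 42, 0 };
    struct model_task inherited = { 100, 42, 1 };
    /* Same deadline and PID: the non-inheriting instance wins. */
    printf("base beats inherited: %d\n",
           model_edf_higher_prio(&base, &inherited));
    return 0;
}

Compiled as a standalone program, the example prints 1, matching the rule that the task without an inherited priority wins the final tie-break.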
diff --git a/litmus/fdso.c b/litmus/fdso.c index 250377d184e7..709be3cc8992 100644 --- a/litmus/fdso.c +++ b/litmus/fdso.c | |||
@@ -20,13 +20,28 @@ | |||
20 | 20 | ||
21 | extern struct fdso_ops generic_lock_ops; | 21 | extern struct fdso_ops generic_lock_ops; |
22 | 22 | ||
23 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
24 | extern struct fdso_ops generic_affinity_ops; | ||
25 | #endif | ||
26 | |||
23 | static const struct fdso_ops* fdso_ops[] = { | 27 | static const struct fdso_ops* fdso_ops[] = { |
24 | &generic_lock_ops, /* FMLP_SEM */ | 28 | &generic_lock_ops, /* FMLP_SEM */ |
25 | &generic_lock_ops, /* SRP_SEM */ | 29 | &generic_lock_ops, /* SRP_SEM */ |
30 | |||
26 | &generic_lock_ops, /* MPCP_SEM */ | 31 | &generic_lock_ops, /* MPCP_SEM */ |
27 | &generic_lock_ops, /* MPCP_VS_SEM */ | 32 | &generic_lock_ops, /* MPCP_VS_SEM */ |
28 | &generic_lock_ops, /* DPCP_SEM */ | 33 | &generic_lock_ops, /* DPCP_SEM */ |
29 | &generic_lock_ops, /* PCP_SEM */ | 34 | &generic_lock_ops, /* PCP_SEM */ |
35 | |||
36 | &generic_lock_ops, /* RSM_MUTEX */ | ||
37 | &generic_lock_ops, /* IKGLP_SEM */ | ||
38 | &generic_lock_ops, /* KFMLP_SEM */ | ||
39 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
40 | &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */ | ||
41 | &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */ | ||
42 | &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */ | ||
43 | &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */ | ||
44 | #endif | ||
30 | }; | 45 | }; |
31 | 46 | ||
32 | static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) | 47 | static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) |
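fdso_create() (shown in context above) dispatches object creation through fdso_ops[], so the entries added by this patch must appear in exactly the same order as the corresponding obj_type_t values, with the affinity-observer slots compiled in only under CONFIG_LITMUS_AFFINITY_LOCKING. The sketch below is a minimal userspace model of that type-indexed dispatch pattern; the demo_* enum values, ops structure, and constructor are illustrative stand-ins, not LITMUS's actual obj_type_t or fdso_ops.

#include <stdio.h>

/* Illustrative object types; the ops table below must stay in this order. */
typedef enum { DEMO_FMLP_SEM, DEMO_SRP_SEM, DEMO_MAX_OBJ_TYPE } demo_obj_type_t;

struct demo_fdso_ops {
    int (*create)(void **obj_ref, void *config);
};

static int demo_lock_create(void **obj_ref, void *config)
{
    (void)config;
    *obj_ref = NULL;    /* a real constructor would allocate the lock here */
    return 0;
}

static const struct demo_fdso_ops demo_lock_ops = { demo_lock_create };

static const struct demo_fdso_ops *demo_ops[] = {
    &demo_lock_ops,     /* DEMO_FMLP_SEM */
    &demo_lock_ops,     /* DEMO_SRP_SEM  */
};

static int demo_fdso_create(void **obj_ref, demo_obj_type_t type, void *cfg)
{
    if (type >= DEMO_MAX_OBJ_TYPE)
        return -1;      /* unknown object type */
    return demo_ops[type]->create(obj_ref, cfg);
}

int main(void)
{
    void *obj;
    printf("create: %d\n", demo_fdso_create(&obj, DEMO_FMLP_SEM, NULL));
    return 0;
}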
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c new file mode 100644 index 000000000000..7d73105b4181 --- /dev/null +++ b/litmus/gpu_affinity.c | |||
@@ -0,0 +1,231 @@ | |||
1 | |||
2 | #ifdef CONFIG_LITMUS_NVIDIA | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | #include <litmus/litmus.h> | ||
6 | #include <litmus/gpu_affinity.h> | ||
7 | |||
8 | #include <litmus/sched_trace.h> | ||
9 | |||
10 | #define OBSERVATION_CAP ((lt_t)(2e9)) | ||
11 | |||
12 | // reason for skew: high outliers are less | ||
13 | // frequent and way out of bounds | ||
14 | //#define HI_THRESHOLD 2 | ||
15 | //#define LO_THRESHOLD 4 | ||
16 | |||
17 | #define NUM_STDEV_NUM 1 | ||
18 | #define NUM_STDEV_DENOM 2 | ||
19 | |||
20 | #define MIN(a, b) ((a < b) ? a : b) | ||
21 | |||
22 | static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed) | ||
23 | { | ||
24 | fp_t relative_err; | ||
25 | fp_t err, new; | ||
26 | fp_t actual = _integer_to_fp(observed); | ||
27 | |||
28 | err = _sub(actual, fb->est); | ||
29 | new = _add(_mul(a, err), _mul(b, fb->accum_err)); | ||
30 | |||
31 | relative_err = _div(err, actual); | ||
32 | |||
33 | fb->est = new; | ||
34 | fb->accum_err = _add(fb->accum_err, err); | ||
35 | |||
36 | return relative_err; | ||
37 | } | ||
38 | |||
39 | lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count) | ||
40 | { | ||
41 | /* brute force: takes about as much time as incremental running methods when | ||
42 | * count < 50 (on Bonham). Brute force also less prone to overflow. | ||
43 | */ | ||
44 | lt_t sqdeviations = 0; | ||
45 | uint16_t i; | ||
46 | for(i = 0; i < count; ++i) | ||
47 | { | ||
48 | lt_t temp = (int64_t)nums[i] - (int64_t)avg; | ||
49 | sqdeviations += temp * temp; | ||
50 | } | ||
51 | return sqdeviations/count; | ||
52 | } | ||
53 | |||
54 | lt_t isqrt(lt_t n) | ||
55 | { | ||
56 | /* integer square root using babylonian method | ||
57 | * (algo taken from Wikipedia) */ | ||
58 | lt_t res = 0; | ||
59 | lt_t bit = ((lt_t)1) << (sizeof(n)*8-2); | ||
60 | while (bit > n) { | ||
61 | bit >>= 2; | ||
62 | } | ||
63 | |||
64 | while (bit != 0) { | ||
65 | if (n >= res + bit) { | ||
66 | n -= res + bit; | ||
67 | res = (res >> 1) + bit; | ||
68 | } | ||
69 | else { | ||
70 | res >>= 1; | ||
71 | } | ||
72 | bit >>= 2; | ||
73 | } | ||
74 | return res; | ||
75 | } | ||
76 | |||
77 | void update_gpu_estimate(struct task_struct *t, lt_t observed) | ||
78 | { | ||
79 | //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | ||
80 | avg_est_t *est; | ||
81 | struct migration_info mig_info; | ||
82 | |||
83 | BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST); | ||
84 | |||
85 | est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]); | ||
86 | |||
87 | if (unlikely(observed > OBSERVATION_CAP)) { | ||
88 | TRACE_TASK(t, "Crazy observation greater than was dropped: %llu > %llu\n", | ||
89 | observed, | ||
90 | OBSERVATION_CAP); | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | #if 0 | ||
95 | // filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out | ||
96 | // of range of the average, but only filter if enough samples | ||
97 | // have been taken. | ||
98 | if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { | ||
99 | if (unlikely(observed < est->avg/LO_THRESHOLD)) { | ||
100 | TRACE_TASK(t, "Observation is too small: %llu\n", | ||
101 | observed); | ||
102 | return; | ||
103 | } | ||
104 | else if (unlikely(observed > est->avg*HI_THRESHOLD)) { | ||
105 | TRACE_TASK(t, "Observation is too large: %llu\n", | ||
106 | observed); | ||
107 | return; | ||
108 | } | ||
109 | #endif | ||
110 | // filter values outside NUM_STDEVx the standard deviation, | ||
111 | // but only filter if enough samples have been taken. | ||
112 | if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) { | ||
113 | lt_t lower, upper; | ||
114 | |||
115 | lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM; | ||
116 | lower = est->avg - MIN(range, est->avg); // no underflow. | ||
117 | |||
118 | if (unlikely(observed < lower)) { | ||
119 | TRACE_TASK(t, "Observation is too small: %llu\n", observed); | ||
120 | return; | ||
121 | } | ||
122 | |||
123 | upper = est->avg + range; | ||
124 | if (unlikely(observed > upper)) { | ||
125 | TRACE_TASK(t, "Observation is too large: %llu\n", observed); | ||
126 | return; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | |||
131 | |||
132 | if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) { | ||
133 | ++est->count; | ||
134 | } | ||
135 | else { | ||
136 | est->sum -= est->history[est->idx]; | ||
137 | } | ||
138 | |||
139 | mig_info.observed = observed; | ||
140 | mig_info.estimated = est->avg; | ||
141 | mig_info.distance = tsk_rt(t)->gpu_migration; | ||
142 | sched_trace_migration(t, &mig_info); | ||
143 | |||
144 | |||
145 | est->history[est->idx] = observed; | ||
146 | est->sum += observed; | ||
147 | est->avg = est->sum/est->count; | ||
148 | est->std = isqrt(varience(est->history, est->avg, est->count)); | ||
149 | est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE; | ||
150 | |||
151 | |||
152 | #if 0 | ||
153 | if(unlikely(fb->est.val == 0)) { | ||
154 | // kludge-- cap observed values to prevent whacky estimations. | ||
155 | // whacky stuff happens during the first few jobs. | ||
156 | if(unlikely(observed > OBSERVATION_CAP)) { | ||
157 | TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n", | ||
158 | observed, OBSERVATION_CAP); | ||
159 | observed = OBSERVATION_CAP; | ||
160 | } | ||
161 | |||
162 | // take the first observation as our estimate | ||
163 | // (initial value of 0 was bogus anyhow) | ||
164 | fb->est = _integer_to_fp(observed); | ||
165 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. | ||
166 | } | ||
167 | else { | ||
168 | fp_t rel_err = update_estimate(fb, | ||
169 | tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration], | ||
170 | tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration], | ||
171 | observed); | ||
172 | |||
173 | if(unlikely(_fp_to_integer(fb->est) <= 0)) { | ||
174 | TRACE_TASK(t, "Invalid estimate. Patching.\n"); | ||
175 | fb->est = _integer_to_fp(observed); | ||
176 | fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work. | ||
177 | } | ||
178 | else { | ||
179 | struct migration_info mig_info; | ||
180 | |||
181 | sched_trace_prediction_err(t, | ||
182 | &(tsk_rt(t)->gpu_migration), | ||
183 | &rel_err); | ||
184 | |||
185 | mig_info.observed = observed; | ||
186 | mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration); | ||
187 | mig_info.distance = tsk_rt(t)->gpu_migration; | ||
188 | |||
189 | sched_trace_migration(t, &mig_info); | ||
190 | } | ||
191 | } | ||
192 | #endif | ||
193 | |||
194 | TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n", | ||
195 | tsk_rt(t)->gpu_migration, | ||
196 | observed, | ||
197 | est->avg); | ||
198 | } | ||
199 | |||
200 | gpu_migration_dist_t gpu_migration_distance(int a, int b) | ||
201 | { | ||
202 | // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs | ||
203 | int i; | ||
204 | int dist; | ||
205 | |||
206 | if(likely(a >= 0 && b >= 0)) { | ||
207 | for(i = 0; i <= MIG_FAR; ++i) { | ||
208 | if(a>>i == b>>i) { | ||
209 | dist = i; | ||
210 | goto out; | ||
211 | } | ||
212 | } | ||
213 | dist = MIG_NONE; // hopefully never reached. | ||
214 | TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b); | ||
215 | } | ||
216 | else { | ||
217 | dist = MIG_NONE; | ||
218 | } | ||
219 | |||
220 | out: | ||
221 | TRACE_CUR("Distance %d -> %d is %d\n", | ||
222 | a, b, dist); | ||
223 | |||
224 | return dist; | ||
225 | } | ||
226 | |||
227 | |||
228 | |||
229 | |||
230 | #endif | ||
231 | |||
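update_gpu_estimate() above keeps a sliding window of observed GPU migration costs, rejects samples that fall outside half a standard deviation of the running average (once enough samples exist), and recomputes the average and standard deviation with the brute-force variance and Babylonian integer square root shown earlier. The following standalone sketch models that filter in userspace; WINDOW, the count threshold of 10, and all model_* names are illustrative (the patch uses AVG_EST_WINDOW_SIZE and MIN(10, AVG_EST_WINDOW_SIZE/2)).

#include <stdio.h>
#include <stdint.h>

#define WINDOW 16          /* stands in for AVG_EST_WINDOW_SIZE */

struct model_est {
    uint64_t history[WINDOW];
    uint64_t sum, avg, std;
    uint16_t count, idx;
};

static uint64_t model_isqrt(uint64_t n)
{
    /* same Babylonian method as isqrt() in the patch */
    uint64_t res = 0, bit = (uint64_t)1 << 62;
    while (bit > n)
        bit >>= 2;
    while (bit) {
        if (n >= res + bit) { n -= res + bit; res = (res >> 1) + bit; }
        else res >>= 1;
        bit >>= 2;
    }
    return res;
}

static uint64_t model_variance(const uint64_t *v, uint64_t avg, uint16_t n)
{
    uint64_t sq = 0;
    for (uint16_t i = 0; i < n; ++i) {
        int64_t d = (int64_t)v[i] - (int64_t)avg;
        sq += (uint64_t)(d * d);
    }
    return sq / n;
}

/* Returns 0 if the sample was rejected as an outlier. */
static int model_update(struct model_est *e, uint64_t observed)
{
    if (e->count > 10) {   /* filter only once enough samples were taken */
        uint64_t range = e->std / 2;              /* half a std-dev band */
        uint64_t lower = e->avg - (range < e->avg ? range : e->avg);
        if (observed < lower || observed > e->avg + range)
            return 0;
    }
    if (e->count < WINDOW)
        ++e->count;
    else
        e->sum -= e->history[e->idx];             /* evict oldest sample */
    e->history[e->idx] = observed;
    e->sum += observed;
    e->avg = e->sum / e->count;
    e->std = model_isqrt(model_variance(e->history, e->avg, e->count));
    e->idx = (e->idx + 1) % WINDOW;
    return 1;
}

int main(void)
{
    struct model_est e = { {0}, 0, 0, 0, 0, 0 };
    for (int i = 0; i < 12; ++i)
        model_update(&e, 100);                    /* warm up the window */
    printf("5000 %s\n", model_update(&e, 5000) ? "kept" : "dropped");
    printf("100  %s\n", model_update(&e, 100)  ? "kept" : "dropped");
    return 0;
}

Feeding twelve in-band samples and then an outlier shows the outlier being dropped while subsequent in-band samples continue to be accepted.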
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c new file mode 100644 index 000000000000..a4ae74331782 --- /dev/null +++ b/litmus/ikglp_lock.c | |||
@@ -0,0 +1,2976 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/uaccess.h> | ||
3 | |||
4 | #include <litmus/trace.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/fdso.h> | ||
7 | |||
8 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
9 | #include <litmus/gpu_affinity.h> | ||
10 | #include <litmus/nvidia_info.h> | ||
11 | #endif | ||
12 | |||
13 | #include <litmus/ikglp_lock.h> | ||
14 | |||
15 | // big signed value. | ||
16 | #define IKGLP_INVAL_DISTANCE 0x7FFFFFFF | ||
17 | |||
18 | int ikglp_max_heap_base_priority_order(struct binheap_node *a, | ||
19 | struct binheap_node *b) | ||
20 | { | ||
21 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); | ||
22 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); | ||
23 | |||
24 | BUG_ON(!d_a); | ||
25 | BUG_ON(!d_b); | ||
26 | |||
27 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | ||
28 | } | ||
29 | |||
30 | int ikglp_min_heap_base_priority_order(struct binheap_node *a, | ||
31 | struct binheap_node *b) | ||
32 | { | ||
33 | ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node); | ||
34 | ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node); | ||
35 | |||
36 | return litmus->__compare(d_b->task, BASE, d_a->task, BASE); | ||
37 | } | ||
38 | |||
39 | int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a, | ||
40 | struct binheap_node *b) | ||
41 | { | ||
42 | ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node); | ||
43 | ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node); | ||
44 | |||
45 | return litmus->__compare(d_a->task, BASE, d_b->task, BASE); | ||
46 | } | ||
47 | |||
48 | |||
49 | int ikglp_min_heap_donee_order(struct binheap_node *a, | ||
50 | struct binheap_node *b) | ||
51 | { | ||
52 | struct task_struct *prio_a, *prio_b; | ||
53 | |||
54 | ikglp_donee_heap_node_t *d_a = | ||
55 | binheap_entry(a, ikglp_donee_heap_node_t, node); | ||
56 | ikglp_donee_heap_node_t *d_b = | ||
57 | binheap_entry(b, ikglp_donee_heap_node_t, node); | ||
58 | |||
59 | if(!d_a->donor_info) { | ||
60 | prio_a = d_a->task; | ||
61 | } | ||
62 | else { | ||
63 | prio_a = d_a->donor_info->task; | ||
64 | BUG_ON(d_a->task != d_a->donor_info->donee_info->task); | ||
65 | } | ||
66 | |||
67 | if(!d_b->donor_info) { | ||
68 | prio_b = d_b->task; | ||
69 | } | ||
70 | else { | ||
71 | prio_b = d_b->donor_info->task; | ||
72 | BUG_ON(d_b->task != d_b->donor_info->donee_info->task); | ||
73 | } | ||
74 | |||
75 | // note reversed order | ||
76 | return litmus->__compare(prio_b, BASE, prio_a, BASE); | ||
77 | } | ||
78 | |||
79 | |||
80 | |||
81 | static inline int ikglp_get_idx(struct ikglp_semaphore *sem, | ||
82 | struct fifo_queue *queue) | ||
83 | { | ||
84 | return (queue - &sem->fifo_queues[0]); | ||
85 | } | ||
86 | |||
87 | static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem, | ||
88 | struct task_struct *holder) | ||
89 | { | ||
90 | int i; | ||
91 | for(i = 0; i < sem->nr_replicas; ++i) | ||
92 | if(sem->fifo_queues[i].owner == holder) | ||
93 | return(&sem->fifo_queues[i]); | ||
94 | return(NULL); | ||
95 | } | ||
96 | |||
97 | |||
98 | |||
99 | static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue, | ||
100 | struct task_struct *skip) | ||
101 | { | ||
102 | struct list_head *pos; | ||
103 | struct task_struct *queued, *found = NULL; | ||
104 | |||
105 | list_for_each(pos, &kqueue->wait.task_list) { | ||
106 | queued = (struct task_struct*) list_entry(pos, | ||
107 | wait_queue_t, task_list)->private; | ||
108 | |||
109 | /* Compare task prios, find high prio task. */ | ||
110 | if(queued != skip && litmus->compare(queued, found)) | ||
111 | found = queued; | ||
112 | } | ||
113 | return found; | ||
114 | } | ||
115 | |||
116 | static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem, | ||
117 | struct fifo_queue *search_start) | ||
118 | { | ||
119 | // we start our search at search_start instead of at the beginning of the | ||
120 | // queue list to load-balance across all resources. | ||
121 | struct fifo_queue* step = search_start; | ||
122 | struct fifo_queue* shortest = sem->shortest_fifo_queue; | ||
123 | |||
124 | do { | ||
125 | step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ? | ||
126 | step+1 : &sem->fifo_queues[0]; | ||
127 | |||
128 | if(step->count < shortest->count) { | ||
129 | shortest = step; | ||
130 | if(step->count == 0) | ||
131 | break; /* can't get any shorter */ | ||
132 | } | ||
133 | |||
134 | }while(step != search_start); | ||
135 | |||
136 | return(shortest); | ||
137 | } | ||
138 | |||
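As the comment in ikglp_find_shortest() notes, the scan begins just after search_start rather than at index 0 so that successive requests spread across replicas instead of always piling onto the first short queue. A simplified userspace version of the wrap-around scan follows; it starts its "best" candidate at the caller-supplied index instead of at a cached shortest queue, and the names are illustrative, not from the patch.

#include <stdio.h>

/* Wrap-around scan over 'n' queue lengths, starting just after 'start'. */
static unsigned find_shortest_from(const unsigned *len, unsigned n,
                                   unsigned start)
{
    unsigned best = start;
    unsigned i = start;
    do {
        i = (i + 1 == n) ? 0 : i + 1;   /* wrap around the replica array */
        if (len[i] < len[best]) {
            best = i;
            if (len[i] == 0)
                break;                  /* cannot get any shorter */
        }
    } while (i != start);
    return best;
}

int main(void)
{
    unsigned len[] = { 2, 1, 1, 3 };
    /* Starting after index 0, the scan settles on queue 1, not queue 2. */
    printf("shortest from 0: %u\n", find_shortest_from(len, 4, 0));
    return 0;
}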
139 | static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem) | ||
140 | { | ||
141 | return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task; | ||
142 | } | ||
143 | |||
144 | |||
145 | |||
146 | #if 0 | ||
147 | static void print_global_list(struct binheap_node* n, int depth) | ||
148 | { | ||
149 | ikglp_heap_node_t *global_heap_node; | ||
150 | char padding[81] = " "; | ||
151 | |||
152 | if(n == NULL) { | ||
153 | TRACE_CUR("+-> %p\n", NULL); | ||
154 | return; | ||
155 | } | ||
156 | |||
157 | global_heap_node = binheap_entry(n, ikglp_heap_node_t, node); | ||
158 | |||
159 | if(depth*2 <= 80) | ||
160 | padding[depth*2] = '\0'; | ||
161 | |||
162 | TRACE_CUR("%s+-> %s/%d\n", | ||
163 | padding, | ||
164 | global_heap_node->task->comm, | ||
165 | global_heap_node->task->pid); | ||
166 | |||
167 | if(n->left) print_global_list(n->left, depth+1); | ||
168 | if(n->right) print_global_list(n->right, depth+1); | ||
169 | } | ||
170 | |||
171 | static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth) | ||
172 | { | ||
173 | ikglp_donee_heap_node_t *donee_node; | ||
174 | char padding[81] = " "; | ||
175 | struct task_struct* donor = NULL; | ||
176 | |||
177 | if(n == NULL) { | ||
178 | TRACE_CUR("+-> %p\n", NULL); | ||
179 | return; | ||
180 | } | ||
181 | |||
182 | donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node); | ||
183 | |||
184 | if(depth*2 <= 80) | ||
185 | padding[depth*2] = '\0'; | ||
186 | |||
187 | if(donee_node->donor_info) { | ||
188 | donor = donee_node->donor_info->task; | ||
189 | } | ||
190 | |||
191 | TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n", | ||
192 | padding, | ||
193 | donee_node->task->comm, | ||
194 | donee_node->task->pid, | ||
195 | (donor) ? donor->comm : "nil", | ||
196 | (donor) ? donor->pid : -1, | ||
197 | ikglp_get_idx(sem, donee_node->fq)); | ||
198 | |||
199 | if(n->left) print_donees(sem, n->left, depth+1); | ||
200 | if(n->right) print_donees(sem, n->right, depth+1); | ||
201 | } | ||
202 | |||
203 | static void print_donors(struct binheap_node *n, int depth) | ||
204 | { | ||
205 | ikglp_wait_state_t *donor_node; | ||
206 | char padding[81] = " "; | ||
207 | |||
208 | if(n == NULL) { | ||
209 | TRACE_CUR("+-> %p\n", NULL); | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | donor_node = binheap_entry(n, ikglp_wait_state_t, node); | ||
214 | |||
215 | if(depth*2 <= 80) | ||
216 | padding[depth*2] = '\0'; | ||
217 | |||
218 | |||
219 | TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n", | ||
220 | padding, | ||
221 | donor_node->task->comm, | ||
222 | donor_node->task->pid, | ||
223 | donor_node->donee_info->task->comm, | ||
224 | donor_node->donee_info->task->pid); | ||
225 | |||
226 | if(n->left) print_donors(n->left, depth+1); | ||
227 | if(n->right) print_donors(n->right, depth+1); | ||
228 | } | ||
229 | #endif | ||
230 | |||
231 | static void ikglp_add_global_list(struct ikglp_semaphore *sem, | ||
232 | struct task_struct *t, | ||
233 | ikglp_heap_node_t *node) | ||
234 | { | ||
235 | |||
236 | |||
237 | node->task = t; | ||
238 | INIT_BINHEAP_NODE(&node->node); | ||
239 | |||
240 | if(sem->top_m_size < sem->m) { | ||
241 | TRACE_CUR("Trivially adding %s/%d to top-m global list.\n", | ||
242 | t->comm, t->pid); | ||
243 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
244 | // print_global_list(sem->top_m.root, 1); | ||
245 | |||
246 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); | ||
247 | ++(sem->top_m_size); | ||
248 | |||
249 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | ||
250 | // print_global_list(sem->top_m.root, 1); | ||
251 | } | ||
252 | else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) { | ||
253 | ikglp_heap_node_t *evicted = | ||
254 | binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node); | ||
255 | |||
256 | TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n", | ||
257 | t->comm, t->pid, | ||
258 | evicted->task->comm, evicted->task->pid); | ||
259 | |||
260 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
261 | // print_global_list(sem->not_top_m.root, 1); | ||
262 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
263 | // print_global_list(sem->top_m.root, 1); | ||
264 | |||
265 | |||
266 | binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node); | ||
267 | INIT_BINHEAP_NODE(&evicted->node); | ||
268 | binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node); | ||
269 | |||
270 | binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node); | ||
271 | |||
272 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | ||
273 | // print_global_list(sem->top_m.root, 1); | ||
274 | // TRACE_CUR("Not-Top-M After:\n"); | ||
275 | // print_global_list(sem->not_top_m.root, 1); | ||
276 | } | ||
277 | else { | ||
278 | TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n", | ||
279 | t->comm, t->pid); | ||
280 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
281 | // print_global_list(sem->not_top_m.root, 1); | ||
282 | |||
283 | binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node); | ||
284 | |||
285 | // TRACE_CUR("Not-Top-M After:\n"); | ||
286 | // print_global_list(sem->not_top_m.root, 1); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | |||
291 | static void ikglp_del_global_list(struct ikglp_semaphore *sem, | ||
292 | struct task_struct *t, | ||
293 | ikglp_heap_node_t *node) | ||
294 | { | ||
295 | BUG_ON(!binheap_is_in_heap(&node->node)); | ||
296 | |||
297 | TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid); | ||
298 | |||
299 | if(binheap_is_in_this_heap(&node->node, &sem->top_m)) { | ||
300 | TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid); | ||
301 | |||
302 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
303 | // print_global_list(sem->not_top_m.root, 1); | ||
304 | // TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size); | ||
305 | // print_global_list(sem->top_m.root, 1); | ||
306 | |||
307 | |||
308 | binheap_delete(&node->node, &sem->top_m); | ||
309 | |||
310 | if(!binheap_empty(&sem->not_top_m)) { | ||
311 | ikglp_heap_node_t *promoted = | ||
312 | binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node); | ||
313 | |||
314 | TRACE_CUR("Promoting %s/%d to top-m\n", | ||
315 | promoted->task->comm, promoted->task->pid); | ||
316 | |||
317 | binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node); | ||
318 | INIT_BINHEAP_NODE(&promoted->node); | ||
319 | |||
320 | binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node); | ||
321 | } | ||
322 | else { | ||
323 | TRACE_CUR("No one to promote to top-m.\n"); | ||
324 | --(sem->top_m_size); | ||
325 | } | ||
326 | |||
327 | // TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size); | ||
328 | // print_global_list(sem->top_m.root, 1); | ||
329 | // TRACE_CUR("Not-Top-M After:\n"); | ||
330 | // print_global_list(sem->not_top_m.root, 1); | ||
331 | } | ||
332 | else { | ||
333 | TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid); | ||
334 | // TRACE_CUR("Not-Top-M Before:\n"); | ||
335 | // print_global_list(sem->not_top_m.root, 1); | ||
336 | |||
337 | binheap_delete(&node->node, &sem->not_top_m); | ||
338 | |||
339 | // TRACE_CUR("Not-Top-M After:\n"); | ||
340 | // print_global_list(sem->not_top_m.root, 1); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | |||
345 | static void ikglp_add_donees(struct ikglp_semaphore *sem, | ||
346 | struct fifo_queue *fq, | ||
347 | struct task_struct *t, | ||
348 | ikglp_donee_heap_node_t* node) | ||
349 | { | ||
350 | // TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid); | ||
351 | // TRACE_CUR("donees Before:\n"); | ||
352 | // print_donees(sem, sem->donees.root, 1); | ||
353 | |||
354 | node->task = t; | ||
355 | node->donor_info = NULL; | ||
356 | node->fq = fq; | ||
357 | INIT_BINHEAP_NODE(&node->node); | ||
358 | |||
359 | binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node); | ||
360 | |||
361 | // TRACE_CUR("donees After:\n"); | ||
362 | // print_donees(sem, sem->donees.root, 1); | ||
363 | } | ||
364 | |||
365 | |||
366 | static void ikglp_refresh_owners_prio_increase(struct task_struct *t, | ||
367 | struct fifo_queue *fq, | ||
368 | struct ikglp_semaphore *sem, | ||
369 | unsigned long flags) | ||
370 | { | ||
371 | // priority of 't' has increased (note: 't' might already be hp_waiter). | ||
372 | if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) { | ||
373 | struct task_struct *old_max_eff_prio; | ||
374 | struct task_struct *new_max_eff_prio; | ||
375 | struct task_struct *new_prio = NULL; | ||
376 | struct task_struct *owner = fq->owner; | ||
377 | |||
378 | if(fq->hp_waiter) | ||
379 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", | ||
380 | fq->hp_waiter->comm, fq->hp_waiter->pid); | ||
381 | else | ||
382 | TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); | ||
383 | |||
384 | if(owner) | ||
385 | { | ||
386 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
387 | |||
388 | // TRACE_TASK(owner, "Heap Before:\n"); | ||
389 | // print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); | ||
390 | |||
391 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
392 | |||
393 | fq->hp_waiter = t; | ||
394 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); | ||
395 | |||
396 | binheap_decrease(&fq->nest.hp_binheap_node, | ||
397 | &tsk_rt(owner)->hp_blocked_tasks); | ||
398 | |||
399 | // TRACE_TASK(owner, "Heap After:\n"); | ||
400 | // print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0); | ||
401 | |||
402 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
403 | |||
404 | if(new_max_eff_prio != old_max_eff_prio) { | ||
405 | TRACE_TASK(t, "is new hp_waiter.\n"); | ||
406 | |||
407 | if ((effective_priority(owner) == old_max_eff_prio) || | ||
408 | (litmus->__compare(new_max_eff_prio, BASE, | ||
409 | owner, EFFECTIVE))){ | ||
410 | new_prio = new_max_eff_prio; | ||
411 | } | ||
412 | } | ||
413 | else { | ||
414 | TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); | ||
415 | } | ||
416 | |||
417 | if(new_prio) { | ||
418 | // set new inheritance and propagate | ||
419 | TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n", | ||
420 | owner->comm, owner->pid, | ||
421 | new_prio->comm, new_prio->pid); | ||
422 | litmus->nested_increase_prio(owner, new_prio, &sem->lock, | ||
423 | flags); // unlocks lock. | ||
424 | } | ||
425 | else { | ||
426 | TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n", | ||
427 | new_max_eff_prio->comm, new_max_eff_prio->pid); | ||
428 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
429 | unlock_fine_irqrestore(&sem->lock, flags); | ||
430 | } | ||
431 | } | ||
432 | else { | ||
433 | fq->hp_waiter = t; | ||
434 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); | ||
435 | |||
436 | TRACE_TASK(t, "no owner.\n"); | ||
437 | unlock_fine_irqrestore(&sem->lock, flags); | ||
438 | } | ||
439 | } | ||
440 | else { | ||
441 | TRACE_TASK(t, "hp_waiter is unaffected.\n"); | ||
442 | unlock_fine_irqrestore(&sem->lock, flags); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | // hp_waiter has decreased | ||
447 | static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq, | ||
448 | struct ikglp_semaphore *sem, | ||
449 | unsigned long flags) | ||
450 | { | ||
451 | struct task_struct *owner = fq->owner; | ||
452 | |||
453 | struct task_struct *old_max_eff_prio; | ||
454 | struct task_struct *new_max_eff_prio; | ||
455 | |||
456 | if(!owner) { | ||
457 | TRACE_CUR("No owner. Returning.\n"); | ||
458 | unlock_fine_irqrestore(&sem->lock, flags); | ||
459 | return; | ||
460 | } | ||
461 | |||
462 | TRACE_CUR("ikglp_refresh_owners_prio_decrease\n"); | ||
463 | |||
464 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
465 | |||
466 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
467 | |||
468 | binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); | ||
469 | fq->nest.hp_waiter_eff_prio = fq->hp_waiter; | ||
470 | binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks, | ||
471 | struct nested_info, hp_binheap_node); | ||
472 | |||
473 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
474 | |||
475 | if((old_max_eff_prio != new_max_eff_prio) && | ||
476 | (effective_priority(owner) == old_max_eff_prio)) | ||
477 | { | ||
478 | // Need to set new effective_priority for owner | ||
479 | struct task_struct *decreased_prio; | ||
480 | |||
481 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | ||
482 | ikglp_get_idx(sem, fq)); | ||
483 | |||
484 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | ||
485 | TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n", | ||
486 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", | ||
487 | (new_max_eff_prio) ? new_max_eff_prio->pid : -1, | ||
488 | owner->comm, | ||
489 | owner->pid, | ||
490 | ikglp_get_idx(sem, fq)); | ||
491 | |||
492 | decreased_prio = new_max_eff_prio; | ||
493 | } | ||
494 | else { | ||
495 | TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n", | ||
496 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", | ||
497 | (new_max_eff_prio) ? new_max_eff_prio->pid : -1, | ||
498 | owner->comm, | ||
499 | owner->pid, | ||
500 | ikglp_get_idx(sem, fq)); | ||
501 | |||
502 | decreased_prio = NULL; | ||
503 | } | ||
504 | |||
505 | // beware: recursion | ||
506 | litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock | ||
507 | } | ||
508 | else { | ||
509 | TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); | ||
510 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
511 | unlock_fine_irqrestore(&sem->lock, flags); | ||
512 | } | ||
513 | } | ||
514 | |||
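The decrease-side helpers in this file (ikglp_refresh_owners_prio_decrease() above and the ikglp_remove_donation_*() functions below) all follow the same pattern: snapshot the top of the owner's hp_blocked_tasks heap, mutate the heap, and propagate only if the maximum changed and the owner was actually running at the old maximum. A small illustrative model of that decision, with priorities reduced to plain integers, is sketched here; must_propagate_decrease() is a hypothetical name, not a function from the patch.

#include <stdbool.h>
#include <stdio.h>

/* Decrease-side decision: recompute/propagate the owner's inheritance only
 * if the heap maximum changed AND the owner was running at the old maximum. */
static bool must_propagate_decrease(int old_top, int new_top, int owner_eff)
{
    if (old_top == new_top)
        return false;            /* max of hp_blocked_tasks unchanged */
    return owner_eff == old_top; /* owner was inheriting the old max */
}

int main(void)
{
    printf("%d\n", must_propagate_decrease(90, 70, 90)); /* 1: propagate */
    printf("%d\n", must_propagate_decrease(90, 70, 95)); /* 0: owner unaffected */
    return 0;
}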
515 | |||
516 | static void ikglp_remove_donation_from_owner(struct binheap_node *n, | ||
517 | struct fifo_queue *fq, | ||
518 | struct ikglp_semaphore *sem, | ||
519 | unsigned long flags) | ||
520 | { | ||
521 | struct task_struct *owner = fq->owner; | ||
522 | |||
523 | struct task_struct *old_max_eff_prio; | ||
524 | struct task_struct *new_max_eff_prio; | ||
525 | |||
526 | BUG_ON(!owner); | ||
527 | |||
528 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
529 | |||
530 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
531 | |||
532 | binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks); | ||
533 | |||
534 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
535 | |||
536 | if((old_max_eff_prio != new_max_eff_prio) && | ||
537 | (effective_priority(owner) == old_max_eff_prio)) | ||
538 | { | ||
539 | // Need to set new effective_priority for owner | ||
540 | struct task_struct *decreased_prio; | ||
541 | |||
542 | TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n", | ||
543 | ikglp_get_idx(sem, fq)); | ||
544 | |||
545 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | ||
546 | TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n", | ||
547 | ikglp_get_idx(sem, fq)); | ||
548 | decreased_prio = new_max_eff_prio; | ||
549 | } | ||
550 | else { | ||
551 | TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n", | ||
552 | ikglp_get_idx(sem, fq)); | ||
553 | decreased_prio = NULL; | ||
554 | } | ||
555 | |||
556 | // beware: recursion | ||
557 | litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock | ||
558 | } | ||
559 | else { | ||
560 | TRACE_TASK(owner, "No need to propagate priority decrease forward.\n"); | ||
561 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
562 | unlock_fine_irqrestore(&sem->lock, flags); | ||
563 | } | ||
564 | } | ||
565 | |||
566 | static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t, | ||
567 | struct binheap_node *n) | ||
568 | { | ||
569 | struct task_struct *old_max_eff_prio; | ||
570 | struct task_struct *new_max_eff_prio; | ||
571 | |||
572 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
573 | |||
574 | old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); | ||
575 | |||
576 | binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks); | ||
577 | |||
578 | new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); | ||
579 | |||
580 | if((old_max_eff_prio != new_max_eff_prio) && | ||
581 | (effective_priority(t) == old_max_eff_prio)) | ||
582 | { | ||
583 | // Need to set new effective_priority for owner | ||
584 | struct task_struct *decreased_prio; | ||
585 | |||
586 | if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) { | ||
587 | decreased_prio = new_max_eff_prio; | ||
588 | } | ||
589 | else { | ||
590 | decreased_prio = NULL; | ||
591 | } | ||
592 | |||
593 | tsk_rt(t)->inh_task = decreased_prio; | ||
594 | } | ||
595 | |||
596 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
597 | } | ||
598 | |||
599 | static void ikglp_get_immediate(struct task_struct* t, | ||
600 | struct fifo_queue *fq, | ||
601 | struct ikglp_semaphore *sem, | ||
602 | unsigned long flags) | ||
603 | { | ||
604 | // resource available now | ||
605 | TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq)); | ||
606 | |||
607 | fq->owner = t; | ||
608 | |||
609 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
610 | binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, | ||
611 | struct nested_info, hp_binheap_node); | ||
612 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
613 | |||
614 | ++(fq->count); | ||
615 | |||
616 | ikglp_add_global_list(sem, t, &fq->global_heap_node); | ||
617 | ikglp_add_donees(sem, fq, t, &fq->donee_heap_node); | ||
618 | |||
619 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue); | ||
620 | |||
621 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
622 | if(sem->aff_obs) { | ||
623 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
624 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t); | ||
625 | } | ||
626 | #endif | ||
627 | |||
628 | unlock_fine_irqrestore(&sem->lock, flags); | ||
629 | } | ||
630 | |||
631 | |||
632 | |||
633 | |||
634 | |||
635 | static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem, | ||
636 | struct fifo_queue* fq, | ||
637 | struct task_struct* t, | ||
638 | wait_queue_t *wait, | ||
639 | ikglp_heap_node_t *global_heap_node, | ||
640 | ikglp_donee_heap_node_t *donee_heap_node) | ||
641 | { | ||
642 | /* resource is not free => must suspend and wait */ | ||
643 | TRACE_TASK(t, "Enqueuing on fq %d.\n", | ||
644 | ikglp_get_idx(sem, fq)); | ||
645 | |||
646 | init_waitqueue_entry(wait, t); | ||
647 | |||
648 | __add_wait_queue_tail_exclusive(&fq->wait, wait); | ||
649 | |||
650 | ++(fq->count); | ||
651 | ++(sem->nr_in_fifos); | ||
652 | |||
653 | // update global list. | ||
654 | if(likely(global_heap_node)) { | ||
655 | if(binheap_is_in_heap(&global_heap_node->node)) { | ||
656 | WARN_ON(1); | ||
657 | ikglp_del_global_list(sem, t, global_heap_node); | ||
658 | } | ||
659 | ikglp_add_global_list(sem, t, global_heap_node); | ||
660 | } | ||
661 | // update donor eligibility list. | ||
662 | if(likely(donee_heap_node)) { | ||
663 | // if(binheap_is_in_heap(&donee_heap_node->node)) { | ||
664 | // WARN_ON(1); | ||
665 | // } | ||
666 | ikglp_add_donees(sem, fq, t, donee_heap_node); | ||
667 | } | ||
668 | |||
669 | if(sem->shortest_fifo_queue == fq) { | ||
670 | sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq); | ||
671 | } | ||
672 | |||
673 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
674 | if(sem->aff_obs) { | ||
675 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t); | ||
676 | } | ||
677 | #endif | ||
678 | |||
679 | TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq)); | ||
680 | } | ||
681 | |||
682 | |||
683 | static void ikglp_enqueue_on_fq( | ||
684 | struct ikglp_semaphore *sem, | ||
685 | struct fifo_queue *fq, | ||
686 | ikglp_wait_state_t *wait, | ||
687 | unsigned long flags) | ||
688 | { | ||
689 | /* resource is not free => must suspend and wait */ | ||
690 | TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n", | ||
691 | ikglp_get_idx(sem, fq)); | ||
692 | |||
693 | INIT_BINHEAP_NODE(&wait->global_heap_node.node); | ||
694 | INIT_BINHEAP_NODE(&wait->donee_heap_node.node); | ||
695 | |||
696 | __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node, | ||
697 | &wait->global_heap_node, &wait->donee_heap_node); | ||
698 | |||
699 | ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock | ||
700 | } | ||
701 | |||
702 | |||
703 | static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, | ||
704 | ikglp_wait_state_t *wait) | ||
705 | { | ||
706 | TRACE_TASK(wait->task, "goes to PQ.\n"); | ||
707 | |||
708 | wait->pq_node.task = wait->task; // copy over task (little redundant...) | ||
709 | |||
710 | binheap_add(&wait->pq_node.node, &sem->priority_queue, | ||
711 | ikglp_heap_node_t, node); | ||
712 | } | ||
713 | |||
714 | static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem, | ||
715 | ikglp_wait_state_t *wait) | ||
716 | { | ||
717 | INIT_BINHEAP_NODE(&wait->global_heap_node.node); | ||
718 | INIT_BINHEAP_NODE(&wait->donee_heap_node.node); | ||
719 | INIT_BINHEAP_NODE(&wait->pq_node.node); | ||
720 | |||
721 | __ikglp_enqueue_on_pq(sem, wait); | ||
722 | } | ||
723 | |||
724 | static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem, | ||
725 | ikglp_wait_state_t* wait, | ||
726 | unsigned long flags) | ||
727 | { | ||
728 | struct task_struct *t = wait->task; | ||
729 | ikglp_donee_heap_node_t *donee_node = NULL; | ||
730 | struct task_struct *donee; | ||
731 | |||
732 | struct task_struct *old_max_eff_prio; | ||
733 | struct task_struct *new_max_eff_prio; | ||
734 | struct task_struct *new_prio = NULL; | ||
735 | |||
736 | INIT_BINHEAP_NODE(&wait->global_heap_node.node); | ||
737 | INIT_BINHEAP_NODE(&wait->donee_heap_node.node); | ||
738 | INIT_BINHEAP_NODE(&wait->pq_node.node); | ||
739 | INIT_BINHEAP_NODE(&wait->node); | ||
740 | |||
741 | // TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid); | ||
742 | // TRACE_CUR("donors Before:\n"); | ||
743 | // print_donors(sem->donors.root, 1); | ||
744 | |||
745 | // Add donor to the global list. | ||
746 | ikglp_add_global_list(sem, t, &wait->global_heap_node); | ||
747 | |||
748 | // Select a donee | ||
749 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
750 | donee_node = (sem->aff_obs) ? | ||
751 | sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) : | ||
752 | binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
753 | #else | ||
754 | donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
755 | #endif | ||
756 | |||
757 | donee = donee_node->task; | ||
758 | |||
759 | TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid); | ||
760 | |||
761 | TRACE_CUR("Temporarily removing %s/%d to donee list.\n", | ||
762 | donee->comm, donee->pid); | ||
763 | // TRACE_CUR("donees Before:\n"); | ||
764 | // print_donees(sem, sem->donees.root, 1); | ||
765 | |||
766 | //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly | ||
767 | binheap_delete(&donee_node->node, &sem->donees); | ||
768 | |||
769 | // TRACE_CUR("donees After:\n"); | ||
770 | // print_donees(sem, sem->donees.root, 1); | ||
771 | |||
772 | |||
773 | wait->donee_info = donee_node; | ||
774 | |||
775 | // Add t to donor heap. | ||
776 | binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node); | ||
777 | |||
778 | // Now adjust the donee's priority. | ||
779 | |||
780 | // Lock the donee's inheritance heap. | ||
781 | raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock); | ||
782 | |||
783 | old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); | ||
784 | |||
785 | if(donee_node->donor_info) { | ||
786 | // Steal donation relation. Evict old donor to PQ. | ||
787 | |||
788 | // Remove old donor from donor heap | ||
789 | ikglp_wait_state_t *old_wait = donee_node->donor_info; | ||
790 | struct task_struct *old_donor = old_wait->task; | ||
791 | |||
792 | TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n", | ||
793 | donee->comm, donee->pid, old_donor->comm, old_donor->pid); | ||
794 | |||
795 | binheap_delete(&old_wait->node, &sem->donors); | ||
796 | |||
797 | // Remove donation from donee's inheritance heap. | ||
798 | binheap_delete(&old_wait->prio_donation.hp_binheap_node, | ||
799 | &tsk_rt(donee)->hp_blocked_tasks); | ||
800 | // WARNING: have not updated inh_prio! | ||
801 | |||
802 | // Add old donor to PQ. | ||
803 | __ikglp_enqueue_on_pq(sem, old_wait); | ||
804 | |||
805 | // Remove old donor from the global heap. | ||
806 | ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node); | ||
807 | } | ||
808 | |||
809 | // Add back donee's node to the donees heap with increased prio | ||
810 | donee_node->donor_info = wait; | ||
811 | INIT_BINHEAP_NODE(&donee_node->node); | ||
812 | |||
813 | |||
814 | TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid); | ||
815 | // TRACE_CUR("donees Before:\n"); | ||
816 | // print_donees(sem, sem->donees.root, 1); | ||
817 | |||
818 | binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node); | ||
819 | |||
820 | // TRACE_CUR("donees After:\n"); | ||
821 | // print_donees(sem, sem->donees.root, 1); | ||
822 | |||
823 | // Add an inheritance/donation to the donee's inheritance heap. | ||
824 | wait->prio_donation.lock = (struct litmus_lock*)sem; | ||
825 | wait->prio_donation.hp_waiter_eff_prio = t; | ||
826 | wait->prio_donation.hp_waiter_ptr = NULL; | ||
827 | INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node); | ||
828 | |||
829 | binheap_add(&wait->prio_donation.hp_binheap_node, | ||
830 | &tsk_rt(donee)->hp_blocked_tasks, | ||
831 | struct nested_info, hp_binheap_node); | ||
832 | |||
833 | new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks); | ||
834 | |||
835 | if(new_max_eff_prio != old_max_eff_prio) { | ||
836 | if ((effective_priority(donee) == old_max_eff_prio) || | ||
837 | (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){ | ||
838 | TRACE_TASK(t, "Donation increases %s/%d's effective priority\n", | ||
839 | donee->comm, donee->pid); | ||
840 | new_prio = new_max_eff_prio; | ||
841 | } | ||
842 | // else { | ||
843 | // // should be bug. donor would not be in top-m. | ||
844 | // TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid); | ||
845 | // WARN_ON(1); | ||
846 | // } | ||
847 | // } | ||
848 | // else { | ||
849 | // // should be bug. donor would not be in top-m. | ||
850 | // TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid); | ||
851 | // WARN_ON(1); | ||
852 | } | ||
853 | |||
854 | if(new_prio) { | ||
855 | struct fifo_queue *donee_fq = donee_node->fq; | ||
856 | |||
857 | if(donee != donee_fq->owner) { | ||
858 | TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n", | ||
859 | donee->comm, donee->pid, | ||
860 | donee_fq->owner->comm, donee_fq->owner->pid); | ||
861 | |||
862 | raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); | ||
863 | ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock | ||
864 | } | ||
865 | else { | ||
866 | TRACE_TASK(t, "%s/%d is the owner. Progatating priority immediatly.\n", | ||
867 | donee->comm, donee->pid); | ||
868 | litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock | ||
869 | } | ||
870 | } | ||
871 | else { | ||
872 | TRACE_TASK(t, "No change in effective priority (it is %d/%s). BUG?\n", | ||
873 | new_max_eff_prio->comm, new_max_eff_prio->pid); | ||
874 | raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock); | ||
875 | unlock_fine_irqrestore(&sem->lock, flags); | ||
876 | } | ||
877 | |||
878 | |||
879 | // TRACE_CUR("donors After:\n"); | ||
880 | // print_donors(sem->donors.root, 1); | ||
881 | } | ||
882 | |||
883 | int ikglp_lock(struct litmus_lock* l) | ||
884 | { | ||
885 | struct task_struct* t = current; | ||
886 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
887 | unsigned long flags = 0, real_flags; | ||
888 | struct fifo_queue *fq = NULL; | ||
889 | int replica = -EINVAL; | ||
890 | |||
891 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
892 | raw_spinlock_t *dgl_lock; | ||
893 | #endif | ||
894 | |||
895 | ikglp_wait_state_t wait; | ||
896 | |||
897 | if (!is_realtime(t)) | ||
898 | return -EPERM; | ||
899 | |||
900 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
901 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
902 | #endif | ||
903 | |||
904 | raw_spin_lock_irqsave(&sem->real_lock, real_flags); | ||
905 | |||
906 | lock_global_irqsave(dgl_lock, flags); | ||
907 | lock_fine_irqsave(&sem->lock, flags); | ||
908 | |||
909 | if(sem->nr_in_fifos < sem->m) { | ||
910 | // enqueue somewhere | ||
911 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
912 | fq = (sem->aff_obs) ? | ||
913 | sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) : | ||
914 | sem->shortest_fifo_queue; | ||
915 | #else | ||
916 | fq = sem->shortest_fifo_queue; | ||
917 | #endif | ||
918 | if(fq->count == 0) { | ||
919 | // take available resource | ||
920 | replica = ikglp_get_idx(sem, fq); | ||
921 | |||
922 | ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock | ||
923 | |||
924 | unlock_global_irqrestore(dgl_lock, flags); | ||
925 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | ||
926 | goto acquired; | ||
927 | } | ||
928 | else { | ||
929 | wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! | ||
930 | |||
931 | tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked | ||
932 | mb(); | ||
933 | |||
934 | /* FIXME: interruptible would be nice some day */ | ||
935 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
936 | |||
937 | ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock | ||
938 | } | ||
939 | } | ||
940 | else { | ||
941 | // donor! | ||
942 | wait.task = t; // THIS IS CRITICALLY IMPORTANT!!! | ||
943 | |||
944 | tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked | ||
945 | mb(); | ||
946 | |||
947 | /* FIXME: interruptible would be nice some day */ | ||
948 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
949 | |||
950 | if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) { | ||
951 | // enqueue on PQ | ||
952 | ikglp_enqueue_on_pq(sem, &wait); | ||
953 | unlock_fine_irqrestore(&sem->lock, flags); | ||
954 | } | ||
955 | else { | ||
956 | // enqueue as donor | ||
957 | ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock | ||
958 | } | ||
959 | } | ||
960 | |||
961 | unlock_global_irqrestore(dgl_lock, flags); | ||
962 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | ||
963 | |||
964 | TS_LOCK_SUSPEND; | ||
965 | |||
966 | suspend_for_lock(); | ||
967 | |||
968 | TS_LOCK_RESUME; | ||
969 | |||
970 | fq = ikglp_get_queue(sem, t); | ||
971 | BUG_ON(!fq); | ||
972 | |||
973 | replica = ikglp_get_idx(sem, fq); | ||
974 | |||
975 | acquired: | ||
976 | TRACE_CUR("Acquired lock %d, queue %d\n", | ||
977 | l->ident, replica); | ||
978 | |||
979 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
980 | if(sem->aff_obs) { | ||
981 | return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq); | ||
982 | } | ||
983 | #endif | ||
984 | |||
985 | return replica; | ||
986 | } | ||
987 | |||
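ikglp_lock() above admits at most m requests into the FIFO queues; a request that arrives while the FIFOs are saturated either parks on the priority queue or becomes a priority donor, depending on how its base priority compares with the m-th highest request already admitted. The hedged sketch below summarizes that routing decision only; route_request() and its parameters are illustrative, and the real code additionally consults the affinity observer and holds the DGL lock, real_lock, and the fine-grained sem->lock while deciding.

#include <stdio.h>

enum ikglp_route { TAKE_REPLICA, WAIT_ON_FIFO, WAIT_ON_PQ, BECOME_DONOR };

/* mth_outranks_requester: nonzero if the m-th highest-priority request
 * already admitted to the FIFOs has a higher BASE priority than the
 * requester (mirroring litmus->__compare(mth_highest, BASE, t, BASE)). */
static enum ikglp_route route_request(unsigned nr_in_fifos, unsigned m,
                                      unsigned shortest_fifo_len,
                                      int mth_outranks_requester)
{
    if (nr_in_fifos < m)    /* room among the m admitted requests */
        return (shortest_fifo_len == 0) ? TAKE_REPLICA : WAIT_ON_FIFO;
    /* FIFOs are saturated: low-priority requests wait on the PQ,
     * higher-priority requests donate their priority to a donee. */
    return mth_outranks_requester ? WAIT_ON_PQ : BECOME_DONOR;
}

int main(void)
{
    printf("%d\n", route_request(1, 4, 0, 0));  /* 0: TAKE_REPLICA */
    printf("%d\n", route_request(4, 4, 2, 0));  /* 3: BECOME_DONOR */
    return 0;
}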
1091 | |||
1092 | static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem, | ||
1093 | struct fifo_queue *fq, | ||
1094 | ikglp_wait_state_t *donor_info) | ||
1095 | { | ||
1096 | struct task_struct *t = donor_info->task; | ||
1097 | |||
1098 | TRACE_CUR("Donor %s/%d being moved to fq %d\n", | ||
1099 | t->comm, | ||
1100 | t->pid, | ||
1101 | ikglp_get_idx(sem, fq)); | ||
1102 | |||
1103 | binheap_delete(&donor_info->node, &sem->donors); | ||
1104 | |||
1105 | __ikglp_enqueue_on_fq(sem, fq, t, | ||
1106 | &donor_info->fq_node, | ||
1107 | NULL, // already in global_list, so pass null to prevent adding 2nd time. | ||
1108 | &donor_info->donee_heap_node); | ||
1109 | |||
1110 | // warning: | ||
1111 | // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. | ||
1112 | } | ||
1113 | |||
1114 | static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem, | ||
1115 | struct fifo_queue *fq, | ||
1116 | ikglp_wait_state_t *wait) | ||
1117 | { | ||
1118 | struct task_struct *t = wait->task; | ||
1119 | |||
1120 | TRACE_CUR("PQ request %s/%d being moved to fq %d\n", | ||
1121 | t->comm, | ||
1122 | t->pid, | ||
1123 | ikglp_get_idx(sem, fq)); | ||
1124 | |||
1125 | binheap_delete(&wait->pq_node.node, &sem->priority_queue); | ||
1126 | |||
1127 | __ikglp_enqueue_on_fq(sem, fq, t, | ||
1128 | &wait->fq_node, | ||
1129 | &wait->global_heap_node, | ||
1130 | &wait->donee_heap_node); | ||
1131 | // warning: | ||
1132 | // ikglp_update_owners_prio(t, fq, sem, flags) has not been called. | ||
1133 | } | ||
1134 | |||
1135 | static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal( | ||
1136 | struct ikglp_semaphore* sem) | ||
1137 | { | ||
1138 | /* must hold sem->lock */ | ||
1139 | |||
1140 | struct fifo_queue *fq = NULL; | ||
1141 | struct list_head *pos; | ||
1142 | struct task_struct *queued; | ||
1143 | int i; | ||
1144 | |||
1145 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
1146 | if( (sem->fifo_queues[i].count > 1) && | ||
1147 | (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) { | ||
1148 | |||
1149 | TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n", | ||
1150 | ikglp_get_idx(sem, &sem->fifo_queues[i]), | ||
1151 | sem->fifo_queues[i].hp_waiter->comm, | ||
1152 | sem->fifo_queues[i].hp_waiter->pid, | ||
1153 | (fq) ? ikglp_get_idx(sem, fq) : -1, | ||
1154 | (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX", | ||
1155 | (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2); | ||
1156 | |||
1157 | fq = &sem->fifo_queues[i]; | ||
1158 | |||
1159 | WARN_ON(!(fq->hp_waiter)); | ||
1160 | } | ||
1161 | } | ||
1162 | |||
1163 | if(fq) { | ||
1164 | struct task_struct *max_hp = fq->hp_waiter; | ||
1165 | ikglp_wait_state_t* ret = NULL; | ||
1166 | |||
1167 | TRACE_CUR("Searching for %s/%d on fq %d\n", | ||
1168 | max_hp->comm, | ||
1169 | max_hp->pid, | ||
1170 | ikglp_get_idx(sem, fq)); | ||
1171 | |||
1172 | BUG_ON(!max_hp); | ||
1173 | |||
1174 | list_for_each(pos, &fq->wait.task_list) { | ||
1175 | wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list); | ||
1176 | |||
1177 | queued = (struct task_struct*) wait->private; | ||
1178 | |||
1179 | TRACE_CUR("fq %d entry: %s/%d\n", | ||
1180 | ikglp_get_idx(sem, fq), | ||
1181 | queued->comm, | ||
1182 | queued->pid); | ||
1183 | |||
1184 | /* Compare task prios, find high prio task. */ | ||
1185 | if (queued == max_hp) { | ||
1186 | TRACE_CUR("Found it!\n"); | ||
1187 | ret = container_of(wait, ikglp_wait_state_t, fq_node); | ||
1188 | } | ||
1189 | } | ||
1190 | |||
1191 | WARN_ON(!ret); | ||
1192 | return ret; | ||
1193 | } | ||
1194 | |||
1195 | return(NULL); | ||
1196 | } | ||
1197 | |||
1198 | static void ikglp_steal_to_fq(struct ikglp_semaphore *sem, | ||
1199 | struct fifo_queue *fq, | ||
1200 | ikglp_wait_state_t *fq_wait) | ||
1201 | { | ||
1202 | struct task_struct *t = fq_wait->task; | ||
1203 | struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq; | ||
1204 | |||
1205 | TRACE_CUR("FQ request %s/%d being moved to fq %d\n", | ||
1206 | t->comm, | ||
1207 | t->pid, | ||
1208 | ikglp_get_idx(sem, fq)); | ||
1209 | |||
1210 | fq_wait->donee_heap_node.fq = fq; // just to be safe | ||
1211 | |||
1212 | |||
1213 | __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node); | ||
1214 | --(fq_steal->count); | ||
1215 | |||
1216 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1217 | if(sem->aff_obs) { | ||
1218 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t); | ||
1219 | } | ||
1220 | #endif | ||
1221 | |||
1222 | if(t == fq_steal->hp_waiter) { | ||
1223 | fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL); | ||
1224 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | ||
1225 | ikglp_get_idx(sem, fq_steal), | ||
1226 | (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil", | ||
1227 | (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1); | ||
1228 | } | ||
1229 | |||
1230 | |||
1231 | // Update shortest. | ||
1232 | if(fq_steal->count < sem->shortest_fifo_queue->count) { | ||
1233 | sem->shortest_fifo_queue = fq_steal; | ||
1234 | } | ||
1235 | |||
1236 | __ikglp_enqueue_on_fq(sem, fq, t, | ||
1237 | &fq_wait->fq_node, | ||
1238 | NULL, | ||
1239 | NULL); | ||
1240 | |||
1241 | // warning: We have not checked the priority inheritance of fq's owner yet. | ||
1242 | } | ||
1243 | |||
1244 | |||
1245 | static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem, | ||
1246 | struct fifo_queue *fq, | ||
1247 | ikglp_wait_state_t *old_wait) | ||
1248 | { | ||
1249 | struct task_struct *t = old_wait->task; | ||
1250 | |||
1251 | BUG_ON(old_wait->donee_heap_node.fq != fq); | ||
1252 | |||
1253 | TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n", | ||
1254 | ikglp_get_idx(sem, fq)); | ||
1255 | |||
1256 | // need to migrate global_heap_node and donee_heap_node off of the stack | ||
1257 | // to the nodes allocated for the owner of this fq. | ||
1258 | |||
1259 | // TODO: Enhance binheap() to perform this operation in place. | ||
1260 | |||
1261 | ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove | ||
1262 | fq->global_heap_node = old_wait->global_heap_node; // copy | ||
1263 | ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add | ||
1264 | |||
1265 | binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove | ||
1266 | fq->donee_heap_node = old_wait->donee_heap_node; // copy | ||
1267 | |||
1268 | if(fq->donee_heap_node.donor_info) { | ||
1269 | // let donor know that our location has changed | ||
1270 | BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link | ||
1271 | fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node; | ||
1272 | } | ||
1273 | INIT_BINHEAP_NODE(&fq->donee_heap_node.node); | ||
1274 | binheap_add(&fq->donee_heap_node.node, &sem->donees, | ||
1275 | ikglp_donee_heap_node_t, node); // re-add | ||
1276 | } | ||
1277 | |||
1278 | int ikglp_unlock(struct litmus_lock* l) | ||
1279 | { | ||
1280 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1281 | struct task_struct *t = current; | ||
1282 | struct task_struct *donee = NULL; | ||
1283 | struct task_struct *next = NULL; | ||
1284 | struct task_struct *new_on_fq = NULL; | ||
1285 | struct fifo_queue *fq_of_new_on_fq = NULL; | ||
1286 | |||
1287 | ikglp_wait_state_t *other_donor_info = NULL; | ||
1288 | struct fifo_queue *to_steal = NULL; | ||
1289 | int need_steal_prio_reeval = 0; | ||
1290 | struct fifo_queue *fq; | ||
1291 | |||
1292 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1293 | raw_spinlock_t *dgl_lock; | ||
1294 | #endif | ||
1295 | |||
1296 | unsigned long flags = 0, real_flags; | ||
1297 | |||
1298 | int err = 0; | ||
1299 | |||
1300 | fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner. | ||
1301 | |||
1302 | if (!fq) { | ||
1303 | err = -EINVAL; | ||
1304 | goto out; | ||
1305 | } | ||
1306 | |||
1307 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1308 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
1309 | #endif | ||
1310 | raw_spin_lock_irqsave(&sem->real_lock, real_flags); | ||
1311 | |||
1312 | lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper | ||
1313 | lock_fine_irqsave(&sem->lock, flags); | ||
1314 | |||
1315 | TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq)); | ||
1316 | |||
1317 | |||
1318 | // Remove 't' from the heaps, but data in nodes will still be good. | ||
1319 | ikglp_del_global_list(sem, t, &fq->global_heap_node); | ||
1320 | binheap_delete(&fq->donee_heap_node.node, &sem->donees); | ||
1321 | |||
1322 | fq->owner = NULL; // no longer owned!! | ||
1323 | --(fq->count); | ||
1324 | if(fq->count < sem->shortest_fifo_queue->count) { | ||
1325 | sem->shortest_fifo_queue = fq; | ||
1326 | } | ||
1327 | --(sem->nr_in_fifos); | ||
1328 | |||
1329 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1330 | if(sem->aff_obs) { | ||
1331 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t); | ||
1332 | sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t); | ||
1333 | } | ||
1334 | #endif | ||
1335 | |||
1336 | // Move the next request into the FQ and update heaps as needed. | ||
1337 | // We defer re-evaluation of priorities to later in the function. | ||
1338 | if(fq->donee_heap_node.donor_info) { // move my donor to FQ | ||
1339 | ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info; | ||
1340 | |||
1341 | new_on_fq = donor_info->task; | ||
1342 | |||
1343 | // donor moved to FQ | ||
1344 | donee = t; | ||
1345 | |||
1346 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1347 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | ||
1348 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | ||
1349 | if(fq_of_new_on_fq->count == 0) { | ||
1350 | // ignore it? | ||
1351 | // fq_of_new_on_fq = fq; | ||
1352 | } | ||
1353 | } | ||
1354 | else { | ||
1355 | fq_of_new_on_fq = fq; | ||
1356 | } | ||
1357 | #else | ||
1358 | fq_of_new_on_fq = fq; | ||
1359 | #endif | ||
1360 | |||
1361 | TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", | ||
1362 | new_on_fq->comm, new_on_fq->pid, | ||
1363 | ikglp_get_idx(sem, fq_of_new_on_fq), | ||
1364 | ikglp_get_idx(sem, fq)); | ||
1365 | |||
1366 | |||
1367 | ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info); | ||
1368 | } | ||
1369 | else if(!binheap_empty(&sem->donors)) { // No donor of my own, so move the highest-priority donor to the FQ | ||
1370 | // move other donor to FQ | ||
1371 | // Select a donor | ||
1372 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1373 | other_donor_info = (sem->aff_obs) ? | ||
1374 | sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) : | ||
1375 | binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1376 | #else | ||
1377 | other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
1378 | #endif | ||
1379 | |||
1380 | new_on_fq = other_donor_info->task; | ||
1381 | donee = other_donor_info->donee_info->task; | ||
1382 | |||
1383 | // update the donee's heap position. | ||
1384 | other_donor_info->donee_info->donor_info = NULL; // clear the cross-link | ||
1385 | binheap_decrease(&other_donor_info->donee_info->node, &sem->donees); | ||
1386 | |||
1387 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1388 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | ||
1389 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | ||
1390 | if(fq_of_new_on_fq->count == 0) { | ||
1391 | // ignore it? | ||
1392 | // fq_of_new_on_fq = fq; | ||
1393 | } | ||
1394 | } | ||
1395 | else { | ||
1396 | fq_of_new_on_fq = fq; | ||
1397 | } | ||
1398 | #else | ||
1399 | fq_of_new_on_fq = fq; | ||
1400 | #endif | ||
1401 | |||
1402 | TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n", | ||
1403 | new_on_fq->comm, new_on_fq->pid, | ||
1404 | ikglp_get_idx(sem, fq_of_new_on_fq), | ||
1405 | ikglp_get_idx(sem, fq)); | ||
1406 | |||
1407 | ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info); | ||
1408 | } | ||
1409 | else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ | ||
1410 | ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue, | ||
1411 | ikglp_heap_node_t, node); | ||
1412 | ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t, | ||
1413 | pq_node); | ||
1414 | |||
1415 | new_on_fq = pq_wait->task; | ||
1416 | |||
1417 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1418 | if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) { | ||
1419 | fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq); | ||
1420 | if(fq_of_new_on_fq->count == 0) { | ||
1421 | // ignore it? | ||
1422 | // fq_of_new_on_fq = fq; | ||
1423 | } | ||
1424 | } | ||
1425 | else { | ||
1426 | fq_of_new_on_fq = fq; | ||
1427 | } | ||
1428 | #else | ||
1429 | fq_of_new_on_fq = fq; | ||
1430 | #endif | ||
1431 | |||
1432 | TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n", | ||
1433 | new_on_fq->comm, new_on_fq->pid, | ||
1434 | ikglp_get_idx(sem, fq_of_new_on_fq), | ||
1435 | ikglp_get_idx(sem, fq)); | ||
1436 | |||
1437 | ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait); | ||
1438 | } | ||
1439 | else if(fq->count == 0) { // No PQ and this queue is empty, so steal. | ||
1440 | ikglp_wait_state_t *fq_wait; | ||
1441 | |||
1442 | TRACE_TASK(t, "Looking to steal a request for fq %d...\n", | ||
1443 | ikglp_get_idx(sem, fq)); | ||
1444 | |||
1445 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1446 | fq_wait = (sem->aff_obs) ? | ||
1447 | sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) : | ||
1448 | ikglp_find_hp_waiter_to_steal(sem); | ||
1449 | #else | ||
1450 | fq_wait = ikglp_find_hp_waiter_to_steal(sem); | ||
1451 | #endif | ||
1452 | |||
1453 | if(fq_wait) { | ||
1454 | to_steal = fq_wait->donee_heap_node.fq; | ||
1455 | |||
1456 | new_on_fq = fq_wait->task; | ||
1457 | fq_of_new_on_fq = fq; | ||
1458 | need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter); | ||
1459 | |||
1460 | TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n", | ||
1461 | new_on_fq->comm, new_on_fq->pid, | ||
1462 | ikglp_get_idx(sem, to_steal), | ||
1463 | ikglp_get_idx(sem, fq)); | ||
1464 | |||
1465 | ikglp_steal_to_fq(sem, fq, fq_wait); | ||
1466 | } | ||
1467 | else { | ||
1468 | TRACE_TASK(t, "Found nothing to steal for fq %d.\n", | ||
1469 | ikglp_get_idx(sem, fq)); | ||
1470 | } | ||
1471 | } | ||
1472 | else { // move no one | ||
1473 | } | ||
1474 | |||
1475 | // 't' must drop all priority and clean up data structures before hand-off. | ||
1476 | |||
1477 | // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST | ||
1478 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
1479 | { | ||
1480 | int count = 0; | ||
1481 | while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) { | ||
1482 | binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks, | ||
1483 | struct nested_info, hp_binheap_node); | ||
1484 | ++count; | ||
1485 | } | ||
1486 | litmus->decrease_prio(t, NULL); | ||
1487 | WARN_ON(count > 2); // should not exceed 2: only local fq inheritance and donation are possible. | ||
1488 | } | ||
1489 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
1490 | |||
1491 | |||
1492 | |||
1493 | // Now patch up other priorities. | ||
1494 | // | ||
1495 | // At most one of the following: | ||
1496 | // if(donee && donee != t), decrease prio, propagate to owner, or onward | ||
1497 | // if(to_steal), update owner's prio (hp_waiter has already been set) | ||
1498 | // | ||
1499 | |||
1500 | BUG_ON((other_donor_info != NULL) && (to_steal != NULL)); | ||
1501 | |||
1502 | if(other_donor_info) { | ||
1503 | struct fifo_queue *other_fq = other_donor_info->donee_info->fq; | ||
1504 | |||
1505 | BUG_ON(!donee); | ||
1506 | BUG_ON(donee == t); | ||
1507 | |||
1508 | TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n", | ||
1509 | other_donor_info->task->comm, other_donor_info->task->pid, | ||
1510 | donee->comm, donee->pid); | ||
1511 | |||
1512 | // need to terminate donation relation. | ||
1513 | if(donee == other_fq->owner) { | ||
1514 | TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n", | ||
1515 | donee->comm, donee->pid, | ||
1516 | ikglp_get_idx(sem, other_fq)); | ||
1517 | |||
1518 | ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags); | ||
1519 | lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! | ||
1520 | } | ||
1521 | else { | ||
1522 | TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n", | ||
1523 | donee->comm, donee->pid, | ||
1524 | ikglp_get_idx(sem, other_fq)); | ||
1525 | |||
1526 | ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node); | ||
1527 | if(donee == other_fq->hp_waiter) { | ||
1528 | TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n", | ||
1529 | donee->comm, donee->pid, | ||
1530 | ikglp_get_idx(sem, other_fq)); | ||
1531 | |||
1532 | other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL); | ||
1533 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | ||
1534 | ikglp_get_idx(sem, other_fq), | ||
1535 | (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil", | ||
1536 | (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1); | ||
1537 | |||
1538 | ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it. | ||
1539 | lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! | ||
1540 | } | ||
1541 | } | ||
1542 | } | ||
1543 | else if(to_steal) { | ||
1544 | TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n", | ||
1545 | ikglp_get_idx(sem, to_steal)); | ||
1546 | |||
1547 | if(need_steal_prio_reeval) { | ||
1548 | ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it. | ||
1549 | lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! | ||
1550 | } | ||
1551 | } | ||
1552 | |||
1553 | // check for new HP waiter. | ||
1554 | if(new_on_fq) { | ||
1555 | if(fq == fq_of_new_on_fq) { | ||
1556 | // fq->owner is null, so just update the hp_waiter without locking. | ||
1557 | if(new_on_fq == fq->hp_waiter) { | ||
1558 | TRACE_TASK(t, "new_on_fq (%s/%d) is already hp_waiter.\n", | ||
1559 | fq->hp_waiter->comm, fq->hp_waiter->pid); | ||
1560 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure... | ||
1561 | } | ||
1562 | else if(litmus->compare(new_on_fq, fq->hp_waiter)) { | ||
1563 | if(fq->hp_waiter) | ||
1564 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", | ||
1565 | fq->hp_waiter->comm, fq->hp_waiter->pid); | ||
1566 | else | ||
1567 | TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); | ||
1568 | |||
1569 | fq->hp_waiter = new_on_fq; | ||
1570 | fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); | ||
1571 | |||
1572 | TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n", | ||
1573 | ikglp_get_idx(sem, fq), | ||
1574 | (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", | ||
1575 | (fq->hp_waiter) ? fq->hp_waiter->pid : -1); | ||
1576 | } | ||
1577 | } | ||
1578 | else { | ||
1579 | ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it. | ||
1580 | lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!! | ||
1581 | } | ||
1582 | } | ||
1583 | |||
1584 | wake_kludge: | ||
1585 | if(waitqueue_active(&fq->wait)) | ||
1586 | { | ||
1587 | wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list); | ||
1588 | ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node); | ||
1589 | next = (struct task_struct*) wait->private; | ||
1590 | |||
1591 | __remove_wait_queue(&fq->wait, wait); | ||
1592 | |||
1593 | TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n", | ||
1594 | ikglp_get_idx(sem, fq), | ||
1595 | next->comm, next->pid); | ||
1596 | |||
1597 | // migrate wait-state to fifo-memory. | ||
1598 | ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait); | ||
1599 | |||
1600 | /* next becomes the resource holder */ | ||
1601 | fq->owner = next; | ||
1602 | tsk_rt(next)->blocked_lock = NULL; | ||
1603 | |||
1604 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1605 | if(sem->aff_obs) { | ||
1606 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next); | ||
1607 | } | ||
1608 | #endif | ||
1609 | |||
1610 | /* determine new hp_waiter if necessary */ | ||
1611 | if (next == fq->hp_waiter) { | ||
1612 | |||
1613 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
1614 | /* next has the highest priority --- it doesn't need to | ||
1615 | * inherit. However, we need to make sure that the | ||
1616 | * next-highest priority in the queue is reflected in | ||
1617 | * hp_waiter. */ | ||
1618 | fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL); | ||
1619 | TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n", | ||
1620 | ikglp_get_idx(sem, fq), | ||
1621 | (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", | ||
1622 | (fq->hp_waiter) ? fq->hp_waiter->pid : -1); | ||
1623 | |||
1624 | fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? | ||
1625 | effective_priority(fq->hp_waiter) : NULL; | ||
1626 | |||
1627 | if (fq->hp_waiter) | ||
1628 | TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n"); | ||
1629 | else | ||
1630 | TRACE("no further waiters\n"); | ||
1631 | |||
1632 | raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1633 | |||
1634 | // TRACE_TASK(next, "Heap Before:\n"); | ||
1635 | // print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0); | ||
1636 | |||
1637 | binheap_add(&fq->nest.hp_binheap_node, | ||
1638 | &tsk_rt(next)->hp_blocked_tasks, | ||
1639 | struct nested_info, | ||
1640 | hp_binheap_node); | ||
1641 | |||
1642 | // TRACE_TASK(next, "Heap After:\n"); | ||
1643 | // print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0); | ||
1644 | |||
1645 | raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1646 | } | ||
1647 | else { | ||
1648 | /* Well, if 'next' is not the highest-priority waiter, | ||
1649 | * then it (probably) ought to inherit the highest-priority | ||
1650 | * waiter's priority. */ | ||
1651 | TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n", | ||
1652 | ikglp_get_idx(sem, fq), | ||
1653 | (fq->hp_waiter) ? fq->hp_waiter->comm : "nil", | ||
1654 | (fq->hp_waiter) ? fq->hp_waiter->pid : -1); | ||
1655 | |||
1656 | raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1657 | |||
1658 | binheap_add(&fq->nest.hp_binheap_node, | ||
1659 | &tsk_rt(next)->hp_blocked_tasks, | ||
1660 | struct nested_info, | ||
1661 | hp_binheap_node); | ||
1662 | |||
1663 | /* It is possible that 'next' *should* be the hp_waiter, but isn't | ||
1664 | * because that update hasn't yet executed (the update operation is | ||
1665 | * probably blocked on mutex->lock). So only inherit if the top of | ||
1666 | * 'next's hp_blocked_tasks heap is indeed the effective prio. of hp_waiter. | ||
1667 | * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter) | ||
1668 | * since the effective priority of hp_waiter can change (and the | ||
1669 | * update has not made it to this lock).) | ||
1670 | */ | ||
1671 | if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == | ||
1672 | fq->nest.hp_waiter_eff_prio)) | ||
1673 | { | ||
1674 | if(fq->nest.hp_waiter_eff_prio) | ||
1675 | litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio); | ||
1676 | else | ||
1677 | WARN_ON(1); | ||
1678 | } | ||
1679 | |||
1680 | raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
1681 | } | ||
1682 | |||
1683 | |||
1684 | // wake up the new resource holder! | ||
1685 | wake_up_process(next); | ||
1686 | } | ||
1687 | if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) { | ||
1688 | // The task we promoted went to an empty FQ. (Why didn't stealing pick this up?) | ||
1689 | // Wake up that task too. | ||
1690 | |||
1691 | BUG_ON(fq_of_new_on_fq->owner != NULL); | ||
1692 | |||
1693 | fq = fq_of_new_on_fq; | ||
1694 | fq_of_new_on_fq = NULL; | ||
1695 | goto wake_kludge; | ||
1696 | } | ||
1697 | |||
1698 | unlock_fine_irqrestore(&sem->lock, flags); | ||
1699 | unlock_global_irqrestore(dgl_lock, flags); | ||
1700 | |||
1701 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | ||
1702 | |||
1703 | out: | ||
1704 | return err; | ||
1705 | } | ||
1706 | |||
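When a replica is freed, ikglp_unlock() refills the vacated slot from one of several sources in a fixed order. The C sketch below summarizes that ladder with hypothetical names; the priority-inheritance and affinity bookkeeping that follows each case in the kernel code is omitted.

/* Sketch only: the order in which ikglp_unlock() picks a replacement for
 * the freed replica.  Hypothetical names; inheritance re-evaluation that
 * follows each case in the kernel code is omitted. */
enum refill_source { FROM_MY_DONOR, FROM_ANY_DONOR, FROM_PQ, FROM_STEAL, REFILL_NONE };

static enum refill_source ikglp_pick_refill(int have_own_donor,
					    int donors_waiting,
					    int pq_waiting,
					    int fq_now_empty)
{
	if (have_own_donor)
		return FROM_MY_DONOR;   /* donor targeting this owner         */
	if (donors_waiting)
		return FROM_ANY_DONOR;  /* highest-priority donor overall     */
	if (pq_waiting)
		return FROM_PQ;         /* head of the priority queue         */
	if (fq_now_empty)
		return FROM_STEAL;      /* steal so no replica's queue idles  */
	return REFILL_NONE;             /* queue still has waiters; move no one */
}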
1707 | |||
1708 | |||
1709 | int ikglp_close(struct litmus_lock* l) | ||
1710 | { | ||
1711 | struct task_struct *t = current; | ||
1712 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1713 | unsigned long flags; | ||
1714 | |||
1715 | int owner = 0; | ||
1716 | int i; | ||
1717 | |||
1718 | raw_spin_lock_irqsave(&sem->real_lock, flags); | ||
1719 | |||
1720 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
1721 | if(sem->fifo_queues[i].owner == t) { | ||
1722 | owner = 1; | ||
1723 | break; | ||
1724 | } | ||
1725 | } | ||
1726 | |||
1727 | raw_spin_unlock_irqrestore(&sem->real_lock, flags); | ||
1728 | |||
1729 | if (owner) | ||
1730 | ikglp_unlock(l); | ||
1731 | |||
1732 | return 0; | ||
1733 | } | ||
1734 | |||
1735 | void ikglp_free(struct litmus_lock* l) | ||
1736 | { | ||
1737 | struct ikglp_semaphore *sem = ikglp_from_lock(l); | ||
1738 | |||
1739 | kfree(sem->fifo_queues); | ||
1740 | kfree(sem); | ||
1741 | } | ||
1742 | |||
1743 | |||
1744 | |||
1745 | struct litmus_lock* ikglp_new(int m, | ||
1746 | struct litmus_lock_ops* ops, | ||
1747 | void* __user arg) | ||
1748 | { | ||
1749 | struct ikglp_semaphore* sem; | ||
1750 | int nr_replicas = 0; | ||
1751 | int i; | ||
1752 | |||
1753 | if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas))) | ||
1754 | { | ||
1755 | return(NULL); | ||
1756 | } | ||
1757 | if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas))) | ||
1758 | { | ||
1759 | return(NULL); | ||
1760 | } | ||
1761 | if(nr_replicas < 1) | ||
1762 | { | ||
1763 | return(NULL); | ||
1764 | } | ||
1765 | |||
1766 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1767 | if(!sem) | ||
1768 | { | ||
1769 | return NULL; | ||
1770 | } | ||
1771 | |||
1772 | sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL); | ||
1773 | if(!sem->fifo_queues) | ||
1774 | { | ||
1775 | kfree(sem); | ||
1776 | return NULL; | ||
1777 | } | ||
1778 | |||
1779 | sem->litmus_lock.ops = ops; | ||
1780 | |||
1781 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
1782 | { | ||
1783 | __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key); | ||
1784 | } | ||
1785 | #else | ||
1786 | raw_spin_lock_init(&sem->lock); | ||
1787 | #endif | ||
1788 | |||
1789 | raw_spin_lock_init(&sem->real_lock); | ||
1790 | |||
1791 | sem->nr_replicas = nr_replicas; | ||
1792 | sem->m = m; | ||
1793 | sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0); | ||
1794 | sem->nr_in_fifos = 0; | ||
1795 | |||
1796 | TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n", | ||
1797 | sem->m, | ||
1798 | sem->nr_replicas, | ||
1799 | sem->max_fifo_len); | ||
1800 | |||
1801 | for(i = 0; i < nr_replicas; ++i) | ||
1802 | { | ||
1803 | struct fifo_queue* q = &(sem->fifo_queues[i]); | ||
1804 | |||
1805 | q->owner = NULL; | ||
1806 | q->hp_waiter = NULL; | ||
1807 | init_waitqueue_head(&q->wait); | ||
1808 | q->count = 0; | ||
1809 | |||
1810 | q->global_heap_node.task = NULL; | ||
1811 | INIT_BINHEAP_NODE(&q->global_heap_node.node); | ||
1812 | |||
1813 | q->donee_heap_node.task = NULL; | ||
1814 | q->donee_heap_node.donor_info = NULL; | ||
1815 | q->donee_heap_node.fq = NULL; | ||
1816 | INIT_BINHEAP_NODE(&q->donee_heap_node.node); | ||
1817 | |||
1818 | q->nest.lock = (struct litmus_lock*)sem; | ||
1819 | q->nest.hp_waiter_eff_prio = NULL; | ||
1820 | q->nest.hp_waiter_ptr = &q->hp_waiter; | ||
1821 | INIT_BINHEAP_NODE(&q->nest.hp_binheap_node); | ||
1822 | } | ||
1823 | |||
1824 | sem->shortest_fifo_queue = &sem->fifo_queues[0]; | ||
1825 | |||
1826 | sem->top_m_size = 0; | ||
1827 | |||
1828 | // init heaps | ||
1829 | INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order); | ||
1830 | INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order); | ||
1831 | INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order); | ||
1832 | INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order); | ||
1833 | INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order); | ||
1834 | |||
1835 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1836 | sem->aff_obs = NULL; | ||
1837 | #endif | ||
1838 | |||
1839 | return &sem->litmus_lock; | ||
1840 | } | ||
1841 | |||
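The max_fifo_len computed in ikglp_new() is the integer form of ceil(m/k): with m concurrent requests spread across k = nr_replicas FIFO queues, no queue is allowed to grow beyond ceil(m/k). A small standalone C check of that arithmetic follows; the helper and its test values are illustrative only.

/* Sketch: integer ceil(m / nr_replicas), as computed by ikglp_new(). */
#include <assert.h>

static int ikglp_max_fifo_len(int m, int nr_replicas)
{
	return m / nr_replicas + (m % nr_replicas != 0);
}

int main(void)
{
	assert(ikglp_max_fifo_len(8, 4) == 2);   /* evenly divisible        */
	assert(ikglp_max_fifo_len(6, 4) == 2);   /* ceil(6/4) = 2           */
	assert(ikglp_max_fifo_len(4, 4) == 1);   /* one slot per replica    */
	assert(ikglp_max_fifo_len(1, 4) == 1);   /* never rounds down to 0  */
	return 0;
}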
1842 | |||
1843 | |||
1844 | |||
1845 | |||
1846 | |||
1847 | |||
1848 | |||
1849 | |||
1850 | |||
1851 | |||
1852 | |||
1853 | |||
1854 | |||
1855 | |||
1856 | |||
1857 | |||
1858 | |||
1859 | |||
1860 | |||
1861 | |||
1862 | |||
1863 | |||
1864 | |||
1865 | |||
1866 | |||
1867 | |||
1868 | |||
1869 | |||
1870 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
1871 | |||
1872 | static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1873 | { | ||
1874 | int gpu = replica % aff->nr_rsrc; | ||
1875 | return gpu; | ||
1876 | } | ||
1877 | |||
1878 | static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica) | ||
1879 | { | ||
1880 | int gpu = __replica_to_gpu(aff, replica) + aff->offset; | ||
1881 | return gpu; | ||
1882 | } | ||
1883 | |||
1884 | static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu) | ||
1885 | { | ||
1886 | int replica = gpu - aff->offset; | ||
1887 | return replica; | ||
1888 | } | ||
1889 | |||
1890 | static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b) | ||
1891 | { | ||
1892 | return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b)); | ||
1893 | } | ||
1894 | |||
1895 | static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica) | ||
1896 | { | ||
1897 | if(tsk_rt(t)->last_gpu >= 0) | ||
1898 | { | ||
1899 | return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica)); | ||
1900 | } | ||
1901 | return 0; | ||
1902 | } | ||
1903 | |||
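The helpers above encode a simple layout: the lock's replicas are backed by nr_rsrc GPUs, nr_simult replicas per GPU, starting at a system-wide GPU index offset, so replica r lives on GPU (r % nr_rsrc) + offset. A standalone C illustration of that arithmetic follows; the struct and program are stand-ins for illustration, with field names mirroring struct ikglp_affinity.

/* Sketch of the replica <-> GPU index arithmetic used by the affinity
 * observer.  Field names mirror struct ikglp_affinity; the struct itself
 * is illustrative only. */
#include <stdio.h>

struct aff_layout {
	int nr_rsrc;   /* number of GPUs managed through this lock       */
	int nr_simult; /* simultaneous users (replicas) per GPU          */
	int offset;    /* first system-wide GPU index used by the lock   */
};

static int sketch_replica_to_gpu(const struct aff_layout *a, int replica)
{
	return replica % a->nr_rsrc + a->offset;
}

static int sketch_gpu_to_base_replica(const struct aff_layout *a, int gpu)
{
	return gpu - a->offset;	/* lowest replica index on that GPU */
}

int main(void)
{
	struct aff_layout a = { .nr_rsrc = 2, .nr_simult = 3, .offset = 4 };
	int r;

	/* Six replicas alternate between system GPUs 4 and 5. */
	for (r = 0; r < a.nr_rsrc * a.nr_simult; ++r)
		printf("replica %d -> GPU %d\n", r, sketch_replica_to_gpu(&a, r));
	printf("GPU 5 -> base replica %d\n", sketch_gpu_to_base_replica(&a, 5));
	return 0;
}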
1904 | int ikglp_aff_obs_close(struct affinity_observer* obs) | ||
1905 | { | ||
1906 | return 0; | ||
1907 | } | ||
1908 | |||
1909 | void ikglp_aff_obs_free(struct affinity_observer* obs) | ||
1910 | { | ||
1911 | struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs); | ||
1912 | |||
1913 | // make sure the thread destroying this semaphore will not | ||
1914 | // call the exit callback on a destroyed lock. | ||
1915 | struct task_struct *t = current; | ||
1916 | if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff) | ||
1917 | { | ||
1918 | tsk_rt(t)->rsrc_exit_cb = NULL; | ||
1919 | tsk_rt(t)->rsrc_exit_cb_args = NULL; | ||
1920 | } | ||
1921 | |||
1922 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | ||
1923 | kfree(ikglp_aff->nr_aff_on_rsrc); | ||
1924 | kfree(ikglp_aff->q_info); | ||
1925 | kfree(ikglp_aff); | ||
1926 | } | ||
1927 | |||
1928 | static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops, | ||
1929 | struct ikglp_affinity_ops* ikglp_ops, | ||
1930 | void* __user args) | ||
1931 | { | ||
1932 | struct ikglp_affinity* ikglp_aff; | ||
1933 | struct gpu_affinity_observer_args aff_args; | ||
1934 | struct ikglp_semaphore* sem; | ||
1935 | int i; | ||
1936 | unsigned long flags; | ||
1937 | |||
1938 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | ||
1939 | return(NULL); | ||
1940 | } | ||
1941 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { | ||
1942 | return(NULL); | ||
1943 | } | ||
1944 | |||
1945 | sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); | ||
1946 | |||
1947 | if(sem->litmus_lock.type != IKGLP_SEM) { | ||
1948 | TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); | ||
1949 | return(NULL); | ||
1950 | } | ||
1951 | |||
1952 | if((aff_args.nr_simult_users <= 0) || | ||
1953 | (sem->nr_replicas%aff_args.nr_simult_users != 0)) { | ||
1954 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | ||
1955 | "(%d) per replica. #replicas should be evenly divisible " | ||
1956 | "by #simult_users.\n", | ||
1957 | sem->litmus_lock.ident, | ||
1958 | sem->nr_replicas, | ||
1959 | aff_args.nr_simult_users); | ||
1960 | return(NULL); | ||
1961 | } | ||
1962 | |||
1963 | // if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
1964 | // TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | ||
1965 | // NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
1966 | //// return(NULL); | ||
1967 | // } | ||
1968 | |||
1969 | ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL); | ||
1970 | if(!ikglp_aff) { | ||
1971 | return(NULL); | ||
1972 | } | ||
1973 | |||
1974 | ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL); | ||
1975 | if(!ikglp_aff->q_info) { | ||
1976 | kfree(ikglp_aff); | ||
1977 | return(NULL); | ||
1978 | } | ||
1979 | |||
1980 | ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | ||
1981 | if(!ikglp_aff->nr_cur_users_on_rsrc) { | ||
1982 | kfree(ikglp_aff->q_info); | ||
1983 | kfree(ikglp_aff); | ||
1984 | return(NULL); | ||
1985 | } | ||
1986 | |||
1987 | ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL); | ||
1988 | if(!ikglp_aff->nr_aff_on_rsrc) { | ||
1989 | kfree(ikglp_aff->nr_cur_users_on_rsrc); | ||
1990 | kfree(ikglp_aff->q_info); | ||
1991 | kfree(ikglp_aff); | ||
1992 | return(NULL); | ||
1993 | } | ||
1994 | |||
1995 | affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs); | ||
1996 | |||
1997 | ikglp_aff->ops = ikglp_ops; | ||
1998 | ikglp_aff->offset = aff_args.replica_to_gpu_offset; | ||
1999 | ikglp_aff->nr_simult = aff_args.nr_simult_users; | ||
2000 | ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult; | ||
2001 | ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0; | ||
2002 | |||
2003 | TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, " | ||
2004 | "nr_rsrc = %d, relaxed_fifo_len = %d\n", | ||
2005 | ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc, | ||
2006 | ikglp_aff->relax_max_fifo_len); | ||
2007 | |||
2008 | memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc)); | ||
2009 | memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc)); | ||
2010 | |||
2011 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
2012 | ikglp_aff->q_info[i].q = &sem->fifo_queues[i]; | ||
2013 | ikglp_aff->q_info[i].estimated_len = 0; | ||
2014 | |||
2015 | // multiple q_info's will point to the same resource (aka GPU) if | ||
2016 | // aff_args.nr_simult_users > 1 | ||
2017 | ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; | ||
2018 | ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)]; | ||
2019 | } | ||
2020 | |||
2021 | // attach observer to the lock | ||
2022 | raw_spin_lock_irqsave(&sem->real_lock, flags); | ||
2023 | sem->aff_obs = ikglp_aff; | ||
2024 | raw_spin_unlock_irqrestore(&sem->real_lock, flags); | ||
2025 | |||
2026 | return &ikglp_aff->obs; | ||
2027 | } | ||
2028 | |||
2029 | |||
2030 | |||
2031 | |||
2032 | static int gpu_replica_to_resource(struct ikglp_affinity* aff, | ||
2033 | struct fifo_queue* fq) { | ||
2034 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2035 | return(replica_to_gpu(aff, ikglp_get_idx(sem, fq))); | ||
2036 | } | ||
2037 | |||
2038 | |||
2039 | // Smart IKGLP Affinity | ||
2040 | |||
2041 | //static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff) | ||
2042 | //{ | ||
2043 | // struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2044 | // struct ikglp_queue_info *shortest = &aff->q_info[0]; | ||
2045 | // int i; | ||
2046 | // | ||
2047 | // for(i = 1; i < sem->nr_replicas; ++i) { | ||
2048 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { | ||
2049 | // shortest = &aff->q_info[i]; | ||
2050 | // } | ||
2051 | // } | ||
2052 | // | ||
2053 | // return(shortest); | ||
2054 | //} | ||
2055 | |||
2056 | struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | ||
2057 | { | ||
2058 | // advise_enqueue must be smart so as not to break IKGLP rules: | ||
2059 | // * No queue can be greater than ceil(m/k) in length. We may return | ||
2060 | // such a queue, but IKGLP will be smart enough as to send requests | ||
2061 | // to donors or PQ. | ||
2062 | // * Cannot let a queue idle if there exist waiting PQ/donors | ||
2063 | // -- needed to guarantee parallel progress of waiters. | ||
2064 | // | ||
2065 | // We may be able to relax some of these constraints, but this will have to | ||
2066 | // be carefully evaluated. | ||
2067 | // | ||
2068 | // Heuristic strategy: Find the shortest queue that is not full. | ||
2069 | |||
2070 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2071 | lt_t min_len; | ||
2072 | int min_nr_users, min_nr_aff_users; | ||
2073 | struct ikglp_queue_info *shortest, *aff_queue; | ||
2074 | struct fifo_queue *to_enqueue; | ||
2075 | int i; | ||
2076 | int affinity_gpu; | ||
2077 | |||
2078 | int max_fifo_len = (aff->relax_max_fifo_len) ? | ||
2079 | sem->m : sem->max_fifo_len; | ||
2080 | |||
2081 | // if we have no affinity, find the GPU with the least number of users | ||
2082 | // with active affinity | ||
2083 | if(unlikely(tsk_rt(t)->last_gpu < 0)) { | ||
2084 | int temp_min = aff->nr_aff_on_rsrc[0]; | ||
2085 | affinity_gpu = aff->offset; | ||
2086 | |||
2087 | for(i = 1; i < aff->nr_rsrc; ++i) { | ||
2088 | if(aff->nr_aff_on_rsrc[i] < temp_min) { | ||
     | temp_min = aff->nr_aff_on_rsrc[i]; /* track the running minimum */ | ||
2089 | affinity_gpu = aff->offset + i; | ||
2090 | } | ||
2091 | } | ||
2092 | |||
2093 | TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n", | ||
2094 | affinity_gpu, temp_min); | ||
2095 | } | ||
2096 | else { | ||
2097 | affinity_gpu = tsk_rt(t)->last_gpu; | ||
2098 | } | ||
2099 | |||
2100 | // All things being equal, start with the queue for which we have | ||
2101 | // affinity. This helps us maintain affinity even when we don't have | ||
2102 | // an estimate for local-affinity execution time (i.e., 2nd time on GPU). | ||
2103 | aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; | ||
2104 | shortest = aff_queue; | ||
2105 | |||
2106 | // if(shortest == aff->shortest_queue) { | ||
2107 | // TRACE_CUR("special case: have affinity with shortest queue\n"); | ||
2108 | // goto out; | ||
2109 | // } | ||
2110 | |||
2111 | min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); | ||
2112 | min_nr_users = *(shortest->nr_cur_users); | ||
2113 | min_nr_aff_users = *(shortest->nr_aff_users); | ||
2114 | |||
2115 | |||
2116 | TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", | ||
2117 | get_gpu_estimate(t, MIG_LOCAL), | ||
2118 | ikglp_get_idx(sem, shortest->q), | ||
2119 | shortest->q->count, | ||
2120 | min_len); | ||
2121 | |||
2122 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
2123 | if(&aff->q_info[i] != shortest) { | ||
2124 | if(aff->q_info[i].q->count < max_fifo_len) { | ||
2125 | int want = 0; | ||
2126 | |||
2127 | lt_t migration = | ||
2128 | get_gpu_estimate(t, | ||
2129 | gpu_migration_distance(tsk_rt(t)->last_gpu, | ||
2130 | replica_to_gpu(aff, i))); | ||
2131 | lt_t est_len = aff->q_info[i].estimated_len + migration; | ||
2132 | |||
2133 | // queue is smaller, or they're equal and the other has a smaller number | ||
2134 | // of total users. | ||
2135 | // | ||
2136 | // Tie-break on the smallest number of simultaneous users. This only kicks | ||
2137 | // in when there is more than one empty queue. | ||
2138 | |||
2139 | // TODO: Make "est_len < min_len" a fuzzy function that allows | ||
2140 | // queues "close enough" in length to be considered equal. | ||
2141 | |||
2142 | /* NOTE: 'shortest' starts out with affinity GPU */ | ||
2143 | if(unlikely(shortest->q->count >= max_fifo_len)) { /* 'shortest' is full and i-th queue is not */ | ||
2144 | want = 1; | ||
2145 | } | ||
2146 | else if(est_len < min_len) { | ||
2147 | want = 1; /* i-th queue has shortest length */ | ||
2148 | } | ||
2149 | else if(unlikely(est_len == min_len)) { /* equal lengths */ | ||
2150 | if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { /* don't sacrifice affinity on tie */ | ||
2151 | if(has_affinity(aff, t, i)) { | ||
2152 | want = 1; /* switch to maintain affinity */ | ||
2153 | } | ||
2154 | else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) { /* favor one with less affinity load */ | ||
2155 | want = 1; | ||
2156 | } | ||
2157 | else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */ | ||
2158 | (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */ | ||
2159 | want = 1; | ||
2160 | } | ||
2161 | } | ||
2162 | } | ||
2163 | |||
2164 | if(want) { | ||
2165 | shortest = &aff->q_info[i]; | ||
2166 | min_len = est_len; | ||
2167 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
2168 | min_nr_aff_users = *(aff->q_info[i].nr_aff_users); | ||
2169 | } | ||
2170 | |||
2171 | TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n", | ||
2172 | get_gpu_estimate(t, | ||
2173 | gpu_migration_distance(tsk_rt(t)->last_gpu, | ||
2174 | replica_to_gpu(aff, i))), | ||
2175 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
2176 | aff->q_info[i].q->count, | ||
2177 | est_len); | ||
2178 | } | ||
2179 | else { | ||
2180 | TRACE_CUR("queue %d is too long. ineligible for enqueue.\n", | ||
2181 | ikglp_get_idx(sem, aff->q_info[i].q)); | ||
2182 | } | ||
2183 | } | ||
2184 | } | ||
2185 | |||
2186 | if(shortest->q->count >= max_fifo_len) { | ||
2187 | TRACE_CUR("selected fq %d is too long, but returning it anyway.\n", | ||
2188 | ikglp_get_idx(sem, shortest->q)); | ||
2189 | } | ||
2190 | |||
2191 | to_enqueue = shortest->q; | ||
2192 | TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n", | ||
2193 | ikglp_get_idx(sem, to_enqueue), | ||
2194 | to_enqueue->count, | ||
2195 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | ||
2196 | |||
2197 | return to_enqueue; | ||
2198 | |||
2199 | //return(sem->shortest_fifo_queue); | ||
2200 | } | ||
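The selection loop above boils down to a cost comparison: each non-full queue is scored by its estimated backlog plus the requester's migration penalty relative to its last GPU, and ties are broken by affinity load and then by current users. The C sketch below is a simplified version with hypothetical names; it omits the additional rule in the kernel code that never abandons a queue the task already has affinity for on a tie.

/* Sketch of the per-queue scoring in gpu_ikglp_advise_enqueue().
 * Hypothetical names; get_gpu_estimate()/gpu_migration_distance() supply
 * the real numbers in the kernel. */
#include <stdint.h>

typedef uint64_t lt_t;	/* LITMUS^RT time value */

struct fq_score {
	lt_t est_len;      /* estimated backlog of queued critical sections */
	int  nr_aff_users; /* tasks with affinity for this queue's GPU      */
	int  nr_cur_users; /* tasks currently using this queue's GPU        */
};

/* Nonzero if 'cand' should replace 'best' as the enqueue target. */
static int prefer_candidate(const struct fq_score *cand, lt_t cand_migration,
			    const struct fq_score *best, lt_t best_migration)
{
	lt_t cand_cost = cand->est_len + cand_migration;
	lt_t best_cost = best->est_len + best_migration;

	if (cand_cost != best_cost)
		return cand_cost < best_cost;                    /* shorter estimated wait */
	if (cand->nr_aff_users != best->nr_aff_users)
		return cand->nr_aff_users < best->nr_aff_users;  /* lighter affinity load  */
	return cand->nr_cur_users < best->nr_cur_users;          /* fewer current users    */
}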
2201 | |||
2202 | |||
2203 | |||
2204 | |||
2205 | static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff, | ||
2206 | int dest_gpu, | ||
2207 | struct fifo_queue* fq) | ||
2208 | { | ||
2209 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2210 | ikglp_wait_state_t *wait = NULL; | ||
2211 | int max_improvement = -(MIG_NONE+1); | ||
2212 | int replica = ikglp_get_idx(sem, fq); | ||
2213 | |||
2214 | if(waitqueue_active(&fq->wait)) { | ||
2215 | int this_gpu = replica_to_gpu(aff, replica); | ||
2216 | struct list_head *pos; | ||
2217 | |||
2218 | list_for_each(pos, &fq->wait.task_list) { | ||
2219 | wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); | ||
2220 | ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); | ||
2221 | |||
2222 | int tmp_improvement = | ||
2223 | gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) - | ||
2224 | gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu); | ||
2225 | |||
2226 | if(tmp_improvement > max_improvement) { | ||
2227 | wait = tmp_wait; | ||
2228 | max_improvement = tmp_improvement; | ||
2229 | |||
2230 | if(max_improvement >= (MIG_NONE-1)) { | ||
2231 | goto out; | ||
2232 | } | ||
2233 | } | ||
2234 | } | ||
2235 | |||
2236 | BUG_ON(!wait); | ||
2237 | } | ||
2238 | else { | ||
2239 | TRACE_CUR("fq %d is empty!\n", replica); | ||
2240 | } | ||
2241 | |||
2242 | out: | ||
2243 | |||
2244 | TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n", | ||
2245 | replica, | ||
2246 | (wait) ? wait->task->comm : "nil", | ||
2247 | (wait) ? wait->task->pid : -1, | ||
2248 | max_improvement); | ||
2249 | |||
2250 | return wait; | ||
2251 | } | ||
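pick_steal() (and gpu_ikglp_advise_steal() on top of it) rank candidates by how much a waiter's GPU affinity improves if it is moved: the migration distance from its current queue's GPU minus the distance from the destination GPU, both measured against the GPU it last ran on. A tiny C sketch of that metric follows; only MIG_LOCAL and MIG_NONE appear in the code above, so the intermediate distance names are placeholders.

/* Sketch of the affinity-improvement metric used when stealing.
 * The enum mirrors an ordered set of migration distances; only MIG_LOCAL
 * and MIG_NONE are taken from the code above, the rest are placeholders. */
enum mig_dist { MIG_LOCAL = 0, MIG_NEAR = 1, MIG_FAR = 2, MIG_NONE = 3 };

/* Positive: moving the waiter to the destination GPU brings it closer to
 * the GPU it last ran on.  Negative: the steal would hurt its affinity. */
static int steal_improvement(enum mig_dist from_current_queue_gpu,
			     enum mig_dist from_dest_gpu)
{
	return (int)from_current_queue_gpu - (int)from_dest_gpu;
}

The early exit on max_improvement >= (MIG_NONE - 1) in the kernel code simply stops the scan once no larger improvement is possible.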
2252 | |||
2253 | |||
2254 | ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff, | ||
2255 | struct fifo_queue* dst) | ||
2256 | { | ||
2257 | // Heuristic strategy: Find the task with the greatest improvement in affinity. | ||
2258 | // | ||
2259 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2260 | ikglp_wait_state_t *to_steal_state = NULL; | ||
2261 | // ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem); | ||
2262 | int max_improvement = -(MIG_NONE+1); | ||
2263 | int replica, i; | ||
2264 | int dest_gpu; | ||
2265 | |||
2266 | replica = ikglp_get_idx(sem, dst); | ||
2267 | dest_gpu = replica_to_gpu(aff, replica); | ||
2268 | |||
2269 | for(i = 0; i < sem->nr_replicas; ++i) { | ||
2270 | ikglp_wait_state_t *tmp_to_steal_state = | ||
2271 | pick_steal(aff, dest_gpu, &sem->fifo_queues[i]); | ||
2272 | |||
2273 | if(tmp_to_steal_state) { | ||
2274 | int tmp_improvement = | ||
2275 | gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) - | ||
2276 | gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu); | ||
2277 | |||
2278 | if(tmp_improvement > max_improvement) { | ||
2279 | to_steal_state = tmp_to_steal_state; | ||
2280 | max_improvement = tmp_improvement; | ||
2281 | |||
2282 | if(max_improvement >= (MIG_NONE-1)) { | ||
2283 | goto out; | ||
2284 | } | ||
2285 | } | ||
2286 | } | ||
2287 | } | ||
2288 | |||
2289 | out: | ||
2290 | if(!to_steal_state) { | ||
2291 | TRACE_CUR("Could not find anyone to steal.\n"); | ||
2292 | } | ||
2293 | else { | ||
2294 | TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", | ||
2295 | to_steal_state->task->comm, to_steal_state->task->pid, | ||
2296 | ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq), | ||
2297 | replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)), | ||
2298 | ikglp_get_idx(sem, dst), | ||
2299 | dest_gpu, | ||
2300 | max_improvement); | ||
2301 | |||
2302 | // TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n", | ||
2303 | // default_to_steal_state->task->comm, default_to_steal_state->task->pid, | ||
2304 | // ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq), | ||
2305 | // replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), | ||
2306 | // ikglp_get_idx(sem, dst), | ||
2307 | // replica_to_gpu(aff, ikglp_get_idx(sem, dst)), | ||
2308 | // | ||
2309 | // gpu_migration_distance( | ||
2310 | // replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)), | ||
2311 | // tsk_rt(default_to_steal_state->task)->last_gpu) - | ||
2312 | // gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu)); | ||
2313 | } | ||
2314 | |||
2315 | return(to_steal_state); | ||
2316 | } | ||
2317 | |||
2318 | |||
2319 | static inline int has_donor(wait_queue_t* fq_wait) | ||
2320 | { | ||
2321 | ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); | ||
2322 | return(wait->donee_heap_node.donor_info != NULL); | ||
2323 | } | ||
2324 | |||
2325 | static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff, | ||
2326 | struct fifo_queue* fq, | ||
2327 | int* dist_from_head) | ||
2328 | { | ||
2329 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2330 | struct task_struct *donee; | ||
2331 | ikglp_donee_heap_node_t *donee_node; | ||
2332 | struct task_struct *mth_highest = ikglp_mth_highest(sem); | ||
2333 | |||
2334 | // lt_t now = litmus_clock(); | ||
2335 | // | ||
2336 | // TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ", | ||
2337 | // ikglp_get_idx(sem, fq), | ||
2338 | // mth_highest->comm, mth_highest->pid, | ||
2339 | // (int)get_deadline(mth_highest) - now); | ||
2340 | |||
2341 | if(fq->owner && | ||
2342 | fq->donee_heap_node.donor_info == NULL && | ||
2343 | mth_highest != fq->owner && | ||
2344 | litmus->__compare(mth_highest, BASE, fq->owner, BASE)) { | ||
2345 | donee = fq->owner; | ||
2346 | donee_node = &(fq->donee_heap_node); | ||
2347 | *dist_from_head = 0; | ||
2348 | |||
2349 | BUG_ON(donee != donee_node->task); | ||
2350 | |||
2351 | TRACE_CUR("picked owner of fq %d as donee\n", | ||
2352 | ikglp_get_idx(sem, fq)); | ||
2353 | |||
2354 | goto out; | ||
2355 | } | ||
2356 | else if(waitqueue_active(&fq->wait)) { | ||
2357 | struct list_head *pos; | ||
2358 | |||
2359 | |||
2360 | // TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d " | ||
2361 | // "(mth_highest != fq->owner) = %d " | ||
2362 | // "(mth_highest > fq->owner) = %d\n", | ||
2363 | // ikglp_get_idx(sem, fq), | ||
2364 | // (fq->owner) ? fq->owner->comm : "nil", | ||
2365 | // (fq->owner) ? fq->owner->pid : -1, | ||
2366 | // (fq->owner) ? (int)get_deadline(fq->owner) - now : -999, | ||
2367 | // (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil", | ||
2368 | // (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1, | ||
2369 | // (mth_highest != fq->owner), | ||
2370 | // (litmus->__compare(mth_highest, BASE, fq->owner, BASE))); | ||
2371 | |||
2372 | |||
2373 | *dist_from_head = 1; | ||
2374 | |||
2375 | // iterating from the start of the queue is nice since this means | ||
2376 | // the donee will be closer to obtaining a resource. | ||
2377 | list_for_each(pos, &fq->wait.task_list) { | ||
2378 | wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list); | ||
2379 | ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node); | ||
2380 | |||
2381 | // TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d " | ||
2382 | // "(mth_highest != wait->task) = %d " | ||
2383 | // "(mth_highest > wait->task) = %d\n", | ||
2384 | // ikglp_get_idx(sem, fq), | ||
2385 | // dist_from_head, | ||
2386 | // wait->task->comm, wait->task->pid, | ||
2387 | // (int)get_deadline(wait->task) - now, | ||
2388 | // (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil", | ||
2389 | // (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1, | ||
2390 | // (mth_highest != wait->task), | ||
2391 | // (litmus->__compare(mth_highest, BASE, wait->task, BASE))); | ||
2392 | |||
2393 | |||
2394 | if(!has_donor(fq_wait) && | ||
2395 | mth_highest != wait->task && | ||
2396 | litmus->__compare(mth_highest, BASE, wait->task, BASE)) { | ||
2397 | donee = (struct task_struct*) fq_wait->private; | ||
2398 | donee_node = &wait->donee_heap_node; | ||
2399 | |||
2400 | BUG_ON(donee != donee_node->task); | ||
2401 | |||
2402 | TRACE_CUR("picked waiter in fq %d as donee\n", | ||
2403 | ikglp_get_idx(sem, fq)); | ||
2404 | |||
2405 | goto out; | ||
2406 | } | ||
2407 | ++(*dist_from_head); | ||
2408 | } | ||
2409 | } | ||
2410 | |||
2411 | donee = NULL; | ||
2412 | donee_node = NULL; | ||
2413 | //*dist_from_head = sem->max_fifo_len + 1; | ||
2414 | *dist_from_head = IKGLP_INVAL_DISTANCE; | ||
2415 | |||
2416 | TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq)); | ||
2417 | |||
2418 | out: | ||
2419 | |||
2420 | TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n", | ||
2421 | ikglp_get_idx(sem, fq), | ||
2422 | (donee) ? (donee)->comm : "nil", | ||
2423 | (donee) ? (donee)->pid : -1, | ||
2424 | *dist_from_head); | ||
2425 | |||
2426 | return donee_node; | ||
2427 | } | ||
2428 | |||
2429 | ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection( | ||
2430 | struct ikglp_affinity* aff, | ||
2431 | struct task_struct* donor) | ||
2432 | { | ||
2433 | // Heuristic strategy: Find the highest-priority donee that is waiting on | ||
2434 | // a queue closest to our affinity. (1) The donee CANNOT already have a | ||
2435 | // donor (exception: donee is the lowest-prio task in the donee heap). | ||
2436 | // (2) Requests in the 'top_m' heap are ineligible. | ||
2437 | // | ||
2438 | // Further strategy: amongst eligible donees waiting for the same GPU, pick | ||
2439 | // the one closest to the head of the FIFO queue (including owners). | ||
2440 | // | ||
2441 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2442 | ikglp_donee_heap_node_t *donee_node; | ||
2443 | gpu_migration_dist_t distance; | ||
2444 | int start, i, j; | ||
2445 | |||
2446 | ikglp_donee_heap_node_t *default_donee; | ||
2447 | ikglp_wait_state_t *default_donee_donor_info; | ||
2448 | |||
2449 | if(tsk_rt(donor)->last_gpu < 0) { | ||
2450 | // no affinity. just return the min prio, like standard IKGLP | ||
2451 | // TODO: Find something closer to the head of the queue?? | ||
2452 | donee_node = binheap_top_entry(&sem->donees, | ||
2453 | ikglp_donee_heap_node_t, | ||
2454 | node); | ||
2455 | goto out; | ||
2456 | } | ||
2457 | |||
2458 | |||
2459 | // Temporarily break any donation relation of the default donee (the lowest- | ||
2460 | // prio task in the FIFO queues) to make it eligible for selection below. | ||
2461 | // | ||
2462 | // NOTE: The original donor relation *must* be restored, even if we select | ||
2463 | // the default donee through affinity-aware selection, before returning | ||
2464 | // from this function so we don't screw up our heap ordering. | ||
2465 | // The standard IKGLP algorithm will steal the donor relationship if needed. | ||
2466 | default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
2467 | default_donee_donor_info = default_donee->donor_info; // back-up donor relation | ||
2468 | default_donee->donor_info = NULL; // temporarily break any donor relation. | ||
2469 | |||
2470 | // initialize our search | ||
2471 | donee_node = NULL; | ||
2472 | distance = MIG_NONE; | ||
2473 | |||
2474 | // TODO: The below search logic may work well for locating nodes to steal | ||
2475 | // when an FQ goes idle. Validate this code and apply it to stealing. | ||
2476 | |||
2477 | // begin search with affinity GPU. | ||
2478 | start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu); | ||
2479 | i = start; | ||
2480 | do { // "for each gpu" / "for each aff->nr_rsrc" | ||
2481 | gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i); | ||
2482 | |||
2483 | // only interested in queues that will improve our distance | ||
2484 | if(temp_distance < distance || donee_node == NULL) { | ||
2485 | int dist_from_head = IKGLP_INVAL_DISTANCE; | ||
2486 | |||
2487 | TRACE_CUR("searching for donee on GPU %d\n", i); | ||
2488 | |||
2489 | // visit each queue and pick a donee. bail as soon as we find | ||
2490 | // one for this class. | ||
2491 | |||
2492 | for(j = 0; j < aff->nr_simult; ++j) { | ||
2493 | int temp_dist_from_head; | ||
2494 | ikglp_donee_heap_node_t *temp_donee_node; | ||
2495 | struct fifo_queue *fq; | ||
2496 | |||
2497 | fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]); | ||
2498 | temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head); | ||
2499 | |||
2500 | if(temp_dist_from_head < dist_from_head) | ||
2501 | { | ||
2502 | // we check all the FQs for this GPU to spread priorities | ||
2503 | // out across the queues. does this decrease jitter? | ||
2504 | donee_node = temp_donee_node; | ||
2505 | dist_from_head = temp_dist_from_head; | ||
2506 | } | ||
2507 | } | ||
2508 | |||
2509 | if(dist_from_head != IKGLP_INVAL_DISTANCE) { | ||
2510 | TRACE_CUR("found donee %s/%d, the %d-th waiter.\n", | ||
2511 | donee_node->task->comm, donee_node->task->pid, | ||
2512 | dist_from_head); | ||
2513 | } | ||
2514 | else { | ||
2515 | TRACE_CUR("found no eligible donees on GPU %d\n", i); | ||
2516 | } | ||
2517 | } | ||
2518 | else { | ||
2519 | TRACE_CUR("skipping GPU %d (distance = %d, best donor " | ||
2520 | "distance = %d)\n", i, temp_distance, distance); | ||
2521 | } | ||
2522 | |||
2523 | i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around | ||
2524 | } while (i != start); | ||
2525 | |||
2526 | |||
2527 | // restore old donor info state. | ||
2528 | default_donee->donor_info = default_donee_donor_info; | ||
2529 | |||
2530 | if(!donee_node) { | ||
2531 | donee_node = default_donee; | ||
2532 | |||
2533 | TRACE_CUR("Could not find a donee. We have to steal one.\n"); | ||
2534 | WARN_ON(default_donee->donor_info == NULL); | ||
2535 | } | ||
2536 | |||
2537 | out: | ||
2538 | |||
2539 | TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n", | ||
2540 | donee_node->task->comm, donee_node->task->pid, | ||
2541 | ikglp_get_idx(sem, donee_node->fq), | ||
2542 | replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)), | ||
2543 | donor->comm, donor->pid, tsk_rt(donor)->last_gpu); | ||
2544 | |||
2545 | return(donee_node); | ||
2546 | } | ||
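
The donee search above visits the GPUs in a ring, starting from the donor's last-used GPU, and only descends into a GPU's FIFO queues when that GPU's migration distance improves on the best candidate found so far. The stand-alone sketch below mirrors only that scan order and distance filter; the distance table and the pick_donee_on_gpu() stub are invented stand-ins for gpu_migration_distance() and pick_donee(), not part of the patch.

/* Illustrative sketch only: wrap-around, distance-filtered scan in the
 * spirit of gpu_ikglp_advise_donee_selection(). */
#include <stdio.h>

#define NR_GPUS 4

/* hypothetical migration distances between GPU indices (0 = local) */
static int distance(int a, int b)
{
	return (a == b) ? 0 : ((a / 2 == b / 2) ? 1 : 2);
}

/* stand-in for scanning one GPU's FIFO queues; returns 1 if a donee exists */
static int pick_donee_on_gpu(int gpu)
{
	return gpu == 2; /* pretend only GPU 2 has an eligible waiter */
}

int main(void)
{
	int start = 1;          /* donor's last-used GPU */
	int best_gpu = -1;
	int best_dist = 1000;   /* worse than any real distance */
	int i = start;

	do {
		int d = distance(start, i);
		/* only visit GPUs that would improve the migration distance */
		if ((best_gpu < 0 || d < best_dist) && pick_donee_on_gpu(i)) {
			best_gpu = i;
			best_dist = d;
		}
		i = (i + 1 < NR_GPUS) ? i + 1 : 0; /* increment with wrap-around */
	} while (i != start);

	printf("picked donee on GPU %d (distance %d)\n", best_gpu, best_dist);
	return 0;
}
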
2547 | |||
2548 | |||
2549 | |||
2550 | static void __find_closest_donor(int target_gpu, | ||
2551 | struct binheap_node* donor_node, | ||
2552 | ikglp_wait_state_t** cur_closest, | ||
2553 | int* cur_dist) | ||
2554 | { | ||
2555 | ikglp_wait_state_t *this_donor = | ||
2556 | binheap_entry(donor_node, ikglp_wait_state_t, node); | ||
2557 | |||
2558 | int this_dist = | ||
2559 | gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu); | ||
2560 | |||
2561 | // TRACE_CUR("%s/%d: dist from target = %d\n", | ||
2562 | // this_donor->task->comm, | ||
2563 | // this_donor->task->pid, | ||
2564 | // this_dist); | ||
2565 | |||
2566 | if(this_dist < *cur_dist) { | ||
2567 | // take this donor | ||
2568 | *cur_dist = this_dist; | ||
2569 | *cur_closest = this_donor; | ||
2570 | } | ||
2571 | else if(this_dist == *cur_dist) { | ||
2572 | // priority tie-break. Even though this is a pre-order traversal, | ||
2573 | // this is a heap, not a binary search tree, so we still need to do a | ||
2574 | // priority comparison. | ||
2575 | if(!(*cur_closest) || | ||
2576 | litmus->compare(this_donor->task, (*cur_closest)->task)) { | ||
2577 | *cur_dist = this_dist; | ||
2578 | *cur_closest = this_donor; | ||
2579 | } | ||
2580 | } | ||
2581 | |||
2582 | if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist); | ||
2583 | if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist); | ||
2584 | } | ||
2585 | |||
2586 | ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) | ||
2587 | { | ||
2588 | // Heuristic strategy: Find the donor with the closest affinity to fq. | ||
2589 | // Tie-break on priority. | ||
2590 | |||
2591 | // We need to iterate over all the donors to do this. Unfortunately, | ||
2592 | // our donors are organized in a heap. We'll visit each node with a | ||
2593 | // recursive call. This is relatively safe since there are only sem->m | ||
2594 | // donors, at most, so we won't recurse deeply enough to have to worry | ||
2595 | // about our stack (even with 128 CPUs, the nesting depth is at most 7). | ||
2596 | |||
2597 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2598 | ikglp_wait_state_t *donor = NULL; | ||
2599 | int distance = MIG_NONE; | ||
2600 | int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq)); | ||
2601 | |||
2602 | #ifdef CONFIG_SCHED_DEBUG_TRACE | ||
2603 | ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
2604 | #endif | ||
2605 | |||
2606 | __find_closest_donor(gpu, sem->donors.root, &donor, &distance); | ||
2607 | |||
2608 | TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d " | ||
2609 | "(non-aff wanted %s/%d). differs = %d\n", | ||
2610 | donor->task->comm, donor->task->pid, | ||
2611 | distance, | ||
2612 | ikglp_get_idx(sem, fq), | ||
2613 | default_donor->task->comm, default_donor->task->pid, | ||
2614 | (donor->task != default_donor->task) | ||
2615 | ); | ||
2616 | |||
2617 | return(donor); | ||
2618 | } | ||
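
The recursion in __find_closest_donor() is bounded by the height of the donor heap, which holds at most sem->m entries; assuming the heap is kept (nearly) complete, as the comment above relies on, that height is floor(log2(n)) for n donors, i.e. 7 for 128. The sketch below only evaluates that bound for a few sizes and is independent of any LITMUS data structure.

/* Illustrative sketch: the recursion depth of a pre-order walk over a
 * complete binary heap is its height, floor(log2(n)) for n nodes. */
#include <stdio.h>

static int heap_height(unsigned int n)
{
	int h = 0;
	while (n > 1) {
		n >>= 1;
		++h;
	}
	return h;
}

int main(void)
{
	unsigned int sizes[] = { 1, 8, 64, 128, 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i)
		printf("heap with %4u nodes: height %d\n",
		       sizes[i], heap_height(sizes[i]));
	return 0;
}
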
2619 | |||
2620 | |||
2621 | |||
2622 | void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2623 | { | ||
2624 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2625 | int replica = ikglp_get_idx(sem, fq); | ||
2626 | int gpu = replica_to_gpu(aff, replica); | ||
2627 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
2628 | lt_t est_time; | ||
2629 | lt_t est_len_before; | ||
2630 | |||
2631 | if(current == t) { | ||
2632 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
2633 | } | ||
2634 | |||
2635 | est_len_before = info->estimated_len; | ||
2636 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2637 | info->estimated_len += est_time; | ||
2638 | |||
2639 | TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", | ||
2640 | ikglp_get_idx(sem, info->q), | ||
2641 | est_len_before, est_time, | ||
2642 | info->estimated_len); | ||
2643 | |||
2644 | // if(aff->shortest_queue == info) { | ||
2645 | // // we may no longer be the shortest | ||
2646 | // aff->shortest_queue = ikglp_aff_find_shortest(aff); | ||
2647 | // | ||
2648 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
2649 | // ikglp_get_idx(sem, aff->shortest_queue->q), | ||
2650 | // aff->shortest_queue->q->count, | ||
2651 | // aff->shortest_queue->estimated_len); | ||
2652 | // } | ||
2653 | } | ||
2654 | |||
2655 | void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2656 | { | ||
2657 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2658 | int replica = ikglp_get_idx(sem, fq); | ||
2659 | int gpu = replica_to_gpu(aff, replica); | ||
2660 | struct ikglp_queue_info *info = &aff->q_info[replica]; | ||
2661 | lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2662 | |||
2663 | if(est_time > info->estimated_len) { | ||
2664 | WARN_ON(1); | ||
2665 | info->estimated_len = 0; | ||
2666 | } | ||
2667 | else { | ||
2668 | info->estimated_len -= est_time; | ||
2669 | } | ||
2670 | |||
2671 | TRACE_CUR("fq %d est len is now %llu\n", | ||
2672 | ikglp_get_idx(sem, info->q), | ||
2673 | info->estimated_len); | ||
2674 | |||
2675 | // check to see if we're the shortest queue now. | ||
2676 | // if((aff->shortest_queue != info) && | ||
2677 | // (aff->shortest_queue->estimated_len > info->estimated_len)) { | ||
2678 | // | ||
2679 | // aff->shortest_queue = info; | ||
2680 | // | ||
2681 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
2682 | // ikglp_get_idx(sem, info->q), | ||
2683 | // info->q->count, | ||
2684 | // info->estimated_len); | ||
2685 | // } | ||
2686 | } | ||
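
notify_enqueue adds the task's migration-aware execution estimate to the queue's estimated length, and notify_dequeue subtracts it again, clamping at zero (and warning) if the accounting would otherwise go negative. A minimal user-space sketch of that invariant follows; the fixed numbers stand in for get_gpu_estimate() and the names are hypothetical.

/* Illustrative sketch: per-queue estimated-length bookkeeping with an
 * underflow clamp, mirroring gpu_ikglp_notify_enqueue()/_dequeue(). */
#include <stdio.h>

typedef unsigned long long lt_t; /* same role as the kernel's lt_t */

static lt_t estimated_len;

static void on_enqueue(lt_t est_cs)
{
	estimated_len += est_cs;
}

static void on_dequeue(lt_t est_cs)
{
	if (est_cs > estimated_len) {
		/* should not happen; the kernel code issues WARN_ON(1) here */
		fprintf(stderr, "estimate underflow, clamping to 0\n");
		estimated_len = 0;
	} else {
		estimated_len -= est_cs;
	}
}

int main(void)
{
	on_enqueue(300);   /* e.g. remote-migration estimate */
	on_enqueue(100);   /* e.g. local estimate */
	on_dequeue(300);
	printf("estimated length now %llu\n", estimated_len); /* 100 */
	on_dequeue(250);   /* stale/over-large estimate -> clamp */
	printf("estimated length now %llu\n", estimated_len); /* 0 */
	return 0;
}
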
2687 | |||
2688 | int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t) | ||
2689 | { | ||
2690 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2691 | unsigned long flags = 0, real_flags; | ||
2692 | int aff_rsrc; | ||
2693 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2694 | raw_spinlock_t *dgl_lock; | ||
2695 | |||
2696 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
2697 | #endif | ||
2698 | |||
2699 | if (tsk_rt(t)->last_gpu < 0) | ||
2700 | return 0; | ||
2701 | |||
2702 | raw_spin_lock_irqsave(&sem->real_lock, real_flags); | ||
2703 | lock_global_irqsave(dgl_lock, flags); | ||
2704 | lock_fine_irqsave(&sem->lock, flags); | ||
2705 | |||
2706 | // decrement affinity count on old GPU | ||
2707 | aff_rsrc = tsk_rt(t)->last_gpu - aff->offset; | ||
2708 | --(aff->nr_aff_on_rsrc[aff_rsrc]); | ||
2709 | // aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t); | ||
2710 | |||
2711 | if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) { | ||
2712 | WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0); | ||
2713 | aff->nr_aff_on_rsrc[aff_rsrc] = 0; | ||
2714 | } | ||
2715 | |||
2716 | unlock_fine_irqrestore(&sem->lock, flags); | ||
2717 | unlock_global_irqrestore(dgl_lock, flags); | ||
2718 | raw_spin_unlock_irqrestore(&sem->real_lock, real_flags); | ||
2719 | |||
2720 | return 0; | ||
2721 | } | ||
2722 | |||
2723 | int gpu_ikglp_notify_exit_trampoline(struct task_struct* t) | ||
2724 | { | ||
2725 | struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args; | ||
2726 | if(likely(aff)) { | ||
2727 | return gpu_ikglp_notify_exit(aff, t); | ||
2728 | } | ||
2729 | else { | ||
2730 | return -1; | ||
2731 | } | ||
2732 | } | ||
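
The trampoline above recovers the affinity observer from an opaque per-task pointer so the exit path can call back into gpu_ikglp_notify_exit() without knowing the observer's type; gpu_ikglp_notify_acquired() below is what installs the callback and its argument. The sketch shows the same callback-plus-opaque-argument pattern in isolation, with task_struct and the rt_param fields replaced by a hypothetical struct.

/* Illustrative sketch: exit-callback trampoline with an opaque argument,
 * analogous to rsrc_exit_cb / rsrc_exit_cb_args. All names are hypothetical. */
#include <stdio.h>

struct fake_task {
	int (*exit_cb)(struct fake_task *t); /* like rsrc_exit_cb */
	void *exit_cb_args;                  /* like rsrc_exit_cb_args */
};

struct fake_observer {
	int nr_aff;
};

/* type-specific handler, comparable to gpu_ikglp_notify_exit() */
static int observer_on_exit(struct fake_observer *obs, struct fake_task *t)
{
	(void)t;
	if (obs->nr_aff > 0)
		--obs->nr_aff;
	return 0;
}

/* trampoline: recover the observer from the opaque pointer */
static int exit_trampoline(struct fake_task *t)
{
	struct fake_observer *obs = t->exit_cb_args;
	return obs ? observer_on_exit(obs, t) : -1;
}

int main(void)
{
	struct fake_observer obs = { .nr_aff = 1 };
	struct fake_task task = { .exit_cb = exit_trampoline,
				  .exit_cb_args = &obs };

	/* a generic exit path only knows about the function pointer */
	task.exit_cb(&task);
	printf("affinity count after exit: %d\n", obs.nr_aff); /* 0 */
	return 0;
}
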
2733 | |||
2734 | void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, | ||
2735 | struct fifo_queue* fq, | ||
2736 | struct task_struct* t) | ||
2737 | { | ||
2738 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2739 | int replica = ikglp_get_idx(sem, fq); | ||
2740 | int gpu = replica_to_gpu(aff, replica); | ||
2741 | int last_gpu = tsk_rt(t)->last_gpu; | ||
2742 | |||
2743 | tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration | ||
2744 | |||
2745 | TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n", | ||
2746 | t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration); | ||
2747 | |||
2748 | // count the number of resource holders | ||
2749 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2750 | |||
2751 | if(gpu != last_gpu) { | ||
2752 | if(last_gpu >= 0) { | ||
2753 | int old_rsrc = last_gpu - aff->offset; | ||
2754 | --(aff->nr_aff_on_rsrc[old_rsrc]); | ||
2755 | // aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t)); | ||
2756 | } | ||
2757 | |||
2758 | // increment affinity count on new GPU | ||
2759 | ++(aff->nr_aff_on_rsrc[gpu - aff->offset]); | ||
2760 | // aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t)); | ||
2761 | tsk_rt(t)->rsrc_exit_cb_args = aff; | ||
2762 | tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline; | ||
2763 | } | ||
2764 | |||
2765 | reg_nv_device(gpu, 1, t); // register | ||
2766 | |||
2767 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; | ||
2768 | reset_gpu_tracker(t); | ||
2769 | start_gpu_tracker(t); | ||
2770 | } | ||
2771 | |||
2772 | void gpu_ikglp_notify_freed(struct ikglp_affinity* aff, | ||
2773 | struct fifo_queue* fq, | ||
2774 | struct task_struct* t) | ||
2775 | { | ||
2776 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2777 | int replica = ikglp_get_idx(sem, fq); | ||
2778 | int gpu = replica_to_gpu(aff, replica); | ||
2779 | lt_t est_time; | ||
2780 | |||
2781 | stop_gpu_tracker(t); // stop the tracker before we do anything else. | ||
2782 | |||
2783 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
2784 | |||
2785 | // count the number of resource holders | ||
2786 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2787 | |||
2788 | reg_nv_device(gpu, 0, t); // unregister | ||
2789 | |||
2790 | // update estimates | ||
2791 | update_gpu_estimate(t, get_gpu_time(t)); | ||
2792 | |||
2793 | TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. " | ||
2794 | "estimated was %llu. diff is %d\n", | ||
2795 | t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, | ||
2796 | tsk_rt(t)->gpu_migration, | ||
2797 | get_gpu_time(t), | ||
2798 | est_time, | ||
2799 | (long long)get_gpu_time(t) - (long long)est_time); | ||
2800 | |||
2801 | tsk_rt(t)->last_gpu = gpu; | ||
2802 | } | ||
2803 | |||
2804 | struct ikglp_affinity_ops gpu_ikglp_affinity = | ||
2805 | { | ||
2806 | .advise_enqueue = gpu_ikglp_advise_enqueue, | ||
2807 | .advise_steal = gpu_ikglp_advise_steal, | ||
2808 | .advise_donee_selection = gpu_ikglp_advise_donee_selection, | ||
2809 | .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq, | ||
2810 | |||
2811 | .notify_enqueue = gpu_ikglp_notify_enqueue, | ||
2812 | .notify_dequeue = gpu_ikglp_notify_dequeue, | ||
2813 | .notify_acquired = gpu_ikglp_notify_acquired, | ||
2814 | .notify_freed = gpu_ikglp_notify_freed, | ||
2815 | |||
2816 | .notify_exit = gpu_ikglp_notify_exit, | ||
2817 | |||
2818 | .replica_to_resource = gpu_replica_to_resource, | ||
2819 | }; | ||
2820 | |||
2821 | struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
2822 | void* __user args) | ||
2823 | { | ||
2824 | return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args); | ||
2825 | } | ||
2826 | |||
2827 | |||
2828 | |||
2829 | |||
2830 | |||
2831 | |||
2832 | |||
2833 | |||
2834 | // Simple ikglp Affinity (standard ikglp with auto-gpu registration) | ||
2835 | |||
2836 | struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t) | ||
2837 | { | ||
2838 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2839 | int min_count; | ||
2840 | int min_nr_users; | ||
2841 | struct ikglp_queue_info *shortest; | ||
2842 | struct fifo_queue *to_enqueue; | ||
2843 | int i; | ||
2844 | |||
2845 | // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n"); | ||
2846 | |||
2847 | shortest = &aff->q_info[0]; | ||
2848 | min_count = shortest->q->count; | ||
2849 | min_nr_users = *(shortest->nr_cur_users); | ||
2850 | |||
2851 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
2852 | ikglp_get_idx(sem, shortest->q), | ||
2853 | shortest->q->count, | ||
2854 | min_nr_users); | ||
2855 | |||
2856 | for(i = 1; i < sem->nr_replicas; ++i) { | ||
2857 | int len = aff->q_info[i].q->count; | ||
2858 | |||
2859 | // pick this queue if it is shorter, or if the lengths are equal and it | ||
2860 | // has fewer total users. | ||
2861 | // | ||
2862 | // tie-break on the smallest number of simultaneous users. this only | ||
2863 | // kicks in when there is more than one empty queue. | ||
2864 | if((len < min_count) || | ||
2865 | ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
2866 | shortest = &aff->q_info[i]; | ||
2867 | min_count = shortest->q->count; | ||
2868 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
2869 | } | ||
2870 | |||
2871 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
2872 | ikglp_get_idx(sem, aff->q_info[i].q), | ||
2873 | aff->q_info[i].q->count, | ||
2874 | *(aff->q_info[i].nr_cur_users)); | ||
2875 | } | ||
2876 | |||
2877 | to_enqueue = shortest->q; | ||
2878 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
2879 | ikglp_get_idx(sem, to_enqueue), | ||
2880 | ikglp_get_idx(sem, sem->shortest_fifo_queue)); | ||
2881 | |||
2882 | return to_enqueue; | ||
2883 | } | ||
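
The simple (non-estimating) advise_enqueue above scans every replica and keeps the queue with the fewest waiters, falling back to the number of current resource holders to break ties between equally short, typically empty, queues. A stand-alone sketch of that selection rule over plain arrays:

/* Illustrative sketch: pick the shortest queue, tie-breaking on the number
 * of current holders, as in simple_gpu_ikglp_advise_enqueue(). */
#include <stdio.h>

int main(void)
{
	int count[]    = { 2, 0, 0, 3 };  /* waiters per replica */
	int nr_users[] = { 1, 2, 1, 1 };  /* holders on the backing resource */
	int n = 4;
	int best = 0, i;

	for (i = 1; i < n; ++i) {
		if (count[i] < count[best] ||
		    (count[i] == count[best] && nr_users[i] < nr_users[best]))
			best = i;
	}

	/* replicas 1 and 2 are both empty; replica 2 wins on fewer holders */
	printf("enqueue on replica %d\n", best);
	return 0;
}
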
2884 | |||
2885 | ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff, | ||
2886 | struct fifo_queue* dst) | ||
2887 | { | ||
2888 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2889 | // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n"); | ||
2890 | return ikglp_find_hp_waiter_to_steal(sem); | ||
2891 | } | ||
2892 | |||
2893 | ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor) | ||
2894 | { | ||
2895 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2896 | ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node); | ||
2897 | return(donee); | ||
2898 | } | ||
2899 | |||
2900 | ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq) | ||
2901 | { | ||
2902 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2903 | ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node); | ||
2904 | return(donor); | ||
2905 | } | ||
2906 | |||
2907 | void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2908 | { | ||
2909 | // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n"); | ||
2910 | } | ||
2911 | |||
2912 | void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2913 | { | ||
2914 | // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n"); | ||
2915 | } | ||
2916 | |||
2917 | void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2918 | { | ||
2919 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2920 | int replica = ikglp_get_idx(sem, fq); | ||
2921 | int gpu = replica_to_gpu(aff, replica); | ||
2922 | |||
2923 | // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n"); | ||
2924 | |||
2925 | // count the number of resource holders | ||
2926 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
2927 | |||
2928 | reg_nv_device(gpu, 1, t); // register | ||
2929 | } | ||
2930 | |||
2931 | void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t) | ||
2932 | { | ||
2933 | struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock); | ||
2934 | int replica = ikglp_get_idx(sem, fq); | ||
2935 | int gpu = replica_to_gpu(aff, replica); | ||
2936 | |||
2937 | // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n"); | ||
2938 | // count the number or resource holders | ||
2939 | --(*(aff->q_info[replica].nr_cur_users)); | ||
2940 | |||
2941 | reg_nv_device(gpu, 0, t); // unregister | ||
2942 | } | ||
2943 | |||
2944 | struct ikglp_affinity_ops simple_gpu_ikglp_affinity = | ||
2945 | { | ||
2946 | .advise_enqueue = simple_gpu_ikglp_advise_enqueue, | ||
2947 | .advise_steal = simple_gpu_ikglp_advise_steal, | ||
2948 | .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection, | ||
2949 | .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq, | ||
2950 | |||
2951 | .notify_enqueue = simple_gpu_ikglp_notify_enqueue, | ||
2952 | .notify_dequeue = simple_gpu_ikglp_notify_dequeue, | ||
2953 | .notify_acquired = simple_gpu_ikglp_notify_acquired, | ||
2954 | .notify_freed = simple_gpu_ikglp_notify_freed, | ||
2955 | |||
2956 | .notify_exit = NULL, | ||
2957 | |||
2958 | .replica_to_resource = gpu_replica_to_resource, | ||
2959 | }; | ||
2960 | |||
2961 | struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
2962 | void* __user args) | ||
2963 | { | ||
2964 | return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args); | ||
2965 | } | ||
2966 | |||
2967 | #endif | ||
2968 | |||
2969 | |||
2970 | |||
2971 | |||
2972 | |||
2973 | |||
2974 | |||
2975 | |||
2976 | |||
diff --git a/litmus/jobs.c b/litmus/jobs.c index 13a4ed4c9e93..e25854e1d143 100644 --- a/litmus/jobs.c +++ b/litmus/jobs.c | |||
@@ -13,6 +13,8 @@ static inline void setup_release(struct task_struct *t, lt_t release) | |||
13 | t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); | 13 | t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); |
14 | t->rt_param.job_params.exec_time = 0; | 14 | t->rt_param.job_params.exec_time = 0; |
15 | 15 | ||
16 | clear_bit(RT_JOB_SIG_BUDGET_SENT, &t->rt_param.job_params.flags); | ||
17 | |||
16 | /* update job sequence number */ | 18 | /* update job sequence number */ |
17 | t->rt_param.job_params.job_no++; | 19 | t->rt_param.job_params.job_no++; |
18 | 20 | ||
diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c new file mode 100644 index 000000000000..5ef5e54d600d --- /dev/null +++ b/litmus/kexclu_affinity.c | |||
@@ -0,0 +1,92 @@ | |||
1 | #include <litmus/fdso.h> | ||
2 | #include <litmus/sched_plugin.h> | ||
3 | #include <litmus/trace.h> | ||
4 | #include <litmus/litmus.h> | ||
5 | #include <litmus/locking.h> | ||
6 | |||
7 | #include <litmus/kexclu_affinity.h> | ||
8 | |||
9 | static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg); | ||
10 | static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg); | ||
11 | static int close_generic_aff_obs(struct od_table_entry* entry); | ||
12 | static void destroy_generic_aff_obs(obj_type_t type, void* sem); | ||
13 | |||
14 | struct fdso_ops generic_affinity_ops = { | ||
15 | .create = create_generic_aff_obs, | ||
16 | .open = open_generic_aff_obs, | ||
17 | .close = close_generic_aff_obs, | ||
18 | .destroy = destroy_generic_aff_obs | ||
19 | }; | ||
20 | |||
21 | static atomic_t aff_obs_id_gen = ATOMIC_INIT(0); | ||
22 | |||
23 | static inline bool is_affinity_observer(struct od_table_entry *entry) | ||
24 | { | ||
25 | return (entry->class == &generic_affinity_ops); | ||
26 | } | ||
27 | |||
28 | static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry) | ||
29 | { | ||
30 | BUG_ON(!is_affinity_observer(entry)); | ||
31 | return (struct affinity_observer*) entry->obj->obj; | ||
32 | } | ||
33 | |||
34 | static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg) | ||
35 | { | ||
36 | struct affinity_observer* aff_obs; | ||
37 | int err; | ||
38 | |||
39 | err = litmus->allocate_aff_obs(&aff_obs, type, arg); | ||
40 | if (err == 0) { | ||
41 | BUG_ON(!aff_obs->lock); | ||
42 | aff_obs->type = type; | ||
43 | *obj_ref = aff_obs; | ||
44 | } | ||
45 | return err; | ||
46 | } | ||
47 | |||
48 | static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg) | ||
49 | { | ||
50 | struct affinity_observer* aff_obs = get_affinity_observer(entry); | ||
51 | if (aff_obs->ops->open) | ||
52 | return aff_obs->ops->open(aff_obs, arg); | ||
53 | else | ||
54 | return 0; /* default: any task can open it */ | ||
55 | } | ||
56 | |||
57 | static int close_generic_aff_obs(struct od_table_entry* entry) | ||
58 | { | ||
59 | struct affinity_observer* aff_obs = get_affinity_observer(entry); | ||
60 | if (aff_obs->ops->close) | ||
61 | return aff_obs->ops->close(aff_obs); | ||
62 | else | ||
63 | return 0; /* default: closing succeeds */ | ||
64 | } | ||
65 | |||
66 | static void destroy_generic_aff_obs(obj_type_t type, void* obj) | ||
67 | { | ||
68 | struct affinity_observer* aff_obs = (struct affinity_observer*) obj; | ||
69 | aff_obs->ops->deallocate(aff_obs); | ||
70 | } | ||
71 | |||
72 | |||
73 | struct litmus_lock* get_lock_from_od(int od) | ||
74 | { | ||
75 | extern struct fdso_ops generic_lock_ops; | ||
76 | |||
77 | struct od_table_entry *entry = get_entry_for_od(od); | ||
78 | |||
79 | if(entry && entry->class == &generic_lock_ops) { | ||
80 | return (struct litmus_lock*) entry->obj->obj; | ||
81 | } | ||
82 | return NULL; | ||
83 | } | ||
84 | |||
85 | void affinity_observer_new(struct affinity_observer* aff, | ||
86 | struct affinity_observer_ops* ops, | ||
87 | struct affinity_observer_args* args) | ||
88 | { | ||
89 | aff->ops = ops; | ||
90 | aff->lock = get_lock_from_od(args->lock_od); | ||
91 | aff->ident = atomic_inc_return(&aff_obs_id_gen); | ||
92 | } \ No newline at end of file | ||
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c new file mode 100644 index 000000000000..785a095275e6 --- /dev/null +++ b/litmus/kfmlp_lock.c | |||
@@ -0,0 +1,1003 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/uaccess.h> | ||
3 | |||
4 | #include <litmus/trace.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/fdso.h> | ||
7 | |||
8 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
9 | #include <litmus/gpu_affinity.h> | ||
10 | #include <litmus/nvidia_info.h> | ||
11 | #endif | ||
12 | |||
13 | #include <litmus/kfmlp_lock.h> | ||
14 | |||
15 | static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem, | ||
16 | struct kfmlp_queue* queue) | ||
17 | { | ||
18 | return (queue - &sem->queues[0]); | ||
19 | } | ||
20 | |||
21 | static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem, | ||
22 | struct task_struct* holder) | ||
23 | { | ||
24 | int i; | ||
25 | for(i = 0; i < sem->num_resources; ++i) | ||
26 | if(sem->queues[i].owner == holder) | ||
27 | return(&sem->queues[i]); | ||
28 | return(NULL); | ||
29 | } | ||
30 | |||
31 | /* caller is responsible for locking */ | ||
32 | static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue, | ||
33 | struct task_struct *skip) | ||
34 | { | ||
35 | struct list_head *pos; | ||
36 | struct task_struct *queued, *found = NULL; | ||
37 | |||
38 | list_for_each(pos, &kqueue->wait.task_list) { | ||
39 | queued = (struct task_struct*) list_entry(pos, wait_queue_t, | ||
40 | task_list)->private; | ||
41 | |||
42 | /* Compare task prios, find high prio task. */ | ||
43 | //if (queued != skip && edf_higher_prio(queued, found)) | ||
44 | if (queued != skip && litmus->compare(queued, found)) | ||
45 | found = queued; | ||
46 | } | ||
47 | return found; | ||
48 | } | ||
49 | |||
50 | static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem, | ||
51 | struct kfmlp_queue* search_start) | ||
52 | { | ||
53 | // we start our search at search_start instead of at the beginning of the | ||
54 | // queue list to load-balance across all resources. | ||
55 | struct kfmlp_queue* step = search_start; | ||
56 | struct kfmlp_queue* shortest = sem->shortest_queue; | ||
57 | |||
58 | do | ||
59 | { | ||
60 | step = (step+1 != &sem->queues[sem->num_resources]) ? | ||
61 | step+1 : &sem->queues[0]; | ||
62 | |||
63 | if(step->count < shortest->count) | ||
64 | { | ||
65 | shortest = step; | ||
66 | if(step->count == 0) | ||
67 | break; /* can't get any shorter */ | ||
68 | } | ||
69 | |||
70 | }while(step != search_start); | ||
71 | |||
72 | return(shortest); | ||
73 | } | ||
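
kfmlp_find_shortest() starts its scan just after search_start rather than at queue 0, so ties between equally short queues are not always resolved in favour of the low-numbered replicas, and it stops early once an empty queue is found. The sketch below mirrors that circular scan over a plain array of counts; unlike the kernel version it seeds the search with search_start itself rather than the semaphore's cached shortest queue.

/* Illustrative sketch: circular scan for the shortest queue, starting just
 * after a given queue and stopping early on an empty one. */
#include <stdio.h>

static int find_shortest(const int *count, int n, int search_start)
{
	int shortest = search_start;
	int step = search_start;

	do {
		step = (step + 1 < n) ? step + 1 : 0;
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break; /* can't get any shorter */
		}
	} while (step != search_start);

	return shortest;
}

int main(void)
{
	int count[] = { 1, 3, 0, 0 };

	/* starting after queue 1, the scan visits 2 first and stops there */
	printf("shortest = %d\n", find_shortest(count, 4, 1));
	return 0;
}
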
74 | |||
75 | |||
76 | static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem, | ||
77 | wait_queue_t** to_steal, | ||
78 | struct kfmlp_queue** to_steal_from) | ||
79 | { | ||
80 | /* must hold sem->lock */ | ||
81 | |||
82 | int i; | ||
83 | |||
84 | *to_steal = NULL; | ||
85 | *to_steal_from = NULL; | ||
86 | |||
87 | for(i = 0; i < sem->num_resources; ++i) | ||
88 | { | ||
89 | if( (sem->queues[i].count > 1) && | ||
90 | ((*to_steal_from == NULL) || | ||
91 | //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) ) | ||
92 | (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) ) | ||
93 | { | ||
94 | *to_steal_from = &sem->queues[i]; | ||
95 | } | ||
96 | } | ||
97 | |||
98 | if(*to_steal_from) | ||
99 | { | ||
100 | struct list_head *pos; | ||
101 | struct task_struct *target = (*to_steal_from)->hp_waiter; | ||
102 | |||
103 | TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n", | ||
104 | target->comm, | ||
105 | target->pid, | ||
106 | kfmlp_get_idx(sem, *to_steal_from)); | ||
107 | |||
108 | list_for_each(pos, &(*to_steal_from)->wait.task_list) | ||
109 | { | ||
110 | wait_queue_t *node = list_entry(pos, wait_queue_t, task_list); | ||
111 | struct task_struct *queued = (struct task_struct*) node->private; | ||
112 | /* Compare task prios, find high prio task. */ | ||
113 | if (queued == target) | ||
114 | { | ||
115 | *to_steal = node; | ||
116 | |||
117 | TRACE_CUR("steal: selected %s/%d from queue %d\n", | ||
118 | queued->comm, queued->pid, | ||
119 | kfmlp_get_idx(sem, *to_steal_from)); | ||
120 | |||
121 | return queued; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n", | ||
126 | target->comm, | ||
127 | target->pid, | ||
128 | kfmlp_get_idx(sem, *to_steal_from)); | ||
129 | } | ||
130 | |||
131 | return NULL; | ||
132 | } | ||
133 | |||
134 | static void kfmlp_steal_node(struct kfmlp_semaphore *sem, | ||
135 | struct kfmlp_queue *dst, | ||
136 | wait_queue_t *wait, | ||
137 | struct kfmlp_queue *src) | ||
138 | { | ||
139 | struct task_struct* t = (struct task_struct*) wait->private; | ||
140 | |||
141 | __remove_wait_queue(&src->wait, wait); | ||
142 | --(src->count); | ||
143 | |||
144 | if(t == src->hp_waiter) { | ||
145 | src->hp_waiter = kfmlp_find_hp_waiter(src, NULL); | ||
146 | |||
147 | TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", | ||
148 | kfmlp_get_idx(sem, src), | ||
149 | (src->hp_waiter) ? src->hp_waiter->comm : "nil", | ||
150 | (src->hp_waiter) ? src->hp_waiter->pid : -1); | ||
151 | |||
152 | if(src->owner && tsk_rt(src->owner)->inh_task == t) { | ||
153 | litmus->decrease_prio(src->owner, src->hp_waiter); | ||
154 | } | ||
155 | } | ||
156 | |||
157 | if(sem->shortest_queue->count > src->count) { | ||
158 | sem->shortest_queue = src; | ||
159 | TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue)); | ||
160 | } | ||
161 | |||
162 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
163 | if(sem->aff_obs) { | ||
164 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t); | ||
165 | } | ||
166 | #endif | ||
167 | |||
168 | init_waitqueue_entry(wait, t); | ||
169 | __add_wait_queue_tail_exclusive(&dst->wait, wait); | ||
170 | ++(dst->count); | ||
171 | |||
172 | if(litmus->compare(t, dst->hp_waiter)) { | ||
173 | dst->hp_waiter = t; | ||
174 | |||
175 | TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", | ||
176 | kfmlp_get_idx(sem, dst), | ||
177 | t->comm, t->pid); | ||
178 | |||
179 | if(dst->owner && litmus->compare(t, dst->owner)) | ||
180 | { | ||
181 | litmus->increase_prio(dst->owner, t); | ||
182 | } | ||
183 | } | ||
184 | |||
185 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
186 | if(sem->aff_obs) { | ||
187 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t); | ||
188 | } | ||
189 | #endif | ||
190 | } | ||
191 | |||
192 | |||
193 | int kfmlp_lock(struct litmus_lock* l) | ||
194 | { | ||
195 | struct task_struct* t = current; | ||
196 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
197 | struct kfmlp_queue* my_queue = NULL; | ||
198 | wait_queue_t wait; | ||
199 | unsigned long flags; | ||
200 | |||
201 | if (!is_realtime(t)) | ||
202 | return -EPERM; | ||
203 | |||
204 | spin_lock_irqsave(&sem->lock, flags); | ||
205 | |||
206 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
207 | if(sem->aff_obs) { | ||
208 | my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t); | ||
209 | } | ||
210 | if(!my_queue) { | ||
211 | my_queue = sem->shortest_queue; | ||
212 | } | ||
213 | #else | ||
214 | my_queue = sem->shortest_queue; | ||
215 | #endif | ||
216 | |||
217 | if (my_queue->owner) { | ||
218 | /* resource is not free => must suspend and wait */ | ||
219 | TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n", | ||
220 | kfmlp_get_idx(sem, my_queue), | ||
221 | my_queue->count); | ||
222 | |||
223 | init_waitqueue_entry(&wait, t); | ||
224 | |||
225 | /* FIXME: interruptible would be nice some day */ | ||
226 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
227 | |||
228 | __add_wait_queue_tail_exclusive(&my_queue->wait, &wait); | ||
229 | |||
230 | TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n", | ||
231 | kfmlp_get_idx(sem, my_queue), | ||
232 | (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil", | ||
233 | (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1); | ||
234 | |||
235 | /* check if we need to activate priority inheritance */ | ||
236 | //if (edf_higher_prio(t, my_queue->hp_waiter)) | ||
237 | if (litmus->compare(t, my_queue->hp_waiter)) { | ||
238 | my_queue->hp_waiter = t; | ||
239 | TRACE_CUR("queue %d: %s/%d is new hp_waiter\n", | ||
240 | kfmlp_get_idx(sem, my_queue), | ||
241 | t->comm, t->pid); | ||
242 | |||
243 | //if (edf_higher_prio(t, my_queue->owner)) | ||
244 | if (litmus->compare(t, my_queue->owner)) { | ||
245 | litmus->increase_prio(my_queue->owner, my_queue->hp_waiter); | ||
246 | } | ||
247 | } | ||
248 | |||
249 | ++(my_queue->count); | ||
250 | |||
251 | if(my_queue == sem->shortest_queue) { | ||
252 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
253 | TRACE_CUR("queue %d is the shortest\n", | ||
254 | kfmlp_get_idx(sem, sem->shortest_queue)); | ||
255 | } | ||
256 | |||
257 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
258 | if(sem->aff_obs) { | ||
259 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); | ||
260 | } | ||
261 | #endif | ||
262 | |||
263 | /* release lock before sleeping */ | ||
264 | spin_unlock_irqrestore(&sem->lock, flags); | ||
265 | |||
266 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
267 | * when we wake up; we are guaranteed to have the lock since | ||
268 | * there is only one wake up per release (or steal). | ||
269 | */ | ||
270 | suspend_for_lock(); | ||
271 | |||
272 | |||
273 | if(my_queue->owner == t) { | ||
274 | TRACE_CUR("queue %d: acquired through waiting\n", | ||
275 | kfmlp_get_idx(sem, my_queue)); | ||
276 | } | ||
277 | else { | ||
278 | /* this case may happen if our wait entry was stolen | ||
279 | between queues. record where we went. */ | ||
280 | my_queue = kfmlp_get_queue(sem, t); | ||
281 | |||
282 | BUG_ON(!my_queue); | ||
283 | TRACE_CUR("queue %d: acquired through stealing\n", | ||
284 | kfmlp_get_idx(sem, my_queue)); | ||
285 | } | ||
286 | } | ||
287 | else { | ||
288 | TRACE_CUR("queue %d: acquired immediately\n", | ||
289 | kfmlp_get_idx(sem, my_queue)); | ||
290 | |||
291 | my_queue->owner = t; | ||
292 | |||
293 | ++(my_queue->count); | ||
294 | |||
295 | if(my_queue == sem->shortest_queue) { | ||
296 | sem->shortest_queue = kfmlp_find_shortest(sem, my_queue); | ||
297 | TRACE_CUR("queue %d is the shortest\n", | ||
298 | kfmlp_get_idx(sem, sem->shortest_queue)); | ||
299 | } | ||
300 | |||
301 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
302 | if(sem->aff_obs) { | ||
303 | sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t); | ||
304 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t); | ||
305 | } | ||
306 | #endif | ||
307 | |||
308 | spin_unlock_irqrestore(&sem->lock, flags); | ||
309 | } | ||
310 | |||
311 | |||
312 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
313 | if(sem->aff_obs) { | ||
314 | return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue); | ||
315 | } | ||
316 | #endif | ||
317 | return kfmlp_get_idx(sem, my_queue); | ||
318 | } | ||
319 | |||
320 | |||
321 | int kfmlp_unlock(struct litmus_lock* l) | ||
322 | { | ||
323 | struct task_struct *t = current, *next; | ||
324 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
325 | struct kfmlp_queue *my_queue, *to_steal_from; | ||
326 | unsigned long flags; | ||
327 | int err = 0; | ||
328 | |||
329 | my_queue = kfmlp_get_queue(sem, t); | ||
330 | |||
331 | if (!my_queue) { | ||
332 | err = -EINVAL; | ||
333 | goto out; | ||
334 | } | ||
335 | |||
336 | spin_lock_irqsave(&sem->lock, flags); | ||
337 | |||
338 | TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue)); | ||
339 | |||
340 | my_queue->owner = NULL; // clear ownership | ||
341 | --(my_queue->count); | ||
342 | |||
343 | if(my_queue->count < sem->shortest_queue->count) | ||
344 | { | ||
345 | sem->shortest_queue = my_queue; | ||
346 | TRACE_CUR("queue %d is the shortest\n", | ||
347 | kfmlp_get_idx(sem, sem->shortest_queue)); | ||
348 | } | ||
349 | |||
350 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
351 | if(sem->aff_obs) { | ||
352 | sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t); | ||
353 | sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t); | ||
354 | } | ||
355 | #endif | ||
356 | |||
357 | /* we lose the benefit of priority inheritance (if any) */ | ||
358 | if (tsk_rt(t)->inh_task) | ||
359 | litmus->decrease_prio(t, NULL); | ||
360 | |||
361 | |||
362 | /* check if there are jobs waiting for this resource */ | ||
363 | RETRY: | ||
364 | next = __waitqueue_remove_first(&my_queue->wait); | ||
365 | if (next) { | ||
366 | /* next becomes the resource holder */ | ||
367 | my_queue->owner = next; | ||
368 | |||
369 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
370 | if(sem->aff_obs) { | ||
371 | sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next); | ||
372 | } | ||
373 | #endif | ||
374 | |||
375 | TRACE_CUR("queue %d: lock ownership passed to %s/%d\n", | ||
376 | kfmlp_get_idx(sem, my_queue), next->comm, next->pid); | ||
377 | |||
378 | /* determine new hp_waiter if necessary */ | ||
379 | if (next == my_queue->hp_waiter) { | ||
380 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
381 | my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next); | ||
382 | if (my_queue->hp_waiter) | ||
383 | TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue)); | ||
384 | else | ||
385 | TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue)); | ||
386 | } else { | ||
387 | /* Well, if next is not the highest-priority waiter, | ||
388 | * then it ought to inherit the highest-priority | ||
389 | * waiter's priority. */ | ||
390 | litmus->increase_prio(next, my_queue->hp_waiter); | ||
391 | } | ||
392 | |||
393 | /* wake up next */ | ||
394 | wake_up_process(next); | ||
395 | } | ||
396 | else { | ||
397 | // TODO: put this stealing logic before we attempt to release | ||
398 | // our resource. (simplifies code and gets rid of the ugly goto RETRY.) | ||
399 | wait_queue_t *wait; | ||
400 | |||
401 | TRACE_CUR("queue %d: looking to steal someone...\n", | ||
402 | kfmlp_get_idx(sem, my_queue)); | ||
403 | |||
404 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
405 | next = (sem->aff_obs) ? | ||
406 | sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) : | ||
407 | kfmlp_select_hp_steal(sem, &wait, &to_steal_from); | ||
408 | #else | ||
409 | next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from); | ||
410 | #endif | ||
411 | |||
412 | if(next) { | ||
413 | TRACE_CUR("queue %d: stealing %s/%d from queue %d\n", | ||
414 | kfmlp_get_idx(sem, my_queue), | ||
415 | next->comm, next->pid, | ||
416 | kfmlp_get_idx(sem, to_steal_from)); | ||
417 | |||
418 | kfmlp_steal_node(sem, my_queue, wait, to_steal_from); | ||
419 | |||
420 | goto RETRY; // will succeed this time. | ||
421 | } | ||
422 | else { | ||
423 | TRACE_CUR("queue %d: no one to steal.\n", | ||
424 | kfmlp_get_idx(sem, my_queue)); | ||
425 | } | ||
426 | } | ||
427 | |||
428 | spin_unlock_irqrestore(&sem->lock, flags); | ||
429 | |||
430 | out: | ||
431 | return err; | ||
432 | } | ||
433 | |||
434 | int kfmlp_close(struct litmus_lock* l) | ||
435 | { | ||
436 | struct task_struct *t = current; | ||
437 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
438 | struct kfmlp_queue *my_queue; | ||
439 | unsigned long flags; | ||
440 | |||
441 | int owner; | ||
442 | |||
443 | spin_lock_irqsave(&sem->lock, flags); | ||
444 | |||
445 | my_queue = kfmlp_get_queue(sem, t); | ||
446 | owner = (my_queue) ? (my_queue->owner == t) : 0; | ||
447 | |||
448 | spin_unlock_irqrestore(&sem->lock, flags); | ||
449 | |||
450 | if (owner) | ||
451 | kfmlp_unlock(l); | ||
452 | |||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | void kfmlp_free(struct litmus_lock* l) | ||
457 | { | ||
458 | struct kfmlp_semaphore *sem = kfmlp_from_lock(l); | ||
459 | kfree(sem->queues); | ||
460 | kfree(sem); | ||
461 | } | ||
462 | |||
463 | |||
464 | |||
465 | struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args) | ||
466 | { | ||
467 | struct kfmlp_semaphore* sem; | ||
468 | int num_resources = 0; | ||
469 | int i; | ||
470 | |||
471 | if(!access_ok(VERIFY_READ, args, sizeof(num_resources))) | ||
472 | { | ||
473 | return(NULL); | ||
474 | } | ||
475 | if(__copy_from_user(&num_resources, args, sizeof(num_resources))) | ||
476 | { | ||
477 | return(NULL); | ||
478 | } | ||
479 | if(num_resources < 1) | ||
480 | { | ||
481 | return(NULL); | ||
482 | } | ||
483 | |||
484 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
485 | if(!sem) | ||
486 | { | ||
487 | return(NULL); | ||
488 | } | ||
489 | |||
490 | sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL); | ||
491 | if(!sem->queues) | ||
492 | { | ||
493 | kfree(sem); | ||
494 | return(NULL); | ||
495 | } | ||
496 | |||
497 | sem->litmus_lock.ops = ops; | ||
498 | spin_lock_init(&sem->lock); | ||
499 | sem->num_resources = num_resources; | ||
500 | |||
501 | for(i = 0; i < num_resources; ++i) | ||
502 | { | ||
503 | sem->queues[i].owner = NULL; | ||
504 | sem->queues[i].hp_waiter = NULL; | ||
505 | init_waitqueue_head(&sem->queues[i].wait); | ||
506 | sem->queues[i].count = 0; | ||
507 | } | ||
508 | |||
509 | sem->shortest_queue = &sem->queues[0]; | ||
510 | |||
511 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
512 | sem->aff_obs = NULL; | ||
513 | #endif | ||
514 | |||
515 | return &sem->litmus_lock; | ||
516 | } | ||
517 | |||
518 | |||
519 | |||
520 | |||
521 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
522 | |||
523 | static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica) | ||
524 | { | ||
525 | int gpu = replica % aff->nr_rsrc; | ||
526 | return gpu; | ||
527 | } | ||
528 | |||
529 | static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica) | ||
530 | { | ||
531 | int gpu = __replica_to_gpu(aff, replica) + aff->offset; | ||
532 | return gpu; | ||
533 | } | ||
534 | |||
535 | static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu) | ||
536 | { | ||
537 | int replica = gpu - aff->offset; | ||
538 | return replica; | ||
539 | } | ||
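
With nr_simult replicas multiplexed onto each of nr_rsrc physical GPUs, starting at a configurable offset, the helpers above map replica r to GPU (r % nr_rsrc) + offset and back. A small worked example of that arithmetic with made-up numbers:

/* Illustrative sketch of the replica <-> GPU mapping used above:
 * gpu = (replica % nr_rsrc) + offset. Numbers here are made up. */
#include <stdio.h>

int main(void)
{
	int nr_rsrc = 2;   /* physical GPUs managed by this lock */
	int nr_simult = 3; /* replicas (simultaneous users) per GPU */
	int offset = 4;    /* first GPU id handled by this lock */
	int replica;

	for (replica = 0; replica < nr_rsrc * nr_simult; ++replica) {
		int gpu = (replica % nr_rsrc) + offset;
		printf("replica %d -> GPU %d\n", replica, gpu);
	}
	/* replicas {0,2,4} share GPU 4; replicas {1,3,5} share GPU 5 */
	return 0;
}
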
540 | |||
541 | |||
542 | int kfmlp_aff_obs_close(struct affinity_observer* obs) | ||
543 | { | ||
544 | return 0; | ||
545 | } | ||
546 | |||
547 | void kfmlp_aff_obs_free(struct affinity_observer* obs) | ||
548 | { | ||
549 | struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs); | ||
550 | kfree(kfmlp_aff->nr_cur_users_on_rsrc); | ||
551 | kfree(kfmlp_aff->q_info); | ||
552 | kfree(kfmlp_aff); | ||
553 | } | ||
554 | |||
555 | static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops, | ||
556 | struct kfmlp_affinity_ops* kfmlp_ops, | ||
557 | void* __user args) | ||
558 | { | ||
559 | struct kfmlp_affinity* kfmlp_aff; | ||
560 | struct gpu_affinity_observer_args aff_args; | ||
561 | struct kfmlp_semaphore* sem; | ||
562 | int i; | ||
563 | unsigned long flags; | ||
564 | |||
565 | if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) { | ||
566 | return(NULL); | ||
567 | } | ||
568 | if(__copy_from_user(&aff_args, args, sizeof(aff_args))) { | ||
569 | return(NULL); | ||
570 | } | ||
571 | |||
572 | sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od); | ||
573 | |||
574 | if(sem->litmus_lock.type != KFMLP_SEM) { | ||
575 | TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type); | ||
576 | return(NULL); | ||
577 | } | ||
578 | |||
579 | if((aff_args.nr_simult_users <= 0) || | ||
580 | (sem->num_resources%aff_args.nr_simult_users != 0)) { | ||
581 | TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users " | ||
582 | "(%d) per replica. #replicas should be evenly divisible " | ||
583 | "by #simult_users.\n", | ||
584 | sem->litmus_lock.ident, | ||
585 | sem->num_resources, | ||
586 | aff_args.nr_simult_users); | ||
587 | return(NULL); | ||
588 | } | ||
589 | |||
590 | // if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) { | ||
591 | // TRACE_CUR("System does not support #simult_users > %d. %d requested.\n", | ||
592 | // NV_MAX_SIMULT_USERS, aff_args.nr_simult_users); | ||
593 | //// return(NULL); | ||
594 | // } | ||
595 | |||
596 | kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL); | ||
597 | if(!kfmlp_aff) { | ||
598 | return(NULL); | ||
599 | } | ||
600 | |||
601 | kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL); | ||
602 | if(!kfmlp_aff->q_info) { | ||
603 | kfree(kfmlp_aff); | ||
604 | return(NULL); | ||
605 | } | ||
606 | |||
607 | kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL); | ||
608 | if(!kfmlp_aff->nr_cur_users_on_rsrc) { | ||
609 | kfree(kfmlp_aff->q_info); | ||
610 | kfree(kfmlp_aff); | ||
611 | return(NULL); | ||
612 | } | ||
613 | |||
614 | affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs); | ||
615 | |||
616 | kfmlp_aff->ops = kfmlp_ops; | ||
617 | kfmlp_aff->offset = aff_args.replica_to_gpu_offset; | ||
618 | kfmlp_aff->nr_simult = aff_args.nr_simult_users; | ||
619 | kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult; | ||
620 | |||
621 | memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc)); | ||
622 | |||
623 | for(i = 0; i < sem->num_resources; ++i) { | ||
624 | kfmlp_aff->q_info[i].q = &sem->queues[i]; | ||
625 | kfmlp_aff->q_info[i].estimated_len = 0; | ||
626 | |||
627 | // multiple q_info's will point to the same resource (aka GPU) if | ||
628 | // aff_args.nr_simult_users > 1 | ||
629 | kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)]; | ||
630 | } | ||
631 | |||
632 | // attach observer to the lock | ||
633 | spin_lock_irqsave(&sem->lock, flags); | ||
634 | sem->aff_obs = kfmlp_aff; | ||
635 | spin_unlock_irqrestore(&sem->lock, flags); | ||
636 | |||
637 | return &kfmlp_aff->obs; | ||
638 | } | ||
639 | |||
640 | |||
641 | |||
642 | |||
643 | static int gpu_replica_to_resource(struct kfmlp_affinity* aff, | ||
644 | struct kfmlp_queue* fq) { | ||
645 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
646 | return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq))); | ||
647 | } | ||
648 | |||
649 | |||
650 | // Smart KFMLP Affinity | ||
651 | |||
652 | //static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff) | ||
653 | //{ | ||
654 | // struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
655 | // struct kfmlp_queue_info *shortest = &aff->q_info[0]; | ||
656 | // int i; | ||
657 | // | ||
658 | // for(i = 1; i < sem->num_resources; ++i) { | ||
659 | // if(aff->q_info[i].estimated_len < shortest->estimated_len) { | ||
660 | // shortest = &aff->q_info[i]; | ||
661 | // } | ||
662 | // } | ||
663 | // | ||
664 | // return(shortest); | ||
665 | //} | ||
666 | |||
667 | struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | ||
668 | { | ||
669 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
670 | lt_t min_len; | ||
671 | int min_nr_users; | ||
672 | struct kfmlp_queue_info *shortest; | ||
673 | struct kfmlp_queue *to_enqueue; | ||
674 | int i; | ||
675 | int affinity_gpu; | ||
676 | |||
677 | // simply pick the shortest queue if we have no affinity, or if we | ||
678 | // have affinity with the shortest | ||
679 | if(unlikely(tsk_rt(t)->last_gpu < 0)) { | ||
680 | affinity_gpu = aff->offset; // first gpu | ||
681 | TRACE_CUR("no affinity\n"); | ||
682 | } | ||
683 | else { | ||
684 | affinity_gpu = tsk_rt(t)->last_gpu; | ||
685 | } | ||
686 | |||
687 | // all things being equal, let's start with the queue with which we have | ||
688 | // affinity. this helps us maintain affinity even when we don't have | ||
689 | // an estimate for local-affinity execution time (i.e., 2nd time on GPU) | ||
690 | shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)]; | ||
691 | |||
692 | // if(shortest == aff->shortest_queue) { | ||
693 | // TRACE_CUR("special case: have affinity with shortest queue\n"); | ||
694 | // goto out; | ||
695 | // } | ||
696 | |||
697 | min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL); | ||
698 | min_nr_users = *(shortest->nr_cur_users); | ||
699 | |||
700 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | ||
701 | get_gpu_estimate(t, MIG_LOCAL), | ||
702 | kfmlp_get_idx(sem, shortest->q), | ||
703 | min_len); | ||
704 | |||
705 | for(i = 0; i < sem->num_resources; ++i) { | ||
706 | if(&aff->q_info[i] != shortest) { | ||
707 | |||
708 | lt_t est_len = | ||
709 | aff->q_info[i].estimated_len + | ||
710 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))); | ||
711 | |||
712 | // pick this queue if its estimated length is shorter, or if the | ||
713 | // estimates are equal and it has fewer total users. | ||
714 | // | ||
715 | // tie-break on the smallest number of simultaneous users. this only | ||
716 | // kicks in when there is more than one empty queue. | ||
717 | if((est_len < min_len) || | ||
718 | ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
719 | shortest = &aff->q_info[i]; | ||
720 | min_len = est_len; | ||
721 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
722 | } | ||
723 | |||
724 | TRACE_CUR("cs is %llu on queue %d: est len = %llu\n", | ||
725 | get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))), | ||
726 | kfmlp_get_idx(sem, aff->q_info[i].q), | ||
727 | est_len); | ||
728 | } | ||
729 | } | ||
730 | |||
731 | to_enqueue = shortest->q; | ||
732 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
733 | kfmlp_get_idx(sem, to_enqueue), | ||
734 | kfmlp_get_idx(sem, sem->shortest_queue)); | ||
735 | |||
736 | return to_enqueue; | ||
737 | } | ||
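
gpu_kfmlp_advise_enqueue() scores each replica as the queue's current estimated length plus this task's execution estimate for the migration distance the replica would incur, and seeds the search with the task's last-used GPU so ties keep it local. The sketch below reproduces that scoring over plain arrays; est_for_dist() and dist() are invented stand-ins for get_gpu_estimate() and gpu_migration_distance().

/* Illustrative sketch: affinity-aware queue scoring in the spirit of
 * gpu_kfmlp_advise_enqueue(). All numbers are made up. */
#include <stdio.h>

#define NR_REPLICAS 4

/* invented per-migration-distance execution estimates (ns) */
static unsigned long long est_for_dist(int d)
{
	static const unsigned long long est[] = { 100, 150, 250 };
	return est[d];
}

/* invented distance: 0 = same GPU, 1 = neighbouring, 2 = far */
static int dist(int a, int b)
{
	return (a == b) ? 0 : ((a / 2 == b / 2) ? 1 : 2);
}

int main(void)
{
	unsigned long long q_est[NR_REPLICAS] = { 400, 50, 0, 500 };
	int nr_users[NR_REPLICAS] = { 1, 1, 2, 1 };
	int last_gpu = 0;              /* the task's affinity */
	int seed = last_gpu;           /* start with the affinity replica */
	int best = seed;
	unsigned long long best_len = q_est[seed] + est_for_dist(0);
	int i;

	for (i = 0; i < NR_REPLICAS; ++i) {
		unsigned long long len;

		if (i == seed)
			continue; /* already scored as the starting candidate */
		len = q_est[i] + est_for_dist(dist(last_gpu, i));
		if (len < best_len ||
		    (len == best_len && nr_users[i] < nr_users[best])) {
			best = i;
			best_len = len;
		}
	}

	/* the near, short queue (replica 1) beats the empty but far replica 2 */
	printf("enqueue on replica %d (score %llu)\n", best, best_len);
	return 0;
}
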
738 | |||
739 | struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) | ||
740 | { | ||
741 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
742 | |||
743 | // For now, just steal highest priority waiter | ||
744 | // TODO: Implement affinity-aware stealing. | ||
745 | |||
746 | return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); | ||
747 | } | ||
748 | |||
749 | |||
750 | void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
751 | { | ||
752 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
753 | int replica = kfmlp_get_idx(sem, fq); | ||
754 | int gpu = replica_to_gpu(aff, replica); | ||
755 | struct kfmlp_queue_info *info = &aff->q_info[replica]; | ||
756 | lt_t est_time; | ||
757 | lt_t est_len_before; | ||
758 | |||
759 | if(current == t) { | ||
760 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
761 | } | ||
762 | |||
763 | est_len_before = info->estimated_len; | ||
764 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
765 | info->estimated_len += est_time; | ||
766 | |||
767 | TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n", | ||
768 | kfmlp_get_idx(sem, info->q), | ||
769 | est_len_before, est_time, | ||
770 | info->estimated_len); | ||
771 | |||
772 | // if(aff->shortest_queue == info) { | ||
773 | // // we may no longer be the shortest | ||
774 | // aff->shortest_queue = kfmlp_aff_find_shortest(aff); | ||
775 | // | ||
776 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
777 | // kfmlp_get_idx(sem, aff->shortest_queue->q), | ||
778 | // aff->shortest_queue->q->count, | ||
779 | // aff->shortest_queue->estimated_len); | ||
780 | // } | ||
781 | } | ||
782 | |||
783 | void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
784 | { | ||
785 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
786 | int replica = kfmlp_get_idx(sem, fq); | ||
787 | int gpu = replica_to_gpu(aff, replica); | ||
788 | struct kfmlp_queue_info *info = &aff->q_info[replica]; | ||
789 | lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
790 | |||
791 | if(est_time > info->estimated_len) { | ||
792 | WARN_ON(1); | ||
793 | info->estimated_len = 0; | ||
794 | } | ||
795 | else { | ||
796 | info->estimated_len -= est_time; | ||
797 | } | ||
798 | |||
799 | TRACE_CUR("fq %d est len is now %llu\n", | ||
800 | kfmlp_get_idx(sem, info->q), | ||
801 | info->estimated_len); | ||
802 | |||
803 | // check to see if we're the shortest queue now. | ||
804 | // if((aff->shortest_queue != info) && | ||
805 | // (aff->shortest_queue->estimated_len > info->estimated_len)) { | ||
806 | // | ||
807 | // aff->shortest_queue = info; | ||
808 | // | ||
809 | // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n", | ||
810 | // kfmlp_get_idx(sem, info->q), | ||
811 | // info->q->count, | ||
812 | // info->estimated_len); | ||
813 | // } | ||
814 | } | ||
815 | |||
816 | void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
817 | { | ||
818 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
819 | int replica = kfmlp_get_idx(sem, fq); | ||
820 | int gpu = replica_to_gpu(aff, replica); | ||
821 | |||
822 | tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration | ||
823 | |||
824 | TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n", | ||
825 | t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration); | ||
826 | |||
827 | // count the number of resource holders | ||
828 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
829 | |||
830 | reg_nv_device(gpu, 1, t); // register | ||
831 | |||
832 | |||
833 | tsk_rt(t)->suspend_gpu_tracker_on_block = 0; | ||
834 | reset_gpu_tracker(t); | ||
835 | start_gpu_tracker(t); | ||
836 | } | ||
837 | |||
838 | void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
839 | { | ||
840 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
841 | int replica = kfmlp_get_idx(sem, fq); | ||
842 | int gpu = replica_to_gpu(aff, replica); | ||
843 | lt_t est_time; | ||
844 | |||
845 | stop_gpu_tracker(t); // stop the tracker before we do anything else. | ||
846 | |||
847 | est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu)); | ||
848 | |||
849 | tsk_rt(t)->last_gpu = gpu; | ||
850 | |||
851 | // count the number of resource holders | ||
852 | --(*(aff->q_info[replica].nr_cur_users)); | ||
853 | |||
854 | reg_nv_device(gpu, 0, t); // unregister | ||
855 | |||
856 | // update estimates | ||
857 | update_gpu_estimate(t, get_gpu_time(t)); | ||
858 | |||
859 | TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %lld\n", | ||
860 | t->comm, t->pid, gpu, | ||
861 | get_gpu_time(t), | ||
862 | est_time, | ||
863 | (long long)get_gpu_time(t) - (long long)est_time); | ||
864 | } | ||
865 | |||
866 | struct kfmlp_affinity_ops gpu_kfmlp_affinity = | ||
867 | { | ||
868 | .advise_enqueue = gpu_kfmlp_advise_enqueue, | ||
869 | .advise_steal = gpu_kfmlp_advise_steal, | ||
870 | .notify_enqueue = gpu_kfmlp_notify_enqueue, | ||
871 | .notify_dequeue = gpu_kfmlp_notify_dequeue, | ||
872 | .notify_acquired = gpu_kfmlp_notify_acquired, | ||
873 | .notify_freed = gpu_kfmlp_notify_freed, | ||
874 | .replica_to_resource = gpu_replica_to_resource, | ||
875 | }; | ||
876 | |||
877 | struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
878 | void* __user args) | ||
879 | { | ||
880 | return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args); | ||
881 | } | ||
882 | |||
883 | |||
884 | |||
885 | |||
886 | |||
887 | |||
888 | |||
889 | |||
890 | // Simple KFMLP Affinity (standard KFMLP with auto-gpu registration) | ||
891 | |||
892 | struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t) | ||
893 | { | ||
894 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
895 | int min_count; | ||
896 | int min_nr_users; | ||
897 | struct kfmlp_queue_info *shortest; | ||
898 | struct kfmlp_queue *to_enqueue; | ||
899 | int i; | ||
900 | |||
901 | // TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n"); | ||
902 | |||
903 | shortest = &aff->q_info[0]; | ||
904 | min_count = shortest->q->count; | ||
905 | min_nr_users = *(shortest->nr_cur_users); | ||
906 | |||
907 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
908 | kfmlp_get_idx(sem, shortest->q), | ||
909 | shortest->q->count, | ||
910 | min_nr_users); | ||
911 | |||
912 | for(i = 1; i < sem->num_resources; ++i) { | ||
913 | int len = aff->q_info[i].q->count; | ||
914 | |||
915 | // queue is smaller, or they're equal and the other has a smaller number | ||
916 | // of total users. | ||
917 | // | ||
918 | // tie-break on the smallest number of simultaneous users. this only | ||
919 | // kicks in when there is more than one empty queue. | ||
920 | if((len < min_count) || | ||
921 | ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) { | ||
922 | shortest = &aff->q_info[i]; | ||
923 | min_count = shortest->q->count; | ||
924 | min_nr_users = *(aff->q_info[i].nr_cur_users); | ||
925 | } | ||
926 | |||
927 | TRACE_CUR("queue %d: waiters = %d, total holders = %d\n", | ||
928 | kfmlp_get_idx(sem, aff->q_info[i].q), | ||
929 | aff->q_info[i].q->count, | ||
930 | *(aff->q_info[i].nr_cur_users)); | ||
931 | } | ||
932 | |||
933 | to_enqueue = shortest->q; | ||
934 | TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n", | ||
935 | kfmlp_get_idx(sem, to_enqueue), | ||
936 | kfmlp_get_idx(sem, sem->shortest_queue)); | ||
937 | |||
938 | return to_enqueue; | ||
939 | } | ||
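The scan above always keeps the queue with the fewest waiters and, on a tie, prefers the one with fewer current resource holders. The same selection, reduced to a standalone sketch over a plain array (names are illustrative, independent of the kfmlp structures):

    struct q_stat { int waiters; int holders; };

    /* index with the fewest waiters; ties go to the queue with fewer holders */
    static int pick_shortest(const struct q_stat *q, int n)
    {
            int best = 0;
            int i;
            for (i = 1; i < n; ++i) {
                    if (q[i].waiters < q[best].waiters ||
                        (q[i].waiters == q[best].waiters &&
                         q[i].holders < q[best].holders))
                            best = i;
            }
            return best;
    }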
940 | |||
941 | struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from) | ||
942 | { | ||
943 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
944 | // TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n"); | ||
945 | return kfmlp_select_hp_steal(sem, to_steal, to_steal_from); | ||
946 | } | ||
947 | |||
948 | void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
949 | { | ||
950 | // TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n"); | ||
951 | } | ||
952 | |||
953 | void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
954 | { | ||
955 | // TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n"); | ||
956 | } | ||
957 | |||
958 | void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
959 | { | ||
960 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
961 | int replica = kfmlp_get_idx(sem, fq); | ||
962 | int gpu = replica_to_gpu(aff, replica); | ||
963 | |||
964 | // TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n"); | ||
965 | |||
966 | // count the number of resource holders | ||
967 | ++(*(aff->q_info[replica].nr_cur_users)); | ||
968 | |||
969 | reg_nv_device(gpu, 1, t); // register | ||
970 | } | ||
971 | |||
972 | void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t) | ||
973 | { | ||
974 | struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock); | ||
975 | int replica = kfmlp_get_idx(sem, fq); | ||
976 | int gpu = replica_to_gpu(aff, replica); | ||
977 | |||
978 | // TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n"); | ||
979 | // count the number of resource holders | ||
980 | --(*(aff->q_info[replica].nr_cur_users)); | ||
981 | |||
982 | reg_nv_device(gpu, 0, t); // unregister | ||
983 | } | ||
984 | |||
985 | struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity = | ||
986 | { | ||
987 | .advise_enqueue = simple_gpu_kfmlp_advise_enqueue, | ||
988 | .advise_steal = simple_gpu_kfmlp_advise_steal, | ||
989 | .notify_enqueue = simple_gpu_kfmlp_notify_enqueue, | ||
990 | .notify_dequeue = simple_gpu_kfmlp_notify_dequeue, | ||
991 | .notify_acquired = simple_gpu_kfmlp_notify_acquired, | ||
992 | .notify_freed = simple_gpu_kfmlp_notify_freed, | ||
993 | .replica_to_resource = gpu_replica_to_resource, | ||
994 | }; | ||
995 | |||
996 | struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops, | ||
997 | void* __user args) | ||
998 | { | ||
999 | return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args); | ||
1000 | } | ||
1001 | |||
1002 | #endif | ||
1003 | |||
diff --git a/litmus/litmus.c b/litmus/litmus.c index dc94be71bfb6..2911e7ec7029 100644 --- a/litmus/litmus.c +++ b/litmus/litmus.c | |||
@@ -23,6 +23,14 @@ | |||
23 | #include <litmus/affinity.h> | 23 | #include <litmus/affinity.h> |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | #ifdef CONFIG_LITMUS_NVIDIA | ||
27 | #include <litmus/nvidia_info.h> | ||
28 | #endif | ||
29 | |||
30 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
31 | #include <litmus/aux_tasks.h> | ||
32 | #endif | ||
33 | |||
26 | /* Number of RT tasks that exist in the system */ | 34 | /* Number of RT tasks that exist in the system */ |
27 | atomic_t rt_task_count = ATOMIC_INIT(0); | 35 | atomic_t rt_task_count = ATOMIC_INIT(0); |
28 | 36 | ||
@@ -135,6 +143,16 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) | |||
135 | pid, tp.budget_policy); | 143 | pid, tp.budget_policy); |
136 | goto out_unlock; | 144 | goto out_unlock; |
137 | } | 145 | } |
146 | if (tp.budget_signal_policy != NO_SIGNALS && | ||
147 | tp.budget_signal_policy != QUANTUM_SIGNALS && | ||
148 | tp.budget_signal_policy != PRECISE_SIGNALS) | ||
149 | { | ||
150 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
151 | "because unsupported budget signalling policy " | ||
152 | "specified (%d)\n", | ||
153 | pid, tp.budget_signal_policy); | ||
154 | goto out_unlock; | ||
155 | } | ||
138 | 156 | ||
139 | target->rt_param.task_params = tp; | 157 | target->rt_param.task_params = tp; |
140 | 158 | ||
@@ -272,6 +290,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job) | |||
272 | return retval; | 290 | return retval; |
273 | } | 291 | } |
274 | 292 | ||
293 | |||
275 | /* sys_null_call() is only used for determining raw system call | 294 | /* sys_null_call() is only used for determining raw system call |
276 | * overheads (kernel entry, kernel exit). It has no useful side effects. | 295 | * overheads (kernel entry, kernel exit). It has no useful side effects. |
277 | * If ts is non-NULL, then the current Feather-Trace time is recorded. | 296 | * If ts is non-NULL, then the current Feather-Trace time is recorded. |
@@ -289,12 +308,117 @@ asmlinkage long sys_null_call(cycles_t __user *ts) | |||
289 | return ret; | 308 | return ret; |
290 | } | 309 | } |
291 | 310 | ||
311 | |||
312 | asmlinkage long sys_sched_trace_event(int event, struct st_inject_args __user *__args) | ||
313 | { | ||
314 | long retval = 0; | ||
315 | struct task_struct* t = current; | ||
316 | |||
317 | struct st_inject_args args; | ||
318 | |||
319 | if (is_realtime(t)) { | ||
320 | printk(KERN_WARNING "Only non-real-time tasks may inject sched_trace events.\n"); | ||
321 | retval = -EINVAL; | ||
322 | goto out; | ||
323 | } | ||
324 | |||
325 | if (__args && copy_from_user(&args, __args, sizeof(args))) { | ||
326 | retval = -EFAULT; | ||
327 | goto out; | ||
328 | } | ||
329 | |||
330 | switch(event) { | ||
331 | /*************************************/ | ||
332 | /* events that don't need parameters */ | ||
333 | /*************************************/ | ||
334 | case ST_INJECT_NAME: | ||
335 | sched_trace_task_name(t); | ||
336 | break; | ||
337 | case ST_INJECT_PARAM: | ||
338 | /* presumes sporadic_task_ns() has already been called | ||
339 | * and valid data has been initialized even if the calling | ||
340 | * task is SCHED_NORMAL. */ | ||
341 | sched_trace_task_param(t); | ||
342 | break; | ||
343 | |||
344 | /*******************************/ | ||
345 | /* events that need parameters */ | ||
346 | /*******************************/ | ||
347 | case ST_INJECT_COMPLETION: | ||
348 | if (!__args) { | ||
349 | retval = -EINVAL; | ||
350 | goto out; | ||
351 | } | ||
352 | |||
353 | /* slam in the data */ | ||
354 | t->rt_param.job_params.job_no = args.job_no; | ||
355 | |||
356 | sched_trace_task_completion(t, 0); | ||
357 | break; | ||
358 | case ST_INJECT_RELEASE: | ||
359 | if (!__args) { | ||
360 | retval = -EINVAL; | ||
361 | goto out; | ||
362 | } | ||
363 | |||
364 | /* slam in the data */ | ||
365 | tsk_rt(t)->job_params.release = args.release; | ||
366 | tsk_rt(t)->job_params.deadline = args.deadline; | ||
367 | |||
368 | sched_trace_task_release(t); | ||
369 | break; | ||
370 | |||
371 | /**********************/ | ||
372 | /* unsupported events */ | ||
373 | /**********************/ | ||
374 | default: | ||
375 | retval = -EINVAL; | ||
376 | break; | ||
377 | } | ||
378 | |||
379 | out: | ||
380 | return retval; | ||
381 | } | ||
382 | |||
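For the two parameterized events, userspace fills in a struct st_inject_args before invoking the new syscall; NULL arguments are only accepted for ST_INJECT_NAME and ST_INJECT_PARAM. A userspace sketch, assuming a liblitmus-style wrapper named litmus_sched_trace_event() (hypothetical name) and the st_inject_args layout implied by the handler above:

    /* hypothetical wrapper around sys_sched_trace_event() */
    extern int litmus_sched_trace_event(int event, struct st_inject_args* args);

    static void inject_synthetic_job(unsigned int job_no,
                                     unsigned long long release,
                                     unsigned long long deadline)
    {
            struct st_inject_args args = {
                    .job_no   = job_no,
                    .release  = release,
                    .deadline = deadline,
            };

            /* the caller must be a non-real-time task, or -EINVAL is returned */
            litmus_sched_trace_event(ST_INJECT_RELEASE, &args);
            litmus_sched_trace_event(ST_INJECT_COMPLETION, &args);
    }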
383 | |||
384 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
385 | void init_gpu_affinity_state(struct task_struct* p) | ||
386 | { | ||
387 | // under-damped | ||
388 | //p->rt_param.gpu_fb_param_a = _frac(14008, 10000); | ||
389 | //p->rt_param.gpu_fb_param_b = _frac(16024, 10000); | ||
390 | |||
391 | #if 0 | ||
392 | // empirical | ||
393 | p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000); | ||
394 | p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000); | ||
395 | |||
396 | p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000); | ||
397 | p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000); | ||
398 | |||
399 | p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000); | ||
400 | p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000); | ||
401 | |||
402 | p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000); | ||
403 | p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000); | ||
404 | #endif | ||
405 | p->rt_param.gpu_migration = MIG_NONE; | ||
406 | p->rt_param.last_gpu = -1; | ||
407 | } | ||
408 | #endif | ||
409 | |||
292 | /* p is a real-time task. Re-init its state as a best-effort task. */ | 410 | /* p is a real-time task. Re-init its state as a best-effort task. */ |
293 | static void reinit_litmus_state(struct task_struct* p, int restore) | 411 | static void reinit_litmus_state(struct task_struct* p, int restore) |
294 | { | 412 | { |
295 | struct rt_task user_config = {}; | 413 | struct rt_task user_config = {}; |
296 | void* ctrl_page = NULL; | 414 | void* ctrl_page = NULL; |
297 | 415 | ||
416 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
417 | binheap_order_t prio_order = NULL; | ||
418 | #endif | ||
419 | |||
420 | TRACE_TASK(p, "reinit_litmus_state: restore = %d\n", restore); | ||
421 | |||
298 | if (restore) { | 422 | if (restore) { |
299 | /* Safe user-space provided configuration data. | 423 | /* Safe user-space provided configuration data. |
300 | * and allocated page. */ | 424 | * and allocated page. */ |
@@ -302,48 +426,57 @@ static void reinit_litmus_state(struct task_struct* p, int restore) | |||
302 | ctrl_page = p->rt_param.ctrl_page; | 426 | ctrl_page = p->rt_param.ctrl_page; |
303 | } | 427 | } |
304 | 428 | ||
429 | #ifdef CONFIG_LITMUS_NVIDIA | ||
430 | WARN_ON(p->rt_param.held_gpus != 0); | ||
431 | #endif | ||
432 | |||
433 | #ifdef CONFIG_LITMUS_LOCKING | ||
305 | /* We probably should not be inheriting any task's priority | 434 | /* We probably should not be inheriting any task's priority |
306 | * at this point in time. | 435 | * at this point in time. |
307 | */ | 436 | */ |
308 | WARN_ON(p->rt_param.inh_task); | 437 | WARN_ON(p->rt_param.inh_task); |
438 | #endif | ||
439 | |||
440 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
441 | prio_order = p->rt_param.hp_blocked_tasks.compare; | ||
442 | #endif | ||
309 | 443 | ||
310 | /* Cleanup everything else. */ | 444 | /* Cleanup everything else. */ |
311 | memset(&p->rt_param, 0, sizeof(p->rt_param)); | 445 | memset(&p->rt_param, 0, sizeof(p->rt_param)); |
312 | 446 | ||
447 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
448 | /* also clear out the aux_data. the !restore case is only called on | ||
449 | * fork (initial thread creation). */ | ||
450 | if (!restore) { | ||
451 | memset(&p->aux_data, 0, sizeof(p->aux_data)); | ||
452 | } | ||
453 | #endif | ||
454 | |||
313 | /* Restore preserved fields. */ | 455 | /* Restore preserved fields. */ |
314 | if (restore) { | 456 | if (restore) { |
315 | p->rt_param.task_params = user_config; | 457 | p->rt_param.task_params = user_config; |
316 | p->rt_param.ctrl_page = ctrl_page; | 458 | p->rt_param.ctrl_page = ctrl_page; |
317 | } | 459 | } |
318 | } | ||
319 | 460 | ||
320 | long litmus_admit_task(struct task_struct* tsk) | 461 | #ifdef CONFIG_LITMUS_NVIDIA |
321 | { | 462 | INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node); |
322 | long retval = 0; | 463 | #endif |
323 | 464 | ||
324 | BUG_ON(is_realtime(tsk)); | 465 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) |
466 | init_gpu_affinity_state(p); | ||
467 | #endif | ||
325 | 468 | ||
326 | tsk_rt(tsk)->heap_node = NULL; | 469 | #ifdef CONFIG_LITMUS_NESTED_LOCKING |
327 | tsk_rt(tsk)->rel_heap = NULL; | 470 | INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order); |
471 | raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock); | ||
472 | #endif | ||
473 | } | ||
328 | 474 | ||
329 | if (get_rt_relative_deadline(tsk) == 0 || | ||
330 | get_exec_cost(tsk) > | ||
331 | min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { | ||
332 | TRACE_TASK(tsk, | ||
333 | "litmus admit: invalid task parameters " | ||
334 | "(e = %lu, p = %lu, d = %lu)\n", | ||
335 | get_exec_cost(tsk), get_rt_period(tsk), | ||
336 | get_rt_relative_deadline(tsk)); | ||
337 | retval = -EINVAL; | ||
338 | goto out; | ||
339 | } | ||
340 | 475 | ||
341 | if (!cpu_online(get_partition(tsk))) { | 476 | |
342 | TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", | 477 | long __litmus_admit_task(struct task_struct* tsk) |
343 | get_partition(tsk)); | 478 | { |
344 | retval = -EINVAL; | 479 | long retval = 0; |
345 | goto out; | ||
346 | } | ||
347 | 480 | ||
348 | INIT_LIST_HEAD(&tsk_rt(tsk)->list); | 481 | INIT_LIST_HEAD(&tsk_rt(tsk)->list); |
349 | 482 | ||
@@ -360,6 +493,17 @@ long litmus_admit_task(struct task_struct* tsk) | |||
360 | bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); | 493 | bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); |
361 | } | 494 | } |
362 | 495 | ||
496 | #ifdef CONFIG_LITMUS_NVIDIA | ||
497 | atomic_set(&tsk_rt(tsk)->nv_int_count, 0); | ||
498 | #endif | ||
499 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
500 | init_gpu_affinity_state(tsk); | ||
501 | #endif | ||
502 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
503 | tsk_rt(tsk)->blocked_lock = NULL; | ||
504 | raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock); | ||
505 | #endif | ||
506 | |||
363 | preempt_disable(); | 507 | preempt_disable(); |
364 | 508 | ||
365 | retval = litmus->admit_task(tsk); | 509 | retval = litmus->admit_task(tsk); |
@@ -372,14 +516,56 @@ long litmus_admit_task(struct task_struct* tsk) | |||
372 | 516 | ||
373 | preempt_enable(); | 517 | preempt_enable(); |
374 | 518 | ||
375 | out: | ||
376 | if (retval) { | 519 | if (retval) { |
377 | bheap_node_free(tsk_rt(tsk)->heap_node); | 520 | bheap_node_free(tsk_rt(tsk)->heap_node); |
378 | release_heap_free(tsk_rt(tsk)->rel_heap); | 521 | release_heap_free(tsk_rt(tsk)->rel_heap); |
379 | } | 522 | } |
523 | |||
524 | out: | ||
525 | return retval; | ||
526 | } | ||
527 | |||
528 | long litmus_admit_task(struct task_struct* tsk) | ||
529 | { | ||
530 | long retval = 0; | ||
531 | |||
532 | BUG_ON(is_realtime(tsk)); | ||
533 | |||
534 | if (get_rt_relative_deadline(tsk) == 0 || | ||
535 | get_exec_cost(tsk) > | ||
536 | min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { | ||
537 | TRACE_TASK(tsk, | ||
538 | "litmus admit: invalid task parameters " | ||
539 | "(e = %lu, p = %lu, d = %lu)\n", | ||
540 | get_exec_cost(tsk), get_rt_period(tsk), | ||
541 | get_rt_relative_deadline(tsk)); | ||
542 | retval = -EINVAL; | ||
543 | goto out; | ||
544 | } | ||
545 | |||
546 | if (!cpu_online(get_partition(tsk))) { | ||
547 | TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", | ||
548 | get_partition(tsk)); | ||
549 | retval = -EINVAL; | ||
550 | goto out; | ||
551 | } | ||
552 | |||
553 | retval = __litmus_admit_task(tsk); | ||
554 | |||
555 | out: | ||
380 | return retval; | 556 | return retval; |
381 | } | 557 | } |
382 | 558 | ||
559 | void litmus_pre_exit_task(struct task_struct* tsk) | ||
560 | { | ||
561 | if (is_realtime(tsk)) { | ||
562 | if (tsk_rt(tsk)->rsrc_exit_cb) { | ||
563 | int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk); | ||
564 | WARN_ON(ret != 0); | ||
565 | } | ||
566 | } | ||
567 | } | ||
568 | |||
383 | void litmus_exit_task(struct task_struct* tsk) | 569 | void litmus_exit_task(struct task_struct* tsk) |
384 | { | 570 | { |
385 | if (is_realtime(tsk)) { | 571 | if (is_realtime(tsk)) { |
@@ -388,7 +574,7 @@ void litmus_exit_task(struct task_struct* tsk) | |||
388 | litmus->task_exit(tsk); | 574 | litmus->task_exit(tsk); |
389 | 575 | ||
390 | BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); | 576 | BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); |
391 | bheap_node_free(tsk_rt(tsk)->heap_node); | 577 | bheap_node_free(tsk_rt(tsk)->heap_node); |
392 | release_heap_free(tsk_rt(tsk)->rel_heap); | 578 | release_heap_free(tsk_rt(tsk)->rel_heap); |
393 | 579 | ||
394 | atomic_dec(&rt_task_count); | 580 | atomic_dec(&rt_task_count); |
@@ -406,14 +592,19 @@ static int do_plugin_switch(void *_plugin) | |||
406 | ret = litmus->deactivate_plugin(); | 592 | ret = litmus->deactivate_plugin(); |
407 | if (0 != ret) | 593 | if (0 != ret) |
408 | goto out; | 594 | goto out; |
409 | ret = plugin->activate_plugin(); | 595 | |
596 | litmus = plugin; /* optimistic switch */ | ||
597 | mb(); | ||
598 | |||
599 | ret = litmus->activate_plugin(); | ||
410 | if (0 != ret) { | 600 | if (0 != ret) { |
411 | printk(KERN_INFO "Can't activate %s (%d).\n", | 601 | printk(KERN_INFO "Can't activate %s (%d).\n", |
412 | plugin->plugin_name, ret); | 602 | litmus->plugin_name, ret); |
413 | plugin = &linux_sched_plugin; | 603 | litmus = &linux_sched_plugin; /* fail to Linux */ |
604 | ret = litmus->activate_plugin(); | ||
605 | BUG_ON(ret); | ||
414 | } | 606 | } |
415 | printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); | 607 | printk(KERN_INFO "Switched to LITMUS^RT plugin %s.\n", litmus->plugin_name); |
416 | litmus = plugin; | ||
417 | } else | 608 | } else |
418 | ret = -EBUSY; | 609 | ret = -EBUSY; |
419 | out: | 610 | out: |
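The rewritten plugin switch deactivates the old plugin, installs the new one before activating it, and falls back to the stock Linux plugin if activation fails, so the global plugin pointer always refers to something usable. The control flow, reduced to a standalone sketch with generic names (not the LITMUS^RT types):

    struct backend {
            int (*activate)(void);
            int (*deactivate)(void);
    };

    /* switch *active to next; on activation failure fall back to a known-good default */
    static int switch_backend(struct backend** active,
                              struct backend* next,
                              struct backend* fallback)
    {
            int ret = (*active)->deactivate();
            if (ret)
                    return ret;

            *active = next;               /* optimistic switch */
            ret = (*active)->activate();
            if (ret) {
                    *active = fallback;   /* fail to the default */
                    ret = (*active)->activate();
                    BUG_ON(ret);
            }
            return ret;
    }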
@@ -429,6 +620,12 @@ int switch_sched_plugin(struct sched_plugin* plugin) | |||
429 | { | 620 | { |
430 | BUG_ON(!plugin); | 621 | BUG_ON(!plugin); |
431 | 622 | ||
623 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
624 | if (!klmirqd_is_dead()) { | ||
625 | kill_klmirqd(); | ||
626 | } | ||
627 | #endif | ||
628 | |||
432 | if (atomic_read(&rt_task_count) == 0) | 629 | if (atomic_read(&rt_task_count) == 0) |
433 | return stop_machine(do_plugin_switch, plugin, NULL); | 630 | return stop_machine(do_plugin_switch, plugin, NULL); |
434 | else | 631 | else |
@@ -441,18 +638,33 @@ int switch_sched_plugin(struct sched_plugin* plugin) | |||
441 | void litmus_fork(struct task_struct* p) | 638 | void litmus_fork(struct task_struct* p) |
442 | { | 639 | { |
443 | if (is_realtime(p)) { | 640 | if (is_realtime(p)) { |
641 | TRACE_TASK(p, "fork, is real-time\n"); | ||
642 | |||
444 | /* clean out any litmus related state, don't preserve anything */ | 643 | /* clean out any litmus related state, don't preserve anything */ |
445 | reinit_litmus_state(p, 0); | 644 | reinit_litmus_state(p, 0); |
645 | |||
446 | /* Don't let the child be a real-time task. */ | 646 | /* Don't let the child be a real-time task. */ |
447 | p->sched_reset_on_fork = 1; | 647 | p->sched_reset_on_fork = 1; |
448 | } else | 648 | |
649 | } else { | ||
449 | /* non-rt tasks might have ctrl_page set */ | 650 | /* non-rt tasks might have ctrl_page set */ |
450 | tsk_rt(p)->ctrl_page = NULL; | 651 | tsk_rt(p)->ctrl_page = NULL; |
451 | 652 | ||
653 | reinit_litmus_state(p, 0); | ||
654 | } | ||
655 | |||
452 | /* od tables are never inherited across a fork */ | 656 | /* od tables are never inherited across a fork */ |
453 | p->od_table = NULL; | 657 | p->od_table = NULL; |
454 | } | 658 | } |
455 | 659 | ||
660 | /* Called right before copy_process() returns a forked thread. */ | ||
661 | void litmus_post_fork_thread(struct task_struct* p) | ||
662 | { | ||
663 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
664 | make_aux_task_if_required(p); | ||
665 | #endif | ||
666 | } | ||
667 | |||
456 | /* Called upon execve(). | 668 | /* Called upon execve(). |
457 | * current is doing the exec. | 669 | * current is doing the exec. |
458 | * Don't let address space specific stuff leak. | 670 | * Don't let address space specific stuff leak. |
@@ -486,8 +698,10 @@ void exit_litmus(struct task_struct *dead_tsk) | |||
486 | } | 698 | } |
487 | 699 | ||
488 | /* main cleanup only for RT tasks */ | 700 | /* main cleanup only for RT tasks */ |
489 | if (is_realtime(dead_tsk)) | 701 | if (is_realtime(dead_tsk)) { |
702 | litmus_pre_exit_task(dead_tsk); /* todo: double check that no Linux rq lock is held */ | ||
490 | litmus_exit_task(dead_tsk); | 703 | litmus_exit_task(dead_tsk); |
704 | } | ||
491 | } | 705 | } |
492 | 706 | ||
493 | 707 | ||
diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c new file mode 100644 index 000000000000..300571a81bbd --- /dev/null +++ b/litmus/litmus_pai_softirq.c | |||
@@ -0,0 +1,64 @@ | |||
1 | #include <linux/interrupt.h> | ||
2 | #include <linux/percpu.h> | ||
3 | #include <linux/cpu.h> | ||
4 | #include <linux/kthread.h> | ||
5 | #include <linux/ftrace.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/slab.h> | ||
8 | #include <linux/mutex.h> | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/cpuset.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_trace.h> | ||
15 | #include <litmus/jobs.h> | ||
16 | #include <litmus/sched_plugin.h> | ||
17 | #include <litmus/litmus_softirq.h> | ||
18 | |||
19 | |||
20 | |||
21 | int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id) | ||
22 | { | ||
23 | int ret = 0; /* assume failure */ | ||
24 | if(unlikely((t->owner == NULL) || !is_realtime(t->owner))) | ||
25 | { | ||
26 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); | ||
27 | BUG(); | ||
28 | } | ||
29 | |||
30 | ret = litmus->enqueue_pai_tasklet(t); | ||
31 | |||
32 | return(ret); | ||
33 | } | ||
34 | |||
35 | EXPORT_SYMBOL(__litmus_tasklet_schedule); | ||
36 | |||
37 | |||
38 | |||
39 | // failure causes default Linux handling. | ||
40 | int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id) | ||
41 | { | ||
42 | int ret = 0; /* assume failure */ | ||
43 | return(ret); | ||
44 | } | ||
45 | EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); | ||
46 | |||
47 | |||
48 | // failure causes default Linux handling. | ||
49 | int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id) | ||
50 | { | ||
51 | int ret = 0; /* assume failure */ | ||
52 | return(ret); | ||
53 | } | ||
54 | EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); | ||
55 | |||
56 | |||
57 | // failure causes default Linux handling. | ||
58 | int __litmus_schedule_work(struct work_struct *w, unsigned int k_id) | ||
59 | { | ||
60 | int ret = 0; /* assume failure */ | ||
61 | return(ret); | ||
62 | } | ||
63 | EXPORT_SYMBOL(__litmus_schedule_work); | ||
64 | |||
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c index 4bf725a36c9c..136fecfb0b8b 100644 --- a/litmus/litmus_proc.c +++ b/litmus/litmus_proc.c | |||
@@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL, | |||
20 | #ifdef CONFIG_RELEASE_MASTER | 20 | #ifdef CONFIG_RELEASE_MASTER |
21 | *release_master_file = NULL, | 21 | *release_master_file = NULL, |
22 | #endif | 22 | #endif |
23 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
24 | *klmirqd_file = NULL, | ||
25 | #endif | ||
23 | *plugs_file = NULL; | 26 | *plugs_file = NULL; |
24 | 27 | ||
25 | /* in litmus/sync.c */ | 28 | /* in litmus/sync.c */ |
26 | int count_tasks_waiting_for_release(void); | 29 | int count_tasks_waiting_for_release(void); |
27 | 30 | ||
31 | extern int proc_read_klmirqd_stats(char *page, char **start, | ||
32 | off_t off, int count, | ||
33 | int *eof, void *data); | ||
34 | |||
28 | static int proc_read_stats(char *page, char **start, | 35 | static int proc_read_stats(char *page, char **start, |
29 | off_t off, int count, | 36 | off_t off, int count, |
30 | int *eof, void *data) | 37 | int *eof, void *data) |
@@ -161,6 +168,12 @@ int __init init_litmus_proc(void) | |||
161 | release_master_file->write_proc = proc_write_release_master; | 168 | release_master_file->write_proc = proc_write_release_master; |
162 | #endif | 169 | #endif |
163 | 170 | ||
171 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
172 | klmirqd_file = | ||
173 | create_proc_read_entry("klmirqd_stats", 0444, litmus_dir, | ||
174 | proc_read_klmirqd_stats, NULL); | ||
175 | #endif | ||
176 | |||
164 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, | 177 | stat_file = create_proc_read_entry("stats", 0444, litmus_dir, |
165 | proc_read_stats, NULL); | 178 | proc_read_stats, NULL); |
166 | 179 | ||
@@ -187,6 +200,10 @@ void exit_litmus_proc(void) | |||
187 | remove_proc_entry("stats", litmus_dir); | 200 | remove_proc_entry("stats", litmus_dir); |
188 | if (curr_file) | 201 | if (curr_file) |
189 | remove_proc_entry("active_plugin", litmus_dir); | 202 | remove_proc_entry("active_plugin", litmus_dir); |
203 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
204 | if (klmirqd_file) | ||
205 | remove_proc_entry("klmirqd_stats", litmus_dir); | ||
206 | #endif | ||
190 | #ifdef CONFIG_RELEASE_MASTER | 207 | #ifdef CONFIG_RELEASE_MASTER |
191 | if (release_master_file) | 208 | if (release_master_file) |
192 | remove_proc_entry("release_master", litmus_dir); | 209 | remove_proc_entry("release_master", litmus_dir); |
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c new file mode 100644 index 000000000000..464a78d780ad --- /dev/null +++ b/litmus/litmus_softirq.c | |||
@@ -0,0 +1,1205 @@ | |||
1 | #include <linux/interrupt.h> | ||
2 | #include <linux/percpu.h> | ||
3 | #include <linux/cpu.h> | ||
4 | #include <linux/kthread.h> | ||
5 | #include <linux/ftrace.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/slab.h> | ||
8 | #include <linux/mutex.h> | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/cpuset.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_trace.h> | ||
15 | #include <litmus/jobs.h> | ||
16 | #include <litmus/sched_plugin.h> | ||
17 | #include <litmus/litmus_softirq.h> | ||
18 | |||
19 | /* TODO: Remove unneeded mb() and other barriers. */ | ||
20 | |||
21 | enum pending_flags | ||
22 | { | ||
23 | LIT_TASKLET_LOW = 0x1, | ||
24 | LIT_TASKLET_HI = LIT_TASKLET_LOW<<1, | ||
25 | LIT_WORK = LIT_TASKLET_HI<<1 | ||
26 | }; | ||
27 | |||
28 | struct klmirqd_registration | ||
29 | { | ||
30 | raw_spinlock_t lock; | ||
31 | u32 nr_threads; | ||
32 | unsigned int initialized:1; | ||
33 | unsigned int shuttingdown:1; | ||
34 | struct list_head threads; | ||
35 | }; | ||
36 | |||
37 | static atomic_t klmirqd_id_gen = ATOMIC_INIT(-1); | ||
38 | |||
39 | static struct klmirqd_registration klmirqd_state; | ||
40 | |||
41 | |||
42 | |||
43 | void init_klmirqd(void) | ||
44 | { | ||
45 | raw_spin_lock_init(&klmirqd_state.lock); | ||
46 | |||
47 | klmirqd_state.nr_threads = 0; | ||
48 | klmirqd_state.initialized = 1; | ||
49 | klmirqd_state.shuttingdown = 0; | ||
50 | INIT_LIST_HEAD(&klmirqd_state.threads); | ||
51 | } | ||
52 | |||
53 | static int __klmirqd_is_ready(void) | ||
54 | { | ||
55 | return (klmirqd_state.initialized == 1 && klmirqd_state.shuttingdown == 0); | ||
56 | } | ||
57 | |||
58 | int klmirqd_is_ready(void) | ||
59 | { | ||
60 | unsigned long flags; | ||
61 | int ret; | ||
62 | |||
63 | raw_spin_lock_irqsave(&klmirqd_state.lock, flags); | ||
64 | ret = __klmirqd_is_ready(); | ||
65 | raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); | ||
66 | |||
67 | return ret; | ||
68 | } | ||
69 | |||
70 | int klmirqd_is_dead(void) | ||
71 | { | ||
72 | return(!klmirqd_is_ready()); | ||
73 | } | ||
74 | |||
75 | |||
76 | void kill_klmirqd(void) | ||
77 | { | ||
78 | if(!klmirqd_is_dead()) | ||
79 | { | ||
80 | unsigned long flags; | ||
81 | struct list_head *pos; | ||
82 | struct list_head *q; | ||
83 | |||
84 | raw_spin_lock_irqsave(&klmirqd_state.lock, flags); | ||
85 | |||
86 | TRACE("%s: Killing all klmirqd threads! (%d of them)\n", __FUNCTION__, klmirqd_state.nr_threads); | ||
87 | |||
88 | klmirqd_state.shuttingdown = 1; | ||
89 | |||
90 | list_for_each_safe(pos, q, &klmirqd_state.threads) { | ||
91 | struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); | ||
92 | |||
93 | if(info->terminating != 1) | ||
94 | { | ||
95 | info->terminating = 1; | ||
96 | mb(); /* just to be sure? */ | ||
97 | flush_pending(info->klmirqd); | ||
98 | |||
99 | /* signal termination */ | ||
100 | raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); | ||
101 | kthread_stop(info->klmirqd); | ||
102 | raw_spin_lock_irqsave(&klmirqd_state.lock, flags); | ||
103 | } | ||
104 | } | ||
105 | |||
106 | raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); | ||
107 | } | ||
108 | } | ||
109 | |||
110 | |||
111 | |||
112 | void kill_klmirqd_thread(struct task_struct* klmirqd_thread) | ||
113 | { | ||
114 | unsigned long flags; | ||
115 | struct klmirqd_info* info; | ||
116 | |||
117 | if (!tsk_rt(klmirqd_thread)->is_interrupt_thread) { | ||
118 | TRACE("%s/%d is not a klmirqd thread\n", klmirqd_thread->comm, klmirqd_thread->pid); | ||
119 | return; | ||
120 | } | ||
121 | |||
122 | TRACE("%s: Killing klmirqd thread %s/%d\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
123 | |||
124 | raw_spin_lock_irqsave(&klmirqd_state.lock, flags); | ||
125 | |||
126 | info = tsk_rt(klmirqd_thread)->klmirqd_info; | ||
127 | |||
128 | if(info->terminating != 1) { | ||
129 | info->terminating = 1; | ||
130 | mb(); | ||
131 | |||
132 | flush_pending(klmirqd_thread); | ||
133 | kthread_stop(klmirqd_thread); | ||
134 | } | ||
135 | |||
136 | raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); | ||
137 | } | ||
138 | |||
139 | struct klmirqd_launch_data | ||
140 | { | ||
141 | int cpu_affinity; | ||
142 | klmirqd_callback_t* cb; | ||
143 | char name[MAX_KLMIRQD_NAME_LEN+1]; | ||
144 | struct work_struct work; | ||
145 | }; | ||
146 | |||
147 | static int run_klmirqd(void* callback); | ||
148 | |||
149 | |||
150 | /* executed by a kworker from workqueues */ | ||
151 | static void __launch_klmirqd_thread(struct work_struct *work) | ||
152 | { | ||
153 | int id; | ||
154 | struct task_struct* thread = NULL; | ||
155 | struct klmirqd_launch_data* launch_data = | ||
156 | container_of(work, struct klmirqd_launch_data, work); | ||
157 | |||
158 | TRACE("Creating klmirqd thread\n"); | ||
159 | |||
160 | |||
161 | |||
162 | if (launch_data->cpu_affinity != -1) { | ||
163 | if (launch_data->name[0] == '\0') { | ||
164 | id = atomic_inc_return(&klmirqd_id_gen); | ||
165 | TRACE("Launching klmirqd_th%d/%d\n", id, launch_data->cpu_affinity); | ||
166 | |||
167 | thread = kthread_create( | ||
168 | run_klmirqd, | ||
169 | /* pass the callback as the kthread's data pointer; cast back in run_klmirqd() */ | ||
170 | (void*)launch_data->cb, | ||
171 | "klmirqd_th%d/%d", | ||
172 | id, | ||
173 | launch_data->cpu_affinity); | ||
174 | } | ||
175 | else { | ||
176 | TRACE("Launching %s/%d\n", launch_data->name, launch_data->cpu_affinity); | ||
177 | |||
178 | thread = kthread_create( | ||
179 | run_klmirqd, | ||
180 | /* pass the callback as the kthread's data pointer; cast back in run_klmirqd() */ | ||
181 | (void*)launch_data->cb, | ||
182 | "%s/%d", | ||
183 | launch_data->name, | ||
184 | launch_data->cpu_affinity); | ||
185 | } | ||
186 | |||
187 | /* litmus will put us in the right cluster. */ | ||
188 | kthread_bind(thread, launch_data->cpu_affinity); | ||
189 | } | ||
190 | else { | ||
191 | if (launch_data->name[0] == '\0') { | ||
192 | id = atomic_inc_return(&klmirqd_id_gen); | ||
193 | TRACE("Launching klmirqd_th%d\n", id); | ||
194 | |||
195 | thread = kthread_create( | ||
196 | run_klmirqd, | ||
197 | /* pass the callback as the kthread's data pointer; cast back in run_klmirqd() */ | ||
198 | (void*)launch_data->cb, | ||
199 | "klmirqd_th%d", | ||
200 | id); | ||
201 | |||
202 | } | ||
203 | else { | ||
204 | TRACE("Launching %s\n", launch_data->name); | ||
205 | |||
206 | thread = kthread_create( | ||
207 | run_klmirqd, | ||
208 | /* pass the callback as the kthread's data pointer; cast back in run_klmirqd() */ | ||
209 | (void*)launch_data->cb, | ||
210 | launch_data->name); | ||
211 | } | ||
212 | |||
213 | |||
214 | } | ||
215 | |||
216 | if (thread) { | ||
217 | wake_up_process(thread); | ||
218 | } | ||
219 | else { | ||
220 | TRACE("Could not create thread!\n"); | ||
221 | } | ||
222 | |||
223 | kfree(launch_data); | ||
224 | } | ||
225 | |||
226 | |||
227 | int launch_klmirqd_thread(char* name, int cpu, klmirqd_callback_t* cb) | ||
228 | { | ||
229 | struct klmirqd_launch_data* delayed_launch; | ||
230 | |||
231 | if (!klmirqd_is_ready()) { | ||
232 | TRACE("klmirqd is not ready. Check that it was initialized!\n"); | ||
233 | return -1; | ||
234 | } | ||
235 | |||
236 | /* tell a work queue to launch the threads. we can't make scheduling | ||
237 | calls since we're in an atomic state. */ | ||
238 | delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC); | ||
239 | delayed_launch->cpu_affinity = cpu; | ||
240 | delayed_launch->cb = cb; | ||
241 | INIT_WORK(&delayed_launch->work, __launch_klmirqd_thread); | ||
242 | |||
243 | if(name) { | ||
244 | snprintf(delayed_launch->name, MAX_KLMIRQD_NAME_LEN+1, "%s", name); | ||
245 | } | ||
246 | else { | ||
247 | delayed_launch->name[0] = '\0'; | ||
248 | } | ||
249 | |||
250 | schedule_work(&delayed_launch->work); | ||
251 | |||
252 | return 0; | ||
253 | } | ||
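launch_klmirqd_thread() only queues the creation on a workqueue (the caller may be in atomic context); the optional callback later runs inside the new thread, after it has become a LITMUS^RT daemon and registered itself, and a non-zero return aborts the thread. A hedged usage sketch; the callback body and names below are made up for illustration:

    static int my_klmirqd_setup(void* arg)
    {
            /* runs in the new klmirqd thread's context */
            TRACE_CUR("klmirqd ready, private arg = %p\n", arg);
            return 0;   /* non-zero would unregister and shut the thread down */
    }

    /* must outlive the call: run_klmirqd() dereferences it later */
    static klmirqd_callback_t my_cb = {
            .func = my_klmirqd_setup,
            .arg  = NULL,
    };

    /* ... during interrupt-thread bring-up ... */
    if (launch_klmirqd_thread("klmirqd_gpu0", 0 /* pin to CPU 0; -1 for none */, &my_cb))
            TRACE("could not request a klmirqd thread\n");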
254 | |||
255 | |||
256 | |||
257 | |||
258 | #define KLMIRQD_SLICE_NR_JIFFIES 1 | ||
259 | #define KLMIRQD_SLICE_NS ((NSEC_PER_SEC / HZ) * KLMIRQD_SLICE_NR_JIFFIES) | ||
260 | |||
261 | static int become_litmus_daemon(struct task_struct* tsk) | ||
262 | { | ||
263 | int ret = 0; | ||
264 | |||
265 | struct rt_task tp = { | ||
266 | .period = KLMIRQD_SLICE_NS, /* dummy one-jiffy period */ | ||
267 | .relative_deadline = KLMIRQD_SLICE_NS, | ||
268 | .exec_cost = KLMIRQD_SLICE_NS, | ||
269 | .phase = 0, | ||
270 | .cpu = task_cpu(current), | ||
271 | .budget_policy = NO_ENFORCEMENT, | ||
272 | .budget_signal_policy = NO_SIGNALS, | ||
273 | .cls = RT_CLASS_BEST_EFFORT | ||
274 | }; | ||
275 | |||
276 | struct sched_param param = { .sched_priority = 0}; | ||
277 | |||
278 | TRACE_CUR("Setting %s/%d as daemon thread.\n", tsk->comm, tsk->pid); | ||
279 | |||
280 | /* set task params */ | ||
281 | tsk_rt(tsk)->task_params = tp; | ||
282 | tsk_rt(tsk)->is_interrupt_thread = 1; | ||
283 | |||
284 | /* inform the OS we're SCHED_LITMUS -- | ||
285 | sched_setscheduler_nocheck() calls litmus_admit_task(). */ | ||
286 | sched_setscheduler_nocheck(tsk, SCHED_LITMUS, ¶m); | ||
287 | |||
288 | return ret; | ||
289 | } | ||
290 | |||
291 | static int become_normal_daemon(struct task_struct* tsk) | ||
292 | { | ||
293 | int ret = 0; | ||
294 | |||
295 | struct sched_param param = { .sched_priority = 0}; | ||
296 | sched_setscheduler_nocheck(tsk, SCHED_NORMAL, ¶m); | ||
297 | |||
298 | return ret; | ||
299 | } | ||
300 | |||
301 | static int register_klmirqd(struct task_struct* tsk) | ||
302 | { | ||
303 | int retval = 0; | ||
304 | unsigned long flags; | ||
305 | struct klmirqd_info *info = NULL; | ||
306 | |||
307 | if (!tsk_rt(tsk)->is_interrupt_thread) { | ||
308 | TRACE("Only proxy threads already running in Litmus may become klmirqd threads!\n"); | ||
309 | WARN_ON(1); | ||
310 | retval = -1; | ||
311 | goto out; | ||
312 | } | ||
313 | |||
314 | raw_spin_lock_irqsave(&klmirqd_state.lock, flags); | ||
315 | |||
316 | if (!__klmirqd_is_ready()) { | ||
317 | TRACE("klmirqd is not ready! Did you forget to initialize it?\n"); | ||
318 | WARN_ON(1); | ||
319 | retval = -1; | ||
320 | goto out_unlock; | ||
321 | } | ||
322 | |||
323 | /* allocate and initialize klmirqd data for the thread */ | ||
324 | info = kmalloc(sizeof(struct klmirqd_info), GFP_KERNEL); | ||
325 | if (!info) { | ||
326 | TRACE("Failed to allocate klmirqd_info struct!\n"); | ||
327 | retval = -1; /* todo: pick better code */ | ||
328 | goto out_unlock; | ||
329 | } | ||
330 | memset(info, 0, sizeof(struct klmirqd_info)); | ||
331 | info->klmirqd = tsk; | ||
332 | info->pending_tasklets_hi.tail = &info->pending_tasklets_hi.head; | ||
333 | info->pending_tasklets.tail = &info->pending_tasklets.head; | ||
334 | INIT_LIST_HEAD(&info->worklist); | ||
335 | INIT_LIST_HEAD(&info->klmirqd_reg); | ||
336 | raw_spin_lock_init(&info->lock); | ||
337 | |||
338 | |||
339 | /* now register with klmirqd */ | ||
340 | list_add_tail(&info->klmirqd_reg, &klmirqd_state.threads); | ||
341 | ++klmirqd_state.nr_threads; | ||
342 | |||
343 | /* update the task struct to point to klmirqd info */ | ||
344 | tsk_rt(tsk)->klmirqd_info = info; | ||
345 | |||
346 | out_unlock: | ||
347 | raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); | ||
348 | |||
349 | out: | ||
350 | return retval; | ||
351 | } | ||
352 | |||
353 | static int unregister_klmirqd(struct task_struct* tsk) | ||
354 | { | ||
355 | int retval = 0; | ||
356 | unsigned long flags; | ||
357 | struct klmirqd_info *info = tsk_rt(tsk)->klmirqd_info; | ||
358 | |||
359 | if (!tsk_rt(tsk)->is_interrupt_thread || !info) { | ||
360 | TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid); | ||
361 | WARN_ON(1); | ||
362 | retval = -1; | ||
363 | goto out; | ||
364 | } | ||
365 | |||
366 | raw_spin_lock_irqsave(&klmirqd_state.lock, flags); | ||
367 | |||
368 | /* remove the entry in the klmirqd thread list */ | ||
369 | list_del(&info->klmirqd_reg); | ||
370 | mb(); | ||
371 | --klmirqd_state.nr_threads; | ||
372 | |||
373 | /* remove link to klmirqd info from thread */ | ||
374 | tsk_rt(tsk)->klmirqd_info = NULL; | ||
375 | |||
376 | /* clean up memory */ | ||
377 | kfree(info); | ||
378 | |||
379 | raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); | ||
380 | |||
381 | out: | ||
382 | return retval; | ||
383 | } | ||
384 | |||
385 | |||
386 | |||
387 | |||
388 | |||
389 | |||
390 | int proc_read_klmirqd_stats(char *page, char **start, | ||
391 | off_t off, int count, | ||
392 | int *eof, void *data) | ||
393 | { | ||
394 | unsigned long flags; | ||
395 | int len; | ||
396 | |||
397 | raw_spin_lock_irqsave(&klmirqd_state.lock, flags); | ||
398 | |||
399 | if (klmirqd_state.initialized) { | ||
400 | if (!klmirqd_state.shuttingdown) { | ||
401 | struct list_head *pos; | ||
402 | |||
403 | len = snprintf(page, PAGE_SIZE, | ||
404 | "num ready klmirqds: %d\n\n", | ||
405 | klmirqd_state.nr_threads); | ||
406 | |||
407 | list_for_each(pos, &klmirqd_state.threads) { | ||
408 | struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg); | ||
409 | |||
410 | len += | ||
411 | snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */ | ||
412 | "klmirqd_thread: %s/%d\n" | ||
413 | "\tcurrent_owner: %s/%d\n" | ||
414 | "\tpending: %x\n" | ||
415 | "\tnum hi: %d\n" | ||
416 | "\tnum low: %d\n" | ||
417 | "\tnum work: %d\n\n", | ||
418 | info->klmirqd->comm, info->klmirqd->pid, | ||
419 | (info->current_owner != NULL) ? | ||
420 | info->current_owner->comm : "(null)", | ||
421 | (info->current_owner != NULL) ? | ||
422 | info->current_owner->pid : 0, | ||
423 | info->pending, | ||
424 | atomic_read(&info->num_hi_pending), | ||
425 | atomic_read(&info->num_low_pending), | ||
426 | atomic_read(&info->num_work_pending)); | ||
427 | } | ||
428 | } | ||
429 | else { | ||
430 | len = snprintf(page, PAGE_SIZE, "klmirqd is shutting down\n"); | ||
431 | } | ||
432 | } | ||
433 | else { | ||
434 | len = snprintf(page, PAGE_SIZE, "klmirqd is not initialized!\n"); | ||
435 | } | ||
436 | |||
437 | raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags); | ||
438 | |||
439 | return(len); | ||
440 | } | ||
441 | |||
442 | |||
443 | |||
444 | |||
445 | |||
446 | #if 0 | ||
447 | static atomic_t dump_id = ATOMIC_INIT(0); | ||
448 | |||
449 | static void __dump_state(struct klmirqd_info* which, const char* caller) | ||
450 | { | ||
451 | struct tasklet_struct* list; | ||
452 | |||
453 | int id = atomic_inc_return(&dump_id); | ||
454 | |||
455 | //if(in_interrupt()) | ||
456 | { | ||
457 | if(which->current_owner) | ||
458 | { | ||
459 | TRACE("(id: %d caller: %s)\n" | ||
460 | "klmirqd: %s/%d\n" | ||
461 | "current owner: %s/%d\n" | ||
462 | "pending: %x\n", | ||
463 | id, caller, | ||
464 | which->klmirqd->comm, which->klmirqd->pid, | ||
465 | which->current_owner->comm, which->current_owner->pid, | ||
466 | which->pending); | ||
467 | } | ||
468 | else | ||
469 | { | ||
470 | TRACE("(id: %d caller: %s)\n" | ||
471 | "klmirqd: %s/%d\n" | ||
472 | "current owner: %p\n" | ||
473 | "pending: %x\n", | ||
474 | id, caller, | ||
475 | which->klmirqd->comm, which->klmirqd->pid, | ||
476 | NULL, | ||
477 | which->pending); | ||
478 | } | ||
479 | |||
480 | list = which->pending_tasklets.head; | ||
481 | while(list) | ||
482 | { | ||
483 | struct tasklet_struct *t = list; | ||
484 | list = list->next; /* advance */ | ||
485 | if(t->owner) | ||
486 | TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid); | ||
487 | else | ||
488 | TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL); | ||
489 | } | ||
490 | } | ||
491 | } | ||
492 | |||
493 | static void dump_state(struct klmirqd_info* which, const char* caller) | ||
494 | { | ||
495 | unsigned long flags; | ||
496 | |||
497 | raw_spin_lock_irqsave(&which->lock, flags); | ||
498 | __dump_state(which, caller); | ||
499 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
500 | } | ||
501 | #endif | ||
502 | |||
503 | |||
504 | |||
505 | |||
506 | |||
507 | |||
508 | |||
509 | |||
510 | |||
511 | |||
512 | |||
513 | /* forward declarations */ | ||
514 | static void ___litmus_tasklet_schedule(struct tasklet_struct *t, | ||
515 | struct klmirqd_info *which, | ||
516 | int wakeup); | ||
517 | static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, | ||
518 | struct klmirqd_info *which, | ||
519 | int wakeup); | ||
520 | static void ___litmus_schedule_work(struct work_struct *w, | ||
521 | struct klmirqd_info *which, | ||
522 | int wakeup); | ||
523 | |||
524 | |||
525 | inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which) | ||
526 | { | ||
527 | return (which->pending & LIT_TASKLET_HI); | ||
528 | } | ||
529 | |||
530 | inline static u32 litirq_pending_low_irqoff(struct klmirqd_info* which) | ||
531 | { | ||
532 | return (which->pending & LIT_TASKLET_LOW); | ||
533 | } | ||
534 | |||
535 | inline static u32 litirq_pending_work_irqoff(struct klmirqd_info* which) | ||
536 | { | ||
537 | return (which->pending & LIT_WORK); | ||
538 | } | ||
539 | |||
540 | inline static u32 litirq_pending_irqoff(struct klmirqd_info* which) | ||
541 | { | ||
542 | return(which->pending); | ||
543 | } | ||
544 | |||
545 | |||
546 | inline static u32 litirq_pending(struct klmirqd_info* which) | ||
547 | { | ||
548 | unsigned long flags; | ||
549 | u32 pending; | ||
550 | |||
551 | raw_spin_lock_irqsave(&which->lock, flags); | ||
552 | pending = litirq_pending_irqoff(which); | ||
553 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
554 | |||
555 | return pending; | ||
556 | }; | ||
557 | |||
558 | static void wakeup_litirqd_locked(struct klmirqd_info* which) | ||
559 | { | ||
560 | /* Interrupts are disabled: no need to stop preemption */ | ||
561 | if (which && which->klmirqd) | ||
562 | { | ||
563 | if(which->klmirqd->state != TASK_RUNNING) | ||
564 | { | ||
565 | TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__, | ||
566 | which->klmirqd->comm, which->klmirqd->pid); | ||
567 | |||
568 | wake_up_process(which->klmirqd); | ||
569 | } | ||
570 | } | ||
571 | } | ||
572 | |||
573 | |||
574 | static void do_lit_tasklet(struct klmirqd_info* which, | ||
575 | struct tasklet_head* pending_tasklets) | ||
576 | { | ||
577 | unsigned long flags; | ||
578 | struct tasklet_struct *list; | ||
579 | atomic_t* count; | ||
580 | |||
581 | raw_spin_lock_irqsave(&which->lock, flags); | ||
582 | |||
583 | //__dump_state(which, "do_lit_tasklet: before steal"); | ||
584 | |||
585 | /* copy out the tasklets for our private use. */ | ||
586 | list = pending_tasklets->head; | ||
587 | pending_tasklets->head = NULL; | ||
588 | pending_tasklets->tail = &pending_tasklets->head; | ||
589 | |||
590 | /* remove pending flag */ | ||
591 | which->pending &= (pending_tasklets == &which->pending_tasklets) ? | ||
592 | ~LIT_TASKLET_LOW : | ||
593 | ~LIT_TASKLET_HI; | ||
594 | |||
595 | count = (pending_tasklets == &which->pending_tasklets) ? | ||
596 | &which->num_low_pending: | ||
597 | &which->num_hi_pending; | ||
598 | |||
599 | //__dump_state(which, "do_lit_tasklet: after steal"); | ||
600 | |||
601 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
602 | |||
603 | |||
604 | while(list) | ||
605 | { | ||
606 | struct tasklet_struct *t = list; | ||
607 | |||
608 | /* advance, lest we forget */ | ||
609 | list = list->next; | ||
610 | |||
611 | /* execute tasklet if it has my priority and is free */ | ||
612 | if (tasklet_trylock(t)) { | ||
613 | if (!atomic_read(&t->count)) { | ||
614 | |||
615 | sched_trace_tasklet_begin(t->owner); | ||
616 | |||
617 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | ||
618 | { | ||
619 | BUG(); | ||
620 | } | ||
621 | TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__); | ||
622 | t->func(t->data); | ||
623 | tasklet_unlock(t); | ||
624 | |||
625 | atomic_dec(count); | ||
626 | |||
627 | sched_trace_tasklet_end(t->owner, 0ul); | ||
628 | |||
629 | continue; /* process more tasklets */ | ||
630 | } | ||
631 | tasklet_unlock(t); | ||
632 | } | ||
633 | |||
634 | TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__); | ||
635 | |||
636 | /* couldn't process tasklet. put it back at the end of the queue. */ | ||
637 | if(pending_tasklets == &which->pending_tasklets) | ||
638 | ___litmus_tasklet_schedule(t, which, 0); | ||
639 | else | ||
640 | ___litmus_tasklet_hi_schedule(t, which, 0); | ||
641 | } | ||
642 | } | ||
643 | |||
644 | |||
645 | // process any pending hi/low tasklets for this klmirqd thread. bails | ||
646 | // out early if called in interrupt context or from the wrong thread. | ||
647 | static void do_litirq(struct klmirqd_info* which) | ||
648 | { | ||
649 | u32 pending; | ||
650 | |||
651 | if(in_interrupt()) | ||
652 | { | ||
653 | TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__); | ||
654 | return; | ||
655 | } | ||
656 | |||
657 | if(which->klmirqd != current) | ||
658 | { | ||
659 | TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n", | ||
660 | __FUNCTION__, current->comm, current->pid, | ||
661 | which->klmirqd->comm, which->klmirqd->pid); | ||
662 | return; | ||
663 | } | ||
664 | |||
665 | if(!is_realtime(current)) | ||
666 | { | ||
667 | TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n", | ||
668 | __FUNCTION__, current->policy); | ||
669 | return; | ||
670 | } | ||
671 | |||
672 | |||
673 | /* We only handle tasklets & work objects, no need for RCU triggers? */ | ||
674 | |||
675 | pending = litirq_pending(which); | ||
676 | if(pending) { | ||
677 | /* extract the work to do and do it! */ | ||
678 | if(pending & LIT_TASKLET_HI) { | ||
679 | TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__); | ||
680 | do_lit_tasklet(which, &which->pending_tasklets_hi); | ||
681 | } | ||
682 | |||
683 | if(pending & LIT_TASKLET_LOW) { | ||
684 | TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__); | ||
685 | do_lit_tasklet(which, &which->pending_tasklets); | ||
686 | } | ||
687 | } | ||
688 | } | ||
689 | |||
690 | |||
691 | static void do_work(struct klmirqd_info* which) | ||
692 | { | ||
693 | unsigned long flags; | ||
694 | struct work_struct* work; | ||
695 | work_func_t f; | ||
696 | |||
697 | // only execute one work-queue item to yield to tasklets. | ||
698 | // ...is this a good idea, or should we just batch them? | ||
699 | raw_spin_lock_irqsave(&which->lock, flags); | ||
700 | |||
701 | if(!litirq_pending_work_irqoff(which)) | ||
702 | { | ||
703 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
704 | goto no_work; | ||
705 | } | ||
706 | |||
707 | work = list_first_entry(&which->worklist, struct work_struct, entry); | ||
708 | list_del_init(&work->entry); | ||
709 | |||
710 | if(list_empty(&which->worklist)) | ||
711 | { | ||
712 | which->pending &= ~LIT_WORK; | ||
713 | } | ||
714 | |||
715 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
716 | |||
717 | |||
718 | TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__); | ||
719 | // do the work! | ||
720 | work_clear_pending(work); | ||
721 | f = work->func; | ||
722 | f(work); /* can't touch 'work' after this point, | ||
723 | the user may have freed it. */ | ||
724 | |||
725 | atomic_dec(&which->num_work_pending); | ||
726 | |||
727 | no_work: | ||
728 | return; | ||
729 | } | ||
730 | |||
731 | |||
732 | |||
733 | /* main loop for a klmirqd thread */ | ||
734 | static int run_klmirqd(void* callback) | ||
735 | { | ||
736 | int retval = 0; | ||
737 | struct klmirqd_info* info = NULL; | ||
738 | klmirqd_callback_t* cb = (klmirqd_callback_t*)(callback); | ||
739 | |||
740 | retval = become_litmus_daemon(current); | ||
741 | if (retval != 0) { | ||
742 | TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__); | ||
743 | goto failed; | ||
744 | } | ||
745 | |||
746 | retval = register_klmirqd(current); | ||
747 | if (retval != 0) { | ||
748 | TRACE_CUR("%s: Failed to become a klmirqd thread.\n", __FUNCTION__); | ||
749 | goto failed_sched_normal; | ||
750 | } | ||
751 | |||
752 | if (cb && cb->func) { | ||
753 | retval = cb->func(cb->arg); | ||
754 | if (retval != 0) { | ||
755 | TRACE_CUR("%s: klmirqd callback reported failure. retval = %d\n", __FUNCTION__, retval); | ||
756 | goto failed_unregister; | ||
757 | } | ||
758 | } | ||
759 | |||
760 | /* enter the interrupt handling workloop */ | ||
761 | |||
762 | info = tsk_rt(current)->klmirqd_info; | ||
763 | |||
764 | set_current_state(TASK_INTERRUPTIBLE); | ||
765 | |||
766 | while (!kthread_should_stop()) | ||
767 | { | ||
768 | preempt_disable(); | ||
769 | if (!litirq_pending(info)) | ||
770 | { | ||
771 | /* sleep for work */ | ||
772 | TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n", | ||
773 | __FUNCTION__); | ||
774 | preempt_enable_no_resched(); | ||
775 | schedule(); | ||
776 | |||
777 | if(kthread_should_stop()) /* bail out */ | ||
778 | { | ||
779 | TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); | ||
780 | continue; | ||
781 | } | ||
782 | |||
783 | preempt_disable(); | ||
784 | } | ||
785 | |||
786 | __set_current_state(TASK_RUNNING); | ||
787 | |||
788 | while (litirq_pending(info)) | ||
789 | { | ||
790 | preempt_enable_no_resched(); | ||
791 | |||
792 | if(kthread_should_stop()) | ||
793 | { | ||
794 | TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__); | ||
795 | break; | ||
796 | } | ||
797 | |||
798 | preempt_disable(); | ||
799 | |||
800 | /* Double check that there's still pending work and the owner hasn't | ||
801 | * changed. Pending items may have been flushed while we were sleeping. | ||
802 | */ | ||
803 | if(litirq_pending(info)) | ||
804 | { | ||
805 | TRACE_CUR("%s: Executing tasklets and/or work objects.\n", | ||
806 | __FUNCTION__); | ||
807 | |||
808 | do_litirq(info); | ||
809 | |||
810 | preempt_enable_no_resched(); | ||
811 | |||
812 | // work objects are preemptible. | ||
813 | do_work(info); | ||
814 | } | ||
815 | else | ||
816 | { | ||
817 | TRACE_CUR("%s: Pending work was flushed!\n", __FUNCTION__); | ||
818 | |||
819 | preempt_enable_no_resched(); | ||
820 | } | ||
821 | |||
822 | cond_resched(); | ||
823 | preempt_disable(); | ||
824 | } | ||
825 | preempt_enable(); | ||
826 | set_current_state(TASK_INTERRUPTIBLE); | ||
827 | } | ||
828 | __set_current_state(TASK_RUNNING); | ||
829 | |||
830 | failed_unregister: | ||
831 | /* remove our registration from klmirqd */ | ||
832 | unregister_klmirqd(current); | ||
833 | |||
834 | failed_sched_normal: | ||
835 | become_normal_daemon(current); | ||
836 | |||
837 | failed: | ||
838 | return retval; | ||
839 | } | ||
840 | |||
841 | |||
842 | void flush_pending(struct task_struct* tsk) | ||
843 | { | ||
844 | unsigned long flags; | ||
845 | struct tasklet_struct *list; | ||
846 | u32 work_flushed = 0; | ||
847 | |||
848 | struct klmirqd_info *which; | ||
849 | |||
850 | if (!tsk_rt(tsk)->is_interrupt_thread) { | ||
851 | TRACE("%s/%d is not a proxy thread\n", tsk->comm, tsk->pid); | ||
852 | WARN_ON(1); | ||
853 | return; | ||
854 | } | ||
855 | |||
856 | which = tsk_rt(tsk)->klmirqd_info; | ||
857 | if (!which) { | ||
858 | TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid); | ||
859 | WARN_ON(1); | ||
860 | return; | ||
861 | } | ||
862 | |||
863 | |||
864 | raw_spin_lock_irqsave(&which->lock, flags); | ||
865 | |||
866 | //__dump_state(which, "flush_pending: before"); | ||
867 | |||
868 | // flush hi tasklets. | ||
869 | if(litirq_pending_hi_irqoff(which)) | ||
870 | { | ||
871 | which->pending &= ~LIT_TASKLET_HI; | ||
872 | |||
873 | list = which->pending_tasklets_hi.head; | ||
874 | which->pending_tasklets_hi.head = NULL; | ||
875 | which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head; | ||
876 | |||
877 | TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__); | ||
878 | |||
879 | while(list) | ||
880 | { | ||
881 | struct tasklet_struct *t = list; | ||
882 | list = list->next; | ||
883 | |||
884 | if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) | ||
885 | { | ||
886 | BUG(); | ||
887 | } | ||
888 | |||
889 | work_flushed |= LIT_TASKLET_HI; | ||
890 | |||
891 | t->owner = NULL; | ||
892 | |||
893 | // re-mark as scheduled and hand the tasklet back to Linux | ||
894 | if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) | ||
895 | { | ||
896 | atomic_dec(&which->num_hi_pending); | ||
897 | ___tasklet_hi_schedule(t); | ||
898 | } | ||
899 | else | ||
900 | { | ||
901 | TRACE("%s: dropped hi tasklet??\n", __FUNCTION__); | ||
902 | BUG(); | ||
903 | } | ||
904 | |||
905 | } | ||
906 | } | ||
907 | |||
908 | // flush low tasklets. | ||
909 | if(litirq_pending_low_irqoff(which)) | ||
910 | { | ||
911 | which->pending &= ~LIT_TASKLET_LOW; | ||
912 | |||
913 | list = which->pending_tasklets.head; | ||
914 | which->pending_tasklets.head = NULL; | ||
915 | which->pending_tasklets.tail = &which->pending_tasklets.head; | ||
916 | |||
917 | TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__); | ||
918 | |||
919 | while(list) | ||
920 | { | ||
921 | struct tasklet_struct *t = list; | ||
922 | list = list->next; | ||
923 | |||
924 | if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))) | ||
925 | { | ||
926 | BUG(); | ||
927 | } | ||
928 | |||
929 | work_flushed |= LIT_TASKLET_LOW; | ||
930 | |||
931 | t->owner = NULL; | ||
932 | // sched_trace_tasklet_end(owner, 1ul); | ||
933 | |||
934 | if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) | ||
935 | { | ||
936 | atomic_dec(&which->num_low_pending); | ||
937 | ___tasklet_schedule(t); | ||
938 | } | ||
939 | else | ||
940 | { | ||
941 | TRACE("%s: dropped tasklet??\n", __FUNCTION__); | ||
942 | BUG(); | ||
943 | } | ||
944 | } | ||
945 | } | ||
946 | |||
947 | // flush work objects | ||
948 | if(litirq_pending_work_irqoff(which)) | ||
949 | { | ||
950 | which->pending &= ~LIT_WORK; | ||
951 | |||
952 | TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__); | ||
953 | |||
954 | while(!list_empty(&which->worklist)) | ||
955 | { | ||
956 | struct work_struct* work = | ||
957 | list_first_entry(&which->worklist, struct work_struct, entry); | ||
958 | list_del_init(&work->entry); | ||
959 | |||
960 | work_flushed |= LIT_WORK; | ||
961 | atomic_dec(&which->num_work_pending); | ||
962 | |||
963 | work->owner = NULL; | ||
964 | // sched_trace_work_end(owner, current, 1ul); | ||
965 | __schedule_work(work); | ||
966 | } | ||
967 | } | ||
968 | |||
969 | //__dump_state(which, "flush_pending: after (before reeval prio)"); | ||
970 | |||
971 | |||
972 | mb(); /* commit changes to pending flags */ | ||
973 | |||
974 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
975 | } | ||
976 | |||
977 | |||
978 | |||
979 | |||
980 | static void ___litmus_tasklet_schedule(struct tasklet_struct *t, | ||
981 | struct klmirqd_info *which, | ||
982 | int wakeup) | ||
983 | { | ||
984 | unsigned long flags; | ||
985 | u32 old_pending; | ||
986 | |||
987 | t->next = NULL; | ||
988 | |||
989 | raw_spin_lock_irqsave(&which->lock, flags); | ||
990 | |||
991 | //__dump_state(which, "___litmus_tasklet_schedule: before queuing"); | ||
992 | |||
993 | *(which->pending_tasklets.tail) = t; | ||
994 | which->pending_tasklets.tail = &t->next; | ||
995 | |||
996 | old_pending = which->pending; | ||
997 | which->pending |= LIT_TASKLET_LOW; | ||
998 | |||
999 | atomic_inc(&which->num_low_pending); | ||
1000 | |||
1001 | mb(); | ||
1002 | |||
1003 | if(!old_pending && wakeup) | ||
1004 | { | ||
1005 | wakeup_litirqd_locked(which); /* wake up the klmirqd */ | ||
1006 | } | ||
1007 | |||
1008 | //__dump_state(which, "___litmus_tasklet_schedule: after queuing"); | ||
1009 | |||
1010 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
1011 | } | ||
1012 | |||
1013 | |||
1014 | int __litmus_tasklet_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread) | ||
1015 | { | ||
1016 | int ret = 0; /* assume failure */ | ||
1017 | struct klmirqd_info* info; | ||
1018 | |||
1019 | if (unlikely(!is_realtime(klmirqd_thread) || | ||
1020 | !tsk_rt(klmirqd_thread)->is_interrupt_thread || | ||
1021 | !tsk_rt(klmirqd_thread)->klmirqd_info)) { | ||
1022 | TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1023 | return ret; | ||
1024 | } | ||
1025 | |||
1026 | info = tsk_rt(klmirqd_thread)->klmirqd_info; | ||
1027 | |||
1028 | if (likely(!info->terminating)) { | ||
1029 | ret = 1; | ||
1030 | ___litmus_tasklet_schedule(t, info, 1); | ||
1031 | } | ||
1032 | else { | ||
1033 | TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1034 | } | ||
1035 | return(ret); | ||
1036 | } | ||
1037 | |||
1038 | EXPORT_SYMBOL(__litmus_tasklet_schedule); | ||
1039 | |||
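For illustration, a minimal sketch (not part of this patch) of how a driver bottom half might route its tasklet to a GPU's klmirqd thread through the interface above, falling back to the stock softirq path when klmirqd rejects it. The wrapper name queue_to_klmirqd() and the device-number parameter are assumptions; __litmus_tasklet_schedule() and get_nv_klmirqd_thread() are defined elsewhere in this patch.

/* Sketch only -- assumed caller, not from this patch. */
static void queue_to_klmirqd(struct tasklet_struct *t, u32 device_num)
{
	struct task_struct *klmirqd = get_nv_klmirqd_thread(device_num);

	/* __litmus_tasklet_schedule() returns 1 if klmirqd accepted the tasklet. */
	if (klmirqd && __litmus_tasklet_schedule(t, klmirqd))
		return;

	/* klmirqd unavailable or terminating: use the regular softirq path. */
	tasklet_schedule(t);
}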
1040 | |||
1041 | static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t, | ||
1042 | struct klmirqd_info *which, | ||
1043 | int wakeup) | ||
1044 | { | ||
1045 | unsigned long flags; | ||
1046 | u32 old_pending; | ||
1047 | |||
1048 | t->next = NULL; | ||
1049 | |||
1050 | raw_spin_lock_irqsave(&which->lock, flags); | ||
1051 | |||
1052 | *(which->pending_tasklets_hi.tail) = t; | ||
1053 | which->pending_tasklets_hi.tail = &t->next; | ||
1054 | |||
1055 | old_pending = which->pending; | ||
1056 | which->pending |= LIT_TASKLET_HI; | ||
1057 | |||
1058 | atomic_inc(&which->num_hi_pending); | ||
1059 | |||
1060 | mb(); | ||
1061 | |||
1062 | if(!old_pending && wakeup) | ||
1063 | { | ||
1064 | wakeup_litirqd_locked(which); /* wake up the klmirqd */ | ||
1065 | } | ||
1066 | |||
1067 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
1068 | } | ||
1069 | |||
1070 | int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread) | ||
1071 | { | ||
1072 | int ret = 0; /* assume failure */ | ||
1073 | struct klmirqd_info* info; | ||
1074 | |||
1075 | if (unlikely(!is_realtime(klmirqd_thread) || | ||
1076 | !tsk_rt(klmirqd_thread)->is_interrupt_thread || | ||
1077 | !tsk_rt(klmirqd_thread)->klmirqd_info)) { | ||
1078 | TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1079 | return ret; | ||
1080 | } | ||
1081 | |||
1082 | info = tsk_rt(klmirqd_thread)->klmirqd_info; | ||
1083 | |||
1084 | if (likely(!info->terminating)) { | ||
1085 | ret = 1; | ||
1086 | ___litmus_tasklet_hi_schedule(t, info, 1); | ||
1087 | } | ||
1088 | else { | ||
1089 | TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1090 | } | ||
1091 | |||
1092 | return(ret); | ||
1093 | } | ||
1094 | |||
1095 | EXPORT_SYMBOL(__litmus_tasklet_hi_schedule); | ||
1096 | |||
1097 | |||
1098 | int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, struct task_struct* klmirqd_thread) | ||
1099 | { | ||
1100 | int ret = 0; /* assume failure */ | ||
1101 | u32 old_pending; | ||
1102 | struct klmirqd_info* info; | ||
1103 | |||
1104 | BUG_ON(!irqs_disabled()); | ||
1105 | |||
1106 | if (unlikely(!is_realtime(klmirqd_thread) || | ||
1107 | !tsk_rt(klmirqd_thread)->is_interrupt_thread || | ||
1108 | !tsk_rt(klmirqd_thread)->klmirqd_info)) { | ||
1109 | TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1110 | return ret; | ||
1111 | } | ||
1112 | |||
1113 | info = tsk_rt(klmirqd_thread)->klmirqd_info; | ||
1114 | |||
1115 | if (likely(!info->terminating)) { | ||
1116 | |||
1117 | raw_spin_lock(&info->lock); | ||
1118 | |||
1119 | ret = 1; // success! | ||
1120 | |||
1121 | t->next = info->pending_tasklets_hi.head; | ||
1122 | info->pending_tasklets_hi.head = t; | ||
1123 | |||
1124 | old_pending = info->pending; | ||
1125 | info->pending |= LIT_TASKLET_HI; | ||
1126 | |||
1127 | atomic_inc(&info->num_hi_pending); | ||
1128 | |||
1129 | mb(); | ||
1130 | |||
1131 | if(!old_pending) { | ||
1132 | wakeup_litirqd_locked(info); /* wake up the klmirqd */ | ||
1133 | } | ||
1134 | |||
1135 | raw_spin_unlock(&info->lock); | ||
1136 | } | ||
1137 | else { | ||
1138 | TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1139 | } | ||
1140 | |||
1141 | return(ret); | ||
1142 | } | ||
1143 | |||
1144 | EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first); | ||
1145 | |||
1146 | |||
1147 | |||
1148 | static void ___litmus_schedule_work(struct work_struct *w, | ||
1149 | struct klmirqd_info *which, | ||
1150 | int wakeup) | ||
1151 | { | ||
1152 | unsigned long flags; | ||
1153 | u32 old_pending; | ||
1154 | |||
1155 | raw_spin_lock_irqsave(&which->lock, flags); | ||
1156 | |||
1157 | work_pending(w); | ||
1158 | list_add_tail(&w->entry, &which->worklist); | ||
1159 | |||
1160 | old_pending = which->pending; | ||
1161 | which->pending |= LIT_WORK; | ||
1162 | |||
1163 | atomic_inc(&which->num_work_pending); | ||
1164 | |||
1165 | mb(); | ||
1166 | |||
1167 | if(!old_pending && wakeup) | ||
1168 | { | ||
1169 | wakeup_litirqd_locked(which); /* wakeup the klmirqd */ | ||
1170 | } | ||
1171 | |||
1172 | raw_spin_unlock_irqrestore(&which->lock, flags); | ||
1173 | } | ||
1174 | |||
1175 | int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_thread) | ||
1176 | { | ||
1177 | int ret = 1; /* assume success */ | ||
1178 | struct klmirqd_info* info; | ||
1179 | |||
1180 | if (unlikely(!is_realtime(klmirqd_thread) || | ||
1181 | !tsk_rt(klmirqd_thread)->is_interrupt_thread || | ||
1182 | !tsk_rt(klmirqd_thread)->klmirqd_info)) { | ||
1183 | TRACE("%s: %s/%d can't handle work items\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1184 | return ret; | ||
1185 | } | ||
1186 | |||
1187 | info = tsk_rt(klmirqd_thread)->klmirqd_info; | ||
1188 | |||
1189 | |||
1190 | if (likely(!info->terminating)) { | ||
1191 | ___litmus_schedule_work(w, info, 1); | ||
1192 | } | ||
1193 | else { | ||
1194 | TRACE("%s: Work rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid); | ||
1195 | ret = 0; | ||
1196 | } | ||
1197 | |||
1198 | return(ret); | ||
1199 | } | ||
1200 | EXPORT_SYMBOL(__litmus_schedule_work); | ||
1201 | |||
1202 | |||
1203 | |||
1204 | |||
1205 | |||
diff --git a/litmus/locking.c b/litmus/locking.c index 43d9aece2e74..c21ec1ae36d7 100644 --- a/litmus/locking.c +++ b/litmus/locking.c | |||
@@ -8,8 +8,17 @@ | |||
8 | #include <litmus/litmus.h> | 8 | #include <litmus/litmus.h> |
9 | #include <litmus/sched_plugin.h> | 9 | #include <litmus/sched_plugin.h> |
10 | #include <litmus/trace.h> | 10 | #include <litmus/trace.h> |
11 | #include <litmus/litmus.h> | ||
11 | #include <litmus/wait.h> | 12 | #include <litmus/wait.h> |
12 | 13 | ||
14 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
15 | #include <linux/uaccess.h> | ||
16 | #endif | ||
17 | |||
18 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
19 | #include <litmus/gpu_affinity.h> | ||
20 | #endif | ||
21 | |||
13 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); | 22 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); |
14 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg); | 23 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg); |
15 | static int close_generic_lock(struct od_table_entry* entry); | 24 | static int close_generic_lock(struct od_table_entry* entry); |
@@ -22,6 +31,9 @@ struct fdso_ops generic_lock_ops = { | |||
22 | .destroy = destroy_generic_lock | 31 | .destroy = destroy_generic_lock |
23 | }; | 32 | }; |
24 | 33 | ||
34 | static atomic_t lock_id_gen = ATOMIC_INIT(0); | ||
35 | |||
36 | |||
25 | static inline bool is_lock(struct od_table_entry* entry) | 37 | static inline bool is_lock(struct od_table_entry* entry) |
26 | { | 38 | { |
27 | return entry->class == &generic_lock_ops; | 39 | return entry->class == &generic_lock_ops; |
@@ -39,8 +51,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar | |||
39 | int err; | 51 | int err; |
40 | 52 | ||
41 | err = litmus->allocate_lock(&lock, type, arg); | 53 | err = litmus->allocate_lock(&lock, type, arg); |
42 | if (err == 0) | 54 | if (err == 0) { |
55 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
56 | lock->nest.lock = lock; | ||
57 | lock->nest.hp_waiter_eff_prio = NULL; | ||
58 | |||
59 | INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node); | ||
60 | if(!lock->nest.hp_waiter_ptr) { | ||
61 | TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in " | ||
62 | "most uses. (exception: IKGLP donors)\n"); | ||
63 | } | ||
64 | #endif | ||
65 | lock->type = type; | ||
66 | lock->ident = atomic_inc_return(&lock_id_gen); | ||
43 | *obj_ref = lock; | 67 | *obj_ref = lock; |
68 | } | ||
44 | return err; | 69 | return err; |
45 | } | 70 | } |
46 | 71 | ||
@@ -83,7 +108,8 @@ asmlinkage long sys_litmus_lock(int lock_od) | |||
83 | entry = get_entry_for_od(lock_od); | 108 | entry = get_entry_for_od(lock_od); |
84 | if (entry && is_lock(entry)) { | 109 | if (entry && is_lock(entry)) { |
85 | l = get_lock(entry); | 110 | l = get_lock(entry); |
86 | TRACE_CUR("attempts to lock 0x%p\n", l); | 111 | //TRACE_CUR("attempts to lock 0x%p\n", l); |
112 | TRACE_CUR("attempts to lock %d\n", l->ident); | ||
87 | err = l->ops->lock(l); | 113 | err = l->ops->lock(l); |
88 | } | 114 | } |
89 | 115 | ||
@@ -111,7 +137,8 @@ asmlinkage long sys_litmus_unlock(int lock_od) | |||
111 | entry = get_entry_for_od(lock_od); | 137 | entry = get_entry_for_od(lock_od); |
112 | if (entry && is_lock(entry)) { | 138 | if (entry && is_lock(entry)) { |
113 | l = get_lock(entry); | 139 | l = get_lock(entry); |
114 | TRACE_CUR("attempts to unlock 0x%p\n", l); | 140 | //TRACE_CUR("attempts to unlock 0x%p\n", l); |
141 | TRACE_CUR("attempts to unlock %d\n", l->ident); | ||
115 | err = l->ops->unlock(l); | 142 | err = l->ops->unlock(l); |
116 | } | 143 | } |
117 | 144 | ||
@@ -138,6 +165,365 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) | |||
138 | return(t); | 165 | return(t); |
139 | } | 166 | } |
140 | 167 | ||
168 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
169 | |||
170 | void print_hp_waiters(struct binheap_node* n, int depth) | ||
171 | { | ||
172 | struct litmus_lock *l; | ||
173 | struct nested_info *nest; | ||
174 | char padding[81] = "                                                                                "; | ||
175 | struct task_struct *hp = NULL; | ||
176 | struct task_struct *hp_eff = NULL; | ||
177 | struct task_struct *node_prio = NULL; | ||
178 | |||
179 | |||
180 | if(n == NULL) { | ||
181 | TRACE("+-> %p\n", NULL); | ||
182 | return; | ||
183 | } | ||
184 | |||
185 | nest = binheap_entry(n, struct nested_info, hp_binheap_node); | ||
186 | l = nest->lock; | ||
187 | |||
188 | if(depth*2 <= 80) | ||
189 | padding[depth*2] = '\0'; | ||
190 | |||
191 | if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) { | ||
192 | hp = *(nest->hp_waiter_ptr); | ||
193 | |||
194 | if(tsk_rt(hp)->inh_task) { | ||
195 | hp_eff = tsk_rt(hp)->inh_task; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | node_prio = nest->hp_waiter_eff_prio; | ||
200 | |||
201 | TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n", | ||
202 | padding, | ||
203 | (node_prio) ? node_prio->comm : "nil", | ||
204 | (node_prio) ? node_prio->pid : -1, | ||
205 | (hp) ? hp->comm : "nil", | ||
206 | (hp) ? hp->pid : -1, | ||
207 | (hp_eff) ? hp_eff->comm : "nil", | ||
208 | (hp_eff) ? hp_eff->pid : -1, | ||
209 | l->ident); | ||
210 | |||
211 | if(n->left) print_hp_waiters(n->left, depth+1); | ||
212 | if(n->right) print_hp_waiters(n->right, depth+1); | ||
213 | } | ||
214 | #endif | ||
215 | |||
216 | |||
217 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
218 | |||
219 | void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/) | ||
220 | { | ||
221 | /* | ||
222 | We pick the next lock in reverse order. This causes inheritance propagation | ||
223 | from locks received earlier to flow in the same direction as regular nested | ||
224 | locking. This might make fine-grained DGL easier in the future. | ||
225 | */ | ||
226 | |||
227 | BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock); | ||
228 | |||
229 | //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock); | ||
230 | |||
231 | // note reverse order | ||
232 | for(dgl_wait->last_primary = dgl_wait->last_primary - 1; | ||
233 | dgl_wait->last_primary >= 0; | ||
234 | --(dgl_wait->last_primary)){ | ||
235 | if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner( | ||
236 | dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) { | ||
237 | |||
238 | tsk_rt(dgl_wait->task)->blocked_lock = | ||
239 | dgl_wait->locks[dgl_wait->last_primary]; | ||
240 | mb(); | ||
241 | |||
242 | TRACE_CUR("New blocked lock is %d\n", | ||
243 | dgl_wait->locks[dgl_wait->last_primary]->ident); | ||
244 | |||
245 | break; | ||
246 | } | ||
247 | } | ||
248 | } | ||
249 | |||
250 | int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key) | ||
251 | { | ||
252 | // should never be called. | ||
253 | BUG(); | ||
254 | return 1; | ||
255 | } | ||
256 | |||
257 | void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, | ||
258 | dgl_wait_state_t** dgl_wait, | ||
259 | struct task_struct **task) | ||
260 | { | ||
261 | wait_queue_t *q; | ||
262 | |||
263 | *dgl_wait = NULL; | ||
264 | *task = NULL; | ||
265 | |||
266 | if (waitqueue_active(wq)) { | ||
267 | q = list_entry(wq->task_list.next, | ||
268 | wait_queue_t, task_list); | ||
269 | |||
270 | if(q->func == dgl_wake_up) { | ||
271 | *dgl_wait = (dgl_wait_state_t*) q->private; | ||
272 | } | ||
273 | else { | ||
274 | *task = (struct task_struct*) q->private; | ||
275 | } | ||
276 | |||
277 | __remove_wait_queue(wq, q); | ||
278 | } | ||
279 | } | ||
280 | |||
281 | void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait) | ||
282 | { | ||
283 | init_waitqueue_entry(wq_node, dgl_wait->task); | ||
284 | wq_node->private = dgl_wait; | ||
285 | wq_node->func = dgl_wake_up; | ||
286 | } | ||
287 | |||
288 | |||
289 | static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait) | ||
290 | { | ||
291 | int i; | ||
292 | unsigned long irqflags; //, dummyflags; | ||
293 | raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task); | ||
294 | |||
295 | BUG_ON(dgl_wait->task != current); | ||
296 | |||
297 | raw_spin_lock_irqsave(dgl_lock, irqflags); | ||
298 | |||
299 | |||
300 | dgl_wait->nr_remaining = dgl_wait->size; | ||
301 | |||
302 | TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size); | ||
303 | |||
304 | // try to acquire each lock. enqueue (non-blocking) if it is unavailable. | ||
305 | for(i = 0; i < dgl_wait->size; ++i) { | ||
306 | struct litmus_lock *l = dgl_wait->locks[i]; | ||
307 | |||
308 | // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks. | ||
309 | |||
310 | if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) { | ||
311 | --(dgl_wait->nr_remaining); | ||
312 | TRACE_CUR("Acquired lock %d immediately.\n", l->ident); | ||
313 | } | ||
314 | } | ||
315 | |||
316 | if(dgl_wait->nr_remaining == 0) { | ||
317 | // acquired entire group immediately | ||
318 | TRACE_CUR("Acquired all locks in DGL immediately!\n"); | ||
319 | } | ||
320 | else { | ||
321 | |||
322 | TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n", | ||
323 | dgl_wait->nr_remaining); | ||
324 | |||
325 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
326 | // KLUDGE: don't count this suspension as time spent in the GPU | ||
327 | // critical section | ||
328 | if(tsk_rt(dgl_wait->task)->held_gpus) { | ||
329 | tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1; | ||
330 | } | ||
331 | #endif | ||
332 | |||
333 | // note reverse order. see comments in select_next_lock for reason. | ||
334 | for(i = dgl_wait->size - 1; i >= 0; --i) { | ||
335 | struct litmus_lock *l = dgl_wait->locks[i]; | ||
336 | if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe | ||
337 | |||
338 | TRACE_CUR("Activating priority inheritance on lock %d\n", | ||
339 | l->ident); | ||
340 | |||
341 | TS_DGL_LOCK_SUSPEND; | ||
342 | |||
343 | l->ops->enable_priority(l, dgl_wait); | ||
344 | dgl_wait->last_primary = i; | ||
345 | |||
346 | TRACE_CUR("Suspending for lock %d\n", l->ident); | ||
347 | |||
348 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending | ||
349 | |||
350 | schedule(); // suspend!!! | ||
351 | |||
352 | TS_DGL_LOCK_RESUME; | ||
353 | |||
354 | TRACE_CUR("Woken up from DGL suspension.\n"); | ||
355 | |||
356 | goto all_acquired; // we should hold all locks when we wake up. | ||
357 | } | ||
358 | } | ||
359 | |||
360 | TRACE_CUR("Didn't have to suspend after all -- all locks in the DGL were acquired while enqueueing.\n"); | ||
361 | //BUG(); | ||
362 | } | ||
363 | |||
364 | raw_spin_unlock_irqrestore(dgl_lock, irqflags); | ||
365 | |||
366 | all_acquired: | ||
367 | |||
368 | // sanity check (for testing only) | ||
369 | // for(i = 0; i < dgl_wait->size; ++i) { | ||
370 | // struct litmus_lock *l = dgl_wait->locks[i]; | ||
371 | // BUG_ON(!l->ops->is_owner(l, dgl_wait->task)); | ||
372 | // } | ||
373 | |||
374 | TRACE_CUR("Acquired entire DGL\n"); | ||
375 | |||
376 | return 0; | ||
377 | } | ||
378 | |||
379 | static int supports_dgl(struct litmus_lock *l) | ||
380 | { | ||
381 | struct litmus_lock_ops* ops = l->ops; | ||
382 | |||
383 | return (ops->dgl_lock && | ||
384 | ops->is_owner && | ||
385 | ops->enable_priority); | ||
386 | } | ||
387 | |||
388 | asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) | ||
389 | { | ||
390 | struct task_struct *t = current; | ||
391 | long err = -EINVAL; | ||
392 | int dgl_ods[MAX_DGL_SIZE]; | ||
393 | int i; | ||
394 | |||
395 | dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held. | ||
396 | |||
397 | if(dgl_size > MAX_DGL_SIZE || dgl_size < 1) | ||
398 | goto out; | ||
399 | |||
400 | if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int)))) | ||
401 | goto out; | ||
402 | |||
403 | if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int)))) | ||
404 | goto out; | ||
405 | |||
406 | if (!is_realtime(t)) { | ||
407 | err = -EPERM; | ||
408 | goto out; | ||
409 | } | ||
410 | |||
411 | for(i = 0; i < dgl_size; ++i) { | ||
412 | struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]); | ||
413 | if(entry && is_lock(entry)) { | ||
414 | dgl_wait_state.locks[i] = get_lock(entry); | ||
415 | if(!supports_dgl(dgl_wait_state.locks[i])) { | ||
416 | TRACE_CUR("Lock %d does not support all required DGL operations.\n", | ||
417 | dgl_wait_state.locks[i]->ident); | ||
418 | goto out; | ||
419 | } | ||
420 | } | ||
421 | else { | ||
422 | TRACE_CUR("Invalid lock identifier\n"); | ||
423 | goto out; | ||
424 | } | ||
425 | } | ||
426 | |||
427 | dgl_wait_state.task = t; | ||
428 | dgl_wait_state.size = dgl_size; | ||
429 | |||
430 | TS_DGL_LOCK_START; | ||
431 | err = do_litmus_dgl_lock(&dgl_wait_state); | ||
432 | |||
433 | /* Note: task may have been suspended or preempted in between! Take | ||
434 | * this into account when computing overheads. */ | ||
435 | TS_DGL_LOCK_END; | ||
436 | |||
437 | out: | ||
438 | return err; | ||
439 | } | ||
440 | |||
441 | static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size) | ||
442 | { | ||
443 | int i; | ||
444 | long err = 0; | ||
445 | |||
446 | TRACE_CUR("Unlocking a DGL of size %d\n", dgl_size); | ||
447 | |||
448 | for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order | ||
449 | |||
450 | struct litmus_lock *l = dgl_locks[i]; | ||
451 | long tmp_err; | ||
452 | |||
453 | TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident); | ||
454 | |||
455 | tmp_err = l->ops->unlock(l); | ||
456 | |||
457 | if(tmp_err) { | ||
458 | TRACE_CUR("There was an error unlocking %d: %ld.\n", l->ident, tmp_err); | ||
459 | err = tmp_err; | ||
460 | } | ||
461 | } | ||
462 | |||
463 | TRACE_CUR("DGL unlocked. err = %ld\n", err); | ||
464 | |||
465 | return err; | ||
466 | } | ||
467 | |||
468 | asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size) | ||
469 | { | ||
470 | long err = -EINVAL; | ||
471 | int dgl_ods[MAX_DGL_SIZE]; | ||
472 | struct od_table_entry* entry; | ||
473 | int i; | ||
474 | |||
475 | struct litmus_lock* dgl_locks[MAX_DGL_SIZE]; | ||
476 | |||
477 | if(dgl_size > MAX_DGL_SIZE || dgl_size < 1) | ||
478 | goto out; | ||
479 | |||
480 | if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int)))) | ||
481 | goto out; | ||
482 | |||
483 | if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int)))) | ||
484 | goto out; | ||
485 | |||
486 | for(i = 0; i < dgl_size; ++i) { | ||
487 | entry = get_entry_for_od(dgl_ods[i]); | ||
488 | if(entry && is_lock(entry)) { | ||
489 | dgl_locks[i] = get_lock(entry); | ||
490 | if(!supports_dgl(dgl_locks[i])) { | ||
491 | TRACE_CUR("Lock %d does not support all required DGL operations.\n", | ||
492 | dgl_locks[i]->ident); | ||
493 | goto out; | ||
494 | } | ||
495 | } | ||
496 | else { | ||
497 | TRACE_CUR("Invalid lock identifier\n"); | ||
498 | goto out; | ||
499 | } | ||
500 | } | ||
501 | |||
502 | TS_DGL_UNLOCK_START; | ||
503 | err = do_litmus_dgl_unlock(dgl_locks, dgl_size); | ||
504 | |||
505 | /* Note: task may have been suspended or preempted in between! Take | ||
506 | * this into account when computing overheads. */ | ||
507 | TS_DGL_UNLOCK_END; | ||
508 | |||
509 | out: | ||
510 | return err; | ||
511 | } | ||
512 | |||
513 | #else // CONFIG_LITMUS_DGL_SUPPORT | ||
514 | |||
515 | asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size) | ||
516 | { | ||
517 | return -ENOSYS; | ||
518 | } | ||
519 | |||
520 | asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size) | ||
521 | { | ||
522 | return -ENOSYS; | ||
523 | } | ||
524 | |||
525 | #endif | ||
526 | |||
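For context, a hedged userspace sketch (an assumption, not part of this patch) of how the two DGL system calls above might be driven. The liblitmus-style wrappers shown here are hypothetical, and the object descriptors are assumed to come from the usual od_open() path; only the kernel-side prototypes above are from the patch.

/* Sketch only: od_a and od_b are object descriptors for two LITMUS locks
 * whose implementations support the DGL operations (dgl_lock, is_owner,
 * enable_priority). */
int ods[2] = { od_a, od_b };

if (litmus_dgl_lock(ods, 2) == 0) {	/* hypothetical wrapper for sys_litmus_dgl_lock */
	/* critical section spanning both resources */
	litmus_dgl_unlock(ods, 2);	/* hypothetical wrapper for sys_litmus_dgl_unlock */
}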
141 | unsigned int __add_wait_queue_prio_exclusive( | 527 | unsigned int __add_wait_queue_prio_exclusive( |
142 | wait_queue_head_t* head, | 528 | wait_queue_head_t* head, |
143 | prio_wait_queue_t *new) | 529 | prio_wait_queue_t *new) |
@@ -171,7 +557,60 @@ out: | |||
171 | } | 557 | } |
172 | 558 | ||
173 | 559 | ||
174 | #else | 560 | void suspend_for_lock(void) |
561 | { | ||
562 | #if defined(CONFIG_REALTIME_AUX_TASKS) || defined(CONFIG_LITMUS_NVIDIA) | ||
563 | struct task_struct *t = current; | ||
564 | #endif | ||
565 | |||
566 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
567 | unsigned int aux_restore = 0; | ||
568 | unsigned int aux_hide; | ||
569 | #endif | ||
570 | |||
571 | #ifdef CONFIG_LITMUS_NVIDIA | ||
572 | unsigned int gpu_restore = 0; | ||
573 | unsigned int gpu_hide; | ||
574 | #endif | ||
575 | |||
576 | //#ifdef CONFIG_REALTIME_AUX_TASKS | ||
577 | // if (tsk_rt(t)->has_aux_tasks) { | ||
578 | // /* hide from aux tasks so they can't inherit our priority when we block | ||
579 | // * for a litmus lock. inheritance is already going to a litmus lock | ||
580 | // * holder. */ | ||
581 | // aux_hide = tsk_rt(t)->hide_from_aux_tasks; | ||
582 | // aux_restore = 1; | ||
583 | // tsk_rt(t)->hide_from_aux_tasks = 1; | ||
584 | // } | ||
585 | //#endif | ||
586 | |||
587 | #ifdef CONFIG_LITMUS_NVIDIA | ||
588 | if (tsk_rt(t)->held_gpus) { | ||
589 | gpu_hide = tsk_rt(t)->hide_from_gpu; | ||
590 | gpu_restore = 1; | ||
591 | tsk_rt(t)->hide_from_gpu = 1; | ||
592 | } | ||
593 | #endif | ||
594 | |||
595 | schedule(); | ||
596 | |||
597 | #ifdef CONFIG_LITMUS_NVIDIA | ||
598 | if (gpu_restore) { | ||
599 | /* restore our state */ | ||
600 | tsk_rt(t)->hide_from_gpu = gpu_hide; | ||
601 | } | ||
602 | #endif | ||
603 | |||
604 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
605 | if (aux_restore) { | ||
606 | /* restore our state */ | ||
607 | tsk_rt(t)->hide_from_aux_tasks = aux_hide; | ||
608 | } | ||
609 | #endif | ||
610 | } | ||
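A minimal sketch (an assumption about intended callers, not taken from this diff) of how a locking protocol's blocking path would use suspend_for_lock() in place of a bare schedule(), so that GPU tracking state stays hidden while the task is blocked; the semaphore fields below are hypothetical.

/* Sketch only: 'sem' (with a raw spinlock and a wait_queue_head_t) and
 * 'flags' are hypothetical locals of a LITMUS lock's lock() path. */
wait_queue_t wait;

init_waitqueue_entry(&wait, current);
set_task_state(current, TASK_UNINTERRUPTIBLE);
__add_wait_queue_tail_exclusive(&sem->wait, &wait);
raw_spin_unlock_irqrestore(&sem->lock, flags);

suspend_for_lock();	/* instead of a bare schedule(); hide_from_gpu is set across the suspension */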
611 | |||
612 | |||
613 | #else // CONFIG_LITMUS_LOCKING | ||
175 | 614 | ||
176 | struct fdso_ops generic_lock_ops = {}; | 615 | struct fdso_ops generic_lock_ops = {}; |
177 | 616 | ||
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c new file mode 100644 index 000000000000..5a63fb732e8b --- /dev/null +++ b/litmus/nvidia_info.c | |||
@@ -0,0 +1,1137 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/semaphore.h> | ||
3 | #include <linux/pci.h> | ||
4 | |||
5 | #include <litmus/sched_trace.h> | ||
6 | #include <litmus/nvidia_info.h> | ||
7 | #include <litmus/litmus.h> | ||
8 | |||
9 | #include <litmus/sched_plugin.h> | ||
10 | |||
11 | #include <litmus/binheap.h> | ||
12 | |||
13 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
14 | #include <litmus/litmus_softirq.h> | ||
15 | #endif | ||
16 | |||
17 | typedef unsigned char NvV8; /* "void": enumerated or multiple fields */ | ||
18 | typedef unsigned short NvV16; /* "void": enumerated or multiple fields */ | ||
19 | typedef unsigned char NvU8; /* 0 to 255 */ | ||
20 | typedef unsigned short NvU16; /* 0 to 65535 */ | ||
21 | typedef signed char NvS8; /* -128 to 127 */ | ||
22 | typedef signed short NvS16; /* -32768 to 32767 */ | ||
23 | typedef float NvF32; /* IEEE Single Precision (S1E8M23) */ | ||
24 | typedef double NvF64; /* IEEE Double Precision (S1E11M52) */ | ||
25 | typedef unsigned int NvV32; /* "void": enumerated or multiple fields */ | ||
26 | typedef unsigned int NvU32; /* 0 to 4294967295 */ | ||
27 | typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */ | ||
28 | typedef union | ||
29 | { | ||
30 | volatile NvV8 Reg008[1]; | ||
31 | volatile NvV16 Reg016[1]; | ||
32 | volatile NvV32 Reg032[1]; | ||
33 | } litmus_nv_hwreg_t, * litmus_nv_phwreg_t; | ||
34 | |||
35 | typedef struct | ||
36 | { | ||
37 | NvU64 address; | ||
38 | #ifdef CONFIG_CUDA_5_0 | ||
39 | NvU64 strapped_size; | ||
40 | #endif | ||
41 | NvU64 size; | ||
42 | NvU32 offset; | ||
43 | NvU32 *map; | ||
44 | litmus_nv_phwreg_t map_u; | ||
45 | } litmus_nv_aperture_t; | ||
46 | |||
47 | typedef struct | ||
48 | { | ||
49 | void *priv; /* private data */ | ||
50 | void *os_state; /* os-specific device state */ | ||
51 | |||
52 | #ifndef CONFIG_CUDA_5_0 | ||
53 | int rmInitialized; | ||
54 | #endif | ||
55 | int flags; | ||
56 | |||
57 | /* PCI config info */ | ||
58 | NvU32 domain; | ||
59 | NvU16 bus; | ||
60 | NvU16 slot; | ||
61 | NvU16 vendor_id; | ||
62 | NvU16 device_id; | ||
63 | NvU16 subsystem_id; | ||
64 | NvU32 gpu_id; | ||
65 | void *handle; | ||
66 | |||
67 | NvU32 pci_cfg_space[16]; | ||
68 | |||
69 | /* physical characteristics */ | ||
70 | litmus_nv_aperture_t bars[3]; | ||
71 | litmus_nv_aperture_t *regs; | ||
72 | litmus_nv_aperture_t *fb, ud; | ||
73 | litmus_nv_aperture_t agp; | ||
74 | |||
75 | NvU32 interrupt_line; | ||
76 | |||
77 | NvU32 agp_config; | ||
78 | NvU32 agp_status; | ||
79 | |||
80 | NvU32 primary_vga; | ||
81 | |||
82 | NvU32 sim_env; | ||
83 | |||
84 | NvU32 rc_timer_enabled; | ||
85 | |||
86 | /* list of events allocated for this device */ | ||
87 | void *event_list; | ||
88 | |||
89 | void *kern_mappings; | ||
90 | |||
91 | } litmus_nv_state_t; | ||
92 | |||
93 | typedef struct work_struct litmus_nv_task_t; | ||
94 | |||
95 | typedef struct litmus_nv_work_s { | ||
96 | litmus_nv_task_t task; | ||
97 | void *data; | ||
98 | } litmus_nv_work_t; | ||
99 | |||
100 | typedef struct litmus_nv_linux_state_s { | ||
101 | litmus_nv_state_t nv_state; | ||
102 | atomic_t usage_count; | ||
103 | |||
104 | struct pci_dev *dev; | ||
105 | void *agp_bridge; | ||
106 | void *alloc_queue; | ||
107 | |||
108 | void *timer_sp; | ||
109 | void *isr_sp; | ||
110 | void *pci_cfgchk_sp; | ||
111 | void *isr_bh_sp; | ||
112 | |||
113 | #if defined(CONFIG_CUDA_4_0) || defined(CONFIG_CUDA_5_0) | ||
114 | char registry_keys[512]; | ||
115 | #endif | ||
116 | |||
117 | /* keep track of any pending bottom halves */ | ||
118 | struct tasklet_struct tasklet; | ||
119 | litmus_nv_work_t work; | ||
120 | |||
121 | /* get a timer callback every second */ | ||
122 | struct timer_list rc_timer; | ||
123 | |||
124 | /* lock for linux-specific data, not used by core rm */ | ||
125 | struct semaphore ldata_lock; | ||
126 | |||
127 | /* lock for linux-specific alloc queue */ | ||
128 | struct semaphore at_lock; | ||
129 | |||
130 | #if 0 | ||
131 | #if defined(NV_USER_MAP) | ||
132 | /* list of user mappings */ | ||
133 | struct nv_usermap_s *usermap_list; | ||
134 | |||
135 | /* lock for VMware-specific mapping list */ | ||
136 | struct semaphore mt_lock; | ||
137 | #endif /* defined(NV_USER_MAP) */ | ||
138 | #if defined(NV_PM_SUPPORT_OLD_STYLE_APM) | ||
139 | void *apm_nv_dev; | ||
140 | #endif | ||
141 | #endif | ||
142 | |||
143 | NvU32 device_num; | ||
144 | struct litmus_nv_linux_state_s *next; | ||
145 | } litmus_nv_linux_state_t; | ||
146 | |||
147 | void dump_nvidia_info(const struct tasklet_struct *t) | ||
148 | { | ||
149 | litmus_nv_state_t* nvstate = NULL; | ||
150 | litmus_nv_linux_state_t* linuxstate = NULL; | ||
151 | struct pci_dev* pci = NULL; | ||
152 | |||
153 | nvstate = (litmus_nv_state_t*)(t->data); | ||
154 | |||
155 | if(nvstate) | ||
156 | { | ||
157 | TRACE("NV State:\n" | ||
158 | "\ttasklet ptr = %p\n" | ||
159 | "\tstate ptr = %p\n" | ||
160 | "\tprivate data ptr = %p\n" | ||
161 | "\tos state ptr = %p\n" | ||
162 | "\tdomain = %u\n" | ||
163 | "\tbus = %u\n" | ||
164 | "\tslot = %u\n" | ||
165 | "\tvendor_id = %u\n" | ||
166 | "\tdevice_id = %u\n" | ||
167 | "\tsubsystem_id = %u\n" | ||
168 | "\tgpu_id = %u\n" | ||
169 | "\tinterrupt_line = %u\n", | ||
170 | t, | ||
171 | nvstate, | ||
172 | nvstate->priv, | ||
173 | nvstate->os_state, | ||
174 | nvstate->domain, | ||
175 | nvstate->bus, | ||
176 | nvstate->slot, | ||
177 | nvstate->vendor_id, | ||
178 | nvstate->device_id, | ||
179 | nvstate->subsystem_id, | ||
180 | nvstate->gpu_id, | ||
181 | nvstate->interrupt_line); | ||
182 | |||
183 | linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); | ||
184 | } | ||
185 | else | ||
186 | { | ||
187 | TRACE("INVALID NVSTATE????\n"); | ||
188 | } | ||
189 | |||
190 | if(linuxstate) | ||
191 | { | ||
192 | int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate); | ||
193 | int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state)); | ||
194 | int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate); | ||
195 | |||
196 | |||
197 | TRACE("LINUX NV State:\n" | ||
198 | "\tlinux nv state ptr: %p\n" | ||
199 | "\taddress of tasklet: %p\n" | ||
200 | "\taddress of work: %p\n" | ||
201 | "\tusage_count: %d\n" | ||
202 | "\tdevice_num: %u\n" | ||
203 | "\ttasklet addr == this tasklet: %d\n" | ||
204 | "\tpci: %p\n", | ||
205 | linuxstate, | ||
206 | &(linuxstate->tasklet), | ||
207 | &(linuxstate->work), | ||
208 | atomic_read(&(linuxstate->usage_count)), | ||
209 | linuxstate->device_num, | ||
210 | (t == &(linuxstate->tasklet)), | ||
211 | linuxstate->dev); | ||
212 | |||
213 | pci = linuxstate->dev; | ||
214 | |||
215 | TRACE("Offsets:\n" | ||
216 | "\tOffset from LinuxState: %d, %x\n" | ||
217 | "\tOffset from NVState: %d, %x\n" | ||
218 | "\tOffset from parameter: %d, %x\n" | ||
219 | "\tdevice_num: %u\n", | ||
220 | ls_offset, ls_offset, | ||
221 | ns_offset_raw, ns_offset_raw, | ||
222 | ns_offset_desired, ns_offset_desired, | ||
223 | *((u32*)((void*)nvstate + ns_offset_desired))); | ||
224 | } | ||
225 | else | ||
226 | { | ||
227 | TRACE("INVALID LINUXNVSTATE?????\n"); | ||
228 | } | ||
229 | |||
230 | #if 0 | ||
231 | if(pci) | ||
232 | { | ||
233 | TRACE("PCI DEV Info:\n" | ||
234 | "pci device ptr: %p\n" | ||
235 | "\tdevfn = %d\n" | ||
236 | "\tvendor = %d\n" | ||
237 | "\tdevice = %d\n" | ||
238 | "\tsubsystem_vendor = %d\n" | ||
239 | "\tsubsystem_device = %d\n" | ||
240 | "\tslot # = %d\n", | ||
241 | pci, | ||
242 | pci->devfn, | ||
243 | pci->vendor, | ||
244 | pci->device, | ||
245 | pci->subsystem_vendor, | ||
246 | pci->subsystem_device, | ||
247 | pci->slot->number); | ||
248 | } | ||
249 | else | ||
250 | { | ||
251 | TRACE("INVALID PCIDEV PTR?????\n"); | ||
252 | } | ||
253 | #endif | ||
254 | } | ||
255 | |||
256 | |||
257 | |||
258 | static struct module* nvidia_mod = NULL; | ||
259 | |||
260 | |||
261 | |||
262 | |||
263 | #if 0 | ||
264 | static int nvidia_ready_module_notify(struct notifier_block *self, | ||
265 | unsigned long val, void *data) | ||
266 | { | ||
267 | mutex_lock(&module_mutex); | ||
268 | nvidia_mod = find_module("nvidia"); | ||
269 | mutex_unlock(&module_mutex); | ||
270 | |||
271 | if(nvidia_mod != NULL) | ||
272 | { | ||
273 | TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, | ||
274 | (void*)(nvidia_mod->module_core), | ||
275 | (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); | ||
276 | init_nv_device_reg(); | ||
277 | return(0); | ||
278 | } | ||
279 | else | ||
280 | { | ||
281 | TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | static int nvidia_going_module_notify(struct notifier_block *self, | ||
286 | unsigned long val, void *data) | ||
287 | { | ||
288 | nvidia_mod = NULL; | ||
289 | mb(); | ||
290 | |||
291 | return 0; | ||
292 | } | ||
293 | |||
294 | static struct notifier_block nvidia_ready = { | ||
295 | .notifier_call = nvidia_ready_module_notify, | ||
296 | .priority = 1, | ||
297 | }; | ||
298 | |||
299 | static struct notifier_block nvidia_going = { | ||
300 | .notifier_call = nvidia_going_module_notify, | ||
301 | .priority = 1, | ||
302 | }; | ||
303 | #endif | ||
304 | |||
305 | |||
306 | |||
307 | static int init_nv_device_reg(void); | ||
308 | static int shutdown_nv_device_reg(void); | ||
309 | |||
310 | |||
311 | int init_nvidia_info(void) | ||
312 | { | ||
313 | mutex_lock(&module_mutex); | ||
314 | nvidia_mod = find_module("nvidia"); | ||
315 | mutex_unlock(&module_mutex); | ||
316 | if(nvidia_mod != NULL) | ||
317 | { | ||
318 | TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__, | ||
319 | (void*)(nvidia_mod->module_core), | ||
320 | (void*)(nvidia_mod->module_core) + nvidia_mod->core_size); | ||
321 | init_nv_device_reg(); | ||
322 | return(0); | ||
323 | } | ||
324 | else | ||
325 | { | ||
326 | TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__); | ||
327 | |||
328 | init_nv_device_reg(); | ||
329 | return(0); | ||
330 | // return(-1); | ||
331 | } | ||
332 | } | ||
333 | |||
334 | void shutdown_nvidia_info(void) | ||
335 | { | ||
336 | nvidia_mod = NULL; | ||
337 | mb(); | ||
338 | |||
339 | shutdown_nv_device_reg(); | ||
340 | } | ||
341 | |||
342 | /* works with pointers to static data inside the module too. */ | ||
343 | int is_nvidia_func(void* func_addr) | ||
344 | { | ||
345 | int ret = 0; | ||
346 | if(nvidia_mod) | ||
347 | { | ||
348 | ret = within_module_core((long unsigned int)func_addr, nvidia_mod); | ||
349 | /* | ||
350 | if(ret) | ||
351 | { | ||
352 | TRACE("%s : %p is in NVIDIA module: %d\n", | ||
353 | __FUNCTION__, func_addr, ret); | ||
354 | }*/ | ||
355 | } | ||
356 | |||
357 | return(ret); | ||
358 | } | ||
359 | |||
360 | u32 get_tasklet_nv_device_num(const struct tasklet_struct *t) | ||
361 | { | ||
362 | // life is too short to use hard-coded offsets. update this later. | ||
363 | litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data); | ||
364 | litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state); | ||
365 | |||
366 | BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM); | ||
367 | |||
368 | return(linuxstate->device_num); | ||
369 | } | ||
370 | |||
371 | u32 get_work_nv_device_num(const struct work_struct *t) | ||
372 | { | ||
373 | // offset determined through observed behavior of the NV driver. | ||
374 | const int DEVICE_NUM_OFFSET = sizeof(struct work_struct); | ||
375 | void* state = (void*)(t); | ||
376 | void** device_num_ptr = state + DEVICE_NUM_OFFSET; | ||
377 | return(*((u32*)(*device_num_ptr))); | ||
378 | } | ||
379 | |||
380 | |||
381 | /////////////////////////////////////////////////////////////////////////////// | ||
382 | /////////////////////////////////////////////////////////////////////////////// | ||
383 | /////////////////////////////////////////////////////////////////////////////// | ||
384 | |||
385 | |||
386 | typedef struct { | ||
387 | raw_spinlock_t lock; /* not needed if GPU not shared between scheduling domains */ | ||
388 | struct binheap owners; | ||
389 | |||
390 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
391 | klmirqd_callback_t callback; | ||
392 | struct task_struct* thread; | ||
393 | unsigned int ready:1; /* todo: make threads check for the ready flag */ | ||
394 | #endif | ||
395 | |||
396 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | ||
397 | struct tasklet_struct nv_klmirqd_dbg_tasklet; | ||
398 | #endif | ||
399 | }nv_device_registry_t; | ||
400 | |||
401 | |||
402 | static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM]; | ||
403 | |||
404 | |||
405 | |||
406 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
407 | static int nvidia_klmirqd_cb(void *arg) | ||
408 | { | ||
409 | unsigned long flags; | ||
410 | int reg_device_id = (int)(long long)(arg); | ||
411 | nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; | ||
412 | |||
413 | TRACE("nv klmirqd callback for GPU %d\n", reg_device_id); | ||
414 | |||
415 | raw_spin_lock_irqsave(®->lock, flags); | ||
416 | reg->thread = current; | ||
417 | reg->ready = 1; | ||
418 | raw_spin_unlock_irqrestore(®->lock, flags); | ||
419 | |||
420 | return 0; | ||
421 | } | ||
422 | #endif | ||
423 | |||
424 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | ||
425 | struct nv_klmirqd_dbg_timer_struct | ||
426 | { | ||
427 | struct hrtimer timer; | ||
428 | }; | ||
429 | |||
430 | static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer; | ||
431 | |||
432 | static void nv_klmirqd_arm_dbg_timer(lt_t relative_time) | ||
433 | { | ||
434 | lt_t when_to_fire = litmus_clock() + relative_time; | ||
435 | |||
436 | TRACE("next nv tasklet in %llu ns\n", relative_time); | ||
437 | |||
438 | __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer, | ||
439 | ns_to_ktime(when_to_fire), | ||
440 | 0, | ||
441 | HRTIMER_MODE_ABS_PINNED, | ||
442 | 0); | ||
443 | } | ||
444 | |||
445 | static void nv_klmirqd_dbg_tasklet_func(unsigned long arg) | ||
446 | { | ||
447 | lt_t now = litmus_clock(); | ||
448 | nv_device_registry_t *reg = (nv_device_registry_t*)arg; | ||
449 | int gpunum = reg - &NV_DEVICE_REG[0]; | ||
450 | |||
451 | TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum); | ||
452 | |||
453 | /* set up the next timer */ | ||
454 | nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. | ||
455 | } | ||
456 | |||
457 | |||
458 | static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer) | ||
459 | { | ||
460 | lt_t now = litmus_clock(); | ||
461 | int gpu = (int)(now % num_online_gpus()); | ||
462 | nv_device_registry_t *reg; | ||
463 | |||
464 | TRACE("nvklmirqd_timer invoked!\n"); | ||
465 | |||
466 | reg = &NV_DEVICE_REG[gpu]; | ||
467 | |||
468 | if (reg->thread && reg->ready) { | ||
469 | TRACE("Adding a tasklet for GPU %d\n", gpu); | ||
470 | litmus_tasklet_schedule(®->nv_klmirqd_dbg_tasklet, reg->thread); | ||
471 | } | ||
472 | else { | ||
473 | TRACE("nv klmirqd is not ready!\n"); | ||
474 | nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms. | ||
475 | } | ||
476 | |||
477 | return HRTIMER_NORESTART; | ||
478 | } | ||
479 | #endif | ||
480 | |||
481 | |||
482 | static int gpu_owner_max_priority_order(struct binheap_node *a, | ||
483 | struct binheap_node *b) | ||
484 | { | ||
485 | struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node), | ||
486 | struct task_struct, rt_param); | ||
487 | struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node), | ||
488 | struct task_struct, rt_param); | ||
489 | |||
490 | BUG_ON(!d_a); | ||
491 | BUG_ON(!d_b); | ||
492 | |||
493 | return litmus->compare(d_a, d_b); | ||
494 | } | ||
495 | |||
496 | static int init_nv_device_reg(void) | ||
497 | { | ||
498 | int i; | ||
499 | char name[MAX_KLMIRQD_NAME_LEN+1]; | ||
500 | |||
501 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
502 | if (!klmirqd_is_ready()) { | ||
503 | TRACE("klmirqd is not ready!\n"); | ||
504 | return 0; | ||
505 | } | ||
506 | #endif | ||
507 | |||
508 | memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG)); | ||
509 | mb(); | ||
510 | |||
511 | |||
512 | for(i = 0; i < num_online_gpus(); ++i) { | ||
513 | raw_spin_lock_init(&NV_DEVICE_REG[i].lock); | ||
514 | INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order); | ||
515 | |||
516 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | ||
517 | tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet, nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]); | ||
518 | #endif | ||
519 | |||
520 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
521 | { | ||
522 | int default_cpu = litmus->map_gpu_to_cpu(i); | ||
523 | |||
524 | snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i); | ||
525 | |||
526 | NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb; | ||
527 | NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i); | ||
528 | mb(); | ||
529 | |||
530 | if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].callback) != 0) { | ||
531 | TRACE("Failed to create klmirqd thread for GPU %d\n", i); | ||
532 | } | ||
533 | } | ||
534 | #endif | ||
535 | } | ||
536 | |||
537 | #ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG | ||
538 | hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
539 | nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func; | ||
540 | nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000); | ||
541 | #endif | ||
542 | |||
543 | return(1); | ||
544 | } | ||
545 | |||
546 | |||
547 | /* The following code is full of nasty race conditions... */ | ||
548 | /* spawning of klmirqd threads can race with init_nv_device_reg()!!!! */ | ||
549 | static int shutdown_nv_device_reg(void) | ||
550 | { | ||
551 | TRACE("Shutting down nv device registration.\n"); | ||
552 | |||
553 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
554 | { | ||
555 | int i; | ||
556 | nv_device_registry_t *reg; | ||
557 | |||
558 | for (i = 0; i < num_online_gpus(); ++i) { | ||
559 | |||
560 | TRACE("Shutting down GPU %d.\n", i); | ||
561 | |||
562 | reg = &NV_DEVICE_REG[i]; | ||
563 | |||
564 | if (reg->thread && reg->ready) { | ||
565 | kill_klmirqd_thread(reg->thread); | ||
566 | |||
567 | /* assume that all goes according to plan... */ | ||
568 | reg->thread = NULL; | ||
569 | reg->ready = 0; | ||
570 | } | ||
571 | |||
572 | while (!binheap_empty(®->owners)) { | ||
573 | binheap_delete_root(®->owners, struct rt_param, gpu_owner_node); | ||
574 | } | ||
575 | } | ||
576 | } | ||
577 | #endif | ||
578 | |||
579 | return(1); | ||
580 | } | ||
581 | |||
582 | |||
583 | /* used to get the max-priority owner of the given nv device. */ | ||
584 | struct task_struct* get_nv_max_device_owner(u32 target_device_id) | ||
585 | { | ||
586 | struct task_struct *owner = NULL; | ||
587 | nv_device_registry_t *reg; | ||
588 | |||
589 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | ||
590 | |||
591 | reg = &NV_DEVICE_REG[target_device_id]; | ||
592 | |||
593 | if (!binheap_empty(®->owners)) { | ||
594 | owner = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node), | ||
595 | struct task_struct, rt_param); | ||
596 | TRACE_CUR("hp: %s/%d\n", owner->comm, owner->pid); | ||
597 | } | ||
598 | |||
599 | return(owner); | ||
600 | } | ||
601 | |||
602 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
603 | struct task_struct* get_nv_klmirqd_thread(u32 target_device_id) | ||
604 | { | ||
605 | struct task_struct *klmirqd = NULL; | ||
606 | nv_device_registry_t *reg; | ||
607 | |||
608 | BUG_ON(target_device_id >= NV_DEVICE_NUM); | ||
609 | |||
610 | reg = &NV_DEVICE_REG[target_device_id]; | ||
611 | |||
612 | if(likely(reg->ready)) { | ||
613 | klmirqd = reg->thread; | ||
614 | } | ||
615 | |||
616 | return klmirqd; | ||
617 | } | ||
618 | #endif | ||
619 | |||
620 | |||
621 | |||
622 | |||
623 | |||
624 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
625 | static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp) | ||
626 | { | ||
627 | int retval = 0; | ||
628 | |||
629 | TRACE_CUR("Increasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); | ||
630 | |||
631 | /* the klmirqd thread should never attempt to hold a litmus-level real-time lock, | ||
632 | * so nested support is not required */ | ||
633 | retval = litmus->__increase_prio(klmirqd, hp); | ||
634 | |||
635 | return retval; | ||
636 | } | ||
637 | |||
638 | static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp) | ||
639 | { | ||
640 | int retval = 0; | ||
641 | |||
642 | TRACE_CUR("Decreasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid); | ||
643 | |||
644 | /* the klmirqd thread should never attempt to hold a litmus-level real-time lock, | ||
645 | * so nested support is not required */ | ||
646 | retval = litmus->__decrease_prio(klmirqd, hp); | ||
647 | |||
648 | return retval; | ||
649 | } | ||
650 | #endif | ||
651 | |||
652 | |||
653 | |||
654 | |||
655 | /* call when a gpu owner becomes real-time */ | ||
656 | long enable_gpu_owner(struct task_struct *t) | ||
657 | { | ||
658 | long retval = 0; | ||
659 | // unsigned long flags; | ||
660 | int gpu; | ||
661 | nv_device_registry_t *reg; | ||
662 | |||
663 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
664 | struct task_struct *hp; | ||
665 | #endif | ||
666 | |||
667 | if (!tsk_rt(t)->held_gpus) { | ||
668 | TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); | ||
669 | return -1; | ||
670 | } | ||
671 | |||
672 | BUG_ON(!is_realtime(t)); | ||
673 | |||
674 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus) * BITS_PER_BYTE); | ||
675 | |||
676 | if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | ||
677 | TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu); | ||
678 | goto out; | ||
679 | } | ||
680 | |||
681 | /* update the registration (and maybe klmirqd) */ | ||
682 | reg = &NV_DEVICE_REG[gpu]; | ||
683 | |||
684 | // raw_spin_lock_irqsave(®->lock, flags); | ||
685 | |||
686 | binheap_add(&tsk_rt(t)->gpu_owner_node, ®->owners, | ||
687 | struct rt_param, gpu_owner_node); | ||
688 | |||
689 | |||
690 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
691 | hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), | ||
692 | struct task_struct, rt_param); | ||
693 | |||
694 | if (hp == t) { | ||
695 | /* we're the new hp */ | ||
696 | TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); | ||
697 | |||
698 | retval = gpu_klmirqd_increase_priority(reg->thread, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp); | ||
699 | } | ||
700 | #endif | ||
701 | |||
702 | // raw_spin_unlock_irqsave(®->lock, flags); | ||
703 | |||
704 | out: | ||
705 | return retval; | ||
706 | } | ||
707 | |||
708 | /* call when a gpu owner exits real-time */ | ||
709 | long disable_gpu_owner(struct task_struct *t) | ||
710 | { | ||
711 | long retval = 0; | ||
712 | // unsigned long flags; | ||
713 | int gpu; | ||
714 | nv_device_registry_t *reg; | ||
715 | |||
716 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
717 | struct task_struct *hp; | ||
718 | struct task_struct *new_hp = NULL; | ||
719 | #endif | ||
720 | |||
721 | if (!tsk_rt(t)->held_gpus) { | ||
722 | TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid); | ||
723 | return -1; | ||
724 | } | ||
725 | |||
726 | BUG_ON(!is_realtime(t)); | ||
727 | |||
728 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus) * BITS_PER_BYTE); | ||
729 | |||
730 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | ||
731 | TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu); | ||
732 | goto out; | ||
733 | } | ||
734 | |||
735 | TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu); | ||
736 | |||
737 | |||
738 | reg = &NV_DEVICE_REG[gpu]; | ||
739 | |||
740 | // raw_spin_lock_irqsave(®->lock, flags); | ||
741 | |||
742 | |||
743 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
744 | hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), | ||
745 | struct task_struct, rt_param); | ||
746 | |||
747 | binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); | ||
748 | |||
749 | |||
750 | if (!binheap_empty(®->owners)) { | ||
751 | new_hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), | ||
752 | struct task_struct, rt_param); | ||
753 | } | ||
754 | |||
755 | if (hp == t && new_hp != t) { | ||
756 | struct task_struct *to_inh = NULL; | ||
757 | |||
758 | TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); | ||
759 | |||
760 | if (new_hp) { | ||
761 | to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp; | ||
762 | } | ||
763 | |||
764 | retval = gpu_klmirqd_decrease_priority(reg->thread, to_inh); | ||
765 | } | ||
766 | #else | ||
767 | binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); | ||
768 | #endif | ||
769 | |||
770 | // raw_spin_unlock_irqsave(®->lock, flags); | ||
771 | |||
772 | |||
773 | out: | ||
774 | return retval; | ||
775 | } | ||
776 | |||
777 | |||
778 | |||
779 | |||
780 | |||
781 | |||
782 | |||
783 | |||
784 | |||
785 | |||
786 | int gpu_owner_increase_priority(struct task_struct *t) | ||
787 | { | ||
788 | int retval = 0; | ||
789 | int gpu; | ||
790 | nv_device_registry_t *reg; | ||
791 | |||
792 | struct task_struct *hp = NULL; | ||
793 | struct task_struct *hp_eff = NULL; | ||
794 | |||
795 | BUG_ON(!is_realtime(t)); | ||
796 | BUG_ON(!tsk_rt(t)->held_gpus); | ||
797 | |||
798 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus) * BITS_PER_BYTE); | ||
799 | |||
800 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | ||
801 | WARN_ON(!is_running(t)); | ||
802 | TRACE_CUR("gpu klmirqd may not inherit from %s/%d on GPU %d\n", | ||
803 | t->comm, t->pid, gpu); | ||
804 | goto out; | ||
805 | } | ||
806 | |||
807 | |||
808 | |||
809 | |||
810 | TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu); | ||
811 | reg = &NV_DEVICE_REG[gpu]; | ||
812 | |||
813 | hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), | ||
814 | struct task_struct, rt_param); | ||
815 | hp_eff = effective_priority(hp); | ||
816 | |||
817 | if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */ | ||
818 | binheap_decrease(&tsk_rt(t)->gpu_owner_node, ®->owners); | ||
819 | } | ||
820 | |||
821 | hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), | ||
822 | struct task_struct, rt_param); | ||
823 | |||
824 | if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */ | ||
825 | hp_eff = effective_priority(hp); | ||
826 | TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu); | ||
827 | |||
828 | retval = gpu_klmirqd_increase_priority(reg->thread, hp_eff); | ||
829 | } | ||
830 | |||
831 | out: | ||
832 | return retval; | ||
833 | } | ||
834 | |||
835 | |||
836 | int gpu_owner_decrease_priority(struct task_struct *t) | ||
837 | { | ||
838 | int retval = 0; | ||
839 | int gpu; | ||
840 | nv_device_registry_t *reg; | ||
841 | |||
842 | struct task_struct *hp = NULL; | ||
843 | struct task_struct *hp_eff = NULL; | ||
844 | |||
845 | BUG_ON(!is_realtime(t)); | ||
846 | BUG_ON(!tsk_rt(t)->held_gpus); | ||
847 | |||
848 | gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus) * BITS_PER_BYTE); | ||
849 | |||
850 | if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) { | ||
851 | WARN_ON(!is_running(t)); | ||
852 | TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n", | ||
853 | t->comm, t->pid, gpu); | ||
854 | goto out; | ||
855 | } | ||
856 | |||
857 | TRACE_CUR("task %s/%d on GPU %d decreasing priority.\n", t->comm, t->pid, gpu); | ||
858 | reg = &NV_DEVICE_REG[gpu]; | ||
859 | |||
860 | hp = container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), | ||
861 | struct task_struct, rt_param); | ||
862 | hp_eff = effective_priority(hp); | ||
863 | binheap_delete(&tsk_rt(t)->gpu_owner_node, ®->owners); | ||
864 | binheap_add(&tsk_rt(t)->gpu_owner_node, ®->owners, | ||
865 | struct rt_param, gpu_owner_node); | ||
866 | |||
867 | if (hp == t) { /* t was originally the hp */ | ||
868 | struct task_struct *new_hp = | ||
869 | container_of(binheap_top_entry(®->owners, struct rt_param, gpu_owner_node), | ||
870 | struct task_struct, rt_param); | ||
871 | if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */ | ||
872 | hp_eff = effective_priority(new_hp); | ||
873 | TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu); | ||
874 | retval = gpu_klmirqd_decrease_priority(reg->thread, hp_eff); | ||
875 | } | ||
876 | } | ||
877 | |||
878 | out: | ||
879 | return retval; | ||
880 | } | ||
881 | |||
882 | |||
883 | |||
884 | |||
885 | |||
886 | |||
887 | |||
888 | |||
889 | |||
890 | static int __reg_nv_device(int reg_device_id, struct task_struct *t) | ||
891 | { | ||
892 | __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); | ||
893 | |||
894 | return(0); | ||
895 | } | ||
896 | |||
897 | static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) | ||
898 | { | ||
899 | __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); | ||
900 | |||
901 | return(0); | ||
902 | } | ||
903 | |||
904 | |||
905 | int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) | ||
906 | { | ||
907 | int ret; | ||
908 | |||
909 | if((reg_device_id < num_online_gpus()) && (reg_device_id >= 0)) | ||
910 | { | ||
911 | if(reg_action) | ||
912 | ret = __reg_nv_device(reg_device_id, t); | ||
913 | else | ||
914 | ret = __clear_reg_nv_device(reg_device_id, t); | ||
915 | } | ||
916 | else | ||
917 | { | ||
918 | ret = -ENODEV; | ||
919 | } | ||
920 | |||
921 | return(ret); | ||
922 | } | ||
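A brief sketch (assumed caller, not part of this patch) of how the registration entry point above might be driven when a task starts and stops using a GPU; the variable name 'gpu' and the surrounding control flow are assumptions.

/* Sketch only: 'gpu' is a device number validated by reg_nv_device()
 * against num_online_gpus(). */
if (reg_nv_device(gpu, 1, current) != 0)	/* 1 = register: sets the bit in held_gpus */
	return -ENODEV;

/* ... task submits work to the GPU while the held_gpus bit stays set ... */

reg_nv_device(gpu, 0, current);			/* 0 = deregister: clears the bit */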
923 | |||
924 | |||
925 | |||
926 | |||
927 | |||
928 | |||
929 | |||
930 | |||
931 | |||
932 | |||
933 | |||
934 | |||
935 | |||
936 | |||
937 | |||
938 | |||
939 | |||
940 | |||
941 | |||
942 | |||
943 | |||
944 | |||
945 | |||
946 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
947 | //void pai_check_priority_increase(struct task_struct *t, int reg_device_id) | ||
948 | //{ | ||
949 | // unsigned long flags; | ||
950 | // nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; | ||
951 | // | ||
952 | // | ||
953 | // | ||
954 | // if(reg->max_prio_owner != t) { | ||
955 | // | ||
956 | // raw_spin_lock_irqsave(®->lock, flags); | ||
957 | // | ||
958 | // if(reg->max_prio_owner != t) { | ||
959 | // if(litmus->compare(t, reg->max_prio_owner)) { | ||
960 | // litmus->change_prio_pai_tasklet(reg->max_prio_owner, t); | ||
961 | // reg->max_prio_owner = t; | ||
962 | // } | ||
963 | // } | ||
964 | // | ||
965 | // raw_spin_unlock_irqrestore(®->lock, flags); | ||
966 | // } | ||
967 | //} | ||
968 | // | ||
969 | // | ||
970 | //void pai_check_priority_decrease(struct task_struct *t, int reg_device_id) | ||
971 | //{ | ||
972 | // unsigned long flags; | ||
973 | // nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id]; | ||
974 | // | ||
975 | // if(reg->max_prio_owner == t) { | ||
976 | // | ||
977 | // raw_spin_lock_irqsave(®->lock, flags); | ||
978 | // | ||
979 | // if(reg->max_prio_owner == t) { | ||
980 | // reg->max_prio_owner = find_hp_owner(reg, NULL); | ||
981 | // if(reg->max_prio_owner != t) { | ||
982 | // litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); | ||
983 | // } | ||
984 | // } | ||
985 | // | ||
986 | // raw_spin_unlock_irqrestore(®->lock, flags); | ||
987 | // } | ||
988 | //} | ||
989 | #endif | ||
990 | |||
991 | |||
992 | |||
993 | |||
994 | |||
995 | //static int __reg_nv_device(int reg_device_id, struct task_struct *t) | ||
996 | //{ | ||
997 | // int ret = 0; | ||
998 | // int i; | ||
999 | // struct task_struct *old_max = NULL; | ||
1000 | // | ||
1001 | // | ||
1002 | // raw_spin_lock_irqsave(®->lock, flags); | ||
1003 | // | ||
1004 | // if(reg->nr_owners < NV_MAX_SIMULT_USERS) { | ||
1005 | // TRACE_TASK(t, "registers GPU %d\n", reg_device_id); | ||
1006 | // for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { | ||
1007 | // if(reg->owners[i] == NULL) { | ||
1008 | // reg->owners[i] = t; | ||
1009 | // | ||
1010 | // //if(edf_higher_prio(t, reg->max_prio_owner)) { | ||
1011 | // if(litmus->compare(t, reg->max_prio_owner)) { | ||
1012 | // old_max = reg->max_prio_owner; | ||
1013 | // reg->max_prio_owner = t; | ||
1014 | // | ||
1015 | //#ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1016 | // litmus->change_prio_pai_tasklet(old_max, t); | ||
1017 | //#endif | ||
1018 | // } | ||
1019 | // | ||
1020 | //#ifdef CONFIG_LITMUS_SOFTIRQD | ||
1021 | // down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem); | ||
1022 | //#endif | ||
1023 | // ++(reg->nr_owners); | ||
1024 | // | ||
1025 | // break; | ||
1026 | // } | ||
1027 | // } | ||
1028 | // } | ||
1029 | // else | ||
1030 | // { | ||
1031 | // TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id); | ||
1032 | // //ret = -EBUSY; | ||
1033 | // } | ||
1034 | // | ||
1035 | // raw_spin_unlock_irqrestore(®->lock, flags); | ||
1036 | // | ||
1037 | // __set_bit(reg_device_id, &tsk_rt(t)->held_gpus); | ||
1038 | // | ||
1039 | // return(ret); | ||
1040 | //} | ||
1041 | // | ||
1042 | //static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t) | ||
1043 | //{ | ||
1044 | // int ret = 0; | ||
1045 | // int i; | ||
1046 | // unsigned long flags; | ||
1047 | // nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id]; | ||
1048 | // | ||
1049 | //#ifdef CONFIG_LITMUS_SOFTIRQD | ||
1050 | // struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id); | ||
1051 | //#endif | ||
1052 | // | ||
1053 | // if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) { | ||
1054 | // return ret; | ||
1055 | // } | ||
1056 | // | ||
1057 | // raw_spin_lock_irqsave(®->lock, flags); | ||
1058 | // | ||
1059 | // TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id); | ||
1060 | // | ||
1061 | // for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) { | ||
1062 | // if(reg->owners[i] == t) { | ||
1063 | //#ifdef CONFIG_LITMUS_SOFTIRQD | ||
1064 | // flush_pending(klmirqd_th, t); | ||
1065 | //#endif | ||
1066 | // if(reg->max_prio_owner == t) { | ||
1067 | // reg->max_prio_owner = find_hp_owner(reg, t); | ||
1068 | //#ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1069 | // litmus->change_prio_pai_tasklet(t, reg->max_prio_owner); | ||
1070 | //#endif | ||
1071 | // } | ||
1072 | // | ||
1073 | //#ifdef CONFIG_LITMUS_SOFTIRQD | ||
1074 | // up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem); | ||
1075 | //#endif | ||
1076 | // | ||
1077 | // reg->owners[i] = NULL; | ||
1078 | // --(reg->nr_owners); | ||
1079 | // | ||
1080 | // break; | ||
1081 | // } | ||
1082 | // } | ||
1083 | // | ||
1084 | // raw_spin_unlock_irqrestore(®->lock, flags); | ||
1085 | // | ||
1086 | // __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus); | ||
1087 | // | ||
1088 | // return(ret); | ||
1089 | //} | ||
1090 | // | ||
1091 | // | ||
1092 | //int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t) | ||
1093 | //{ | ||
1094 | // int ret; | ||
1095 | // | ||
1096 | // if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0)) | ||
1097 | // { | ||
1098 | // if(reg_action) | ||
1099 | // ret = __reg_nv_device(reg_device_id, t); | ||
1100 | // else | ||
1101 | // ret = __clear_reg_nv_device(reg_device_id, t); | ||
1102 | // } | ||
1103 | // else | ||
1104 | // { | ||
1105 | // ret = -ENODEV; | ||
1106 | // } | ||
1107 | // | ||
1108 | // return(ret); | ||
1109 | //} | ||
1110 | |||
1111 | |||
1112 | |||
1113 | //void lock_nv_registry(u32 target_device_id, unsigned long* flags) | ||
1114 | //{ | ||
1115 | // BUG_ON(target_device_id >= NV_DEVICE_NUM); | ||
1116 | // | ||
1117 | // if(in_interrupt()) | ||
1118 | // TRACE("Locking registry for %d.\n", target_device_id); | ||
1119 | // else | ||
1120 | // TRACE_CUR("Locking registry for %d.\n", target_device_id); | ||
1121 | // | ||
1122 | // raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags); | ||
1123 | //} | ||
1124 | // | ||
1125 | //void unlock_nv_registry(u32 target_device_id, unsigned long* flags) | ||
1126 | //{ | ||
1127 | // BUG_ON(target_device_id >= NV_DEVICE_NUM); | ||
1128 | // | ||
1129 | // if(in_interrupt()) | ||
1130 | // TRACE("Unlocking registry for %d.\n", target_device_id); | ||
1131 | // else | ||
1132 | // TRACE_CUR("Unlocking registry for %d.\n", target_device_id); | ||
1133 | // | ||
1134 | // raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags); | ||
1135 | //} | ||
1136 | |||
1137 | |||
diff --git a/litmus/preempt.c b/litmus/preempt.c index 6be2f26728b8..86ad2efb591a 100644 --- a/litmus/preempt.c +++ b/litmus/preempt.c | |||
@@ -27,10 +27,12 @@ void sched_state_will_schedule(struct task_struct* tsk) | |||
27 | set_sched_state(PICKED_WRONG_TASK); | 27 | set_sched_state(PICKED_WRONG_TASK); |
28 | else | 28 | else |
29 | set_sched_state(WILL_SCHEDULE); | 29 | set_sched_state(WILL_SCHEDULE); |
30 | } else | 30 | } else { |
31 | /* Litmus tasks should never be subject to a remote | 31 | /* Litmus tasks should never be subject to a remote |
32 | * set_tsk_need_resched(). */ | 32 | * set_tsk_need_resched(). */ |
33 | BUG_ON(is_realtime(tsk)); | 33 | //BUG_ON(is_realtime(tsk)); |
34 | } | ||
35 | |||
34 | #ifdef CONFIG_PREEMPT_STATE_TRACE | 36 | #ifdef CONFIG_PREEMPT_STATE_TRACE |
35 | TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", | 37 | TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", |
36 | __builtin_return_address(0)); | 38 | __builtin_return_address(0)); |
@@ -46,14 +48,18 @@ void sched_state_ipi(void) | |||
46 | /* Cause scheduler to be invoked. | 48 | /* Cause scheduler to be invoked. |
47 | * This will cause a transition to WILL_SCHEDULE. */ | 49 | * This will cause a transition to WILL_SCHEDULE. */ |
48 | set_tsk_need_resched(current); | 50 | set_tsk_need_resched(current); |
51 | /* | ||
49 | TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", | 52 | TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", |
50 | current->comm, current->pid); | 53 | current->comm, current->pid); |
54 | */ | ||
51 | TS_SEND_RESCHED_END; | 55 | TS_SEND_RESCHED_END; |
52 | } else { | 56 | } else { |
53 | /* ignore */ | 57 | /* ignore */ |
58 | /* | ||
54 | TRACE_STATE("ignoring IPI in state %x (%s)\n", | 59 | TRACE_STATE("ignoring IPI in state %x (%s)\n", |
55 | get_sched_state(), | 60 | get_sched_state(), |
56 | sched_state_name(get_sched_state())); | 61 | sched_state_name(get_sched_state())); |
62 | */ | ||
57 | } | 63 | } |
58 | } | 64 | } |
59 | 65 | ||
@@ -70,23 +76,34 @@ void litmus_reschedule(int cpu) | |||
70 | * is not aware of the need to reschedule at this point. */ | 76 | * is not aware of the need to reschedule at this point. */ |
71 | 77 | ||
72 | /* is a context switch in progress? */ | 78 | /* is a context switch in progress? */ |
73 | if (cpu_is_in_sched_state(cpu, TASK_PICKED)) | 79 | if (cpu_is_in_sched_state(cpu, TASK_PICKED)) { |
74 | picked_transition_ok = sched_state_transition_on( | 80 | picked_transition_ok = sched_state_transition_on( |
75 | cpu, TASK_PICKED, PICKED_WRONG_TASK); | 81 | cpu, TASK_PICKED, PICKED_WRONG_TASK); |
76 | 82 | ||
83 | TRACE_CUR("cpu %d: picked_transition_ok = %d\n", cpu, picked_transition_ok); | ||
84 | } | ||
85 | else { | ||
86 | TRACE_CUR("cpu %d: picked_transition_ok = 0 (static)\n", cpu); | ||
87 | } | ||
88 | |||
77 | if (!picked_transition_ok && | 89 | if (!picked_transition_ok && |
78 | cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { | 90 | cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { |
79 | /* We either raced with the end of the context switch, or the | 91 | /* We either raced with the end of the context switch, or the |
80 | * CPU was in TASK_SCHEDULED anyway. */ | 92 | * CPU was in TASK_SCHEDULED anyway. */ |
81 | scheduled_transition_ok = sched_state_transition_on( | 93 | scheduled_transition_ok = sched_state_transition_on( |
82 | cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); | 94 | cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); |
95 | TRACE_CUR("cpu %d: scheduled_transition_ok = %d\n", cpu, scheduled_transition_ok); | ||
96 | } | ||
97 | else { | ||
98 | TRACE_CUR("cpu %d: scheduled_transition_ok = 0 (static)\n", cpu); | ||
83 | } | 99 | } |
84 | 100 | ||
85 | /* If the CPU was in state TASK_SCHEDULED, then we need to cause the | 101 | /* If the CPU was in state TASK_SCHEDULED, then we need to cause the |
86 | * scheduler to be invoked. */ | 102 | * scheduler to be invoked. */ |
87 | if (scheduled_transition_ok) { | 103 | if (scheduled_transition_ok) { |
88 | if (smp_processor_id() == cpu) | 104 | if (smp_processor_id() == cpu) { |
89 | set_tsk_need_resched(current); | 105 | set_tsk_need_resched(current); |
106 | } | ||
90 | else { | 107 | else { |
91 | TS_SEND_RESCHED_START(cpu); | 108 | TS_SEND_RESCHED_START(cpu); |
92 | smp_send_reschedule(cpu); | 109 | smp_send_reschedule(cpu); |
@@ -101,11 +118,16 @@ void litmus_reschedule(int cpu) | |||
101 | 118 | ||
102 | void litmus_reschedule_local(void) | 119 | void litmus_reschedule_local(void) |
103 | { | 120 | { |
104 | if (is_in_sched_state(TASK_PICKED)) | 121 | if (is_in_sched_state(TASK_PICKED)) { |
105 | set_sched_state(PICKED_WRONG_TASK); | 122 | set_sched_state(PICKED_WRONG_TASK); |
123 | |||
124 | TRACE_CUR("cpu %d: transitioned to PICKED_WRONG_TASK\n", smp_processor_id()); | ||
125 | } | ||
106 | else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) { | 126 | else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) { |
107 | set_sched_state(WILL_SCHEDULE); | 127 | set_sched_state(WILL_SCHEDULE); |
108 | set_tsk_need_resched(current); | 128 | set_tsk_need_resched(current); |
129 | |||
130 | TRACE_CUR("cpu %d: transitioned to WILL_SCHEDULE\n", smp_processor_id()); | ||
109 | } | 131 | } |
110 | } | 132 | } |
111 | 133 | ||
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c new file mode 100644 index 000000000000..3dfd8ae9d221 --- /dev/null +++ b/litmus/rsm_lock.c | |||
@@ -0,0 +1,796 @@ | |||
1 | #include <linux/slab.h> | ||
2 | #include <linux/uaccess.h> | ||
3 | |||
4 | #include <litmus/trace.h> | ||
5 | #include <litmus/sched_plugin.h> | ||
6 | #include <litmus/rsm_lock.h> | ||
7 | |||
8 | //#include <litmus/edf_common.h> | ||
9 | |||
10 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
11 | #include <litmus/gpu_affinity.h> | ||
12 | #endif | ||
13 | |||
14 | |||
15 | /* caller is responsible for locking */ | ||
16 | static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex, | ||
17 | struct task_struct* skip) | ||
18 | { | ||
19 | wait_queue_t *q; | ||
20 | struct list_head *pos; | ||
21 | struct task_struct *queued = NULL, *found = NULL; | ||
22 | |||
23 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
24 | dgl_wait_state_t *dgl_wait = NULL; | ||
25 | #endif | ||
26 | |||
27 | list_for_each(pos, &mutex->wait.task_list) { | ||
28 | q = list_entry(pos, wait_queue_t, task_list); | ||
29 | |||
30 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
31 | if(q->func == dgl_wake_up) { | ||
32 | dgl_wait = (dgl_wait_state_t*) q->private; | ||
33 | if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) { | ||
34 | queued = dgl_wait->task; | ||
35 | } | ||
36 | else { | ||
37 | queued = NULL; // skip it. | ||
38 | } | ||
39 | } | ||
40 | else { | ||
41 | queued = (struct task_struct*) q->private; | ||
42 | } | ||
43 | #else | ||
44 | queued = (struct task_struct*) q->private; | ||
45 | #endif | ||
46 | |||
47 | /* Compare task prios, find high prio task. */ | ||
48 | //if (queued && queued != skip && edf_higher_prio(queued, found)) { | ||
49 | if (queued && queued != skip && litmus->compare(queued, found)) { | ||
50 | found = queued; | ||
51 | } | ||
52 | } | ||
53 | return found; | ||
54 | } | ||
55 | |||
56 | |||
57 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
58 | |||
59 | int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t) | ||
60 | { | ||
61 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
62 | return(mutex->owner == t); | ||
63 | } | ||
64 | |||
65 | // return 1 if resource was immediately acquired. | ||
66 | // Assumes mutex->lock is held. | ||
67 | // Must set task state to TASK_UNINTERRUPTIBLE if task blocks. | ||
68 | int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, | ||
69 | wait_queue_t* wq_node) | ||
70 | { | ||
71 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
72 | struct task_struct *t = dgl_wait->task; | ||
73 | |||
74 | int acquired_immediately = 0; | ||
75 | |||
76 | BUG_ON(t != current); | ||
77 | |||
78 | if (mutex->owner) { | ||
79 | TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident); | ||
80 | |||
81 | init_dgl_waitqueue_entry(wq_node, dgl_wait); | ||
82 | |||
83 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
84 | __add_wait_queue_tail_exclusive(&mutex->wait, wq_node); | ||
85 | } else { | ||
86 | TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident); | ||
87 | |||
88 | /* it's ours now */ | ||
89 | mutex->owner = t; | ||
90 | |||
91 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
92 | binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, | ||
93 | struct nested_info, hp_binheap_node); | ||
94 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
95 | |||
96 | acquired_immediately = 1; | ||
97 | } | ||
98 | |||
99 | return acquired_immediately; | ||
100 | } | ||
101 | |||
102 | void rsm_mutex_enable_priority(struct litmus_lock *l, | ||
103 | dgl_wait_state_t* dgl_wait) | ||
104 | { | ||
105 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
106 | struct task_struct *t = dgl_wait->task; | ||
107 | struct task_struct *owner = mutex->owner; | ||
108 | unsigned long flags = 0; // these are unused under DGL coarse-grain locking | ||
109 | |||
110 | BUG_ON(owner == t); | ||
111 | |||
112 | tsk_rt(t)->blocked_lock = l; | ||
113 | mb(); | ||
114 | |||
115 | //if (edf_higher_prio(t, mutex->hp_waiter)) { | ||
116 | if (litmus->compare(t, mutex->hp_waiter)) { | ||
117 | |||
118 | struct task_struct *old_max_eff_prio; | ||
119 | struct task_struct *new_max_eff_prio; | ||
120 | struct task_struct *new_prio = NULL; | ||
121 | |||
122 | if(mutex->hp_waiter) | ||
123 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", | ||
124 | mutex->hp_waiter->comm, mutex->hp_waiter->pid); | ||
125 | else | ||
126 | TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); | ||
127 | |||
128 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
129 | |||
130 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
131 | mutex->hp_waiter = t; | ||
132 | l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); | ||
133 | binheap_decrease(&l->nest.hp_binheap_node, | ||
134 | &tsk_rt(owner)->hp_blocked_tasks); | ||
135 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
136 | |||
137 | if(new_max_eff_prio != old_max_eff_prio) { | ||
138 | TRACE_TASK(t, "is new hp_waiter.\n"); | ||
139 | |||
140 | if ((effective_priority(owner) == old_max_eff_prio) || | ||
141 | //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){ | ||
142 | (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){ | ||
143 | new_prio = new_max_eff_prio; | ||
144 | } | ||
145 | } | ||
146 | else { | ||
147 | TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); | ||
148 | } | ||
149 | |||
150 | if(new_prio) { | ||
151 | litmus->nested_increase_prio(owner, new_prio, | ||
152 | &mutex->lock, flags); // unlocks lock. | ||
153 | } | ||
154 | else { | ||
155 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
156 | unlock_fine_irqrestore(&mutex->lock, flags); | ||
157 | } | ||
158 | } | ||
159 | else { | ||
160 | TRACE_TASK(t, "no change in hp_waiter.\n"); | ||
161 | unlock_fine_irqrestore(&mutex->lock, flags); | ||
162 | } | ||
163 | } | ||
164 | |||
165 | static void select_next_lock_if_primary(struct litmus_lock *l, | ||
166 | dgl_wait_state_t *dgl_wait) | ||
167 | { | ||
168 | if(tsk_rt(dgl_wait->task)->blocked_lock == l) { | ||
169 | TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n", | ||
170 | l->ident, dgl_wait->task->comm, dgl_wait->task->pid); | ||
171 | tsk_rt(dgl_wait->task)->blocked_lock = NULL; | ||
172 | mb(); | ||
173 | select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on | ||
174 | } | ||
175 | else { | ||
176 | TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n", | ||
177 | l->ident, dgl_wait->task->comm, dgl_wait->task->pid); | ||
178 | } | ||
179 | } | ||
180 | #endif | ||
181 | |||
182 | |||
183 | |||
184 | |||
185 | int rsm_mutex_lock(struct litmus_lock* l) | ||
186 | { | ||
187 | struct task_struct *t = current; | ||
188 | struct task_struct *owner; | ||
189 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
190 | wait_queue_t wait; | ||
191 | unsigned long flags; | ||
192 | |||
193 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
194 | raw_spinlock_t *dgl_lock; | ||
195 | #endif | ||
196 | |||
197 | if (!is_realtime(t)) | ||
198 | return -EPERM; | ||
199 | |||
200 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
201 | dgl_lock = litmus->get_dgl_spinlock(t); | ||
202 | #endif | ||
203 | |||
204 | lock_global_irqsave(dgl_lock, flags); | ||
205 | lock_fine_irqsave(&mutex->lock, flags); | ||
206 | |||
207 | if (mutex->owner) { | ||
208 | TRACE_TASK(t, "Blocking on lock %d.\n", l->ident); | ||
209 | |||
210 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
211 | // KLUDGE: don't count this suspension as time in the gpu | ||
212 | // critical section | ||
213 | if(tsk_rt(t)->held_gpus) { | ||
214 | tsk_rt(t)->suspend_gpu_tracker_on_block = 1; | ||
215 | } | ||
216 | #endif | ||
217 | |||
218 | /* resource is not free => must suspend and wait */ | ||
219 | |||
220 | owner = mutex->owner; | ||
221 | |||
222 | init_waitqueue_entry(&wait, t); | ||
223 | |||
224 | tsk_rt(t)->blocked_lock = l; /* record where we are blocked */ | ||
225 | mb(); // needed? | ||
226 | |||
227 | /* FIXME: interruptible would be nice some day */ | ||
228 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
229 | |||
230 | __add_wait_queue_tail_exclusive(&mutex->wait, &wait); | ||
231 | |||
232 | /* check if we need to activate priority inheritance */ | ||
233 | //if (edf_higher_prio(t, mutex->hp_waiter)) { | ||
234 | if (litmus->compare(t, mutex->hp_waiter)) { | ||
235 | |||
236 | struct task_struct *old_max_eff_prio; | ||
237 | struct task_struct *new_max_eff_prio; | ||
238 | struct task_struct *new_prio = NULL; | ||
239 | |||
240 | if(mutex->hp_waiter) | ||
241 | TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n", | ||
242 | mutex->hp_waiter->comm, mutex->hp_waiter->pid); | ||
243 | else | ||
244 | TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n"); | ||
245 | |||
246 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
247 | |||
248 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
249 | mutex->hp_waiter = t; | ||
250 | l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); | ||
251 | binheap_decrease(&l->nest.hp_binheap_node, | ||
252 | &tsk_rt(owner)->hp_blocked_tasks); | ||
253 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
254 | |||
255 | if(new_max_eff_prio != old_max_eff_prio) { | ||
256 | TRACE_TASK(t, "is new hp_waiter.\n"); | ||
257 | |||
258 | if ((effective_priority(owner) == old_max_eff_prio) || | ||
259 | //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){ | ||
260 | (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){ | ||
261 | new_prio = new_max_eff_prio; | ||
262 | } | ||
263 | } | ||
264 | else { | ||
265 | TRACE_TASK(t, "no change in max_eff_prio of heap.\n"); | ||
266 | } | ||
267 | |||
268 | if(new_prio) { | ||
269 | litmus->nested_increase_prio(owner, new_prio, &mutex->lock, | ||
270 | flags); // unlocks lock. | ||
271 | } | ||
272 | else { | ||
273 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
274 | unlock_fine_irqrestore(&mutex->lock, flags); | ||
275 | } | ||
276 | } | ||
277 | else { | ||
278 | TRACE_TASK(t, "no change in hp_waiter.\n"); | ||
279 | |||
280 | unlock_fine_irqrestore(&mutex->lock, flags); | ||
281 | } | ||
282 | |||
283 | unlock_global_irqrestore(dgl_lock, flags); | ||
284 | |||
285 | TS_LOCK_SUSPEND; | ||
286 | |||
287 | /* We depend on the FIFO order. Thus, we don't need to recheck | ||
288 | * when we wake up; we are guaranteed to have the lock since | ||
289 | * there is only one wake up per release. | ||
290 | */ | ||
291 | |||
292 | suspend_for_lock(); | ||
293 | |||
294 | TS_LOCK_RESUME; | ||
295 | |||
296 | /* Since we hold the lock, no other task will change | ||
297 | * ->owner. We can thus check it without acquiring the spin | ||
298 | * lock. */ | ||
299 | BUG_ON(mutex->owner != t); | ||
300 | |||
301 | TRACE_TASK(t, "Acquired lock %d.\n", l->ident); | ||
302 | |||
303 | } else { | ||
304 | TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident); | ||
305 | |||
306 | /* it's ours now */ | ||
307 | mutex->owner = t; | ||
308 | |||
309 | raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock); | ||
310 | binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks, | ||
311 | struct nested_info, hp_binheap_node); | ||
312 | raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock); | ||
313 | |||
314 | |||
315 | unlock_fine_irqrestore(&mutex->lock, flags); | ||
316 | unlock_global_irqrestore(dgl_lock, flags); | ||
317 | } | ||
318 | |||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | |||
323 | |||
324 | int rsm_mutex_unlock(struct litmus_lock* l) | ||
325 | { | ||
326 | struct task_struct *t = current, *next = NULL; | ||
327 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
328 | unsigned long flags; | ||
329 | |||
330 | struct task_struct *old_max_eff_prio; | ||
331 | |||
332 | int wake_up_task = 1; | ||
333 | |||
334 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
335 | dgl_wait_state_t *dgl_wait = NULL; | ||
336 | raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); | ||
337 | #endif | ||
338 | |||
339 | int err = 0; | ||
340 | |||
341 | if (mutex->owner != t) { | ||
342 | err = -EINVAL; | ||
343 | return err; | ||
344 | } | ||
345 | |||
346 | lock_global_irqsave(dgl_lock, flags); | ||
347 | lock_fine_irqsave(&mutex->lock, flags); | ||
348 | |||
349 | raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
350 | |||
351 | TRACE_TASK(t, "Freeing lock %d\n", l->ident); | ||
352 | |||
353 | old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks); | ||
354 | binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks); | ||
355 | |||
356 | if(tsk_rt(t)->inh_task){ | ||
357 | struct task_struct *new_max_eff_prio = | ||
358 | top_priority(&tsk_rt(t)->hp_blocked_tasks); | ||
359 | |||
360 | if((new_max_eff_prio == NULL) || | ||
361 | /* there was a change in eff prio */ | ||
362 | ( (new_max_eff_prio != old_max_eff_prio) && | ||
363 | /* and owner had the old eff prio */ | ||
364 | (effective_priority(t) == old_max_eff_prio)) ) | ||
365 | { | ||
366 | // old_max_eff_prio > new_max_eff_prio | ||
367 | |||
368 | //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) { | ||
369 | if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) { | ||
370 | TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n", | ||
371 | new_max_eff_prio->comm, new_max_eff_prio->pid, | ||
372 | t->comm, t->pid, tsk_rt(t)->inh_task->comm, | ||
373 | tsk_rt(t)->inh_task->pid); | ||
374 | WARN_ON(1); | ||
375 | } | ||
376 | |||
377 | litmus->decrease_prio(t, new_max_eff_prio); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) && | ||
382 | tsk_rt(t)->inh_task != NULL) | ||
383 | { | ||
384 | WARN_ON(tsk_rt(t)->inh_task != NULL); | ||
385 | TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n", | ||
386 | tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid); | ||
387 | } | ||
388 | |||
389 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); | ||
390 | |||
391 | |||
392 | /* check if there are jobs waiting for this resource */ | ||
393 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
394 | __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next); | ||
395 | if(dgl_wait) { | ||
396 | next = dgl_wait->task; | ||
397 | //select_next_lock_if_primary(l, dgl_wait); | ||
398 | } | ||
399 | #else | ||
400 | next = __waitqueue_remove_first(&mutex->wait); | ||
401 | #endif | ||
402 | if (next) { | ||
403 | /* next becomes the resource holder */ | ||
404 | mutex->owner = next; | ||
405 | TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid); | ||
406 | |||
407 | /* determine new hp_waiter if necessary */ | ||
408 | if (next == mutex->hp_waiter) { | ||
409 | |||
410 | TRACE_TASK(next, "was highest-prio waiter\n"); | ||
411 | /* next has the highest priority --- it doesn't need to | ||
412 | * inherit. However, we need to make sure that the | ||
413 | * next-highest priority in the queue is reflected in | ||
414 | * hp_waiter. */ | ||
415 | mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next); | ||
416 | l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ? | ||
417 | effective_priority(mutex->hp_waiter) : | ||
418 | NULL; | ||
419 | |||
420 | if (mutex->hp_waiter) | ||
421 | TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n"); | ||
422 | else | ||
423 | TRACE("no further waiters\n"); | ||
424 | |||
425 | raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
426 | |||
427 | binheap_add(&l->nest.hp_binheap_node, | ||
428 | &tsk_rt(next)->hp_blocked_tasks, | ||
429 | struct nested_info, hp_binheap_node); | ||
430 | |||
431 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
432 | if(dgl_wait) { | ||
433 | select_next_lock_if_primary(l, dgl_wait); | ||
434 | //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining); | ||
435 | --(dgl_wait->nr_remaining); | ||
436 | wake_up_task = (dgl_wait->nr_remaining == 0); | ||
437 | } | ||
438 | #endif | ||
439 | raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
440 | } | ||
441 | else { | ||
442 | /* Well, if 'next' is not the highest-priority waiter, | ||
443 | * then it (probably) ought to inherit the highest-priority | ||
444 | * waiter's priority. */ | ||
445 | TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident); | ||
446 | |||
447 | raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
448 | |||
449 | binheap_add(&l->nest.hp_binheap_node, | ||
450 | &tsk_rt(next)->hp_blocked_tasks, | ||
451 | struct nested_info, hp_binheap_node); | ||
452 | |||
453 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
454 | if(dgl_wait) { | ||
455 | select_next_lock_if_primary(l, dgl_wait); | ||
456 | --(dgl_wait->nr_remaining); | ||
457 | wake_up_task = (dgl_wait->nr_remaining == 0); | ||
458 | } | ||
459 | #endif | ||
460 | |||
461 | /* It is possible that 'next' *should* be the hp_waiter, but isn't | ||
462 | * because that update hasn't yet executed (update operation is | ||
463 | * probably blocked on mutex->lock). So only inherit if the top of | ||
464 | * 'next's priority heap is indeed the effective prio. of hp_waiter. | ||
465 | * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter) | ||
466 | * since the effective priority of hp_waiter can change (and the | ||
467 | * update has not made it to this lock).) | ||
468 | */ | ||
469 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
470 | if((l->nest.hp_waiter_eff_prio != NULL) && | ||
471 | (top_priority(&tsk_rt(next)->hp_blocked_tasks) == | ||
472 | l->nest.hp_waiter_eff_prio)) | ||
473 | { | ||
474 | if(dgl_wait && tsk_rt(next)->blocked_lock) { | ||
475 | BUG_ON(wake_up_task); | ||
476 | //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) { | ||
477 | if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) { | ||
478 | litmus->nested_increase_prio(next, | ||
479 | l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock. | ||
480 | goto out; // all spinlocks are released. bail out now. | ||
481 | } | ||
482 | } | ||
483 | else { | ||
484 | litmus->increase_prio(next, l->nest.hp_waiter_eff_prio); | ||
485 | } | ||
486 | } | ||
487 | |||
488 | raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
489 | #else | ||
490 | if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) == | ||
491 | l->nest.hp_waiter_eff_prio)) | ||
492 | { | ||
493 | litmus->increase_prio(next, l->nest.hp_waiter_eff_prio); | ||
494 | } | ||
495 | raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock); | ||
496 | #endif | ||
497 | } | ||
498 | |||
499 | if(wake_up_task) { | ||
500 | TRACE_TASK(next, "waking up since it is no longer blocked.\n"); | ||
501 | |||
502 | tsk_rt(next)->blocked_lock = NULL; | ||
503 | mb(); | ||
504 | |||
505 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
506 | // re-enable tracking | ||
507 | if(tsk_rt(next)->held_gpus) { | ||
508 | tsk_rt(next)->suspend_gpu_tracker_on_block = 0; | ||
509 | } | ||
510 | #endif | ||
511 | |||
512 | wake_up_process(next); | ||
513 | } | ||
514 | else { | ||
515 | TRACE_TASK(next, "is still blocked.\n"); | ||
516 | } | ||
517 | } | ||
518 | else { | ||
519 | /* becomes available */ | ||
520 | mutex->owner = NULL; | ||
521 | } | ||
522 | |||
523 | unlock_fine_irqrestore(&mutex->lock, flags); | ||
524 | |||
525 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
526 | out: | ||
527 | #endif | ||
528 | unlock_global_irqrestore(dgl_lock, flags); | ||
529 | |||
530 | return err; | ||
531 | } | ||
532 | |||
533 | |||
534 | void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l, | ||
535 | struct task_struct* t, | ||
536 | raw_spinlock_t* to_unlock, | ||
537 | unsigned long irqflags) | ||
538 | { | ||
539 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
540 | |||
541 | // relay-style locking | ||
542 | lock_fine(&mutex->lock); | ||
543 | unlock_fine(to_unlock); | ||
544 | |||
545 | if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked_lock | ||
546 | struct task_struct *owner = mutex->owner; | ||
547 | |||
548 | struct task_struct *old_max_eff_prio; | ||
549 | struct task_struct *new_max_eff_prio; | ||
550 | |||
551 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
552 | |||
553 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
554 | |||
555 | //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) { | ||
556 | if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) { | ||
557 | TRACE_TASK(t, "is new highest-prio waiter by propagation.\n"); | ||
558 | mutex->hp_waiter = t; | ||
559 | } | ||
560 | if(t == mutex->hp_waiter) { | ||
561 | // reflect the decreased priority in the heap node. | ||
562 | l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter); | ||
563 | |||
564 | BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node)); | ||
565 | BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node, | ||
566 | &tsk_rt(owner)->hp_blocked_tasks)); | ||
567 | |||
568 | binheap_decrease(&l->nest.hp_binheap_node, | ||
569 | &tsk_rt(owner)->hp_blocked_tasks); | ||
570 | } | ||
571 | |||
572 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
573 | |||
574 | |||
575 | if(new_max_eff_prio != old_max_eff_prio) { | ||
576 | // new_max_eff_prio > old_max_eff_prio holds. | ||
577 | if ((effective_priority(owner) == old_max_eff_prio) || | ||
578 | //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) { | ||
579 | (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) { | ||
580 | TRACE_CUR("Propagating inheritance to holder of lock %d.\n", | ||
581 | l->ident); | ||
582 | |||
583 | // beware: recursion | ||
584 | litmus->nested_increase_prio(owner, new_max_eff_prio, | ||
585 | &mutex->lock, irqflags); // unlocks mutex->lock | ||
586 | } | ||
587 | else { | ||
588 | TRACE_CUR("Lower priority than holder %s/%d. No propagation.\n", | ||
589 | owner->comm, owner->pid); | ||
590 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
591 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
592 | } | ||
593 | } | ||
594 | else { | ||
595 | TRACE_TASK(mutex->owner, "No change in maximum effective priority.\n"); | ||
596 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
597 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
598 | } | ||
599 | } | ||
600 | else { | ||
601 | struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock; | ||
602 | |||
603 | TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident); | ||
604 | if(still_blocked) { | ||
605 | TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n", | ||
606 | still_blocked->ident); | ||
607 | if(still_blocked->ops->propagate_increase_inheritance) { | ||
608 | /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B) | ||
609 | we know that task 't' has not released any locks behind us in this | ||
610 | chain. Propagation just needs to catch up with task 't'. */ | ||
611 | still_blocked->ops->propagate_increase_inheritance(still_blocked, | ||
612 | t, | ||
613 | &mutex->lock, | ||
614 | irqflags); | ||
615 | } | ||
616 | else { | ||
617 | TRACE_TASK(t, | ||
618 | "Inheritor is blocked on lock (%p) that does not " | ||
619 | "support nesting!\n", | ||
620 | still_blocked); | ||
621 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
622 | } | ||
623 | } | ||
624 | else { | ||
625 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
626 | } | ||
627 | } | ||
628 | } | ||
629 | |||
630 | |||
631 | void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l, | ||
632 | struct task_struct* t, | ||
633 | raw_spinlock_t* to_unlock, | ||
634 | unsigned long irqflags) | ||
635 | { | ||
636 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
637 | |||
638 | // relay-style locking | ||
639 | lock_fine(&mutex->lock); | ||
640 | unlock_fine(to_unlock); | ||
641 | |||
642 | if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked_lock | ||
643 | if(t == mutex->hp_waiter) { | ||
644 | struct task_struct *owner = mutex->owner; | ||
645 | |||
646 | struct task_struct *old_max_eff_prio; | ||
647 | struct task_struct *new_max_eff_prio; | ||
648 | |||
649 | raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
650 | |||
651 | old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
652 | |||
653 | binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks); | ||
654 | mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL); | ||
655 | l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ? | ||
656 | effective_priority(mutex->hp_waiter) : NULL; | ||
657 | binheap_add(&l->nest.hp_binheap_node, | ||
658 | &tsk_rt(owner)->hp_blocked_tasks, | ||
659 | struct nested_info, hp_binheap_node); | ||
660 | |||
661 | new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks); | ||
662 | |||
663 | if((old_max_eff_prio != new_max_eff_prio) && | ||
664 | (effective_priority(owner) == old_max_eff_prio)) | ||
665 | { | ||
666 | // Need to set new effective_priority for owner | ||
667 | |||
668 | struct task_struct *decreased_prio; | ||
669 | |||
670 | TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n", | ||
671 | l->ident); | ||
672 | |||
673 | //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) { | ||
674 | if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) { | ||
675 | TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n", | ||
676 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", | ||
677 | (new_max_eff_prio) ? new_max_eff_prio->pid : -1, | ||
678 | owner->comm, | ||
679 | owner->pid, | ||
680 | l->ident); | ||
681 | |||
682 | decreased_prio = new_max_eff_prio; | ||
683 | } | ||
684 | else { | ||
685 | TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n", | ||
686 | (new_max_eff_prio) ? new_max_eff_prio->comm : "nil", | ||
687 | (new_max_eff_prio) ? new_max_eff_prio->pid : -1, | ||
688 | owner->comm, | ||
689 | owner->pid, | ||
690 | l->ident); | ||
691 | |||
692 | decreased_prio = NULL; | ||
693 | } | ||
694 | |||
695 | // beware: recursion | ||
696 | litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock | ||
697 | } | ||
698 | else { | ||
699 | raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock); | ||
700 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
701 | } | ||
702 | } | ||
703 | else { | ||
704 | TRACE_TASK(t, "is not hp_waiter. No propagation.\n"); | ||
705 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
706 | } | ||
707 | } | ||
708 | else { | ||
709 | struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock; | ||
710 | |||
711 | TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident); | ||
712 | if(still_blocked) { | ||
713 | TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n", | ||
714 | still_blocked->ident); | ||
715 | if(still_blocked->ops->propagate_decrease_inheritance) { | ||
716 | /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B) | ||
717 | we know that task 't' has not released any locks behind us in this | ||
718 | chain. propagation just needs to catch up with task 't' */ | ||
719 | still_blocked->ops->propagate_decrease_inheritance(still_blocked, | ||
720 | t, | ||
721 | &mutex->lock, | ||
722 | irqflags); | ||
723 | } | ||
724 | else { | ||
725 | TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", | ||
726 | still_blocked); | ||
727 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
728 | } | ||
729 | } | ||
730 | else { | ||
731 | unlock_fine_irqrestore(&mutex->lock, irqflags); | ||
732 | } | ||
733 | } | ||
734 | } | ||
735 | |||
736 | |||
737 | int rsm_mutex_close(struct litmus_lock* l) | ||
738 | { | ||
739 | struct task_struct *t = current; | ||
740 | struct rsm_mutex *mutex = rsm_mutex_from_lock(l); | ||
741 | unsigned long flags; | ||
742 | |||
743 | int owner; | ||
744 | |||
745 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
746 | raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t); | ||
747 | #endif | ||
748 | |||
749 | lock_global_irqsave(dgl_lock, flags); | ||
750 | lock_fine_irqsave(&mutex->lock, flags); | ||
751 | |||
752 | owner = (mutex->owner == t); | ||
753 | |||
754 | unlock_fine_irqrestore(&mutex->lock, flags); | ||
755 | unlock_global_irqrestore(dgl_lock, flags); | ||
756 | |||
757 | if (owner) | ||
758 | rsm_mutex_unlock(l); | ||
759 | |||
760 | return 0; | ||
761 | } | ||
762 | |||
763 | void rsm_mutex_free(struct litmus_lock* lock) | ||
764 | { | ||
765 | kfree(rsm_mutex_from_lock(lock)); | ||
766 | } | ||
767 | |||
768 | struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops) | ||
769 | { | ||
770 | struct rsm_mutex* mutex; | ||
771 | |||
772 | mutex = kmalloc(sizeof(*mutex), GFP_KERNEL); | ||
773 | if (!mutex) | ||
774 | return NULL; | ||
775 | |||
776 | mutex->litmus_lock.ops = ops; | ||
777 | mutex->owner = NULL; | ||
778 | mutex->hp_waiter = NULL; | ||
779 | init_waitqueue_head(&mutex->wait); | ||
780 | |||
781 | |||
782 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
783 | { | ||
784 | __raw_spin_lock_init(&mutex->lock, | ||
785 | ((struct litmus_lock*)mutex)->cheat_lockdep, | ||
786 | &((struct litmus_lock*)mutex)->key); | ||
787 | } | ||
788 | #else | ||
789 | raw_spin_lock_init(&mutex->lock); | ||
790 | #endif | ||
791 | |||
792 | ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter; | ||
793 | |||
794 | return &mutex->litmus_lock; | ||
795 | } | ||
796 | |||
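A usage sketch, not part of the patch, showing how the rsm_mutex_* functions above compose; the ops table passed in is assumed to be supplied by the plugin that exports this lock type.

/* Hypothetical round trip over an RSM mutex; 'ops' is an assumed
 * litmus_lock_ops table wired to the rsm_mutex_* functions above. */
static int example_rsm_roundtrip(struct litmus_lock_ops *ops)
{
	struct litmus_lock *l = rsm_mutex_new(ops);
	int err;

	if (!l)
		return -ENOMEM;

	err = rsm_mutex_lock(l);		/* -EPERM unless the caller is real-time */
	if (!err) {
		/* ... critical section ... */
		err = rsm_mutex_unlock(l);	/* -EINVAL if the caller is not the owner */
	}
	rsm_mutex_free(l);
	return err;
}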
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c index 1683d3847560..54322e278a1e 100644 --- a/litmus/rt_domain.c +++ b/litmus/rt_domain.c | |||
@@ -300,10 +300,15 @@ void rt_domain_init(rt_domain_t *rt, | |||
300 | */ | 300 | */ |
301 | void __add_ready(rt_domain_t* rt, struct task_struct *new) | 301 | void __add_ready(rt_domain_t* rt, struct task_struct *new) |
302 | { | 302 | { |
303 | TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " | 303 | TRACE("rt: adding %s/%d (%llu, %llu, %llu) " |
304 | "to ready queue at %llu\n", | 304 | "[inh_task: %s/%d (%llu, %llu %llu)] " |
305 | new->comm, new->pid, | 305 | "rel=%llu to ready queue at %llu\n", |
306 | get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), | 306 | new->comm, new->pid, get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), |
307 | (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->comm : "(nil)", | ||
308 | (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->pid : 0, | ||
309 | (tsk_rt(new)->inh_task) ? get_exec_cost(tsk_rt(new)->inh_task) : 0, | ||
310 | (tsk_rt(new)->inh_task) ? get_rt_period(tsk_rt(new)->inh_task) : 0, | ||
311 | (tsk_rt(new)->inh_task) ? get_rt_relative_deadline(tsk_rt(new)->inh_task) : 0, | ||
307 | get_release(new), litmus_clock()); | 312 | get_release(new), litmus_clock()); |
308 | 313 | ||
309 | BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); | 314 | BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); |
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c index b45b46fc4fca..db47f4413329 100644 --- a/litmus/sched_cedf.c +++ b/litmus/sched_cedf.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
31 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
32 | 32 | #include <linux/uaccess.h> | |
33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
34 | 34 | ||
35 | #include <litmus/litmus.h> | 35 | #include <litmus/litmus.h> |
@@ -43,14 +43,48 @@ | |||
43 | #include <litmus/clustered.h> | 43 | #include <litmus/clustered.h> |
44 | 44 | ||
45 | #include <litmus/bheap.h> | 45 | #include <litmus/bheap.h> |
46 | #include <litmus/binheap.h> | ||
47 | #include <litmus/trace.h> | ||
48 | |||
49 | #ifdef CONFIG_LITMUS_LOCKING | ||
50 | #include <litmus/kfmlp_lock.h> | ||
51 | #endif | ||
52 | |||
53 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
54 | #include <litmus/rsm_lock.h> | ||
55 | #include <litmus/ikglp_lock.h> | ||
56 | #endif | ||
46 | 57 | ||
47 | #ifdef CONFIG_SCHED_CPU_AFFINITY | 58 | #ifdef CONFIG_SCHED_CPU_AFFINITY |
48 | #include <litmus/affinity.h> | 59 | #include <litmus/affinity.h> |
49 | #endif | 60 | #endif |
50 | 61 | ||
62 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
63 | #include <litmus/aux_tasks.h> | ||
64 | #endif | ||
65 | |||
51 | /* to configure the cluster size */ | 66 | /* to configure the cluster size */ |
52 | #include <litmus/litmus_proc.h> | 67 | #include <litmus/litmus_proc.h> |
53 | #include <linux/uaccess.h> | 68 | |
69 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
70 | #include <litmus/affinity.h> | ||
71 | #endif | ||
72 | |||
73 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
74 | #include <litmus/litmus_softirq.h> | ||
75 | #endif | ||
76 | |||
77 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
78 | #include <linux/interrupt.h> | ||
79 | #endif | ||
80 | |||
81 | #ifdef CONFIG_LITMUS_NVIDIA | ||
82 | #include <litmus/nvidia_info.h> | ||
83 | #endif | ||
84 | |||
85 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
86 | #include <litmus/gpu_affinity.h> | ||
87 | #endif | ||
54 | 88 | ||
55 | /* Reference configuration variable. Determines which cache level is used to | 89 | /* Reference configuration variable. Determines which cache level is used to |
56 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | 90 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that |
@@ -71,7 +105,7 @@ typedef struct { | |||
71 | struct task_struct* linked; /* only RT tasks */ | 105 | struct task_struct* linked; /* only RT tasks */ |
72 | struct task_struct* scheduled; /* only RT tasks */ | 106 | struct task_struct* scheduled; /* only RT tasks */ |
73 | atomic_t will_schedule; /* prevent unneeded IPIs */ | 107 | atomic_t will_schedule; /* prevent unneeded IPIs */ |
74 | struct bheap_node* hn; | 108 | struct binheap_node hn; |
75 | } cpu_entry_t; | 109 | } cpu_entry_t; |
76 | 110 | ||
77 | /* one cpu_entry_t per CPU */ | 111 | /* one cpu_entry_t per CPU */ |
@@ -97,10 +131,17 @@ typedef struct clusterdomain { | |||
97 | /* map of this cluster cpus */ | 131 | /* map of this cluster cpus */ |
98 | cpumask_var_t cpu_map; | 132 | cpumask_var_t cpu_map; |
99 | /* the cpus queue themselves according to priority in here */ | 133 | /* the cpus queue themselves according to priority in here */ |
100 | struct bheap_node *heap_node; | 134 | struct binheap cpu_heap; |
101 | struct bheap cpu_heap; | ||
102 | /* lock for this cluster */ | 135 | /* lock for this cluster */ |
103 | #define cluster_lock domain.ready_lock | 136 | #define cluster_lock domain.ready_lock |
137 | |||
138 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
139 | struct tasklet_head pending_tasklets; | ||
140 | #endif | ||
141 | |||
142 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
143 | raw_spinlock_t dgl_lock; | ||
144 | #endif | ||
104 | } cedf_domain_t; | 145 | } cedf_domain_t; |
105 | 146 | ||
106 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | 147 | /* a cedf_domain per cluster; allocation is done at init/activation time */ |
@@ -109,6 +150,29 @@ cedf_domain_t *cedf; | |||
109 | #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) | 150 | #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) |
110 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | 151 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) |
111 | 152 | ||
153 | /* total number of clusters */ | ||
154 | static int num_clusters; | ||
155 | /* we do not support clusters of different sizes */ | ||
156 | static unsigned int cluster_size; | ||
157 | |||
158 | static int clusters_allocated = 0; | ||
159 | |||
160 | |||
161 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
162 | static int num_gpu_clusters; | ||
163 | static unsigned int gpu_cluster_size; | ||
164 | #endif | ||
165 | |||
166 | |||
167 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
168 | static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t) | ||
169 | { | ||
170 | cedf_domain_t *cluster = task_cpu_cluster(t); | ||
171 | return(&cluster->dgl_lock); | ||
172 | } | ||
173 | #endif | ||
174 | |||
175 | |||
112 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | 176 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling |
113 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | 177 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose |
114 | * information during the initialization of the plugin (e.g., topology) | 178 | * information during the initialization of the plugin (e.g., topology) |
@@ -116,11 +180,11 @@ cedf_domain_t *cedf; | |||
116 | */ | 180 | */ |
117 | #define VERBOSE_INIT | 181 | #define VERBOSE_INIT |
118 | 182 | ||
119 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | 183 | static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) |
120 | { | 184 | { |
121 | cpu_entry_t *a, *b; | 185 | cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); |
122 | a = _a->value; | 186 | cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); |
123 | b = _b->value; | 187 | |
124 | /* Note that a and b are inverted: we want the lowest-priority CPU at | 188 | /* Note that a and b are inverted: we want the lowest-priority CPU at |
125 | * the top of the heap. | 189 | * the top of the heap. |
126 | */ | 190 | */ |
@@ -134,20 +198,17 @@ static void update_cpu_position(cpu_entry_t *entry) | |||
134 | { | 198 | { |
135 | cedf_domain_t *cluster = entry->cluster; | 199 | cedf_domain_t *cluster = entry->cluster; |
136 | 200 | ||
137 | if (likely(bheap_node_in_heap(entry->hn))) | 201 | if (likely(binheap_is_in_heap(&entry->hn))) { |
138 | bheap_delete(cpu_lower_prio, | 202 | binheap_delete(&entry->hn, &cluster->cpu_heap); |
139 | &cluster->cpu_heap, | 203 | } |
140 | entry->hn); | ||
141 | 204 | ||
142 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | 205 | binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); |
143 | } | 206 | } |
144 | 207 | ||
145 | /* caller must hold cedf lock */ | 208 | /* caller must hold cedf lock */ |
146 | static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) | 209 | static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) |
147 | { | 210 | { |
148 | struct bheap_node* hn; | 211 | return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn); |
149 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
150 | return hn->value; | ||
151 | } | 212 | } |
152 | 213 | ||
153 | 214 | ||
@@ -209,7 +270,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked, | |||
209 | } | 270 | } |
210 | 271 | ||
211 | /* unlink - Make sure a task is not linked any longer to an entry | 272 | /* unlink - Make sure a task is not linked any longer to an entry |
212 | * where it was linked before. Must hold cedf_lock. | 273 | * where it was linked before. Must hold cluster_lock. |
213 | */ | 274 | */ |
214 | static noinline void unlink(struct task_struct* t) | 275 | static noinline void unlink(struct task_struct* t) |
215 | { | 276 | { |
@@ -245,7 +306,7 @@ static void preempt(cpu_entry_t *entry) | |||
245 | } | 306 | } |
246 | 307 | ||
247 | /* requeue - Put an unlinked task into gsn-edf domain. | 308 | /* requeue - Put an unlinked task into gsn-edf domain. |
248 | * Caller must hold cedf_lock. | 309 | * Caller must hold cluster_lock. |
249 | */ | 310 | */ |
250 | static noinline void requeue(struct task_struct* task) | 311 | static noinline void requeue(struct task_struct* task) |
251 | { | 312 | { |
@@ -255,7 +316,15 @@ static noinline void requeue(struct task_struct* task) | |||
255 | BUG_ON(is_queued(task)); | 316 | BUG_ON(is_queued(task)); |
256 | 317 | ||
257 | if (is_released(task, litmus_clock())) | 318 | if (is_released(task, litmus_clock())) |
258 | __add_ready(&cluster->domain, task); | 319 | #ifdef CONFIG_REALTIME_AUX_TASKS |
320 | if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { | ||
321 | /* aux_task probably transitioned to real-time while it was blocked */ | ||
322 | TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); | ||
323 | unlink(task); /* really needed? */ | ||
324 | } | ||
325 | else | ||
326 | #endif | ||
327 | __add_ready(&cluster->domain, task); | ||
259 | else { | 328 | else { |
260 | /* it has got to wait */ | 329 | /* it has got to wait */ |
261 | add_release(&cluster->domain, task); | 330 | add_release(&cluster->domain, task); |
@@ -340,13 +409,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | |||
340 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 409 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); |
341 | } | 410 | } |
342 | 411 | ||
343 | /* caller holds cedf_lock */ | 412 | /* caller holds cluster_lock */ |
344 | static noinline void job_completion(struct task_struct *t, int forced) | 413 | static noinline void job_completion(struct task_struct *t, int forced) |
345 | { | 414 | { |
346 | BUG_ON(!t); | 415 | BUG_ON(!t); |
347 | 416 | ||
348 | sched_trace_task_completion(t, forced); | 417 | sched_trace_task_completion(t, forced); |
349 | 418 | ||
419 | #ifdef CONFIG_LITMUS_NVIDIA | ||
420 | atomic_set(&tsk_rt(t)->nv_int_count, 0); | ||
421 | #endif | ||
422 | |||
350 | TRACE_TASK(t, "job_completion().\n"); | 423 | TRACE_TASK(t, "job_completion().\n"); |
351 | 424 | ||
352 | /* set flags */ | 425 | /* set flags */ |
@@ -371,25 +444,341 @@ static noinline void job_completion(struct task_struct *t, int forced) | |||
371 | */ | 444 | */ |
372 | static void cedf_tick(struct task_struct* t) | 445 | static void cedf_tick(struct task_struct* t) |
373 | { | 446 | { |
374 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | 447 | if (is_realtime(t) && budget_exhausted(t)) |
375 | if (!is_np(t)) { | 448 | { |
376 | /* np tasks will be preempted when they become | 449 | if (budget_signalled(t) && !sigbudget_sent(t)) { |
377 | * preemptable again | 450 | /* signal exhaustion */ |
378 | */ | 451 | send_sigbudget(t); |
379 | litmus_reschedule_local(); | 452 | } |
380 | set_will_schedule(); | 453 | |
381 | TRACE("cedf_scheduler_tick: " | 454 | if (budget_enforced(t)) { |
382 | "%d is preemptable " | 455 | if (!is_np(t)) { |
383 | " => FORCE_RESCHED\n", t->pid); | 456 | /* np tasks will be preempted when they become |
384 | } else if (is_user_np(t)) { | 457 | * preemptable again |
385 | TRACE("cedf_scheduler_tick: " | 458 | */ |
386 | "%d is non-preemptable, " | 459 | litmus_reschedule_local(); |
387 | "preemption delayed.\n", t->pid); | 460 | set_will_schedule(); |
388 | request_exit_np(t); | 461 | TRACE("cedf_scheduler_tick: " |
462 | "%d is preemptable " | ||
463 | " => FORCE_RESCHED\n", t->pid); | ||
464 | } else if (is_user_np(t)) { | ||
465 | TRACE("cedf_scheduler_tick: " | ||
466 | "%d is non-preemptable, " | ||
467 | "preemption delayed.\n", t->pid); | ||
468 | request_exit_np(t); | ||
469 | } | ||
389 | } | 470 | } |
390 | } | 471 | } |
391 | } | 472 | } |
392 | 473 | ||
474 | |||
475 | |||
476 | |||
477 | |||
478 | |||
479 | |||
480 | |||
481 | |||
482 | |||
483 | |||
484 | |||
485 | |||
486 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
487 | |||
488 | |||
489 | static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) | ||
490 | { | ||
491 | if (!atomic_read(&tasklet->count)) { | ||
492 | if(tasklet->owner) { | ||
493 | sched_trace_tasklet_begin(tasklet->owner); | ||
494 | } | ||
495 | |||
496 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) | ||
497 | { | ||
498 | BUG(); | ||
499 | } | ||
500 | TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", | ||
501 | __FUNCTION__, | ||
502 | (tasklet->owner) ? tasklet->owner->pid : -1, | ||
503 | (tasklet->owner) ? 0 : 1); | ||
504 | tasklet->func(tasklet->data); | ||
505 | tasklet_unlock(tasklet); | ||
506 | |||
507 | if(tasklet->owner) { | ||
508 | sched_trace_tasklet_end(tasklet->owner, flushed); | ||
509 | } | ||
510 | } | ||
511 | else { | ||
512 | BUG(); | ||
513 | } | ||
514 | } | ||
515 | |||
516 | |||
517 | static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task) | ||
518 | { | ||
519 | int work_to_do = 1; | ||
520 | struct tasklet_struct *tasklet = NULL; | ||
521 | unsigned long flags; | ||
522 | |||
523 | while(work_to_do) { | ||
524 | |||
525 | TS_NV_SCHED_BOTISR_START; | ||
526 | |||
527 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
528 | |||
529 | if(cluster->pending_tasklets.head != NULL) { | ||
530 | // remove tasklet at head. | ||
531 | struct tasklet_struct *prev = NULL; | ||
532 | tasklet = cluster->pending_tasklets.head; | ||
533 | |||
534 | // find a tasklet with prio to execute; skip ones where | ||
535 | // sched_task has a higher priority. | ||
536 | // We use the '!edf' test instead of swapping function arguments since | ||
537 | // both sched_task and owner could be NULL. In this case, we want to | ||
538 | // still execute the tasklet. | ||
539 | while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) { | ||
540 | prev = tasklet; | ||
541 | tasklet = tasklet->next; | ||
542 | } | ||
543 | |||
544 | if(tasklet) { // found something to execute | ||
545 | // remove the tasklet from the queue | ||
546 | if(prev) { | ||
547 | prev->next = tasklet->next; | ||
548 | if(prev->next == NULL) { | ||
549 | TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
550 | cluster->pending_tasklets.tail = &(prev->next); | ||
551 | } | ||
552 | } | ||
553 | else { | ||
554 | cluster->pending_tasklets.head = tasklet->next; | ||
555 | if(tasklet->next == NULL) { | ||
556 | TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
557 | cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); | ||
558 | } | ||
559 | } | ||
560 | } | ||
561 | else { | ||
562 | TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__); | ||
563 | } | ||
564 | } | ||
565 | else { | ||
566 | TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); | ||
567 | } | ||
568 | |||
569 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
570 | |||
571 | if(tasklet) { | ||
572 | __do_lit_tasklet(tasklet, 0ul); | ||
573 | tasklet = NULL; | ||
574 | } | ||
575 | else { | ||
576 | work_to_do = 0; | ||
577 | } | ||
578 | |||
579 | TS_NV_SCHED_BOTISR_END; | ||
580 | } | ||
581 | } | ||
582 | |||
583 | static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster) | ||
584 | { | ||
585 | struct tasklet_struct* step; | ||
586 | |||
587 | tasklet->next = NULL; // make sure there are no old values floating around | ||
588 | |||
589 | step = cluster->pending_tasklets.head; | ||
590 | if(step == NULL) { | ||
591 | TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); | ||
592 | // insert at tail. | ||
593 | *(cluster->pending_tasklets.tail) = tasklet; | ||
594 | cluster->pending_tasklets.tail = &(tasklet->next); | ||
595 | } | ||
596 | else if((*(cluster->pending_tasklets.tail) != NULL) && | ||
597 | edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { | ||
598 | // insert at tail. | ||
599 | TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); | ||
600 | |||
601 | *(cluster->pending_tasklets.tail) = tasklet; | ||
602 | cluster->pending_tasklets.tail = &(tasklet->next); | ||
603 | } | ||
604 | else { | ||
605 | |||
606 | // insert the tasklet somewhere in the middle. | ||
607 | |||
608 | TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); | ||
609 | |||
610 | while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { | ||
611 | step = step->next; | ||
612 | } | ||
613 | |||
614 | // insert tasklet right before step->next. | ||
615 | |||
616 | TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, | ||
617 | tasklet->owner->pid, | ||
618 | (step->owner) ? | ||
619 | step->owner->pid : | ||
620 | -1, | ||
621 | (step->next) ? | ||
622 | ((step->next->owner) ? | ||
623 | step->next->owner->pid : | ||
624 | -1) : | ||
625 | -1); | ||
626 | |||
627 | tasklet->next = step->next; | ||
628 | step->next = tasklet; | ||
629 | |||
630 | // patch up the head if needed. | ||
631 | if(cluster->pending_tasklets.head == step) | ||
632 | { | ||
633 | TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); | ||
634 | cluster->pending_tasklets.head = tasklet; | ||
635 | } | ||
636 | } | ||
637 | } | ||
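The pending_tasklets queue manipulated above is a singly linked list ordered by owner priority: 'head' points at the highest-priority pending tasklet and 'tail' always points at the 'next' field of the last element (or at &head when the list is empty). Both the dequeue in do_lit_tasklets() and the ordered insert in __add_pai_tasklet() rely on that tail invariant. A generic sketch of the invariant, using hypothetical node/list types that only stand in for tasklet_struct/tasklet_head:

/* Illustrative sketch, not part of the patch: a removal of the last node
 * must repoint 'tail' at the predecessor's 'next' field (or at &head),
 * never at a stack variable. */
struct node { struct node *next; };
struct list { struct node *head; struct node **tail; };

static void list_init(struct list *l)
{
	l->head = NULL;
	l->tail = &l->head;
}

static void list_append(struct list *l, struct node *n)
{
	n->next = NULL;
	*(l->tail) = n;		/* link after the current last element */
	l->tail = &n->next;	/* tail now refers to the new last 'next' */
}

static struct node *list_remove_after(struct list *l, struct node *prev)
{
	struct node *victim = prev ? prev->next : l->head;

	if (!victim)
		return NULL;
	if (prev)
		prev->next = victim->next;
	else
		l->head = victim->next;
	if (victim->next == NULL)	/* removed the last element */
		l->tail = prev ? &prev->next : &l->head;
	return victim;
}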
638 | |||
639 | static void cedf_run_tasklets(struct task_struct* sched_task) | ||
640 | { | ||
641 | cedf_domain_t* cluster; | ||
642 | |||
643 | preempt_disable(); | ||
644 | |||
645 | cluster = (is_realtime(sched_task)) ? | ||
646 | task_cpu_cluster(sched_task) : | ||
647 | remote_cluster(smp_processor_id()); | ||
648 | |||
649 | if(cluster && cluster->pending_tasklets.head != NULL) { | ||
650 | TRACE("%s: There are tasklets to process.\n", __FUNCTION__); | ||
651 | do_lit_tasklets(cluster, sched_task); | ||
652 | } | ||
653 | |||
654 | preempt_enable_no_resched(); | ||
655 | } | ||
656 | |||
657 | |||
658 | |||
659 | static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) | ||
660 | { | ||
661 | #if 0 | ||
662 | cedf_domain_t *cluster = NULL; | ||
663 | cpu_entry_t *targetCPU = NULL; | ||
664 | int thisCPU; | ||
665 | int runLocal = 0; | ||
666 | int runNow = 0; | ||
667 | unsigned long flags; | ||
668 | |||
669 | if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) | ||
670 | { | ||
671 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); | ||
672 | return 0; | ||
673 | } | ||
674 | |||
675 | cluster = task_cpu_cluster(tasklet->owner); | ||
676 | |||
677 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
678 | |||
679 | thisCPU = smp_processor_id(); | ||
680 | |||
681 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
682 | { | ||
683 | cpu_entry_t* affinity = NULL; | ||
684 | |||
685 | // use this CPU if it is in our cluster and isn't running any RT work. | ||
686 | if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) { | ||
687 | affinity = &(__get_cpu_var(cedf_cpu_entries)); | ||
688 | } | ||
689 | else { | ||
690 | // this CPU is busy or shouldn't run tasklet in this cluster. | ||
691 | // look for available nearby CPUs. | ||
692 | // NOTE: Affinity towards owner and not this CPU. Is this right? | ||
693 | affinity = | ||
694 | cedf_get_nearest_available_cpu(cluster, | ||
695 | &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner))); | ||
696 | } | ||
697 | |||
698 | targetCPU = affinity; | ||
699 | } | ||
700 | #endif | ||
701 | |||
702 | if (targetCPU == NULL) { | ||
703 | targetCPU = lowest_prio_cpu(cluster); | ||
704 | } | ||
705 | |||
706 | if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { | ||
707 | if (thisCPU == targetCPU->cpu) { | ||
708 | TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); | ||
709 | runLocal = 1; | ||
710 | runNow = 1; | ||
711 | } | ||
712 | else { | ||
713 | TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); | ||
714 | runLocal = 0; | ||
715 | runNow = 1; | ||
716 | } | ||
717 | } | ||
718 | else { | ||
719 | runLocal = 0; | ||
720 | runNow = 0; | ||
721 | } | ||
722 | |||
723 | if(!runLocal) { | ||
724 | // enqueue the tasklet | ||
725 | __add_pai_tasklet(tasklet, cluster); | ||
726 | } | ||
727 | |||
728 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
729 | |||
730 | |||
731 | if (runLocal /*&& runNow */) { // runNow == 1 is implied | ||
732 | TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); | ||
733 | __do_lit_tasklet(tasklet, 0ul); | ||
734 | } | ||
735 | else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied | ||
736 | TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); | ||
737 | preempt(targetCPU); // need to be protected by cluster_lock? | ||
738 | } | ||
739 | else { | ||
740 | TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); | ||
741 | } | ||
742 | #else | ||
743 | TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); | ||
744 | __do_lit_tasklet(tasklet, 0ul); | ||
745 | #endif | ||
746 | return(1); // success | ||
747 | } | ||
748 | |||
749 | static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio, | ||
750 | struct task_struct *new_prio) | ||
751 | { | ||
752 | struct tasklet_struct* step; | ||
753 | unsigned long flags; | ||
754 | cedf_domain_t *cluster; | ||
755 | struct task_struct *probe; | ||
756 | |||
757 | // identify the cluster by the assignment of these tasks. one should | ||
758 | // be non-NULL. | ||
759 | probe = (old_prio) ? old_prio : new_prio; | ||
760 | |||
761 | if(probe) { | ||
762 | cluster = task_cpu_cluster(probe); | ||
763 | |||
764 | if(cluster->pending_tasklets.head != NULL) { | ||
765 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
766 | for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) { | ||
767 | if(step->owner == old_prio) { | ||
768 | TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); | ||
769 | step->owner = new_prio; | ||
770 | } | ||
771 | } | ||
772 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
773 | } | ||
774 | } | ||
775 | else { | ||
776 | TRACE("%s: Both priorities were NULL\n", __FUNCTION__); | ||
777 | } | ||
778 | } | ||
779 | |||
780 | #endif // PAI | ||
781 | |||
393 | /* Getting schedule() right is a bit tricky. schedule() may not make any | 782 | /* Getting schedule() right is a bit tricky. schedule() may not make any |
394 | * assumptions on the state of the current task since it may be called for a | 783 | * assumptions on the state of the current task since it may be called for a |
395 | * number of reasons. The reasons include a scheduler_tick() determined that it | 784 | * number of reasons. The reasons include a scheduler_tick() determined that it |
@@ -415,7 +804,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
415 | { | 804 | { |
416 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | 805 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); |
417 | cedf_domain_t *cluster = entry->cluster; | 806 | cedf_domain_t *cluster = entry->cluster; |
418 | int out_of_time, sleep, preempt, np, exists, blocks; | 807 | int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; |
419 | struct task_struct* next = NULL; | 808 | struct task_struct* next = NULL; |
420 | 809 | ||
421 | #ifdef CONFIG_RELEASE_MASTER | 810 | #ifdef CONFIG_RELEASE_MASTER |
@@ -442,6 +831,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
442 | out_of_time = exists && | 831 | out_of_time = exists && |
443 | budget_enforced(entry->scheduled) && | 832 | budget_enforced(entry->scheduled) && |
444 | budget_exhausted(entry->scheduled); | 833 | budget_exhausted(entry->scheduled); |
834 | signal_budget = exists && | ||
835 | budget_signalled(entry->scheduled) && | ||
836 | budget_exhausted(entry->scheduled) && | ||
837 | !sigbudget_sent(entry->scheduled); | ||
445 | np = exists && is_np(entry->scheduled); | 838 | np = exists && is_np(entry->scheduled); |
446 | sleep = exists && is_completed(entry->scheduled); | 839 | sleep = exists && is_completed(entry->scheduled); |
447 | preempt = entry->scheduled != entry->linked; | 840 | preempt = entry->scheduled != entry->linked; |
@@ -460,12 +853,28 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
460 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | 853 | TRACE_TASK(prev, "will be preempted by %s/%d\n", |
461 | entry->linked->comm, entry->linked->pid); | 854 | entry->linked->comm, entry->linked->pid); |
462 | 855 | ||
856 | /* Send the signal that the budget has been exhausted */ | ||
857 | if (signal_budget) | ||
858 | send_sigbudget(entry->scheduled); | ||
463 | 859 | ||
464 | /* If a task blocks we have no choice but to reschedule. | 860 | /* If a task blocks we have no choice but to reschedule. |
465 | */ | 861 | */ |
466 | if (blocks) | 862 | if (blocks) |
467 | unlink(entry->scheduled); | 863 | unlink(entry->scheduled); |
468 | 864 | ||
865 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
866 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { | ||
867 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
868 | // don't track preemptions or locking protocol suspensions. | ||
869 | TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); | ||
870 | stop_gpu_tracker(entry->scheduled); | ||
871 | } | ||
872 | else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
873 | TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); | ||
874 | } | ||
875 | } | ||
876 | #endif | ||
877 | |||
469 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | 878 | /* Request a sys_exit_np() call if we would like to preempt but cannot. |
470 | * We need to make sure to update the link structure anyway in case | 879 | * We need to make sure to update the link structure anyway in case |
471 | * that we are still linked. Multiple calls to request_exit_np() don't | 880 | * that we are still linked. Multiple calls to request_exit_np() don't |
@@ -515,7 +924,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
515 | raw_spin_unlock(&cluster->cluster_lock); | 924 | raw_spin_unlock(&cluster->cluster_lock); |
516 | 925 | ||
517 | #ifdef WANT_ALL_SCHED_EVENTS | 926 | #ifdef WANT_ALL_SCHED_EVENTS |
518 | TRACE("cedf_lock released, next=0x%p\n", next); | 927 | TRACE("cluster_lock released, next=0x%p\n", next); |
519 | 928 | ||
520 | if (next) | 929 | if (next) |
521 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | 930 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); |
@@ -523,7 +932,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev) | |||
523 | TRACE("becomes idle at %llu.\n", litmus_clock()); | 932 | TRACE("becomes idle at %llu.\n", litmus_clock()); |
524 | #endif | 933 | #endif |
525 | 934 | ||
526 | |||
527 | return next; | 935 | return next; |
528 | } | 936 | } |
529 | 937 | ||
@@ -549,7 +957,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) | |||
549 | cpu_entry_t* entry; | 957 | cpu_entry_t* entry; |
550 | cedf_domain_t* cluster; | 958 | cedf_domain_t* cluster; |
551 | 959 | ||
552 | TRACE("gsn edf: task new %d\n", t->pid); | 960 | TRACE("c-edf: task new %d\n", t->pid); |
553 | 961 | ||
554 | /* the cluster doesn't change even if t is running */ | 962 | /* the cluster doesn't change even if t is running */ |
555 | cluster = task_cpu_cluster(t); | 963 | cluster = task_cpu_cluster(t); |
@@ -587,7 +995,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running) | |||
587 | static void cedf_task_wake_up(struct task_struct *task) | 995 | static void cedf_task_wake_up(struct task_struct *task) |
588 | { | 996 | { |
589 | unsigned long flags; | 997 | unsigned long flags; |
590 | lt_t now; | 998 | //lt_t now; |
591 | cedf_domain_t *cluster; | 999 | cedf_domain_t *cluster; |
592 | 1000 | ||
593 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | 1001 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); |
@@ -595,6 +1003,9 @@ static void cedf_task_wake_up(struct task_struct *task) | |||
595 | cluster = task_cpu_cluster(task); | 1003 | cluster = task_cpu_cluster(task); |
596 | 1004 | ||
597 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1005 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
1006 | |||
1007 | #if 0 | ||
1008 | /* sporadic task model. will increment job numbers automatically */ | ||
598 | now = litmus_clock(); | 1009 | now = litmus_clock(); |
599 | if (is_tardy(task, now)) { | 1010 | if (is_tardy(task, now)) { |
600 | /* new sporadic release */ | 1011 | /* new sporadic release */ |
@@ -608,6 +1019,26 @@ static void cedf_task_wake_up(struct task_struct *task) | |||
608 | tsk_rt(task)->completed = 0; | 1019 | tsk_rt(task)->completed = 0; |
609 | } | 1020 | } |
610 | } | 1021 | } |
1022 | #else | ||
1023 | /* periodic task model. don't force job to end. | ||
1024 | * rely on user to say when jobs complete or when budget expires. */ | ||
1025 | tsk_rt(task)->completed = 0; | ||
1026 | #endif | ||
1027 | |||
1028 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1029 | if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) { | ||
1030 | TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); | ||
1031 | disable_aux_task_owner(task); | ||
1032 | } | ||
1033 | #endif | ||
1034 | |||
1035 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1036 | if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) { | ||
1037 | TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid); | ||
1038 | disable_gpu_owner(task); | ||
1039 | } | ||
1040 | #endif | ||
1041 | |||
611 | cedf_job_arrival(task); | 1042 | cedf_job_arrival(task); |
612 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1043 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); |
613 | } | 1044 | } |
@@ -623,7 +1054,25 @@ static void cedf_task_block(struct task_struct *t) | |||
623 | 1054 | ||
624 | /* unlink if necessary */ | 1055 | /* unlink if necessary */ |
625 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1056 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
1057 | |||
626 | unlink(t); | 1058 | unlink(t); |
1059 | |||
1060 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1061 | if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { | ||
1062 | |||
1063 | TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); | ||
1064 | enable_aux_task_owner(t); | ||
1065 | } | ||
1066 | #endif | ||
1067 | |||
1068 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1069 | if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { | ||
1070 | |||
1071 | TRACE_CUR("%s/%d is blocked so gpu klmirqd tasks may inherit.\n", t->comm, t->pid); | ||
1072 | enable_gpu_owner(t); | ||
1073 | } | ||
1074 | #endif | ||
1075 | |||
627 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | 1076 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); |
628 | 1077 | ||
629 | BUG_ON(!is_realtime(t)); | 1078 | BUG_ON(!is_realtime(t)); |
@@ -635,8 +1084,30 @@ static void cedf_task_exit(struct task_struct * t) | |||
635 | unsigned long flags; | 1084 | unsigned long flags; |
636 | cedf_domain_t *cluster = task_cpu_cluster(t); | 1085 | cedf_domain_t *cluster = task_cpu_cluster(t); |
637 | 1086 | ||
1087 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1088 | cedf_change_prio_pai_tasklet(t, NULL); | ||
1089 | #endif | ||
1090 | |||
638 | /* unlink if necessary */ | 1091 | /* unlink if necessary */ |
639 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | 1092 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); |
1093 | |||
1094 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1095 | /* make sure we clean up on our way out */ | ||
1096 | if (unlikely(tsk_rt(t)->is_aux_task)) { | ||
1097 | exit_aux_task(t); | ||
1098 | } | ||
1099 | else if(tsk_rt(t)->has_aux_tasks) { | ||
1100 | disable_aux_task_owner(t); | ||
1101 | } | ||
1102 | #endif | ||
1103 | |||
1104 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1105 | /* make sure we clean up on our way out */ | ||
1106 | if(tsk_rt(t)->held_gpus) { | ||
1107 | disable_gpu_owner(t); | ||
1108 | } | ||
1109 | #endif | ||
1110 | |||
640 | unlink(t); | 1111 | unlink(t); |
641 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | 1112 | if (tsk_rt(t)->scheduled_on != NO_CPU) { |
642 | cpu_entry_t *cpu; | 1113 | cpu_entry_t *cpu; |
@@ -652,13 +1123,505 @@ static void cedf_task_exit(struct task_struct * t) | |||
652 | 1123 | ||
653 | static long cedf_admit_task(struct task_struct* tsk) | 1124 | static long cedf_admit_task(struct task_struct* tsk) |
654 | { | 1125 | { |
1126 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1127 | INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, | ||
1128 | edf_max_heap_base_priority_order); | ||
1129 | #endif | ||
1130 | |||
655 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; | 1131 | return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; |
656 | } | 1132 | } |
657 | 1133 | ||
658 | /* total number of cluster */ | 1134 | |
659 | static int num_clusters; | 1135 | |
660 | /* we do not support cluster of different sizes */ | 1136 | #ifdef CONFIG_LITMUS_LOCKING |
661 | static unsigned int cluster_size; | 1137 | |
1138 | #include <litmus/fdso.h> | ||
1139 | |||
1140 | |||
1141 | |||
1142 | /* called with IRQs off */ | ||
1143 | static int __increase_priority_inheritance(struct task_struct* t, | ||
1144 | struct task_struct* prio_inh) | ||
1145 | { | ||
1146 | int success = 1; | ||
1147 | int linked_on; | ||
1148 | int check_preempt = 0; | ||
1149 | cedf_domain_t* cluster; | ||
1150 | |||
1151 | if (prio_inh && prio_inh == effective_priority(t)) { | ||
1152 | /* relationship already established. */ | ||
1153 | TRACE_TASK(t, "already has effective priority of %s/%d\n", | ||
1154 | prio_inh->comm, prio_inh->pid); | ||
1155 | goto out; | ||
1156 | } | ||
1157 | |||
1158 | cluster = task_cpu_cluster(t); | ||
1159 | |||
1160 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1161 | /* this sanity check allows for weaker locking in protocols */ | ||
1162 | /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */ | ||
1163 | if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { | ||
1164 | #endif | ||
1165 | TRACE_TASK(t, "inherits priority from %s/%d\n", | ||
1166 | prio_inh->comm, prio_inh->pid); | ||
1167 | tsk_rt(t)->inh_task = prio_inh; | ||
1168 | |||
1169 | linked_on = tsk_rt(t)->linked_on; | ||
1170 | |||
1171 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
1172 | if (linked_on != NO_CPU) { | ||
1173 | TRACE_TASK(t, "%s: linked on %d\n", | ||
1174 | __FUNCTION__, linked_on); | ||
1175 | /* Holder is scheduled; need to re-order CPUs. | ||
1176 | * We can't use heap_decrease() here since | ||
1177 | * the cpu_heap is ordered in reverse direction, so | ||
1178 | * it is actually an increase. */ | ||
1179 | binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn, | ||
1180 | &cluster->cpu_heap); | ||
1181 | binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn, | ||
1182 | &cluster->cpu_heap, cpu_entry_t, hn); | ||
1183 | |||
1184 | } else { | ||
1185 | /* holder may be queued: first stop queue changes */ | ||
1186 | raw_spin_lock(&cluster->domain.release_lock); | ||
1187 | if (is_queued(t)) { | ||
1188 | TRACE_TASK(t, "%s: is queued\n", | ||
1189 | __FUNCTION__); | ||
1190 | /* We need to update the position of holder in some | ||
1191 | * heap. Note that this could be a release heap if | ||
1192 | * budget enforcement is used and this job overran. */ | ||
1193 | check_preempt = | ||
1194 | !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node); | ||
1195 | } else { | ||
1196 | /* Nothing to do: if it is not queued and not linked | ||
1197 | * then it is either sleeping or currently being moved | ||
1198 | * by other code (e.g., a timer interrupt handler) that | ||
1199 | * will use the correct priority when enqueuing the | ||
1200 | * task. */ | ||
1201 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", | ||
1202 | __FUNCTION__); | ||
1203 | } | ||
1204 | raw_spin_unlock(&cluster->domain.release_lock); | ||
1205 | |||
1206 | /* If holder was enqueued in a release heap, then the following | ||
1207 | * preemption check is pointless, but we can't easily detect | ||
1208 | * that case. If you want to fix this, then consider that | ||
1209 | * simply adding a state flag requires O(n) time to update when | ||
1210 | * releasing n tasks, which conflicts with the goal to have | ||
1211 | * O(log n) merges. */ | ||
1212 | if (check_preempt) { | ||
1213 | /* heap_decrease() hit the top level of the heap: make | ||
1214 | * sure preemption checks get the right task, not the | ||
1215 | * potentially stale cache. */ | ||
1216 | bheap_uncache_min(edf_ready_order, | ||
1217 | &cluster->domain.ready_queue); | ||
1218 | check_for_preemptions(cluster); | ||
1219 | } | ||
1220 | |||
1221 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1222 | /* propagate to aux tasks */ | ||
1223 | if (tsk_rt(t)->has_aux_tasks) { | ||
1224 | aux_task_owner_increase_priority(t); | ||
1225 | } | ||
1226 | #endif | ||
1227 | |||
1228 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1229 | /* propagate to gpu klmirqd */ | ||
1230 | if (tsk_rt(t)->held_gpus) { | ||
1231 | gpu_owner_increase_priority(t); | ||
1232 | } | ||
1233 | #endif | ||
1234 | } | ||
1235 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1236 | } | ||
1237 | else { | ||
1238 | TRACE_TASK(t, "Spurious invalid priority increase. " | ||
1239 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" | ||
1240 | "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
1241 | t->comm, t->pid, | ||
1242 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
1243 | (prio_inh) ? prio_inh->comm : "nil", | ||
1244 | (prio_inh) ? prio_inh->pid : -1); | ||
1245 | WARN_ON(!prio_inh); | ||
1246 | success = 0; | ||
1247 | } | ||
1248 | #endif | ||
1249 | |||
1250 | out: | ||
1251 | return success; | ||
1252 | } | ||
1253 | |||
1254 | /* called with IRQs off */ | ||
1255 | static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
1256 | { | ||
1257 | cedf_domain_t* cluster = task_cpu_cluster(t); | ||
1258 | |||
1259 | raw_spin_lock(&cluster->cluster_lock); | ||
1260 | |||
1261 | __increase_priority_inheritance(t, prio_inh); | ||
1262 | |||
1263 | raw_spin_unlock(&cluster->cluster_lock); | ||
1264 | |||
1265 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) | ||
1266 | if(tsk_rt(t)->held_gpus) { | ||
1267 | int i; | ||
1268 | for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); | ||
1269 | i < NV_DEVICE_NUM; | ||
1270 | i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { | ||
1271 | pai_check_priority_increase(t, i); | ||
1272 | } | ||
1273 | } | ||
1274 | #endif | ||
1275 | } | ||
1276 | |||
1277 | /* called with IRQs off */ | ||
1278 | static int __decrease_priority_inheritance(struct task_struct* t, | ||
1279 | struct task_struct* prio_inh) | ||
1280 | { | ||
1281 | int success = 1; | ||
1282 | |||
1283 | if (prio_inh == tsk_rt(t)->inh_task) { | ||
1284 | /* relationship already established. */ | ||
1285 | TRACE_TASK(t, "already inherits priority from %s/%d\n", | ||
1286 | (prio_inh) ? prio_inh->comm : "(nil)", | ||
1287 | (prio_inh) ? prio_inh->pid : 0); | ||
1288 | goto out; | ||
1289 | } | ||
1290 | |||
1291 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1292 | if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { | ||
1293 | #endif | ||
1294 | /* A job only stops inheriting a priority when it releases a | ||
1295 | * resource. Thus we can make the following assumption.*/ | ||
1296 | if(prio_inh) | ||
1297 | TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", | ||
1298 | prio_inh->comm, prio_inh->pid); | ||
1299 | else | ||
1300 | TRACE_TASK(t, "base priority restored.\n"); | ||
1301 | |||
1302 | tsk_rt(t)->inh_task = prio_inh; | ||
1303 | |||
1304 | if(tsk_rt(t)->scheduled_on != NO_CPU) { | ||
1305 | TRACE_TASK(t, "is scheduled.\n"); | ||
1306 | |||
1307 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
1308 | * since the priority was effectively lowered. */ | ||
1309 | unlink(t); | ||
1310 | cedf_job_arrival(t); | ||
1311 | } | ||
1312 | else { | ||
1313 | cedf_domain_t* cluster = task_cpu_cluster(t); | ||
1314 | /* task is queued */ | ||
1315 | raw_spin_lock(&cluster->domain.release_lock); | ||
1316 | if (is_queued(t)) { | ||
1317 | TRACE_TASK(t, "is queued.\n"); | ||
1318 | |||
1319 | /* decrease in priority, so we have to re-add to binomial heap */ | ||
1320 | unlink(t); | ||
1321 | cedf_job_arrival(t); | ||
1322 | } | ||
1323 | else { | ||
1324 | TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); | ||
1325 | } | ||
1326 | raw_spin_unlock(&cluster->domain.release_lock); | ||
1327 | } | ||
1328 | |||
1329 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1330 | /* propagate to aux tasks */ | ||
1331 | if (tsk_rt(t)->has_aux_tasks) { | ||
1332 | aux_task_owner_decrease_priority(t); | ||
1333 | } | ||
1334 | #endif | ||
1335 | |||
1336 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1337 | /* propagate to gpu */ | ||
1338 | if (tsk_rt(t)->held_gpus) { | ||
1339 | gpu_owner_decrease_priority(t); | ||
1340 | } | ||
1341 | #endif | ||
1342 | |||
1343 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1344 | } | ||
1345 | else { | ||
1346 | TRACE_TASK(t, "Spurious invalid priority decrease. " | ||
1347 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" | ||
1348 | "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
1349 | t->comm, t->pid, | ||
1350 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
1351 | (prio_inh) ? prio_inh->comm : "nil", | ||
1352 | (prio_inh) ? prio_inh->pid : -1); | ||
1353 | success = 0; | ||
1354 | } | ||
1355 | #endif | ||
1356 | |||
1357 | out: | ||
1358 | return success; | ||
1359 | } | ||
1360 | |||
1361 | static void decrease_priority_inheritance(struct task_struct* t, | ||
1362 | struct task_struct* prio_inh) | ||
1363 | { | ||
1364 | cedf_domain_t* cluster = task_cpu_cluster(t); | ||
1365 | |||
1366 | raw_spin_lock(&cluster->cluster_lock); | ||
1367 | __decrease_priority_inheritance(t, prio_inh); | ||
1368 | |||
1369 | raw_spin_unlock(&cluster->cluster_lock); | ||
1370 | |||
1371 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) | ||
1372 | if(tsk_rt(t)->held_gpus) { | ||
1373 | int i; | ||
1374 | for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); | ||
1375 | i < NV_DEVICE_NUM; | ||
1376 | i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { | ||
1377 | pai_check_priority_decrease(t, i); | ||
1378 | } | ||
1379 | } | ||
1380 | #endif | ||
1381 | } | ||
1382 | |||
1383 | |||
1384 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1385 | |||
1386 | /* called with IRQs off */ | ||
1387 | /* preconditions: | ||
1388 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
1389 | (2) The lock 'to_unlock' is held. | ||
1390 | */ | ||
1391 | static void nested_increase_priority_inheritance(struct task_struct* t, | ||
1392 | struct task_struct* prio_inh, | ||
1393 | raw_spinlock_t *to_unlock, | ||
1394 | unsigned long irqflags) | ||
1395 | { | ||
1396 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
1397 | |||
1398 | if(tsk_rt(t)->inh_task != prio_inh) { // skip redundant calls. | ||
1399 | increase_priority_inheritance(t, prio_inh); // increase our prio. | ||
1400 | } | ||
1401 | |||
1402 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap. | ||
1403 | |||
1404 | |||
1405 | if(blocked_lock) { | ||
1406 | if(blocked_lock->ops->propagate_increase_inheritance) { | ||
1407 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
1408 | blocked_lock->ident); | ||
1409 | |||
1410 | // beware: recursion | ||
1411 | blocked_lock->ops->propagate_increase_inheritance(blocked_lock, | ||
1412 | t, to_unlock, | ||
1413 | irqflags); | ||
1414 | } | ||
1415 | else { | ||
1416 | TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n", | ||
1417 | blocked_lock->ident); | ||
1418 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1419 | } | ||
1420 | } | ||
1421 | else { | ||
1422 | TRACE_TASK(t, "is not blocked. No propagation.\n"); | ||
1423 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1424 | } | ||
1425 | } | ||
1426 | |||
1427 | /* called with IRQs off */ | ||
1428 | /* preconditions: | ||
1429 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
1430 | (2) The lock 'to_unlock' is held. | ||
1431 | */ | ||
1432 | static void nested_decrease_priority_inheritance(struct task_struct* t, | ||
1433 | struct task_struct* prio_inh, | ||
1434 | raw_spinlock_t *to_unlock, | ||
1435 | unsigned long irqflags) | ||
1436 | { | ||
1437 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
1438 | decrease_priority_inheritance(t, prio_inh); | ||
1439 | |||
1440 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap. | ||
1441 | |||
1442 | if(blocked_lock) { | ||
1443 | if(blocked_lock->ops->propagate_decrease_inheritance) { | ||
1444 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
1445 | blocked_lock->ident); | ||
1446 | |||
1447 | // beware: recursion | ||
1448 | blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, | ||
1449 | to_unlock, | ||
1450 | irqflags); | ||
1451 | } | ||
1452 | else { | ||
1453 | TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", | ||
1454 | blocked_lock); | ||
1455 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1456 | } | ||
1457 | } | ||
1458 | else { | ||
1459 | TRACE_TASK(t, "is not blocked. No propagation.\n"); | ||
1460 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1461 | } | ||
1462 | } | ||
1463 | |||
1464 | |||
1465 | /* ******************** RSM MUTEX ********************** */ | ||
1466 | |||
1467 | static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = { | ||
1468 | .lock = rsm_mutex_lock, | ||
1469 | .unlock = rsm_mutex_unlock, | ||
1470 | .close = rsm_mutex_close, | ||
1471 | .deallocate = rsm_mutex_free, | ||
1472 | |||
1473 | .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance, | ||
1474 | .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance, | ||
1475 | |||
1476 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1477 | .dgl_lock = rsm_mutex_dgl_lock, | ||
1478 | .is_owner = rsm_mutex_is_owner, | ||
1479 | .enable_priority = rsm_mutex_enable_priority, | ||
1480 | #endif | ||
1481 | }; | ||
1482 | |||
1483 | static struct litmus_lock* cedf_new_rsm_mutex(void) | ||
1484 | { | ||
1485 | return rsm_mutex_new(&cedf_rsm_mutex_lock_ops); | ||
1486 | } | ||
1487 | |||
1488 | /* ******************** IKGLP ********************** */ | ||
1489 | |||
1490 | static struct litmus_lock_ops cedf_ikglp_lock_ops = { | ||
1491 | .lock = ikglp_lock, | ||
1492 | .unlock = ikglp_unlock, | ||
1493 | .close = ikglp_close, | ||
1494 | .deallocate = ikglp_free, | ||
1495 | |||
1496 | // ikglp can only be an outer-most lock. | ||
1497 | .propagate_increase_inheritance = NULL, | ||
1498 | .propagate_decrease_inheritance = NULL, | ||
1499 | }; | ||
1500 | |||
1501 | static struct litmus_lock* cedf_new_ikglp(void* __user arg) | ||
1502 | { | ||
1503 | // assumes clusters of uniform size. | ||
1504 | return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg); | ||
1505 | } | ||
1506 | |||
1507 | #endif /* CONFIG_LITMUS_NESTED_LOCKING */ | ||
1508 | |||
1509 | |||
1510 | |||
1511 | |||
1512 | /* ******************** KFMLP support ********************** */ | ||
1513 | |||
1514 | static struct litmus_lock_ops cedf_kfmlp_lock_ops = { | ||
1515 | .lock = kfmlp_lock, | ||
1516 | .unlock = kfmlp_unlock, | ||
1517 | .close = kfmlp_close, | ||
1518 | .deallocate = kfmlp_free, | ||
1519 | |||
1520 | // kfmlp can only be an outer-most lock. | ||
1521 | .propagate_increase_inheritance = NULL, | ||
1522 | .propagate_decrease_inheritance = NULL, | ||
1523 | }; | ||
1524 | |||
1525 | |||
1526 | static struct litmus_lock* cedf_new_kfmlp(void* __user arg) | ||
1527 | { | ||
1528 | return kfmlp_new(&cedf_kfmlp_lock_ops, arg); | ||
1529 | } | ||
1530 | |||
1531 | |||
1532 | /* **** lock constructor **** */ | ||
1533 | |||
1534 | static long cedf_allocate_lock(struct litmus_lock **lock, int type, | ||
1535 | void* __user args) | ||
1536 | { | ||
1537 | int err; | ||
1538 | |||
1539 | switch (type) { | ||
1540 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1541 | case RSM_MUTEX: | ||
1542 | *lock = cedf_new_rsm_mutex(); | ||
1543 | break; | ||
1544 | |||
1545 | case IKGLP_SEM: | ||
1546 | *lock = cedf_new_ikglp(args); | ||
1547 | break; | ||
1548 | #endif | ||
1549 | case KFMLP_SEM: | ||
1550 | *lock = cedf_new_kfmlp(args); | ||
1551 | break; | ||
1552 | |||
1553 | default: | ||
1554 | err = -ENXIO; | ||
1555 | goto UNSUPPORTED_LOCK; | ||
1556 | }; | ||
1557 | |||
1558 | if (*lock) | ||
1559 | err = 0; | ||
1560 | else | ||
1561 | err = -ENOMEM; | ||
1562 | |||
1563 | UNSUPPORTED_LOCK: | ||
1564 | return err; | ||
1565 | } | ||
1566 | |||
1567 | #endif // CONFIG_LITMUS_LOCKING | ||
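Each protocol above follows the same wiring pattern: a litmus_lock_ops table, a small constructor that binds it, and one case in cedf_allocate_lock(). As a hedged illustration of that shape, FOO_SEM and the foo_* callbacks below are hypothetical placeholders, not identifiers from this patch:

/* Hypothetical example only: FOO_SEM and the foo_* functions do not exist
 * in this patch; they stand in for any additional protocol. */
static struct litmus_lock_ops cedf_foo_lock_ops = {
	.lock       = foo_lock,
	.unlock     = foo_unlock,
	.close      = foo_close,
	.deallocate = foo_free,

	/* leave these NULL if the protocol cannot be nested inside others */
	.propagate_increase_inheritance = NULL,
	.propagate_decrease_inheritance = NULL,
};

static struct litmus_lock* cedf_new_foo(void* __user arg)
{
	return foo_new(&cedf_foo_lock_ops, arg);
}

/* ...plus one more case in cedf_allocate_lock():
 *
 *	case FOO_SEM:
 *		*lock = cedf_new_foo(args);
 *		break;
 */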
1568 | |||
1569 | |||
1570 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1571 | static struct affinity_observer_ops cedf_kfmlp_affinity_ops = { | ||
1572 | .close = kfmlp_aff_obs_close, | ||
1573 | .deallocate = kfmlp_aff_obs_free, | ||
1574 | }; | ||
1575 | |||
1576 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1577 | static struct affinity_observer_ops cedf_ikglp_affinity_ops = { | ||
1578 | .close = ikglp_aff_obs_close, | ||
1579 | .deallocate = ikglp_aff_obs_free, | ||
1580 | }; | ||
1581 | #endif | ||
1582 | |||
1583 | static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs, | ||
1584 | int type, | ||
1585 | void* __user args) | ||
1586 | { | ||
1587 | int err; | ||
1588 | |||
1589 | switch (type) { | ||
1590 | |||
1591 | case KFMLP_SIMPLE_GPU_AFF_OBS: | ||
1592 | *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args); | ||
1593 | break; | ||
1594 | |||
1595 | case KFMLP_GPU_AFF_OBS: | ||
1596 | *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args); | ||
1597 | break; | ||
1598 | |||
1599 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1600 | case IKGLP_SIMPLE_GPU_AFF_OBS: | ||
1601 | *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args); | ||
1602 | break; | ||
1603 | |||
1604 | case IKGLP_GPU_AFF_OBS: | ||
1605 | *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args); | ||
1606 | break; | ||
1607 | #endif | ||
1608 | default: | ||
1609 | err = -ENXIO; | ||
1610 | goto UNSUPPORTED_AFF_OBS; | ||
1611 | }; | ||
1612 | |||
1613 | if (*aff_obs) | ||
1614 | err = 0; | ||
1615 | else | ||
1616 | err = -ENOMEM; | ||
1617 | |||
1618 | UNSUPPORTED_AFF_OBS: | ||
1619 | return err; | ||
1620 | } | ||
1621 | #endif | ||
1622 | |||
1623 | |||
1624 | |||
662 | 1625 | ||
663 | #ifdef VERBOSE_INIT | 1626 | #ifdef VERBOSE_INIT |
664 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | 1627 | static void print_cluster_topology(cpumask_var_t mask, int cpu) |
@@ -673,16 +1636,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu) | |||
673 | } | 1636 | } |
674 | #endif | 1637 | #endif |
675 | 1638 | ||
676 | static int clusters_allocated = 0; | ||
677 | |||
678 | static void cleanup_cedf(void) | 1639 | static void cleanup_cedf(void) |
679 | { | 1640 | { |
680 | int i; | 1641 | int i; |
681 | 1642 | ||
1643 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1644 | shutdown_nvidia_info(); | ||
1645 | #endif | ||
1646 | |||
682 | if (clusters_allocated) { | 1647 | if (clusters_allocated) { |
683 | for (i = 0; i < num_clusters; i++) { | 1648 | for (i = 0; i < num_clusters; i++) { |
684 | kfree(cedf[i].cpus); | 1649 | kfree(cedf[i].cpus); |
685 | kfree(cedf[i].heap_node); | ||
686 | free_cpumask_var(cedf[i].cpu_map); | 1650 | free_cpumask_var(cedf[i].cpu_map); |
687 | } | 1651 | } |
688 | 1652 | ||
@@ -690,6 +1654,18 @@ static void cleanup_cedf(void) | |||
690 | } | 1654 | } |
691 | } | 1655 | } |
692 | 1656 | ||
1657 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
1658 | static int cedf_map_gpu_to_cpu(int gpu) | ||
1659 | { | ||
1660 | int cpu_cluster = gpu / gpu_cluster_size; | ||
1661 | int default_cpu = cedf[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster | ||
1662 | |||
1663 | TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu); | ||
1664 | |||
1665 | return default_cpu; | ||
1666 | } | ||
1667 | #endif | ||
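A worked example of the mapping above, using assumed numbers rather than values taken from the patch: with 8 online GPUs and 4 CPU clusters, num_gpu_clusters = min(4, 8) = 4 and gpu_cluster_size = 8 / 4 = 2, so GPU 5 lands in GPU cluster 5 / 2 = 2 and its interrupt threads default to the first CPU of CPU cluster 2.

/* Worked example only; the GPU and cluster counts are assumptions. */
static int map_gpu_to_cpu_cluster_example(void)
{
	int num_online_gpus_example = 8;	/* assumed */
	int num_clusters_example = 4;		/* assumed */
	int gpu_cluster_size_example =
		num_online_gpus_example / num_clusters_example;	/* = 2 */
	int gpu = 5;

	/* same arithmetic as cedf_map_gpu_to_cpu(); the real code then
	 * returns cedf[cluster].cpus[0]->cpu for this cluster index */
	return gpu / gpu_cluster_size_example;	/* = 2 */
}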
1668 | |||
693 | static long cedf_activate_plugin(void) | 1669 | static long cedf_activate_plugin(void) |
694 | { | 1670 | { |
695 | int i, j, cpu, ccpu, cpu_count; | 1671 | int i, j, cpu, ccpu, cpu_count; |
@@ -736,18 +1712,33 @@ static long cedf_activate_plugin(void) | |||
736 | printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", | 1712 | printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", |
737 | num_clusters, cluster_size); | 1713 | num_clusters, cluster_size); |
738 | 1714 | ||
1715 | |||
1716 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
1717 | num_gpu_clusters = min(num_clusters, num_online_gpus()); | ||
1718 | gpu_cluster_size = num_online_gpus() / num_gpu_clusters; | ||
1719 | |||
1720 | if (((num_online_gpus() % gpu_cluster_size) != 0) || | ||
1721 | (num_gpu_clusters != num_clusters)) { | ||
1722 | printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n"); | ||
1723 | } | ||
1724 | #endif | ||
1725 | |||
739 | /* initialize clusters */ | 1726 | /* initialize clusters */ |
740 | cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); | 1727 | cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); |
741 | for (i = 0; i < num_clusters; i++) { | 1728 | for (i = 0; i < num_clusters; i++) { |
742 | 1729 | ||
743 | cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | 1730 | cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), |
744 | GFP_ATOMIC); | 1731 | GFP_ATOMIC); |
745 | cedf[i].heap_node = kmalloc( | 1732 | INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio); |
746 | cluster_size * sizeof(struct bheap_node), | ||
747 | GFP_ATOMIC); | ||
748 | bheap_init(&(cedf[i].cpu_heap)); | ||
749 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); | 1733 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); |
750 | 1734 | ||
1735 | |||
1736 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1737 | cedf[i].pending_tasklets.head = NULL; | ||
1738 | cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head); | ||
1739 | #endif | ||
1740 | |||
1741 | |||
751 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | 1742 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) |
752 | return -ENOMEM; | 1743 | return -ENOMEM; |
753 | #ifdef CONFIG_RELEASE_MASTER | 1744 | #ifdef CONFIG_RELEASE_MASTER |
@@ -758,6 +1749,10 @@ static long cedf_activate_plugin(void) | |||
758 | /* cycle through cluster and add cpus to them */ | 1749 | /* cycle through cluster and add cpus to them */ |
759 | for (i = 0; i < num_clusters; i++) { | 1750 | for (i = 0; i < num_clusters; i++) { |
760 | 1751 | ||
1752 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1753 | raw_spin_lock_init(&cedf[i].dgl_lock); | ||
1754 | #endif | ||
1755 | |||
761 | for_each_online_cpu(cpu) { | 1756 | for_each_online_cpu(cpu) { |
762 | /* check if the cpu is already in a cluster */ | 1757 | /* check if the cpu is already in a cluster */ |
763 | for (j = 0; j < num_clusters; j++) | 1758 | for (j = 0; j < num_clusters; j++) |
@@ -788,8 +1783,8 @@ static long cedf_activate_plugin(void) | |||
788 | atomic_set(&entry->will_schedule, 0); | 1783 | atomic_set(&entry->will_schedule, 0); |
789 | entry->cpu = ccpu; | 1784 | entry->cpu = ccpu; |
790 | entry->cluster = &cedf[i]; | 1785 | entry->cluster = &cedf[i]; |
791 | entry->hn = &(cedf[i].heap_node[cpu_count]); | 1786 | |
792 | bheap_node_init(&entry->hn, entry); | 1787 | INIT_BINHEAP_NODE(&entry->hn); |
793 | 1788 | ||
794 | cpu_count++; | 1789 | cpu_count++; |
795 | 1790 | ||
@@ -806,6 +1801,14 @@ static long cedf_activate_plugin(void) | |||
806 | } | 1801 | } |
807 | } | 1802 | } |
808 | 1803 | ||
1804 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1805 | init_klmirqd(); | ||
1806 | #endif | ||
1807 | |||
1808 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1809 | init_nvidia_info(); | ||
1810 | #endif | ||
1811 | |||
809 | free_cpumask_var(mask); | 1812 | free_cpumask_var(mask); |
810 | clusters_allocated = 1; | 1813 | clusters_allocated = 1; |
811 | return 0; | 1814 | return 0; |
@@ -824,6 +1827,33 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { | |||
824 | .task_block = cedf_task_block, | 1827 | .task_block = cedf_task_block, |
825 | .admit_task = cedf_admit_task, | 1828 | .admit_task = cedf_admit_task, |
826 | .activate_plugin = cedf_activate_plugin, | 1829 | .activate_plugin = cedf_activate_plugin, |
1830 | .compare = edf_higher_prio, | ||
1831 | #ifdef CONFIG_LITMUS_LOCKING | ||
1832 | .allocate_lock = cedf_allocate_lock, | ||
1833 | .increase_prio = increase_priority_inheritance, | ||
1834 | .decrease_prio = decrease_priority_inheritance, | ||
1835 | .__increase_prio = __increase_priority_inheritance, | ||
1836 | .__decrease_prio = __decrease_priority_inheritance, | ||
1837 | #endif | ||
1838 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1839 | .nested_increase_prio = nested_increase_priority_inheritance, | ||
1840 | .nested_decrease_prio = nested_decrease_priority_inheritance, | ||
1841 | .__compare = __edf_higher_prio, | ||
1842 | #endif | ||
1843 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1844 | .get_dgl_spinlock = cedf_get_dgl_spinlock, | ||
1845 | #endif | ||
1846 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1847 | .allocate_aff_obs = cedf_allocate_affinity_observer, | ||
1848 | #endif | ||
1849 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1850 | .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet, | ||
1851 | .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet, | ||
1852 | .run_tasklets = cedf_run_tasklets, | ||
1853 | #endif | ||
1854 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
1855 | .map_gpu_to_cpu = cedf_map_gpu_to_cpu, | ||
1856 | #endif | ||
827 | }; | 1857 | }; |
828 | 1858 | ||
829 | static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; | 1859 | static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; |
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c index b8548b885b35..01791a18e8f3 100644 --- a/litmus/sched_gsn_edf.c +++ b/litmus/sched_gsn_edf.c | |||
@@ -12,24 +12,54 @@ | |||
12 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/uaccess.h> | ||
16 | #include <linux/module.h> | ||
15 | 17 | ||
16 | #include <litmus/litmus.h> | 18 | #include <litmus/litmus.h> |
17 | #include <litmus/jobs.h> | 19 | #include <litmus/jobs.h> |
18 | #include <litmus/sched_plugin.h> | 20 | #include <litmus/sched_plugin.h> |
19 | #include <litmus/edf_common.h> | 21 | #include <litmus/edf_common.h> |
20 | #include <litmus/sched_trace.h> | 22 | #include <litmus/sched_trace.h> |
21 | #include <litmus/trace.h> | ||
22 | 23 | ||
23 | #include <litmus/preempt.h> | 24 | #include <litmus/preempt.h> |
24 | #include <litmus/budget.h> | 25 | #include <litmus/budget.h> |
25 | 26 | ||
26 | #include <litmus/bheap.h> | 27 | #include <litmus/bheap.h> |
28 | #include <litmus/binheap.h> | ||
29 | #include <litmus/trace.h> | ||
30 | |||
31 | #ifdef CONFIG_LITMUS_LOCKING | ||
32 | #include <litmus/kfmlp_lock.h> | ||
33 | #endif | ||
34 | |||
35 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
36 | #include <litmus/rsm_lock.h> | ||
37 | #include <litmus/ikglp_lock.h> | ||
38 | #endif | ||
27 | 39 | ||
28 | #ifdef CONFIG_SCHED_CPU_AFFINITY | 40 | #ifdef CONFIG_SCHED_CPU_AFFINITY |
29 | #include <litmus/affinity.h> | 41 | #include <litmus/affinity.h> |
30 | #endif | 42 | #endif |
31 | 43 | ||
32 | #include <linux/module.h> | 44 | #ifdef CONFIG_REALTIME_AUX_TASKS |
45 | #include <litmus/aux_tasks.h> | ||
46 | #endif | ||
47 | |||
48 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
49 | #include <litmus/litmus_softirq.h> | ||
50 | #endif | ||
51 | |||
52 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
53 | #include <linux/interrupt.h> | ||
54 | #endif | ||
55 | |||
56 | #ifdef CONFIG_LITMUS_NVIDIA | ||
57 | #include <litmus/nvidia_info.h> | ||
58 | #endif | ||
59 | |||
60 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
61 | #include <litmus/gpu_affinity.h> | ||
62 | #endif | ||
33 | 63 | ||
34 | /* Overview of GSN-EDF operations. | 64 | /* Overview of GSN-EDF operations. |
35 | * | 65 | * |
@@ -104,52 +134,64 @@ typedef struct { | |||
104 | int cpu; | 134 | int cpu; |
105 | struct task_struct* linked; /* only RT tasks */ | 135 | struct task_struct* linked; /* only RT tasks */ |
106 | struct task_struct* scheduled; /* only RT tasks */ | 136 | struct task_struct* scheduled; /* only RT tasks */ |
107 | struct bheap_node* hn; | 137 | struct binheap_node hn; |
108 | } cpu_entry_t; | 138 | } cpu_entry_t; |
109 | DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); | 139 | DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); |
110 | 140 | ||
111 | cpu_entry_t* gsnedf_cpus[NR_CPUS]; | 141 | cpu_entry_t* gsnedf_cpus[NR_CPUS]; |
112 | 142 | ||
113 | /* the cpus queue themselves according to priority in here */ | 143 | /* the cpus queue themselves according to priority in here */ |
114 | static struct bheap_node gsnedf_heap_node[NR_CPUS]; | 144 | static struct binheap gsnedf_cpu_heap; |
115 | static struct bheap gsnedf_cpu_heap; | ||
116 | 145 | ||
117 | static rt_domain_t gsnedf; | 146 | static rt_domain_t gsnedf; |
118 | #define gsnedf_lock (gsnedf.ready_lock) | 147 | #define gsnedf_lock (gsnedf.ready_lock) |
119 | 148 | ||
149 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
150 | static raw_spinlock_t dgl_lock; | ||
151 | |||
152 | static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t) | ||
153 | { | ||
154 | return(&dgl_lock); | ||
155 | } | ||
156 | #endif | ||
157 | |||
158 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
159 | struct tasklet_head gsnedf_pending_tasklets; | ||
160 | #endif | ||
161 | |||
120 | 162 | ||
121 | /* Uncomment this if you want to see all scheduling decisions in the | 163 | /* Uncomment this if you want to see all scheduling decisions in the |
122 | * TRACE() log. | 164 | * TRACE() log. |
123 | #define WANT_ALL_SCHED_EVENTS | 165 | #define WANT_ALL_SCHED_EVENTS |
124 | */ | 166 | */ |
125 | 167 | ||
126 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | 168 | static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) |
127 | { | 169 | { |
128 | cpu_entry_t *a, *b; | 170 | cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); |
129 | a = _a->value; | 171 | cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); |
130 | b = _b->value; | 172 | |
131 | /* Note that a and b are inverted: we want the lowest-priority CPU at | 173 | /* Note that a and b are inverted: we want the lowest-priority CPU at |
132 | * the top of the heap. | 174 | * the top of the heap. |
133 | */ | 175 | */ |
134 | return edf_higher_prio(b->linked, a->linked); | 176 | return edf_higher_prio(b->linked, a->linked); |
135 | } | 177 | } |
136 | 178 | ||
179 | |||
137 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | 180 | /* update_cpu_position - Move the cpu entry to the correct place to maintain |
138 | * order in the cpu queue. Caller must hold gsnedf lock. | 181 | * order in the cpu queue. Caller must hold gsnedf lock. |
139 | */ | 182 | */ |
140 | static void update_cpu_position(cpu_entry_t *entry) | 183 | static void update_cpu_position(cpu_entry_t *entry) |
141 | { | 184 | { |
142 | if (likely(bheap_node_in_heap(entry->hn))) | 185 | if (likely(binheap_is_in_heap(&entry->hn))) { |
143 | bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | 186 | binheap_delete(&entry->hn, &gsnedf_cpu_heap); |
144 | bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); | 187 | } |
188 | binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn); | ||
145 | } | 189 | } |
146 | 190 | ||
147 | /* caller must hold gsnedf lock */ | 191 | /* caller must hold gsnedf lock */ |
148 | static cpu_entry_t* lowest_prio_cpu(void) | 192 | static cpu_entry_t* lowest_prio_cpu(void) |
149 | { | 193 | { |
150 | struct bheap_node* hn; | 194 | return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn); |
151 | hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap); | ||
152 | return hn->value; | ||
153 | } | 195 | } |
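The hunk above replaces the statically allocated bheap_node array with the intrusive binheap API: the node lives inside cpu_entry_t, a priority change is expressed as delete-plus-reinsert, and the minimum is read with binheap_top_entry(). A condensed sketch of that usage pattern, assuming the binheap_* helpers behave as their uses in this patch suggest (demo_entry_t is a stand-in, not plugin code):

/* Sketch only: mirrors the binheap usage adopted in this patch. */
typedef struct demo_entry {
	int cpu;
	struct task_struct *linked;
	struct binheap_node hn;		/* intrusive node replaces bheap_node* */
} demo_entry_t;

static struct binheap demo_cpu_heap;

static int demo_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
{
	demo_entry_t *a = binheap_entry(_a, demo_entry_t, hn);
	demo_entry_t *b = binheap_entry(_b, demo_entry_t, hn);
	/* inverted on purpose: lowest-priority CPU ends up at the top */
	return edf_higher_prio(b->linked, a->linked);
}

static void demo_init(demo_entry_t *entries, int n)
{
	int i;
	INIT_BINHEAP_HANDLE(&demo_cpu_heap, demo_lower_prio);
	for (i = 0; i < n; i++) {
		INIT_BINHEAP_NODE(&entries[i].hn);
		binheap_add(&entries[i].hn, &demo_cpu_heap, demo_entry_t, hn);
	}
}

static void demo_update_position(demo_entry_t *e)
{
	/* a key change becomes delete + re-insert */
	if (binheap_is_in_heap(&e->hn))
		binheap_delete(&e->hn, &demo_cpu_heap);
	binheap_add(&e->hn, &demo_cpu_heap, demo_entry_t, hn);
}

static demo_entry_t* demo_lowest_prio_cpu(void)
{
	return binheap_top_entry(&demo_cpu_heap, demo_entry_t, hn);
}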
154 | 196 | ||
155 | 197 | ||
@@ -164,8 +206,17 @@ static noinline void link_task_to_cpu(struct task_struct* linked, | |||
164 | struct task_struct* tmp; | 206 | struct task_struct* tmp; |
165 | int on_cpu; | 207 | int on_cpu; |
166 | 208 | ||
209 | //int print = (linked != NULL || entry->linked != NULL); | ||
210 | |||
167 | BUG_ON(linked && !is_realtime(linked)); | 211 | BUG_ON(linked && !is_realtime(linked)); |
168 | 212 | ||
213 | /* | ||
214 | if (print) { | ||
215 | TRACE_CUR("linked = %s/%d\n", (linked) ? linked->comm : "(nil)", (linked)? linked->pid : 0); | ||
216 | TRACE_CUR("entry->linked = %s/%d\n", (entry->linked) ? entry->linked->comm : "(nil)", (entry->linked)? entry->linked->pid : 0); | ||
217 | } | ||
218 | */ | ||
219 | |||
169 | /* Currently linked task is set to be unlinked. */ | 220 | /* Currently linked task is set to be unlinked. */ |
170 | if (entry->linked) { | 221 | if (entry->linked) { |
171 | entry->linked->rt_param.linked_on = NO_CPU; | 222 | entry->linked->rt_param.linked_on = NO_CPU; |
@@ -201,12 +252,18 @@ static noinline void link_task_to_cpu(struct task_struct* linked, | |||
201 | linked->rt_param.linked_on = entry->cpu; | 252 | linked->rt_param.linked_on = entry->cpu; |
202 | } | 253 | } |
203 | entry->linked = linked; | 254 | entry->linked = linked; |
204 | #ifdef WANT_ALL_SCHED_EVENTS | 255 | |
205 | if (linked) | 256 | /* |
206 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | 257 | if (print) { |
207 | else | 258 | //#ifdef WANT_ALL_SCHED_EVENTS |
208 | TRACE("NULL linked to %d.\n", entry->cpu); | 259 | if (linked) |
209 | #endif | 260 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); |
261 | else | ||
262 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
263 | //#endif | ||
264 | } | ||
265 | */ | ||
266 | |||
210 | update_cpu_position(entry); | 267 | update_cpu_position(entry); |
211 | } | 268 | } |
212 | 269 | ||
@@ -251,8 +308,17 @@ static noinline void requeue(struct task_struct* task) | |||
251 | /* sanity check before insertion */ | 308 | /* sanity check before insertion */ |
252 | BUG_ON(is_queued(task)); | 309 | BUG_ON(is_queued(task)); |
253 | 310 | ||
254 | if (is_released(task, litmus_clock())) | 311 | if (is_released(task, litmus_clock())) { |
255 | __add_ready(&gsnedf, task); | 312 | #ifdef CONFIG_REALTIME_AUX_TASKS |
313 | if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) { | ||
314 | /* aux_task probably transitioned to real-time while it was blocked */ | ||
315 | TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); | ||
316 | unlink(task); /* really needed? */ | ||
317 | } | ||
318 | else | ||
319 | #endif | ||
320 | __add_ready(&gsnedf, task); | ||
321 | } | ||
256 | else { | 322 | else { |
257 | /* it has got to wait */ | 323 | /* it has got to wait */ |
258 | add_release(&gsnedf, task); | 324 | add_release(&gsnedf, task); |
@@ -326,6 +392,7 @@ static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | |||
326 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | 392 | raw_spin_lock_irqsave(&gsnedf_lock, flags); |
327 | 393 | ||
328 | __merge_ready(rt, tasks); | 394 | __merge_ready(rt, tasks); |
395 | |||
329 | check_for_preemptions(); | 396 | check_for_preemptions(); |
330 | 397 | ||
331 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | 398 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); |
@@ -338,12 +405,17 @@ static noinline void job_completion(struct task_struct *t, int forced) | |||
338 | 405 | ||
339 | sched_trace_task_completion(t, forced); | 406 | sched_trace_task_completion(t, forced); |
340 | 407 | ||
408 | #ifdef CONFIG_LITMUS_NVIDIA | ||
409 | atomic_set(&tsk_rt(t)->nv_int_count, 0); | ||
410 | #endif | ||
411 | |||
341 | TRACE_TASK(t, "job_completion().\n"); | 412 | TRACE_TASK(t, "job_completion().\n"); |
342 | 413 | ||
343 | /* set flags */ | 414 | /* set flags */ |
344 | tsk_rt(t)->completed = 1; | 415 | tsk_rt(t)->completed = 1; |
345 | /* prepare for next period */ | 416 | /* prepare for next period */ |
346 | prepare_for_next_period(t); | 417 | prepare_for_next_period(t); |
418 | |||
347 | if (is_released(t, litmus_clock())) | 419 | if (is_released(t, litmus_clock())) |
348 | sched_trace_task_release(t); | 420 | sched_trace_task_release(t); |
349 | /* unlink */ | 421 | /* unlink */ |
@@ -362,24 +434,350 @@ static noinline void job_completion(struct task_struct *t, int forced) | |||
362 | */ | 434 | */ |
363 | static void gsnedf_tick(struct task_struct* t) | 435 | static void gsnedf_tick(struct task_struct* t) |
364 | { | 436 | { |
365 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | 437 | if (is_realtime(t) && budget_exhausted(t)) |
366 | if (!is_np(t)) { | 438 | { |
367 | /* np tasks will be preempted when they become | 439 | if (budget_signalled(t) && !sigbudget_sent(t)) { |
368 | * preemptable again | 440 | /* signal exhaustion */ |
369 | */ | 441 | send_sigbudget(t); |
370 | litmus_reschedule_local(); | 442 | } |
371 | TRACE("gsnedf_scheduler_tick: " | 443 | |
372 | "%d is preemptable " | 444 | if (budget_enforced(t)) { |
373 | " => FORCE_RESCHED\n", t->pid); | 445 | if (!is_np(t)) { |
374 | } else if (is_user_np(t)) { | 446 | /* np tasks will be preempted when they become |
375 | TRACE("gsnedf_scheduler_tick: " | 447 | * preemptable again |
376 | "%d is non-preemptable, " | 448 | */ |
377 | "preemption delayed.\n", t->pid); | 449 | litmus_reschedule_local(); |
378 | request_exit_np(t); | 450 | TRACE("gsnedf_scheduler_tick: " |
451 | "%d is preemptable " | ||
452 | " => FORCE_RESCHED\n", t->pid); | ||
453 | } else if (is_user_np(t)) { | ||
454 | TRACE("gsnedf_scheduler_tick: " | ||
455 | "%d is non-preemptable, " | ||
456 | "preemption delayed.\n", t->pid); | ||
457 | request_exit_np(t); | ||
458 | } | ||
459 | } | ||
460 | } | ||
461 | |||
462 | /* | ||
463 | if(is_realtime(t)) { | ||
464 | TRACE_TASK(t, "tick %llu\n", litmus_clock()); | ||
465 | } | ||
466 | */ | ||
467 | } | ||
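The reworked tick handler above first delivers the budget-overrun signal (at most once per overrun) and only then applies budget enforcement. A minimal, self-contained sketch of that ordering follows; the types and helper names are illustrative stand-ins, not the LITMUS^RT API.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the task state the tick handler inspects. */
struct demo_task {
	int pid;
	bool realtime;
	bool budget_exhausted;
	bool budget_signalled;   /* task asked to be signalled on overrun */
	bool sigbudget_sent;     /* signal already delivered for this job */
	bool budget_enforced;    /* task asked for forced preemption */
	bool non_preemptive;     /* currently inside an np-section */
};

static void send_sigbudget(struct demo_task *t)  { t->sigbudget_sent = true; printf("SIG_BUDGET -> %d\n", t->pid); }
static void reschedule_local(void)               { printf("FORCE_RESCHED\n"); }
static void request_exit_np(struct demo_task *t) { printf("ask %d to leave its np-section\n", t->pid); }

/* Mirrors the order of checks in the tick path: signal first, enforce second. */
static void on_tick(struct demo_task *t)
{
	if (!t->realtime || !t->budget_exhausted)
		return;

	if (t->budget_signalled && !t->sigbudget_sent)
		send_sigbudget(t);            /* at most one signal per overrun */

	if (t->budget_enforced) {
		if (!t->non_preemptive)
			reschedule_local();   /* preempt right away */
		else
			request_exit_np(t);   /* preempt once the np-section ends */
	}
}

int main(void)
{
	struct demo_task t = { .pid = 1234, .realtime = true, .budget_exhausted = true,
	                       .budget_signalled = true, .budget_enforced = true };
	on_tick(&t);
	return 0;
}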
468 | |||
469 | |||
470 | |||
471 | |||
472 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
473 | |||
474 | |||
475 | static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) | ||
476 | { | ||
477 | if (!atomic_read(&tasklet->count)) { | ||
478 | if(tasklet->owner) { | ||
479 | sched_trace_tasklet_begin(tasklet->owner); | ||
480 | } | ||
481 | |||
482 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) | ||
483 | { | ||
484 | BUG(); | ||
485 | } | ||
486 | TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", | ||
487 | __FUNCTION__, | ||
488 | (tasklet->owner) ? tasklet->owner->pid : -1, | ||
489 | (tasklet->owner) ? 0 : 1); | ||
490 | tasklet->func(tasklet->data); | ||
491 | tasklet_unlock(tasklet); | ||
492 | |||
493 | if(tasklet->owner) { | ||
494 | sched_trace_tasklet_end(tasklet->owner, flushed); | ||
495 | } | ||
496 | } | ||
497 | else { | ||
498 | BUG(); | ||
499 | } | ||
500 | } | ||
501 | |||
502 | static void do_lit_tasklets(struct task_struct* sched_task) | ||
503 | { | ||
504 | int work_to_do = 1; | ||
505 | struct tasklet_struct *tasklet = NULL; | ||
506 | unsigned long flags; | ||
507 | |||
508 | while(work_to_do) { | ||
509 | |||
510 | TS_NV_SCHED_BOTISR_START; | ||
511 | |||
512 | // execute one tasklet that has higher priority | ||
513 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
514 | |||
515 | if(gsnedf_pending_tasklets.head != NULL) { | ||
516 | struct tasklet_struct *prev = NULL; | ||
517 | tasklet = gsnedf_pending_tasklets.head; | ||
518 | |||
519 | while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) { | ||
520 | prev = tasklet; | ||
521 | tasklet = tasklet->next; | ||
522 | } | ||
523 | |||
524 | // remove the tasklet from the queue | ||
525 | if(prev) { | ||
526 | prev->next = tasklet->next; | ||
527 | if(prev->next == NULL) { | ||
528 | TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
529 | gsnedf_pending_tasklets.tail = &(prev->next); | ||
530 | } | ||
531 | } | ||
532 | else { | ||
533 | gsnedf_pending_tasklets.head = tasklet->next; | ||
534 | if(tasklet->next == NULL) { | ||
535 | TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
536 | gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); | ||
537 | } | ||
538 | } | ||
539 | } | ||
540 | else { | ||
541 | TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); | ||
542 | } | ||
543 | |||
544 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
545 | |||
546 | if(tasklet) { | ||
547 | __do_lit_tasklet(tasklet, 0ul); | ||
548 | tasklet = NULL; | ||
549 | } | ||
550 | else { | ||
551 | work_to_do = 0; | ||
552 | } | ||
553 | |||
554 | TS_NV_SCHED_BOTISR_END; | ||
555 | } | ||
556 | } | ||
557 | |||
558 | //static void do_lit_tasklets(struct task_struct* sched_task) | ||
559 | //{ | ||
560 | // int work_to_do = 1; | ||
561 | // struct tasklet_struct *tasklet = NULL; | ||
562 | // //struct tasklet_struct *step; | ||
563 | // unsigned long flags; | ||
564 | // | ||
565 | // while(work_to_do) { | ||
566 | // | ||
567 | // TS_NV_SCHED_BOTISR_START; | ||
568 | // | ||
569 | // // remove tasklet at head of list if it has higher priority. | ||
570 | // raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
571 | // | ||
572 | // if(gsnedf_pending_tasklets.head != NULL) { | ||
573 | // // remove tasklet at head. | ||
574 | // tasklet = gsnedf_pending_tasklets.head; | ||
575 | // | ||
576 | // if(edf_higher_prio(tasklet->owner, sched_task)) { | ||
577 | // | ||
578 | // if(NULL == tasklet->next) { | ||
579 | // // tasklet is at the head, list only has one element | ||
580 | // TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
581 | // gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); | ||
582 | // } | ||
583 | // | ||
584 | // // remove the tasklet from the queue | ||
585 | // gsnedf_pending_tasklets.head = tasklet->next; | ||
586 | // | ||
587 | // TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
588 | // } | ||
589 | // else { | ||
590 | // TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id()); | ||
591 | // tasklet = NULL; | ||
592 | // } | ||
593 | // } | ||
594 | // else { | ||
595 | // TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); | ||
596 | // } | ||
597 | // | ||
598 | // raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
599 | // | ||
600 | // TS_NV_SCHED_BOTISR_END; | ||
601 | // | ||
602 | // if(tasklet) { | ||
603 | // __do_lit_tasklet(tasklet, 0ul); | ||
604 | // tasklet = NULL; | ||
605 | // } | ||
606 | // else { | ||
607 | // work_to_do = 0; | ||
608 | // } | ||
609 | // } | ||
610 | // | ||
611 | // //TRACE("%s: exited.\n", __FUNCTION__); | ||
612 | //} | ||
613 | |||
614 | static void __add_pai_tasklet(struct tasklet_struct* tasklet) | ||
615 | { | ||
616 | struct tasklet_struct* step; | ||
617 | |||
618 | tasklet->next = NULL; // make sure there are no old values floating around | ||
619 | |||
620 | step = gsnedf_pending_tasklets.head; | ||
621 | if(step == NULL) { | ||
622 | TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); | ||
623 | // insert at tail. | ||
624 | *(gsnedf_pending_tasklets.tail) = tasklet; | ||
625 | gsnedf_pending_tasklets.tail = &(tasklet->next); | ||
626 | } | ||
627 | else if((*(gsnedf_pending_tasklets.tail) != NULL) && | ||
628 | edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) { | ||
629 | // insert at tail. | ||
630 | TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); | ||
631 | |||
632 | *(gsnedf_pending_tasklets.tail) = tasklet; | ||
633 | gsnedf_pending_tasklets.tail = &(tasklet->next); | ||
634 | } | ||
635 | else { | ||
636 | // insert the tasklet somewhere in the middle. | ||
637 | |||
638 | TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); | ||
639 | |||
640 | while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) { | ||
641 | step = step->next; | ||
642 | } | ||
643 | |||
644 | // insert tasklet right before step->next. | ||
645 | |||
646 | TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1); | ||
647 | |||
648 | tasklet->next = step->next; | ||
649 | step->next = tasklet; | ||
650 | |||
651 | // patch up the head if needed. | ||
652 | if(gsnedf_pending_tasklets.head == step) | ||
653 | { | ||
654 | TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); | ||
655 | gsnedf_pending_tasklets.head = tasklet; | ||
656 | } | ||
657 | } | ||
658 | } | ||
659 | |||
660 | static void gsnedf_run_tasklets(struct task_struct* sched_task) | ||
661 | { | ||
662 | preempt_disable(); | ||
663 | |||
664 | if(gsnedf_pending_tasklets.head != NULL) { | ||
665 | TRACE("%s: There are tasklets to process.\n", __FUNCTION__); | ||
666 | do_lit_tasklets(sched_task); | ||
667 | } | ||
668 | |||
669 | preempt_enable_no_resched(); | ||
670 | } | ||
671 | |||
672 | static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet) | ||
673 | { | ||
674 | cpu_entry_t *targetCPU = NULL; | ||
675 | int thisCPU; | ||
676 | int runLocal = 0; | ||
677 | int runNow = 0; | ||
678 | unsigned long flags; | ||
679 | |||
680 | if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) | ||
681 | { | ||
682 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); | ||
683 | return 0; | ||
684 | } | ||
685 | |||
686 | |||
687 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
688 | |||
689 | thisCPU = smp_processor_id(); | ||
690 | |||
691 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
692 | { | ||
693 | cpu_entry_t* affinity = NULL; | ||
694 | |||
695 | // use this CPU if it is in our cluster and isn't running any RT work. | ||
696 | if( | ||
697 | #ifdef CONFIG_RELEASE_MASTER | ||
698 | (thisCPU != gsnedf.release_master) && | ||
699 | #endif | ||
700 | (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) { | ||
701 | affinity = &(__get_cpu_var(gsnedf_cpu_entries)); | ||
702 | } | ||
703 | else { | ||
704 | // this CPU is busy or shouldn't run tasklet in this cluster. | ||
705 | // look for available nearby CPUs. | ||
706 | // NOTE: Affinity towards owner and not this CPU. Is this right? | ||
707 | affinity = | ||
708 | gsnedf_get_nearest_available_cpu( | ||
709 | &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner))); | ||
710 | } | ||
711 | |||
712 | targetCPU = affinity; | ||
713 | } | ||
714 | #endif | ||
715 | |||
716 | if (targetCPU == NULL) { | ||
717 | targetCPU = lowest_prio_cpu(); | ||
718 | } | ||
719 | |||
720 | if (edf_higher_prio(tasklet->owner, targetCPU->linked)) { | ||
721 | if (thisCPU == targetCPU->cpu) { | ||
722 | TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); | ||
723 | runLocal = 1; | ||
724 | runNow = 1; | ||
725 | } | ||
726 | else { | ||
727 | TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); | ||
728 | runLocal = 0; | ||
729 | runNow = 1; | ||
730 | } | ||
731 | } | ||
732 | else { | ||
733 | runLocal = 0; | ||
734 | runNow = 0; | ||
735 | } | ||
736 | |||
737 | if(!runLocal) { | ||
738 | // enqueue the tasklet | ||
739 | __add_pai_tasklet(tasklet); | ||
740 | } | ||
741 | |||
742 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
743 | |||
744 | |||
745 | if (runLocal /*&& runNow */) { // runNow == 1 is implied | ||
746 | TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); | ||
747 | __do_lit_tasklet(tasklet, 0ul); | ||
748 | } | ||
749 | else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied | ||
750 | TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); | ||
751 | preempt(targetCPU); // need to be protected by cedf_lock? | ||
752 | } | ||
753 | else { | ||
754 | TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); | ||
755 | } | ||
756 | |||
757 | return(1); // success | ||
758 | } | ||
759 | |||
760 | static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio, | ||
761 | struct task_struct *new_prio) | ||
762 | { | ||
763 | struct tasklet_struct* step; | ||
764 | unsigned long flags; | ||
765 | |||
766 | if(gsnedf_pending_tasklets.head != NULL) { | ||
767 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | ||
768 | for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) { | ||
769 | if(step->owner == old_prio) { | ||
770 | TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); | ||
771 | step->owner = new_prio; | ||
772 | } | ||
379 | } | 773 | } |
774 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | ||
380 | } | 775 | } |
381 | } | 776 | } |
382 | 777 | ||
778 | #endif // end PAI | ||
779 | |||
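The PAI support above keeps pending tasklets in a singly linked list sorted by the owner's scheduling priority, with a tail that points at the last node's next field so the empty-list and append cases need no special handling. A simplified, self-contained sketch of that structure; integer priorities stand in for edf_higher_prio(), and none of the names below are the kernel API.

#include <stdio.h>

struct item {
	int prio;              /* higher value = higher priority */
	struct item *next;
};

struct queue {
	struct item *head;
	struct item **tail;    /* address of the last node's next field (or of head when empty) */
};

static void queue_init(struct queue *q) { q->head = NULL; q->tail = &q->head; }

/* Insert so the list stays sorted highest-priority-first, keeping tail consistent. */
static void queue_insert(struct queue *q, struct item *it)
{
	struct item **link = &q->head;

	it->next = NULL;
	while (*link && (*link)->prio >= it->prio)
		link = &(*link)->next;

	it->next = *link;
	*link = it;
	if (!it->next)                 /* became the last node: repoint the tail */
		q->tail = &it->next;
}

/* Dequeue the head, i.e. the highest-priority pending item. */
static struct item *queue_pop(struct queue *q)
{
	struct item *it = q->head;
	if (!it)
		return NULL;
	q->head = it->next;
	if (!q->head)
		q->tail = &q->head;
	return it;
}

int main(void)
{
	struct queue q;
	struct item a = { 10, NULL }, b = { 30, NULL }, c = { 20, NULL };
	struct item *it;

	queue_init(&q);
	queue_insert(&q, &a);
	queue_insert(&q, &b);
	queue_insert(&q, &c);

	while ((it = queue_pop(&q)))
		printf("prio %d\n", it->prio);  /* prints 30, 20, 10 */
	return 0;
}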
780 | |||
383 | /* Getting schedule() right is a bit tricky. schedule() may not make any | 781 | /* Getting schedule() right is a bit tricky. schedule() may not make any |
384 | * assumptions on the state of the current task since it may be called for a | 782 | * assumptions on the state of the current task since it may be called for a |
385 | * number of reasons. The reasons include a scheduler_tick() determined that it | 783 | * number of reasons. The reasons include a scheduler_tick() determined that it |
@@ -404,9 +802,11 @@ static void gsnedf_tick(struct task_struct* t) | |||
404 | static struct task_struct* gsnedf_schedule(struct task_struct * prev) | 802 | static struct task_struct* gsnedf_schedule(struct task_struct * prev) |
405 | { | 803 | { |
406 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | 804 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); |
407 | int out_of_time, sleep, preempt, np, exists, blocks; | 805 | int out_of_time, signal_budget, sleep, preempt, np, exists, blocks; |
408 | struct task_struct* next = NULL; | 806 | struct task_struct* next = NULL; |
409 | 807 | ||
808 | //int completion = 0; | ||
809 | |||
410 | #ifdef CONFIG_RELEASE_MASTER | 810 | #ifdef CONFIG_RELEASE_MASTER |
411 | /* Bail out early if we are the release master. | 811 | /* Bail out early if we are the release master. |
412 | * The release master never schedules any real-time tasks. | 812 | * The release master never schedules any real-time tasks. |
@@ -427,8 +827,13 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
427 | /* (0) Determine state */ | 827 | /* (0) Determine state */ |
428 | exists = entry->scheduled != NULL; | 828 | exists = entry->scheduled != NULL; |
429 | blocks = exists && !is_running(entry->scheduled); | 829 | blocks = exists && !is_running(entry->scheduled); |
430 | out_of_time = exists && budget_enforced(entry->scheduled) | 830 | out_of_time = exists && |
431 | && budget_exhausted(entry->scheduled); | 831 | budget_enforced(entry->scheduled) && |
832 | budget_exhausted(entry->scheduled); | ||
833 | signal_budget = exists && | ||
834 | budget_signalled(entry->scheduled) && | ||
835 | budget_exhausted(entry->scheduled) && | ||
836 | !sigbudget_sent(entry->scheduled); | ||
432 | np = exists && is_np(entry->scheduled); | 837 | np = exists && is_np(entry->scheduled); |
433 | sleep = exists && is_completed(entry->scheduled); | 838 | sleep = exists && is_completed(entry->scheduled); |
434 | preempt = entry->scheduled != entry->linked; | 839 | preempt = entry->scheduled != entry->linked; |
@@ -437,21 +842,36 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
437 | TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); | 842 | TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); |
438 | #endif | 843 | #endif |
439 | 844 | ||
440 | if (exists) | 845 | if (exists) { |
441 | TRACE_TASK(prev, | 846 | TRACE_TASK(prev, |
442 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | 847 | "blocks:%d out_of_time:%d signal_budget: %d np:%d sleep:%d preempt:%d " |
443 | "state:%d sig:%d\n", | 848 | "state:%d sig:%d\n", |
444 | blocks, out_of_time, np, sleep, preempt, | 849 | blocks, out_of_time, signal_budget, np, sleep, preempt, |
445 | prev->state, signal_pending(prev)); | 850 | prev->state, signal_pending(prev)); |
851 | } | ||
852 | |||
446 | if (entry->linked && preempt) | 853 | if (entry->linked && preempt) |
447 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | 854 | TRACE_TASK(prev, "will be preempted by %s/%d\n", |
448 | entry->linked->comm, entry->linked->pid); | 855 | entry->linked->comm, entry->linked->pid); |
449 | 856 | ||
857 | /* Send the signal that the budget has been exhausted */ | ||
858 | if (signal_budget) { | ||
859 | send_sigbudget(entry->scheduled); | ||
860 | } | ||
450 | 861 | ||
451 | /* If a task blocks we have no choice but to reschedule. | 862 | /* If a task blocks we have no choice but to reschedule. |
452 | */ | 863 | */ |
453 | if (blocks) | 864 | if (blocks) { |
454 | unlink(entry->scheduled); | 865 | unlink(entry->scheduled); |
866 | } | ||
867 | |||
868 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
869 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { | ||
870 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
871 | stop_gpu_tracker(entry->scheduled); | ||
872 | } | ||
873 | } | ||
874 | #endif | ||
455 | 875 | ||
456 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | 876 | /* Request a sys_exit_np() call if we would like to preempt but cannot. |
457 | * We need to make sure to update the link structure anyway in case | 877 | * We need to make sure to update the link structure anyway in case |
@@ -468,8 +888,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
468 | * this. Don't do a job completion if we block (can't have timers running | 888 | * this. Don't do a job completion if we block (can't have timers running |
469 | * for blocked jobs). | 889 | * for blocked jobs). |
470 | */ | 890 | */ |
471 | if (!np && (out_of_time || sleep) && !blocks) | 891 | if (!np && (out_of_time || sleep) && !blocks) { |
472 | job_completion(entry->scheduled, !sleep); | 892 | job_completion(entry->scheduled, !sleep); |
893 | //completion = 1; | ||
894 | } | ||
473 | 895 | ||
474 | /* Link pending task if we became unlinked. | 896 | /* Link pending task if we became unlinked. |
475 | */ | 897 | */ |
@@ -492,12 +914,21 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
492 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | 914 | entry->scheduled->rt_param.scheduled_on = NO_CPU; |
493 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | 915 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); |
494 | } | 916 | } |
495 | } else | 917 | } |
918 | else | ||
919 | { | ||
496 | /* Only override Linux scheduler if we have a real-time task | 920 | /* Only override Linux scheduler if we have a real-time task |
497 | * scheduled that needs to continue. | 921 | * scheduled that needs to continue. |
498 | */ | 922 | */ |
499 | if (exists) | 923 | if (exists) |
500 | next = prev; | 924 | next = prev; |
925 | } | ||
926 | |||
927 | #if 0 | ||
928 | if (completion) { | ||
929 | TRACE_CUR("switching away from a completion\n"); | ||
930 | } | ||
931 | #endif | ||
501 | 932 | ||
502 | sched_state_task_picked(); | 933 | sched_state_task_picked(); |
503 | 934 | ||
@@ -512,7 +943,6 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev) | |||
512 | TRACE("becomes idle at %llu.\n", litmus_clock()); | 943 | TRACE("becomes idle at %llu.\n", litmus_clock()); |
513 | #endif | 944 | #endif |
514 | 945 | ||
515 | |||
516 | return next; | 946 | return next; |
517 | } | 947 | } |
518 | 948 | ||
@@ -524,6 +954,7 @@ static void gsnedf_finish_switch(struct task_struct *prev) | |||
524 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); | 954 | cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); |
525 | 955 | ||
526 | entry->scheduled = is_realtime(current) ? current : NULL; | 956 | entry->scheduled = is_realtime(current) ? current : NULL; |
957 | |||
527 | #ifdef WANT_ALL_SCHED_EVENTS | 958 | #ifdef WANT_ALL_SCHED_EVENTS |
528 | TRACE_TASK(prev, "switched away from\n"); | 959 | TRACE_TASK(prev, "switched away from\n"); |
529 | #endif | 960 | #endif |
@@ -537,7 +968,7 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) | |||
537 | unsigned long flags; | 968 | unsigned long flags; |
538 | cpu_entry_t* entry; | 969 | cpu_entry_t* entry; |
539 | 970 | ||
540 | TRACE("gsn edf: task new %d\n", t->pid); | 971 | TRACE("gsn edf: task new = %d on_rq = %d running = %d\n", t->pid, on_rq, running); |
541 | 972 | ||
542 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | 973 | raw_spin_lock_irqsave(&gsnedf_lock, flags); |
543 | 974 | ||
@@ -572,11 +1003,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running) | |||
572 | static void gsnedf_task_wake_up(struct task_struct *task) | 1003 | static void gsnedf_task_wake_up(struct task_struct *task) |
573 | { | 1004 | { |
574 | unsigned long flags; | 1005 | unsigned long flags; |
575 | lt_t now; | 1006 | //lt_t now; |
576 | 1007 | ||
577 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | 1008 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); |
578 | 1009 | ||
579 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | 1010 | raw_spin_lock_irqsave(&gsnedf_lock, flags); |
1011 | |||
1012 | #if 0 | ||
1013 | /* sporadic task model. will increment job numbers automatically */ | ||
580 | now = litmus_clock(); | 1014 | now = litmus_clock(); |
581 | if (is_tardy(task, now)) { | 1015 | if (is_tardy(task, now)) { |
582 | /* new sporadic release */ | 1016 | /* new sporadic release */ |
@@ -590,6 +1024,25 @@ static void gsnedf_task_wake_up(struct task_struct *task) | |||
590 | tsk_rt(task)->completed = 0; | 1024 | tsk_rt(task)->completed = 0; |
591 | } | 1025 | } |
592 | } | 1026 | } |
1027 | #else | ||
1028 | /* don't force job to end. rely on user to say when jobs complete */ | ||
1029 | tsk_rt(task)->completed = 0; | ||
1030 | #endif | ||
1031 | |||
1032 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1033 | if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) { | ||
1034 | TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid); | ||
1035 | disable_aux_task_owner(task); | ||
1036 | } | ||
1037 | #endif | ||
1038 | |||
1039 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1040 | if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) { | ||
1041 | TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid); | ||
1042 | disable_gpu_owner(task); | ||
1043 | } | ||
1044 | #endif | ||
1045 | |||
593 | gsnedf_job_arrival(task); | 1046 | gsnedf_job_arrival(task); |
594 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | 1047 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); |
595 | } | 1048 | } |
@@ -602,7 +1055,25 @@ static void gsnedf_task_block(struct task_struct *t) | |||
602 | 1055 | ||
603 | /* unlink if necessary */ | 1056 | /* unlink if necessary */ |
604 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | 1057 | raw_spin_lock_irqsave(&gsnedf_lock, flags); |
1058 | |||
605 | unlink(t); | 1059 | unlink(t); |
1060 | |||
1061 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1062 | if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { | ||
1063 | |||
1064 | TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); | ||
1065 | enable_aux_task_owner(t); | ||
1066 | } | ||
1067 | #endif | ||
1068 | |||
1069 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1070 | if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { | ||
1071 | |||
1072 | TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); | ||
1073 | enable_gpu_owner(t); | ||
1074 | } | ||
1075 | #endif | ||
1076 | |||
606 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | 1077 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); |
607 | 1078 | ||
608 | BUG_ON(!is_realtime(t)); | 1079 | BUG_ON(!is_realtime(t)); |
@@ -613,8 +1084,30 @@ static void gsnedf_task_exit(struct task_struct * t) | |||
613 | { | 1084 | { |
614 | unsigned long flags; | 1085 | unsigned long flags; |
615 | 1086 | ||
1087 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1088 | gsnedf_change_prio_pai_tasklet(t, NULL); | ||
1089 | #endif | ||
1090 | |||
616 | /* unlink if necessary */ | 1091 | /* unlink if necessary */ |
617 | raw_spin_lock_irqsave(&gsnedf_lock, flags); | 1092 | raw_spin_lock_irqsave(&gsnedf_lock, flags); |
1093 | |||
1094 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1095 | /* make sure we clean up on our way out */ | ||
1096 | if (unlikely(tsk_rt(t)->is_aux_task)) { | ||
1097 | exit_aux_task(t); | ||
1098 | } | ||
1099 | else if(tsk_rt(t)->has_aux_tasks) { | ||
1100 | disable_aux_task_owner(t); | ||
1101 | } | ||
1102 | #endif | ||
1103 | |||
1104 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1105 | /* make sure we clean up on our way out */ | ||
1106 | if(tsk_rt(t)->held_gpus) { | ||
1107 | disable_gpu_owner(t); | ||
1108 | } | ||
1109 | #endif | ||
1110 | |||
618 | unlink(t); | 1111 | unlink(t); |
619 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | 1112 | if (tsk_rt(t)->scheduled_on != NO_CPU) { |
620 | gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; | 1113 | gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; |
@@ -623,106 +1116,413 @@ static void gsnedf_task_exit(struct task_struct * t) | |||
623 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); | 1116 | raw_spin_unlock_irqrestore(&gsnedf_lock, flags); |
624 | 1117 | ||
625 | BUG_ON(!is_realtime(t)); | 1118 | BUG_ON(!is_realtime(t)); |
626 | TRACE_TASK(t, "RIP\n"); | 1119 | TRACE_TASK(t, "RIP\n"); |
627 | } | 1120 | } |
628 | 1121 | ||
629 | 1122 | ||
630 | static long gsnedf_admit_task(struct task_struct* tsk) | 1123 | static long gsnedf_admit_task(struct task_struct* tsk) |
631 | { | 1124 | { |
1125 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1126 | INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, | ||
1127 | edf_max_heap_base_priority_order); | ||
1128 | #endif | ||
1129 | |||
632 | return 0; | 1130 | return 0; |
633 | } | 1131 | } |
634 | 1132 | ||
1133 | |||
1134 | |||
1135 | |||
1136 | |||
1137 | |||
635 | #ifdef CONFIG_LITMUS_LOCKING | 1138 | #ifdef CONFIG_LITMUS_LOCKING |
636 | 1139 | ||
637 | #include <litmus/fdso.h> | 1140 | #include <litmus/fdso.h> |
638 | 1141 | ||
639 | /* called with IRQs off */ | 1142 | /* called with IRQs off */ |
640 | static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | 1143 | static int __increase_priority_inheritance(struct task_struct* t, |
1144 | struct task_struct* prio_inh) | ||
641 | { | 1145 | { |
1146 | int success = 1; | ||
642 | int linked_on; | 1147 | int linked_on; |
643 | int check_preempt = 0; | 1148 | int check_preempt = 0; |
644 | 1149 | ||
645 | raw_spin_lock(&gsnedf_lock); | 1150 | if (prio_inh && prio_inh == effective_priority(t)) { |
1151 | /* relationship already established. */ | ||
1152 | TRACE_TASK(t, "already has effective priority of %s/%d\n", | ||
1153 | prio_inh->comm, prio_inh->pid); | ||
1154 | goto out; | ||
1155 | } | ||
646 | 1156 | ||
647 | TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); | 1157 | #ifdef CONFIG_LITMUS_NESTED_LOCKING |
648 | tsk_rt(t)->inh_task = prio_inh; | 1158 | /* this sanity check allows for weaker locking in protocols */ |
649 | 1159 | if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { | |
650 | linked_on = tsk_rt(t)->linked_on; | 1160 | #endif |
651 | 1161 | TRACE_TASK(t, "inherits priority from %s/%d\n", | |
652 | /* If it is scheduled, then we need to reorder the CPU heap. */ | 1162 | prio_inh->comm, prio_inh->pid); |
653 | if (linked_on != NO_CPU) { | 1163 | tsk_rt(t)->inh_task = prio_inh; |
654 | TRACE_TASK(t, "%s: linked on %d\n", | 1164 | |
655 | __FUNCTION__, linked_on); | 1165 | linked_on = tsk_rt(t)->linked_on; |
656 | /* Holder is scheduled; need to re-order CPUs. | 1166 | |
657 | * We can't use heap_decrease() here since | 1167 | /* If it is scheduled, then we need to reorder the CPU heap. */ |
658 | * the cpu_heap is ordered in reverse direction, so | 1168 | if (linked_on != NO_CPU) { |
659 | * it is actually an increase. */ | 1169 | TRACE_TASK(t, "%s: linked on %d\n", |
660 | bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, | 1170 | __FUNCTION__, linked_on); |
661 | gsnedf_cpus[linked_on]->hn); | 1171 | /* Holder is scheduled; need to re-order CPUs. |
662 | bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, | 1172 | * We can't use heap_decrease() here since |
663 | gsnedf_cpus[linked_on]->hn); | 1173 | * the cpu_heap is ordered in reverse direction, so |
664 | } else { | 1174 | * it is actually an increase. */ |
665 | /* holder may be queued: first stop queue changes */ | 1175 | binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap); |
666 | raw_spin_lock(&gsnedf.release_lock); | 1176 | binheap_add(&gsnedf_cpus[linked_on]->hn, |
667 | if (is_queued(t)) { | 1177 | &gsnedf_cpu_heap, cpu_entry_t, hn); |
668 | TRACE_TASK(t, "%s: is queued\n", | ||
669 | __FUNCTION__); | ||
670 | /* We need to update the position of holder in some | ||
671 | * heap. Note that this could be a release heap if we | ||
672 | * budget enforcement is used and this job overran. */ | ||
673 | check_preempt = | ||
674 | !bheap_decrease(edf_ready_order, | ||
675 | tsk_rt(t)->heap_node); | ||
676 | } else { | 1178 | } else { |
677 | /* Nothing to do: if it is not queued and not linked | 1179 | /* holder may be queued: first stop queue changes */ |
678 | * then it is either sleeping or currently being moved | 1180 | raw_spin_lock(&gsnedf.release_lock); |
679 | * by other code (e.g., a timer interrupt handler) that | 1181 | if (is_queued(t)) { |
680 | * will use the correct priority when enqueuing the | 1182 | TRACE_TASK(t, "%s: is queued\n", |
681 | * task. */ | 1183 | __FUNCTION__); |
682 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", | 1184 | /* We need to update the position of holder in some |
683 | __FUNCTION__); | 1185 | * heap. Note that this could be a release heap if we |
684 | } | 1186 | * budget enforcement is used and this job overran. */ |
685 | raw_spin_unlock(&gsnedf.release_lock); | 1187 | check_preempt = |
686 | 1188 | !bheap_decrease(edf_ready_order, | |
687 | /* If holder was enqueued in a release heap, then the following | 1189 | tsk_rt(t)->heap_node); |
688 | * preemption check is pointless, but we can't easily detect | 1190 | } else { |
689 | * that case. If you want to fix this, then consider that | 1191 | /* Nothing to do: if it is not queued and not linked |
690 | * simply adding a state flag requires O(n) time to update when | 1192 | * then it is either sleeping or currently being moved |
691 | * releasing n tasks, which conflicts with the goal to have | 1193 | * by other code (e.g., a timer interrupt handler) that |
692 | * O(log n) merges. */ | 1194 | * will use the correct priority when enqueuing the |
693 | if (check_preempt) { | 1195 | * task. */ |
694 | /* heap_decrease() hit the top level of the heap: make | 1196 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", |
695 | * sure preemption checks get the right task, not the | 1197 | __FUNCTION__); |
696 | * potentially stale cache. */ | 1198 | } |
697 | bheap_uncache_min(edf_ready_order, | 1199 | raw_spin_unlock(&gsnedf.release_lock); |
698 | &gsnedf.ready_queue); | 1200 | |
699 | check_for_preemptions(); | 1201 | /* If holder was enqueued in a release heap, then the following |
1202 | * preemption check is pointless, but we can't easily detect | ||
1203 | * that case. If you want to fix this, then consider that | ||
1204 | * simply adding a state flag requires O(n) time to update when | ||
1205 | * releasing n tasks, which conflicts with the goal to have | ||
1206 | * O(log n) merges. */ | ||
1207 | if (check_preempt) { | ||
1208 | /* heap_decrease() hit the top level of the heap: make | ||
1209 | * sure preemption checks get the right task, not the | ||
1210 | * potentially stale cache. */ | ||
1211 | bheap_uncache_min(edf_ready_order, | ||
1212 | &gsnedf.ready_queue); | ||
1213 | check_for_preemptions(); | ||
1214 | } | ||
1215 | |||
1216 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1217 | /* propagate to aux tasks */ | ||
1218 | if (tsk_rt(t)->has_aux_tasks) { | ||
1219 | aux_task_owner_increase_priority(t); | ||
1220 | } | ||
1221 | #endif | ||
1222 | |||
1223 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1224 | /* propagate to gpu klmirqd */ | ||
1225 | if (tsk_rt(t)->held_gpus) { | ||
1226 | gpu_owner_increase_priority(t); | ||
1227 | } | ||
1228 | #endif | ||
1229 | |||
700 | } | 1230 | } |
1231 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1232 | } | ||
1233 | else { | ||
1234 | TRACE_TASK(t, "Spurious invalid priority increase. " | ||
1235 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" | ||
1236 | "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
1237 | t->comm, t->pid, | ||
1238 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
1239 | (prio_inh) ? prio_inh->comm : "nil", | ||
1240 | (prio_inh) ? prio_inh->pid : -1); | ||
1241 | WARN_ON(!prio_inh); | ||
1242 | success = 0; | ||
701 | } | 1243 | } |
1244 | #endif | ||
702 | 1245 | ||
703 | raw_spin_unlock(&gsnedf_lock); | 1246 | out: |
1247 | return success; | ||
704 | } | 1248 | } |
705 | 1249 | ||
706 | /* called with IRQs off */ | 1250 | /* called with IRQs off */ |
707 | static void clear_priority_inheritance(struct task_struct* t) | 1251 | static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) |
708 | { | 1252 | { |
1253 | int success; | ||
1254 | |||
709 | raw_spin_lock(&gsnedf_lock); | 1255 | raw_spin_lock(&gsnedf_lock); |
710 | 1256 | ||
711 | /* A job only stops inheriting a priority when it releases a | 1257 | success = __increase_priority_inheritance(t, prio_inh); |
712 | * resource. Thus we can make the following assumption.*/ | ||
713 | BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); | ||
714 | 1258 | ||
715 | TRACE_TASK(t, "priority restored\n"); | 1259 | raw_spin_unlock(&gsnedf_lock); |
716 | tsk_rt(t)->inh_task = NULL; | ||
717 | 1260 | ||
718 | /* Check if rescheduling is necessary. We can't use heap_decrease() | 1261 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) |
719 | * since the priority was effectively lowered. */ | 1262 | if(tsk_rt(t)->held_gpus) { |
720 | unlink(t); | 1263 | int i; |
721 | gsnedf_job_arrival(t); | 1264 | for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); |
1265 | i < NV_DEVICE_NUM; | ||
1266 | i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { | ||
1267 | pai_check_priority_increase(t, i); | ||
1268 | } | ||
1269 | } | ||
1270 | #endif | ||
1271 | } | ||
1272 | |||
1273 | |||
1274 | /* called with IRQs off */ | ||
1275 | static int __decrease_priority_inheritance(struct task_struct* t, | ||
1276 | struct task_struct* prio_inh) | ||
1277 | { | ||
1278 | int success = 1; | ||
1279 | |||
1280 | if (prio_inh == tsk_rt(t)->inh_task) { | ||
1281 | /* relationship already established. */ | ||
1282 | TRACE_TASK(t, "already inherits priority from %s/%d\n", | ||
1283 | (prio_inh) ? prio_inh->comm : "(nil)", | ||
1284 | (prio_inh) ? prio_inh->pid : 0); | ||
1285 | goto out; | ||
1286 | } | ||
1287 | |||
1288 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1289 | if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { | ||
1290 | #endif | ||
1291 | /* A job only stops inheriting a priority when it releases a | ||
1292 | * resource. Thus we can make the following assumption.*/ | ||
1293 | if(prio_inh) | ||
1294 | TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", | ||
1295 | prio_inh->comm, prio_inh->pid); | ||
1296 | else | ||
1297 | TRACE_TASK(t, "base priority restored.\n"); | ||
1298 | |||
1299 | tsk_rt(t)->inh_task = prio_inh; | ||
1300 | |||
1301 | if(tsk_rt(t)->scheduled_on != NO_CPU) { | ||
1302 | TRACE_TASK(t, "is scheduled.\n"); | ||
1303 | |||
1304 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
1305 | * since the priority was effectively lowered. */ | ||
1306 | unlink(t); | ||
1307 | gsnedf_job_arrival(t); | ||
1308 | } | ||
1309 | else { | ||
1310 | /* task is queued */ | ||
1311 | raw_spin_lock(&gsnedf.release_lock); | ||
1312 | if (is_queued(t)) { | ||
1313 | TRACE_TASK(t, "is queued.\n"); | ||
1314 | |||
1315 | /* decrease in priority, so we have to re-add to binomial heap */ | ||
1316 | unlink(t); | ||
1317 | gsnedf_job_arrival(t); | ||
1318 | } | ||
1319 | else { | ||
1320 | TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); | ||
1321 | } | ||
1322 | raw_spin_unlock(&gsnedf.release_lock); | ||
1323 | } | ||
1324 | |||
1325 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1326 | /* propagate to aux tasks */ | ||
1327 | if (tsk_rt(t)->has_aux_tasks) { | ||
1328 | aux_task_owner_decrease_priority(t); | ||
1329 | } | ||
1330 | #endif | ||
1331 | |||
1332 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1333 | /* propagate to gpu */ | ||
1334 | if (tsk_rt(t)->held_gpus) { | ||
1335 | gpu_owner_decrease_priority(t); | ||
1336 | } | ||
1337 | #endif | ||
1338 | |||
1339 | |||
1340 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1341 | } | ||
1342 | else { | ||
1343 | TRACE_TASK(t, "Spurious invalid priority decrease. " | ||
1344 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" | ||
1345 | "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
1346 | t->comm, t->pid, | ||
1347 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
1348 | (prio_inh) ? prio_inh->comm : "nil", | ||
1349 | (prio_inh) ? prio_inh->pid : -1); | ||
1350 | success = 0; | ||
1351 | } | ||
1352 | #endif | ||
1353 | |||
1354 | out: | ||
1355 | return success; | ||
1356 | } | ||
1357 | |||
1358 | static void decrease_priority_inheritance(struct task_struct* t, | ||
1359 | struct task_struct* prio_inh) | ||
1360 | { | ||
1361 | int success; | ||
1362 | |||
1363 | raw_spin_lock(&gsnedf_lock); | ||
1364 | |||
1365 | success = __decrease_priority_inheritance(t, prio_inh); | ||
722 | 1366 | ||
723 | raw_spin_unlock(&gsnedf_lock); | 1367 | raw_spin_unlock(&gsnedf_lock); |
1368 | |||
1369 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) | ||
1370 | if(tsk_rt(t)->held_gpus) { | ||
1371 | int i; | ||
1372 | for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus)); | ||
1373 | i < NV_DEVICE_NUM; | ||
1374 | i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) { | ||
1375 | pai_check_priority_decrease(t, i); | ||
1376 | } | ||
1377 | } | ||
1378 | #endif | ||
1379 | } | ||
1380 | |||
1381 | |||
1382 | |||
1383 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1384 | |||
1385 | /* called with IRQs off */ | ||
1386 | /* preconditions: | ||
1387 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
1388 | (2) The lock 'to_unlock' is held. | ||
1389 | */ | ||
1390 | static void nested_increase_priority_inheritance(struct task_struct* t, | ||
1391 | struct task_struct* prio_inh, | ||
1392 | raw_spinlock_t *to_unlock, | ||
1393 | unsigned long irqflags) | ||
1394 | { | ||
1395 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
1396 | |||
1397 | if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls. | ||
1398 | increase_priority_inheritance(t, prio_inh); // increase our prio. | ||
1399 | } | ||
1400 | |||
1401 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. | ||
1402 | |||
1403 | |||
1404 | if(blocked_lock) { | ||
1405 | if(blocked_lock->ops->propagate_increase_inheritance) { | ||
1406 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
1407 | blocked_lock->ident); | ||
1408 | |||
1409 | // beware: recursion | ||
1410 | blocked_lock->ops->propagate_increase_inheritance(blocked_lock, | ||
1411 | t, to_unlock, | ||
1412 | irqflags); | ||
1413 | } | ||
1414 | else { | ||
1415 | TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n", | ||
1416 | blocked_lock->ident); | ||
1417 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1418 | } | ||
1419 | } | ||
1420 | else { | ||
1421 | TRACE_TASK(t, "is not blocked. No propagation.\n"); | ||
1422 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1423 | } | ||
1424 | } | ||
1425 | |||
1426 | /* called with IRQs off */ | ||
1427 | /* preconditions: | ||
1428 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
1429 | (2) The lock 'to_unlock' is held. | ||
1430 | */ | ||
1431 | static void nested_decrease_priority_inheritance(struct task_struct* t, | ||
1432 | struct task_struct* prio_inh, | ||
1433 | raw_spinlock_t *to_unlock, | ||
1434 | unsigned long irqflags) | ||
1435 | { | ||
1436 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
1437 | decrease_priority_inheritance(t, prio_inh); | ||
1438 | |||
1439 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. | ||
1440 | |||
1441 | if(blocked_lock) { | ||
1442 | if(blocked_lock->ops->propagate_decrease_inheritance) { | ||
1443 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
1444 | blocked_lock->ident); | ||
1445 | |||
1446 | // beware: recursion | ||
1447 | blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, | ||
1448 | to_unlock, | ||
1449 | irqflags); | ||
1450 | } | ||
1451 | else { | ||
1452 | TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", | ||
1453 | blocked_lock); | ||
1454 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1455 | } | ||
1456 | } | ||
1457 | else { | ||
1458 | TRACE_TASK(t, "is not blocked. No propagation.\n"); | ||
1459 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
1460 | } | ||
1461 | } | ||
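The nested variants above change one task's effective priority and then follow the lock that task is blocked on, so a boost can travel down an entire blocking chain. A self-contained sketch of that chain walk under simplified assumptions (integer priorities, no locking; the real code recurses through litmus_lock_ops callbacks and releases per-task locks hand over hand as it descends).

#include <stdio.h>
#include <stddef.h>

struct demo_lock;

struct demo_task {
	const char *name;
	int eff_prio;                  /* base priority or an inherited one */
	struct demo_lock *blocked_on;  /* lock this task is waiting for, if any */
};

struct demo_lock {
	struct demo_task *owner;
};

/* Raise t's effective priority, then the priority of whoever owns the lock
 * t is blocked on, and so on until the boost no longer makes a difference. */
static void propagate_increase(struct demo_task *t, int prio)
{
	while (t && prio > t->eff_prio) {
		t->eff_prio = prio;
		printf("%s now runs at priority %d\n", t->name, t->eff_prio);
		t = t->blocked_on ? t->blocked_on->owner : NULL;
	}
}

int main(void)
{
	struct demo_lock l1, l2;
	struct demo_task low  = { "low",  1, NULL };
	struct demo_task mid  = { "mid",  2, &l1  };
	struct demo_task high = { "high", 3, &l2  };

	l1.owner = &low;   /* mid waits on l1, which low holds  */
	l2.owner = &mid;   /* high waits on l2, which mid holds */

	/* high blocks on l2: its priority propagates through mid down to low */
	propagate_increase(l2.owner, high.eff_prio);
	return 0;
}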
1462 | |||
1463 | |||
1464 | /* ******************** RSM MUTEX ********************** */ | ||
1465 | |||
1466 | static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = { | ||
1467 | .lock = rsm_mutex_lock, | ||
1468 | .unlock = rsm_mutex_unlock, | ||
1469 | .close = rsm_mutex_close, | ||
1470 | .deallocate = rsm_mutex_free, | ||
1471 | |||
1472 | .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance, | ||
1473 | .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance, | ||
1474 | |||
1475 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1476 | .dgl_lock = rsm_mutex_dgl_lock, | ||
1477 | .is_owner = rsm_mutex_is_owner, | ||
1478 | .enable_priority = rsm_mutex_enable_priority, | ||
1479 | #endif | ||
1480 | }; | ||
1481 | |||
1482 | static struct litmus_lock* gsnedf_new_rsm_mutex(void) | ||
1483 | { | ||
1484 | return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops); | ||
724 | } | 1485 | } |
725 | 1486 | ||
1487 | /* ******************** IKGLP ********************** */ | ||
1488 | |||
1489 | static struct litmus_lock_ops gsnedf_ikglp_lock_ops = { | ||
1490 | .lock = ikglp_lock, | ||
1491 | .unlock = ikglp_unlock, | ||
1492 | .close = ikglp_close, | ||
1493 | .deallocate = ikglp_free, | ||
1494 | |||
1495 | // ikglp can only be an outer-most lock. | ||
1496 | .propagate_increase_inheritance = NULL, | ||
1497 | .propagate_decrease_inheritance = NULL, | ||
1498 | }; | ||
1499 | |||
1500 | static struct litmus_lock* gsnedf_new_ikglp(void* __user arg) | ||
1501 | { | ||
1502 | return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg); | ||
1503 | } | ||
1504 | |||
1505 | #endif /* CONFIG_LITMUS_NESTED_LOCKING */ | ||
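Each lock type above fills in a litmus_lock_ops table and simply leaves the propagate callbacks NULL when it can only be an outer-most lock. A small, self-contained illustration of that ops-table pattern and of the NULL check a caller has to make before recursing; all names here are made up for the example.

#include <stdio.h>
#include <stddef.h>

struct demo_lock;

struct demo_lock_ops {
	int  (*lock)(struct demo_lock *l);
	int  (*unlock)(struct demo_lock *l);
	void (*propagate_increase)(struct demo_lock *l, int prio); /* NULL => outer-most only */
};

struct demo_lock {
	const char *name;
	const struct demo_lock_ops *ops;
};

static int  demo_lock_fn(struct demo_lock *l)   { printf("lock %s\n", l->name);   return 0; }
static int  demo_unlock_fn(struct demo_lock *l) { printf("unlock %s\n", l->name); return 0; }
static void demo_propagate(struct demo_lock *l, int prio) { printf("%s: propagate priority %d\n", l->name, prio); }

static const struct demo_lock_ops nested_ops    = { demo_lock_fn, demo_unlock_fn, demo_propagate };
static const struct demo_lock_ops outermost_ops = { demo_lock_fn, demo_unlock_fn, NULL };

/* Callers test the callback before following a blocking chain, just as the
 * scheduler checks ops->propagate_increase_inheritance above. */
static void propagate_if_supported(struct demo_lock *l, int prio)
{
	if (l->ops->propagate_increase)
		l->ops->propagate_increase(l, prio);
	else
		printf("%s does not support nesting; propagation stops here\n", l->name);
}

int main(void)
{
	struct demo_lock nestable  = { "rsm-style mutex", &nested_ops };
	struct demo_lock outermost = { "kfmlp-style sem", &outermost_ops };

	propagate_if_supported(&nestable, 42);
	propagate_if_supported(&outermost, 42);
	return 0;
}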
1506 | |||
1507 | |||
1508 | /* ******************** KFMLP support ********************** */ | ||
1509 | |||
1510 | static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = { | ||
1511 | .lock = kfmlp_lock, | ||
1512 | .unlock = kfmlp_unlock, | ||
1513 | .close = kfmlp_close, | ||
1514 | .deallocate = kfmlp_free, | ||
1515 | |||
1516 | // kfmlp can only be an outer-most lock. | ||
1517 | .propagate_increase_inheritance = NULL, | ||
1518 | .propagate_decrease_inheritance = NULL, | ||
1519 | }; | ||
1520 | |||
1521 | |||
1522 | static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg) | ||
1523 | { | ||
1524 | return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg); | ||
1525 | } | ||
726 | 1526 | ||
727 | /* ******************** FMLP support ********************** */ | 1527 | /* ******************** FMLP support ********************** */ |
728 | 1528 | ||
@@ -789,7 +1589,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) | |||
789 | if (edf_higher_prio(t, sem->hp_waiter)) { | 1589 | if (edf_higher_prio(t, sem->hp_waiter)) { |
790 | sem->hp_waiter = t; | 1590 | sem->hp_waiter = t; |
791 | if (edf_higher_prio(t, sem->owner)) | 1591 | if (edf_higher_prio(t, sem->owner)) |
792 | set_priority_inheritance(sem->owner, sem->hp_waiter); | 1592 | increase_priority_inheritance(sem->owner, sem->hp_waiter); |
793 | } | 1593 | } |
794 | 1594 | ||
795 | TS_LOCK_SUSPEND; | 1595 | TS_LOCK_SUSPEND; |
@@ -802,7 +1602,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l) | |||
802 | * there is only one wake up per release. | 1602 | * there is only one wake up per release. |
803 | */ | 1603 | */ |
804 | 1604 | ||
805 | schedule(); | 1605 | suspend_for_lock(); |
806 | 1606 | ||
807 | TS_LOCK_RESUME; | 1607 | TS_LOCK_RESUME; |
808 | 1608 | ||
@@ -857,7 +1657,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l) | |||
857 | /* Well, if next is not the highest-priority waiter, | 1657 | /* Well, if next is not the highest-priority waiter, |
858 | * then it ought to inherit the highest-priority | 1658 | * then it ought to inherit the highest-priority |
859 | * waiter's priority. */ | 1659 | * waiter's priority. */ |
860 | set_priority_inheritance(next, sem->hp_waiter); | 1660 | increase_priority_inheritance(next, sem->hp_waiter); |
861 | } | 1661 | } |
862 | 1662 | ||
863 | /* wake up next */ | 1663 | /* wake up next */ |
@@ -868,7 +1668,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l) | |||
868 | 1668 | ||
869 | /* we lose the benefit of priority inheritance (if any) */ | 1669 | /* we lose the benefit of priority inheritance (if any) */ |
870 | if (tsk_rt(t)->inh_task) | 1670 | if (tsk_rt(t)->inh_task) |
871 | clear_priority_inheritance(t); | 1671 | decrease_priority_inheritance(t, NULL); |
872 | 1672 | ||
873 | out: | 1673 | out: |
874 | spin_unlock_irqrestore(&sem->wait.lock, flags); | 1674 | spin_unlock_irqrestore(&sem->wait.lock, flags); |
@@ -906,6 +1706,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = { | |||
906 | .lock = gsnedf_fmlp_lock, | 1706 | .lock = gsnedf_fmlp_lock, |
907 | .unlock = gsnedf_fmlp_unlock, | 1707 | .unlock = gsnedf_fmlp_unlock, |
908 | .deallocate = gsnedf_fmlp_free, | 1708 | .deallocate = gsnedf_fmlp_free, |
1709 | |||
1710 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1711 | .propagate_increase_inheritance = NULL, | ||
1712 | .propagate_decrease_inheritance = NULL | ||
1713 | #endif | ||
909 | }; | 1714 | }; |
910 | 1715 | ||
911 | static struct litmus_lock* gsnedf_new_fmlp(void) | 1716 | static struct litmus_lock* gsnedf_new_fmlp(void) |
@@ -924,31 +1729,110 @@ static struct litmus_lock* gsnedf_new_fmlp(void) | |||
924 | return &sem->litmus_lock; | 1729 | return &sem->litmus_lock; |
925 | } | 1730 | } |
926 | 1731 | ||
927 | /* **** lock constructor **** */ | ||
928 | |||
929 | 1732 | ||
930 | static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, | 1733 | static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, |
931 | void* __user unused) | 1734 | void* __user args) |
932 | { | 1735 | { |
933 | int err = -ENXIO; | 1736 | int err; |
934 | 1737 | ||
935 | /* GSN-EDF currently only supports the FMLP for global resources. */ | ||
936 | switch (type) { | 1738 | switch (type) { |
937 | 1739 | ||
938 | case FMLP_SEM: | 1740 | case FMLP_SEM: |
939 | /* Flexible Multiprocessor Locking Protocol */ | 1741 | /* Flexible Multiprocessor Locking Protocol */ |
940 | *lock = gsnedf_new_fmlp(); | 1742 | *lock = gsnedf_new_fmlp(); |
941 | if (*lock) | 1743 | break; |
942 | err = 0; | 1744 | #ifdef CONFIG_LITMUS_NESTED_LOCKING |
943 | else | 1745 | case RSM_MUTEX: |
944 | err = -ENOMEM; | 1746 | *lock = gsnedf_new_rsm_mutex(); |
945 | break; | 1747 | break; |
946 | 1748 | ||
1749 | case IKGLP_SEM: | ||
1750 | *lock = gsnedf_new_ikglp(args); | ||
1751 | break; | ||
1752 | #endif | ||
1753 | case KFMLP_SEM: | ||
1754 | *lock = gsnedf_new_kfmlp(args); | ||
1755 | break; | ||
1756 | default: | ||
1757 | err = -ENXIO; | ||
1758 | goto UNSUPPORTED_LOCK; | ||
1759 | }; | ||
1760 | |||
1761 | if (*lock) | ||
1762 | err = 0; | ||
1763 | else | ||
1764 | err = -ENOMEM; | ||
1765 | |||
1766 | UNSUPPORTED_LOCK: | ||
1767 | return err; | ||
1768 | } | ||
1769 | |||
1770 | #endif // CONFIG_LITMUS_LOCKING | ||
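gsnedf_allocate_lock() above is a constructor switch: it picks the lock type, maps an unknown type to -ENXIO, and maps a NULL allocation to -ENOMEM. A tiny self-contained sketch of that same error-handling shape, using hypothetical types and plain malloc instead of the kernel allocators.

#include <stdlib.h>
#include <errno.h>

enum demo_lock_type { DEMO_FMLP, DEMO_KFMLP };

struct demo_lock { enum demo_lock_type type; };

static struct demo_lock *new_lock(enum demo_lock_type type)
{
	struct demo_lock *l = malloc(sizeof(*l));
	if (l)
		l->type = type;
	return l;
}

/* Returns 0 on success, -ENXIO for unsupported types, -ENOMEM if allocation fails. */
static int allocate_lock(struct demo_lock **lock, int type)
{
	switch (type) {
	case DEMO_FMLP:
	case DEMO_KFMLP:
		*lock = new_lock(type);
		break;
	default:
		return -ENXIO;
	}
	return *lock ? 0 : -ENOMEM;
}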
1771 | |||
1772 | |||
1773 | |||
1774 | |||
1775 | |||
1776 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1777 | static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = { | ||
1778 | .close = kfmlp_aff_obs_close, | ||
1779 | .deallocate = kfmlp_aff_obs_free, | ||
1780 | }; | ||
1781 | |||
1782 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1783 | static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = { | ||
1784 | .close = ikglp_aff_obs_close, | ||
1785 | .deallocate = ikglp_aff_obs_free, | ||
1786 | }; | ||
1787 | #endif | ||
1788 | |||
1789 | static long gsnedf_allocate_affinity_observer( | ||
1790 | struct affinity_observer **aff_obs, | ||
1791 | int type, | ||
1792 | void* __user args) | ||
1793 | { | ||
1794 | int err; | ||
1795 | |||
1796 | switch (type) { | ||
1797 | |||
1798 | case KFMLP_SIMPLE_GPU_AFF_OBS: | ||
1799 | *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); | ||
1800 | break; | ||
1801 | |||
1802 | case KFMLP_GPU_AFF_OBS: | ||
1803 | *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args); | ||
1804 | break; | ||
1805 | |||
1806 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1807 | case IKGLP_SIMPLE_GPU_AFF_OBS: | ||
1808 | *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args); | ||
1809 | break; | ||
1810 | |||
1811 | case IKGLP_GPU_AFF_OBS: | ||
1812 | *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args); | ||
1813 | break; | ||
1814 | #endif | ||
1815 | default: | ||
1816 | err = -ENXIO; | ||
1817 | goto UNSUPPORTED_AFF_OBS; | ||
947 | }; | 1818 | }; |
948 | 1819 | ||
1820 | if (*aff_obs) | ||
1821 | err = 0; | ||
1822 | else | ||
1823 | err = -ENOMEM; | ||
1824 | |||
1825 | UNSUPPORTED_AFF_OBS: | ||
949 | return err; | 1826 | return err; |
950 | } | 1827 | } |
1828 | #endif | ||
1829 | |||
951 | 1830 | ||
1831 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
1832 | static int gsnedf_map_gpu_to_cpu(int gpu) | ||
1833 | { | ||
1834 | return -1; // No CPU affinity needed. | ||
1835 | } | ||
952 | #endif | 1836 | #endif |
953 | 1837 | ||
954 | 1838 | ||
@@ -957,14 +1841,14 @@ static long gsnedf_activate_plugin(void) | |||
957 | int cpu; | 1841 | int cpu; |
958 | cpu_entry_t *entry; | 1842 | cpu_entry_t *entry; |
959 | 1843 | ||
960 | bheap_init(&gsnedf_cpu_heap); | 1844 | INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio); |
961 | #ifdef CONFIG_RELEASE_MASTER | 1845 | #ifdef CONFIG_RELEASE_MASTER |
962 | gsnedf.release_master = atomic_read(&release_master_cpu); | 1846 | gsnedf.release_master = atomic_read(&release_master_cpu); |
963 | #endif | 1847 | #endif |
964 | 1848 | ||
965 | for_each_online_cpu(cpu) { | 1849 | for_each_online_cpu(cpu) { |
966 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | 1850 | entry = &per_cpu(gsnedf_cpu_entries, cpu); |
967 | bheap_node_init(&entry->hn, entry); | 1851 | INIT_BINHEAP_NODE(&entry->hn); |
968 | entry->linked = NULL; | 1852 | entry->linked = NULL; |
969 | entry->scheduled = NULL; | 1853 | entry->scheduled = NULL; |
970 | #ifdef CONFIG_RELEASE_MASTER | 1854 | #ifdef CONFIG_RELEASE_MASTER |
@@ -978,6 +1862,20 @@ static long gsnedf_activate_plugin(void) | |||
978 | } | 1862 | } |
979 | #endif | 1863 | #endif |
980 | } | 1864 | } |
1865 | |||
1866 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1867 | gsnedf_pending_tasklets.head = NULL; | ||
1868 | gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head); | ||
1869 | #endif | ||
1870 | |||
1871 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
1872 | init_klmirqd(); | ||
1873 | #endif | ||
1874 | |||
1875 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1876 | init_nvidia_info(); | ||
1877 | #endif | ||
1878 | |||
981 | return 0; | 1879 | return 0; |
982 | } | 1880 | } |
983 | 1881 | ||
@@ -994,8 +1892,32 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = { | |||
994 | .task_block = gsnedf_task_block, | 1892 | .task_block = gsnedf_task_block, |
995 | .admit_task = gsnedf_admit_task, | 1893 | .admit_task = gsnedf_admit_task, |
996 | .activate_plugin = gsnedf_activate_plugin, | 1894 | .activate_plugin = gsnedf_activate_plugin, |
1895 | .compare = edf_higher_prio, | ||
997 | #ifdef CONFIG_LITMUS_LOCKING | 1896 | #ifdef CONFIG_LITMUS_LOCKING |
998 | .allocate_lock = gsnedf_allocate_lock, | 1897 | .allocate_lock = gsnedf_allocate_lock, |
1898 | .increase_prio = increase_priority_inheritance, | ||
1899 | .decrease_prio = decrease_priority_inheritance, | ||
1900 | .__increase_prio = __increase_priority_inheritance, | ||
1901 | .__decrease_prio = __decrease_priority_inheritance, | ||
1902 | #endif | ||
1903 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1904 | .nested_increase_prio = nested_increase_priority_inheritance, | ||
1905 | .nested_decrease_prio = nested_decrease_priority_inheritance, | ||
1906 | .__compare = __edf_higher_prio, | ||
1907 | #endif | ||
1908 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1909 | .get_dgl_spinlock = gsnedf_get_dgl_spinlock, | ||
1910 | #endif | ||
1911 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
1912 | .allocate_aff_obs = gsnedf_allocate_affinity_observer, | ||
1913 | #endif | ||
1914 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1915 | .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet, | ||
1916 | .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet, | ||
1917 | .run_tasklets = gsnedf_run_tasklets, | ||
1918 | #endif | ||
1919 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
1920 | .map_gpu_to_cpu = gsnedf_map_gpu_to_cpu, | ||
999 | #endif | 1921 | #endif |
1000 | }; | 1922 | }; |
1001 | 1923 | ||
@@ -1005,15 +1927,20 @@ static int __init init_gsn_edf(void) | |||
1005 | int cpu; | 1927 | int cpu; |
1006 | cpu_entry_t *entry; | 1928 | cpu_entry_t *entry; |
1007 | 1929 | ||
1008 | bheap_init(&gsnedf_cpu_heap); | 1930 | INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio); |
1009 | /* initialize CPU state */ | 1931 | /* initialize CPU state */ |
1010 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 1932 | for (cpu = 0; cpu < NR_CPUS; ++cpu) { |
1011 | entry = &per_cpu(gsnedf_cpu_entries, cpu); | 1933 | entry = &per_cpu(gsnedf_cpu_entries, cpu); |
1012 | gsnedf_cpus[cpu] = entry; | 1934 | gsnedf_cpus[cpu] = entry; |
1013 | entry->cpu = cpu; | 1935 | entry->cpu = cpu; |
1014 | entry->hn = &gsnedf_heap_node[cpu]; | 1936 | |
1015 | bheap_node_init(&entry->hn, entry); | 1937 | INIT_BINHEAP_NODE(&entry->hn); |
1016 | } | 1938 | } |
1939 | |||
1940 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1941 | raw_spin_lock_init(&dgl_lock); | ||
1942 | #endif | ||
1943 | |||
1017 | edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); | 1944 | edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); |
1018 | return register_sched_plugin(&gsn_edf_plugin); | 1945 | return register_sched_plugin(&gsn_edf_plugin); |
1019 | } | 1946 | } |
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c index 6b32cf09abbd..9de03c95b825 100644 --- a/litmus/sched_litmus.c +++ b/litmus/sched_litmus.c | |||
@@ -175,8 +175,10 @@ static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, | |||
175 | litmus->task_wake_up(p); | 175 | litmus->task_wake_up(p); |
176 | 176 | ||
177 | rq->litmus.nr_running++; | 177 | rq->litmus.nr_running++; |
178 | } else | 178 | } else { |
179 | TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); | 179 | TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); |
180 | //WARN_ON(1); | ||
181 | } | ||
180 | } | 182 | } |
181 | 183 | ||
182 | static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, | 184 | static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, |
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c index 91e52391a173..a96c2b1aa26f 100644 --- a/litmus/sched_pfp.c +++ b/litmus/sched_pfp.c | |||
@@ -142,17 +142,25 @@ static void pfp_tick(struct task_struct *t) | |||
142 | */ | 142 | */ |
143 | BUG_ON(is_realtime(t) && t != pfp->scheduled); | 143 | BUG_ON(is_realtime(t) && t != pfp->scheduled); |
144 | 144 | ||
145 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | 145 | if (is_realtime(t) && budget_exhausted(t)) |
146 | if (!is_np(t)) { | 146 | { |
147 | litmus_reschedule_local(); | 147 | if (budget_signalled(t) && !sigbudget_sent(t)) { |
148 | TRACE("pfp_scheduler_tick: " | 148 | /* signal exhaustion */ |
149 | "%d is preemptable " | 149 | send_sigbudget(t); |
150 | " => FORCE_RESCHED\n", t->pid); | 150 | } |
151 | } else if (is_user_np(t)) { | 151 | |
152 | TRACE("pfp_scheduler_tick: " | 152 | if (budget_enforced(t)) { |
153 | "%d is non-preemptable, " | 153 | if (!is_np(t)) { |
154 | "preemption delayed.\n", t->pid); | 154 | litmus_reschedule_local(); |
155 | request_exit_np(t); | 155 | TRACE("pfp_scheduler_tick: " |
156 | "%d is preemptable " | ||
157 | " => FORCE_RESCHED\n", t->pid); | ||
158 | } else if (is_user_np(t)) { | ||
159 | TRACE("pfp_scheduler_tick: " | ||
160 | "%d is non-preemptable, " | ||
161 | "preemption delayed.\n", t->pid); | ||
162 | request_exit_np(t); | ||
163 | } | ||
156 | } | 164 | } |
157 | } | 165 | } |
158 | } | 166 | } |
@@ -162,7 +170,7 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) | |||
162 | pfp_domain_t* pfp = local_pfp; | 170 | pfp_domain_t* pfp = local_pfp; |
163 | struct task_struct* next; | 171 | struct task_struct* next; |
164 | 172 | ||
165 | int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate; | 173 | int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched, migrate; |
166 | 174 | ||
167 | raw_spin_lock(&pfp->slock); | 175 | raw_spin_lock(&pfp->slock); |
168 | 176 | ||
@@ -179,6 +187,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) | |||
179 | out_of_time = exists && | 187 | out_of_time = exists && |
180 | budget_enforced(pfp->scheduled) && | 188 | budget_enforced(pfp->scheduled) && |
181 | budget_exhausted(pfp->scheduled); | 189 | budget_exhausted(pfp->scheduled); |
190 | signal_budget = exists && | ||
191 | budget_signalled(pfp->scheduled) && | ||
192 | budget_exhausted(pfp->scheduled) && | ||
193 | !sigbudget_sent(pfp->scheduled); | ||
182 | np = exists && is_np(pfp->scheduled); | 194 | np = exists && is_np(pfp->scheduled); |
183 | sleep = exists && is_completed(pfp->scheduled); | 195 | sleep = exists && is_completed(pfp->scheduled); |
184 | migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; | 196 | migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; |
@@ -190,6 +202,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev) | |||
190 | */ | 202 | */ |
191 | resched = preempt; | 203 | resched = preempt; |
192 | 204 | ||
205 | /* Send the signal that the budget has been exhausted */ | ||
206 | if (signal_budget) | ||
207 | send_sigbudget(pfp->scheduled); | ||
208 | |||
193 | /* If a task blocks we have no choice but to reschedule. | 209 | /* If a task blocks we have no choice but to reschedule. |
194 | */ | 210 | */ |
195 | if (blocks) | 211 | if (blocks) |
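The pfp_tick()/pfp_schedule() changes above (and the matching psn_edf changes later in this diff) separate budget enforcement (force a reschedule) from budget signalling (deliver a one-shot SIGBUDGET to the task), with sigbudget_sent() guarding against duplicate delivery. Below is a minimal sketch of the signal-once pattern using the budget helpers referenced by the patch (budget_signalled(), budget_exhausted(), sigbudget_sent(), send_sigbudget()); the wrapper name check_budget_signal is illustrative only.

    /* Sketch only: deliver SIGBUDGET at most once per job. */
    static void check_budget_signal(struct task_struct *t)
    {
    	if (t && budget_signalled(t) && budget_exhausted(t) &&
    	    !sigbudget_sent(t))
    		send_sigbudget(t);	/* presumably sets the flag tested by sigbudget_sent() */
    }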
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c index 00a1900d6457..76ff892122aa 100644 --- a/litmus/sched_plugin.c +++ b/litmus/sched_plugin.c | |||
@@ -13,6 +13,10 @@ | |||
13 | #include <litmus/preempt.h> | 13 | #include <litmus/preempt.h> |
14 | #include <litmus/jobs.h> | 14 | #include <litmus/jobs.h> |
15 | 15 | ||
16 | #ifdef CONFIG_LITMUS_NVIDIA | ||
17 | #include <litmus/nvidia_info.h> | ||
18 | #endif | ||
19 | |||
16 | /* | 20 | /* |
17 | * Generic function to trigger preemption on either local or remote cpu | 21 | * Generic function to trigger preemption on either local or remote cpu |
18 | * from scheduler plugins. The key feature is that this function is | 22 | * from scheduler plugins. The key feature is that this function is |
@@ -27,11 +31,19 @@ void preempt_if_preemptable(struct task_struct* t, int cpu) | |||
27 | 31 | ||
28 | int reschedule = 0; | 32 | int reschedule = 0; |
29 | 33 | ||
30 | if (!t) | 34 | TRACE_CUR("preempt_if_preemptable: %s/%d\n", |
35 | (t) ? t->comm : "(nil)", | ||
36 | (t) ? t->pid : 0); | ||
37 | |||
38 | if (!t) { | ||
39 | TRACE_CUR("unconditionally reschedule\n"); | ||
31 | /* move non-real-time task out of the way */ | 40 | /* move non-real-time task out of the way */ |
32 | reschedule = 1; | 41 | reschedule = 1; |
42 | } | ||
33 | else { | 43 | else { |
34 | if (smp_processor_id() == cpu) { | 44 | if (smp_processor_id() == cpu) { |
45 | TRACE_CUR("preempt local cpu.\n"); | ||
46 | |||
35 | /* local CPU case */ | 47 | /* local CPU case */ |
36 | /* check if we need to poke userspace */ | 48 | /* check if we need to poke userspace */ |
37 | if (is_user_np(t)) | 49 | if (is_user_np(t)) |
@@ -43,14 +55,22 @@ void preempt_if_preemptable(struct task_struct* t, int cpu) | |||
43 | * currently-executing task */ | 55 | * currently-executing task */ |
44 | reschedule = 1; | 56 | reschedule = 1; |
45 | } else { | 57 | } else { |
58 | int is_knp = is_kernel_np(t); | ||
59 | int reqexit = request_exit_np_atomic(t); | ||
60 | TRACE_CUR("preempt remote cpu: isknp = %d reqexit = %d\n", is_knp, reqexit); | ||
61 | |||
46 | /* Remote CPU case. Only notify if it's not a kernel | 62 | /* Remote CPU case. Only notify if it's not a kernel |
47 | * NP section and if we didn't set the userspace | 63 | * NP section and if we didn't set the userspace |
48 | * flag. */ | 64 | * flag. */ |
49 | reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); | 65 | //reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); |
66 | reschedule = !(is_knp || reqexit); | ||
50 | } | 67 | } |
51 | } | 68 | } |
52 | if (likely(reschedule)) | 69 | |
70 | if (likely(reschedule)) { | ||
71 | TRACE_CUR("calling litmus_reschedule()\n"); | ||
53 | litmus_reschedule(cpu); | 72 | litmus_reschedule(cpu); |
73 | } | ||
54 | } | 74 | } |
55 | 75 | ||
56 | 76 | ||
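The rewritten remote-CPU branch above caches is_kernel_np() and request_exit_np_atomic() in locals so the decision can be traced before the IPI is (or is not) sent. Isolated as a helper purely for illustration (the name remote_needs_resched_ipi is not from the patch); note that request_exit_np_atomic() also posts the exit-NP request as a side effect, exactly as in the original expression.

    /* Sketch only: notify the remote CPU iff the task is not in a kernel
     * NP section and the userspace exit-NP flag could not be set. */
    static int remote_needs_resched_ipi(struct task_struct *t)
    {
    	int is_knp  = is_kernel_np(t);
    	int reqexit = request_exit_np_atomic(t);	/* side effect: posts the request */
    	return !(is_knp || reqexit);
    }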
@@ -102,6 +122,9 @@ static long litmus_dummy_complete_job(void) | |||
102 | 122 | ||
103 | static long litmus_dummy_activate_plugin(void) | 123 | static long litmus_dummy_activate_plugin(void) |
104 | { | 124 | { |
125 | #ifdef CONFIG_LITMUS_NVIDIA | ||
126 | shutdown_nvidia_info(); | ||
127 | #endif | ||
105 | return 0; | 128 | return 0; |
106 | } | 129 | } |
107 | 130 | ||
@@ -110,14 +133,100 @@ static long litmus_dummy_deactivate_plugin(void) | |||
110 | return 0; | 133 | return 0; |
111 | } | 134 | } |
112 | 135 | ||
113 | #ifdef CONFIG_LITMUS_LOCKING | 136 | static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b) |
137 | { | ||
138 | TRACE_CUR("WARNING: Dummy compare function called!\n"); | ||
139 | return 0; | ||
140 | } | ||
114 | 141 | ||
142 | #ifdef CONFIG_LITMUS_LOCKING | ||
115 | static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, | 143 | static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, |
116 | void* __user config) | 144 | void* __user config) |
117 | { | 145 | { |
118 | return -ENXIO; | 146 | return -ENXIO; |
119 | } | 147 | } |
120 | 148 | ||
149 | static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh) | ||
150 | { | ||
151 | } | ||
152 | |||
153 | static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh) | ||
154 | { | ||
155 | } | ||
156 | |||
157 | static int litmus_dummy___increase_prio(struct task_struct* t, struct task_struct* prio_inh) | ||
158 | { | ||
159 | TRACE_CUR("WARNING: Dummy litmus_dummy___increase_prio called!\n"); | ||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struct* prio_inh) | ||
164 | { | ||
165 | TRACE_CUR("WARNING: Dummy litmus_dummy___decrease_prio called!\n"); | ||
166 | return 0; | ||
167 | } | ||
168 | #endif | ||
169 | |||
170 | |||
171 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
172 | static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t) | ||
173 | { | ||
174 | TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); | ||
175 | return(0); // failure. | ||
176 | } | ||
177 | |||
178 | static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio, | ||
179 | struct task_struct *new_prio) | ||
180 | { | ||
181 | TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); | ||
182 | } | ||
183 | |||
184 | static void litmus_dummy_run_tasklets(struct task_struct* t) | ||
185 | { | ||
186 | //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__); | ||
187 | } | ||
188 | #endif | ||
189 | |||
190 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
191 | static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh, | ||
192 | raw_spinlock_t *to_unlock, unsigned long irqflags) | ||
193 | { | ||
194 | } | ||
195 | |||
196 | static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh, | ||
197 | raw_spinlock_t *to_unlock, unsigned long irqflags) | ||
198 | { | ||
199 | } | ||
200 | |||
201 | static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod, | ||
202 | struct task_struct* b, comparison_mode_t b_mode) | ||
203 | { | ||
204 | TRACE_CUR("WARNING: Dummy __compare function called!\n"); | ||
205 | return 0; | ||
206 | } | ||
207 | #endif | ||
208 | |||
209 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
210 | static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t) | ||
211 | { | ||
212 | return NULL; | ||
213 | } | ||
214 | #endif | ||
215 | |||
216 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
217 | static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs, | ||
218 | int type, | ||
219 | void* __user config) | ||
220 | { | ||
221 | return -ENXIO; | ||
222 | } | ||
223 | #endif | ||
224 | |||
225 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
226 | static int litmus_dummy_map_gpu_to_cpu(int gpu) | ||
227 | { | ||
228 | return 0; | ||
229 | } | ||
121 | #endif | 230 | #endif |
122 | 231 | ||
123 | 232 | ||
@@ -136,9 +245,34 @@ struct sched_plugin linux_sched_plugin = { | |||
136 | .finish_switch = litmus_dummy_finish_switch, | 245 | .finish_switch = litmus_dummy_finish_switch, |
137 | .activate_plugin = litmus_dummy_activate_plugin, | 246 | .activate_plugin = litmus_dummy_activate_plugin, |
138 | .deactivate_plugin = litmus_dummy_deactivate_plugin, | 247 | .deactivate_plugin = litmus_dummy_deactivate_plugin, |
248 | .compare = litmus_dummy_compare, | ||
139 | #ifdef CONFIG_LITMUS_LOCKING | 249 | #ifdef CONFIG_LITMUS_LOCKING |
140 | .allocate_lock = litmus_dummy_allocate_lock, | 250 | .allocate_lock = litmus_dummy_allocate_lock, |
251 | .increase_prio = litmus_dummy_increase_prio, | ||
252 | .decrease_prio = litmus_dummy_decrease_prio, | ||
253 | .__increase_prio = litmus_dummy___increase_prio, | ||
254 | .__decrease_prio = litmus_dummy___decrease_prio, | ||
255 | #endif | ||
256 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
257 | .nested_increase_prio = litmus_dummy_nested_increase_prio, | ||
258 | .nested_decrease_prio = litmus_dummy_nested_decrease_prio, | ||
259 | .__compare = litmus_dummy___compare, | ||
260 | #endif | ||
261 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
262 | .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet, | ||
263 | .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet, | ||
264 | .run_tasklets = litmus_dummy_run_tasklets, | ||
265 | #endif | ||
266 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
267 | .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock, | ||
141 | #endif | 268 | #endif |
269 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
270 | .allocate_aff_obs = litmus_dummy_allocate_aff_obs, | ||
271 | #endif | ||
272 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
273 | .map_gpu_to_cpu = litmus_dummy_map_gpu_to_cpu, | ||
274 | #endif | ||
275 | |||
142 | .admit_task = litmus_dummy_admit_task | 276 | .admit_task = litmus_dummy_admit_task |
143 | }; | 277 | }; |
144 | 278 | ||
@@ -174,9 +308,34 @@ int register_sched_plugin(struct sched_plugin* plugin) | |||
174 | CHECK(complete_job); | 308 | CHECK(complete_job); |
175 | CHECK(activate_plugin); | 309 | CHECK(activate_plugin); |
176 | CHECK(deactivate_plugin); | 310 | CHECK(deactivate_plugin); |
311 | CHECK(compare); | ||
177 | #ifdef CONFIG_LITMUS_LOCKING | 312 | #ifdef CONFIG_LITMUS_LOCKING |
178 | CHECK(allocate_lock); | 313 | CHECK(allocate_lock); |
314 | CHECK(increase_prio); | ||
315 | CHECK(decrease_prio); | ||
316 | CHECK(__increase_prio); | ||
317 | CHECK(__decrease_prio); | ||
318 | #endif | ||
319 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
320 | CHECK(nested_increase_prio); | ||
321 | CHECK(nested_decrease_prio); | ||
322 | CHECK(__compare); | ||
323 | #endif | ||
324 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
325 | CHECK(enqueue_pai_tasklet); | ||
326 | CHECK(change_prio_pai_tasklet); | ||
327 | CHECK(run_tasklets); | ||
179 | #endif | 328 | #endif |
329 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
330 | CHECK(get_dgl_spinlock); | ||
331 | #endif | ||
332 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
333 | CHECK(allocate_aff_obs); | ||
334 | #endif | ||
335 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
336 | CHECK(map_gpu_to_cpu); | ||
337 | #endif | ||
338 | |||
180 | CHECK(admit_task); | 339 | CHECK(admit_task); |
181 | 340 | ||
182 | if (!plugin->release_at) | 341 | if (!plugin->release_at) |
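register_sched_plugin() now also validates the new hooks; CHECK() substitutes the matching litmus_dummy_* implementation whenever a plugin leaves a hook NULL, so plugins that do not use locking, nested locking, PAI softirqs, DGL, affinity observers, or GPU mapping need not implement them. The macro itself is defined earlier in sched_plugin.c and is not shown in this hunk; it expands roughly as sketched below (an assumption, not a quote from the patch).

    /* Sketch only: fall back to the dummy callback for any NULL hook. */
    #define CHECK(func) do {					\
    		if (!plugin->func)				\
    			plugin->func = litmus_dummy_ ## func;	\
    	} while (0)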
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c index 0e1675d2e572..63fa6103882a 100644 --- a/litmus/sched_psn_edf.c +++ b/litmus/sched_psn_edf.c | |||
@@ -174,17 +174,25 @@ static void psnedf_tick(struct task_struct *t) | |||
174 | */ | 174 | */ |
175 | BUG_ON(is_realtime(t) && t != pedf->scheduled); | 175 | BUG_ON(is_realtime(t) && t != pedf->scheduled); |
176 | 176 | ||
177 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | 177 | if (is_realtime(t) && budget_exhausted(t)) |
178 | if (!is_np(t)) { | 178 | { |
179 | litmus_reschedule_local(); | 179 | if (budget_signalled(t) && !sigbudget_sent(t)) { |
180 | TRACE("psnedf_scheduler_tick: " | 180 | /* signal exhaustion */ |
181 | "%d is preemptable " | 181 | send_sigbudget(t); |
182 | " => FORCE_RESCHED\n", t->pid); | 182 | } |
183 | } else if (is_user_np(t)) { | 183 | |
184 | TRACE("psnedf_scheduler_tick: " | 184 | if (budget_enforced(t)) { |
185 | "%d is non-preemptable, " | 185 | if (!is_np(t)) { |
186 | "preemption delayed.\n", t->pid); | 186 | litmus_reschedule_local(); |
187 | request_exit_np(t); | 187 | TRACE("psnedf_scheduler_tick: " |
188 | "%d is preemptable " | ||
189 | " => FORCE_RESCHED\n", t->pid); | ||
190 | } else if (is_user_np(t)) { | ||
191 | TRACE("psnedf_scheduler_tick: " | ||
192 | "%d is non-preemptable, " | ||
193 | "preemption delayed.\n", t->pid); | ||
194 | request_exit_np(t); | ||
195 | } | ||
188 | } | 196 | } |
189 | } | 197 | } |
190 | } | 198 | } |
@@ -195,8 +203,7 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) | |||
195 | rt_domain_t* edf = &pedf->domain; | 203 | rt_domain_t* edf = &pedf->domain; |
196 | struct task_struct* next; | 204 | struct task_struct* next; |
197 | 205 | ||
198 | int out_of_time, sleep, preempt, | 206 | int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched; |
199 | np, exists, blocks, resched; | ||
200 | 207 | ||
201 | raw_spin_lock(&pedf->slock); | 208 | raw_spin_lock(&pedf->slock); |
202 | 209 | ||
@@ -213,6 +220,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) | |||
213 | out_of_time = exists && | 220 | out_of_time = exists && |
214 | budget_enforced(pedf->scheduled) && | 221 | budget_enforced(pedf->scheduled) && |
215 | budget_exhausted(pedf->scheduled); | 222 | budget_exhausted(pedf->scheduled); |
223 | signal_budget = exists && | ||
224 | budget_signalled(pedf->scheduled) && | ||
225 | budget_exhausted(pedf->scheduled) && | ||
226 | !sigbudget_sent(pedf->scheduled); | ||
216 | np = exists && is_np(pedf->scheduled); | 227 | np = exists && is_np(pedf->scheduled); |
217 | sleep = exists && is_completed(pedf->scheduled); | 228 | sleep = exists && is_completed(pedf->scheduled); |
218 | preempt = edf_preemption_needed(edf, prev); | 229 | preempt = edf_preemption_needed(edf, prev); |
@@ -223,6 +234,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev) | |||
223 | */ | 234 | */ |
224 | resched = preempt; | 235 | resched = preempt; |
225 | 236 | ||
237 | /* Send the signal that the budget has been exhausted */ | ||
238 | if (signal_budget) | ||
239 | send_sigbudget(pedf->scheduled); | ||
240 | |||
226 | /* If a task blocks we have no choice but to reschedule. | 241 | /* If a task blocks we have no choice but to reschedule. |
227 | */ | 242 | */ |
228 | if (blocks) | 243 | if (blocks) |
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c index 5ef8d09ab41f..f7f575346b54 100644 --- a/litmus/sched_task_trace.c +++ b/litmus/sched_task_trace.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/percpu.h> | 9 | #include <linux/percpu.h> |
10 | #include <linux/hardirq.h> | ||
10 | 11 | ||
11 | #include <litmus/ftdev.h> | 12 | #include <litmus/ftdev.h> |
12 | #include <litmus/litmus.h> | 13 | #include <litmus/litmus.h> |
@@ -16,13 +17,13 @@ | |||
16 | #include <litmus/ftdev.h> | 17 | #include <litmus/ftdev.h> |
17 | 18 | ||
18 | 19 | ||
19 | #define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) | 20 | #define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11)) |
20 | 21 | ||
21 | #define now() litmus_clock() | 22 | #define now() litmus_clock() |
22 | 23 | ||
23 | struct local_buffer { | 24 | struct local_buffer { |
24 | struct st_event_record record[NO_EVENTS]; | 25 | struct st_event_record record[NUM_EVENTS]; |
25 | char flag[NO_EVENTS]; | 26 | char flag[NUM_EVENTS]; |
26 | struct ft_buffer ftbuf; | 27 | struct ft_buffer ftbuf; |
27 | }; | 28 | }; |
28 | 29 | ||
@@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void) | |||
41 | int i, ok = 0, err; | 42 | int i, ok = 0, err; |
42 | printk("Allocated %u sched_trace_xxx() events per CPU " | 43 | printk("Allocated %u sched_trace_xxx() events per CPU " |
43 | "(buffer size: %d bytes)\n", | 44 | "(buffer size: %d bytes)\n", |
44 | NO_EVENTS, (int) sizeof(struct local_buffer)); | 45 | NUM_EVENTS, (int) sizeof(struct local_buffer)); |
45 | 46 | ||
46 | err = ftdev_init(&st_dev, THIS_MODULE, | 47 | err = ftdev_init(&st_dev, THIS_MODULE, |
47 | num_online_cpus(), "sched_trace"); | 48 | num_online_cpus(), "sched_trace"); |
@@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void) | |||
50 | 51 | ||
51 | for (i = 0; i < st_dev.minor_cnt; i++) { | 52 | for (i = 0; i < st_dev.minor_cnt; i++) { |
52 | buf = &per_cpu(st_event_buffer, i); | 53 | buf = &per_cpu(st_event_buffer, i); |
53 | ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, | 54 | ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS, |
54 | sizeof(struct st_event_record), | 55 | sizeof(struct st_event_record), |
55 | buf->flag, | 56 | buf->flag, |
56 | buf->record); | 57 | buf->record); |
@@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id, | |||
154 | { | 155 | { |
155 | struct task_struct *t = (struct task_struct*) _task; | 156 | struct task_struct *t = (struct task_struct*) _task; |
156 | struct st_event_record* rec; | 157 | struct st_event_record* rec; |
157 | if (is_realtime(t)) { | 158 | //if (is_realtime(t)) /* comment out to trace EVERYTHING */ |
159 | { | ||
158 | rec = get_record(ST_SWITCH_TO, t); | 160 | rec = get_record(ST_SWITCH_TO, t); |
159 | if (rec) { | 161 | if (rec) { |
160 | rec->data.switch_to.when = now(); | 162 | rec->data.switch_to.when = now(); |
@@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id, | |||
169 | { | 171 | { |
170 | struct task_struct *t = (struct task_struct*) _task; | 172 | struct task_struct *t = (struct task_struct*) _task; |
171 | struct st_event_record* rec; | 173 | struct st_event_record* rec; |
172 | if (is_realtime(t)) { | 174 | //if (is_realtime(t)) /* comment out to trace EVERYTHING */ |
175 | { | ||
173 | rec = get_record(ST_SWITCH_AWAY, t); | 176 | rec = get_record(ST_SWITCH_AWAY, t); |
174 | if (rec) { | 177 | if (rec) { |
175 | rec->data.switch_away.when = now(); | 178 | rec->data.switch_away.when = now(); |
@@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id, | |||
188 | if (rec) { | 191 | if (rec) { |
189 | rec->data.completion.when = now(); | 192 | rec->data.completion.when = now(); |
190 | rec->data.completion.forced = forced; | 193 | rec->data.completion.forced = forced; |
194 | #ifdef CONFIG_LITMUS_NVIDIA | ||
195 | rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count); | ||
196 | #endif | ||
191 | put_record(rec); | 197 | put_record(rec); |
192 | } | 198 | } |
193 | } | 199 | } |
@@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id, | |||
239 | put_record(rec); | 245 | put_record(rec); |
240 | } | 246 | } |
241 | } | 247 | } |
248 | |||
249 | |||
250 | |||
251 | |||
252 | feather_callback void do_sched_trace_prediction_err(unsigned long id, | ||
253 | unsigned long _task, | ||
254 | unsigned long _distance, | ||
255 | unsigned long _rel_err) | ||
256 | { | ||
257 | struct task_struct *t = (struct task_struct*) _task; | ||
258 | struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t); | ||
259 | |||
260 | if (rec) { | ||
261 | gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance; | ||
262 | fp_t* rel_err = (fp_t*) _rel_err; | ||
263 | |||
264 | rec->data.prediction_err.distance = *distance; | ||
265 | rec->data.prediction_err.rel_err = rel_err->val; | ||
266 | put_record(rec); | ||
267 | } | ||
268 | } | ||
269 | |||
270 | |||
271 | feather_callback void do_sched_trace_migration(unsigned long id, | ||
272 | unsigned long _task, | ||
273 | unsigned long _mig_info) | ||
274 | { | ||
275 | struct task_struct *t = (struct task_struct*) _task; | ||
276 | struct st_event_record *rec = get_record(ST_MIGRATION, t); | ||
277 | |||
278 | if (rec) { | ||
279 | struct migration_info* mig_info = (struct migration_info*) _mig_info; | ||
280 | |||
281 | rec->hdr.extra = mig_info->distance; | ||
282 | rec->data.migration.observed = mig_info->observed; | ||
283 | rec->data.migration.estimated = mig_info->estimated; | ||
284 | |||
285 | put_record(rec); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | |||
290 | |||
291 | |||
292 | |||
293 | |||
294 | |||
295 | |||
296 | |||
297 | feather_callback void do_sched_trace_tasklet_release(unsigned long id, | ||
298 | unsigned long _owner) | ||
299 | { | ||
300 | struct task_struct *t = (struct task_struct*) _owner; | ||
301 | struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t); | ||
302 | |||
303 | if (rec) { | ||
304 | rec->data.tasklet_release.when = now(); | ||
305 | put_record(rec); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | |||
310 | feather_callback void do_sched_trace_tasklet_begin(unsigned long id, | ||
311 | unsigned long _owner) | ||
312 | { | ||
313 | struct task_struct *t = (struct task_struct*) _owner; | ||
314 | struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t); | ||
315 | |||
316 | if (rec) { | ||
317 | rec->data.tasklet_begin.when = now(); | ||
318 | |||
319 | if(!in_interrupt()) | ||
320 | rec->data.tasklet_begin.exe_pid = current->pid; | ||
321 | else | ||
322 | rec->data.tasklet_begin.exe_pid = 0; | ||
323 | |||
324 | put_record(rec); | ||
325 | } | ||
326 | } | ||
327 | EXPORT_SYMBOL(do_sched_trace_tasklet_begin); | ||
328 | |||
329 | |||
330 | feather_callback void do_sched_trace_tasklet_end(unsigned long id, | ||
331 | unsigned long _owner, | ||
332 | unsigned long _flushed) | ||
333 | { | ||
334 | struct task_struct *t = (struct task_struct*) _owner; | ||
335 | struct st_event_record *rec = get_record(ST_TASKLET_END, t); | ||
336 | |||
337 | if (rec) { | ||
338 | rec->data.tasklet_end.when = now(); | ||
339 | rec->data.tasklet_end.flushed = _flushed; | ||
340 | |||
341 | if(!in_interrupt()) | ||
342 | rec->data.tasklet_end.exe_pid = current->pid; | ||
343 | else | ||
344 | rec->data.tasklet_end.exe_pid = 0; | ||
345 | |||
346 | put_record(rec); | ||
347 | } | ||
348 | } | ||
349 | EXPORT_SYMBOL(do_sched_trace_tasklet_end); | ||
350 | |||
351 | |||
352 | feather_callback void do_sched_trace_work_release(unsigned long id, | ||
353 | unsigned long _owner) | ||
354 | { | ||
355 | struct task_struct *t = (struct task_struct*) _owner; | ||
356 | struct st_event_record *rec = get_record(ST_WORK_RELEASE, t); | ||
357 | |||
358 | if (rec) { | ||
359 | rec->data.work_release.when = now(); | ||
360 | put_record(rec); | ||
361 | } | ||
362 | } | ||
363 | |||
364 | |||
365 | feather_callback void do_sched_trace_work_begin(unsigned long id, | ||
366 | unsigned long _owner, | ||
367 | unsigned long _exe) | ||
368 | { | ||
369 | struct task_struct *t = (struct task_struct*) _owner; | ||
370 | struct st_event_record *rec = get_record(ST_WORK_BEGIN, t); | ||
371 | |||
372 | if (rec) { | ||
373 | struct task_struct *exe = (struct task_struct*) _exe; | ||
374 | rec->data.work_begin.exe_pid = exe->pid; | ||
375 | rec->data.work_begin.when = now(); | ||
376 | put_record(rec); | ||
377 | } | ||
378 | } | ||
379 | EXPORT_SYMBOL(do_sched_trace_work_begin); | ||
380 | |||
381 | |||
382 | feather_callback void do_sched_trace_work_end(unsigned long id, | ||
383 | unsigned long _owner, | ||
384 | unsigned long _exe, | ||
385 | unsigned long _flushed) | ||
386 | { | ||
387 | struct task_struct *t = (struct task_struct*) _owner; | ||
388 | struct st_event_record *rec = get_record(ST_WORK_END, t); | ||
389 | |||
390 | if (rec) { | ||
391 | struct task_struct *exe = (struct task_struct*) _exe; | ||
392 | rec->data.work_end.exe_pid = exe->pid; | ||
393 | rec->data.work_end.flushed = _flushed; | ||
394 | rec->data.work_end.when = now(); | ||
395 | put_record(rec); | ||
396 | } | ||
397 | } | ||
398 | EXPORT_SYMBOL(do_sched_trace_work_end); | ||
399 | |||
400 | |||
401 | feather_callback void do_sched_trace_eff_prio_change(unsigned long id, | ||
402 | unsigned long _task, | ||
403 | unsigned long _inh) | ||
404 | { | ||
405 | struct task_struct *t = (struct task_struct*) _task; | ||
406 | struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t); | ||
407 | |||
408 | if (rec) { | ||
409 | struct task_struct *inh = (struct task_struct*) _inh; | ||
410 | rec->data.effective_priority_change.when = now(); | ||
411 | rec->data.effective_priority_change.inh_pid = (inh != NULL) ? | ||
412 | inh->pid : | ||
413 | 0xffff; | ||
414 | |||
415 | put_record(rec); | ||
416 | } | ||
417 | } | ||
418 | |||
419 | /* pray for no nesting of nv interrupts on same CPU... */ | ||
420 | struct tracing_interrupt_map | ||
421 | { | ||
422 | int active; | ||
423 | int count; | ||
424 | unsigned long data[128]; // assume nesting less than 128... | ||
425 | unsigned long serial[128]; | ||
426 | }; | ||
427 | DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing); | ||
428 | |||
429 | |||
430 | DEFINE_PER_CPU(u32, intCounter); | ||
431 | |||
432 | feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id, | ||
433 | unsigned long _device) | ||
434 | { | ||
435 | struct st_event_record *rec; | ||
436 | u32 serialNum; | ||
437 | |||
438 | { | ||
439 | u32* serial; | ||
440 | struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); | ||
441 | if(int_map->active == 0xcafebabe) | ||
442 | { | ||
443 | int_map->count++; | ||
444 | } | ||
445 | else | ||
446 | { | ||
447 | int_map->active = 0xcafebabe; | ||
448 | int_map->count = 1; | ||
449 | } | ||
450 | //int_map->data[int_map->count-1] = _device; | ||
451 | |||
452 | serial = &per_cpu(intCounter, smp_processor_id()); | ||
453 | *serial += num_online_cpus(); | ||
454 | serialNum = *serial; | ||
455 | int_map->serial[int_map->count-1] = serialNum; | ||
456 | } | ||
457 | |||
458 | rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL); | ||
459 | if(rec) { | ||
460 | u32 device = _device; | ||
461 | rec->data.nv_interrupt_begin.when = now(); | ||
462 | rec->data.nv_interrupt_begin.device = device; | ||
463 | rec->data.nv_interrupt_begin.serialNumber = serialNum; | ||
464 | put_record(rec); | ||
465 | } | ||
466 | } | ||
467 | EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin); | ||
468 | |||
469 | /* | ||
470 | int is_interrupt_tracing_active(void) | ||
471 | { | ||
472 | struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); | ||
473 | if(int_map->active == 0xcafebabe) | ||
474 | return 1; | ||
475 | return 0; | ||
476 | } | ||
477 | */ | ||
478 | |||
479 | feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device) | ||
480 | { | ||
481 | struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id()); | ||
482 | if(int_map->active == 0xcafebabe) | ||
483 | { | ||
484 | struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL); | ||
485 | |||
486 | int_map->count--; | ||
487 | if(int_map->count == 0) | ||
488 | int_map->active = 0; | ||
489 | |||
490 | if(rec) { | ||
491 | u32 device = _device; | ||
492 | rec->data.nv_interrupt_end.when = now(); | ||
493 | //rec->data.nv_interrupt_end.device = int_map->data[int_map->count]; | ||
494 | rec->data.nv_interrupt_end.device = device; | ||
495 | rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count]; | ||
496 | put_record(rec); | ||
497 | } | ||
498 | } | ||
499 | } | ||
500 | EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end); | ||
501 | |||
502 | |||
503 | |||
504 | |||
505 | |||
506 | |||
507 | |||
508 | |||
509 | |||
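Every hook added to sched_task_trace.c follows the same reserve/fill/commit shape around get_record() and put_record(). A commented sketch of that shape is below, reusing the tasklet_release record from the patch; the function name do_sched_trace_example is illustrative only, and get_record() presumably returns NULL when the per-CPU event buffer is full.

    /* Sketch only: the common shape of a feather-trace callback. */
    feather_callback void do_sched_trace_example(unsigned long id,
    					     unsigned long _owner)
    {
    	struct task_struct *t = (struct task_struct *) _owner;
    	struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);

    	if (rec) {				/* NULL => buffer full, event dropped */
    		rec->data.tasklet_release.when = now();
    		put_record(rec);		/* commit; record becomes readable */
    	}
    }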
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c new file mode 100644 index 000000000000..cf8e1d78aa77 --- /dev/null +++ b/litmus/sched_trace_external.c | |||
@@ -0,0 +1,64 @@ | |||
1 | #include <linux/module.h> | ||
2 | |||
3 | #include <litmus/trace.h> | ||
4 | #include <litmus/sched_trace.h> | ||
5 | #include <litmus/litmus.h> | ||
6 | |||
7 | void __sched_trace_tasklet_begin_external(struct task_struct* t) | ||
8 | { | ||
9 | sched_trace_tasklet_begin(t); | ||
10 | } | ||
11 | EXPORT_SYMBOL(__sched_trace_tasklet_begin_external); | ||
12 | |||
13 | void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed) | ||
14 | { | ||
15 | sched_trace_tasklet_end(t, flushed); | ||
16 | } | ||
17 | EXPORT_SYMBOL(__sched_trace_tasklet_end_external); | ||
18 | |||
19 | |||
20 | |||
21 | void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e) | ||
22 | { | ||
23 | sched_trace_work_begin(t, e); | ||
24 | } | ||
25 | EXPORT_SYMBOL(__sched_trace_work_begin_external); | ||
26 | |||
27 | void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f) | ||
28 | { | ||
29 | sched_trace_work_end(t, e, f); | ||
30 | } | ||
31 | EXPORT_SYMBOL(__sched_trace_work_end_external); | ||
32 | |||
33 | |||
34 | |||
35 | void __sched_trace_nv_interrupt_begin_external(u32 device) | ||
36 | { | ||
37 | //unsigned long _device = device; | ||
38 | sched_trace_nv_interrupt_begin((unsigned long)device); | ||
39 | } | ||
40 | EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external); | ||
41 | |||
42 | void __sched_trace_nv_interrupt_end_external(u32 device) | ||
43 | { | ||
44 | //unsigned long _device = device; | ||
45 | sched_trace_nv_interrupt_end((unsigned long)device); | ||
46 | } | ||
47 | EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external); | ||
48 | |||
49 | |||
50 | #ifdef CONFIG_LITMUS_NVIDIA | ||
51 | |||
52 | #define EXX_TS(evt) \ | ||
53 | void __##evt(void) { evt; } \ | ||
54 | EXPORT_SYMBOL(__##evt); | ||
55 | |||
56 | EXX_TS(TS_NV_TOPISR_START) | ||
57 | EXX_TS(TS_NV_TOPISR_END) | ||
58 | EXX_TS(TS_NV_BOTISR_START) | ||
59 | EXX_TS(TS_NV_BOTISR_END) | ||
60 | EXX_TS(TS_NV_RELEASE_BOTISR_START) | ||
61 | EXX_TS(TS_NV_RELEASE_BOTISR_END) | ||
62 | |||
63 | #endif | ||
64 | |||
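The EXX_TS() wrapper turns each feather-trace timestamp macro into an exported function, so that NVIDIA-related glue built under CONFIG_LITMUS_NVIDIA can record timestamps through a symbol instead of expanding the macro itself. For instance, EXX_TS(TS_NV_TOPISR_START) expands to:

    void __TS_NV_TOPISR_START(void)
    {
    	TS_NV_TOPISR_START;	/* the underlying feather-trace timestamp macro */
    }
    EXPORT_SYMBOL(__TS_NV_TOPISR_START);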