 include/litmus/rm_common.h |   37 +
 litmus/Kconfig             |   10 +
 litmus/Makefile            |    1 +
 litmus/rm_common.c         |  300 +
 litmus/sched_crm.c         | 2884 +
 5 files changed, 3232 insertions(+), 0 deletions(-)
diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
new file mode 100644
index 000000000000..7f47934d68ee
--- /dev/null
+++ b/include/litmus/rm_common.h
@@ -0,0 +1,37 @@
1 | /* | ||
2 | * RM common data structures and utility functions shared by all RM | ||
3 | * based scheduler plugins | ||
4 | */ | ||
5 | |||
6 | /* CLEANUP: Add comments and make it less messy. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #ifndef __UNC_RM_COMMON_H__ | ||
11 | #define __UNC_RM_COMMON_H__ | ||
12 | |||
13 | #include <litmus/rt_domain.h> | ||
14 | |||
15 | void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
16 | release_jobs_t release); | ||
17 | |||
18 | int rm_higher_prio(struct task_struct* first, | ||
19 | struct task_struct* second); | ||
20 | |||
21 | int rm_ready_order(struct bheap_node* a, struct bheap_node* b); | ||
22 | |||
23 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
24 | /* binheap_nodes must be embedded within 'struct litmus_lock' */ | ||
25 | int rm_max_heap_order(struct binheap_node *a, struct binheap_node *b); | ||
26 | int rm_min_heap_order(struct binheap_node *a, struct binheap_node *b); | ||
27 | int rm_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); | ||
28 | int rm_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b); | ||
29 | |||
30 | int __rm_higher_prio(struct task_struct* first, comparison_mode_t first_mode, | ||
31 | struct task_struct* second, comparison_mode_t second_mode); | ||
32 | |||
33 | #endif | ||
34 | |||
35 | int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t); | ||
36 | |||
37 | #endif | ||
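For illustration (not part of this patch), here is a minimal sketch of how a plugin might consume this interface, assuming the standard LITMUS^RT callback signatures for `check_resched_needed_t` and `release_jobs_t`; the `demo_*` names are hypothetical, and the real consumer is sched_crm.c below.

```c
/* Hypothetical usage sketch, not part of this patch. */
#include <litmus/rt_domain.h>
#include <litmus/rm_common.h>

static rt_domain_t demo_domain;

/* called when the ready queue may now hold a higher-priority task */
static int demo_check_resched(rt_domain_t *rt)
{
	/* plugin-specific: kick a CPU; report whether a resched was needed */
	return 0;
}

/* called when a batch of jobs is released */
static void demo_release_jobs(rt_domain_t *rt, struct bheap *tasks)
{
	__merge_ready(rt, tasks);	/* merge released jobs into the ready queue */
}

static void demo_init(void)
{
	/* the ready queue is ordered by rm_ready_order: shorter period first */
	rm_domain_init(&demo_domain, demo_check_resched, demo_release_jobs);
}
```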
diff --git a/litmus/Kconfig b/litmus/Kconfig
index a1a6cc699348..3d3a3ec71243 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -12,6 +12,16 @@ config PLUGIN_CEDF
12 | On smaller platforms (e.g., ARM PB11MPCore), using C-EDF | ||
13 | makes little sense since there aren't any shared caches. | ||
14 | |||
15 | config PLUGIN_CRM | ||
16 | bool "Clustered-RM" | ||
17 | depends on X86 && SYSFS | ||
18 | default y | ||
19 | help | ||
20 | Include the Clustered RM (C-RM) plugin in the kernel. | ||
21 | This is appropriate for large platforms with shared caches. | ||
22 | On smaller platforms (e.g., ARM PB11MPCore), using C-RM | ||
23 | makes little sense since there aren't any shared caches. | ||
24 | |||
25 | config RECURSIVE_READYQ_LOCK | ||
26 | bool "Recursive Ready Queue Lock" | ||
27 | default n | ||
diff --git a/litmus/Makefile b/litmus/Makefile
index 642a03617d4a..60794ac3e2bc 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -25,6 +25,7 @@ obj-y = sched_plugin.o litmus.o \
25 | sched_pfp.o | ||
26 | |||
27 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o | ||
28 | obj-$(CONFIG_PLUGIN_CRM) += rm_common.o sched_crm.o | ||
29 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o | ||
30 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o | ||
31 | |||
diff --git a/litmus/rm_common.c b/litmus/rm_common.c
new file mode 100644
index 000000000000..8d4cdf4c71cf
--- /dev/null
+++ b/litmus/rm_common.c
@@ -0,0 +1,300 @@
1 | /* | ||
2 | * litmus/rm_common.c | ||
3 | * | ||
4 | * Common functions for RM-based scheduler plugins. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
16 | #include <litmus/locking.h> | ||
17 | #endif | ||
18 | |||
19 | #include <litmus/rm_common.h> | ||
20 | |||
21 | |||
22 | /* rm_higher_prio - returns true if first has a higher RM priority | ||
23 | * than second. Period ties are broken by PID. | ||
24 | * | ||
25 | * both first and second may be NULL | ||
26 | */ | ||
27 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
28 | int __rm_higher_prio( | ||
29 | struct task_struct* first, comparison_mode_t first_mode, | ||
30 | struct task_struct* second, comparison_mode_t second_mode) | ||
31 | #else | ||
32 | int rm_higher_prio(struct task_struct* first, struct task_struct* second) | ||
33 | #endif | ||
34 | { | ||
35 | struct task_struct *first_task = first; | ||
36 | struct task_struct *second_task = second; | ||
37 | |||
38 | /* There is no point in comparing a task to itself. */ | ||
39 | if (first && first == second) { | ||
40 | TRACE_CUR("WARNING: pointless rm priority comparison: %s/%d\n", first->comm, first->pid); | ||
41 | // WARN_ON(1); | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | /* check for NULL tasks */ | ||
46 | if (!first || !second) { | ||
47 | return first && !second; | ||
48 | } | ||
49 | /* check for non-realtime */ | ||
50 | if (!is_realtime(first) || !is_realtime(second)) { | ||
51 | return is_realtime(first) && !is_realtime(second); | ||
52 | } | ||
53 | |||
54 | /* This code must handle some subtle cases. There are three subclasses | ||
55 | * within the SCHED_LITMUS scheduling class: | ||
56 | * 1) Auxiliary tasks: COTS helper threads from the application level that | ||
57 | * are forced to be real-time. | ||
58 | * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers. | ||
59 | * 3) Normal Litmus tasks. | ||
60 | * | ||
61 | * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might | ||
62 | * inherit a priority from a task of #3. | ||
63 | * | ||
64 | * The code proceeds in the following manner: | ||
65 | * 1) Treat aux and klmirqd threads at their base priorities as low priority. | ||
66 | * 2) Determine effective priorities. | ||
67 | * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie. | ||
68 | */ | ||
69 | |||
70 | |||
71 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) | ||
72 | /* run aux tasks at max priority */ | ||
73 | if (tsk_rt(first)->is_aux_task != tsk_rt(second)->is_aux_task) { | ||
74 | return (tsk_rt(first)->is_aux_task > tsk_rt(second)->is_aux_task); | ||
75 | } | ||
76 | #elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
77 | { | ||
78 | int first_lo_aux = tsk_rt(first)->is_aux_task && !tsk_rt(first)->inh_task; | ||
79 | int second_lo_aux = tsk_rt(second)->is_aux_task && !tsk_rt(second)->inh_task; | ||
80 | |||
81 | /* prioritize aux tasks without inheritance below real-time tasks */ | ||
82 | if (first_lo_aux || second_lo_aux) { | ||
83 | // one of these is an aux task without inheritance. | ||
84 | if (first_lo_aux != second_lo_aux) { | ||
85 | int temp = (first_lo_aux < second_lo_aux); // non-lo-aux has higher priority. | ||
86 | return temp; | ||
87 | } | ||
88 | else { | ||
89 | /* both MUST be lo_aux. tie-break. */ | ||
90 | //TRACE_CUR("aux tie break!\n"); | ||
91 | goto aux_tie_break; | ||
92 | } | ||
93 | } | ||
94 | |||
95 | if (tsk_rt(first)->is_aux_task && tsk_rt(second)->is_aux_task && | ||
96 | tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) { | ||
97 | // inh_task is !NULL for both tasks since neither was a lo_aux task. | ||
98 | // Both aux tasks inherit from the same task, so tie-break | ||
99 | // by base priority of the aux tasks. | ||
100 | //TRACE_CUR("aux tie break!\n"); | ||
101 | goto aux_tie_break; | ||
102 | } | ||
103 | } | ||
104 | #endif | ||
105 | |||
106 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
107 | { | ||
108 | int first_lo_klmirqd = tsk_rt(first)->is_interrupt_thread && !tsk_rt(first)->inh_task; | ||
109 | int second_lo_klmirqd = tsk_rt(second)->is_interrupt_thread && !tsk_rt(second)->inh_task; | ||
110 | |||
111 | /* prioritize klmirqd threads without inheritance below real-time tasks */ | ||
112 | if (first_lo_klmirqd || second_lo_klmirqd) { | ||
113 | // one of these is an klmirqd thread without inheritance. | ||
114 | if (first_lo_klmirqd != second_lo_klmirqd) { | ||
115 | int temp = (first_lo_klmirqd < second_lo_klmirqd); // non-klmirqd has higher priority | ||
116 | return temp; | ||
117 | } | ||
118 | else { | ||
119 | /* both MUST be klmirqd. tie-break. */ | ||
120 | //TRACE_CUR("klmirqd tie break!\n"); | ||
121 | goto klmirqd_tie_break; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | if (tsk_rt(first)->is_interrupt_thread && tsk_rt(second)->is_interrupt_thread && | ||
126 | tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) { | ||
127 | // inh_task is !NULL for both tasks since neither was a lo_klmirqd task. | ||
128 | // Both klmirqd tasks inherit from the same task, so tie-break | ||
129 | // by base priority of the klmirqd tasks. | ||
130 | //TRACE_CUR("klmirqd tie break!\n"); | ||
131 | goto klmirqd_tie_break; | ||
132 | } | ||
133 | } | ||
134 | #endif | ||
135 | |||
136 | |||
137 | #ifdef CONFIG_LITMUS_LOCKING | ||
138 | /* Check for EFFECTIVE priorities. Change task | ||
139 | * used for comparison in such a case. | ||
140 | */ | ||
141 | if (unlikely(tsk_rt(first)->inh_task) | ||
142 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
143 | && (first_mode == EFFECTIVE) | ||
144 | #endif | ||
145 | ) { | ||
146 | first_task = tsk_rt(first)->inh_task; | ||
147 | } | ||
148 | if (unlikely(tsk_rt(second)->inh_task) | ||
149 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
150 | && (second_mode == EFFECTIVE) | ||
151 | #endif | ||
152 | ) { | ||
153 | second_task = tsk_rt(second)->inh_task; | ||
154 | } | ||
155 | |||
156 | /* Check for priority boosting. Tie-break by start of boosting. | ||
157 | */ | ||
158 | if (unlikely(is_priority_boosted(first_task))) { | ||
159 | /* first_task is boosted, how about second_task? */ | ||
160 | if (!is_priority_boosted(second_task) || | ||
161 | lt_before(get_boost_start(first_task), | ||
162 | get_boost_start(second_task))) { | ||
163 | return 1; | ||
164 | } | ||
165 | else { | ||
166 | return 0; | ||
167 | } | ||
168 | } | ||
169 | else if (unlikely(is_priority_boosted(second_task))) { | ||
170 | /* second_task is boosted, first is not*/ | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | #endif | ||
175 | |||
176 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
177 | aux_tie_break: | ||
178 | #endif | ||
179 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
180 | klmirqd_tie_break: | ||
181 | #endif | ||
182 | |||
183 | // KLUDGE! This is reverse of fp_common's implementation!!! | ||
184 | if (get_period(first_task) < get_period(second_task)) | ||
185 | return 1; | ||
186 | else if (get_period(first_task) == get_period(second_task)) { | ||
187 | if (first_task->pid < second_task->pid) | ||
188 | return 1; | ||
189 | else if (first_task->pid == second_task->pid) { | ||
190 | /* there is inheritance going on. consider inheritors. */ | ||
191 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
192 | /* non-interrupt thread gets prio */ | ||
193 | if (!tsk_rt(first)->is_interrupt_thread && tsk_rt(second)->is_interrupt_thread) | ||
194 | return 1; | ||
195 | else if (tsk_rt(first)->is_interrupt_thread == tsk_rt(second)->is_interrupt_thread) { | ||
196 | #endif | ||
197 | |||
198 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
199 | /* non-aux thread gets prio */ | ||
200 | if (!tsk_rt(first)->is_aux_task && tsk_rt(second)->is_aux_task) | ||
201 | return 1; | ||
202 | else if (tsk_rt(first_task)->is_aux_task == tsk_rt(second_task)->is_aux_task) { | ||
203 | #endif | ||
204 | /* if both tasks inherit from the same task */ | ||
205 | if (tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) { | ||
206 | /* TODO: Make a recursive call to rm_higher_prio, | ||
207 | comparing base priorities. */ | ||
208 | return (first->pid < second->pid); | ||
209 | } | ||
210 | else { | ||
211 | /* At least one task must inherit */ | ||
212 | BUG_ON(!tsk_rt(first)->inh_task && | ||
213 | !tsk_rt(second)->inh_task); | ||
214 | |||
215 | /* The task withOUT the inherited priority wins. */ | ||
216 | if (tsk_rt(second)->inh_task) { | ||
217 | return 1; | ||
218 | } | ||
219 | } | ||
220 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
221 | } | ||
222 | #endif | ||
223 | |||
224 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
225 | } | ||
226 | #endif | ||
227 | } | ||
228 | } | ||
229 | |||
230 | return 0; /* fall-through: prio(second_task) >= prio(first_task) */ | ||
231 | } | ||
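For illustration, stripped of inheritance, boosting, and the aux/klmirqd special cases, the comparison above reduces to the classic rate-monotonic rule. A toy userspace reduction (hypothetical types, for illustration only):

```c
#include <stdio.h>

struct toy_task {
	int pid;
	unsigned long period;
};

/* shorter period wins; PID breaks period ties (lower PID wins) */
static int toy_rm_higher_prio(const struct toy_task *a, const struct toy_task *b)
{
	if (a->period != b->period)
		return a->period < b->period;
	return a->pid < b->pid;
}

int main(void)
{
	struct toy_task t1 = { .pid = 100, .period = 10 };
	struct toy_task t2 = { .pid = 99,  .period = 25 };

	printf("%d\n", toy_rm_higher_prio(&t1, &t2));	/* prints 1 */
	return 0;
}
```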
232 | |||
233 | |||
234 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
235 | int rm_higher_prio(struct task_struct* first, struct task_struct* second) | ||
236 | { | ||
237 | return __rm_higher_prio(first, EFFECTIVE, second, EFFECTIVE); | ||
238 | } | ||
239 | |||
240 | int rm_max_heap_order(struct binheap_node *a, struct binheap_node *b) | ||
241 | { | ||
242 | struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); | ||
243 | struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); | ||
244 | |||
245 | return __rm_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE); | ||
246 | } | ||
247 | |||
248 | int rm_min_heap_order(struct binheap_node *a, struct binheap_node *b) | ||
249 | { | ||
250 | return rm_max_heap_order(b, a); // swap comparison | ||
251 | } | ||
252 | |||
253 | int rm_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) | ||
254 | { | ||
255 | struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); | ||
256 | struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); | ||
257 | |||
258 | return __rm_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE); | ||
259 | } | ||
260 | |||
261 | int rm_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) | ||
262 | { | ||
263 | return rm_max_heap_base_priority_order(b, a); // swap comparison | ||
264 | } | ||
265 | #endif | ||
266 | |||
267 | |||
268 | int rm_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
269 | { | ||
270 | return rm_higher_prio(bheap2task(a), bheap2task(b)); | ||
271 | } | ||
272 | |||
273 | void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
274 | release_jobs_t release) | ||
275 | { | ||
276 | rt_domain_init(rt, rm_ready_order, resched, release); | ||
277 | } | ||
278 | |||
279 | /* need_to_preempt - check whether the task t needs to be preempted | ||
280 | * call only with irqs disabled and with ready_lock acquired | ||
281 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
282 | */ | ||
283 | int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
284 | { | ||
285 | /* we need the read lock for rm_ready_queue */ | ||
286 | /* no need to preempt if there is nothing pending */ | ||
287 | if (!__jobs_pending(rt)) | ||
288 | return 0; | ||
289 | /* we need to reschedule if t doesn't exist */ | ||
290 | if (!t) | ||
291 | return 1; | ||
292 | |||
293 | /* NOTE: We cannot check for non-preemptibility since we | ||
294 | * don't know what address space we're currently in. | ||
295 | */ | ||
296 | |||
297 | /* make sure to get non-rt stuff out of the way */ | ||
298 | return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t); | ||
299 | } | ||
300 | |||
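For illustration, a sketch of the intended call pattern for rm_preemption_needed(); the `demo_*` helper is hypothetical, and the real call sites are in check_for_preemptions() in sched_crm.c below.

```c
/* Hypothetical call pattern: irqs disabled, ready_lock held, 'linked' is
 * the task currently linked to the local CPU (may be NULL). */
static void demo_maybe_preempt(rt_domain_t *rt, struct task_struct *linked)
{
	if (rm_preemption_needed(rt, linked))
		litmus_reschedule_local();
}
```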
diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c
new file mode 100644
index 000000000000..a9721d6e09ae
--- /dev/null
+++ b/litmus/sched_crm.c
@@ -0,0 +1,2884 @@
1 | /* | ||
2 | * litmus/sched_crm.c | ||
3 | * | ||
4 | * Implementation of the C-RM scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Cluster topology is automatically detected (this is arch dependent | ||
9 | * and currently works only on x86 --- and only with modern | ||
10 | * CPUs that export cpuid4 information) | ||
11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e. | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster | ||
14 | * - default clustering is around L2 cache (cache index = 2) | ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
16 | * online_cpus are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online cpus. | ||
21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/uaccess.h> | ||
33 | #include <linux/module.h> | ||
34 | |||
35 | #include <litmus/litmus.h> | ||
36 | #include <litmus/jobs.h> | ||
37 | #include <litmus/preempt.h> | ||
38 | #include <litmus/budget.h> | ||
39 | #include <litmus/sched_plugin.h> | ||
40 | #include <litmus/rm_common.h> | ||
41 | #include <litmus/sched_trace.h> | ||
42 | |||
43 | #include <litmus/clustered.h> | ||
44 | |||
45 | #include <litmus/bheap.h> | ||
46 | #include <litmus/binheap.h> | ||
47 | #include <litmus/trace.h> | ||
48 | |||
49 | #ifdef CONFIG_LITMUS_LOCKING | ||
50 | #include <litmus/kfmlp_lock.h> | ||
51 | #endif | ||
52 | |||
53 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
54 | #include <litmus/fifo_lock.h> | ||
55 | #include <litmus/prioq_lock.h> | ||
56 | #include <litmus/ikglp_lock.h> | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
60 | #include <litmus/affinity.h> | ||
61 | #endif | ||
62 | |||
63 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
64 | #include <litmus/aux_tasks.h> | ||
65 | #endif | ||
66 | |||
67 | /* to configure the cluster size */ | ||
68 | #include <litmus/litmus_proc.h> | ||
69 | |||
70 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
71 | #include <litmus/affinity.h> | ||
72 | #endif | ||
73 | |||
74 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
75 | #include <litmus/litmus_softirq.h> | ||
76 | #endif | ||
77 | |||
78 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
79 | #include <linux/interrupt.h> | ||
80 | #endif | ||
81 | |||
82 | #ifdef CONFIG_LITMUS_NVIDIA | ||
83 | #include <litmus/nvidia_info.h> | ||
84 | #endif | ||
85 | |||
86 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
87 | #include <litmus/gpu_affinity.h> | ||
88 | #endif | ||
89 | |||
90 | /* Reference configuration variable. Determines which cache level is used to | ||
91 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
92 | * all CPUs form a single cluster (just like GSN-EDF). | ||
93 | */ | ||
94 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
95 | |||
96 | struct clusterdomain; | ||
97 | |||
98 | /* cpu_entry_t - maintain the linked and scheduled state | ||
99 | * | ||
100 | * A cpu also contains a pointer to the crm_domain_t cluster | ||
101 | * that owns it (struct clusterdomain*) | ||
102 | */ | ||
103 | typedef struct { | ||
104 | int cpu; | ||
105 | struct clusterdomain* cluster; /* owning cluster */ | ||
106 | struct task_struct* linked; /* only RT tasks */ | ||
107 | struct task_struct* scheduled; /* only RT tasks */ | ||
108 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
109 | struct binheap_node hn; | ||
110 | } cpu_entry_t; | ||
111 | |||
112 | /* one cpu_entry_t per CPU */ | ||
113 | DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries); | ||
114 | |||
115 | #define set_will_schedule() \ | ||
116 | (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1)) | ||
117 | #define clear_will_schedule() \ | ||
118 | (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0)) | ||
119 | #define test_will_schedule(cpu) \ | ||
120 | (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule)) | ||
121 | |||
122 | /* | ||
123 | * In C-RM there is a crm domain _per_ cluster | ||
124 | * The number of clusters is dynamically determined according to the | ||
125 | * total number of CPUs and the cluster size | ||
126 | */ | ||
127 | typedef struct clusterdomain { | ||
128 | /* rt_domain for this cluster */ | ||
129 | rt_domain_t domain; | ||
130 | /* cpus in this cluster */ | ||
131 | cpu_entry_t* *cpus; | ||
132 | /* map of this cluster cpus */ | ||
133 | cpumask_var_t cpu_map; | ||
134 | /* the cpus queue themselves according to priority in here */ | ||
135 | struct binheap cpu_heap; | ||
136 | |||
137 | #define cluster_lock domain.ready_lock | ||
138 | |||
139 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
140 | struct tasklet_head pending_tasklets; | ||
141 | #endif | ||
142 | |||
143 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
144 | raw_spinlock_t dgl_lock; | ||
145 | #endif | ||
146 | |||
147 | int top_m_size; | ||
148 | struct binheap top_m; | ||
149 | struct binheap not_top_m; | ||
150 | |||
151 | } crm_domain_t; | ||
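The `cluster_lock` alias above anchors a strict lock order that this file repeats in several places: with CONFIG_LITMUS_DGL_SUPPORT, `dgl_lock` is always taken before the cluster's ready-queue lock. A sketch of that discipline, mirroring the exhaustion callbacks below (hypothetical helper name):

```c
static void demo_locked_op(crm_domain_t *cluster)
{
	unsigned long flags;

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	/* dgl_lock strictly before the ready-queue lock */
	raw_spin_lock_irqsave(&cluster->dgl_lock, flags);
	raw_readyq_lock(&cluster->cluster_lock);

	/* ... critical section ... */

	raw_readyq_unlock(&cluster->cluster_lock);
	raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags);
#else
	raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);

	/* ... critical section ... */

	raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
#endif
}
```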
152 | |||
153 | |||
154 | /* a crm_domain per cluster; allocation is done at init/activation time */ | ||
155 | crm_domain_t *crm; | ||
156 | |||
157 | #define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster) | ||
158 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
159 | |||
160 | /* total number of clusters */ | ||
161 | static int num_clusters; | ||
162 | /* we do not support clusters of different sizes */ | ||
163 | static unsigned int cluster_size; | ||
164 | |||
165 | static int clusters_allocated = 0; | ||
166 | |||
167 | |||
168 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
169 | static int num_gpu_clusters; | ||
170 | static unsigned int gpu_cluster_size; | ||
171 | #endif | ||
172 | |||
173 | inline static struct task_struct* binheap_node_to_task(struct binheap_node *bn) | ||
174 | { | ||
175 | struct budget_tracker *bt = binheap_entry(bn, struct budget_tracker, top_m_node); | ||
176 | struct task_struct *t = | ||
177 | container_of( | ||
178 | container_of(bt, struct rt_param, budget), | ||
179 | struct task_struct, | ||
180 | rt_param); | ||
181 | return t; | ||
182 | } | ||
183 | |||
184 | static int crm_max_heap_base_priority_order(struct binheap_node *a, | ||
185 | struct binheap_node *b) | ||
186 | { | ||
187 | struct task_struct* t_a = binheap_node_to_task(a); | ||
188 | struct task_struct* t_b = binheap_node_to_task(b); | ||
189 | return __rm_higher_prio(t_a, BASE, t_b, BASE); | ||
190 | } | ||
191 | |||
192 | static int crm_min_heap_base_priority_order(struct binheap_node *a, | ||
193 | struct binheap_node *b) | ||
194 | { | ||
195 | struct task_struct* t_a = binheap_node_to_task(a); | ||
196 | struct task_struct* t_b = binheap_node_to_task(b); | ||
197 | return __rm_higher_prio(t_b, BASE, t_a, BASE); | ||
198 | } | ||
199 | |||
200 | static void crm_track_in_top_m(struct task_struct *t) | ||
201 | { | ||
202 | /* cluster lock must be held */ | ||
203 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
204 | struct budget_tracker *bt; | ||
205 | struct task_struct *mth_highest; | ||
206 | |||
207 | //BUG_ON(binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)); | ||
208 | if (binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)) { | ||
209 | // TRACE_TASK(t, "apparently already being tracked. top-m?: %s\n", | ||
210 | // (bt_flag_is_set(t, BTF_IS_TOP_M)) ? "Yes":"No"); | ||
211 | return; | ||
212 | } | ||
213 | |||
214 | /* TODO: do cluster_size-1 if release master is in this cluster */ | ||
215 | if (cluster->top_m_size < cluster_size) { | ||
216 | // TRACE_TASK(t, "unconditionally adding task to top-m.\n"); | ||
217 | binheap_add(&tsk_rt(t)->budget.top_m_node, &cluster->top_m, | ||
218 | struct budget_tracker, top_m_node); | ||
219 | ++cluster->top_m_size; | ||
220 | bt_flag_set(t, BTF_IS_TOP_M); | ||
221 | budget_state_machine(t,on_enter_top_m); | ||
222 | |||
223 | return; | ||
224 | } | ||
225 | |||
226 | BUG_ON(binheap_empty(&cluster->top_m)); | ||
227 | |||
228 | bt = binheap_top_entry(&cluster->top_m, struct budget_tracker, top_m_node); | ||
229 | mth_highest = | ||
230 | container_of( | ||
231 | container_of(bt, struct rt_param, budget), | ||
232 | struct task_struct, | ||
233 | rt_param); | ||
234 | |||
235 | if (__rm_higher_prio(t, BASE, mth_highest, BASE)) { | ||
236 | // TRACE_TASK(t, "adding to top-m (evicting %s/%d)\n", | ||
237 | // mth_highest->comm, mth_highest->pid); | ||
238 | |||
239 | binheap_delete_root(&cluster->top_m, struct budget_tracker, top_m_node); | ||
240 | INIT_BINHEAP_NODE(&tsk_rt(mth_highest)->budget.top_m_node); | ||
241 | binheap_add(&tsk_rt(mth_highest)->budget.top_m_node, | ||
242 | &cluster->not_top_m, | ||
243 | struct budget_tracker, top_m_node); | ||
244 | budget_state_machine(mth_highest,on_exit_top_m); | ||
245 | bt_flag_clear(mth_highest, BTF_IS_TOP_M); | ||
246 | |||
247 | binheap_add(&tsk_rt(t)->budget.top_m_node, &cluster->top_m, | ||
248 | struct budget_tracker, top_m_node); | ||
249 | bt_flag_set(t, BTF_IS_TOP_M); | ||
250 | budget_state_machine(t,on_enter_top_m); | ||
251 | } | ||
252 | else { | ||
253 | // TRACE_TASK(t, "adding to not-top-m\n"); | ||
254 | binheap_add(&tsk_rt(t)->budget.top_m_node, | ||
255 | &cluster->not_top_m, | ||
256 | struct budget_tracker, top_m_node); | ||
257 | } | ||
258 | } | ||
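The `top_m` and `not_top_m` heaps implement a standard top-m set: because `top_m` is ordered by crm_min_heap_base_priority_order, its root is the *lowest*-priority member of the top-m set, so admitting a newcomer costs one comparison against that root. A runnable userspace toy of the invariant (priorities as plain ints, higher means more important; hypothetical, for illustration only):

```c
#include <stdio.h>

#define M 2

static int top_m[M];	/* the M highest priorities seen so far */
static int top_m_size;

/* index of the lowest-priority member of top_m (the "m-th highest") */
static int min_idx(void)
{
	int i, m = 0;

	for (i = 1; i < top_m_size; i++)
		if (top_m[i] < top_m[m])
			m = i;
	return m;
}

static void track(int prio)
{
	if (top_m_size < M) {
		top_m[top_m_size++] = prio;	/* room left: always admit */
	} else {
		int m = min_idx();

		if (prio > top_m[m])
			top_m[m] = prio;	/* evict the current m-th highest */
		/* else: 'prio' would go to the not-top-m overflow heap */
	}
}

int main(void)
{
	int i, prios[] = { 3, 7, 5, 9 };

	for (i = 0; i < 4; i++)
		track(prios[i]);
	for (i = 0; i < top_m_size; i++)
		printf("%d ", top_m[i]);	/* prints "9 7" */
	return 0;
}
```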
259 | |||
260 | static void crm_untrack_in_top_m(struct task_struct *t) | ||
261 | { | ||
262 | /* cluster lock must be held */ | ||
263 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
264 | |||
265 | if (!binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)) { | ||
266 | // TRACE_TASK(t, "is not being tracked\n"); /* BUG() on this case? */ | ||
267 | return; | ||
268 | } | ||
269 | |||
270 | if (bt_flag_is_set(t, BTF_IS_TOP_M)) { | ||
271 | // TRACE_TASK(t, "removing task from top-m\n"); | ||
272 | |||
273 | /* delete t's entry */ | ||
274 | binheap_delete(&tsk_rt(t)->budget.top_m_node, &cluster->top_m); | ||
275 | budget_state_machine(t,on_exit_top_m); | ||
276 | bt_flag_clear(t, BTF_IS_TOP_M); | ||
277 | |||
278 | /* move a task over from the overflow heap */ | ||
279 | if(!binheap_empty(&cluster->not_top_m)) { | ||
280 | struct budget_tracker *bt = | ||
281 | binheap_top_entry(&cluster->not_top_m, struct budget_tracker, top_m_node); | ||
282 | struct task_struct *to_move = | ||
283 | container_of( | ||
284 | container_of(bt, struct rt_param, budget), | ||
285 | struct task_struct, | ||
286 | rt_param); | ||
287 | |||
288 | // TRACE_TASK(to_move, "being promoted to top-m\n"); | ||
289 | |||
290 | binheap_delete_root(&cluster->not_top_m, struct budget_tracker, top_m_node); | ||
291 | INIT_BINHEAP_NODE(&tsk_rt(to_move)->budget.top_m_node); | ||
292 | |||
293 | binheap_add(&tsk_rt(to_move)->budget.top_m_node, | ||
294 | &cluster->top_m, | ||
295 | struct budget_tracker, top_m_node); | ||
296 | bt_flag_set(to_move, BTF_IS_TOP_M); | ||
297 | budget_state_machine(to_move,on_enter_top_m); | ||
298 | } | ||
299 | else { | ||
300 | --cluster->top_m_size; | ||
301 | } | ||
302 | } | ||
303 | else { | ||
304 | // TRACE_TASK(t, "removing task from not-top-m\n"); | ||
305 | binheap_delete(&tsk_rt(t)->budget.top_m_node, &cluster->not_top_m); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | |||
310 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
311 | static raw_spinlock_t* crm_get_dgl_spinlock(struct task_struct *t) | ||
312 | { | ||
313 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
314 | return(&cluster->dgl_lock); | ||
315 | } | ||
316 | #endif | ||
317 | |||
318 | |||
319 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
320 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
321 | * information during the initialization of the plugin (e.g., topology) | ||
322 | #define WANT_ALL_SCHED_EVENTS | ||
323 | */ | ||
324 | #define VERBOSE_INIT | ||
325 | |||
326 | static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) | ||
327 | { | ||
328 | cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); | ||
329 | cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); | ||
330 | |||
331 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
332 | * the top of the heap. | ||
333 | */ | ||
334 | return rm_higher_prio(b->linked, a->linked); | ||
335 | } | ||
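The operand inversion above is the usual trick for reusing a "higher-priority" comparator to obtain a min-ordered heap, so the *lowest*-priority CPU surfaces at the root, which is exactly the CPU that check_for_preemptions() (below) wants to examine first. In miniature (hypothetical names):

```c
/* a max-order comparator... */
static int demo_higher(int a, int b)
{
	return a > b;
}

/* ...turned min-order purely by swapping its operands */
static int demo_lower(int a, int b)
{
	return demo_higher(b, a);
}
```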
336 | |||
337 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
338 | * order in the cpu queue. Caller must hold crm lock. | ||
339 | */ | ||
340 | static void update_cpu_position(cpu_entry_t *entry) | ||
341 | { | ||
342 | crm_domain_t *cluster = entry->cluster; | ||
343 | |||
344 | if (likely(binheap_is_in_heap(&entry->hn))) { | ||
345 | binheap_delete(&entry->hn, &cluster->cpu_heap); | ||
346 | } | ||
347 | |||
348 | binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); | ||
349 | } | ||
350 | |||
351 | /* caller must hold crm lock */ | ||
352 | static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster) | ||
353 | { | ||
354 | return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn); | ||
355 | } | ||
356 | |||
357 | static noinline void unlink(struct task_struct* t); | ||
358 | |||
359 | /* link_task_to_cpu - Update the link of a CPU. | ||
360 | * Handles the case where the to-be-linked task is already | ||
361 | * scheduled on a different CPU. | ||
362 | */ | ||
363 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
364 | cpu_entry_t *entry) | ||
365 | { | ||
366 | cpu_entry_t *sched; | ||
367 | struct task_struct* tmp; | ||
368 | int on_cpu; | ||
369 | |||
370 | BUG_ON(linked && !is_realtime(linked)); | ||
371 | |||
372 | /* Currently linked task is set to be unlinked. */ | ||
373 | if (entry->linked) { | ||
374 | entry->linked->rt_param.linked_on = NO_CPU; | ||
375 | |||
376 | #ifdef CONFIG_LITMUS_LOCKING | ||
377 | if (tsk_rt(entry->linked)->inh_task) | ||
378 | clear_inh_task_linkback(entry->linked, tsk_rt(entry->linked)->inh_task); | ||
379 | #endif | ||
380 | } | ||
381 | |||
382 | /* Link new task to CPU. */ | ||
383 | if (linked) { | ||
384 | /* handle task is already scheduled somewhere! */ | ||
385 | on_cpu = linked->rt_param.scheduled_on; | ||
386 | if (on_cpu != NO_CPU) { | ||
387 | sched = &per_cpu(crm_cpu_entries, on_cpu); | ||
388 | |||
389 | BUG_ON(sched->linked == linked); | ||
390 | |||
391 | /* If we are already scheduled on the CPU to which we | ||
392 | * wanted to link, we don't need to do the swap -- | ||
393 | * we just link ourselves to the CPU and depend on | ||
394 | * the caller to get things right. | ||
395 | */ | ||
396 | if (entry != sched) { | ||
397 | TRACE_TASK(linked, | ||
398 | "already scheduled on %d, updating link.\n", | ||
399 | sched->cpu); | ||
400 | tmp = sched->linked; | ||
401 | linked->rt_param.linked_on = sched->cpu; | ||
402 | sched->linked = linked; | ||
403 | update_cpu_position(sched); | ||
404 | linked = tmp; | ||
405 | } | ||
406 | } | ||
407 | if (linked) { /* might be NULL due to swap */ | ||
408 | linked->rt_param.linked_on = entry->cpu; | ||
409 | |||
410 | #ifdef CONFIG_LITMUS_LOCKING | ||
411 | if (tsk_rt(linked)->inh_task) | ||
412 | set_inh_task_linkback(linked, tsk_rt(linked)->inh_task); | ||
413 | #endif | ||
414 | } | ||
415 | } | ||
416 | entry->linked = linked; | ||
417 | #ifdef WANT_ALL_SCHED_EVENTS | ||
418 | if (linked) | ||
419 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
420 | else | ||
421 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
422 | #endif | ||
423 | update_cpu_position(entry); | ||
424 | } | ||
425 | |||
426 | /* unlink - Make sure a task is not linked any longer to an entry | ||
427 | * where it was linked before. Must hold cluster_lock. | ||
428 | */ | ||
429 | static noinline void unlink(struct task_struct* t) | ||
430 | { | ||
431 | if (t->rt_param.linked_on != NO_CPU) { | ||
432 | /* unlink */ | ||
433 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on); | ||
434 | t->rt_param.linked_on = NO_CPU; | ||
435 | link_task_to_cpu(NULL, entry); | ||
436 | } else if (is_queued(t)) { | ||
437 | /* This is an interesting situation: t is scheduled, | ||
438 | * but was just recently unlinked. It cannot be | ||
439 | * linked anywhere else (because then it would have | ||
440 | * been relinked to this CPU), thus it must be in some | ||
441 | * queue. We must remove it from the list in this | ||
442 | * case. | ||
443 | * | ||
444 | * in the C-RM case it should be somewhere in the queue for | ||
445 | * its domain; therefore we can get the domain using | ||
446 | * task_cpu_cluster | ||
447 | */ | ||
448 | remove(&(task_cpu_cluster(t))->domain, t); | ||
449 | } | ||
450 | } | ||
451 | |||
452 | |||
453 | /* preempt - force a CPU to reschedule | ||
454 | */ | ||
455 | static void preempt(cpu_entry_t *entry) | ||
456 | { | ||
457 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
458 | } | ||
459 | |||
460 | /* requeue - Put an unlinked task into its crm domain. | ||
461 | * Caller must hold cluster_lock. | ||
462 | */ | ||
463 | static noinline void requeue(struct task_struct* task) | ||
464 | { | ||
465 | crm_domain_t *cluster = task_cpu_cluster(task); | ||
466 | BUG_ON(!task); | ||
467 | /* sanity check before insertion */ | ||
468 | BUG_ON(is_queued(task)); | ||
469 | |||
470 | if (is_early_releasing(task) || is_released(task, litmus_clock()) || | ||
471 | tsk_rt(task)->job_params.is_backlogged_job) { | ||
472 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
473 | if (unlikely(tsk_rt(task)->is_aux_task && task->state != TASK_RUNNING && !tsk_rt(task)->aux_ready)) { | ||
474 | /* aux_task probably transitioned to real-time while it was blocked */ | ||
475 | TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); | ||
476 | tsk_rt(task)->aux_ready = 1; /* allow this to only happen once per aux task */ | ||
477 | } | ||
478 | else | ||
479 | #endif | ||
480 | __add_ready(&cluster->domain, task); | ||
481 | } | ||
482 | else { | ||
483 | TRACE_TASK(task, "not requeueing not-yet-released job\n"); | ||
484 | } | ||
485 | } | ||
486 | |||
487 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
488 | static cpu_entry_t* crm_get_nearest_available_cpu( | ||
489 | crm_domain_t *cluster, cpu_entry_t *start) | ||
490 | { | ||
491 | cpu_entry_t *affinity; | ||
492 | |||
493 | get_nearest_available_cpu(affinity, start, crm_cpu_entries, | ||
494 | #ifdef CONFIG_RELEASE_MASTER | ||
495 | cluster->domain.release_master | ||
496 | #else | ||
497 | NO_CPU | ||
498 | #endif | ||
499 | ); | ||
500 | |||
501 | /* make sure CPU is in our cluster */ | ||
502 | if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map)) | ||
503 | return(affinity); | ||
504 | else | ||
505 | return(NULL); | ||
506 | } | ||
507 | #endif | ||
508 | |||
509 | |||
510 | /* check for any necessary preemptions */ | ||
511 | static void check_for_preemptions(crm_domain_t *cluster) | ||
512 | { | ||
513 | struct task_struct *task; | ||
514 | cpu_entry_t *last; | ||
515 | |||
516 | for(last = lowest_prio_cpu(cluster); | ||
517 | rm_preemption_needed(&cluster->domain, last->linked); | ||
518 | last = lowest_prio_cpu(cluster)) { | ||
519 | /* preemption necessary */ | ||
520 | task = __take_ready(&cluster->domain); | ||
521 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
522 | task->pid, last->cpu); | ||
523 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
524 | { | ||
525 | cpu_entry_t *affinity = | ||
526 | crm_get_nearest_available_cpu(cluster, | ||
527 | &per_cpu(crm_cpu_entries, task_cpu(task))); | ||
528 | if(affinity) | ||
529 | last = affinity; | ||
530 | else if(should_requeue_preempted_job(last->linked)) | ||
531 | requeue(last->linked); | ||
532 | } | ||
533 | #else | ||
534 | if (should_requeue_preempted_job(last->linked)) | ||
535 | requeue(last->linked); | ||
536 | #endif | ||
537 | link_task_to_cpu(task, last); | ||
538 | preempt(last); | ||
539 | } | ||
540 | } | ||
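The loop above repeatedly pits the head of the ready queue against whichever CPU holds the lowest-priority linked task, stopping at the first CPU the queue head cannot beat. A runnable userspace miniature (two CPUs, priorities as ints; hypothetical, with affinity and requeueing omitted):

```c
#include <stdio.h>

#define NCPUS 2

int main(void)
{
	int ready[] = { 9, 8, 1 };	/* ready queue, highest prio first */
	int linked[NCPUS] = { 3, 5 };	/* priority linked to each CPU */
	int next = 0;

	for (;;) {
		int low = (linked[0] < linked[1]) ? 0 : 1; /* lowest-prio CPU */

		if (ready[next] <= linked[low])
			break;			/* no preemption needed */
		printf("link prio %d to cpu %d (preempts %d)\n",
		       ready[next], low, linked[low]);
		linked[low] = ready[next++];
	}
	return 0;
}
```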
541 | |||
542 | /* crm_job_arrival: task is either resumed or released */ | ||
543 | static noinline void crm_job_arrival(struct task_struct* task) | ||
544 | { | ||
545 | crm_domain_t *cluster = task_cpu_cluster(task); | ||
546 | BUG_ON(!task); | ||
547 | |||
548 | requeue(task); | ||
549 | check_for_preemptions(cluster); | ||
550 | } | ||
551 | |||
552 | static void crm_track_on_release(struct bheap_node* n, void* dummy) | ||
553 | { | ||
554 | struct task_struct* t = bheap2task(n); | ||
555 | // TRACE_TASK(t, "released\n"); | ||
556 | |||
557 | crm_track_in_top_m(t); | ||
558 | } | ||
559 | |||
560 | static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
561 | { | ||
562 | crm_domain_t* cluster = container_of(rt, crm_domain_t, domain); | ||
563 | unsigned long flags; | ||
564 | |||
565 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
566 | |||
567 | bheap_for_each(tasks, crm_track_on_release, NULL); | ||
568 | |||
569 | __merge_ready(&cluster->domain, tasks); | ||
570 | check_for_preemptions(cluster); | ||
571 | |||
572 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
573 | } | ||
574 | |||
575 | /* caller holds cluster_lock */ | ||
576 | static noinline void job_completion(struct task_struct *t, int forced) | ||
577 | { | ||
578 | int do_release = 0; | ||
579 | int do_backlogged_job = 0; | ||
580 | lt_t now; | ||
581 | |||
582 | BUG_ON(!t); | ||
583 | |||
584 | now = litmus_clock(); | ||
585 | |||
586 | /* DO BACKLOG TRACKING */ | ||
587 | |||
588 | /* job completed with budget remaining */ | ||
589 | if (get_release_policy(t) != SPORADIC) { | ||
590 | /* only jobs that we know will call sleep_next_job() can use backlogging */ | ||
591 | if (!forced) { | ||
592 | /* was it a backlogged job that completed? */ | ||
593 | if (tsk_rt(t)->job_params.is_backlogged_job) { | ||
594 | BUG_ON(!get_backlog(t)); | ||
595 | --get_backlog(t); | ||
596 | |||
597 | TRACE_TASK(t, "completed backlogged job\n"); | ||
598 | } | ||
599 | } | ||
600 | else { | ||
601 | /* budget was exhausted - force early release */ | ||
602 | if (get_backlog(t) == 0) { | ||
603 | TRACE_TASK(t, "first late job\n"); | ||
604 | ++get_backlog(t); | ||
605 | } | ||
606 | ++get_backlog(t); | ||
607 | TRACE_TASK(t, "adding backlogged job\n"); | ||
608 | } | ||
609 | |||
610 | do_backlogged_job = has_backlog(t); | ||
611 | TRACE_TASK(t, "number of backlogged jobs: %u\n", | ||
612 | get_backlog(t)); | ||
613 | } | ||
614 | |||
615 | /* SETUP FOR THE NEXT JOB */ | ||
616 | |||
617 | sched_trace_task_completion(t, forced); | ||
618 | |||
619 | TRACE_TASK(t, "job_completion() at %llu (forced = %d).\n", now, forced); | ||
620 | |||
621 | /* set flags */ | ||
622 | tsk_rt(t)->completed = 0; | ||
623 | |||
624 | if (unlikely(!forced && do_backlogged_job)) { | ||
625 | /* Don't advance deadline/refresh budget. Use the remaining budget for | ||
626 | * the backlogged job. | ||
627 | * | ||
628 | * NOTE: Allowing backlogged jobs to consume remaining budget may affect | ||
629 | * blocking bound analysis. | ||
630 | */ | ||
631 | } | ||
632 | else { | ||
633 | crm_untrack_in_top_m(t); | ||
634 | prepare_for_next_period(t); | ||
635 | |||
636 | do_release = (is_early_releasing(t) || is_released(t, now)); | ||
637 | |||
638 | if (do_backlogged_job) { | ||
639 | TRACE_TASK(t, "refreshing budget with early " | ||
640 | "release for backlogged job.\n"); | ||
641 | } | ||
642 | if (do_release || do_backlogged_job) { | ||
643 | /* log here to capture overheads */ | ||
644 | sched_trace_task_release(t); | ||
645 | } | ||
646 | } | ||
647 | |||
648 | unlink(t); | ||
649 | |||
650 | /* release or arm next job */ | ||
651 | if (is_running(t)) { | ||
652 | /* is our next job a backlogged job? */ | ||
653 | if (do_backlogged_job) { | ||
654 | TRACE_TASK(t, "next job is a backlogged job.\n"); | ||
655 | tsk_rt(t)->job_params.is_backlogged_job = 1; | ||
656 | } | ||
657 | else { | ||
658 | TRACE_TASK(t, "next job is a regular job.\n"); | ||
659 | tsk_rt(t)->job_params.is_backlogged_job = 0; | ||
660 | } | ||
661 | |||
662 | if (do_release || do_backlogged_job) { | ||
663 | crm_track_in_top_m(t); | ||
664 | crm_job_arrival(t); | ||
665 | } | ||
666 | else { | ||
667 | add_release(&task_cpu_cluster(t)->domain, t); | ||
668 | } | ||
669 | } | ||
670 | else { | ||
671 | BUG_ON(!forced); | ||
672 | /* budget was refreshed and job early released */ | ||
673 | TRACE_TASK(t, "job exhausted budget while sleeping\n"); | ||
674 | crm_track_in_top_m(t); | ||
675 | } | ||
676 | } | ||
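A compressed view of the backlog bookkeeping above, under one reading of the code (a hedged userspace toy, not authoritative): a forced completion bumps the backlog, adding an extra unit the first time a job goes late, while each normally completed backlogged job drains one unit.

```c
#include <stdio.h>

static unsigned int backlog;

static void complete(int forced, int was_backlogged)
{
	if (forced) {
		if (backlog == 0)
			backlog++;	/* the job that just went late */
		backlog++;		/* the next release is backlogged too */
	} else if (was_backlogged) {
		backlog--;		/* one unit of backlog drained */
	}
	printf("backlog = %u\n", backlog);
}

int main(void)
{
	complete(1, 0);	/* budget exhausted: backlog = 2 */
	complete(0, 1);	/* backlogged job finished: backlog = 1 */
	complete(0, 1);	/* backlog drained: backlog = 0 */
	return 0;
}
```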
677 | |||
678 | static enum hrtimer_restart crm_simple_on_exhausted(struct task_struct *t, int in_schedule) | ||
679 | { | ||
680 | /* Assumption: t is scheduled on the CPU executing this callback */ | ||
681 | |||
682 | if (in_schedule) { | ||
683 | BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id()); | ||
684 | if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) < 0) { | ||
685 | TRACE_TASK(t, "raced with timer. deffering to timer.\n"); | ||
686 | goto out; | ||
687 | } | ||
688 | } | ||
689 | |||
690 | if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) { | ||
691 | /* signal exhaustion */ | ||
692 | send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */ | ||
693 | } | ||
694 | |||
695 | if (budget_enforced(t) && !bt_flag_test_and_set(t, BTF_BUDGET_EXHAUSTED)) { | ||
696 | if (likely(!is_np(t))) { | ||
697 | /* np tasks will be preempted when they become | ||
698 | * preemptable again | ||
699 | */ | ||
700 | if (!in_schedule) { | ||
701 | TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n"); | ||
702 | litmus_reschedule_local(); | ||
703 | set_will_schedule(); | ||
704 | } | ||
705 | } else if (is_user_np(t)) { | ||
706 | TRACE_TASK(t, "is non-preemptable, preemption delayed.\n"); | ||
707 | request_exit_np(t); | ||
708 | } | ||
709 | } | ||
710 | |||
711 | out: | ||
712 | return HRTIMER_NORESTART; | ||
713 | } | ||
714 | |||
715 | |||
716 | static enum hrtimer_restart crm_simple_io_on_exhausted(struct task_struct *t, int in_schedule) | ||
717 | { | ||
718 | enum hrtimer_restart restart = HRTIMER_NORESTART; | ||
719 | |||
720 | if (in_schedule) { | ||
721 | BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id()); | ||
722 | if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) == -1) { | ||
723 | TRACE_TASK(t, "raced with timer. deffering to timer.\n"); | ||
724 | goto out; | ||
725 | } | ||
726 | } | ||
727 | |||
728 | /* t may or may not be scheduled */ | ||
729 | |||
730 | if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) { | ||
731 | /* signal exhaustion */ | ||
732 | |||
733 | /* Tasks should block SIG_BUDGET if they cannot gracefully respond to | ||
734 | * the signal while suspended. SIG_BUDGET is an rt-signal, so it will | ||
735 | * be queued and received when SIG_BUDGET is unblocked */ | ||
736 | send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */ | ||
737 | } | ||
738 | |||
739 | if (budget_enforced(t) && !bt_flag_is_set(t, BTF_BUDGET_EXHAUSTED)) { | ||
740 | int cpu = (tsk_rt(t)->linked_on != NO_CPU) ? | ||
741 | tsk_rt(t)->linked_on : tsk_rt(t)->scheduled_on; | ||
742 | |||
743 | if (is_np(t) && is_user_np(t)) { | ||
744 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
745 | TRACE_TASK(t, "is non-preemptable, preemption delayed.\n"); | ||
746 | request_exit_np(t); | ||
747 | } | ||
748 | /* where do we need to call resched? */ | ||
749 | else if (cpu == smp_processor_id()) { | ||
750 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
751 | if (!in_schedule) { | ||
752 | TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n"); | ||
753 | litmus_reschedule_local(); | ||
754 | set_will_schedule(); | ||
755 | } | ||
756 | } | ||
757 | else if (cpu != NO_CPU) { | ||
758 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
759 | if (!in_schedule) { | ||
760 | TRACE_TASK(t, "is preemptable on remote cpu (%d) => FORCE_RESCHED\n", cpu); | ||
761 | litmus_reschedule(cpu); | ||
762 | } | ||
763 | } | ||
764 | else if (unlikely(tsk_rt(t)->blocked_lock)) { | ||
765 | /* we shouldn't be draining while waiting for litmus lock, but we | ||
766 | * could have raced with the budget timer (?). */ | ||
767 | WARN_ON(1); | ||
768 | } | ||
769 | else { | ||
770 | lt_t remaining; | ||
771 | crm_domain_t *cluster; | ||
772 | unsigned long flags; | ||
773 | |||
774 | BUG_ON(in_schedule); | ||
775 | |||
776 | cluster = task_cpu_cluster(t); | ||
777 | |||
778 | // 1) refresh budget through job completion | ||
779 | // 2) if holds locks, tell the locking protocol to re-eval priority | ||
780 | // 3) -- the LP must undo any inheritance relations if appropriate | ||
781 | |||
782 | /* force job completion */ | ||
783 | TRACE_TASK(t, "blocked, postponing deadline\n"); | ||
784 | |||
785 | /* Outermost lock of the cluster. Recursive lock calls are | ||
786 | * possible on this code path. This should be the _ONLY_ | ||
787 | * scenario where recursive calls are made. */ | ||
788 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
789 | /* Unfortunately, we _might_ need to grab the DGL lock, so we | ||
790 | * must grab it every time since it must be taken before the | ||
791 | * cluster lock. */ | ||
792 | raw_spin_lock_irqsave(&cluster->dgl_lock, flags); | ||
793 | raw_readyq_lock(&cluster->cluster_lock); | ||
794 | #else | ||
795 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
796 | #endif | ||
797 | |||
798 | job_completion(t, 1); /* refreshes budget and pushes out deadline */ | ||
799 | |||
800 | #ifdef CONFIG_LITMUS_LOCKING | ||
801 | { | ||
802 | int i; | ||
803 | /* any linked task that inherits from 't' needs to have their | ||
804 | * cpu-position re-evaluated. we have to do this in two passes. | ||
805 | * pass 1: remove nodes from heap s.t. heap is in known good state. | ||
806 | * pass 2: re-add nodes. | ||
807 | * | ||
808 | */ | ||
809 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
810 | i < BITS_PER_LONG; | ||
811 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
812 | { | ||
813 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
814 | BUG_ON(!to_update); | ||
815 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
816 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
817 | BUG_ON(!binheap_is_in_heap(&entry->hn)); | ||
818 | binheap_delete(&entry->hn, &cluster->cpu_heap); | ||
819 | } | ||
820 | } | ||
821 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
822 | i < BITS_PER_LONG; | ||
823 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
824 | { | ||
825 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
826 | BUG_ON(!to_update); | ||
827 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
828 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
829 | binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); | ||
830 | } | ||
831 | } | ||
832 | } | ||
833 | |||
834 | /* Check our inheritance and propagate any changes forward. */ | ||
835 | reevaluate_inheritance(t); | ||
836 | #endif | ||
837 | /* No need to recheck priority of AUX tasks. They will always | ||
838 | * inherit from 't' if they are enabled. Their prio change was | ||
839 | * captured by the cpu-heap operations above. */ | ||
840 | |||
841 | #ifdef CONFIG_LITMUS_NVIDIA | ||
842 | /* Re-eval priority of GPU interrupt threads. */ | ||
843 | if(tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) | ||
844 | gpu_owner_decrease_priority(t); | ||
845 | #endif | ||
846 | |||
847 | #ifdef CONFIG_LITMUS_LOCKING | ||
848 | /* double-check that everything is okay */ | ||
849 | check_for_preemptions(cluster); | ||
850 | #endif | ||
851 | |||
852 | /* should be the outermost unlock call */ | ||
853 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
854 | raw_readyq_unlock(&cluster->cluster_lock); | ||
855 | raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags); | ||
856 | #else | ||
857 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
858 | #endif | ||
859 | |||
860 | /* we need to set up the budget timer since we're within the callback. */ | ||
861 | hrtimer_forward_now(&get_budget_timer(t).timer.timer, | ||
862 | ns_to_ktime(budget_remaining(t))); | ||
863 | remaining = hrtimer_get_expires_ns(&get_budget_timer(t).timer.timer); | ||
864 | |||
865 | TRACE_TASK(t, "rearmed timer to %ld\n", remaining); | ||
866 | restart = HRTIMER_RESTART; | ||
867 | } | ||
868 | } | ||
869 | |||
870 | out: | ||
871 | return restart; | ||
872 | } | ||
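The two-pass fix-up inside the callback above generalizes: re-keying members of an ordered container in place would corrupt it, so every affected node is deleted first (pass 1), the keys change, and the nodes are re-inserted (pass 2). A toy sketch with a sorted array standing in for the binheap (hypothetical, for illustration only):

```c
#include <stdio.h>
#include <string.h>

static int arr[8], n;

/* keep arr[] sorted ascending */
static void demo_insert(int key)
{
	int i = n++;

	while (i > 0 && arr[i - 1] > key) {
		arr[i] = arr[i - 1];
		i--;
	}
	arr[i] = key;
}

/* remove one entry matching 'key' (must be present) */
static void demo_delete(int key)
{
	int i;

	for (i = 0; i < n && arr[i] != key; i++)
		;
	memmove(&arr[i], &arr[i + 1], (n - 1 - i) * sizeof(int));
	n--;
}

int main(void)
{
	int i;

	demo_insert(3);
	demo_insert(7);
	demo_insert(5);

	/* pass 1: remove members whose keys are about to change */
	demo_delete(3);
	demo_delete(7);
	/* ... keys change: 3 -> 9, 7 -> 1 ... */
	/* pass 2: re-insert with the new keys */
	demo_insert(9);
	demo_insert(1);

	for (i = 0; i < n; i++)
		printf("%d ", arr[i]);	/* prints "1 5 9" */
	return 0;
}
```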
873 | |||
874 | |||
875 | #ifdef CONFIG_LITMUS_LOCKING | ||
876 | static void __crm_trigger_vunlock(struct task_struct *t) | ||
877 | { | ||
878 | TRACE_TASK(t, "triggering virtual unlock of lock %d\n", | ||
879 | tsk_rt(t)->outermost_lock->ident); | ||
880 | tsk_rt(t)->outermost_lock->ops->omlp_virtual_unlock(tsk_rt(t)->outermost_lock, t); | ||
881 | } | ||
882 | |||
883 | static void crm_trigger_vunlock(struct task_struct *t) | ||
884 | { | ||
885 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
886 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
887 | unsigned long flags; | ||
888 | |||
889 | /* Unfortunately, we _might_ need to grab the DGL lock, so we | ||
890 | * must grab it every time since it must be taken before the | ||
891 | * cluster lock. */ | ||
892 | raw_spin_lock_irqsave(&cluster->dgl_lock, flags); | ||
893 | #endif | ||
894 | |||
895 | __crm_trigger_vunlock(t); | ||
896 | |||
897 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
898 | raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags); | ||
899 | #endif | ||
900 | } | ||
901 | #endif | ||
902 | |||
903 | static enum hrtimer_restart crm_sobliv_on_exhausted(struct task_struct *t, int in_schedule) | ||
904 | { | ||
905 | enum hrtimer_restart restart = HRTIMER_NORESTART; | ||
906 | |||
907 | if (in_schedule) { | ||
908 | BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id()); | ||
909 | if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) == -1) { | ||
910 | TRACE_TASK(t, "raced with timer. deffering to timer.\n"); | ||
911 | goto out; | ||
912 | } | ||
913 | } | ||
914 | |||
915 | /* t may or may not be scheduled */ | ||
916 | |||
917 | if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) { | ||
918 | /* signal exhaustion */ | ||
919 | |||
920 | /* Tasks should block SIG_BUDGET if they cannot gracefully respond to | ||
921 | * the signal while suspended. SIG_BUDGET is an rt-signal, so it will | ||
922 | * be queued and received when SIG_BUDGET is unblocked */ | ||
923 | send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */ | ||
924 | } | ||
925 | |||
926 | if (budget_enforced(t) && !bt_flag_is_set(t, BTF_BUDGET_EXHAUSTED)) { | ||
927 | int cpu = (tsk_rt(t)->linked_on != NO_CPU) ? | ||
928 | tsk_rt(t)->linked_on : tsk_rt(t)->scheduled_on; | ||
929 | |||
930 | #ifdef CONFIG_LITMUS_LOCKING | ||
931 | /* if 't' running, trigger a virtual unlock of outermost held lock | ||
932 | * if supported. Case where 't' not running handled later in function. | ||
933 | */ | ||
934 | if (cpu != NO_CPU && | ||
935 | tsk_rt(t)->outermost_lock && | ||
936 | tsk_rt(t)->outermost_lock->ops->is_omlp_family) | ||
937 | crm_trigger_vunlock(t); | ||
938 | #endif | ||
939 | |||
940 | if (is_np(t) && is_user_np(t)) { | ||
941 | TRACE_TASK(t, "is non-preemptable, preemption delayed.\n"); | ||
942 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
943 | request_exit_np(t); | ||
944 | } | ||
945 | /* where do we need to call resched? */ | ||
946 | else if (cpu == smp_processor_id()) { | ||
947 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
948 | if (!in_schedule) { | ||
949 | TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n"); | ||
950 | litmus_reschedule_local(); | ||
951 | set_will_schedule(); | ||
952 | } | ||
953 | } | ||
954 | else if (cpu != NO_CPU) { | ||
955 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
956 | if (!in_schedule) { | ||
957 | litmus_reschedule(cpu); | ||
958 | TRACE_TASK(t, "is preemptable on remote cpu (%d) => FORCE_RESCHED\n", cpu); | ||
959 | } | ||
960 | } | ||
961 | else { | ||
962 | lt_t remaining; | ||
963 | crm_domain_t *cluster; | ||
964 | unsigned long flags; | ||
965 | |||
966 | BUG_ON(in_schedule); | ||
967 | |||
968 | cluster = task_cpu_cluster(t); | ||
969 | |||
970 | // 1) refresh budget through job completion | ||
971 | // 2) if holds locks, tell the locking protocol to re-eval priority | ||
972 | // 3) -- the LP must undo any inheritance relations if appropriate | ||
973 | |||
974 | /* force job completion */ | ||
975 | TRACE_TASK(t, "blocked, postponing deadline\n"); | ||
976 | |||
977 | /* Outermost lock of the cluster. Recursive lock calls are | ||
978 | * possible on this code path. This should be the _ONLY_ | ||
979 | * scenario where recursive calls are made. */ | ||
980 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
981 | /* Unfortunately, we _might_ need to grab the DGL lock, so we | ||
982 | * must grab it every time since it must be taken before the | ||
983 | * cluster lock. */ | ||
984 | raw_spin_lock_irqsave(&cluster->dgl_lock, flags); | ||
985 | raw_readyq_lock(&cluster->cluster_lock); | ||
986 | #else | ||
987 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
988 | #endif | ||
989 | |||
990 | job_completion(t, 1); /* refreshes budget and pushes out deadline */ | ||
991 | |||
992 | #ifdef CONFIG_LITMUS_LOCKING | ||
993 | { | ||
994 | int i; | ||
995 | /* any linked task that inherits from 't' needs to have their | ||
996 | * cpu-position re-evaluated. we have to do this in two passes. | ||
997 | * pass 1: remove nodes from heap s.t. heap is in known good state. | ||
998 | * pass 2: re-add nodes. | ||
999 | * | ||
1000 | */ | ||
1001 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
1002 | i < BITS_PER_LONG; | ||
1003 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
1004 | { | ||
1005 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
1006 | BUG_ON(!to_update); | ||
1007 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
1008 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
1009 | BUG_ON(!binheap_is_in_heap(&entry->hn)); | ||
1010 | binheap_delete(&entry->hn, &cluster->cpu_heap); | ||
1011 | } | ||
1012 | } | ||
1013 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
1014 | i < BITS_PER_LONG; | ||
1015 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
1016 | { | ||
1017 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
1018 | BUG_ON(!to_update); | ||
1019 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
1020 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
1021 | binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); | ||
1022 | } | ||
1023 | } | ||
1024 | } | ||
1025 | |||
1026 | /* Check our inheritance and propagate any changes forward. */ | ||
1027 | reevaluate_inheritance(t); | ||
1028 | |||
1029 | if (tsk_rt(t)->outermost_lock && tsk_rt(t)->outermost_lock->ops->is_omlp_family) | ||
1030 | __crm_trigger_vunlock(t); | ||
1031 | #endif | ||
1032 | /* No need to recheck priority of AUX tasks. They will always | ||
1033 | * inherit from 't' if they are enabled. Their prio change was | ||
1034 | * captured by the cpu-heap operations above. */ | ||
1035 | |||
1036 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1037 | /* Re-eval priority of GPU interrupt threads. */ | ||
1038 | if(tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) | ||
1039 | gpu_owner_decrease_priority(t); | ||
1040 | #endif | ||
1041 | |||
1042 | #ifdef CONFIG_LITMUS_LOCKING | ||
1043 | /* double-check that everything is okay */ | ||
1044 | check_for_preemptions(cluster); | ||
1045 | #endif | ||
1046 | |||
1047 | /* should be the outermost unlock call */ | ||
1048 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1049 | raw_readyq_unlock(&cluster->cluster_lock); | ||
1050 | raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags); | ||
1051 | #else | ||
1052 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1053 | #endif | ||
1054 | |||
1055 | /* we need to set up the budget timer since we're within the callback. */ | ||
1056 | if (bt_flag_is_set(t, BTF_IS_TOP_M)) { | ||
1057 | hrtimer_forward_now(&get_budget_timer(t).timer.timer, | ||
1058 | ns_to_ktime(budget_remaining(t))); | ||
1059 | remaining = hrtimer_get_expires_ns(&get_budget_timer(t).timer.timer); | ||
1060 | |||
1061 | TRACE_TASK(t, "rearmed timer to %ld\n", remaining); | ||
1062 | restart = HRTIMER_RESTART; | ||
1063 | } | ||
1064 | } | ||
1065 | } | ||
1066 | |||
1067 | out: | ||
1068 | return restart; | ||
1069 | } | ||
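The exhaustion callback above iterates over the set bits of `used_linkback_slots` with `find_first_bit()`/`find_next_bit()`; both take the bitmap length as a count of *bits*, which is why the size argument is `BITS_PER_BYTE * sizeof(<mask>)` of the mask object itself. A minimal user-space sketch of the same iteration pattern follows; `find_next_set_bit()` and the single-word mask are illustrative stand-ins for the kernel helpers, not their real implementation:

```c
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* stand-in for find_first_bit()/find_next_bit(): returns the index of the
 * first set bit at or after 'start', or 'nbits' if none remain */
static unsigned find_next_set_bit(unsigned long mask, unsigned nbits,
                                  unsigned start)
{
	unsigned i;
	for (i = start; i < nbits; i++)
		if (mask & (1UL << i))
			return i;
	return nbits;
}

int main(void)
{
	unsigned long used_slots = 0x29; /* bits 0, 3, and 5 set */
	unsigned i;

	/* same shape as the two passes over used_linkback_slots above */
	for (i = find_next_set_bit(used_slots, BITS_PER_LONG, 0);
	     i < BITS_PER_LONG;
	     i = find_next_set_bit(used_slots, BITS_PER_LONG, i + 1))
		printf("slot %u is in use\n", i);
	return 0;
}
```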
1070 | |||
1071 | |||
1072 | /* crm_tick - this function is called for every local timer | ||
1073 | * interrupt. | ||
1074 | * | ||
1075 | * checks whether the current task's budget has been exhausted | ||
1076 | * and, if so, invokes the budget state machine | ||
1077 | */ | ||
1078 | static void crm_tick(struct task_struct* t) | ||
1079 | { | ||
1080 | if (is_realtime(t) && | ||
1081 | tsk_rt(t)->budget.ops && budget_quantum_tracked(t) && | ||
1082 | budget_exhausted(t)) { | ||
1083 | TRACE_TASK(t, "budget exhausted\n"); | ||
1084 | budget_state_machine2(t,on_exhausted,!IN_SCHEDULE); | ||
1085 | } | ||
1086 | } | ||
1087 | |||
1088 | |||
1089 | |||
1090 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1091 | |||
1092 | static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed) | ||
1093 | { | ||
1094 | if (!atomic_read(&tasklet->count)) { | ||
1095 | if(tasklet->owner) { | ||
1096 | sched_trace_tasklet_begin(tasklet->owner); | ||
1097 | } | ||
1098 | |||
1099 | if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) | ||
1100 | { | ||
1101 | BUG(); | ||
1102 | } | ||
1103 | TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n", | ||
1104 | __FUNCTION__, | ||
1105 | (tasklet->owner) ? tasklet->owner->pid : 0, | ||
1106 | (int)flushed); | ||
1107 | tasklet->func(tasklet->data); | ||
1108 | tasklet_unlock(tasklet); | ||
1109 | |||
1110 | if(tasklet->owner) { | ||
1111 | sched_trace_tasklet_end(tasklet->owner, flushed); | ||
1112 | } | ||
1113 | } | ||
1114 | else { | ||
1115 | BUG(); | ||
1116 | } | ||
1117 | } | ||
1118 | |||
1119 | |||
1120 | static void do_lit_tasklets(crm_domain_t* cluster, struct task_struct* sched_task) | ||
1121 | { | ||
1122 | int work_to_do = 1; | ||
1123 | struct tasklet_struct *tasklet = NULL; | ||
1124 | unsigned long flags; | ||
1125 | |||
1126 | while(work_to_do) { | ||
1127 | |||
1128 | TS_NV_SCHED_BOTISR_START; | ||
1129 | |||
1130 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1131 | |||
1132 | if(cluster->pending_tasklets.head != NULL) { | ||
1133 | // remove tasklet at head. | ||
1134 | struct tasklet_struct *prev = NULL; | ||
1135 | tasklet = cluster->pending_tasklets.head; | ||
1136 | |||
1137 | // find a tasklet with prio to execute; skip ones where | ||
1138 | // sched_task has a higher priority. | ||
1139 | // We use the '!rm_higher_prio()' test instead of swapping the function | ||
1140 | // arguments since both sched_task and owner could be NULL. In that case, | ||
1141 | // we still want to execute the tasklet. | ||
1142 | while(tasklet && !rm_higher_prio(tasklet->owner, sched_task)) { | ||
1143 | prev = tasklet; | ||
1144 | tasklet = tasklet->next; | ||
1145 | } | ||
1146 | |||
1147 | if(tasklet) { // found something to execute | ||
1148 | // remove the tasklet from the queue | ||
1149 | if(prev) { | ||
1150 | prev->next = tasklet->next; | ||
1151 | if(prev->next == NULL) { | ||
1152 | TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
1153 | cluster->pending_tasklets.tail = &(prev->next); | ||
1154 | } | ||
1155 | } | ||
1156 | else { | ||
1157 | cluster->pending_tasklets.head = tasklet->next; | ||
1158 | if(tasklet->next == NULL) { | ||
1159 | TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid); | ||
1160 | cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head); | ||
1161 | } | ||
1162 | } | ||
1163 | } | ||
1164 | else { | ||
1165 | TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__); | ||
1166 | } | ||
1167 | } | ||
1168 | else { | ||
1169 | TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__); | ||
1170 | } | ||
1171 | |||
1172 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1173 | |||
1174 | if(tasklet) { | ||
1175 | __do_lit_tasklet(tasklet, 0ul); | ||
1176 | tasklet = NULL; | ||
1177 | } | ||
1178 | else { | ||
1179 | work_to_do = 0; | ||
1180 | } | ||
1181 | |||
1182 | TS_NV_SCHED_BOTISR_END; | ||
1183 | } | ||
1184 | } | ||
1185 | |||
1186 | static void __add_pai_tasklet(struct tasklet_struct* tasklet, crm_domain_t* cluster) | ||
1187 | { | ||
1188 | struct tasklet_struct* step; | ||
1189 | |||
1190 | tasklet->next = NULL; // make sure there are no old values floating around | ||
1191 | |||
1192 | step = cluster->pending_tasklets.head; | ||
1193 | if(step == NULL) { | ||
1194 | TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid); | ||
1195 | // queue is empty: head and tail insertion coincide. | ||
1196 | *(cluster->pending_tasklets.tail) = tasklet; | ||
1197 | cluster->pending_tasklets.tail = &(tasklet->next); | ||
1198 | } | ||
1199 | else if((*(cluster->pending_tasklets.tail) != NULL) && | ||
1200 | rm_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) { | ||
1201 | // insert at tail. | ||
1202 | TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid); | ||
1203 | |||
1204 | *(cluster->pending_tasklets.tail) = tasklet; | ||
1205 | cluster->pending_tasklets.tail = &(tasklet->next); | ||
1206 | } | ||
1207 | else { | ||
1208 | |||
1209 | // insert the tasklet somewhere in the middle. | ||
1210 | |||
1211 | TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__); | ||
1212 | |||
1213 | while(step->next && rm_higher_prio(step->next->owner, tasklet->owner)) { | ||
1214 | step = step->next; | ||
1215 | } | ||
1216 | |||
1217 | // insert tasklet right before step->next. | ||
1218 | |||
1219 | TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, | ||
1220 | tasklet->owner->pid, | ||
1221 | (step->owner) ? | ||
1222 | step->owner->pid : | ||
1223 | -1, | ||
1224 | (step->next) ? | ||
1225 | ((step->next->owner) ? | ||
1226 | step->next->owner->pid : | ||
1227 | -1) : | ||
1228 | -1); | ||
1229 | |||
1230 | tasklet->next = step->next; | ||
1231 | step->next = tasklet; | ||
1232 | |||
1233 | // patch up the head if needed. | ||
1234 | if(cluster->pending_tasklets.head == step) | ||
1235 | { | ||
1236 | TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid); | ||
1237 | cluster->pending_tasklets.head = tasklet; | ||
1238 | } | ||
1239 | } | ||
1240 | } | ||
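`pending_tasklets` is a singly-linked queue in which `tail` points at the `next` field of the last node, or at `head` when the queue is empty; this lets both `__add_pai_tasklet()` and `do_lit_tasklets()` splice without special-casing the list ends, provided removal of the last node restores `tail` to `&prev->next` (not `&prev`, which is a local). A self-contained sketch of that invariant, using generic `node`/`queue` types rather than the kernel's tasklet structures:

```c
#include <assert.h>
#include <stdio.h>

struct node {
	int prio;           /* stand-in for the owner's priority */
	struct node *next;
};

struct queue {
	struct node *head;
	struct node **tail; /* points at last node's 'next', or at 'head' */
};

static void queue_init(struct queue *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

/* append at the tail in O(1), as the empty/eligible-tail cases above do */
static void enqueue_tail(struct queue *q, struct node *n)
{
	n->next = NULL;
	*(q->tail) = n;
	q->tail = &n->next;
}

/* unlink 'n', whose predecessor is 'prev' (NULL if 'n' is the head) */
static void unlink_node(struct queue *q, struct node *prev, struct node *n)
{
	if (prev) {
		prev->next = n->next;
		if (!prev->next)
			q->tail = &prev->next; /* NOT &prev: prev is a local */
	} else {
		q->head = n->next;
		if (!n->next)
			q->tail = &q->head;
	}
}

int main(void)
{
	struct queue q;
	struct node a = { .prio = 3 }, b = { .prio = 1 };

	queue_init(&q);
	enqueue_tail(&q, &a);
	enqueue_tail(&q, &b);
	unlink_node(&q, &a, &b);     /* remove the last element */
	assert(q.tail == &a.next);   /* invariant restored */
	enqueue_tail(&q, &b);        /* safe to append again */
	printf("head prio = %d\n", q.head->prio);
	return 0;
}
```

With this representation, the empty-queue and append paths are the same store through `*tail`, which is why the removal path must leave `tail` pointing at a `next` field that outlives the function.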
1241 | |||
1242 | static void crm_run_tasklets(struct task_struct* sched_task) | ||
1243 | { | ||
1244 | crm_domain_t* cluster; | ||
1245 | |||
1246 | preempt_disable(); | ||
1247 | |||
1248 | cluster = (is_realtime(sched_task)) ? | ||
1249 | task_cpu_cluster(sched_task) : | ||
1250 | remote_cluster(smp_processor_id()); | ||
1251 | |||
1252 | if(cluster && cluster->pending_tasklets.head != NULL) { | ||
1253 | TRACE("%s: There are tasklets to process.\n", __FUNCTION__); | ||
1254 | do_lit_tasklets(cluster, sched_task); | ||
1255 | } | ||
1256 | |||
1257 | preempt_enable_no_resched(); | ||
1258 | } | ||
1259 | |||
1260 | |||
1261 | |||
1262 | static int crm_enqueue_pai_tasklet(struct tasklet_struct* tasklet) | ||
1263 | { | ||
1264 | #if 0 | ||
1265 | crm_domain_t *cluster = NULL; | ||
1266 | cpu_entry_t *targetCPU = NULL; | ||
1267 | int thisCPU; | ||
1268 | int runLocal = 0; | ||
1269 | int runNow = 0; | ||
1270 | unsigned long flags; | ||
1271 | |||
1272 | if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner))) | ||
1273 | { | ||
1274 | TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__); | ||
1275 | return 0; | ||
1276 | } | ||
1277 | |||
1278 | cluster = task_cpu_cluster(tasklet->owner); | ||
1279 | |||
1280 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1281 | |||
1282 | thisCPU = smp_processor_id(); | ||
1283 | |||
1284 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
1285 | { | ||
1286 | cpu_entry_t* affinity = NULL; | ||
1287 | |||
1288 | // use this CPU if it is in our cluster and isn't running any RT work. | ||
1289 | if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(crm_cpu_entries).linked == NULL)) { | ||
1290 | affinity = &(__get_cpu_var(crm_cpu_entries)); | ||
1291 | } | ||
1292 | else { | ||
1293 | // this CPU is busy or shouldn't run tasklets in this cluster. | ||
1294 | // look for an available nearby CPU. | ||
1295 | // NOTE: Affinity towards owner and not this CPU. Is this right? | ||
1296 | affinity = | ||
1297 | crm_get_nearest_available_cpu(cluster, | ||
1298 | &per_cpu(crm_cpu_entries, task_cpu(tasklet->owner))); | ||
1299 | } | ||
1300 | |||
1301 | targetCPU = affinity; | ||
1302 | } | ||
1303 | #endif | ||
1304 | |||
1305 | if (targetCPU == NULL) { | ||
1306 | targetCPU = lowest_prio_cpu(cluster); | ||
1307 | } | ||
1308 | |||
1309 | if (rm_higher_prio(tasklet->owner, targetCPU->linked)) { | ||
1310 | if (thisCPU == targetCPU->cpu) { | ||
1311 | TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__); | ||
1312 | runLocal = 1; | ||
1313 | runNow = 1; | ||
1314 | } | ||
1315 | else { | ||
1316 | TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__); | ||
1317 | runLocal = 0; | ||
1318 | runNow = 1; | ||
1319 | } | ||
1320 | } | ||
1321 | else { | ||
1322 | runLocal = 0; | ||
1323 | runNow = 0; | ||
1324 | } | ||
1325 | |||
1326 | if(!runLocal) { | ||
1327 | // enqueue the tasklet | ||
1328 | __add_pai_tasklet(tasklet, cluster); | ||
1329 | } | ||
1330 | |||
1331 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1332 | |||
1333 | |||
1334 | if (runLocal /*&& runNow */) { // runNow == 1 is implied | ||
1335 | TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); | ||
1336 | __do_lit_tasklet(tasklet, 0ul); | ||
1337 | } | ||
1338 | else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied | ||
1339 | TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu); | ||
1340 | preempt(targetCPU); // need to be protected by cluster_lock? | ||
1341 | } | ||
1342 | else { | ||
1343 | TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__); | ||
1344 | } | ||
1345 | #else | ||
1346 | TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__); | ||
1347 | __do_lit_tasklet(tasklet, 0ul); | ||
1348 | #endif | ||
1349 | return(1); // success | ||
1350 | } | ||
1351 | |||
1352 | static void crm_change_prio_pai_tasklet(struct task_struct *old_prio, | ||
1353 | struct task_struct *new_prio) | ||
1354 | { | ||
1355 | struct tasklet_struct* step; | ||
1356 | unsigned long flags; | ||
1357 | crm_domain_t *cluster; | ||
1358 | struct task_struct *probe; | ||
1359 | |||
1360 | // identify the cluster by the assignment of these tasks. one should | ||
1361 | // be non-NULL. | ||
1362 | probe = (old_prio) ? old_prio : new_prio; | ||
1363 | |||
1364 | if(probe) { | ||
1365 | cluster = task_cpu_cluster(probe); | ||
1366 | |||
1367 | if(cluster->pending_tasklets.head != NULL) { | ||
1368 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1369 | for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) { | ||
1370 | if(step->owner == old_prio) { | ||
1371 | TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid); | ||
1372 | step->owner = new_prio; | ||
1373 | } | ||
1374 | } | ||
1375 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1376 | } | ||
1377 | } | ||
1378 | else { | ||
1379 | TRACE("%s: Both priorities were NULL\n", __FUNCTION__); | ||
1380 | } | ||
1381 | } | ||
1382 | |||
1383 | #endif // PAI | ||
1384 | |||
1385 | #ifdef CONFIG_LITMUS_LOCKING | ||
1386 | static int __increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh); | ||
1387 | #endif | ||
1388 | |||
1389 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
1390 | * assumptions on the state of the current task since it may be called for a | ||
1391 | * number of reasons. The reasons include: a scheduler_tick() determined that it | ||
1392 | * was necessary, sys_exit_np() was called, some Linux | ||
1393 | * subsystem determined so, or even (in the worst case) there is a bug | ||
1394 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
1395 | * current state is. | ||
1396 | * | ||
1397 | * The CPU could currently be scheduling a task (or not) and be linked to a task (or not). | ||
1398 | * | ||
1399 | * The following assertions for the scheduled task could hold: | ||
1400 | * | ||
1401 | * - !is_running(scheduled) // the job blocks | ||
1402 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
1403 | * - is_completed() // the job completed (by syscall) | ||
1404 | * - linked != scheduled // we need to reschedule (for any reason) | ||
1405 | * - is_np(scheduled) // rescheduling must be delayed, | ||
1406 | * sys_exit_np must be requested | ||
1407 | * | ||
1408 | * Any of these can occur together. | ||
1409 | */ | ||
1410 | static struct task_struct* crm_schedule(struct task_struct * prev) | ||
1411 | { | ||
1412 | cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); | ||
1413 | crm_domain_t *cluster = entry->cluster; | ||
1414 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
1415 | struct task_struct* next = NULL; | ||
1416 | |||
1417 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1418 | int recheck_inheritance; | ||
1419 | #endif | ||
1420 | |||
1421 | #ifdef CONFIG_RELEASE_MASTER | ||
1422 | /* Bail out early if we are the release master. | ||
1423 | * The release master never schedules any real-time tasks. | ||
1424 | */ | ||
1425 | if (unlikely(cluster->domain.release_master == entry->cpu)) { | ||
1426 | sched_state_task_picked(); | ||
1427 | return NULL; | ||
1428 | } | ||
1429 | #endif | ||
1430 | |||
1431 | /* Detect and handle budget exhaustion if it hasn't already been done. | ||
1432 | * Do this before acquiring any locks. */ | ||
1433 | if (prev && is_realtime(prev) && | ||
1434 | budget_exhausted(prev) && | ||
1435 | !is_completed(prev) && /* don't bother with jobs on their way out */ | ||
1436 | ((budget_enforced(prev) && !bt_flag_is_set(prev, BTF_BUDGET_EXHAUSTED)) || | ||
1437 | (budget_signalled(prev) && !bt_flag_is_set(prev, BTF_SIG_BUDGET_SENT))) ) { | ||
1438 | TRACE_TASK(prev, "handling exhaustion in schedule() at %llu\n", litmus_clock()); | ||
1439 | budget_state_machine2(prev,on_exhausted,IN_SCHEDULE); | ||
1440 | } | ||
1441 | |||
1442 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1443 | /* prevent updates to inheritance relations while we work with 'prev' */ | ||
1444 | /* recheck inheritance if the task holds locks, is running, and will | ||
1445 | * have its deadline pushed out by job_completion() */ | ||
1446 | recheck_inheritance = | ||
1447 | prev && | ||
1448 | is_realtime(prev) && | ||
1449 | holds_locks(prev) && | ||
1450 | !is_np(prev) && | ||
1451 | !is_completed(prev) && | ||
1452 | is_running(prev) && | ||
1453 | budget_enforced(prev) && | ||
1454 | bt_flag_is_set(prev, BTF_BUDGET_EXHAUSTED); | ||
1455 | if (recheck_inheritance) { | ||
1456 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1457 | raw_spin_lock(&cluster->dgl_lock); | ||
1458 | #endif | ||
1459 | raw_spin_lock(&tsk_rt(prev)->hp_blocked_tasks_lock); | ||
1460 | } | ||
1461 | #endif | ||
1462 | |||
1463 | raw_readyq_lock(&cluster->cluster_lock); | ||
1464 | clear_will_schedule(); | ||
1465 | |||
1466 | /* sanity checking */ | ||
1467 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
1468 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
1469 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
1470 | |||
1471 | /* (0) Determine state */ | ||
1472 | exists = entry->scheduled != NULL; | ||
1473 | blocks = exists && !is_running(entry->scheduled); | ||
1474 | out_of_time = exists && | ||
1475 | budget_enforced(entry->scheduled) && | ||
1476 | bt_flag_is_set(entry->scheduled, BTF_BUDGET_EXHAUSTED); | ||
1477 | np = exists && is_np(entry->scheduled); | ||
1478 | sleep = exists && is_completed(entry->scheduled); | ||
1479 | preempt = entry->scheduled != entry->linked; | ||
1480 | |||
1481 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1482 | TRACE_TASK(prev, "invoked crm_schedule.\n"); | ||
1483 | #endif | ||
1484 | |||
1485 | if (exists) { | ||
1486 | TRACE_TASK(prev, | ||
1487 | "blocks:%d out_of_time:%d np:%d completed:%d preempt:%d " | ||
1488 | "state:%d sig:%d\n", | ||
1489 | blocks, out_of_time, np, sleep, preempt, | ||
1490 | prev->state, signal_pending(prev)); | ||
1491 | } | ||
1492 | if (entry->linked && preempt) | ||
1493 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
1494 | entry->linked->comm, entry->linked->pid); | ||
1495 | |||
1496 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1497 | if (tsk_rt(prev)->is_aux_task && | ||
1498 | (prev->state == TASK_INTERRUPTIBLE) && | ||
1499 | !blocks) { | ||
1500 | TRACE_TASK(prev, "Deferring descheduling of aux task %s/%d.\n", | ||
1501 | prev->comm, prev->pid); | ||
1502 | next = prev; /* allow prev to continue. */ | ||
1503 | goto out_set_state; | ||
1504 | } | ||
1505 | #endif | ||
1506 | |||
1507 | /* Do budget stuff */ | ||
1508 | if (blocks) { | ||
1509 | if (likely(!bt_flag_is_set(prev, BTF_WAITING_FOR_RELEASE))) | ||
1510 | budget_state_machine(prev,on_blocked); | ||
1511 | else { | ||
1512 | /* waiting for release. 'exit' the scheduler. */ | ||
1513 | crm_untrack_in_top_m(prev); | ||
1514 | budget_state_machine(prev,on_exit); | ||
1515 | } | ||
1516 | } | ||
1517 | else if (sleep) | ||
1518 | budget_state_machine(prev,on_sleep); | ||
1519 | else if (preempt) | ||
1520 | budget_state_machine(prev,on_preempt); | ||
1521 | |||
1522 | /* If a task blocks we have no choice but to reschedule. | ||
1523 | */ | ||
1524 | if (blocks) | ||
1525 | unlink(entry->scheduled); | ||
1526 | |||
1527 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
1528 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { | ||
1529 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
1530 | // don't track preemptions or locking protocol suspensions. | ||
1531 | TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); | ||
1532 | stop_gpu_tracker(entry->scheduled); | ||
1533 | } | ||
1534 | else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
1535 | TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); | ||
1536 | } | ||
1537 | } | ||
1538 | #endif | ||
1539 | |||
1540 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
1541 | * We need to make sure to update the link structure anyway in case | ||
1542 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
1543 | * hurt. | ||
1544 | */ | ||
1545 | if (np && (out_of_time || preempt || sleep)) { | ||
1546 | unlink(entry->scheduled); | ||
1547 | request_exit_np(entry->scheduled); | ||
1548 | } | ||
1549 | |||
1550 | /* Any task that is preemptable and either exhausts its execution | ||
1551 | * budget or wants to sleep completes. We may have to reschedule after | ||
1552 | * this. Don't do a job completion if we block (can't have timers running | ||
1553 | * for blocked jobs). | ||
1554 | */ | ||
1555 | if (!np && (out_of_time || sleep) && !blocks) { | ||
1556 | job_completion(entry->scheduled, !sleep); | ||
1557 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1558 | /* check if job completion enables an inheritance relation. no need to | ||
1559 | * recheck if task already inherits a priority since job_completion() | ||
1560 | * will not enable a higher-prio relation */ | ||
1561 | if (unlikely(recheck_inheritance && !tsk_rt(entry->scheduled)->inh_task)) { | ||
1562 | struct task_struct *hp_blocked; | ||
1563 | TRACE_TASK(entry->scheduled, "rechecking inheritance.\n"); | ||
1564 | hp_blocked = top_priority(&tsk_rt(entry->scheduled)->hp_blocked_tasks); | ||
1565 | /* hp_blocked_tasks_lock is held */ | ||
1566 | if (rm_higher_prio(hp_blocked, entry->scheduled)) | ||
1567 | __increase_priority_inheritance(entry->scheduled, effective_priority(hp_blocked)); | ||
1568 | } | ||
1569 | #endif | ||
1570 | } | ||
1571 | |||
1572 | /* Link pending task if we became unlinked. | ||
1573 | */ | ||
1574 | if (!entry->linked) | ||
1575 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
1576 | |||
1577 | /* The final scheduling decision. Do we need to switch for some reason? | ||
1578 | * If linked is different from scheduled, then select linked as next. | ||
1579 | */ | ||
1580 | if ((!np || blocks) && | ||
1581 | entry->linked != entry->scheduled) { | ||
1582 | /* Schedule a linked job? */ | ||
1583 | if (entry->linked) { | ||
1584 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
1585 | next = entry->linked; | ||
1586 | } | ||
1587 | if (entry->scheduled) { | ||
1588 | /* not gonna be scheduled soon */ | ||
1589 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
1590 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
1591 | } | ||
1592 | } | ||
1593 | else { | ||
1594 | /* Only override Linux scheduler if we have a real-time task | ||
1595 | * scheduled that needs to continue. | ||
1596 | */ | ||
1597 | if (exists) { | ||
1598 | next = prev; | ||
1599 | } | ||
1600 | } | ||
1601 | |||
1602 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1603 | out_set_state: | ||
1604 | #endif | ||
1605 | |||
1606 | sched_state_task_picked(); | ||
1607 | raw_readyq_unlock(&cluster->cluster_lock); | ||
1608 | |||
1609 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1610 | if (recheck_inheritance) { | ||
1611 | raw_spin_unlock(&tsk_rt(prev)->hp_blocked_tasks_lock); | ||
1612 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1613 | raw_spin_unlock(&cluster->dgl_lock); | ||
1614 | #endif | ||
1615 | } | ||
1616 | #endif | ||
1617 | |||
1618 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1619 | TRACE("cluster_lock released, next=0x%p\n", next); | ||
1620 | |||
1621 | if (next) | ||
1622 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
1623 | else if (exists && !next) | ||
1624 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
1625 | #endif | ||
1626 | |||
1627 | return next; | ||
1628 | } | ||
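The tail of `crm_schedule()` reduces to one decision: switch to `entry->linked` whenever it differs from `entry->scheduled` and the task is preemptable (or has just blocked); otherwise keep a still-runnable real-time task, or return NULL to fall back to Linux. A sketch of just that predicate, stripped of the `scheduled_on` bookkeeping (the `struct pick` type is hypothetical, introduced only for illustration):

```c
#include <stdio.h>

struct pick { int np, blocks, exists; void *linked, *scheduled; };

/* returns the task to run next, or NULL to fall back to Linux */
static void *final_pick(const struct pick *s, void *prev)
{
	if ((!s->np || s->blocks) && s->linked != s->scheduled)
		return s->linked;           /* may be NULL: go idle */
	return s->exists ? prev : NULL;     /* keep RT task, else Linux */
}

int main(void)
{
	int t1 = 1, t2 = 2;
	struct pick s = { .np = 0, .blocks = 0, .exists = 1,
	                  .linked = &t2, .scheduled = &t1 };
	printf("switch? %s\n", final_pick(&s, &t1) == &t2 ? "yes" : "no");
	return 0;
}
```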
1629 | |||
1630 | |||
1631 | /* _finish_switch - we just finished the switch away from prev | ||
1632 | */ | ||
1633 | static void crm_finish_switch(struct task_struct *prev) | ||
1634 | { | ||
1635 | cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); | ||
1636 | |||
1637 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
1638 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1639 | TRACE_TASK(prev, "switched away from\n"); | ||
1640 | #endif | ||
1641 | } | ||
1642 | |||
1643 | |||
1644 | /* Prepare a task for running in RT mode | ||
1645 | */ | ||
1646 | static void crm_task_new(struct task_struct * t, int on_rq, int running) | ||
1647 | { | ||
1648 | unsigned long flags; | ||
1649 | cpu_entry_t* entry; | ||
1650 | crm_domain_t* cluster; | ||
1651 | |||
1652 | TRACE("c-fp: task new %d (param running = %d, is_running = %d)\n", t->pid, running, is_running(t)); | ||
1653 | |||
1654 | /* the cluster doesn't change even if t is running */ | ||
1655 | cluster = task_cpu_cluster(t); | ||
1656 | |||
1657 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1658 | |||
1659 | /* setup job params */ | ||
1660 | release_at(t, litmus_clock()); | ||
1661 | |||
1662 | t->rt_param.linked_on = NO_CPU; | ||
1663 | |||
1664 | if (running) { | ||
1665 | entry = &per_cpu(crm_cpu_entries, task_cpu(t)); | ||
1666 | BUG_ON(entry->scheduled); | ||
1667 | |||
1668 | #ifdef CONFIG_RELEASE_MASTER | ||
1669 | if (entry->cpu != cluster->domain.release_master) { | ||
1670 | #endif | ||
1671 | entry->scheduled = t; | ||
1672 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
1673 | #ifdef CONFIG_RELEASE_MASTER | ||
1674 | } else { | ||
1675 | /* do not schedule on release master */ | ||
1676 | preempt(entry); /* force resched */ | ||
1677 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
1678 | } | ||
1679 | #endif | ||
1680 | } else { | ||
1681 | t->rt_param.scheduled_on = NO_CPU; | ||
1682 | } | ||
1683 | |||
1684 | if (is_running(t)) { | ||
1685 | crm_track_in_top_m(t); | ||
1686 | crm_job_arrival(t); | ||
1687 | } | ||
1688 | |||
1689 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1690 | } | ||
1691 | |||
1692 | static void crm_task_wake_up(struct task_struct *t) | ||
1693 | { | ||
1694 | unsigned long flags; | ||
1695 | crm_domain_t *cluster; | ||
1696 | lt_t now; | ||
1697 | |||
1698 | cluster = task_cpu_cluster(t); | ||
1699 | |||
1700 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1701 | |||
1702 | now = litmus_clock(); | ||
1703 | TRACE_TASK(t, "wake_up at %llu\n", now); | ||
1704 | |||
1705 | if (is_sporadic(t) && is_tardy(t, now)) { | ||
1706 | release_at(t, now); | ||
1707 | sched_trace_task_release(t); | ||
1708 | } | ||
1709 | else { | ||
1710 | /* periodic task model. don't force job to end. | ||
1711 | * rely on user to say when jobs complete or when budget expires. */ | ||
1712 | tsk_rt(t)->completed = 0; | ||
1713 | } | ||
1714 | |||
1715 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1716 | if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { | ||
1717 | TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", t->comm, t->pid); | ||
1718 | disable_aux_task_owner(t); | ||
1719 | } | ||
1720 | #endif | ||
1721 | |||
1722 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1723 | if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { | ||
1724 | TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", t->comm, t->pid); | ||
1725 | disable_gpu_owner(t); | ||
1726 | } | ||
1727 | #endif | ||
1728 | |||
1729 | budget_state_machine(t,on_wakeup); | ||
1730 | crm_job_arrival(t); | ||
1731 | |||
1732 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1733 | } | ||
1734 | |||
1735 | static void crm_task_block(struct task_struct *t) | ||
1736 | { | ||
1737 | unsigned long flags; | ||
1738 | crm_domain_t *cluster; | ||
1739 | |||
1740 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
1741 | |||
1742 | cluster = task_cpu_cluster(t); | ||
1743 | |||
1744 | /* unlink if necessary */ | ||
1745 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1746 | |||
1747 | unlink(t); | ||
1748 | |||
1749 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1750 | if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { | ||
1751 | |||
1752 | TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); | ||
1753 | enable_aux_task_owner(t); | ||
1754 | } | ||
1755 | #endif | ||
1756 | |||
1757 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1758 | if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { | ||
1759 | |||
1760 | TRACE_CUR("%s/%d is blocked so klmirqd threads may inherit.\n", t->comm, t->pid); | ||
1761 | enable_gpu_owner(t); | ||
1762 | } | ||
1763 | #endif | ||
1764 | |||
1765 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1766 | |||
1767 | BUG_ON(!is_realtime(t)); | ||
1768 | } | ||
1769 | |||
1770 | |||
1771 | static void crm_task_exit(struct task_struct * t) | ||
1772 | { | ||
1773 | unsigned long flags; | ||
1774 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
1775 | |||
1776 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
1777 | crm_change_prio_pai_tasklet(t, NULL); | ||
1778 | #endif | ||
1779 | |||
1780 | /* unlink if necessary */ | ||
1781 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1782 | |||
1783 | if (tsk_rt(t)->inh_task) { | ||
1784 | WARN_ON(1); | ||
1785 | clear_inh_task_linkback(t, tsk_rt(t)->inh_task); | ||
1786 | } | ||
1787 | |||
1788 | /* disable budget enforcement */ | ||
1789 | crm_untrack_in_top_m(t); | ||
1790 | budget_state_machine(t,on_exit); | ||
1791 | |||
1792 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1793 | /* make sure we clean up on our way out */ | ||
1794 | if (unlikely(tsk_rt(t)->is_aux_task)) | ||
1795 | exit_aux_task(t); | ||
1796 | else if(tsk_rt(t)->has_aux_tasks) | ||
1797 | disable_aux_task_owner(t); | ||
1798 | #endif | ||
1799 | |||
1800 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1801 | /* make sure we clean up on our way out */ | ||
1802 | if(tsk_rt(t)->held_gpus) | ||
1803 | disable_gpu_owner(t); | ||
1804 | #endif | ||
1805 | |||
1806 | unlink(t); | ||
1807 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
1808 | cpu_entry_t *cpu; | ||
1809 | cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on); | ||
1810 | cpu->scheduled = NULL; | ||
1811 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
1812 | } | ||
1813 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1814 | |||
1815 | BUG_ON(!is_realtime(t)); | ||
1816 | TRACE_TASK(t, "RIP\n"); | ||
1817 | } | ||
1818 | |||
1819 | |||
1820 | |||
1821 | |||
1822 | |||
1823 | |||
1824 | static struct budget_tracker_ops crm_drain_simple_ops = | ||
1825 | { | ||
1826 | .on_scheduled = simple_on_scheduled, | ||
1827 | .on_blocked = simple_on_blocked, | ||
1828 | .on_preempt = simple_on_preempt, | ||
1829 | .on_sleep = simple_on_sleep, | ||
1830 | .on_exit = simple_on_exit, | ||
1831 | |||
1832 | .on_wakeup = NULL, | ||
1833 | .on_inherit = NULL, | ||
1834 | .on_disinherit = NULL, | ||
1835 | .on_enter_top_m = NULL, | ||
1836 | .on_exit_top_m = NULL, | ||
1837 | |||
1838 | .on_exhausted = crm_simple_on_exhausted, | ||
1839 | }; | ||
1840 | |||
1841 | static struct budget_tracker_ops crm_drain_simple_io_ops = | ||
1842 | { | ||
1843 | .on_scheduled = simple_io_on_scheduled, | ||
1844 | .on_blocked = simple_io_on_blocked, | ||
1845 | .on_preempt = simple_io_on_preempt, | ||
1846 | .on_sleep = simple_io_on_sleep, | ||
1847 | .on_exit = simple_io_on_exit, | ||
1848 | |||
1849 | .on_wakeup = simple_io_on_wakeup, | ||
1850 | .on_inherit = NULL, | ||
1851 | .on_disinherit = NULL, | ||
1852 | .on_enter_top_m = NULL, | ||
1853 | .on_exit_top_m = NULL, | ||
1854 | |||
1855 | .on_exhausted = crm_simple_io_on_exhausted, | ||
1856 | }; | ||
1857 | |||
1858 | static struct budget_tracker_ops crm_drain_sobliv_ops = | ||
1859 | { | ||
1860 | .on_scheduled = NULL, | ||
1861 | .on_preempt = NULL, | ||
1862 | .on_sleep = NULL, | ||
1863 | |||
1864 | .on_blocked = sobliv_on_blocked, | ||
1865 | .on_wakeup = sobliv_on_wakeup, | ||
1866 | .on_exit = sobliv_on_exit, | ||
1867 | .on_inherit = sobliv_on_inherit, | ||
1868 | .on_disinherit = sobliv_on_disinherit, | ||
1869 | .on_enter_top_m = sobliv_on_enter_top_m, | ||
1870 | .on_exit_top_m = sobliv_on_exit_top_m, | ||
1871 | |||
1872 | .on_exhausted = crm_sobliv_on_exhausted, | ||
1873 | }; | ||
1874 | |||
1875 | static long crm_admit_task(struct task_struct* tsk) | ||
1876 | { | ||
1877 | struct budget_tracker_ops* ops = NULL; | ||
1878 | |||
1879 | if (remote_cluster(task_cpu(tsk)) != task_cpu_cluster(tsk)) { | ||
1880 | // printk("rejected admit: incorrect cluster.\n"); | ||
1881 | // return -EINVAL; | ||
1882 | } | ||
1883 | |||
1884 | if (budget_enforced(tsk) || budget_signalled(tsk)) { | ||
1885 | switch(get_drain_policy(tsk)) { | ||
1886 | case DRAIN_SIMPLE: | ||
1887 | ops = &crm_drain_simple_ops; | ||
1888 | break; | ||
1889 | case DRAIN_SIMPLE_IO: | ||
1890 | ops = &crm_drain_simple_io_ops; | ||
1891 | break; | ||
1892 | case DRAIN_SOBLIV: | ||
1893 | /* budget_policy and budget_signal_policy cannot be quantum-based */ | ||
1894 | if (!budget_quantum_tracked(tsk) && budget_precisely_tracked(tsk)) { | ||
1895 | ops = &crm_drain_sobliv_ops; | ||
1896 | } | ||
1897 | else { | ||
1898 | printk("rejected admit: QUANTUM_ENFORCEMENT and QUANTUM_SIGNALS are " | ||
1899 | "unsupported with DRAIN_SOBLIV.\n"); | ||
1900 | return -EINVAL; | ||
1901 | } | ||
1902 | break; | ||
1903 | default: | ||
1904 | printk("rejected admit: Unsupported budget draining mode.\n"); | ||
1905 | return -EINVAL; | ||
1906 | } | ||
1907 | } | ||
1908 | |||
1909 | /* always init the budget tracker, even if we're not using timers */ | ||
1910 | init_budget_tracker(&tsk_rt(tsk)->budget, ops); | ||
1911 | |||
1912 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1913 | INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, | ||
1914 | rm_max_heap_base_priority_order); | ||
1915 | #endif | ||
1916 | |||
1917 | return 0; | ||
1918 | } | ||
1919 | |||
1920 | |||
1921 | |||
1922 | #ifdef CONFIG_LITMUS_LOCKING | ||
1923 | |||
1924 | #include <litmus/fdso.h> | ||
1925 | |||
1926 | /* called with IRQs off */ | ||
1927 | static int __increase_priority_inheritance(struct task_struct* t, | ||
1928 | struct task_struct* prio_inh) | ||
1929 | { | ||
1930 | int success = 1; | ||
1931 | int linked_on; | ||
1932 | int check_preempt = 0; | ||
1933 | crm_domain_t* cluster; | ||
1934 | struct task_struct* old_prio_inh = tsk_rt(t)->inh_task; | ||
1935 | |||
1936 | if (prio_inh && prio_inh == effective_priority(t)) { | ||
1937 | /* relationship already established. */ | ||
1938 | TRACE_TASK(t, "already has effective priority of %s/%d\n", | ||
1939 | prio_inh->comm, prio_inh->pid); | ||
1940 | goto out; | ||
1941 | } | ||
1942 | |||
1943 | if (prio_inh && (effective_priority(prio_inh) != prio_inh)) { | ||
1944 | TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n", | ||
1945 | prio_inh->comm, prio_inh->pid, | ||
1946 | effective_priority(prio_inh)->comm, | ||
1947 | effective_priority(prio_inh)->pid); | ||
1948 | #ifndef CONFIG_LITMUS_NESTED_LOCKING | ||
1949 | /* Tasks should only inherit the base priority of a task. | ||
1950 | If 't' inherits a priority, then tsk_rt(t)->inh_task should | ||
1951 | be passed to this function instead. This includes transitive | ||
1952 | inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */ | ||
1953 | BUG(); | ||
1954 | #else | ||
1955 | /* Not a bug with nested locking since inheritance propagation is | ||
1956 | not atomic. */ | ||
1957 | |||
1958 | /* TODO: Is the following 'helping' short-cut safe? | ||
1959 | prio_inh = effective_priority(prio_inh); | ||
1960 | */ | ||
1961 | #endif | ||
1962 | } | ||
1963 | |||
1964 | cluster = task_cpu_cluster(t); | ||
1965 | |||
1966 | #if 0 | ||
1967 | if (prio_inh && task_cpu_cluster(prio_inh) != cluster) { | ||
1968 | WARN_ONCE(1, "Illegal to inherit between clusters. " \ | ||
1969 | "target (%s/%d) on cluster w/ CPU %d and " \ | ||
1970 | "inh_prio (%s/%d) on w/ CPU %d\n", \ | ||
1971 | t->comm, t->pid, cluster->cpus[0]->cpu, \ | ||
1972 | prio_inh->comm, prio_inh->pid, \ | ||
1973 | task_cpu_cluster(prio_inh)->cpus[0]->cpu); | ||
1974 | return 1; | ||
1975 | } | ||
1976 | #endif | ||
1977 | |||
1978 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1979 | /* this sanity check allows for weaker locking in protocols */ | ||
1980 | /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */ | ||
1981 | if(__rm_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { | ||
1982 | #endif | ||
1983 | sched_trace_eff_prio_change(t, prio_inh); | ||
1984 | |||
1985 | /* clear out old inheritance relation */ | ||
1986 | if (old_prio_inh) { | ||
1987 | budget_state_machine_chgprio(t,old_prio_inh,on_disinherit); | ||
1988 | clear_inh_task_linkback(t, old_prio_inh); | ||
1989 | } | ||
1990 | |||
1991 | TRACE_TASK(t, "inherits priority from %s/%d\n", | ||
1992 | prio_inh->comm, prio_inh->pid); | ||
1993 | tsk_rt(t)->inh_task = prio_inh; | ||
1994 | |||
1995 | /* update inheritance relation */ | ||
1996 | if (prio_inh) | ||
1997 | budget_state_machine_chgprio(t,prio_inh,on_inherit); | ||
1998 | |||
1999 | linked_on = tsk_rt(t)->linked_on; | ||
2000 | |||
2001 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
2002 | if (linked_on != NO_CPU) { | ||
2003 | TRACE_TASK(t, "%s: linked on %d\n", | ||
2004 | __FUNCTION__, linked_on); | ||
2005 | /* Holder is scheduled; need to re-order CPUs. | ||
2006 | * We can't use heap_decrease() here since | ||
2007 | * the cpu_heap is ordered in reverse direction, so | ||
2008 | * it is actually an increase. */ | ||
2009 | binheap_delete(&per_cpu(crm_cpu_entries, linked_on).hn, | ||
2010 | &cluster->cpu_heap); | ||
2011 | binheap_add(&per_cpu(crm_cpu_entries, linked_on).hn, | ||
2012 | &cluster->cpu_heap, cpu_entry_t, hn); | ||
2013 | |||
2014 | /* tell prio_inh that we're __running__ with its priority */ | ||
2015 | set_inh_task_linkback(t, prio_inh); | ||
2016 | } | ||
2017 | else { | ||
2018 | /* holder may be queued: first stop queue changes */ | ||
2019 | raw_spin_lock(&cluster->domain.release_lock); | ||
2020 | if (is_queued(t)) { | ||
2021 | TRACE_TASK(t, "%s: is queued\n", | ||
2022 | __FUNCTION__); | ||
2023 | /* We need to update the position of holder in some | ||
2024 | * heap. Note that this could be a release heap if | ||
2025 | * budget enforcement is used and this job overran. */ | ||
2026 | check_preempt = | ||
2027 | !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node); | ||
2028 | } else { | ||
2029 | /* Nothing to do: if it is not queued and not linked | ||
2030 | * then it is either sleeping or currently being moved | ||
2031 | * by other code (e.g., a timer interrupt handler) that | ||
2032 | * will use the correct priority when enqueuing the | ||
2033 | * task. */ | ||
2034 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", | ||
2035 | __FUNCTION__); | ||
2036 | } | ||
2037 | raw_spin_unlock(&cluster->domain.release_lock); | ||
2038 | |||
2039 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
2040 | /* propagate to aux tasks */ | ||
2041 | if (tsk_rt(t)->has_aux_tasks) { | ||
2042 | aux_task_owner_increase_priority(t); | ||
2043 | } | ||
2044 | #endif | ||
2045 | |||
2046 | #ifdef CONFIG_LITMUS_NVIDIA | ||
2047 | /* propagate to gpu klmirqd */ | ||
2048 | if (tsk_rt(t)->held_gpus) { | ||
2049 | gpu_owner_increase_priority(t); | ||
2050 | } | ||
2051 | #endif | ||
2052 | |||
2053 | /* If holder was enqueued in a release heap, then the following | ||
2054 | * preemption check is pointless, but we can't easily detect | ||
2055 | * that case. If you want to fix this, then consider that | ||
2056 | * simply adding a state flag requires O(n) time to update when | ||
2057 | * releasing n tasks, which conflicts with the goal to have | ||
2058 | * O(log n) merges. */ | ||
2059 | if (check_preempt) { | ||
2060 | /* heap_decrease() hit the top level of the heap: make | ||
2061 | * sure preemption checks get the right task, not the | ||
2062 | * potentially stale cache. */ | ||
2063 | bheap_uncache_min(rm_ready_order, | ||
2064 | &cluster->domain.ready_queue); | ||
2065 | check_for_preemptions(cluster); | ||
2066 | } | ||
2067 | } | ||
2068 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2069 | } | ||
2070 | else { | ||
2071 | /* Occurrence is okay under two scenarios: | ||
2072 | * 1. Fine-grained nested locks (no compiled DGL support): Concurrent | ||
2073 | * updates are chasing each other through the wait-for chain. | ||
2074 | * 2. Budget exhaustion caused the HP waiter to lose its priority, but | ||
2075 | * the lock structure hasn't yet been updated (but soon will be). | ||
2076 | */ | ||
2077 | TRACE_TASK(t, "Spurious invalid priority increase. " | ||
2078 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d. " | ||
2079 | "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
2080 | t->comm, t->pid, | ||
2081 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
2082 | (prio_inh) ? prio_inh->comm : "null", | ||
2083 | (prio_inh) ? prio_inh->pid : 0); | ||
2084 | WARN_ON(!prio_inh); | ||
2085 | success = 0; | ||
2086 | } | ||
2087 | #endif | ||
2088 | |||
2089 | out: | ||
2090 | return success; | ||
2091 | } | ||
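Since `cpu_heap` is ordered in the reverse direction, a priority *increase* of a linked task cannot be handled with a decrease-key operation, so the code above deletes the CPU's heap node and re-inserts it. The same reposition-by-delete-and-re-add pattern on a plain array max-heap, as a generic sketch (not the litmus `binheap` API):

```c
#include <stdio.h>

static int heap[16], len;

static void swap_nodes(int i, int j)
{
	int t = heap[i]; heap[i] = heap[j]; heap[j] = t;
}

/* max-heap push (sift up) */
static void push(int key)
{
	int i = len++;
	heap[i] = key;
	while (i && heap[(i - 1) / 2] < heap[i]) {
		swap_nodes(i, (i - 1) / 2);
		i = (i - 1) / 2;
	}
}

/* remove the element at index i: move the last element in, then sift in
 * both directions, since the replacement may need to go either way */
static void del(int i)
{
	heap[i] = heap[--len];
	if (i == len)
		return;	/* removed the last slot; nothing to fix up */
	while (i && heap[(i - 1) / 2] < heap[i]) {
		swap_nodes(i, (i - 1) / 2);
		i = (i - 1) / 2;
	}
	for (;;) {
		int l = 2 * i + 1, r = 2 * i + 2, m = i;
		if (l < len && heap[l] > heap[m]) m = l;
		if (r < len && heap[r] > heap[m]) m = r;
		if (m == i) break;
		swap_nodes(i, m);
		i = m;
	}
}

int main(void)
{
	int i;
	push(5); push(9); push(7);
	/* the key of entry '5' rises to 12: delete + re-add repositions it,
	 * with no assumption about which direction the key moved */
	for (i = 0; i < len; i++)
		if (heap[i] == 5) { del(i); break; }
	push(12);
	printf("top = %d\n", heap[0]); /* prints 12 */
	return 0;
}
```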
2092 | |||
2093 | /* called with IRQs off */ | ||
2094 | static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
2095 | { | ||
2096 | crm_domain_t* cluster = task_cpu_cluster(t); | ||
2097 | |||
2098 | raw_readyq_lock(&cluster->cluster_lock); | ||
2099 | |||
2100 | TRACE_TASK(t, "to inherit from %s/%d\n", prio_inh->comm, prio_inh->pid); | ||
2101 | |||
2102 | __increase_priority_inheritance(t, prio_inh); | ||
2103 | |||
2104 | raw_readyq_unlock(&cluster->cluster_lock); | ||
2105 | |||
2106 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) | ||
2107 | if(tsk_rt(t)->held_gpus) { | ||
2108 | int i; | ||
2109 | for(i = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); | ||
2110 | i < NV_DEVICE_NUM; | ||
2111 | i = find_next_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus), i+1)) { | ||
2112 | pai_check_priority_increase(t, i); | ||
2113 | } | ||
2114 | } | ||
2115 | #endif | ||
2116 | } | ||
2117 | |||
2118 | /* called with IRQs off */ | ||
2119 | static int __decrease_priority_inheritance(struct task_struct* t, | ||
2120 | struct task_struct* prio_inh, | ||
2121 | int budget_triggered) | ||
2122 | { | ||
2123 | crm_domain_t* cluster; | ||
2124 | int success = 1; | ||
2125 | struct task_struct* old_prio_inh = tsk_rt(t)->inh_task; | ||
2126 | |||
2127 | if (prio_inh == old_prio_inh) { | ||
2128 | /* relationship already established. */ | ||
2129 | TRACE_TASK(t, "already inherits priority from %s/%d\n", | ||
2130 | (prio_inh) ? prio_inh->comm : "(null)", | ||
2131 | (prio_inh) ? prio_inh->pid : 0); | ||
2132 | goto out; | ||
2133 | } | ||
2134 | |||
2135 | if (prio_inh && (effective_priority(prio_inh) != prio_inh)) { | ||
2136 | TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n", | ||
2137 | prio_inh->comm, prio_inh->pid, | ||
2138 | effective_priority(prio_inh)->comm, | ||
2139 | effective_priority(prio_inh)->pid); | ||
2140 | #ifndef CONFIG_LITMUS_NESTED_LOCKING | ||
2141 | /* Tasks should only inherit the base priority of a task. | ||
2142 | If 't' inherits a priority, then tsk_rt(t)->inh_task should | ||
2143 | be passed to this function instead. This includes transitive | ||
2144 | inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */ | ||
2145 | BUG(); | ||
2146 | #else | ||
2147 | /* Not a bug with nested locking since inheritance propagation is | ||
2148 | not atomic. */ | ||
2149 | |||
2150 | /* TODO: Is the following 'helping' short-cut safe? | ||
2151 | prio_inh = effective_priority(prio_inh); | ||
2152 | */ | ||
2153 | #endif | ||
2154 | } | ||
2155 | |||
2156 | cluster = task_cpu_cluster(t); | ||
2157 | |||
2158 | #if 0 | ||
2159 | if (prio_inh && task_cpu_cluster(prio_inh) != cluster) { | ||
2160 | WARN_ONCE(1, "Illegal to inherit between clusters. " \ | ||
2161 | "target (%s/%d) on cluster w/ CPU %d and " \ | ||
2162 | "inh_prio (%s/%d) on w/ CPU %d\n", \ | ||
2163 | t->comm, t->pid, cluster->cpus[0]->cpu, \ | ||
2164 | prio_inh->comm, prio_inh->pid, \ | ||
2165 | task_cpu_cluster(prio_inh)->cpus[0]->cpu); | ||
2166 | return 1; | ||
2167 | } | ||
2168 | #endif | ||
2169 | |||
2170 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2171 | if(budget_triggered || __rm_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { | ||
2172 | #endif | ||
2173 | sched_trace_eff_prio_change(t, prio_inh); | ||
2174 | |||
2175 | if (budget_triggered) { | ||
2176 | BUG_ON(!old_prio_inh); | ||
2177 | TRACE_TASK(t, "budget-triggered 'decrease' in priority. " | ||
2178 | "%s/%d's budget should have just been exhuasted.\n", | ||
2179 | old_prio_inh->comm, old_prio_inh->pid); | ||
2180 | } | ||
2181 | |||
2182 | /* clear out old inheritance relation */ | ||
2183 | if (old_prio_inh) { | ||
2184 | budget_state_machine_chgprio(t,old_prio_inh,on_disinherit); | ||
2185 | clear_inh_task_linkback(t, old_prio_inh); | ||
2186 | } | ||
2187 | |||
2188 | /* A job only stops inheriting a priority when it releases a | ||
2189 | * resource. Thus we can make the following assumption.*/ | ||
2190 | if(prio_inh) | ||
2191 | TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", | ||
2192 | prio_inh->comm, prio_inh->pid); | ||
2193 | else | ||
2194 | TRACE_TASK(t, "base priority restored.\n"); | ||
2195 | |||
2196 | /* set up new inheritance relation */ | ||
2197 | tsk_rt(t)->inh_task = prio_inh; | ||
2198 | |||
2199 | if (prio_inh) | ||
2200 | budget_state_machine_chgprio(t,prio_inh,on_inherit); | ||
2201 | |||
2202 | if(tsk_rt(t)->scheduled_on != NO_CPU) { | ||
2203 | TRACE_TASK(t, "is scheduled.\n"); | ||
2204 | |||
2205 | /* link back to new inheritance */ | ||
2206 | if (prio_inh) | ||
2207 | set_inh_task_linkback(t, prio_inh); | ||
2208 | |||
2209 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
2210 | * since the priority was effectively lowered. */ | ||
2211 | unlink(t); | ||
2212 | crm_job_arrival(t); | ||
2213 | } | ||
2214 | else { | ||
2215 | /* task is queued */ | ||
2216 | raw_spin_lock(&cluster->domain.release_lock); | ||
2217 | if (is_queued(t)) { | ||
2218 | TRACE_TASK(t, "is queued.\n"); | ||
2219 | |||
2220 | BUG_ON( | ||
2221 | !is_released(t, litmus_clock()) && | ||
2222 | !tsk_rt(t)->job_params.is_backlogged_job && | ||
2223 | !is_early_releasing(t)); | ||
2224 | |||
2225 | unlink(t); | ||
2226 | crm_job_arrival(t); | ||
2227 | } | ||
2228 | else { | ||
2229 | TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); | ||
2230 | } | ||
2231 | raw_spin_unlock(&cluster->domain.release_lock); | ||
2232 | } | ||
2233 | |||
2234 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
2235 | /* propagate to aux tasks */ | ||
2236 | if (tsk_rt(t)->has_aux_tasks) | ||
2237 | aux_task_owner_decrease_priority(t); | ||
2238 | #endif | ||
2239 | |||
2240 | #ifdef CONFIG_LITMUS_NVIDIA | ||
2241 | /* propagate to gpu */ | ||
2242 | if (tsk_rt(t)->held_gpus) | ||
2243 | gpu_owner_decrease_priority(t); | ||
2244 | #endif | ||
2245 | |||
2246 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2247 | } | ||
2248 | else { | ||
2249 | TRACE_TASK(t, "Spurious invalid priority decrease. " | ||
2250 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" | ||
2251 | "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
2252 | t->comm, t->pid, | ||
2253 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
2254 | (prio_inh) ? prio_inh->comm : "null", | ||
2255 | (prio_inh) ? prio_inh->pid : 0); | ||
2256 | success = 0; | ||
2257 | } | ||
2258 | #endif | ||
2259 | |||
2260 | out: | ||
2261 | return success; | ||
2262 | } | ||
2263 | |||
2264 | static void decrease_priority_inheritance(struct task_struct* t, | ||
2265 | struct task_struct* prio_inh, | ||
2266 | int budget_triggered) | ||
2267 | { | ||
2268 | crm_domain_t* cluster = task_cpu_cluster(t); | ||
2269 | |||
2270 | raw_readyq_lock(&cluster->cluster_lock); | ||
2271 | |||
2272 | TRACE_TASK(t, "to inherit from %s/%d (decrease)\n", | ||
2273 | (prio_inh) ? prio_inh->comm : "null", | ||
2274 | (prio_inh) ? prio_inh->pid : 0); | ||
2275 | |||
2276 | __decrease_priority_inheritance(t, prio_inh, budget_triggered); | ||
2277 | |||
2278 | raw_readyq_unlock(&cluster->cluster_lock); | ||
2279 | |||
2280 | #if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA) | ||
2281 | if(tsk_rt(t)->held_gpus) { | ||
2282 | int i; | ||
2283 | for(i = find_first_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus)); | ||
2284 | i < NV_DEVICE_NUM; | ||
2285 | i = find_next_bit(&tsk_rt(t)->held_gpus, BITS_PER_BYTE*sizeof(tsk_rt(t)->held_gpus), i+1)) { | ||
2286 | pai_check_priority_decrease(t, i); | ||
2287 | } | ||
2288 | } | ||
2289 | #endif | ||
2290 | } | ||
2291 | |||
2292 | |||
2293 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2294 | |||
2295 | /* called with IRQs off */ | ||
2296 | /* preconditions: | ||
2297 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
2298 | (2) The lock 'to_unlock' is held. | ||
2299 | */ | ||
2300 | static void nested_increase_priority_inheritance(struct task_struct* t, | ||
2301 | struct task_struct* prio_inh, | ||
2302 | raw_spinlock_t *to_unlock, | ||
2303 | unsigned long irqflags) | ||
2304 | { | ||
2305 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
2306 | |||
2307 | if(tsk_rt(t)->inh_task != prio_inh) { // avoid redundant calls. | ||
2308 | increase_priority_inheritance(t, prio_inh); // increase our prio. | ||
2309 | } | ||
2310 | |||
2311 | /* note: cluster lock is not held continuously during propagation, so there | ||
2312 | may be momentary inconsistencies while nested priority propagation 'chases' | ||
2313 | other updates. */ | ||
2314 | |||
2315 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. | ||
2316 | |||
2317 | if(blocked_lock) { | ||
2318 | if(blocked_lock->ops->supports_nesting) { | ||
2319 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
2320 | blocked_lock->ident); | ||
2321 | |||
2322 | // beware: recursion | ||
2323 | blocked_lock->ops->propagate_increase_inheritance(blocked_lock, | ||
2324 | t, to_unlock, | ||
2325 | irqflags); | ||
2326 | } | ||
2327 | else { | ||
2328 | TRACE_TASK(t, "Inheritor is blocked on litmus lock (%d) that does not support nesting!\n", | ||
2329 | blocked_lock->ident); | ||
2330 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2331 | } | ||
2332 | } | ||
2333 | else { | ||
2334 | TRACE_TASK(t, "is not blocked on litmus lock. No propagation.\n"); | ||
2335 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2336 | } | ||
2337 | } | ||
2338 | |||
2339 | /* called with IRQs off */ | ||
2340 | /* preconditions: | ||
2341 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
2342 | (2) The lock 'to_unlock' is held. | ||
2343 | */ | ||
2344 | static void nested_decrease_priority_inheritance(struct task_struct* t, | ||
2345 | struct task_struct* prio_inh, | ||
2346 | raw_spinlock_t *to_unlock, | ||
2347 | unsigned long irqflags, | ||
2348 | int budget_triggered) | ||
2349 | { | ||
2350 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
2351 | decrease_priority_inheritance(t, prio_inh, budget_triggered); | ||
2352 | |||
2353 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. | ||
2354 | |||
2355 | if(blocked_lock) { | ||
2356 | if(blocked_lock->ops->supports_nesting) { | ||
2357 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
2358 | blocked_lock->ident); | ||
2359 | // beware: recursion | ||
2360 | blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, | ||
2361 | to_unlock, | ||
2362 | irqflags, | ||
2363 | budget_triggered); | ||
2364 | } | ||
2365 | else { | ||
2366 | TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", | ||
2367 | blocked_lock); | ||
2368 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2369 | } | ||
2370 | } | ||
2371 | else { | ||
2372 | TRACE_TASK(t, "is not blocked. No propagation.\n"); | ||
2373 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2374 | } | ||
2375 | } | ||
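The nested-inheritance helpers recurse along the wait-for chain, dropping `t`'s `hp_blocked_tasks_lock` before following `blocked_lock` to the next holder, which is why concurrent updates may momentarily 'chase' one another as noted above. A simplified, lock-free sketch of that chain walk (illustrative types only; the real code hops with hand-over-hand locking via `propagate_increase_inheritance()`):

```c
#include <stdio.h>

struct task; /* forward declaration */

struct lock {
	struct task *owner;
};

struct task {
	const char *name;
	int eff_prio;        /* lower number = higher priority here */
	struct lock *blocked_on;
};

/* walk the wait-for chain and raise each holder's effective priority;
 * the kernel version re-takes per-task locks at every hop instead */
static void propagate_increase(struct task *t, int prio)
{
	while (t && prio < t->eff_prio) {
		t->eff_prio = prio;
		printf("%s now runs at prio %d\n", t->name, prio);
		t = t->blocked_on ? t->blocked_on->owner : NULL;
	}
}

int main(void)
{
	struct task c = { "C", 30, NULL };
	struct lock l2 = { &c };
	struct task b = { "B", 20, &l2 };
	struct lock l1 = { &b };
	struct task a = { "A", 10, &l1 };

	/* A (prio 10) blocks on l1 held by B, which blocks on l2 held by C */
	propagate_increase(l1.owner, a.eff_prio);
	return 0;
}
```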
2376 | |||
2377 | |||
2378 | /* ******************** FIFO MUTEX ********************** */ | ||
2379 | |||
2380 | static struct litmus_lock_ops crm_fifo_mutex_lock_ops = { | ||
2381 | .lock = fifo_mutex_lock, | ||
2382 | .unlock = fifo_mutex_unlock, | ||
2383 | .should_yield_lock = fifo_mutex_should_yield_lock, | ||
2384 | .close = fifo_mutex_close, | ||
2385 | .deallocate = fifo_mutex_free, | ||
2386 | |||
2387 | .budget_exhausted = fifo_mutex_budget_exhausted, | ||
2388 | .propagate_increase_inheritance = fifo_mutex_propagate_increase_inheritance, | ||
2389 | .propagate_decrease_inheritance = fifo_mutex_propagate_decrease_inheritance, | ||
2390 | |||
2391 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2392 | .dgl_lock = fifo_mutex_dgl_lock, | ||
2393 | .is_owner = fifo_mutex_is_owner, | ||
2394 | .get_owner = fifo_mutex_get_owner, | ||
2395 | .enable_priority = fifo_mutex_enable_priority, | ||
2396 | |||
2397 | .dgl_can_quick_lock = NULL, | ||
2398 | .dgl_quick_lock = NULL, | ||
2399 | |||
2400 | .supports_dgl = 1, | ||
2401 | .requires_atomic_dgl = 0, | ||
2402 | #endif | ||
2403 | .supports_nesting = 1, | ||
2404 | .supports_budget_exhaustion = 1, | ||
2405 | .is_omlp_family = 0, | ||
2406 | }; | ||
2407 | |||
2408 | static struct litmus_lock* crm_new_fifo_mutex(void) | ||
2409 | { | ||
2410 | return fifo_mutex_new(&crm_fifo_mutex_lock_ops); | ||
2411 | } | ||
2412 | |||
2413 | /* ******************** PRIOQ MUTEX ********************** */ | ||
2414 | |||
2415 | static struct litmus_lock_ops crm_prioq_mutex_lock_ops = { | ||
2416 | .lock = prioq_mutex_lock, | ||
2417 | .unlock = prioq_mutex_unlock, | ||
2418 | .should_yield_lock = prioq_mutex_should_yield_lock, | ||
2419 | .close = prioq_mutex_close, | ||
2420 | .deallocate = prioq_mutex_free, | ||
2421 | |||
2422 | .budget_exhausted = prioq_mutex_budget_exhausted, | ||
2423 | .propagate_increase_inheritance = prioq_mutex_propagate_increase_inheritance, | ||
2424 | .propagate_decrease_inheritance = prioq_mutex_propagate_decrease_inheritance, | ||
2425 | |||
2426 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2427 | .dgl_lock = prioq_mutex_dgl_lock, | ||
2428 | .is_owner = prioq_mutex_is_owner, | ||
2429 | .get_owner = prioq_mutex_get_owner, | ||
2430 | .enable_priority = prioq_mutex_enable_priority, | ||
2431 | |||
2432 | .dgl_can_quick_lock = prioq_mutex_dgl_can_quick_lock, | ||
2433 | .dgl_quick_lock = prioq_mutex_dgl_quick_lock, | ||
2434 | |||
2435 | .supports_dgl = 1, | ||
2436 | .requires_atomic_dgl = 1, | ||
2437 | #endif | ||
2438 | .supports_nesting = 1, | ||
2439 | .supports_budget_exhaustion = 1, | ||
2440 | .is_omlp_family = 0, | ||
2441 | }; | ||
2442 | |||
2443 | static struct litmus_lock* crm_new_prioq_mutex(void) | ||
2444 | { | ||
2445 | return prioq_mutex_new(&crm_prioq_mutex_lock_ops); | ||
2446 | } | ||
2447 | |||
2448 | /* ******************** IKGLP ********************** */ | ||
2449 | |||
2450 | static struct litmus_lock_ops crm_ikglp_lock_ops = { | ||
2451 | .lock = ikglp_lock, | ||
2452 | .unlock = ikglp_unlock, | ||
2453 | .should_yield_lock = NULL, | ||
2454 | .close = ikglp_close, | ||
2455 | .deallocate = ikglp_free, | ||
2456 | |||
2457 | .budget_exhausted = ikglp_budget_exhausted, | ||
2458 | .omlp_virtual_unlock = ikglp_virtual_unlock, | ||
2459 | |||
2460 | // ikglp can only be an outer-most lock. | ||
2461 | .propagate_increase_inheritance = NULL, | ||
2462 | .propagate_decrease_inheritance = NULL, | ||
2463 | |||
2464 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2465 | .supports_dgl = 0, | ||
2466 | .requires_atomic_dgl = 0, | ||
2467 | #endif | ||
2468 | .supports_nesting = 0, | ||
2469 | .supports_budget_exhaustion = 1, | ||
2470 | .is_omlp_family = 1, | ||
2471 | }; | ||
2472 | |||
2473 | static struct litmus_lock* crm_new_ikglp(void* __user arg) | ||
2474 | { | ||
2475 | // assumes clusters of uniform size. | ||
2476 | return ikglp_new(cluster_size, &crm_ikglp_lock_ops, arg); | ||
2477 | } | ||
2478 | |||
2479 | |||
2480 | /* ******************** KFMLP support ********************** */ | ||
2481 | |||
2482 | static struct litmus_lock_ops crm_kfmlp_lock_ops = { | ||
2483 | .lock = kfmlp_lock, | ||
2484 | .unlock = kfmlp_unlock, | ||
2485 | .should_yield_lock = NULL, | ||
2486 | .close = kfmlp_close, | ||
2487 | .deallocate = kfmlp_free, | ||
2488 | |||
2489 | // kfmlp can only be an outer-most lock. | ||
2490 | .propagate_increase_inheritance = NULL, | ||
2491 | .propagate_decrease_inheritance = NULL, | ||
2492 | |||
2493 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2494 | .supports_dgl = 0, | ||
2495 | .requires_atomic_dgl = 0, | ||
2496 | #endif | ||
2497 | .supports_nesting = 0, | ||
2498 | .supports_budget_exhaustion = 0, | ||
2499 | .is_omlp_family = 0, | ||
2500 | }; | ||
2501 | |||
2502 | |||
2503 | static struct litmus_lock* crm_new_kfmlp(void* __user arg) | ||
2504 | { | ||
2505 | return kfmlp_new(&crm_kfmlp_lock_ops, arg); | ||
2506 | } | ||
2507 | |||
2508 | |||
2509 | /* **** lock constructor **** */ | ||
2510 | |||
2511 | static long crm_allocate_lock(struct litmus_lock **lock, int type, | ||
2512 | void* __user args) | ||
2513 | { | ||
2514 | int err; | ||
2515 | |||
2516 | switch (type) { | ||
2517 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2518 | case FIFO_MUTEX: | ||
2519 | *lock = crm_new_fifo_mutex(); | ||
2520 | break; | ||
2521 | |||
2522 | case PRIOQ_MUTEX: | ||
2523 | *lock = crm_new_prioq_mutex(); | ||
2524 | break; | ||
2525 | |||
2526 | case IKGLP_SEM: | ||
2527 | *lock = crm_new_ikglp(args); | ||
2528 | break; | ||
2529 | #endif | ||
2530 | case KFMLP_SEM: | ||
2531 | *lock = crm_new_kfmlp(args); | ||
2532 | break; | ||
2533 | |||
2534 | default: | ||
2535 | err = -ENXIO; | ||
2536 | goto UNSUPPORTED_LOCK; | ||
2537 | } | ||
2538 | |||
2539 | if (*lock) | ||
2540 | err = 0; | ||
2541 | else | ||
2542 | err = -ENOMEM; | ||
2543 | |||
2544 | UNSUPPORTED_LOCK: | ||
2545 | return err; | ||
2546 | } | ||
2547 | |||
2548 | #endif // CONFIG_LITMUS_LOCKING | ||
2549 | |||
2550 | |||
2551 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
2552 | static struct affinity_observer_ops crm_kfmlp_affinity_ops __maybe_unused = { | ||
2553 | .close = kfmlp_aff_obs_close, | ||
2554 | .deallocate = kfmlp_aff_obs_free, | ||
2555 | }; | ||
2556 | |||
2557 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2558 | static struct affinity_observer_ops crm_ikglp_affinity_ops __maybe_unused = { | ||
2559 | .close = ikglp_aff_obs_close, | ||
2560 | .deallocate = ikglp_aff_obs_free, | ||
2561 | }; | ||
2562 | #endif | ||
2563 | |||
2564 | static long crm_allocate_affinity_observer(struct affinity_observer **aff_obs, | ||
2565 | int type, | ||
2566 | void* __user args) | ||
2567 | { | ||
2568 | int err; | ||
2569 | |||
2570 | switch (type) { | ||
2571 | #ifdef CONFIG_LITMUS_NVIDIA | ||
2572 | case KFMLP_SIMPLE_GPU_AFF_OBS: | ||
2573 | *aff_obs = kfmlp_simple_gpu_aff_obs_new(&crm_kfmlp_affinity_ops, args); | ||
2574 | break; | ||
2575 | |||
2576 | case KFMLP_GPU_AFF_OBS: | ||
2577 | *aff_obs = kfmlp_gpu_aff_obs_new(&crm_kfmlp_affinity_ops, args); | ||
2578 | break; | ||
2579 | |||
2580 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2581 | case IKGLP_SIMPLE_GPU_AFF_OBS: | ||
2582 | *aff_obs = ikglp_simple_gpu_aff_obs_new(&crm_ikglp_affinity_ops, args); | ||
2583 | break; | ||
2584 | |||
2585 | case IKGLP_GPU_AFF_OBS: | ||
2586 | *aff_obs = ikglp_gpu_aff_obs_new(&crm_ikglp_affinity_ops, args); | ||
2587 | break; | ||
2588 | #endif | ||
2589 | #endif | ||
2590 | default: | ||
2591 | err = -ENXIO; | ||
2592 | goto UNSUPPORTED_AFF_OBS; | ||
2593 | 	} | ||
2594 | |||
2595 | if (*aff_obs) | ||
2596 | err = 0; | ||
2597 | else | ||
2598 | err = -ENOMEM; | ||
2599 | |||
2600 | UNSUPPORTED_AFF_OBS: | ||
2601 | return err; | ||
2602 | } | ||
2603 | #endif | ||
2604 | |||
2605 | |||
2606 | |||
2607 | #endif // CONFIG_LITMUS_NESTED_LOCKING | ||
2608 | |||
2609 | |||
2610 | #ifdef VERBOSE_INIT | ||
2611 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
2612 | { | ||
2613 | 	char buf[256]; | ||
2614 |  | ||
2615 | 	/* cpulist_scnprintf() NUL-terminates its output itself */ | ||
2616 | 	cpulist_scnprintf(buf, sizeof(buf), mask); | ||
2617 | 	printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
2618 | } | ||
2621 | #endif | ||
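/*
 * Editorial note: later mainline kernels dropped cpulist_scnprintf() in
 * favor of the %*pbl vsnprintf extension; there the helper above reduces
 * to a single statement -- a sketch, not part of this patch:
 */
static void print_cluster_topology_pbl(cpumask_var_t mask, int cpu)
{
	printk(KERN_INFO "CPU = %d, shared cpu(s) = %*pbl\n",
	       cpu, cpumask_pr_args(mask));
}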
2622 | |||
2623 | static void cleanup_crm(void) | ||
2624 | { | ||
2625 | int i; | ||
2626 | |||
2627 | if (clusters_allocated) { | ||
2628 | for (i = 0; i < num_clusters; i++) { | ||
2629 | kfree(crm[i].cpus); | ||
2630 | free_cpumask_var(crm[i].cpu_map); | ||
2631 | } | ||
2632 | |||
2633 | 		kfree(crm); | ||
2633 | 		crm = NULL; | ||
2633 | 		clusters_allocated = 0; /* don't double-free if re-activation bails early */ | ||
2634 | } | ||
2635 | } | ||
2636 | |||
2637 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
2638 | static int crm_map_gpu_to_cpu(int gpu) | ||
2639 | { | ||
2640 | int default_cpu; | ||
2641 | int cpu_cluster = gpu / gpu_cluster_size; | ||
2642 | |||
2643 | /* bonham-specific hack for the fully partitioned case (both CPUs and GPUs partitioned) */ | ||
2644 | /* TODO: Make this aware of the NUMA topology generically */ | ||
2645 | 	if (num_clusters == 12 && num_gpu_clusters == 8) { | ||
2646 | 		if (gpu >= 4) { | ||
2647 | cpu_cluster += 2; // assign the GPU to a CPU on the same NUMA node | ||
2648 | } | ||
2649 | } | ||
2650 | |||
2651 | default_cpu = crm[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster | ||
2652 | |||
2653 | TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu); | ||
2654 | |||
2655 | return default_cpu; | ||
2656 | } | ||
2657 | #endif | ||
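/*
 * Editorial sketch, not part of this patch: a stand-alone user-space check
 * of the mapping above for the hard-coded bonham case (8 GPUs, 12 CPU
 * clusters, hence gpu_cluster_size == 1). Skipping two clusters for
 * gpu >= 4 lands the upper half of the GPUs on CPU clusters of the second
 * NUMA node:
 */
#include <stdio.h>

int main(void)
{
	const int num_clusters = 12, num_gpu_clusters = 8, gpu_cluster_size = 1;
	int gpu;

	for (gpu = 0; gpu < 8; gpu++) {
		int cpu_cluster = gpu / gpu_cluster_size;
		if (num_clusters == 12 && num_gpu_clusters == 8 && gpu >= 4)
			cpu_cluster += 2;	/* hop to the second NUMA node */
		printf("GPU %d -> CPU cluster %d\n", gpu, cpu_cluster);
	}
	return 0;	/* GPUs 0-3 -> clusters 0-3; GPUs 4-7 -> clusters 6-9 */
}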
2658 | |||
2659 | static long crm_activate_plugin(void) | ||
2660 | { | ||
2661 | int i, j, cpu, ccpu, cpu_count; | ||
2662 | cpu_entry_t *entry; | ||
2663 | |||
2664 | cpumask_var_t mask; | ||
2665 | int chk = 0; | ||
2666 | |||
2667 | /* de-allocate old clusters, if any */ | ||
2668 | cleanup_crm(); | ||
2669 | |||
2670 | printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n", | ||
2671 | cluster_config); | ||
2672 | |||
2673 | /* need to get cluster_size first */ | ||
2674 | 	if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
2675 | return -ENOMEM; | ||
2676 | |||
2677 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
2678 | cluster_size = num_online_cpus(); | ||
2679 | } else { | ||
2680 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
2681 | if (chk) { | ||
2682 | 			/* a nonzero return is the maximum index supported | ||
2682 | 			 * by this hardware: the requested level is out of range */ | ||
2683 | printk(KERN_INFO "C-RM: Cluster configuration = %d " | ||
2684 | "is not supported on this hardware.\n", | ||
2685 | cluster_config); | ||
2686 | 			/* User should notice that the configuration failed, so | ||
2687 | 			 * let's bail out (without leaking the mask). */ | ||
2687 | 			free_cpumask_var(mask); | ||
2688 | 			return -EINVAL; | ||
2689 | } | ||
2690 | |||
2691 | cluster_size = cpumask_weight(mask); | ||
2692 | } | ||
2693 | |||
2694 | if ((num_online_cpus() % cluster_size) != 0) { | ||
2695 | /* this can't be right, some cpus are left out */ | ||
2696 | printk(KERN_ERR "C-RM: Trying to group %d cpus in %d!\n", | ||
2697 | num_online_cpus(), cluster_size); | ||
2697 | 		free_cpumask_var(mask); | ||
2698 | 		return -EINVAL;	/* proper errno instead of -1 */ | ||
2699 | } | ||
2700 | |||
2701 | num_clusters = num_online_cpus() / cluster_size; | ||
2702 | printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n", | ||
2703 | num_clusters, cluster_size); | ||
2704 | |||
2705 | |||
2706 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
2707 | 	num_gpu_clusters = min(num_clusters, num_online_gpus()); | ||
2708 | 	if (num_gpu_clusters) { | ||
2709 | 		gpu_cluster_size = num_online_gpus() / num_gpu_clusters; | ||
2710 | 		if (((num_online_gpus() % gpu_cluster_size) != 0) || | ||
2711 | 			(num_gpu_clusters != num_clusters)) { | ||
2712 | 			printk(KERN_WARNING "C-RM: GPUs not uniformly distributed among CPU clusters.\n"); | ||
2713 | 		} | ||
2713 | 	} else { | ||
2713 | 		gpu_cluster_size = 0; /* no GPUs online; avoid a divide-by-zero */ | ||
2713 | 	} | ||
2714 | #endif | ||
2715 | |||
2716 | /* initialize clusters */ | ||
2717 | 	crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC); | ||
2717 | 	if (!crm) { | ||
2717 | 		free_cpumask_var(mask); | ||
2717 | 		return -ENOMEM; | ||
2717 | 	} | ||
2718 | for (i = 0; i < num_clusters; i++) { | ||
2719 | |||
2720 | crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
2721 | GFP_ATOMIC); | ||
2722 | INIT_BINHEAP_HANDLE(&(crm[i].cpu_heap), cpu_lower_prio); | ||
2723 | rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs); | ||
2724 | |||
2725 | |||
2726 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
2727 | crm[i].pending_tasklets.head = NULL; | ||
2728 | crm[i].pending_tasklets.tail = &(crm[i].pending_tasklets.head); | ||
2729 | #endif | ||
2730 | |||
2731 | |||
2732 | 		if (!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC)) | ||
2733 | return -ENOMEM; | ||
2734 | #ifdef CONFIG_RELEASE_MASTER | ||
2735 | crm[i].domain.release_master = atomic_read(&release_master_cpu); | ||
2736 | #endif | ||
2737 | } | ||
2738 | |||
2739 | 	/* cycle through the clusters and add CPUs to them */ | ||
2740 | for (i = 0; i < num_clusters; i++) { | ||
2741 | |||
2742 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2743 | raw_spin_lock_init(&crm[i].dgl_lock); | ||
2744 | #endif | ||
2745 | |||
2746 | #ifdef RECURSIVE_READY_QUEUE_LOCK | ||
2747 | crm[i].recursive_depth = 0; | ||
2748 | atomic_set(&crm[i].owner_cpu, NO_CPU); | ||
2749 | #endif | ||
2750 | |||
2751 | crm[i].top_m_size = 0; | ||
2752 | INIT_BINHEAP_HANDLE(&crm[i].top_m, crm_min_heap_base_priority_order); | ||
2753 | INIT_BINHEAP_HANDLE(&crm[i].not_top_m, crm_max_heap_base_priority_order); | ||
2754 | |||
2755 | for_each_online_cpu(cpu) { | ||
2756 | /* check if the cpu is already in a cluster */ | ||
2757 | for (j = 0; j < num_clusters; j++) | ||
2758 | if (cpumask_test_cpu(cpu, crm[j].cpu_map)) | ||
2759 | break; | ||
2760 | /* if it is in a cluster go to next cpu */ | ||
2761 | if (j < num_clusters && | ||
2762 | cpumask_test_cpu(cpu, crm[j].cpu_map)) | ||
2763 | continue; | ||
2764 | |||
2765 | /* this cpu isn't in any cluster */ | ||
2766 | /* get the shared cpus */ | ||
2767 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
2768 | cpumask_copy(mask, cpu_online_mask); | ||
2769 | else | ||
2770 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
2771 | |||
2772 | cpumask_copy(crm[i].cpu_map, mask); | ||
2773 | #ifdef VERBOSE_INIT | ||
2774 | print_cluster_topology(mask, cpu); | ||
2775 | #endif | ||
2776 | /* add cpus to current cluster and init cpu_entry_t */ | ||
2777 | cpu_count = 0; | ||
2778 | for_each_cpu(ccpu, crm[i].cpu_map) { | ||
2779 | |||
2780 | entry = &per_cpu(crm_cpu_entries, ccpu); | ||
2781 | crm[i].cpus[cpu_count] = entry; | ||
2782 | |||
2783 | memset(entry, 0, sizeof(*entry)); | ||
2784 | entry->cpu = ccpu; | ||
2785 | entry->cluster = &crm[i]; | ||
2786 | INIT_BINHEAP_NODE(&entry->hn); | ||
2787 | mb(); | ||
2788 | |||
2789 | ++cpu_count; | ||
2790 | |||
2791 | #ifdef CONFIG_RELEASE_MASTER | ||
2792 | /* only add CPUs that should schedule jobs */ | ||
2793 | if (entry->cpu != entry->cluster->domain.release_master) | ||
2794 | #endif | ||
2795 | update_cpu_position(entry); | ||
2796 | } | ||
2797 | /* done with this cluster */ | ||
2798 | break; | ||
2799 | } | ||
2800 | } | ||
2801 | |||
2802 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
2803 | init_klmirqd(); | ||
2804 | #endif | ||
2805 | |||
2806 | #ifdef CONFIG_LITMUS_NVIDIA | ||
2807 | init_nvidia_info(); | ||
2808 | #endif | ||
2809 | |||
2810 | free_cpumask_var(mask); | ||
2811 | clusters_allocated = 1; | ||
2812 | return 0; | ||
2813 | } | ||
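/*
 * Editorial sketch, not part of this patch: even with the NULL check on crm
 * above, a failed allocation inside the cluster loop still leaks the
 * allocations of earlier iterations. Note also that crm[i].cpus only ever
 * stores cpu_entry_t pointers, so sizeof(cpu_entry_t *) suffices. The usual
 * goto-unwind pattern would consolidate the error handling (GFP_KERNEL on
 * the assumption that the plugin-switch path may sleep):
 */
static long crm_alloc_clusters(void)
{
	int i;

	crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_KERNEL);
	if (!crm)
		return -ENOMEM;

	for (i = 0; i < num_clusters; i++) {
		crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t *),
				GFP_KERNEL);
		if (!crm[i].cpus)
			goto err_unwind;
		if (!zalloc_cpumask_var(&crm[i].cpu_map, GFP_KERNEL)) {
			kfree(crm[i].cpus);
			goto err_unwind;
		}
	}
	return 0;

err_unwind:
	while (--i >= 0) {
		free_cpumask_var(crm[i].cpu_map);
		kfree(crm[i].cpus);
	}
	kfree(crm);
	crm = NULL;
	return -ENOMEM;
}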
2814 | |||
2815 | /* Plugin object */ | ||
2816 | static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = { | ||
2817 | .plugin_name = "C-RM", // for now | ||
2818 | .finish_switch = crm_finish_switch, | ||
2819 | .tick = crm_tick, | ||
2820 | .task_new = crm_task_new, | ||
2821 | .complete_job = complete_job, | ||
2822 | .task_exit = crm_task_exit, | ||
2823 | .schedule = crm_schedule, | ||
2824 | .task_wake_up = crm_task_wake_up, | ||
2825 | .task_block = crm_task_block, | ||
2826 | .admit_task = crm_admit_task, | ||
2827 | .activate_plugin = crm_activate_plugin, | ||
2828 | .compare = rm_higher_prio, | ||
2829 | #ifdef CONFIG_LITMUS_LOCKING | ||
2830 | .allocate_lock = crm_allocate_lock, | ||
2831 | .increase_prio = increase_priority_inheritance, | ||
2832 | .decrease_prio = decrease_priority_inheritance, | ||
2833 | .__increase_prio = __increase_priority_inheritance, | ||
2834 | .__decrease_prio = __decrease_priority_inheritance, | ||
2835 | #endif | ||
2836 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2837 | .nested_increase_prio = nested_increase_priority_inheritance, | ||
2838 | .nested_decrease_prio = nested_decrease_priority_inheritance, | ||
2839 | .__compare = __rm_higher_prio, | ||
2840 | #endif | ||
2841 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2842 | .get_dgl_spinlock = crm_get_dgl_spinlock, | ||
2843 | #endif | ||
2844 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
2845 | .allocate_aff_obs = crm_allocate_affinity_observer, | ||
2846 | #endif | ||
2847 | #ifdef CONFIG_LITMUS_PAI_SOFTIRQD | ||
2848 | .enqueue_pai_tasklet = crm_enqueue_pai_tasklet, | ||
2849 | .change_prio_pai_tasklet = crm_change_prio_pai_tasklet, | ||
2850 | .run_tasklets = crm_run_tasklets, | ||
2851 | #endif | ||
2852 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
2853 | .map_gpu_to_cpu = crm_map_gpu_to_cpu, | ||
2854 | #endif | ||
2855 | }; | ||
2856 | |||
2857 | static struct proc_dir_entry *cluster_file = NULL, *crm_dir = NULL; | ||
2858 | |||
2859 | static int __init init_crm(void) | ||
2860 | { | ||
2861 | int err, fs; | ||
2862 | |||
2863 | err = register_sched_plugin(&crm_plugin); | ||
2864 | if (!err) { | ||
2865 | fs = make_plugin_proc_dir(&crm_plugin, &crm_dir); | ||
2866 | if (!fs) | ||
2867 | cluster_file = create_cluster_file(crm_dir, &cluster_config); | ||
2868 | else | ||
2869 | printk(KERN_ERR "Could not allocate C-RM procfs dir.\n"); | ||
2870 | } | ||
2871 | return err; | ||
2872 | } | ||
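/*
 * Editorial note: register_sched_plugin() only makes C-RM selectable; it
 * becomes the active scheduler once user space switches plugins through
 * LITMUS^RT's proc interface (or the liblitmus setsched helper), e.g.:
 *
 *	echo C-RM > /proc/litmus/active_plugin
 */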
2873 | |||
2874 | static void clean_crm(void) | ||
2875 | { | ||
2876 | cleanup_crm(); | ||
2877 | if (cluster_file) | ||
2878 | remove_proc_entry("cluster", crm_dir); | ||
2879 | if (crm_dir) | ||
2880 | remove_plugin_proc_dir(&crm_plugin); | ||
2881 | } | ||
2882 | |||
2883 | module_init(init_crm); | ||
2884 | module_exit(clean_crm); | ||
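/*
 * Editorial note: PLUGIN_CRM is a bool Kconfig option, so this code is
 * always built in, never a loadable module. module_init() thus degrades to
 * a plain initcall, and the module_exit() handler is discarded at link
 * time -- clean_crm() is effectively dead code kept for symmetry with
 * cleanup_crm().
 */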