diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-09 15:48:51 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2013-10-09 15:48:51 -0400 |
commit | 91ac0052eec03b8624ea3613acd50825c17981f1 (patch) | |
tree | 3a174eaaecb516e67009ba6284f3957a30e013d8 | |
parent | 7fb54decc59fa5855cd273bfda908fd443a37e67 (diff) |
Remove C-RM support for now.
Remove C-RM support. It will be easier to re-add C-RM
after we've ported C-EDF forward.
-rw-r--r-- | litmus/Kconfig | 10 | ||||
-rw-r--r-- | litmus/Makefile | 1 | ||||
-rw-r--r-- | litmus/rm_common.c | 300 | ||||
-rw-r--r-- | litmus/sched_crm.c | 2562 |
4 files changed, 0 insertions, 2873 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig index 03f31157abc7..3adfa1fe9800 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig | |||
@@ -12,16 +12,6 @@ config PLUGIN_CEDF | |||
12 | On smaller platforms (e.g., ARM PB11MPCore), using C-EDF | 12 | On smaller platforms (e.g., ARM PB11MPCore), using C-EDF |
13 | makes little sense since there aren't any shared caches. | 13 | makes little sense since there aren't any shared caches. |
14 | 14 | ||
15 | config PLUGIN_CRM | ||
16 | bool "Clustered-RM" | ||
17 | depends on X86 && SYSFS | ||
18 | default y | ||
19 | help | ||
20 | Include the Clustered RM (C-RM) plugin in the kernel. | ||
21 | This is appropriate for large platforms with shared caches. | ||
22 | On smaller platforms (e.g., ARM PB11MPCore), using C-EDF | ||
23 | makes little sense since there aren't any shared caches. | ||
24 | |||
25 | config RECURSIVE_READYQ_LOCK | 15 | config RECURSIVE_READYQ_LOCK |
26 | bool "Recursive Ready Queue Lock" | 16 | bool "Recursive Ready Queue Lock" |
27 | default n | 17 | default n |
diff --git a/litmus/Makefile b/litmus/Makefile index 08ed4a663d8f..264640dd013b 100644 --- a/litmus/Makefile +++ b/litmus/Makefile | |||
@@ -25,7 +25,6 @@ obj-y = sched_plugin.o litmus.o \ | |||
25 | sched_pfp.o | 25 | sched_pfp.o |
26 | 26 | ||
27 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o | 27 | obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o |
28 | obj-$(CONFIG_PLUGIN_CRM) += rm_common.o sched_crm.o | ||
29 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o | 28 | obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o |
30 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o | 29 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o |
31 | 30 | ||
diff --git a/litmus/rm_common.c b/litmus/rm_common.c deleted file mode 100644 index 8d4cdf4c71cf..000000000000 --- a/litmus/rm_common.c +++ /dev/null | |||
@@ -1,300 +0,0 @@ | |||
1 | /* | ||
2 | * kernel/rm_common.c | ||
3 | * | ||
4 | * Common functions for RM-based schedulers. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
16 | #include <litmus/locking.h> | ||
17 | #endif | ||
18 | |||
19 | #include <litmus/rm_common.h> | ||
20 | |||
21 | |||
22 | /* rm_higher_prio - returns true if first has a higher RM priority | ||
23 | * than second (shorter period wins). Period ties are broken by PID. | ||
24 | * | ||
25 | * both first and second may be NULL; a non-NULL task outranks NULL | ||
26 | */ | ||
27 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
28 | int __rm_higher_prio( | ||
29 | struct task_struct* first, comparison_mode_t first_mode, | ||
30 | struct task_struct* second, comparison_mode_t second_mode) | ||
31 | #else | ||
32 | int rm_higher_prio(struct task_struct* first, struct task_struct* second) | ||
33 | #endif | ||
34 | { | ||
35 | struct task_struct *first_task = first; | ||
36 | struct task_struct *second_task = second; | ||
37 | |||
38 | /* There is no point in comparing a task to itself. */ | ||
39 | if (first && first == second) { | ||
40 | TRACE_CUR("WARNING: pointless rm priority comparison: %s/%d\n", first->comm, first->pid); | ||
41 | // WARN_ON(1); | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | /* check for NULL tasks */ | ||
46 | if (!first || !second) { | ||
47 | return first && !second; | ||
48 | } | ||
49 | /* check for non-realtime */ | ||
50 | if (!is_realtime(first) || !is_realtime(second)) { | ||
51 | return is_realtime(first) && !is_realtime(second); | ||
52 | } | ||
53 | |||
54 | /* There is some goofy stuff in this code here. There are three subclasses | ||
55 | * within the SCHED_LITMUS scheduling class: | ||
56 | * 1) Auxiliary tasks: COTS helper threads from the application level that | ||
57 | * are forced to be real-time. | ||
58 | * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers. | ||
59 | * 3) Normal Litmus tasks. | ||
60 | * | ||
61 | * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might | ||
62 | * inherit a priority from a task of #3. | ||
63 | * | ||
64 | * The code proceeds in the following manner: | ||
65 | * 1) Make aux and klmirqd threads with base-priorities have low priorities. | ||
66 | * 2) Determine effective priorities. | ||
67 | * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie. | ||
68 | */ | ||
69 | |||
70 | |||
71 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED) | ||
72 | /* run aux tasks at max priority */ | ||
73 | if (tsk_rt(first)->is_aux_task != tsk_rt(second)->is_aux_task) { | ||
74 | return (tsk_rt(first)->is_aux_task > tsk_rt(second)->is_aux_task); | ||
75 | } | ||
76 | #elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
77 | { | ||
78 | int first_lo_aux = tsk_rt(first)->is_aux_task && !tsk_rt(first)->inh_task; | ||
79 | int second_lo_aux = tsk_rt(second)->is_aux_task && !tsk_rt(second)->inh_task; | ||
80 | |||
81 | /* prioritize aux tasks without inheritance below real-time tasks */ | ||
82 | if (first_lo_aux || second_lo_aux) { | ||
83 | // one of these is an aux task without inheritance. | ||
84 | if (first_lo_aux != second_lo_aux) { | ||
85 | int temp = (first_lo_aux < second_lo_aux); // non-lo-aux has higher priority. | ||
86 | return temp; | ||
87 | } | ||
88 | else { | ||
89 | /* both MUST be lo_aux. tie-break. */ | ||
90 | //TRACE_CUR("aux tie break!\n"); | ||
91 | goto aux_tie_break; | ||
92 | } | ||
93 | } | ||
94 | |||
95 | if (tsk_rt(first)->is_aux_task && tsk_rt(second)->is_aux_task && | ||
96 | tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) { | ||
97 | // inh_task is !NULL for both tasks since neither was a lo_aux task. | ||
98 | // Both aux tasks inherit from the same task, so tie-break | ||
99 | // by base priority of the aux tasks. | ||
100 | //TRACE_CUR("aux tie break!\n"); | ||
101 | goto aux_tie_break; | ||
102 | } | ||
103 | } | ||
104 | #endif | ||
105 | |||
106 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
107 | { | ||
108 | int first_lo_klmirqd = tsk_rt(first)->is_interrupt_thread && !tsk_rt(first)->inh_task; | ||
109 | int second_lo_klmirqd = tsk_rt(second)->is_interrupt_thread && !tsk_rt(second)->inh_task; | ||
110 | |||
111 | /* prioritize klmirqd threads without inheritance below real-time tasks */ | ||
112 | if (first_lo_klmirqd || second_lo_klmirqd) { | ||
113 | // one of these is a klmirqd thread without inheritance. | ||
114 | if (first_lo_klmirqd != second_lo_klmirqd) { | ||
115 | int temp = (first_lo_klmirqd < second_lo_klmirqd); // non-klmirqd has higher priority | ||
116 | return temp; | ||
117 | } | ||
118 | else { | ||
119 | /* both MUST be klmirqd. tie-break. */ | ||
120 | //TRACE_CUR("klmirqd tie break!\n"); | ||
121 | goto klmirqd_tie_break; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | if (tsk_rt(first)->is_interrupt_thread && tsk_rt(second)->is_interrupt_thread && | ||
126 | tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) { | ||
127 | // inh_task is !NULL for both tasks since neither was a lo_klmirqd task. | ||
128 | // Both klmirqd tasks inherit from the same task, so tie-break | ||
129 | // by base priority of the klmirqd tasks. | ||
130 | //TRACE_CUR("klmirqd tie break!\n"); | ||
131 | goto klmirqd_tie_break; | ||
132 | } | ||
133 | } | ||
134 | #endif | ||
135 | |||
136 | |||
137 | #ifdef CONFIG_LITMUS_LOCKING | ||
138 | /* Check for EFFECTIVE priorities. Change task | ||
139 | * used for comparison in such a case. | ||
140 | */ | ||
141 | if (unlikely(tsk_rt(first)->inh_task) | ||
142 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
143 | && (first_mode == EFFECTIVE) | ||
144 | #endif | ||
145 | ) { | ||
146 | first_task = tsk_rt(first)->inh_task; | ||
147 | } | ||
148 | if (unlikely(tsk_rt(second)->inh_task) | ||
149 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
150 | && (second_mode == EFFECTIVE) | ||
151 | #endif | ||
152 | ) { | ||
153 | second_task = tsk_rt(second)->inh_task; | ||
154 | } | ||
155 | |||
156 | /* Check for priority boosting. Tie-break by start of boosting. | ||
157 | */ | ||
158 | if (unlikely(is_priority_boosted(first_task))) { | ||
159 | /* first_task is boosted, how about second_task? */ | ||
160 | if (!is_priority_boosted(second_task) || | ||
161 | lt_before(get_boost_start(first_task), | ||
162 | get_boost_start(second_task))) { | ||
163 | return 1; | ||
164 | } | ||
165 | else { | ||
166 | return 0; | ||
167 | } | ||
168 | } | ||
169 | else if (unlikely(is_priority_boosted(second_task))) { | ||
170 | /* second_task is boosted, first is not*/ | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | #endif | ||
175 | |||
176 | #ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE | ||
177 | aux_tie_break: | ||
178 | #endif | ||
179 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
180 | klmirqd_tie_break: | ||
181 | #endif | ||
182 | |||
183 | // KLUDGE! This is reverse of fp_common's implementation!!! | ||
184 | if (get_period(first_task) < get_period(second_task)) | ||
185 | return 1; | ||
186 | else if (get_period(first_task) == get_period(second_task)) { | ||
187 | if (first_task->pid < second_task->pid) | ||
188 | return 1; | ||
189 | else if (first_task->pid == second_task->pid) { | ||
190 | /* there is inheritance going on. consider inheritors. */ | ||
191 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
192 | /* non-interrupt thread gets prio */ | ||
193 | if (!tsk_rt(first)->is_interrupt_thread && tsk_rt(second)->is_interrupt_thread) | ||
194 | return 1; | ||
195 | else if (tsk_rt(first)->is_interrupt_thread == tsk_rt(second)->is_interrupt_thread) { | ||
196 | #endif | ||
197 | |||
198 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
199 | /* non-aux thread gets prio */ | ||
200 | if (!tsk_rt(first)->is_aux_task && tsk_rt(second)->is_aux_task) | ||
201 | return 1; | ||
202 | else if (tsk_rt(first_task)->is_aux_task == tsk_rt(second_task)->is_aux_task) { | ||
203 | #endif | ||
204 | /* if both tasks inherit from the same task */ | ||
205 | if (tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) { | ||
206 | /* TODO: Make a recursive call to rm_higher_prio, | ||
207 | comparing base priorities. */ | ||
208 | return (first->pid < second->pid); | ||
209 | } | ||
210 | else { | ||
211 | /* At least one task must inherit */ | ||
212 | BUG_ON(!tsk_rt(first)->inh_task && | ||
213 | !tsk_rt(second)->inh_task); | ||
214 | |||
215 | /* The task withOUT the inherited priority wins. */ | ||
216 | if (tsk_rt(second)->inh_task) { | ||
217 | return 1; | ||
218 | } | ||
219 | } | ||
220 | #if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE) | ||
221 | } | ||
222 | #endif | ||
223 | |||
224 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
225 | } | ||
226 | #endif | ||
227 | } | ||
228 | } | ||
229 | |||
230 | return 0; /* fall-through. prio(second_task) > prio(first_task) */ | ||
231 | } | ||
232 | |||
233 | |||
234 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
235 | int rm_higher_prio(struct task_struct* first, struct task_struct* second) | ||
236 | { | ||
237 | return __rm_higher_prio(first, EFFECTIVE, second, EFFECTIVE); | ||
238 | } | ||
239 | |||
240 | int rm_max_heap_order(struct binheap_node *a, struct binheap_node *b) | ||
241 | { | ||
242 | struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); | ||
243 | struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); | ||
244 | |||
245 | return __rm_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE); | ||
246 | } | ||
247 | |||
248 | int rm_min_heap_order(struct binheap_node *a, struct binheap_node *b) | ||
249 | { | ||
250 | return rm_max_heap_order(b, a); // swap comparison | ||
251 | } | ||
252 | |||
253 | int rm_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) | ||
254 | { | ||
255 | struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node); | ||
256 | struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node); | ||
257 | |||
258 | return __rm_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE); | ||
259 | } | ||
260 | |||
261 | int rm_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b) | ||
262 | { | ||
263 | return rm_max_heap_base_priority_order(b, a); // swap comparison | ||
264 | } | ||
265 | #endif | ||
266 | |||
267 | |||
268 | int rm_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
269 | { | ||
270 | return rm_higher_prio(bheap2task(a), bheap2task(b)); | ||
271 | } | ||
272 | |||
273 | void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
274 | release_jobs_t release) | ||
275 | { | ||
276 | rt_domain_init(rt, rm_ready_order, resched, release); | ||
277 | } | ||
278 | |||
279 | /* rm_preemption_needed - check whether the task t needs to be preempted | ||
280 | * call only with irqs disabled and with ready_lock acquired | ||
281 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
282 | */ | ||
283 | int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
284 | { | ||
285 | /* we need the read lock for rm_ready_queue */ | ||
286 | /* no need to preempt if there is nothing pending */ | ||
287 | if (!__jobs_pending(rt)) | ||
288 | return 0; | ||
289 | /* we need to reschedule if t doesn't exist */ | ||
290 | if (!t) | ||
291 | return 1; | ||
292 | |||
293 | /* NOTE: We cannot check for non-preemptibility since we | ||
294 | * don't know what address space we're currently in. | ||
295 | */ | ||
296 | |||
297 | /* preempt if t is not real-time or __next_ready(rt) outranks t */ | ||
298 | return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t); | ||
299 | } | ||
300 | |||
diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c deleted file mode 100644 index 791b9979190e..000000000000 --- a/litmus/sched_crm.c +++ /dev/null | |||
@@ -1,2562 +0,0 @@ | |||
1 | /* | ||
2 | * litmus/sched_crm.c | ||
3 | * | ||
4 | * Implementation of the C-RM scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Cluster topology is automatically detected (this is arch dependent | ||
9 | * and works only on x86 at the moment --- and only with modern | ||
10 | * cpus that export cpuid4 information) | ||
11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e., | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster | ||
14 | * - default clustering is around L2 cache (cache index = 2) | ||
15 | * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all | ||
16 | * online_cpus are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online cpus. | ||
21 | * If the num_online_cpus() dynamically changes, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/uaccess.h> | ||
33 | #include <linux/module.h> | ||
34 | |||
35 | #include <litmus/litmus.h> | ||
36 | #include <litmus/jobs.h> | ||
37 | #include <litmus/preempt.h> | ||
38 | #include <litmus/budget.h> | ||
39 | #include <litmus/sched_plugin.h> | ||
40 | #include <litmus/rm_common.h> | ||
41 | #include <litmus/sched_trace.h> | ||
42 | |||
43 | #include <litmus/clustered.h> | ||
44 | |||
45 | #include <litmus/bheap.h> | ||
46 | #include <litmus/binheap.h> | ||
47 | #include <litmus/trace.h> | ||
48 | |||
49 | #ifdef CONFIG_LITMUS_LOCKING | ||
50 | #include <litmus/kfmlp_lock.h> | ||
51 | #endif | ||
52 | |||
53 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
54 | #include <litmus/fifo_lock.h> | ||
55 | #include <litmus/prioq_lock.h> | ||
56 | #include <litmus/ikglp_lock.h> | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
60 | #include <litmus/affinity.h> | ||
61 | #endif | ||
62 | |||
63 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
64 | #include <litmus/aux_tasks.h> | ||
65 | #endif | ||
66 | |||
67 | /* to configure the cluster size */ | ||
68 | #include <litmus/litmus_proc.h> | ||
69 | |||
70 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
71 | #include <litmus/affinity.h> | ||
72 | #endif | ||
73 | |||
74 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
75 | #include <litmus/litmus_softirq.h> | ||
76 | #endif | ||
77 | |||
78 | #ifdef CONFIG_LITMUS_NVIDIA | ||
79 | #include <litmus/nvidia_info.h> | ||
80 | #endif | ||
81 | |||
82 | #if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA) | ||
83 | #include <litmus/gpu_affinity.h> | ||
84 | #endif | ||
85 | |||
86 | /* Reference configuration variable. Determines which cache level is used to | ||
87 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
88 | * all CPUs form a single cluster (just like GSN-EDF). | ||
89 | */ | ||
90 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
91 | |||
92 | struct clusterdomain; | ||
93 | |||
94 | /* cpu_entry_t - maintain the linked and scheduled state | ||
95 | * | ||
96 | * A cpu also contains a pointer to the crm_domain_t cluster | ||
97 | * that owns it (struct clusterdomain*) | ||
98 | */ | ||
99 | typedef struct { | ||
100 | int cpu; | ||
101 | struct clusterdomain* cluster; /* owning cluster */ | ||
102 | struct task_struct* linked; /* only RT tasks */ | ||
103 | struct task_struct* scheduled; /* only RT tasks */ | ||
104 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
105 | struct binheap_node hn; | ||
106 | } cpu_entry_t; | ||
107 | |||
108 | /* one cpu_entry_t per CPU */ | ||
109 | DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries); | ||
110 | |||
111 | #define set_will_schedule() \ | ||
112 | (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1)) | ||
113 | #define clear_will_schedule() \ | ||
114 | (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0)) | ||
115 | #define test_will_schedule(cpu) \ | ||
116 | (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule)) | ||
117 | |||
118 | /* | ||
119 | * In C-RM there is a crm domain _per_ cluster. | ||
120 | * The number of clusters is dynamically determined according to the | ||
121 | * total cpu number and the cluster size. | ||
122 | */ | ||
123 | typedef struct clusterdomain { | ||
124 | /* rt_domain for this cluster */ | ||
125 | rt_domain_t domain; | ||
126 | /* cpus in this cluster */ | ||
127 | cpu_entry_t* *cpus; | ||
128 | /* map of this cluster cpus */ | ||
129 | cpumask_var_t cpu_map; | ||
130 | /* the cpus queue themselves according to priority in here */ | ||
131 | struct binheap cpu_heap; | ||
132 | |||
133 | #define cluster_lock domain.ready_lock | ||
134 | |||
135 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
136 | raw_spinlock_t dgl_lock; | ||
137 | #endif | ||
138 | |||
139 | int top_m_size; | ||
140 | struct binheap top_m; | ||
141 | struct binheap not_top_m; | ||
142 | |||
143 | } crm_domain_t; | ||
144 | |||
145 | |||
146 | /* a crm_domain per cluster; allocation is done at init/activation time */ | ||
147 | crm_domain_t *crm; | ||
148 | |||
149 | #define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster) | ||
150 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
151 | |||
152 | /* total number of cluster */ | ||
153 | static int num_clusters; | ||
154 | /* we do not support cluster of different sizes */ | ||
155 | static unsigned int cluster_size; | ||
156 | |||
157 | static int clusters_allocated = 0; | ||
158 | |||
159 | |||
160 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
161 | static int num_gpu_clusters; | ||
162 | static unsigned int gpu_cluster_size; | ||
163 | #endif | ||
164 | |||
165 | inline static struct task_struct* binheap_node_to_task(struct binheap_node *bn) | ||
166 | { | ||
167 | struct budget_tracker *bt = binheap_entry(bn, struct budget_tracker, top_m_node); | ||
168 | struct task_struct *t = | ||
169 | container_of( | ||
170 | container_of(bt, struct rt_param, budget), | ||
171 | struct task_struct, | ||
172 | rt_param); | ||
173 | return t; | ||
174 | } | ||
175 | |||
176 | static int crm_max_heap_base_priority_order(struct binheap_node *a, | ||
177 | struct binheap_node *b) | ||
178 | { | ||
179 | struct task_struct* t_a = binheap_node_to_task(a); | ||
180 | struct task_struct* t_b = binheap_node_to_task(b); | ||
181 | return __rm_higher_prio(t_a, BASE, t_b, BASE); | ||
182 | } | ||
183 | |||
184 | static int crm_min_heap_base_priority_order(struct binheap_node *a, | ||
185 | struct binheap_node *b) | ||
186 | { | ||
187 | struct task_struct* t_a = binheap_node_to_task(a); | ||
188 | struct task_struct* t_b = binheap_node_to_task(b); | ||
189 | return __rm_higher_prio(t_b, BASE, t_a, BASE); | ||
190 | } | ||
191 | |||
192 | static void crm_track_in_top_m(struct task_struct *t) | ||
193 | { | ||
194 | /* cluster lock must be held */ | ||
195 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
196 | struct budget_tracker *bt; | ||
197 | struct task_struct *mth_highest; | ||
198 | |||
199 | //BUG_ON(binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)); | ||
200 | if (binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)) { | ||
201 | // TRACE_TASK(t, "apparently already being tracked. top-m?: %s\n", | ||
202 | // (bt_flag_is_set(t, BTF_IS_TOP_M)) ? "Yes":"No"); | ||
203 | return; | ||
204 | } | ||
205 | |||
206 | /* TODO: do cluster_size-1 if release master is in this cluster */ | ||
207 | if (cluster->top_m_size < cluster_size) { | ||
208 | // TRACE_TASK(t, "unconditionally adding task to top-m.\n"); | ||
209 | binheap_add(&tsk_rt(t)->budget.top_m_node, &cluster->top_m, | ||
210 | struct budget_tracker, top_m_node); | ||
211 | ++cluster->top_m_size; | ||
212 | bt_flag_set(t, BTF_IS_TOP_M); | ||
213 | budget_state_machine(t,on_enter_top_m); | ||
214 | |||
215 | return; | ||
216 | } | ||
217 | |||
218 | BUG_ON(binheap_empty(&cluster->top_m)); | ||
219 | |||
220 | bt = binheap_top_entry(&cluster->top_m, struct budget_tracker, top_m_node); | ||
221 | mth_highest = | ||
222 | container_of( | ||
223 | container_of(bt, struct rt_param, budget), | ||
224 | struct task_struct, | ||
225 | rt_param); | ||
226 | |||
227 | if (__rm_higher_prio(t, BASE, mth_highest, BASE)) { | ||
228 | // TRACE_TASK(t, "adding to top-m (evicting %s/%d)\n", | ||
229 | // mth_highest->comm, mth_highest->pid); | ||
230 | |||
231 | binheap_delete_root(&cluster->top_m, struct budget_tracker, top_m_node); | ||
232 | INIT_BINHEAP_NODE(&tsk_rt(mth_highest)->budget.top_m_node); | ||
233 | binheap_add(&tsk_rt(mth_highest)->budget.top_m_node, | ||
234 | &cluster->not_top_m, | ||
235 | struct budget_tracker, top_m_node); | ||
236 | budget_state_machine(mth_highest,on_exit_top_m); | ||
237 | bt_flag_clear(mth_highest, BTF_IS_TOP_M); | ||
238 | |||
239 | binheap_add(&tsk_rt(t)->budget.top_m_node, &cluster->top_m, | ||
240 | struct budget_tracker, top_m_node); | ||
241 | bt_flag_set(t, BTF_IS_TOP_M); | ||
242 | budget_state_machine(t,on_enter_top_m); | ||
243 | } | ||
244 | else { | ||
245 | // TRACE_TASK(t, "adding to not-top-m\n"); | ||
246 | binheap_add(&tsk_rt(t)->budget.top_m_node, | ||
247 | &cluster->not_top_m, | ||
248 | struct budget_tracker, top_m_node); | ||
249 | } | ||
250 | } | ||
251 | |||
252 | static void crm_untrack_in_top_m(struct task_struct *t) | ||
253 | { | ||
254 | /* cluster lock must be held */ | ||
255 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
256 | |||
257 | if (!binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)) { | ||
258 | // TRACE_TASK(t, "is not being tracked\n"); /* BUG() on this case? */ | ||
259 | return; | ||
260 | } | ||
261 | |||
262 | if (bt_flag_is_set(t, BTF_IS_TOP_M)) { | ||
263 | // TRACE_TASK(t, "removing task from top-m\n"); | ||
264 | |||
265 | /* delete t's entry */ | ||
266 | binheap_delete(&tsk_rt(t)->budget.top_m_node, &cluster->top_m); | ||
267 | budget_state_machine(t,on_exit_top_m); | ||
268 | bt_flag_clear(t, BTF_IS_TOP_M); | ||
269 | |||
270 | /* move a task over from the overflow heap */ | ||
271 | if(!binheap_empty(&cluster->not_top_m)) { | ||
272 | struct budget_tracker *bt = | ||
273 | binheap_top_entry(&cluster->not_top_m, struct budget_tracker, top_m_node); | ||
274 | struct task_struct *to_move = | ||
275 | container_of( | ||
276 | container_of(bt, struct rt_param, budget), | ||
277 | struct task_struct, | ||
278 | rt_param); | ||
279 | |||
280 | // TRACE_TASK(to_move, "being promoted to top-m\n"); | ||
281 | |||
282 | binheap_delete_root(&cluster->not_top_m, struct budget_tracker, top_m_node); | ||
283 | INIT_BINHEAP_NODE(&tsk_rt(to_move)->budget.top_m_node); | ||
284 | |||
285 | binheap_add(&tsk_rt(to_move)->budget.top_m_node, | ||
286 | &cluster->top_m, | ||
287 | struct budget_tracker, top_m_node); | ||
288 | bt_flag_set(to_move, BTF_IS_TOP_M); | ||
289 | budget_state_machine(to_move,on_enter_top_m); | ||
290 | } | ||
291 | else { | ||
292 | --cluster->top_m_size; | ||
293 | } | ||
294 | } | ||
295 | else { | ||
296 | // TRACE_TASK(t, "removing task from not-top-m\n"); | ||
297 | binheap_delete(&tsk_rt(t)->budget.top_m_node, &cluster->not_top_m); | ||
298 | } | ||
299 | } | ||
300 | |||
301 | |||
302 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
303 | static raw_spinlock_t* crm_get_dgl_spinlock(struct task_struct *t) | ||
304 | { | ||
305 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
306 | return(&cluster->dgl_lock); | ||
307 | } | ||
308 | #endif | ||
309 | |||
310 | |||
311 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
312 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
313 | * information during the initialization of the plugin (e.g., topology) | ||
314 | #define WANT_ALL_SCHED_EVENTS | ||
315 | */ | ||
316 | #define VERBOSE_INIT | ||
317 | |||
318 | static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b) | ||
319 | { | ||
320 | cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn); | ||
321 | cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn); | ||
322 | |||
323 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
324 | * the top of the heap. | ||
325 | */ | ||
326 | return rm_higher_prio(b->linked, a->linked); | ||
327 | } | ||
328 | |||
329 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
330 | * order in the cpu queue. Caller must hold crm lock. | ||
331 | */ | ||
332 | static void update_cpu_position(cpu_entry_t *entry) | ||
333 | { | ||
334 | crm_domain_t *cluster = entry->cluster; | ||
335 | |||
336 | if (likely(binheap_is_in_heap(&entry->hn))) { | ||
337 | binheap_delete(&entry->hn, &cluster->cpu_heap); | ||
338 | } | ||
339 | |||
340 | binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); | ||
341 | } | ||
342 | |||
343 | /* caller must hold crm lock */ | ||
344 | static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster) | ||
345 | { | ||
346 | return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn); | ||
347 | } | ||
348 | |||
349 | static noinline void unlink(struct task_struct* t); | ||
350 | |||
351 | /* link_task_to_cpu - Update the link of a CPU. | ||
352 | * Handles the case where the to-be-linked task is already | ||
353 | * scheduled on a different CPU. | ||
354 | */ | ||
355 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
356 | cpu_entry_t *entry) | ||
357 | { | ||
358 | cpu_entry_t *sched; | ||
359 | struct task_struct* tmp; | ||
360 | int on_cpu; | ||
361 | |||
362 | BUG_ON(linked && !is_realtime(linked)); | ||
363 | |||
364 | /* Currently linked task is set to be unlinked. */ | ||
365 | if (entry->linked) { | ||
366 | entry->linked->rt_param.linked_on = NO_CPU; | ||
367 | |||
368 | #ifdef CONFIG_LITMUS_LOCKING | ||
369 | if (tsk_rt(entry->linked)->inh_task) | ||
370 | clear_inh_task_linkback(entry->linked, tsk_rt(entry->linked)->inh_task); | ||
371 | #endif | ||
372 | } | ||
373 | |||
374 | /* Link new task to CPU. */ | ||
375 | if (linked) { | ||
376 | /* handle task is already scheduled somewhere! */ | ||
377 | on_cpu = linked->rt_param.scheduled_on; | ||
378 | if (on_cpu != NO_CPU) { | ||
379 | sched = &per_cpu(crm_cpu_entries, on_cpu); | ||
380 | |||
381 | BUG_ON(sched->linked == linked); | ||
382 | |||
383 | /* If we are already scheduled on the CPU to which we | ||
384 | * wanted to link, we don't need to do the swap -- | ||
385 | * we just link ourselves to the CPU and depend on | ||
386 | * the caller to get things right. | ||
387 | */ | ||
388 | if (entry != sched) { | ||
389 | TRACE_TASK(linked, | ||
390 | "already scheduled on %d, updating link.\n", | ||
391 | sched->cpu); | ||
392 | tmp = sched->linked; | ||
393 | linked->rt_param.linked_on = sched->cpu; | ||
394 | sched->linked = linked; | ||
395 | update_cpu_position(sched); | ||
396 | linked = tmp; | ||
397 | } | ||
398 | } | ||
399 | if (linked) { /* might be NULL due to swap */ | ||
400 | linked->rt_param.linked_on = entry->cpu; | ||
401 | |||
402 | #ifdef CONFIG_LITMUS_LOCKING | ||
403 | if (tsk_rt(linked)->inh_task) | ||
404 | set_inh_task_linkback(linked, tsk_rt(linked)->inh_task); | ||
405 | #endif | ||
406 | } | ||
407 | } | ||
408 | entry->linked = linked; | ||
409 | #ifdef WANT_ALL_SCHED_EVENTS | ||
410 | if (linked) | ||
411 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
412 | else | ||
413 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
414 | #endif | ||
415 | update_cpu_position(entry); | ||
416 | } | ||
417 | |||
418 | /* unlink - Make sure a task is not linked any longer to an entry | ||
419 | * where it was linked before. Must hold cluster_lock. | ||
420 | */ | ||
421 | static noinline void unlink(struct task_struct* t) | ||
422 | { | ||
423 | if (t->rt_param.linked_on != NO_CPU) { | ||
424 | /* unlink: clear t's link, then link NULL to its former CPU entry */ | ||
425 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on); | ||
426 | t->rt_param.linked_on = NO_CPU; | ||
427 | link_task_to_cpu(NULL, entry); | ||
428 | } else if (is_queued(t)) { | ||
429 | /* This is an interesting situation: t is scheduled, | ||
430 | * but was just recently unlinked. It cannot be | ||
431 | * linked anywhere else (because then it would have | ||
432 | * been relinked to this CPU), thus it must be in some | ||
433 | * queue. We must remove it from the list in this | ||
434 | * case. | ||
435 | * | ||
436 | * In the clustered case it should be somewhere in the queue of | ||
437 | * its cluster's domain, so we look the domain up using | ||
438 | * task_cpu_cluster. | ||
439 | */ | ||
440 | remove(&(task_cpu_cluster(t))->domain, t); | ||
441 | } | ||
442 | } | ||
443 | |||
444 | |||
445 | /* preempt - force a CPU to reschedule | ||
446 | */ | ||
447 | static void preempt(cpu_entry_t *entry) | ||
448 | { | ||
449 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
450 | } | ||
451 | |||
452 | /* requeue - Put an unlinked task into the rt_domain of its cluster. | ||
453 | * Caller must hold cluster_lock. | ||
454 | */ | ||
455 | static noinline void requeue(struct task_struct* task) | ||
456 | { | ||
457 | crm_domain_t *cluster = task_cpu_cluster(task); | ||
458 | BUG_ON(!task); | ||
459 | /* sanity check before insertion */ | ||
460 | BUG_ON(is_queued(task)); | ||
461 | |||
462 | if (is_early_releasing(task) || is_released(task, litmus_clock()) || | ||
463 | tsk_rt(task)->job_params.is_backlogged_job) { | ||
464 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
465 | if (unlikely(tsk_rt(task)->is_aux_task && task->state != TASK_RUNNING && !tsk_rt(task)->aux_ready)) { | ||
466 | /* aux_task probably transitioned to real-time while it was blocked */ | ||
467 | TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid); | ||
468 | tsk_rt(task)->aux_ready = 1; /* allow this to only happen once per aux task */ | ||
469 | } | ||
470 | else | ||
471 | #endif | ||
472 | __add_ready(&cluster->domain, task); | ||
473 | } | ||
474 | else { | ||
475 | TRACE_TASK(task, "not requeueing not-yet-released job\n"); | ||
476 | } | ||
477 | } | ||
478 | |||
#ifdef CONFIG_SCHED_CPU_AFFINITY
/* crm_get_nearest_available_cpu - find an available CPU near 'start' that
 * belongs to 'cluster'.
 *
 * The search itself is done by get_nearest_available_cpu(), which is given
 * the cluster's release master (or NO_CPU when CONFIG_RELEASE_MASTER is off).
 * Returns the CPU entry found, or NULL if nothing was found or the CPU found
 * is not part of the cluster's cpu_map.
 */
static cpu_entry_t* crm_get_nearest_available_cpu(
				crm_domain_t *cluster, cpu_entry_t *start)
{
	cpu_entry_t *nearest;

#ifdef CONFIG_RELEASE_MASTER
	get_nearest_available_cpu(nearest, start, crm_cpu_entries,
				  cluster->domain.release_master);
#else
	get_nearest_available_cpu(nearest, start, crm_cpu_entries,
				  NO_CPU);
#endif

	if (!nearest)
		return NULL;

	/* make sure CPU is in our cluster */
	if (!cpu_isset(nearest->cpu, *cluster->cpu_map))
		return NULL;

	return nearest;
}
#endif
500 | |||
501 | |||
502 | /* check for any necessary preemptions */ | ||
503 | static void check_for_preemptions(crm_domain_t *cluster) | ||
504 | { | ||
505 | struct task_struct *task; | ||
506 | cpu_entry_t *last; | ||
507 | |||
508 | for(last = lowest_prio_cpu(cluster); | ||
509 | rm_preemption_needed(&cluster->domain, last->linked); | ||
510 | last = lowest_prio_cpu(cluster)) { | ||
511 | /* preemption necessary */ | ||
512 | task = __take_ready(&cluster->domain); | ||
513 | TRACE("check_for_preemptions: attempting to link task %d to %d\n", | ||
514 | task->pid, last->cpu); | ||
515 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
516 | { | ||
517 | cpu_entry_t *affinity = | ||
518 | crm_get_nearest_available_cpu(cluster, | ||
519 | &per_cpu(crm_cpu_entries, task_cpu(task))); | ||
520 | if(affinity) | ||
521 | last = affinity; | ||
522 | else if(should_requeue_preempted_job(last->linked)) | ||
523 | requeue(last->linked); | ||
524 | } | ||
525 | #else | ||
526 | if (should_requeue_preempted_job(last->linked)) | ||
527 | requeue(last->linked); | ||
528 | #endif | ||
529 | link_task_to_cpu(task, last); | ||
530 | preempt(last); | ||
531 | } | ||
532 | } | ||
533 | |||
534 | /* crm_job_arrival: task is either resumed or released */ | ||
535 | static noinline void crm_job_arrival(struct task_struct* task) | ||
536 | { | ||
537 | crm_domain_t *cluster = task_cpu_cluster(task); | ||
538 | BUG_ON(!task); | ||
539 | |||
540 | requeue(task); | ||
541 | check_for_preemptions(cluster); | ||
542 | } | ||
543 | |||
/* Per-node callback used while walking a heap of released tasks: converts
 * the heap node back into its task and registers that task with the top-m
 * tracking. The second argument is unused.
 */
static void crm_track_on_release(struct bheap_node* n, void* dummy)
{
	crm_track_in_top_m(bheap2task(n));
}
551 | |||
552 | static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
553 | { | ||
554 | crm_domain_t* cluster = container_of(rt, crm_domain_t, domain); | ||
555 | unsigned long flags; | ||
556 | |||
557 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
558 | |||
559 | bheap_for_each(tasks, crm_track_on_release, NULL); | ||
560 | |||
561 | __merge_ready(&cluster->domain, tasks); | ||
562 | check_for_preemptions(cluster); | ||
563 | |||
564 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
565 | } | ||
566 | |||
567 | /* caller holds cluster_lock */ | ||
568 | static noinline void job_completion(struct task_struct *t, int forced) | ||
569 | { | ||
570 | int do_release = 0; | ||
571 | int backlogged = 0; | ||
572 | lt_t now; | ||
573 | |||
574 | BUG_ON(!t); | ||
575 | |||
576 | now = litmus_clock(); | ||
577 | |||
578 | /* DO BACKLOG TRACKING */ | ||
579 | |||
580 | /* job completed with budget remaining */ | ||
581 | if (get_release_policy(t) != SPORADIC) { | ||
582 | /* only jobs we know that will call sleep_next_job() can use backlogging */ | ||
583 | if (!forced) { | ||
584 | /* was it a backlogged job that completed? */ | ||
585 | if (tsk_rt(t)->job_params.is_backlogged_job) { | ||
586 | TRACE_TASK(t, "completed backlogged job\n"); | ||
587 | if (get_backlog(t)) { | ||
588 | --get_backlog(t); | ||
589 | /* is_backlogged_job remains asserted */ | ||
590 | } | ||
591 | else { | ||
592 | /* caught up completely */ | ||
593 | TRACE_TASK(t, "completely caught up.\n"); | ||
594 | tsk_rt(t)->job_params.is_backlogged_job = 0; | ||
595 | /* we now look like a normally completing job. */ | ||
596 | } | ||
597 | } | ||
598 | } | ||
599 | else { | ||
600 | ++get_backlog(t); | ||
601 | TRACE_TASK(t, "adding backlogged job\n"); | ||
602 | } | ||
603 | |||
604 | backlogged = has_backlog(t); | ||
605 | TRACE_TASK(t, "number of backlogged jobs: %u\n", | ||
606 | get_backlog(t)); | ||
607 | } | ||
608 | |||
609 | /* SETUP FOR THE NEXT JOB */ | ||
610 | |||
611 | sched_trace_task_completion(t, forced); | ||
612 | |||
613 | TRACE_TASK(t, "job_completion() at %llu (forced = %d).\n", now, forced); | ||
614 | |||
615 | /* set flags */ | ||
616 | tsk_rt(t)->completed = 0; | ||
617 | |||
618 | #if 0 | ||
619 | if (unlikely(!forced && backlogged)) { | ||
620 | /* Don't advance deadline/refresh budget. Use the remaining budget for | ||
621 | * the backlogged job. | ||
622 | * | ||
623 | * NOTE: Allowing backlogged jobs comsume remaining budget may affect | ||
624 | * blocking bound analysis. | ||
625 | */ | ||
626 | } | ||
627 | else if (unlikely(!forced && tsk_rt(t)->job_params.is_backlogged_job)) { | ||
628 | /* we've just about caught up, but we still have the job of this | ||
629 | * budget's allocation to do (even if it's for the future)... */ | ||
630 | TRACE_TASK(t, "Releasing final catch-up job.\n"); | ||
631 | backlogged = 1; | ||
632 | do_release = 1; | ||
633 | } | ||
634 | else { | ||
635 | #endif | ||
636 | crm_untrack_in_top_m(t); | ||
637 | prepare_for_next_period(t); | ||
638 | |||
639 | do_release = (is_early_releasing(t) || is_released(t, now)); | ||
640 | |||
641 | if (backlogged) { | ||
642 | TRACE_TASK(t, "refreshing budget with early " | ||
643 | "release for backlogged job.\n"); | ||
644 | } | ||
645 | if (do_release || backlogged) { | ||
646 | /* log here to capture overheads */ | ||
647 | sched_trace_task_release(t); | ||
648 | } | ||
649 | // } | ||
650 | |||
651 | unlink(t); | ||
652 | |||
653 | /* release or arm next job */ | ||
654 | if (is_running(t)) { | ||
655 | /* is our next job a backlogged job? */ | ||
656 | if (backlogged) { | ||
657 | TRACE_TASK(t, "next job is a backlogged job.\n"); | ||
658 | tsk_rt(t)->job_params.is_backlogged_job = 1; | ||
659 | } | ||
660 | else { | ||
661 | TRACE_TASK(t, "next job is a regular job.\n"); | ||
662 | tsk_rt(t)->job_params.is_backlogged_job = 0; | ||
663 | } | ||
664 | |||
665 | if (do_release || backlogged) { | ||
666 | crm_track_in_top_m(t); | ||
667 | crm_job_arrival(t); | ||
668 | } | ||
669 | else { | ||
670 | add_release(&task_cpu_cluster(t)->domain, t); | ||
671 | } | ||
672 | } | ||
673 | else { | ||
674 | BUG_ON(!forced); | ||
675 | /* budget was refreshed and job early released */ | ||
676 | TRACE_TASK(t, "job exhausted budget while sleeping\n"); | ||
677 | crm_track_in_top_m(t); | ||
678 | } | ||
679 | } | ||
680 | |||
681 | static enum hrtimer_restart crm_simple_on_exhausted(struct task_struct *t, int in_schedule) | ||
682 | { | ||
683 | /* Assumption: t is scheduled on the CPU executing this callback */ | ||
684 | |||
685 | if (in_schedule) { | ||
686 | BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id()); | ||
687 | if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) < 0) { | ||
688 | TRACE_TASK(t, "raced with timer. deffering to timer.\n"); | ||
689 | goto out; | ||
690 | } | ||
691 | } | ||
692 | |||
693 | if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) { | ||
694 | /* signal exhaustion */ | ||
695 | send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */ | ||
696 | } | ||
697 | |||
698 | if (budget_enforced(t) && !bt_flag_test_and_set(t, BTF_BUDGET_EXHAUSTED)) { | ||
699 | if (likely(!is_np(t))) { | ||
700 | /* np tasks will be preempted when they become | ||
701 | * preemptable again | ||
702 | */ | ||
703 | if (!in_schedule) { | ||
704 | TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n"); | ||
705 | litmus_reschedule_local(); | ||
706 | set_will_schedule(); | ||
707 | } | ||
708 | } else if (is_user_np(t)) { | ||
709 | TRACE_TASK(t, "is non-preemptable, preemption delayed.\n"); | ||
710 | request_exit_np(t); | ||
711 | } | ||
712 | } | ||
713 | |||
714 | out: | ||
715 | return HRTIMER_NORESTART; | ||
716 | } | ||
717 | |||
718 | |||
719 | static enum hrtimer_restart crm_simple_io_on_exhausted(struct task_struct *t, int in_schedule) | ||
720 | { | ||
721 | enum hrtimer_restart restart = HRTIMER_NORESTART; | ||
722 | |||
723 | if (in_schedule) { | ||
724 | BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id()); | ||
725 | if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) == -1) { | ||
726 | TRACE_TASK(t, "raced with timer. deffering to timer.\n"); | ||
727 | goto out; | ||
728 | } | ||
729 | } | ||
730 | |||
731 | /* t may or may not be scheduled */ | ||
732 | |||
733 | if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) { | ||
734 | /* signal exhaustion */ | ||
735 | |||
736 | /* Tasks should block SIG_BUDGET if they cannot gracefully respond to | ||
737 | * the signal while suspended. SIG_BUDGET is an rt-signal, so it will | ||
738 | * be queued and received when SIG_BUDGET is unblocked */ | ||
739 | send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */ | ||
740 | } | ||
741 | |||
742 | if (budget_enforced(t) && !bt_flag_is_set(t, BTF_BUDGET_EXHAUSTED)) { | ||
743 | int cpu = (tsk_rt(t)->linked_on != NO_CPU) ? | ||
744 | tsk_rt(t)->linked_on : tsk_rt(t)->scheduled_on; | ||
745 | |||
746 | if (is_np(t) && is_user_np(t)) { | ||
747 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
748 | TRACE_TASK(t, "is non-preemptable, preemption delayed.\n"); | ||
749 | request_exit_np(t); | ||
750 | } | ||
751 | /* where do we need to call resched? */ | ||
752 | else if (cpu == smp_processor_id()) { | ||
753 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
754 | if (!in_schedule) { | ||
755 | TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n"); | ||
756 | litmus_reschedule_local(); | ||
757 | set_will_schedule(); | ||
758 | } | ||
759 | } | ||
760 | else if (cpu != NO_CPU) { | ||
761 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
762 | if (!in_schedule) { | ||
763 | TRACE_TASK(t, "is preemptable on remote cpu (%d) => FORCE_RESCHED\n", cpu); | ||
764 | litmus_reschedule(cpu); | ||
765 | } | ||
766 | } | ||
767 | else if (unlikely(tsk_rt(t)->blocked_lock)) { | ||
768 | /* we shouldn't be draining while waiting for litmus lock, but we | ||
769 | * could have raced with the budget timer (?). */ | ||
770 | WARN_ON(1); | ||
771 | } | ||
772 | else { | ||
773 | lt_t remaining; | ||
774 | crm_domain_t *cluster; | ||
775 | unsigned long flags, kludge_flags; | ||
776 | |||
777 | BUG_ON(in_schedule); | ||
778 | |||
779 | cluster = task_cpu_cluster(t); | ||
780 | |||
781 | // 1) refresh budget through job completion | ||
782 | // 2) if holds locks, tell the locking protocol to re-eval priority | ||
783 | // 3) -- the LP must undo any inheritance relations if appropriate | ||
784 | |||
785 | /* force job completion */ | ||
786 | TRACE_TASK(t, "blocked, postponing deadline\n"); | ||
787 | |||
788 | local_irq_save(kludge_flags); | ||
789 | |||
790 | /* Outermost lock of the cluster. Recursive lock calls are | ||
791 | * possible on this code path. This should be the _ONLY_ | ||
792 | * scenario where recursive calls are made. */ | ||
793 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
794 | /* Unfortunately, we _might_ need to grab the DGL lock, so we | ||
795 | * must grab it every time since it must be take before the | ||
796 | * cluster lock. */ | ||
797 | raw_spin_lock_irqsave(&cluster->dgl_lock, flags); | ||
798 | raw_readyq_lock(&cluster->cluster_lock); | ||
799 | #else | ||
800 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
801 | #endif | ||
802 | |||
803 | job_completion(t, 1); /* refreshes budget and pushes out deadline */ | ||
804 | |||
805 | #ifdef CONFIG_LITMUS_LOCKING | ||
806 | { | ||
807 | int i; | ||
808 | /* any linked task that inherits from 't' needs to have their | ||
809 | * cpu-position re-evaluated. we have to do this in two passes. | ||
810 | * pass 1: remove nodes from heap s.t. heap is in known good state. | ||
811 | * pass 2: re-add nodes. | ||
812 | * | ||
813 | */ | ||
814 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
815 | i < BITS_PER_LONG; | ||
816 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
817 | { | ||
818 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
819 | BUG_ON(!to_update); | ||
820 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
821 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
822 | BUG_ON(!binheap_is_in_heap(&entry->hn)); | ||
823 | binheap_delete(&entry->hn, &cluster->cpu_heap); | ||
824 | } | ||
825 | } | ||
826 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
827 | i < BITS_PER_LONG; | ||
828 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
829 | { | ||
830 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
831 | BUG_ON(!to_update); | ||
832 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
833 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
834 | binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); | ||
835 | } | ||
836 | } | ||
837 | } | ||
838 | |||
839 | /* Check our inheritance and propagate any changes forward. */ | ||
840 | reevaluate_inheritance(t); | ||
841 | #endif | ||
842 | /* No need to recheck priority of AUX tasks. They will always | ||
843 | * inherit from 't' if they are enabled. Their prio change was | ||
844 | * captured by the cpu-heap operations above. */ | ||
845 | |||
846 | #ifdef CONFIG_LITMUS_NVIDIA | ||
847 | /* Re-eval priority of GPU interrupt threads. */ | ||
848 | if(tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) | ||
849 | gpu_owner_decrease_priority(t); | ||
850 | #endif | ||
851 | |||
852 | #ifdef CONFIG_LITMUS_LOCKING | ||
853 | /* double-check that everything is okay */ | ||
854 | check_for_preemptions(cluster); | ||
855 | #endif | ||
856 | |||
857 | /* should be the outermost unlock call */ | ||
858 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
859 | raw_readyq_unlock(&cluster->cluster_lock); | ||
860 | raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags); | ||
861 | #else | ||
862 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
863 | #endif | ||
864 | flush_pending_wakes(); | ||
865 | local_irq_restore(kludge_flags); | ||
866 | |||
867 | /* we need to set up the budget timer since we're within the callback. */ | ||
868 | hrtimer_forward_now(&get_budget_timer(t).timer.timer, | ||
869 | ns_to_ktime(budget_remaining(t))); | ||
870 | remaining = hrtimer_get_expires_ns(&get_budget_timer(t).timer.timer); | ||
871 | |||
872 | TRACE_TASK(t, "rearmed timer to %ld\n", remaining); | ||
873 | restart = HRTIMER_RESTART; | ||
874 | } | ||
875 | } | ||
876 | |||
877 | out: | ||
878 | return restart; | ||
879 | } | ||
880 | |||
881 | |||
882 | #ifdef CONFIG_LITMUS_LOCKING | ||
883 | static void __crm_trigger_vunlock(struct task_struct *t) | ||
884 | { | ||
885 | TRACE_TASK(t, "triggering virtual unlock of lock %d\n", | ||
886 | tsk_rt(t)->outermost_lock->ident); | ||
887 | tsk_rt(t)->outermost_lock->ops->omlp_virtual_unlock(tsk_rt(t)->outermost_lock, t); | ||
888 | } | ||
889 | |||
/* crm_trigger_vunlock - trigger a virtual unlock of the outermost lock of
 * 't', taking the cluster's DGL lock around the call when DGL support is
 * configured.
 */
static void crm_trigger_vunlock(struct task_struct *t)
{
#ifdef CONFIG_LITMUS_DGL_SUPPORT
	/* The cluster is only needed for its dgl_lock; keeping the lookup
	 * inside the #ifdef avoids an unused variable in non-DGL builds. */
	crm_domain_t *cluster = task_cpu_cluster(t);
	unsigned long flags;

	/* Unfortunately, we _might_ need to grab the DGL lock, so we
	 * must grab it every time since it must be taken before the
	 * cluster lock. */
	raw_spin_lock_irqsave(&cluster->dgl_lock, flags);
#endif

	__crm_trigger_vunlock(t);

#ifdef CONFIG_LITMUS_DGL_SUPPORT
	raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags);
#endif
}
908 | #endif | ||
909 | |||
910 | static enum hrtimer_restart crm_sobliv_on_exhausted(struct task_struct *t, int in_schedule) | ||
911 | { | ||
912 | enum hrtimer_restart restart = HRTIMER_NORESTART; | ||
913 | |||
914 | if (in_schedule) { | ||
915 | BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id()); | ||
916 | if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) == -1) { | ||
917 | TRACE_TASK(t, "raced with timer. deffering to timer.\n"); | ||
918 | goto out; | ||
919 | } | ||
920 | } | ||
921 | |||
922 | /* t may or may not be scheduled */ | ||
923 | |||
924 | if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) { | ||
925 | /* signal exhaustion */ | ||
926 | |||
927 | /* Tasks should block SIG_BUDGET if they cannot gracefully respond to | ||
928 | * the signal while suspended. SIG_BUDGET is an rt-signal, so it will | ||
929 | * be queued and received when SIG_BUDGET is unblocked */ | ||
930 | send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */ | ||
931 | } | ||
932 | |||
933 | if (budget_enforced(t) && !bt_flag_is_set(t, BTF_BUDGET_EXHAUSTED)) { | ||
934 | int cpu = (tsk_rt(t)->linked_on != NO_CPU) ? | ||
935 | tsk_rt(t)->linked_on : tsk_rt(t)->scheduled_on; | ||
936 | |||
937 | #ifdef CONFIG_LITMUS_LOCKING | ||
938 | /* if 't' running, trigger a virtual unlock of outermost held lock | ||
939 | * if supported. Case where 't' not running handled later in function. | ||
940 | */ | ||
941 | if (cpu != NO_CPU && | ||
942 | tsk_rt(t)->outermost_lock && | ||
943 | tsk_rt(t)->outermost_lock->ops->is_omlp_family) | ||
944 | crm_trigger_vunlock(t); | ||
945 | #endif | ||
946 | |||
947 | if (is_np(t) && is_user_np(t)) { | ||
948 | TRACE_TASK(t, "is non-preemptable, preemption delayed.\n"); | ||
949 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
950 | request_exit_np(t); | ||
951 | } | ||
952 | /* where do we need to call resched? */ | ||
953 | else if (cpu == smp_processor_id()) { | ||
954 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
955 | if (!in_schedule) { | ||
956 | TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n"); | ||
957 | litmus_reschedule_local(); | ||
958 | set_will_schedule(); | ||
959 | } | ||
960 | } | ||
961 | else if (cpu != NO_CPU) { | ||
962 | bt_flag_set(t, BTF_BUDGET_EXHAUSTED); | ||
963 | if (!in_schedule) { | ||
964 | litmus_reschedule(cpu); | ||
965 | TRACE_TASK(t, "is preemptable on remote cpu (%d) => FORCE_RESCHED\n", cpu); | ||
966 | } | ||
967 | } | ||
968 | else { | ||
969 | lt_t remaining; | ||
970 | crm_domain_t *cluster; | ||
971 | unsigned long flags, kludge_flags; | ||
972 | |||
973 | BUG_ON(in_schedule); | ||
974 | |||
975 | cluster = task_cpu_cluster(t); | ||
976 | |||
977 | // 1) refresh budget through job completion | ||
978 | // 2) if holds locks, tell the locking protocol to re-eval priority | ||
979 | // 3) -- the LP must undo any inheritance relations if appropriate | ||
980 | |||
981 | /* force job completion */ | ||
982 | TRACE_TASK(t, "blocked, postponing deadline\n"); | ||
983 | |||
984 | /* Outermost lock of the cluster. Recursive lock calls are | ||
985 | * possible on this code path. This should be the _ONLY_ | ||
986 | * scenario where recursive calls are made. */ | ||
987 | local_irq_save(kludge_flags); | ||
988 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
989 | /* Unfortunately, we _might_ need to grab the DGL lock, so we | ||
990 | * must grab it every time since it must be take before the | ||
991 | * cluster lock. */ | ||
992 | raw_spin_lock_irqsave(&cluster->dgl_lock, flags); | ||
993 | raw_readyq_lock(&cluster->cluster_lock); | ||
994 | #else | ||
995 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
996 | #endif | ||
997 | |||
998 | job_completion(t, 1); /* refreshes budget and pushes out deadline */ | ||
999 | |||
1000 | #ifdef CONFIG_LITMUS_LOCKING | ||
1001 | { | ||
1002 | int i; | ||
1003 | /* any linked task that inherits from 't' needs to have their | ||
1004 | * cpu-position re-evaluated. we have to do this in two passes. | ||
1005 | * pass 1: remove nodes from heap s.t. heap is in known good state. | ||
1006 | * pass 2: re-add nodes. | ||
1007 | * | ||
1008 | */ | ||
1009 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
1010 | i < BITS_PER_LONG; | ||
1011 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
1012 | { | ||
1013 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
1014 | BUG_ON(!to_update); | ||
1015 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
1016 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
1017 | BUG_ON(!binheap_is_in_heap(&entry->hn)); | ||
1018 | binheap_delete(&entry->hn, &cluster->cpu_heap); | ||
1019 | } | ||
1020 | } | ||
1021 | for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots)); | ||
1022 | i < BITS_PER_LONG; | ||
1023 | i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1)) | ||
1024 | { | ||
1025 | struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i]; | ||
1026 | BUG_ON(!to_update); | ||
1027 | if (tsk_rt(to_update)->linked_on != NO_CPU) { | ||
1028 | cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on); | ||
1029 | binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn); | ||
1030 | } | ||
1031 | } | ||
1032 | } | ||
1033 | |||
1034 | /* Check our inheritance and propagate any changes forward. */ | ||
1035 | reevaluate_inheritance(t); | ||
1036 | |||
1037 | if (tsk_rt(t)->outermost_lock && tsk_rt(t)->outermost_lock->ops->is_omlp_family) | ||
1038 | __crm_trigger_vunlock(t); | ||
1039 | #endif | ||
1040 | /* No need to recheck priority of AUX tasks. They will always | ||
1041 | * inherit from 't' if they are enabled. Their prio change was | ||
1042 | * captured by the cpu-heap operations above. */ | ||
1043 | |||
1044 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1045 | /* Re-eval priority of GPU interrupt threads. */ | ||
1046 | if(tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) | ||
1047 | gpu_owner_decrease_priority(t); | ||
1048 | #endif | ||
1049 | |||
1050 | #ifdef CONFIG_LITMUS_LOCKING | ||
1051 | /* double-check that everything is okay */ | ||
1052 | check_for_preemptions(cluster); | ||
1053 | #endif | ||
1054 | |||
1055 | /* should be the outermost unlock call */ | ||
1056 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1057 | raw_readyq_unlock(&cluster->cluster_lock); | ||
1058 | raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags); | ||
1059 | #else | ||
1060 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1061 | #endif | ||
1062 | flush_pending_wakes(); | ||
1063 | local_irq_restore(kludge_flags); | ||
1064 | |||
1065 | /* we need to set up the budget timer since we're within the callback. */ | ||
1066 | if (bt_flag_is_set(t, BTF_IS_TOP_M)) { | ||
1067 | hrtimer_forward_now(&get_budget_timer(t).timer.timer, | ||
1068 | ns_to_ktime(budget_remaining(t))); | ||
1069 | remaining = hrtimer_get_expires_ns(&get_budget_timer(t).timer.timer); | ||
1070 | |||
1071 | TRACE_TASK(t, "rearmed timer to %ld\n", remaining); | ||
1072 | restart = HRTIMER_RESTART; | ||
1073 | } | ||
1074 | } | ||
1075 | } | ||
1076 | |||
1077 | out: | ||
1078 | return restart; | ||
1079 | } | ||
1080 | |||
1081 | |||
1082 | /* crm_tick - this function is called for every local timer | ||
1083 | * interrupt. | ||
1084 | * | ||
1085 | * checks whether the current task has expired and checks | ||
1086 | * whether we need to preempt it if it has not expired | ||
1087 | */ | ||
1088 | static void crm_tick(struct task_struct* t) | ||
1089 | { | ||
1090 | if (is_realtime(t) && | ||
1091 | tsk_rt(t)->budget.ops && budget_quantum_tracked(t) && | ||
1092 | budget_exhausted(t)) { | ||
1093 | TRACE_TASK(t, "budget exhausted\n"); | ||
1094 | budget_state_machine2(t,on_exhausted,!IN_SCHEDULE); | ||
1095 | } | ||
1096 | } | ||
1097 | |||
1098 | #ifdef CONFIG_LITMUS_LOCKING | ||
1099 | static int __increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh); | ||
1100 | #endif | ||
1101 | |||
1102 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
1103 | * assumptions on the state of the current task since it may be called for a | ||
1104 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
1105 | * was necessary, because sys_exit_np() was called, because some Linux | ||
1106 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
1107 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
1108 | * current state is. | ||
1109 | * | ||
1110 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
1111 | * | ||
1112 | * The following assertions for the scheduled task could hold: | ||
1113 | * | ||
1114 | * - !is_running(scheduled) // the job blocks | ||
1115 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
1116 | * - is_completed() // the job completed (by syscall) | ||
1117 | * - linked != scheduled // we need to reschedule (for any reason) | ||
1118 | * - is_np(scheduled) // rescheduling must be delayed, | ||
1119 | * sys_exit_np must be requested | ||
1120 | * | ||
1121 | * Any of these can occur together. | ||
1122 | */ | ||
1123 | static struct task_struct* crm_schedule(struct task_struct * prev) | ||
1124 | { | ||
1125 | cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); | ||
1126 | crm_domain_t *cluster = entry->cluster; | ||
1127 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
1128 | struct task_struct* next = NULL; | ||
1129 | |||
1130 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1131 | int recheck_inheritance; | ||
1132 | #endif | ||
1133 | |||
1134 | #ifdef CONFIG_RELEASE_MASTER | ||
1135 | /* Bail out early if we are the release master. | ||
1136 | * The release master never schedules any real-time tasks. | ||
1137 | */ | ||
1138 | if (unlikely(cluster->domain.release_master == entry->cpu)) { | ||
1139 | sched_state_task_picked(); | ||
1140 | return NULL; | ||
1141 | } | ||
1142 | #endif | ||
1143 | |||
1144 | /* Detect and handle budget exhaustion if it hasn't already been done. | ||
1145 | * Do this before acquring any locks. */ | ||
1146 | if (prev && is_realtime(prev) && | ||
1147 | budget_exhausted(prev) && | ||
1148 | !is_completed(prev) && /* don't bother with jobs on their way out */ | ||
1149 | ((budget_enforced(prev) && !bt_flag_is_set(prev, BTF_BUDGET_EXHAUSTED)) || | ||
1150 | (budget_signalled(prev) && !bt_flag_is_set(prev, BTF_SIG_BUDGET_SENT))) ) { | ||
1151 | TRACE_TASK(prev, "handling exhaustion in schedule() at %llu\n", litmus_clock()); | ||
1152 | budget_state_machine2(prev,on_exhausted,IN_SCHEDULE); | ||
1153 | } | ||
1154 | |||
1155 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1156 | /* prevent updates to inheritance relations while we work with 'prev' */ | ||
1157 | /* recheck inheritance if the task holds locks, is running, and will | ||
1158 | * have its deadline pushed out by job_completion() */ | ||
1159 | recheck_inheritance = | ||
1160 | prev && | ||
1161 | is_realtime(prev) && | ||
1162 | holds_locks(prev) && | ||
1163 | !is_np(prev) && | ||
1164 | !is_completed(prev) && | ||
1165 | is_running(prev) && | ||
1166 | budget_enforced(prev) && | ||
1167 | bt_flag_is_set(prev, BTF_BUDGET_EXHAUSTED); | ||
1168 | if (recheck_inheritance) { | ||
1169 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1170 | raw_spin_lock(&cluster->dgl_lock); | ||
1171 | #endif | ||
1172 | raw_spin_lock(&tsk_rt(prev)->hp_blocked_tasks_lock); | ||
1173 | } | ||
1174 | #endif | ||
1175 | |||
1176 | raw_readyq_lock(&cluster->cluster_lock); | ||
1177 | clear_will_schedule(); | ||
1178 | |||
1179 | /* sanity checking */ | ||
1180 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
1181 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
1182 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
1183 | |||
1184 | /* (0) Determine state */ | ||
1185 | exists = entry->scheduled != NULL; | ||
1186 | blocks = exists && !is_running(entry->scheduled); | ||
1187 | out_of_time = exists && | ||
1188 | budget_enforced(entry->scheduled) && | ||
1189 | bt_flag_is_set(entry->scheduled, BTF_BUDGET_EXHAUSTED); | ||
1190 | np = exists && is_np(entry->scheduled); | ||
1191 | sleep = exists && is_completed(entry->scheduled); | ||
1192 | preempt = entry->scheduled != entry->linked; | ||
1193 | |||
1194 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1195 | TRACE_TASK(prev, "invoked crm_schedule.\n"); | ||
1196 | #endif | ||
1197 | |||
1198 | if (exists) { | ||
1199 | TRACE_TASK(prev, | ||
1200 | "blocks:%d out_of_time:%d np:%d completed:%d preempt:%d " | ||
1201 | "state:%d sig:%d\n", | ||
1202 | blocks, out_of_time, np, sleep, preempt, | ||
1203 | prev->state, signal_pending(prev)); | ||
1204 | } | ||
1205 | if (entry->linked && preempt) | ||
1206 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
1207 | entry->linked->comm, entry->linked->pid); | ||
1208 | |||
1209 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1210 | if (tsk_rt(prev)->is_aux_task && | ||
1211 | (prev->state == TASK_INTERRUPTIBLE) && | ||
1212 | !blocks) { | ||
1213 | TRACE_TASK(prev, "Deferring descheduling of aux task %s/%d.\n", | ||
1214 | prev->comm, prev->pid); | ||
1215 | next = prev; /* allow prev to continue. */ | ||
1216 | goto out_set_state; | ||
1217 | } | ||
1218 | #endif | ||
1219 | |||
1220 | /* Do budget stuff */ | ||
1221 | if (blocks) { | ||
1222 | if (likely(!bt_flag_is_set(prev, BTF_WAITING_FOR_RELEASE))) | ||
1223 | budget_state_machine(prev,on_blocked); | ||
1224 | else { | ||
1225 | /* waiting for release. 'exit' the scheduler. */ | ||
1226 | crm_untrack_in_top_m(prev); | ||
1227 | budget_state_machine(prev,on_exit); | ||
1228 | } | ||
1229 | } | ||
1230 | else if (sleep) | ||
1231 | budget_state_machine(prev,on_sleep); | ||
1232 | else if (preempt) | ||
1233 | budget_state_machine(prev,on_preempt); | ||
1234 | |||
1235 | /* If a task blocks we have no choice but to reschedule. | ||
1236 | */ | ||
1237 | if (blocks) | ||
1238 | unlink(entry->scheduled); | ||
1239 | |||
1240 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING) | ||
1241 | if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) { | ||
1242 | if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
1243 | // don't track preemptions or locking protocol suspensions. | ||
1244 | TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n"); | ||
1245 | stop_gpu_tracker(entry->scheduled); | ||
1246 | } | ||
1247 | else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) { | ||
1248 | TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n"); | ||
1249 | } | ||
1250 | } | ||
1251 | #endif | ||
1252 | |||
1253 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
1254 | * We need to make sure to update the link structure anyway in case | ||
1255 | * that we are still linked. Multiple calls to request_exit_np() don't | ||
1256 | * hurt. | ||
1257 | */ | ||
1258 | if (np && (out_of_time || preempt || sleep)) { | ||
1259 | unlink(entry->scheduled); | ||
1260 | request_exit_np(entry->scheduled); | ||
1261 | } | ||
1262 | |||
1263 | /* Any task that is preemptable and either exhausts its execution | ||
1264 | * budget or wants to sleep completes. We may have to reschedule after | ||
1265 | * this. Don't do a job completion if we block (can't have timers running | ||
1266 | * for blocked jobs). | ||
1267 | */ | ||
1268 | if (!np && (out_of_time || sleep) && !blocks) { | ||
1269 | job_completion(entry->scheduled, !sleep); | ||
1270 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1271 | /* check if job completion enables an inheritance relation. no need to | ||
1272 | * recheck if task already inherits a priority since job_completion() | ||
1273 | * will not enable a higher-prio relation */ | ||
1274 | if (unlikely(recheck_inheritance && !tsk_rt(entry->scheduled)->inh_task)) { | ||
1275 | struct task_struct *hp_blocked; | ||
1276 | TRACE_TASK(entry->scheduled, "rechecking inheritance.\n"); | ||
1277 | hp_blocked = top_priority(&tsk_rt(entry->scheduled)->hp_blocked_tasks); | ||
1278 | /* hp_blocked_tasks_lock is held */ | ||
1279 | if (rm_higher_prio(hp_blocked, entry->scheduled)) | ||
1280 | __increase_priority_inheritance(entry->scheduled, effective_priority(hp_blocked)); | ||
1281 | } | ||
1282 | #endif | ||
1283 | } | ||
1284 | |||
1285 | /* Link pending task if we became unlinked. | ||
1286 | */ | ||
1287 | if (!entry->linked) | ||
1288 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
1289 | |||
1290 | /* The final scheduling decision. Do we need to switch for some reason? | ||
1291 | * If linked is different from scheduled, then select linked as next. | ||
1292 | */ | ||
1293 | if ((!np || blocks) && | ||
1294 | entry->linked != entry->scheduled) { | ||
1295 | /* Schedule a linked job? */ | ||
1296 | if (entry->linked) { | ||
1297 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
1298 | next = entry->linked; | ||
1299 | } | ||
1300 | if (entry->scheduled) { | ||
1301 | /* not gonna be scheduled soon */ | ||
1302 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
1303 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
1304 | } | ||
1305 | } | ||
1306 | else { | ||
1307 | /* Only override Linux scheduler if we have a real-time task | ||
1308 | * scheduled that needs to continue. | ||
1309 | */ | ||
1310 | if (exists) { | ||
1311 | next = prev; | ||
1312 | } | ||
1313 | } | ||
1314 | |||
1315 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1316 | out_set_state: | ||
1317 | #endif | ||
1318 | |||
1319 | sched_state_task_picked(); | ||
1320 | raw_readyq_unlock(&cluster->cluster_lock); | ||
1321 | |||
1322 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1323 | if (recheck_inheritance) { | ||
1324 | raw_spin_unlock(&tsk_rt(prev)->hp_blocked_tasks_lock); | ||
1325 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
1326 | raw_spin_unlock(&cluster->dgl_lock); | ||
1327 | #endif | ||
1328 | } | ||
1329 | #endif | ||
1330 | |||
1331 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1332 | TRACE("cluster_lock released, next=0x%p\n", next); | ||
1333 | |||
1334 | if (next) | ||
1335 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
1336 | else if (exists && !next) | ||
1337 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
1338 | #endif | ||
1339 | |||
1340 | return next; | ||
1341 | } | ||
1342 | |||
1343 | |||
1344 | /* _finish_switch - we just finished the switch away from prev | ||
1345 | */ | ||
1346 | static void crm_finish_switch(struct task_struct *prev) | ||
1347 | { | ||
1348 | cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries); | ||
1349 | |||
1350 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
1351 | #ifdef WANT_ALL_SCHED_EVENTS | ||
1352 | TRACE_TASK(prev, "switched away from\n"); | ||
1353 | #endif | ||
1354 | } | ||
1355 | |||
1356 | |||
1357 | /* Prepare a task for running in RT mode | ||
1358 | */ | ||
1359 | static void crm_task_new(struct task_struct * t, int on_rq, int running) | ||
1360 | { | ||
1361 | unsigned long flags; | ||
1362 | cpu_entry_t* entry; | ||
1363 | crm_domain_t* cluster; | ||
1364 | |||
1365 | TRACE("c-fp: task new %d (param running = %d, is_running = %d)\n", t->pid, running, is_running(t)); | ||
1366 | |||
1367 | /* the cluster doesn't change even if t is running */ | ||
1368 | cluster = task_cpu_cluster(t); | ||
1369 | |||
1370 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1371 | |||
1372 | /* setup job params */ | ||
1373 | release_at(t, litmus_clock()); | ||
1374 | |||
1375 | t->rt_param.linked_on = NO_CPU; | ||
1376 | |||
1377 | if (running) { | ||
1378 | entry = &per_cpu(crm_cpu_entries, task_cpu(t)); | ||
1379 | BUG_ON(entry->scheduled); | ||
1380 | |||
1381 | #ifdef CONFIG_RELEASE_MASTER | ||
1382 | if (entry->cpu != cluster->domain.release_master) { | ||
1383 | #endif | ||
1384 | entry->scheduled = t; | ||
1385 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
1386 | #ifdef CONFIG_RELEASE_MASTER | ||
1387 | } else { | ||
1388 | /* do not schedule on release master */ | ||
1389 | preempt(entry); /* force resched */ | ||
1390 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
1391 | } | ||
1392 | #endif | ||
1393 | } else { | ||
1394 | t->rt_param.scheduled_on = NO_CPU; | ||
1395 | } | ||
1396 | |||
1397 | if (is_running(t)) { | ||
1398 | crm_track_in_top_m(t); | ||
1399 | crm_job_arrival(t); | ||
1400 | } | ||
1401 | |||
1402 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1403 | } | ||
1404 | |||
1405 | static void crm_task_wake_up(struct task_struct *t) | ||
1406 | { | ||
1407 | unsigned long flags; | ||
1408 | crm_domain_t *cluster; | ||
1409 | lt_t now; | ||
1410 | |||
1411 | cluster = task_cpu_cluster(t); | ||
1412 | |||
1413 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1414 | |||
1415 | now = litmus_clock(); | ||
1416 | TRACE_TASK(t, "wake_up at %llu\n", now); | ||
1417 | |||
1418 | if (is_sporadic(t) && is_tardy(t, now)) { | ||
1419 | release_at(t, now); | ||
1420 | sched_trace_task_release(t); | ||
1421 | } | ||
1422 | else { | ||
1423 | /* periodic task model. don't force job to end. | ||
1424 | * rely on user to say when jobs complete or when budget expires. */ | ||
1425 | tsk_rt(t)->completed = 0; | ||
1426 | } | ||
1427 | |||
1428 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1429 | if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { | ||
1430 | TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", t->comm, t->pid); | ||
1431 | disable_aux_task_owner(t); | ||
1432 | } | ||
1433 | #endif | ||
1434 | |||
1435 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1436 | if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { | ||
1437 | TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", t->comm, t->pid); | ||
1438 | disable_gpu_owner(t); | ||
1439 | } | ||
1440 | #endif | ||
1441 | |||
1442 | budget_state_machine(t,on_wakeup); | ||
1443 | crm_job_arrival(t); | ||
1444 | |||
1445 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1446 | } | ||
1447 | |||
1448 | static void crm_task_block(struct task_struct *t) | ||
1449 | { | ||
1450 | unsigned long flags; | ||
1451 | crm_domain_t *cluster; | ||
1452 | |||
1453 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
1454 | |||
1455 | cluster = task_cpu_cluster(t); | ||
1456 | |||
1457 | /* unlink if necessary */ | ||
1458 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1459 | |||
1460 | unlink(t); | ||
1461 | |||
1462 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1463 | if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) { | ||
1464 | |||
1465 | TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid); | ||
1466 | enable_aux_task_owner(t); | ||
1467 | } | ||
1468 | #endif | ||
1469 | |||
1470 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1471 | if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) { | ||
1472 | |||
1473 | TRACE_CUR("%s/%d is blocked so klmirqd threads may inherit.\n", t->comm, t->pid); | ||
1474 | enable_gpu_owner(t); | ||
1475 | } | ||
1476 | #endif | ||
1477 | |||
1478 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1479 | |||
1480 | BUG_ON(!is_realtime(t)); | ||
1481 | } | ||
1482 | |||
1483 | |||
1484 | static void crm_task_exit(struct task_struct * t) | ||
1485 | { | ||
1486 | unsigned long flags; | ||
1487 | crm_domain_t *cluster = task_cpu_cluster(t); | ||
1488 | |||
1489 | /* unlink if necessary */ | ||
1490 | raw_readyq_lock_irqsave(&cluster->cluster_lock, flags); | ||
1491 | |||
1492 | if (tsk_rt(t)->inh_task) { | ||
1493 | WARN_ON(1); | ||
1494 | clear_inh_task_linkback(t, tsk_rt(t)->inh_task); | ||
1495 | } | ||
1496 | |||
1497 | /* disable budget enforcement */ | ||
1498 | crm_untrack_in_top_m(t); | ||
1499 | budget_state_machine(t,on_exit); | ||
1500 | |||
1501 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1502 | /* make sure we clean up on our way out */ | ||
1503 | if (unlikely(tsk_rt(t)->is_aux_task)) | ||
1504 | exit_aux_task(t); | ||
1505 | else if(tsk_rt(t)->has_aux_tasks) | ||
1506 | disable_aux_task_owner(t); | ||
1507 | #endif | ||
1508 | |||
1509 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1510 | /* make sure we clean up on our way out */ | ||
1511 | if(tsk_rt(t)->held_gpus) | ||
1512 | disable_gpu_owner(t); | ||
1513 | #endif | ||
1514 | |||
1515 | unlink(t); | ||
1516 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
1517 | cpu_entry_t *cpu; | ||
1518 | cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on); | ||
1519 | cpu->scheduled = NULL; | ||
1520 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
1521 | } | ||
1522 | raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1523 | |||
1524 | BUG_ON(!is_realtime(t)); | ||
1525 | TRACE_TASK(t, "RIP\n"); | ||
1526 | } | ||
1527 | |||
1528 | |||
1529 | |||
1530 | |||
1531 | |||
1532 | |||
1533 | static struct budget_tracker_ops crm_drain_simple_ops = | ||
1534 | { | ||
1535 | .on_scheduled = simple_on_scheduled, | ||
1536 | .on_blocked = simple_on_blocked, | ||
1537 | .on_preempt = simple_on_preempt, | ||
1538 | .on_sleep = simple_on_sleep, | ||
1539 | .on_exit = simple_on_exit, | ||
1540 | |||
1541 | .on_wakeup = NULL, | ||
1542 | .on_inherit = NULL, | ||
1543 | .on_disinherit = NULL, | ||
1544 | .on_enter_top_m = NULL, | ||
1545 | .on_exit_top_m = NULL, | ||
1546 | |||
1547 | .on_exhausted = crm_simple_on_exhausted, | ||
1548 | }; | ||
1549 | |||
1550 | static struct budget_tracker_ops crm_drain_simple_io_ops = | ||
1551 | { | ||
1552 | .on_scheduled = simple_io_on_scheduled, | ||
1553 | .on_blocked = simple_io_on_blocked, | ||
1554 | .on_preempt = simple_io_on_preempt, | ||
1555 | .on_sleep = simple_io_on_sleep, | ||
1556 | .on_exit = simple_io_on_exit, | ||
1557 | |||
1558 | .on_wakeup = simple_io_on_wakeup, | ||
1559 | .on_inherit = NULL, | ||
1560 | .on_disinherit = NULL, | ||
1561 | .on_enter_top_m = NULL, | ||
1562 | .on_exit_top_m = NULL, | ||
1563 | |||
1564 | .on_exhausted = crm_simple_io_on_exhausted, | ||
1565 | }; | ||
1566 | |||
1567 | static struct budget_tracker_ops crm_drain_sobliv_ops = | ||
1568 | { | ||
1569 | .on_scheduled = NULL, | ||
1570 | .on_preempt = NULL, | ||
1571 | .on_sleep = NULL, | ||
1572 | |||
1573 | .on_blocked = sobliv_on_blocked, | ||
1574 | .on_wakeup = sobliv_on_wakeup, | ||
1575 | .on_exit = sobliv_on_exit, | ||
1576 | .on_inherit = sobliv_on_inherit, | ||
1577 | .on_disinherit = sobliv_on_disinherit, | ||
1578 | .on_enter_top_m = sobliv_on_enter_top_m, | ||
1579 | .on_exit_top_m = sobliv_on_exit_top_m, | ||
1580 | |||
1581 | .on_exhausted = crm_sobliv_on_exhausted, | ||
1582 | }; | ||
1583 | |||
1584 | static long crm_admit_task(struct task_struct* tsk) | ||
1585 | { | ||
1586 | struct budget_tracker_ops* ops = NULL; | ||
1587 | |||
1588 | if (remote_cluster(task_cpu(tsk)) != task_cpu_cluster(tsk)) { | ||
1589 | // printk("rejected admit: incorrect cluster.\n"); | ||
1590 | // return -EINVAL; | ||
1591 | } | ||
1592 | |||
1593 | if (budget_enforced(tsk) || budget_signalled(tsk)) { | ||
1594 | switch(get_drain_policy(tsk)) { | ||
1595 | case DRAIN_SIMPLE: | ||
1596 | ops = &crm_drain_simple_ops; | ||
1597 | break; | ||
1598 | case DRAIN_SIMPLE_IO: | ||
1599 | ops = &crm_drain_simple_io_ops; | ||
1600 | break; | ||
1601 | case DRAIN_SOBLIV: | ||
1602 | /* budget_policy and budget_signal_policy cannot be quantum-based */ | ||
1603 | if (!budget_quantum_tracked(tsk) && budget_precisely_tracked(tsk)) { | ||
1604 | ops = &crm_drain_sobliv_ops; | ||
1605 | } | ||
1606 | else { | ||
1607 | printk("rejected admit: QUANTUM_ENFORCEMENT and QUANTUM_SIGNALS is " | ||
1608 | "unsupported with DRAIN_SOBLIV.\n"); | ||
1609 | return -EINVAL; | ||
1610 | } | ||
1611 | break; | ||
1612 | default: | ||
1613 | printk("rejected admit: Unsupported budget draining mode.\n"); | ||
1614 | return -EINVAL; | ||
1615 | } | ||
1616 | } | ||
1617 | |||
1618 | /* always init the budget tracker, even if we're not using timers */ | ||
1619 | init_budget_tracker(&tsk_rt(tsk)->budget, ops); | ||
1620 | |||
1621 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1622 | INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, | ||
1623 | rm_max_heap_base_priority_order); | ||
1624 | #endif | ||
1625 | |||
1626 | return 0; | ||
1627 | } | ||
1628 | |||
1629 | |||
1630 | |||
1631 | #ifdef CONFIG_LITMUS_LOCKING | ||
1632 | |||
1633 | #include <litmus/fdso.h> | ||
1634 | |||
1635 | /* called with IRQs off */ | ||
1636 | static int __increase_priority_inheritance(struct task_struct* t, | ||
1637 | struct task_struct* prio_inh) | ||
1638 | { | ||
1639 | int success = 1; | ||
1640 | int linked_on; | ||
1641 | int check_preempt = 0; | ||
1642 | crm_domain_t* cluster; | ||
1643 | struct task_struct* old_prio_inh = tsk_rt(t)->inh_task; | ||
1644 | |||
1645 | if (prio_inh && prio_inh == effective_priority(t)) { | ||
1646 | /* relationship already established. */ | ||
1647 | TRACE_TASK(t, "already has effective priority of %s/%d\n", | ||
1648 | prio_inh->comm, prio_inh->pid); | ||
1649 | goto out; | ||
1650 | } | ||
1651 | |||
1652 | if (prio_inh && (effective_priority(prio_inh) != prio_inh)) { | ||
1653 | TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n", | ||
1654 | prio_inh->comm, prio_inh->pid, | ||
1655 | effective_priority(prio_inh)->comm, | ||
1656 | effective_priority(prio_inh)->pid); | ||
1657 | #ifndef CONFIG_LITMUS_NESTED_LOCKING | ||
1658 | /* Tasks should only inherit the base priority of a task. | ||
1659 | If 't' inherits a priority, then tsk_rt(t)->inh_task should | ||
1660 | be passed to this function instead. This includes transitive | ||
1661 | inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */ | ||
1662 | BUG(); | ||
1663 | #else | ||
1664 | /* Not a bug with nested locking since inheritance propagation is | ||
1665 | not atomic. */ | ||
1666 | |||
1667 | /* TODO: Is the following 'helping' short-cut safe? | ||
1668 | prio_inh = effective_priority(prio_inh); | ||
1669 | */ | ||
1670 | #endif | ||
1671 | } | ||
1672 | |||
1673 | cluster = task_cpu_cluster(t); | ||
1674 | |||
1675 | #if 0 | ||
1676 | if (prio_inh && task_cpu_cluster(prio_inh) != cluster) { | ||
1677 | WARN_ONCE(1, "Illegal to inherit between clusters. " \ | ||
1678 | "target (%s/%d) on cluster w/ CPU %d and " \ | ||
1679 | "inh_prio (%s/%d) on w/ CPU %d\n", \ | ||
1680 | t->comm, t->pid, cluster->cpus[0]->cpu, \ | ||
1681 | prio_inh->comm, prio_inh->pid, \ | ||
1682 | task_cpu_cluster(prio_inh)->cpus[0]->cpu); | ||
1683 | return 1; | ||
1684 | } | ||
1685 | #endif | ||
1686 | |||
1687 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1688 | /* this sanity check allows for weaker locking in protocols */ | ||
1689 | /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */ | ||
1690 | if(__rm_higher_prio(prio_inh, BASE, t, EFFECTIVE)) { | ||
1691 | #endif | ||
1692 | sched_trace_eff_prio_change(t, prio_inh); | ||
1693 | |||
1694 | /* clear out old inheritance relation */ | ||
1695 | if (old_prio_inh) { | ||
1696 | budget_state_machine_chgprio(t,old_prio_inh,on_disinherit); | ||
1697 | clear_inh_task_linkback(t, old_prio_inh); | ||
1698 | } | ||
1699 | |||
1700 | TRACE_TASK(t, "inherits priority from %s/%d\n", | ||
1701 | prio_inh->comm, prio_inh->pid); | ||
1702 | tsk_rt(t)->inh_task = prio_inh; | ||
1703 | |||
1704 | /* update inheritance relation */ | ||
1705 | if (prio_inh) | ||
1706 | budget_state_machine_chgprio(t,prio_inh,on_inherit); | ||
1707 | |||
1708 | linked_on = tsk_rt(t)->linked_on; | ||
1709 | |||
1710 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
1711 | if (linked_on != NO_CPU) { | ||
1712 | TRACE_TASK(t, "%s: linked on %d\n", | ||
1713 | __FUNCTION__, linked_on); | ||
1714 | /* Holder is scheduled; need to re-order CPUs. | ||
1715 | * We can't use heap_decrease() here since | ||
1716 | * the cpu_heap is ordered in reverse direction, so | ||
1717 | * it is actually an increase. */ | ||
1718 | binheap_delete(&per_cpu(crm_cpu_entries, linked_on).hn, | ||
1719 | &cluster->cpu_heap); | ||
1720 | binheap_add(&per_cpu(crm_cpu_entries, linked_on).hn, | ||
1721 | &cluster->cpu_heap, cpu_entry_t, hn); | ||
1722 | |||
1723 | /* tell prio_inh that we're __running__ with its priority */ | ||
1724 | set_inh_task_linkback(t, prio_inh); | ||
1725 | } | ||
1726 | else { | ||
1727 | /* holder may be queued: first stop queue changes */ | ||
1728 | raw_spin_lock(&cluster->domain.release_lock); | ||
1729 | if (is_queued(t)) { | ||
1730 | TRACE_TASK(t, "%s: is queued\n", | ||
1731 | __FUNCTION__); | ||
1732 | /* We need to update the position of holder in some | ||
1733 | * heap. Note that this could be a release heap if we | ||
1734 | * budget enforcement is used and this job overran. */ | ||
1735 | check_preempt = | ||
1736 | !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node); | ||
1737 | } else { | ||
1738 | /* Nothing to do: if it is not queued and not linked | ||
1739 | * then it is either sleeping or currently being moved | ||
1740 | * by other code (e.g., a timer interrupt handler) that | ||
1741 | * will use the correct priority when enqueuing the | ||
1742 | * task. */ | ||
1743 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", | ||
1744 | __FUNCTION__); | ||
1745 | } | ||
1746 | raw_spin_unlock(&cluster->domain.release_lock); | ||
1747 | |||
1748 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1749 | /* propagate to aux tasks */ | ||
1750 | if (tsk_rt(t)->has_aux_tasks) { | ||
1751 | aux_task_owner_increase_priority(t); | ||
1752 | } | ||
1753 | #endif | ||
1754 | |||
1755 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1756 | /* propagate to gpu klmirqd */ | ||
1757 | if (tsk_rt(t)->held_gpus) { | ||
1758 | gpu_owner_increase_priority(t); | ||
1759 | } | ||
1760 | #endif | ||
1761 | |||
1762 | /* If holder was enqueued in a release heap, then the following | ||
1763 | * preemption check is pointless, but we can't easily detect | ||
1764 | * that case. If you want to fix this, then consider that | ||
1765 | * simply adding a state flag requires O(n) time to update when | ||
1766 | * releasing n tasks, which conflicts with the goal to have | ||
1767 | * O(log n) merges. */ | ||
1768 | if (check_preempt) { | ||
1769 | /* heap_decrease() hit the top level of the heap: make | ||
1770 | * sure preemption checks get the right task, not the | ||
1771 | * potentially stale cache. */ | ||
1772 | bheap_uncache_min(rm_ready_order, | ||
1773 | &cluster->domain.ready_queue); | ||
1774 | check_for_preemptions(cluster); | ||
1775 | } | ||
1776 | } | ||
1777 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1778 | } | ||
1779 | else { | ||
1780 | /* Occurance is okay under two scenarios: | ||
1781 | * 1. Fine-grain nested locks (no compiled DGL support): Concurrent | ||
1782 | * updates are chasing each other through the wait-for chain. | ||
1783 | * 2. Budget exhausion caused the HP waiter to loose its priority, but | ||
1784 | * the lock structure hasn't yet been updated (but soon will be). | ||
1785 | */ | ||
1786 | TRACE_TASK(t, "Spurious invalid priority increase. " | ||
1787 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d" | ||
1788 | "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
1789 | t->comm, t->pid, | ||
1790 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
1791 | (prio_inh) ? prio_inh->comm : "null", | ||
1792 | (prio_inh) ? prio_inh->pid : 0); | ||
1793 | WARN_ON(!prio_inh); | ||
1794 | success = 0; | ||
1795 | } | ||
1796 | #endif | ||
1797 | |||
1798 | out: | ||
1799 | return success; | ||
1800 | } | ||
1801 | |||
1802 | /* called with IRQs off */ | ||
1803 | static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) | ||
1804 | { | ||
1805 | crm_domain_t* cluster = task_cpu_cluster(t); | ||
1806 | |||
1807 | raw_readyq_lock(&cluster->cluster_lock); | ||
1808 | |||
1809 | TRACE_TASK(t, "to inherit from %s/%d\n", prio_inh->comm, prio_inh->pid); | ||
1810 | |||
1811 | __increase_priority_inheritance(t, prio_inh); | ||
1812 | |||
1813 | raw_readyq_unlock(&cluster->cluster_lock); | ||
1814 | } | ||
1815 | |||
1816 | /* called with IRQs off */ | ||
1817 | static int __decrease_priority_inheritance(struct task_struct* t, | ||
1818 | struct task_struct* prio_inh, | ||
1819 | int budget_tiggered) | ||
1820 | { | ||
1821 | crm_domain_t* cluster; | ||
1822 | int success = 1; | ||
1823 | struct task_struct* old_prio_inh = tsk_rt(t)->inh_task; | ||
1824 | |||
1825 | if (prio_inh == old_prio_inh) { | ||
1826 | /* relationship already established. */ | ||
1827 | TRACE_TASK(t, "already inherits priority from %s/%d\n", | ||
1828 | (prio_inh) ? prio_inh->comm : "(null)", | ||
1829 | (prio_inh) ? prio_inh->pid : 0); | ||
1830 | goto out; | ||
1831 | } | ||
1832 | |||
1833 | if (prio_inh && (effective_priority(prio_inh) != prio_inh)) { | ||
1834 | TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n", | ||
1835 | prio_inh->comm, prio_inh->pid, | ||
1836 | effective_priority(prio_inh)->comm, | ||
1837 | effective_priority(prio_inh)->pid); | ||
1838 | #ifndef CONFIG_LITMUS_NESTED_LOCKING | ||
1839 | /* Tasks should only inherit the base priority of a task. | ||
1840 | If 't' inherits a priority, then tsk_rt(t)->inh_task should | ||
1841 | be passed to this function instead. This includes transitive | ||
1842 | inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */ | ||
1843 | BUG(); | ||
1844 | #else | ||
1845 | /* Not a bug with nested locking since inheritance propagation is | ||
1846 | not atomic. */ | ||
1847 | |||
1848 | /* TODO: Is the following 'helping' short-cut safe? | ||
1849 | prio_inh = effective_priority(prio_inh); | ||
1850 | */ | ||
1851 | #endif | ||
1852 | } | ||
1853 | |||
1854 | cluster = task_cpu_cluster(t); | ||
1855 | |||
1856 | #if 0 | ||
1857 | if (prio_inh && task_cpu_cluster(prio_inh) != cluster) { | ||
1858 | WARN_ONCE(1, "Illegal to inherit between clusters. " \ | ||
1859 | "target (%s/%d) on cluster w/ CPU %d and " \ | ||
1860 | "inh_prio (%s/%d) on w/ CPU %d\n", \ | ||
1861 | t->comm, t->pid, cluster->cpus[0]->cpu, \ | ||
1862 | prio_inh->comm, prio_inh->pid, \ | ||
1863 | task_cpu_cluster(prio_inh)->cpus[0]->cpu); | ||
1864 | return 1; | ||
1865 | } | ||
1866 | #endif | ||
1867 | |||
1868 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1869 | if(budget_tiggered || __rm_higher_prio(t, EFFECTIVE, prio_inh, BASE)) { | ||
1870 | #endif | ||
1871 | sched_trace_eff_prio_change(t, prio_inh); | ||
1872 | |||
1873 | if (budget_tiggered) { | ||
1874 | BUG_ON(!old_prio_inh); | ||
1875 | TRACE_TASK(t, "budget-triggered 'decrease' in priority. " | ||
1876 | "%s/%d's budget should have just been exhuasted.\n", | ||
1877 | old_prio_inh->comm, old_prio_inh->pid); | ||
1878 | } | ||
1879 | |||
1880 | /* clear out old inheritance relation */ | ||
1881 | if (old_prio_inh) { | ||
1882 | budget_state_machine_chgprio(t,old_prio_inh,on_disinherit); | ||
1883 | clear_inh_task_linkback(t, old_prio_inh); | ||
1884 | } | ||
1885 | |||
1886 | /* A job only stops inheriting a priority when it releases a | ||
1887 | * resource. Thus we can make the following assumption.*/ | ||
1888 | if(prio_inh) | ||
1889 | TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n", | ||
1890 | prio_inh->comm, prio_inh->pid); | ||
1891 | else | ||
1892 | TRACE_TASK(t, "base priority restored.\n"); | ||
1893 | |||
1894 | /* set up new inheritance relation */ | ||
1895 | tsk_rt(t)->inh_task = prio_inh; | ||
1896 | |||
1897 | if (prio_inh) | ||
1898 | budget_state_machine_chgprio(t,prio_inh,on_inherit); | ||
1899 | |||
1900 | if(tsk_rt(t)->scheduled_on != NO_CPU) { | ||
1901 | TRACE_TASK(t, "is scheduled.\n"); | ||
1902 | |||
1903 | /* link back to new inheritance */ | ||
1904 | if (prio_inh) | ||
1905 | set_inh_task_linkback(t, prio_inh); | ||
1906 | |||
1907 | /* Check if rescheduling is necessary. We can't use heap_decrease() | ||
1908 | * since the priority was effectively lowered. */ | ||
1909 | unlink(t); | ||
1910 | crm_job_arrival(t); | ||
1911 | } | ||
1912 | else { | ||
1913 | /* task is queued */ | ||
1914 | raw_spin_lock(&cluster->domain.release_lock); | ||
1915 | if (is_queued(t)) { | ||
1916 | TRACE_TASK(t, "is queued.\n"); | ||
1917 | |||
1918 | BUG_ON( | ||
1919 | !is_released(t, litmus_clock()) && | ||
1920 | !tsk_rt(t)->job_params.is_backlogged_job && | ||
1921 | !is_early_releasing(t)); | ||
1922 | |||
1923 | unlink(t); | ||
1924 | crm_job_arrival(t); | ||
1925 | } | ||
1926 | else { | ||
1927 | TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n"); | ||
1928 | } | ||
1929 | raw_spin_unlock(&cluster->domain.release_lock); | ||
1930 | } | ||
1931 | |||
1932 | #ifdef CONFIG_REALTIME_AUX_TASKS | ||
1933 | /* propagate to aux tasks */ | ||
1934 | if (tsk_rt(t)->has_aux_tasks) | ||
1935 | aux_task_owner_decrease_priority(t); | ||
1936 | #endif | ||
1937 | |||
1938 | #ifdef CONFIG_LITMUS_NVIDIA | ||
1939 | /* propagate to gpu */ | ||
1940 | if (tsk_rt(t)->held_gpus) | ||
1941 | gpu_owner_decrease_priority(t); | ||
1942 | #endif | ||
1943 | |||
1944 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1945 | } | ||
1946 | else { | ||
1947 | TRACE_TASK(t, "Spurious invalid priority decrease. " | ||
1948 | "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n" | ||
1949 | "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n", | ||
1950 | t->comm, t->pid, | ||
1951 | effective_priority(t)->comm, effective_priority(t)->pid, | ||
1952 | (prio_inh) ? prio_inh->comm : "null", | ||
1953 | (prio_inh) ? prio_inh->pid : 0); | ||
1954 | success = 0; | ||
1955 | } | ||
1956 | #endif | ||
1957 | |||
1958 | out: | ||
1959 | return success; | ||
1960 | } | ||
1961 | |||
1962 | static void decrease_priority_inheritance(struct task_struct* t, | ||
1963 | struct task_struct* prio_inh, | ||
1964 | int budget_tiggered) | ||
1965 | { | ||
1966 | crm_domain_t* cluster = task_cpu_cluster(t); | ||
1967 | |||
1968 | raw_readyq_lock(&cluster->cluster_lock); | ||
1969 | |||
1970 | TRACE_TASK(t, "to inherit from %s/%d (decrease)\n", | ||
1971 | (prio_inh) ? prio_inh->comm : "null", | ||
1972 | (prio_inh) ? prio_inh->pid : 0); | ||
1973 | |||
1974 | __decrease_priority_inheritance(t, prio_inh, budget_tiggered); | ||
1975 | |||
1976 | raw_readyq_unlock(&cluster->cluster_lock); | ||
1977 | } | ||
1978 | |||
1979 | |||
1980 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
1981 | |||
1982 | /* called with IRQs off */ | ||
1983 | /* preconditions: | ||
1984 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
1985 | (2) The lock 'to_unlock' is held. | ||
1986 | */ | ||
1987 | static void nested_increase_priority_inheritance(struct task_struct* t, | ||
1988 | struct task_struct* prio_inh, | ||
1989 | raw_spinlock_t *to_unlock, | ||
1990 | unsigned long irqflags) | ||
1991 | { | ||
1992 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
1993 | |||
1994 | if(tsk_rt(t)->inh_task != prio_inh) { // shield redundent calls. | ||
1995 | increase_priority_inheritance(t, prio_inh); // increase our prio. | ||
1996 | } | ||
1997 | |||
1998 | /* note: cluster lock is not held continuously during propagation, so there | ||
1999 | may be momentary inconsistencies while nested priority propagation 'chases' | ||
2000 | other updates. */ | ||
2001 | |||
2002 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. | ||
2003 | |||
2004 | if(blocked_lock) { | ||
2005 | if(blocked_lock->ops->supports_nesting) { | ||
2006 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
2007 | blocked_lock->ident); | ||
2008 | |||
2009 | // beware: recursion | ||
2010 | blocked_lock->ops->propagate_increase_inheritance(blocked_lock, | ||
2011 | t, to_unlock, | ||
2012 | irqflags); | ||
2013 | } | ||
2014 | else { | ||
2015 | TRACE_TASK(t, "Inheritor is blocked on litmus lock (%d) that does not support nesting!\n", | ||
2016 | blocked_lock->ident); | ||
2017 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2018 | } | ||
2019 | } | ||
2020 | else { | ||
2021 | TRACE_TASK(t, "is not blocked on litmus lock. No propagation.\n"); | ||
2022 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2023 | } | ||
2024 | } | ||
2025 | |||
2026 | /* called with IRQs off */ | ||
2027 | /* preconditions: | ||
2028 | (1) The 'hp_blocked_tasks_lock' of task 't' is held. | ||
2029 | (2) The lock 'to_unlock' is held. | ||
2030 | */ | ||
2031 | static void nested_decrease_priority_inheritance(struct task_struct* t, | ||
2032 | struct task_struct* prio_inh, | ||
2033 | raw_spinlock_t *to_unlock, | ||
2034 | unsigned long irqflags, | ||
2035 | int budget_tiggered) | ||
2036 | { | ||
2037 | struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock; | ||
2038 | decrease_priority_inheritance(t, prio_inh, budget_tiggered); | ||
2039 | |||
2040 | raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap. | ||
2041 | |||
2042 | if(blocked_lock) { | ||
2043 | if(blocked_lock->ops->supports_nesting) { | ||
2044 | TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n", | ||
2045 | blocked_lock->ident); | ||
2046 | // beware: recursion | ||
2047 | blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t, | ||
2048 | to_unlock, | ||
2049 | irqflags, | ||
2050 | budget_tiggered); | ||
2051 | } | ||
2052 | else { | ||
2053 | TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n", | ||
2054 | blocked_lock); | ||
2055 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2056 | } | ||
2057 | } | ||
2058 | else { | ||
2059 | TRACE_TASK(t, "is not blocked. No propagation.\n"); | ||
2060 | unlock_fine_irqrestore(to_unlock, irqflags); | ||
2061 | } | ||
2062 | } | ||
2063 | |||
2064 | |||
2065 | /* ******************** FIFO MUTEX ********************** */ | ||
2066 | |||
2067 | static struct litmus_lock_ops crm_fifo_mutex_lock_ops = { | ||
2068 | .lock = fifo_mutex_lock, | ||
2069 | .unlock = fifo_mutex_unlock, | ||
2070 | .should_yield_lock = fifo_mutex_should_yield_lock, | ||
2071 | .close = fifo_mutex_close, | ||
2072 | .deallocate = fifo_mutex_free, | ||
2073 | |||
2074 | .budget_exhausted = fifo_mutex_budget_exhausted, | ||
2075 | .propagate_increase_inheritance = fifo_mutex_propagate_increase_inheritance, | ||
2076 | .propagate_decrease_inheritance = fifo_mutex_propagate_decrease_inheritance, | ||
2077 | |||
2078 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2079 | .dgl_lock = fifo_mutex_dgl_lock, | ||
2080 | .is_owner = fifo_mutex_is_owner, | ||
2081 | .get_owner = fifo_mutex_get_owner, | ||
2082 | .enable_priority = fifo_mutex_enable_priority, | ||
2083 | |||
2084 | .dgl_can_quick_lock = NULL, | ||
2085 | .dgl_quick_lock = NULL, | ||
2086 | |||
2087 | .supports_dgl = 1, | ||
2088 | .requires_atomic_dgl = 0, | ||
2089 | #endif | ||
2090 | .supports_nesting = 1, | ||
2091 | .supports_budget_exhaustion = 1, | ||
2092 | .is_omlp_family = 0, | ||
2093 | }; | ||
2094 | |||
2095 | static struct litmus_lock* crm_new_fifo_mutex(void) | ||
2096 | { | ||
2097 | return fifo_mutex_new(&crm_fifo_mutex_lock_ops); | ||
2098 | } | ||
2099 | |||
2100 | /* ******************** PRIOQ MUTEX ********************** */ | ||
2101 | |||
2102 | static struct litmus_lock_ops crm_prioq_mutex_lock_ops = { | ||
2103 | .lock = prioq_mutex_lock, | ||
2104 | .unlock = prioq_mutex_unlock, | ||
2105 | .should_yield_lock = prioq_mutex_should_yield_lock, | ||
2106 | .close = prioq_mutex_close, | ||
2107 | .deallocate = prioq_mutex_free, | ||
2108 | |||
2109 | .budget_exhausted = prioq_mutex_budget_exhausted, | ||
2110 | .propagate_increase_inheritance = prioq_mutex_propagate_increase_inheritance, | ||
2111 | .propagate_decrease_inheritance = prioq_mutex_propagate_decrease_inheritance, | ||
2112 | |||
2113 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2114 | .dgl_lock = prioq_mutex_dgl_lock, | ||
2115 | .is_owner = prioq_mutex_is_owner, | ||
2116 | .get_owner = prioq_mutex_get_owner, | ||
2117 | .enable_priority = prioq_mutex_enable_priority, | ||
2118 | |||
2119 | .dgl_can_quick_lock = prioq_mutex_dgl_can_quick_lock, | ||
2120 | .dgl_quick_lock = prioq_mutex_dgl_quick_lock, | ||
2121 | |||
2122 | .supports_dgl = 1, | ||
2123 | .requires_atomic_dgl = 1, | ||
2124 | #endif | ||
2125 | .supports_nesting = 1, | ||
2126 | .supports_budget_exhaustion = 1, | ||
2127 | .is_omlp_family = 0, | ||
2128 | }; | ||
2129 | |||
2130 | static struct litmus_lock* crm_new_prioq_mutex(void) | ||
2131 | { | ||
2132 | return prioq_mutex_new(&crm_prioq_mutex_lock_ops); | ||
2133 | } | ||
2134 | |||
2135 | /* ******************** IKGLP ********************** */ | ||
2136 | |||
2137 | static struct litmus_lock_ops crm_ikglp_lock_ops = { | ||
2138 | .lock = ikglp_lock, | ||
2139 | .unlock = ikglp_unlock, | ||
2140 | .should_yield_lock = NULL, | ||
2141 | .close = ikglp_close, | ||
2142 | .deallocate = ikglp_free, | ||
2143 | |||
2144 | .budget_exhausted = ikglp_budget_exhausted, | ||
2145 | .omlp_virtual_unlock = ikglp_virtual_unlock, | ||
2146 | |||
2147 | // ikglp can only be an outer-most lock. | ||
2148 | .propagate_increase_inheritance = NULL, | ||
2149 | .propagate_decrease_inheritance = NULL, | ||
2150 | |||
2151 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2152 | .supports_dgl = 0, | ||
2153 | .requires_atomic_dgl = 0, | ||
2154 | #endif | ||
2155 | .supports_nesting = 0, | ||
2156 | .supports_budget_exhaustion = 1, | ||
2157 | .is_omlp_family = 1, | ||
2158 | }; | ||
2159 | |||
2160 | static struct litmus_lock* crm_new_ikglp(void* __user arg) | ||
2161 | { | ||
2162 | // assumes clusters of uniform size. | ||
2163 | return ikglp_new(cluster_size, &crm_ikglp_lock_ops, arg); | ||
2164 | } | ||
2165 | |||
2166 | |||
2167 | /* ******************** KFMLP support ********************** */ | ||
2168 | |||
2169 | static struct litmus_lock_ops crm_kfmlp_lock_ops = { | ||
2170 | .lock = kfmlp_lock, | ||
2171 | .unlock = kfmlp_unlock, | ||
2172 | .should_yield_lock = NULL, | ||
2173 | .close = kfmlp_close, | ||
2174 | .deallocate = kfmlp_free, | ||
2175 | |||
2176 | // kfmlp can only be an outer-most lock. | ||
2177 | .propagate_increase_inheritance = NULL, | ||
2178 | .propagate_decrease_inheritance = NULL, | ||
2179 | |||
2180 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2181 | .supports_dgl = 0, | ||
2182 | .requires_atomic_dgl = 0, | ||
2183 | #endif | ||
2184 | .supports_nesting = 0, | ||
2185 | .supports_budget_exhaustion = 0, | ||
2186 | .is_omlp_family = 0, | ||
2187 | }; | ||
2188 | |||
2189 | |||
2190 | static struct litmus_lock* crm_new_kfmlp(void* __user arg) | ||
2191 | { | ||
2192 | return kfmlp_new(&crm_kfmlp_lock_ops, arg); | ||
2193 | } | ||
2194 | |||
2195 | |||
2196 | /* **** lock constructor **** */ | ||
2197 | |||
2198 | static long crm_allocate_lock(struct litmus_lock **lock, int type, | ||
2199 | void* __user args) | ||
2200 | { | ||
2201 | int err; | ||
2202 | |||
2203 | switch (type) { | ||
2204 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2205 | case FIFO_MUTEX: | ||
2206 | *lock = crm_new_fifo_mutex(); | ||
2207 | break; | ||
2208 | |||
2209 | case PRIOQ_MUTEX: | ||
2210 | *lock = crm_new_prioq_mutex(); | ||
2211 | break; | ||
2212 | |||
2213 | case IKGLP_SEM: | ||
2214 | *lock = crm_new_ikglp(args); | ||
2215 | break; | ||
2216 | #endif | ||
2217 | case KFMLP_SEM: | ||
2218 | *lock = crm_new_kfmlp(args); | ||
2219 | break; | ||
2220 | |||
2221 | default: | ||
2222 | err = -ENXIO; | ||
2223 | goto UNSUPPORTED_LOCK; | ||
2224 | }; | ||
2225 | |||
2226 | if (*lock) | ||
2227 | err = 0; | ||
2228 | else | ||
2229 | err = -ENOMEM; | ||
2230 | |||
2231 | UNSUPPORTED_LOCK: | ||
2232 | return err; | ||
2233 | } | ||
2234 | |||
2235 | #endif // CONFIG_LITMUS_LOCKING | ||
2236 | |||
2237 | |||
2238 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
2239 | static struct affinity_observer_ops crm_kfmlp_affinity_ops __attribute__ ((unused)) = { | ||
2240 | .close = kfmlp_aff_obs_close, | ||
2241 | .deallocate = kfmlp_aff_obs_free, | ||
2242 | }; | ||
2243 | |||
2244 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2245 | static struct affinity_observer_ops crm_ikglp_affinity_ops __attribute__ ((unused)) = { | ||
2246 | .close = ikglp_aff_obs_close, | ||
2247 | .deallocate = ikglp_aff_obs_free, | ||
2248 | }; | ||
2249 | #endif | ||
2250 | |||
2251 | static long crm_allocate_affinity_observer(struct affinity_observer **aff_obs, | ||
2252 | int type, | ||
2253 | void* __user args) | ||
2254 | { | ||
2255 | int err; | ||
2256 | |||
2257 | switch (type) { | ||
2258 | #ifdef CONFIG_LITMUS_NVIDIA | ||
2259 | case KFMLP_SIMPLE_GPU_AFF_OBS: | ||
2260 | *aff_obs = kfmlp_simple_gpu_aff_obs_new(&crm_kfmlp_affinity_ops, args); | ||
2261 | break; | ||
2262 | |||
2263 | case KFMLP_GPU_AFF_OBS: | ||
2264 | *aff_obs = kfmlp_gpu_aff_obs_new(&crm_kfmlp_affinity_ops, args); | ||
2265 | break; | ||
2266 | |||
2267 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2268 | case IKGLP_SIMPLE_GPU_AFF_OBS: | ||
2269 | *aff_obs = ikglp_simple_gpu_aff_obs_new(&crm_ikglp_affinity_ops, args); | ||
2270 | break; | ||
2271 | |||
2272 | case IKGLP_GPU_AFF_OBS: | ||
2273 | *aff_obs = ikglp_gpu_aff_obs_new(&crm_ikglp_affinity_ops, args); | ||
2274 | break; | ||
2275 | #endif | ||
2276 | #endif | ||
2277 | default: | ||
2278 | err = -ENXIO; | ||
2279 | goto UNSUPPORTED_AFF_OBS; | ||
2280 | }; | ||
2281 | |||
2282 | if (*aff_obs) | ||
2283 | err = 0; | ||
2284 | else | ||
2285 | err = -ENOMEM; | ||
2286 | |||
2287 | UNSUPPORTED_AFF_OBS: | ||
2288 | return err; | ||
2289 | } | ||
2290 | #endif | ||
2291 | |||
2292 | |||
2293 | |||
2294 | #endif // CONFIG_LITMUS_NESTED_LOCKING | ||
2295 | |||
2296 | |||
2297 | #ifdef VERBOSE_INIT | ||
2298 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
2299 | { | ||
2300 | int chk; | ||
2301 | char buf[255]; | ||
2302 | |||
2303 | chk = cpulist_scnprintf(buf, 254, mask); | ||
2304 | buf[chk] = '\0'; | ||
2305 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
2306 | |||
2307 | } | ||
2308 | #endif | ||
2309 | |||
2310 | static void cleanup_crm(void) | ||
2311 | { | ||
2312 | int i; | ||
2313 | |||
2314 | if (clusters_allocated) { | ||
2315 | for (i = 0; i < num_clusters; i++) { | ||
2316 | kfree(crm[i].cpus); | ||
2317 | free_cpumask_var(crm[i].cpu_map); | ||
2318 | } | ||
2319 | |||
2320 | kfree(crm); | ||
2321 | } | ||
2322 | } | ||
2323 | |||
2324 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
2325 | static int crm_map_gpu_to_cpu(int gpu) | ||
2326 | { | ||
2327 | int default_cpu; | ||
2328 | int cpu_cluster = gpu / gpu_cluster_size; | ||
2329 | |||
2330 | /* bonham-specific hack for the fully partitioned case (both CPUs and GPUs partitioned) */ | ||
2331 | /* TODO: Make this aware of the NUMA topology generically */ | ||
2332 | if(num_clusters == 12 && num_gpu_clusters == 8) { | ||
2333 | if(gpu >= 4) { | ||
2334 | cpu_cluster += 2; // assign the GPU to a CPU on the same NUMA node | ||
2335 | } | ||
2336 | } | ||
2337 | |||
2338 | default_cpu = crm[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster | ||
2339 | |||
2340 | TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu); | ||
2341 | |||
2342 | return default_cpu; | ||
2343 | } | ||
2344 | #endif | ||
2345 | |||
2346 | static long crm_activate_plugin(void) | ||
2347 | { | ||
2348 | int i, j, cpu, ccpu, cpu_count; | ||
2349 | cpu_entry_t *entry; | ||
2350 | |||
2351 | cpumask_var_t mask; | ||
2352 | int chk = 0; | ||
2353 | |||
2354 | /* de-allocate old clusters, if any */ | ||
2355 | cleanup_crm(); | ||
2356 | |||
2357 | |||
2358 | printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n", | ||
2359 | cluster_config); | ||
2360 | |||
2361 | /* need to get cluster_size first */ | ||
2362 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
2363 | return -ENOMEM; | ||
2364 | |||
2365 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
2366 | cluster_size = num_online_cpus(); | ||
2367 | } else { | ||
2368 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
2369 | if (chk) { | ||
2370 | /* if chk != 0 then it is the max allowed index */ | ||
2371 | printk(KERN_INFO "C-RM: Cluster configuration = %d " | ||
2372 | "is not supported on this hardware.\n", | ||
2373 | cluster_config); | ||
2374 | /* User should notice that the configuration failed, so | ||
2375 | * let's bail out. */ | ||
2376 | return -EINVAL; | ||
2377 | } | ||
2378 | |||
2379 | cluster_size = cpumask_weight(mask); | ||
2380 | } | ||
2381 | |||
2382 | if ((num_online_cpus() % cluster_size) != 0) { | ||
2383 | /* this can't be right, some cpus are left out */ | ||
2384 | printk(KERN_ERR "C-RM: Trying to group %d cpus in %d!\n", | ||
2385 | num_online_cpus(), cluster_size); | ||
2386 | return -1; | ||
2387 | } | ||
2388 | |||
2389 | num_clusters = num_online_cpus() / cluster_size; | ||
2390 | printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n", | ||
2391 | num_clusters, cluster_size); | ||
2392 | |||
2393 | |||
2394 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
2395 | num_gpu_clusters = min(num_clusters, num_online_gpus()); | ||
2396 | gpu_cluster_size = num_online_gpus() / num_gpu_clusters; | ||
2397 | |||
2398 | if (((num_online_gpus() % gpu_cluster_size) != 0) || | ||
2399 | (num_gpu_clusters != num_clusters)) { | ||
2400 | printk(KERN_WARNING "C-RM: GPUs not uniformly distributed among CPU clusters.\n"); | ||
2401 | } | ||
2402 | #endif | ||
2403 | |||
2404 | /* initialize clusters */ | ||
2405 | crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC); | ||
2406 | for (i = 0; i < num_clusters; i++) { | ||
2407 | |||
2408 | crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), | ||
2409 | GFP_ATOMIC); | ||
2410 | INIT_BINHEAP_HANDLE(&(crm[i].cpu_heap), cpu_lower_prio); | ||
2411 | rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs); | ||
2412 | |||
2413 | if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC)) | ||
2414 | return -ENOMEM; | ||
2415 | #ifdef CONFIG_RELEASE_MASTER | ||
2416 | crm[i].domain.release_master = atomic_read(&release_master_cpu); | ||
2417 | #endif | ||
2418 | } | ||
2419 | |||
2420 | /* cycle through cluster and add cpus to them */ | ||
2421 | for (i = 0; i < num_clusters; i++) { | ||
2422 | |||
2423 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2424 | raw_spin_lock_init(&crm[i].dgl_lock); | ||
2425 | #endif | ||
2426 | |||
2427 | #ifdef RECURSIVE_READY_QUEUE_LOCK | ||
2428 | crm[i].recursive_depth = 0; | ||
2429 | atomic_set(&crm[i].owner_cpu, NO_CPU); | ||
2430 | #endif | ||
2431 | |||
2432 | crm[i].top_m_size = 0; | ||
2433 | INIT_BINHEAP_HANDLE(&crm[i].top_m, crm_min_heap_base_priority_order); | ||
2434 | INIT_BINHEAP_HANDLE(&crm[i].not_top_m, crm_max_heap_base_priority_order); | ||
2435 | |||
2436 | for_each_online_cpu(cpu) { | ||
2437 | /* check if the cpu is already in a cluster */ | ||
2438 | for (j = 0; j < num_clusters; j++) | ||
2439 | if (cpumask_test_cpu(cpu, crm[j].cpu_map)) | ||
2440 | break; | ||
2441 | /* if it is in a cluster go to next cpu */ | ||
2442 | if (j < num_clusters && | ||
2443 | cpumask_test_cpu(cpu, crm[j].cpu_map)) | ||
2444 | continue; | ||
2445 | |||
2446 | /* this cpu isn't in any cluster */ | ||
2447 | /* get the shared cpus */ | ||
2448 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
2449 | cpumask_copy(mask, cpu_online_mask); | ||
2450 | else | ||
2451 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
2452 | |||
2453 | cpumask_copy(crm[i].cpu_map, mask); | ||
2454 | #ifdef VERBOSE_INIT | ||
2455 | print_cluster_topology(mask, cpu); | ||
2456 | #endif | ||
2457 | /* add cpus to current cluster and init cpu_entry_t */ | ||
2458 | cpu_count = 0; | ||
2459 | for_each_cpu(ccpu, crm[i].cpu_map) { | ||
2460 | |||
2461 | entry = &per_cpu(crm_cpu_entries, ccpu); | ||
2462 | crm[i].cpus[cpu_count] = entry; | ||
2463 | |||
2464 | memset(entry, 0, sizeof(*entry)); | ||
2465 | entry->cpu = ccpu; | ||
2466 | entry->cluster = &crm[i]; | ||
2467 | INIT_BINHEAP_NODE(&entry->hn); | ||
2468 | mb(); | ||
2469 | |||
2470 | ++cpu_count; | ||
2471 | |||
2472 | #ifdef CONFIG_RELEASE_MASTER | ||
2473 | /* only add CPUs that should schedule jobs */ | ||
2474 | if (entry->cpu != entry->cluster->domain.release_master) | ||
2475 | #endif | ||
2476 | update_cpu_position(entry); | ||
2477 | } | ||
2478 | /* done with this cluster */ | ||
2479 | break; | ||
2480 | } | ||
2481 | } | ||
2482 | |||
2483 | #ifdef CONFIG_LITMUS_SOFTIRQD | ||
2484 | init_klmirqd(); | ||
2485 | #endif | ||
2486 | |||
2487 | #ifdef CONFIG_LITMUS_NVIDIA | ||
2488 | init_nvidia_info(); | ||
2489 | #endif | ||
2490 | |||
2491 | init_wake_queues(); | ||
2492 | |||
2493 | free_cpumask_var(mask); | ||
2494 | clusters_allocated = 1; | ||
2495 | return 0; | ||
2496 | } | ||
2497 | |||
2498 | /* Plugin object */ | ||
2499 | static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = { | ||
2500 | .plugin_name = "C-RM", // for now | ||
2501 | .finish_switch = crm_finish_switch, | ||
2502 | .tick = crm_tick, | ||
2503 | .task_new = crm_task_new, | ||
2504 | .complete_job = complete_job, | ||
2505 | .task_exit = crm_task_exit, | ||
2506 | .schedule = crm_schedule, | ||
2507 | .task_wake_up = crm_task_wake_up, | ||
2508 | .task_block = crm_task_block, | ||
2509 | .admit_task = crm_admit_task, | ||
2510 | .activate_plugin = crm_activate_plugin, | ||
2511 | .compare = rm_higher_prio, | ||
2512 | #ifdef CONFIG_LITMUS_LOCKING | ||
2513 | .allocate_lock = crm_allocate_lock, | ||
2514 | .increase_prio = increase_priority_inheritance, | ||
2515 | .decrease_prio = decrease_priority_inheritance, | ||
2516 | .__increase_prio = __increase_priority_inheritance, | ||
2517 | .__decrease_prio = __decrease_priority_inheritance, | ||
2518 | #endif | ||
2519 | #ifdef CONFIG_LITMUS_NESTED_LOCKING | ||
2520 | .nested_increase_prio = nested_increase_priority_inheritance, | ||
2521 | .nested_decrease_prio = nested_decrease_priority_inheritance, | ||
2522 | .__compare = __rm_higher_prio, | ||
2523 | #endif | ||
2524 | #ifdef CONFIG_LITMUS_DGL_SUPPORT | ||
2525 | .get_dgl_spinlock = crm_get_dgl_spinlock, | ||
2526 | #endif | ||
2527 | #ifdef CONFIG_LITMUS_AFFINITY_LOCKING | ||
2528 | .allocate_aff_obs = crm_allocate_affinity_observer, | ||
2529 | #endif | ||
2530 | #if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD) | ||
2531 | .map_gpu_to_cpu = crm_map_gpu_to_cpu, | ||
2532 | #endif | ||
2533 | }; | ||
2534 | |||
/*
 * procfs handles set up by init_crm() and torn down by clean_crm().
 * Both start out NULL (static storage is zero-initialized).
 */
static struct proc_dir_entry *cluster_file;
static struct proc_dir_entry *crm_dir;
2536 | |||
2537 | static int __init init_crm(void) | ||
2538 | { | ||
2539 | int err, fs; | ||
2540 | |||
2541 | err = register_sched_plugin(&crm_plugin); | ||
2542 | if (!err) { | ||
2543 | fs = make_plugin_proc_dir(&crm_plugin, &crm_dir); | ||
2544 | if (!fs) | ||
2545 | cluster_file = create_cluster_file(crm_dir, &cluster_config); | ||
2546 | else | ||
2547 | printk(KERN_ERR "Could not allocate C-RM procfs dir.\n"); | ||
2548 | } | ||
2549 | return err; | ||
2550 | } | ||
2551 | |||
2552 | static void clean_crm(void) | ||
2553 | { | ||
2554 | cleanup_crm(); | ||
2555 | if (cluster_file) | ||
2556 | remove_proc_entry("cluster", crm_dir); | ||
2557 | if (crm_dir) | ||
2558 | remove_plugin_proc_dir(&crm_plugin); | ||
2559 | } | ||
2560 | |||
2561 | module_init(init_crm); | ||
2562 | module_exit(clean_crm); | ||