-rw-r--r--  litmus/Kconfig     |   10
-rw-r--r--  litmus/Makefile    |    1
-rw-r--r--  litmus/rm_common.c |  300
-rw-r--r--  litmus/sched_crm.c | 2562
4 files changed, 0 insertions, 2873 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 03f31157abc7..3adfa1fe9800 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -12,16 +12,6 @@ config PLUGIN_CEDF
12	  On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
13	  makes little sense since there aren't any shared caches.
14
15config PLUGIN_CRM
16 bool "Clustered-RM"
17 depends on X86 && SYSFS
18 default y
19 help
20 Include the Clustered RM (C-RM) plugin in the kernel.
21 This is appropriate for large platforms with shared caches.
22	  On smaller platforms (e.g., ARM PB11MPCore), using C-RM
23 makes little sense since there aren't any shared caches.
24
25config RECURSIVE_READYQ_LOCK
26	bool "Recursive Ready Queue Lock"
27	default n
diff --git a/litmus/Makefile b/litmus/Makefile
index 08ed4a663d8f..264640dd013b 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -25,7 +25,6 @@ obj-y = sched_plugin.o litmus.o \
25	   sched_pfp.o
26
27obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
28obj-$(CONFIG_PLUGIN_CRM) += rm_common.o sched_crm.o
29obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
30obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
31
diff --git a/litmus/rm_common.c b/litmus/rm_common.c
deleted file mode 100644
index 8d4cdf4c71cf..000000000000
--- a/litmus/rm_common.c
+++ /dev/null
@@ -1,300 +0,0 @@
1/*
2 * litmus/rm_common.c
3 *
4 * Common functions for RM-based schedulers.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#ifdef CONFIG_LITMUS_NESTED_LOCKING
16#include <litmus/locking.h>
17#endif
18
19#include <litmus/rm_common.h>
20
21
22/* rm_higher_prio - returns true if first has a higher RM priority
23 * than second. Period ties are broken by PID.
24 *
25 * both first and second may be NULL
26 */
27#ifdef CONFIG_LITMUS_NESTED_LOCKING
28int __rm_higher_prio(
29 struct task_struct* first, comparison_mode_t first_mode,
30 struct task_struct* second, comparison_mode_t second_mode)
31#else
32int rm_higher_prio(struct task_struct* first, struct task_struct* second)
33#endif
34{
35 struct task_struct *first_task = first;
36 struct task_struct *second_task = second;
37
38 /* There is no point in comparing a task to itself. */
39 if (first && first == second) {
40 TRACE_CUR("WARNING: pointless rm priority comparison: %s/%d\n", first->comm, first->pid);
41// WARN_ON(1);
42 return 0;
43 }
44
45 /* check for NULL tasks */
46 if (!first || !second) {
47 return first && !second;
48 }
49 /* check for non-realtime */
50 if (!is_realtime(first) || !is_realtime(second)) {
51 return is_realtime(first) && !is_realtime(second);
52 }
53
54 /* There is some goofy stuff in this code here. There are three subclasses
55 * within the SCHED_LITMUS scheduling class:
56 * 1) Auxiliary tasks: COTS helper threads from the application level that
57 * are forced to be real-time.
58 * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers.
59 * 3) Normal Litmus tasks.
60 *
61 * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might
62 * inherit a priority from a task of #3.
63 *
64 * The code proceeds in the following manner:
65 * 1) Make aux and klmirqd threads with base-priorities have low priorities.
66 * 2) Determine effective priorities.
67 * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie.
68 */
69
70
71#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED)
72 /* run aux tasks at max priority */
73 if (tsk_rt(first)->is_aux_task != tsk_rt(second)->is_aux_task) {
74 return (tsk_rt(first)->is_aux_task > tsk_rt(second)->is_aux_task);
75 }
76#elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE)
77 {
78 int first_lo_aux = tsk_rt(first)->is_aux_task && !tsk_rt(first)->inh_task;
79 int second_lo_aux = tsk_rt(second)->is_aux_task && !tsk_rt(second)->inh_task;
80
81 /* prioritize aux tasks without inheritance below real-time tasks */
82 if (first_lo_aux || second_lo_aux) {
83 // one of these is an aux task without inheritance.
84 if (first_lo_aux != second_lo_aux) {
85 int temp = (first_lo_aux < second_lo_aux); // non-lo-aux has higher priority.
86 return temp;
87 }
88 else {
89 /* both MUST be lo_aux. tie-break. */
90 //TRACE_CUR("aux tie break!\n");
91 goto aux_tie_break;
92 }
93 }
94
95 if (tsk_rt(first)->is_aux_task && tsk_rt(second)->is_aux_task &&
96 tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) {
97 // inh_task is !NULL for both tasks since neither was a lo_aux task.
98 // Both aux tasks inherit from the same task, so tie-break
99 // by base priority of the aux tasks.
100 //TRACE_CUR("aux tie break!\n");
101 goto aux_tie_break;
102 }
103 }
104#endif
105
106#ifdef CONFIG_LITMUS_SOFTIRQD
107 {
108 int first_lo_klmirqd = tsk_rt(first)->is_interrupt_thread && !tsk_rt(first)->inh_task;
109 int second_lo_klmirqd = tsk_rt(second)->is_interrupt_thread && !tsk_rt(second)->inh_task;
110
111		/* prioritize klmirqd threads without inheritance below real-time tasks */
112 if (first_lo_klmirqd || second_lo_klmirqd) {
113			// one of these is a klmirqd thread without inheritance.
114 if (first_lo_klmirqd != second_lo_klmirqd) {
115 int temp = (first_lo_klmirqd < second_lo_klmirqd); // non-klmirqd has higher priority
116 return temp;
117 }
118 else {
119 /* both MUST be klmirqd. tie-break. */
120 //TRACE_CUR("klmirqd tie break!\n");
121 goto klmirqd_tie_break;
122 }
123 }
124
125 if (tsk_rt(first)->is_interrupt_thread && tsk_rt(second)->is_interrupt_thread &&
126 tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) {
127 // inh_task is !NULL for both tasks since neither was a lo_klmirqd task.
128 // Both klmirqd tasks inherit from the same task, so tie-break
129 // by base priority of the klmirqd tasks.
130 //TRACE_CUR("klmirqd tie break!\n");
131 goto klmirqd_tie_break;
132 }
133 }
134#endif
135
136
137#ifdef CONFIG_LITMUS_LOCKING
138 /* Check for EFFECTIVE priorities. Change task
139 * used for comparison in such a case.
140 */
141 if (unlikely(tsk_rt(first)->inh_task)
142#ifdef CONFIG_LITMUS_NESTED_LOCKING
143 && (first_mode == EFFECTIVE)
144#endif
145 ) {
146 first_task = tsk_rt(first)->inh_task;
147 }
148 if (unlikely(tsk_rt(second)->inh_task)
149#ifdef CONFIG_LITMUS_NESTED_LOCKING
150 && (second_mode == EFFECTIVE)
151#endif
152 ) {
153 second_task = tsk_rt(second)->inh_task;
154 }
155
156 /* Check for priority boosting. Tie-break by start of boosting.
157 */
158 if (unlikely(is_priority_boosted(first_task))) {
159 /* first_task is boosted, how about second_task? */
160 if (!is_priority_boosted(second_task) ||
161 lt_before(get_boost_start(first_task),
162 get_boost_start(second_task))) {
163 return 1;
164 }
165 else {
166 return 0;
167 }
168 }
169 else if (unlikely(is_priority_boosted(second_task))) {
170 /* second_task is boosted, first is not*/
171 return 0;
172 }
173
174#endif
175
176#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
177aux_tie_break:
178#endif
179#ifdef CONFIG_LITMUS_SOFTIRQD
180klmirqd_tie_break:
181#endif
182
183 // KLUDGE! This is reverse of fp_common's implementation!!!
184 if (get_period(first_task) < get_period(second_task))
185 return 1;
186 else if (get_period(first_task) == get_period(second_task)) {
187 if (first_task->pid < second_task->pid)
188 return 1;
189 else if (first_task->pid == second_task->pid) {
190 /* there is inheritance going on. consider inheritors. */
191#ifdef CONFIG_LITMUS_SOFTIRQD
192 /* non-interrupt thread gets prio */
193 if (!tsk_rt(first)->is_interrupt_thread && tsk_rt(second)->is_interrupt_thread)
194 return 1;
195 else if (tsk_rt(first)->is_interrupt_thread == tsk_rt(second)->is_interrupt_thread) {
196#endif
197
198#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE)
199 /* non-aux thread gets prio */
200 if (!tsk_rt(first)->is_aux_task && tsk_rt(second)->is_aux_task)
201 return 1;
202 else if (tsk_rt(first_task)->is_aux_task == tsk_rt(second_task)->is_aux_task) {
203#endif
204 /* if both tasks inherit from the same task */
205 if (tsk_rt(first)->inh_task == tsk_rt(second)->inh_task) {
206				/* TODO: Make a recursive call to rm_higher_prio,
207 comparing base priorities. */
208 return (first->pid < second->pid);
209 }
210 else {
211 /* At least one task must inherit */
212 BUG_ON(!tsk_rt(first)->inh_task &&
213 !tsk_rt(second)->inh_task);
214
215 /* The task withOUT the inherited priority wins. */
216 if (tsk_rt(second)->inh_task) {
217 return 1;
218 }
219 }
220#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE)
221 }
222#endif
223
224#ifdef CONFIG_LITMUS_SOFTIRQD
225 }
226#endif
227 }
228 }
229
230 return 0; /* fall-through. prio(second_task) > prio(first_task) */
231}
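
Stripped of the aux-task, klmirqd, boosting, and inheritance special cases, the base rule implemented above is simply "shorter period wins, ties broken by lower PID". A minimal stand-alone sketch of that core rule (hypothetical struct and function names, not kernel code):

#include <stdio.h>

/* Hypothetical, simplified stand-in for the kernel's task state. */
struct toy_task {
	int pid;
	unsigned long long period;	/* in nanoseconds */
};

/* Core RM rule from __rm_higher_prio(), with all special cases omitted. */
static int toy_rm_higher_prio(const struct toy_task *first, const struct toy_task *second)
{
	if (!first || !second)
		return first && !second;
	if (first->period != second->period)
		return first->period < second->period;
	return first->pid < second->pid;
}

int main(void)
{
	struct toy_task a = { .pid = 101, .period = 10000000ULL };	/* 10 ms */
	struct toy_task b = { .pid = 102, .period = 25000000ULL };	/* 25 ms */

	printf("%d\n", toy_rm_higher_prio(&a, &b));	/* prints 1: shorter period wins */
	printf("%d\n", toy_rm_higher_prio(&b, &a));	/* prints 0 */
	return 0;
}
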
232
233
234#ifdef CONFIG_LITMUS_NESTED_LOCKING
235int rm_higher_prio(struct task_struct* first, struct task_struct* second)
236{
237 return __rm_higher_prio(first, EFFECTIVE, second, EFFECTIVE);
238}
239
240int rm_max_heap_order(struct binheap_node *a, struct binheap_node *b)
241{
242 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
243 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
244
245 return __rm_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE);
246}
247
248int rm_min_heap_order(struct binheap_node *a, struct binheap_node *b)
249{
250 return rm_max_heap_order(b, a); // swap comparison
251}
252
253int rm_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
254{
255 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
256 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
257
258 return __rm_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE);
259}
260
261int rm_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
262{
263 return rm_max_heap_base_priority_order(b, a); // swap comparison
264}
265#endif
266
267
268int rm_ready_order(struct bheap_node* a, struct bheap_node* b)
269{
270 return rm_higher_prio(bheap2task(a), bheap2task(b));
271}
272
273void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
274 release_jobs_t release)
275{
276 rt_domain_init(rt, rm_ready_order, resched, release);
277}
278
279/* rm_preemption_needed - check whether the task t needs to be preempted
280 * call only with irqs disabled and with ready_lock acquired
281 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
282 */
283int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
284{
285 /* we need the read lock for rm_ready_queue */
286 /* no need to preempt if there is nothing pending */
287 if (!__jobs_pending(rt))
288 return 0;
289 /* we need to reschedule if t doesn't exist */
290 if (!t)
291 return 1;
292
293 /* NOTE: We cannot check for non-preemptibility since we
294 * don't know what address space we're currently in.
295 */
296
297 /* make sure to get non-rt stuff out of the way */
298 return !is_realtime(t) || rm_higher_prio(__next_ready(rt), t);
299}
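
The test above boils down to three steps: nothing pending means no preemption, an idle or non-real-time CPU always picks up pending work, and otherwise the head of the ready queue must beat the currently scheduled task. A minimal sketch of that decision, with hypothetical names and the priority comparison reduced to a plain period check:

#include <stdbool.h>
#include <stdio.h>

struct toy_job {
	int pid;
	unsigned long long period;
};

/* head: highest-priority pending job, or NULL if the ready queue is empty.
 * scheduled: the real-time task currently running on the CPU, or NULL. */
static bool toy_preemption_needed(const struct toy_job *head,
				  const struct toy_job *scheduled)
{
	if (!head)
		return false;	/* nothing pending, nothing to do */
	if (!scheduled)
		return true;	/* no real-time task on the CPU: pick up the pending job */
	return head->period < scheduled->period;	/* simplified rm_higher_prio() */
}

int main(void)
{
	struct toy_job pending = { .pid = 7, .period = 5000000ULL };	/* 5 ms */
	struct toy_job running = { .pid = 9, .period = 20000000ULL };	/* 20 ms */

	printf("%d\n", toy_preemption_needed(&pending, &running));	/* 1: preempt */
	printf("%d\n", toy_preemption_needed(NULL, &running));		/* 0: nothing pending */
	return 0;
}
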
300
diff --git a/litmus/sched_crm.c b/litmus/sched_crm.c
deleted file mode 100644
index 791b9979190e..000000000000
--- a/litmus/sched_crm.c
+++ /dev/null
@@ -1,2562 +0,0 @@
1/*
2 * litmus/sched_crm.c
3 *
4 * Implementation of the C-RM scheduling algorithm.
5 *
6 * This implementation is based on G-EDF:
7 * - CPUs are clustered around L2 or L3 caches.
8 * - Cluster topology is automatically detected (this is arch dependent
9 *   and works only on x86 at the moment, and only with modern
10 *   cpus that export cpuid4 information)
11 * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.
12 * the programmer needs to be aware of the topology to place tasks
13 * in the desired cluster
14 * - default clustering is around L2 cache (cache index = 2)
15 * supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
16 * online_cpus are placed in a single cluster).
17 *
18 * For details on functions, take a look at sched_gsn_edf.c
19 *
20 * Currently, we do not support changes in the number of online cpus.
21 * If the num_online_cpus() dynamically changes, the plugin is broken.
22 *
23 * This version uses the simple approach and serializes all scheduling
24 * decisions by the use of a queue lock. This is probably not the
25 * best way to do it, but it should suffice for now.
26 */
27
28#include <linux/spinlock.h>
29#include <linux/percpu.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32#include <linux/uaccess.h>
33#include <linux/module.h>
34
35#include <litmus/litmus.h>
36#include <litmus/jobs.h>
37#include <litmus/preempt.h>
38#include <litmus/budget.h>
39#include <litmus/sched_plugin.h>
40#include <litmus/rm_common.h>
41#include <litmus/sched_trace.h>
42
43#include <litmus/clustered.h>
44
45#include <litmus/bheap.h>
46#include <litmus/binheap.h>
47#include <litmus/trace.h>
48
49#ifdef CONFIG_LITMUS_LOCKING
50#include <litmus/kfmlp_lock.h>
51#endif
52
53#ifdef CONFIG_LITMUS_NESTED_LOCKING
54#include <litmus/fifo_lock.h>
55#include <litmus/prioq_lock.h>
56#include <litmus/ikglp_lock.h>
57#endif
58
59#ifdef CONFIG_SCHED_CPU_AFFINITY
60#include <litmus/affinity.h>
61#endif
62
63#ifdef CONFIG_REALTIME_AUX_TASKS
64#include <litmus/aux_tasks.h>
65#endif
66
67/* to configure the cluster size */
68#include <litmus/litmus_proc.h>
69
70#ifdef CONFIG_SCHED_CPU_AFFINITY
71#include <litmus/affinity.h>
72#endif
73
74#ifdef CONFIG_LITMUS_SOFTIRQD
75#include <litmus/litmus_softirq.h>
76#endif
77
78#ifdef CONFIG_LITMUS_NVIDIA
79#include <litmus/nvidia_info.h>
80#endif
81
82#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
83#include <litmus/gpu_affinity.h>
84#endif
85
86/* Reference configuration variable. Determines which cache level is used to
87 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
88 * all CPUs form a single cluster (just like GSN-EDF).
89 */
90static enum cache_level cluster_config = GLOBAL_CLUSTER;
91
92struct clusterdomain;
93
94/* cpu_entry_t - maintain the linked and scheduled state
95 *
96 * A cpu also contains a pointer to the crm_domain_t cluster
97 * that owns it (struct clusterdomain*)
98 */
99typedef struct {
100 int cpu;
101 struct clusterdomain* cluster; /* owning cluster */
102 struct task_struct* linked; /* only RT tasks */
103 struct task_struct* scheduled; /* only RT tasks */
104 atomic_t will_schedule; /* prevent unneeded IPIs */
105 struct binheap_node hn;
106} cpu_entry_t;
107
108/* one cpu_entry_t per CPU */
109DEFINE_PER_CPU(cpu_entry_t, crm_cpu_entries);
110
111#define set_will_schedule() \
112 (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 1))
113#define clear_will_schedule() \
114 (atomic_set(&__get_cpu_var(crm_cpu_entries).will_schedule, 0))
115#define test_will_schedule(cpu) \
116 (atomic_read(&per_cpu(crm_cpu_entries, cpu).will_schedule))
117
118/*
119 * In C-RM there is a crm domain _per_ cluster
120 * The number of clusters is dynamically determined according to the
121 * total cpu number and the cluster size
122 */
123typedef struct clusterdomain {
124 /* rt_domain for this cluster */
125 rt_domain_t domain;
126 /* cpus in this cluster */
127 cpu_entry_t* *cpus;
128 /* map of this cluster cpus */
129 cpumask_var_t cpu_map;
130 /* the cpus queue themselves according to priority in here */
131 struct binheap cpu_heap;
132
133#define cluster_lock domain.ready_lock
134
135#ifdef CONFIG_LITMUS_DGL_SUPPORT
136 raw_spinlock_t dgl_lock;
137#endif
138
139 int top_m_size;
140 struct binheap top_m;
141 struct binheap not_top_m;
142
143} crm_domain_t;
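
With equally sized clusters, the bookkeeping behind num_clusters and cluster_size (declared below) is plain integer arithmetic: the online CPU count divided by the per-cluster CPU count. A trivial sketch with hypothetical example values:

#include <stdio.h>

int main(void)
{
	/* hypothetical platform: 24 online CPUs, 6 CPUs sharing each L3 cache */
	unsigned int online_cpus = 24;
	unsigned int cluster_size = 6;

	/* mirrors the plugin's assumption that all clusters have the same size */
	unsigned int num_clusters = online_cpus / cluster_size;

	printf("%u clusters of %u CPUs each\n", num_clusters, cluster_size);	/* 4 clusters of 6 */
	return 0;
}
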
144
145
146/* a crm_domain per cluster; allocation is done at init/activation time */
147crm_domain_t *crm;
148
149#define remote_cluster(cpu) ((crm_domain_t *) per_cpu(crm_cpu_entries, cpu).cluster)
150#define task_cpu_cluster(task) remote_cluster(get_partition(task))
151
152/* total number of clusters */
153static int num_clusters;
154/* we do not support clusters of different sizes */
155static unsigned int cluster_size;
156
157static int clusters_allocated = 0;
158
159
160#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
161static int num_gpu_clusters;
162static unsigned int gpu_cluster_size;
163#endif
164
165inline static struct task_struct* binheap_node_to_task(struct binheap_node *bn)
166{
167 struct budget_tracker *bt = binheap_entry(bn, struct budget_tracker, top_m_node);
168 struct task_struct *t =
169 container_of(
170 container_of(bt, struct rt_param, budget),
171 struct task_struct,
172 rt_param);
173 return t;
174}
175
176static int crm_max_heap_base_priority_order(struct binheap_node *a,
177 struct binheap_node *b)
178{
179 struct task_struct* t_a = binheap_node_to_task(a);
180 struct task_struct* t_b = binheap_node_to_task(b);
181 return __rm_higher_prio(t_a, BASE, t_b, BASE);
182}
183
184static int crm_min_heap_base_priority_order(struct binheap_node *a,
185 struct binheap_node *b)
186{
187 struct task_struct* t_a = binheap_node_to_task(a);
188 struct task_struct* t_b = binheap_node_to_task(b);
189 return __rm_higher_prio(t_b, BASE, t_a, BASE);
190}
191
192static void crm_track_in_top_m(struct task_struct *t)
193{
194 /* cluster lock must be held */
195 crm_domain_t *cluster = task_cpu_cluster(t);
196 struct budget_tracker *bt;
197 struct task_struct *mth_highest;
198
199 //BUG_ON(binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node));
200 if (binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)) {
201// TRACE_TASK(t, "apparently already being tracked. top-m?: %s\n",
202// (bt_flag_is_set(t, BTF_IS_TOP_M)) ? "Yes":"No");
203 return;
204 }
205
206 /* TODO: do cluster_size-1 if release master is in this cluster */
207 if (cluster->top_m_size < cluster_size) {
208// TRACE_TASK(t, "unconditionally adding task to top-m.\n");
209 binheap_add(&tsk_rt(t)->budget.top_m_node, &cluster->top_m,
210 struct budget_tracker, top_m_node);
211 ++cluster->top_m_size;
212 bt_flag_set(t, BTF_IS_TOP_M);
213 budget_state_machine(t,on_enter_top_m);
214
215 return;
216 }
217
218 BUG_ON(binheap_empty(&cluster->top_m));
219
220 bt = binheap_top_entry(&cluster->top_m, struct budget_tracker, top_m_node);
221 mth_highest =
222 container_of(
223 container_of(bt, struct rt_param, budget),
224 struct task_struct,
225 rt_param);
226
227 if (__rm_higher_prio(t, BASE, mth_highest, BASE)) {
228// TRACE_TASK(t, "adding to top-m (evicting %s/%d)\n",
229// mth_highest->comm, mth_highest->pid);
230
231 binheap_delete_root(&cluster->top_m, struct budget_tracker, top_m_node);
232 INIT_BINHEAP_NODE(&tsk_rt(mth_highest)->budget.top_m_node);
233 binheap_add(&tsk_rt(mth_highest)->budget.top_m_node,
234 &cluster->not_top_m,
235 struct budget_tracker, top_m_node);
236 budget_state_machine(mth_highest,on_exit_top_m);
237 bt_flag_clear(mth_highest, BTF_IS_TOP_M);
238
239 binheap_add(&tsk_rt(t)->budget.top_m_node, &cluster->top_m,
240 struct budget_tracker, top_m_node);
241 bt_flag_set(t, BTF_IS_TOP_M);
242 budget_state_machine(t,on_enter_top_m);
243 }
244 else {
245// TRACE_TASK(t, "adding to not-top-m\n");
246 binheap_add(&tsk_rt(t)->budget.top_m_node,
247 &cluster->not_top_m,
248 struct budget_tracker, top_m_node);
249 }
250}
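
crm_track_in_top_m() keeps the cluster_size highest base-priority tracked tasks in the top_m heap and everyone else in not_top_m; a newcomer that beats the m-th highest task evicts it. The following compact sketch reproduces that eviction rule with a plain array instead of heaps (hypothetical names, illustration only):

#include <stdio.h>

#define M 4	/* hypothetical cluster size: track the top 4 tasks */

static unsigned long long top_m[M];	/* periods of the tracked top-m tasks */
static int top_m_size;

/* Returns 1 if 'period' ends up in the top-m set, 0 if it lands in "not-top-m". */
static int toy_track_in_top_m(unsigned long long period)
{
	int i, worst = 0;

	if (top_m_size < M) {
		top_m[top_m_size++] = period;	/* room left: add unconditionally */
		return 1;
	}

	/* find the lowest-priority (longest-period) member of the top-m set */
	for (i = 1; i < M; i++)
		if (top_m[i] > top_m[worst])
			worst = i;

	if (period < top_m[worst]) {
		top_m[worst] = period;	/* newcomer evicts the m-th highest to "not-top-m" */
		return 1;
	}
	return 0;	/* newcomer is not among the top m */
}

int main(void)
{
	unsigned long long periods[] = { 40, 10, 30, 20, 15, 50 };	/* arbitrary time units */
	int i;

	for (i = 0; i < 6; i++)
		printf("period %llu -> %s\n", periods[i],
		       toy_track_in_top_m(periods[i]) ? "top-m" : "not-top-m");
	return 0;
}
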
251
252static void crm_untrack_in_top_m(struct task_struct *t)
253{
254 /* cluster lock must be held */
255 crm_domain_t *cluster = task_cpu_cluster(t);
256
257 if (!binheap_is_in_heap(&tsk_rt(t)->budget.top_m_node)) {
258// TRACE_TASK(t, "is not being tracked\n"); /* BUG() on this case? */
259 return;
260 }
261
262 if (bt_flag_is_set(t, BTF_IS_TOP_M)) {
263// TRACE_TASK(t, "removing task from top-m\n");
264
265 /* delete t's entry */
266 binheap_delete(&tsk_rt(t)->budget.top_m_node, &cluster->top_m);
267 budget_state_machine(t,on_exit_top_m);
268 bt_flag_clear(t, BTF_IS_TOP_M);
269
270 /* move a task over from the overflow heap */
271 if(!binheap_empty(&cluster->not_top_m)) {
272 struct budget_tracker *bt =
273 binheap_top_entry(&cluster->not_top_m, struct budget_tracker, top_m_node);
274 struct task_struct *to_move =
275 container_of(
276 container_of(bt, struct rt_param, budget),
277 struct task_struct,
278 rt_param);
279
280// TRACE_TASK(to_move, "being promoted to top-m\n");
281
282 binheap_delete_root(&cluster->not_top_m, struct budget_tracker, top_m_node);
283 INIT_BINHEAP_NODE(&tsk_rt(to_move)->budget.top_m_node);
284
285 binheap_add(&tsk_rt(to_move)->budget.top_m_node,
286 &cluster->top_m,
287 struct budget_tracker, top_m_node);
288 bt_flag_set(to_move, BTF_IS_TOP_M);
289 budget_state_machine(to_move,on_enter_top_m);
290 }
291 else {
292 --cluster->top_m_size;
293 }
294 }
295 else {
296// TRACE_TASK(t, "removing task from not-top-m\n");
297 binheap_delete(&tsk_rt(t)->budget.top_m_node, &cluster->not_top_m);
298 }
299}
300
301
302#ifdef CONFIG_LITMUS_DGL_SUPPORT
303static raw_spinlock_t* crm_get_dgl_spinlock(struct task_struct *t)
304{
305 crm_domain_t *cluster = task_cpu_cluster(t);
306 return(&cluster->dgl_lock);
307}
308#endif
309
310
311/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
312 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
313 * information during the initialization of the plugin (e.g., topology)
314#define WANT_ALL_SCHED_EVENTS
315 */
316#define VERBOSE_INIT
317
318static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
319{
320 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
321 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
322
323 /* Note that a and b are inverted: we want the lowest-priority CPU at
324 * the top of the heap.
325 */
326 return rm_higher_prio(b->linked, a->linked);
327}
328
329/* update_cpu_position - Move the cpu entry to the correct place to maintain
330 * order in the cpu queue. Caller must hold crm lock.
331 */
332static void update_cpu_position(cpu_entry_t *entry)
333{
334 crm_domain_t *cluster = entry->cluster;
335
336 if (likely(binheap_is_in_heap(&entry->hn))) {
337 binheap_delete(&entry->hn, &cluster->cpu_heap);
338 }
339
340 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
341}
342
343/* caller must hold crm lock */
344static cpu_entry_t* lowest_prio_cpu(crm_domain_t *cluster)
345{
346 return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
347}
348
349static noinline void unlink(struct task_struct* t);
350
351/* link_task_to_cpu - Update the link of a CPU.
352 * Handles the case where the to-be-linked task is already
353 * scheduled on a different CPU.
354 */
355static noinline void link_task_to_cpu(struct task_struct* linked,
356 cpu_entry_t *entry)
357{
358 cpu_entry_t *sched;
359 struct task_struct* tmp;
360 int on_cpu;
361
362 BUG_ON(linked && !is_realtime(linked));
363
364 /* Currently linked task is set to be unlinked. */
365 if (entry->linked) {
366 entry->linked->rt_param.linked_on = NO_CPU;
367
368#ifdef CONFIG_LITMUS_LOCKING
369 if (tsk_rt(entry->linked)->inh_task)
370 clear_inh_task_linkback(entry->linked, tsk_rt(entry->linked)->inh_task);
371#endif
372 }
373
374 /* Link new task to CPU. */
375 if (linked) {
376 /* handle task is already scheduled somewhere! */
377 on_cpu = linked->rt_param.scheduled_on;
378 if (on_cpu != NO_CPU) {
379 sched = &per_cpu(crm_cpu_entries, on_cpu);
380
381 BUG_ON(sched->linked == linked);
382
383 /* If we are already scheduled on the CPU to which we
384 * wanted to link, we don't need to do the swap --
385 * we just link ourselves to the CPU and depend on
386 * the caller to get things right.
387 */
388 if (entry != sched) {
389 TRACE_TASK(linked,
390 "already scheduled on %d, updating link.\n",
391 sched->cpu);
392 tmp = sched->linked;
393 linked->rt_param.linked_on = sched->cpu;
394 sched->linked = linked;
395 update_cpu_position(sched);
396 linked = tmp;
397 }
398 }
399 if (linked) { /* might be NULL due to swap */
400 linked->rt_param.linked_on = entry->cpu;
401
402#ifdef CONFIG_LITMUS_LOCKING
403 if (tsk_rt(linked)->inh_task)
404 set_inh_task_linkback(linked, tsk_rt(linked)->inh_task);
405#endif
406 }
407 }
408 entry->linked = linked;
409#ifdef WANT_ALL_SCHED_EVENTS
410 if (linked)
411 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
412 else
413 TRACE("NULL linked to %d.\n", entry->cpu);
414#endif
415 update_cpu_position(entry);
416}
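
The swap case above deserves a concrete illustration: if the task we want to link to CPU B is still scheduled on CPU A, it gets linked to A instead and whatever was linked to A is pushed on to B. A stripped-down sketch of that pointer shuffle (hypothetical types, no locking, not kernel code):

#include <stdio.h>

#define NO_CPU (-1)

struct toy_task {
	const char *name;
	int linked_on;		/* CPU this task is linked to, or NO_CPU */
	int scheduled_on;	/* CPU this task is still scheduled on, or NO_CPU */
};

struct toy_cpu {
	int id;
	struct toy_task *linked;
	struct toy_task *scheduled;
};

/* Mirrors the core of link_task_to_cpu(): if 'linked' is still scheduled on
 * another CPU, link it there instead and push that CPU's old link to 'entry'. */
static void toy_link(struct toy_task *linked, struct toy_cpu *entry, struct toy_cpu cpus[])
{
	if (entry->linked)
		entry->linked->linked_on = NO_CPU;

	if (linked && linked->scheduled_on != NO_CPU) {
		struct toy_cpu *sched = &cpus[linked->scheduled_on];

		if (sched != entry) {
			struct toy_task *tmp = sched->linked;	/* displaced task */

			linked->linked_on = sched->id;
			sched->linked = linked;
			linked = tmp;	/* now link the displaced task to 'entry' */
		}
	}
	if (linked)
		linked->linked_on = entry->id;
	entry->linked = linked;
}

int main(void)
{
	struct toy_task A = { "A", NO_CPU, 0 };	/* A is still scheduled on CPU 0 */
	struct toy_task B = { "B", 0, NO_CPU };	/* B is currently linked to CPU 0 */
	struct toy_cpu cpus[2] = { { 0, &B, &A }, { 1, NULL, NULL } };

	toy_link(&A, &cpus[1], cpus);	/* try to link A to CPU 1 */

	/* A stays on its own CPU and B moves: prints "cpu0: A, cpu1: B" */
	printf("cpu0: %s, cpu1: %s\n", cpus[0].linked->name, cpus[1].linked->name);
	return 0;
}
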
417
418/* unlink - Make sure a task is not linked any longer to an entry
419 * where it was linked before. Must hold cluster_lock.
420 */
421static noinline void unlink(struct task_struct* t)
422{
423 if (t->rt_param.linked_on != NO_CPU) {
424 /* unlink */
425 cpu_entry_t *entry = &per_cpu(crm_cpu_entries, t->rt_param.linked_on);
426 t->rt_param.linked_on = NO_CPU;
427 link_task_to_cpu(NULL, entry);
428 } else if (is_queued(t)) {
429 /* This is an interesting situation: t is scheduled,
430 * but was just recently unlinked. It cannot be
431 * linked anywhere else (because then it would have
432 * been relinked to this CPU), thus it must be in some
433 * queue. We must remove it from the list in this
434 * case.
435 *
436		 * in the C-RM case it should be somewhere in the queue for
437		 * its domain, therefore we can get the domain using
438 * task_cpu_cluster
439 */
440 remove(&(task_cpu_cluster(t))->domain, t);
441 }
442}
443
444
445/* preempt - force a CPU to reschedule
446 */
447static void preempt(cpu_entry_t *entry)
448{
449 preempt_if_preemptable(entry->scheduled, entry->cpu);
450}
451
452/* requeue - Put an unlinked task into its cluster's rm domain.
453 * Caller must hold cluster_lock.
454 */
455static noinline void requeue(struct task_struct* task)
456{
457 crm_domain_t *cluster = task_cpu_cluster(task);
458 BUG_ON(!task);
459 /* sanity check before insertion */
460 BUG_ON(is_queued(task));
461
462 if (is_early_releasing(task) || is_released(task, litmus_clock()) ||
463 tsk_rt(task)->job_params.is_backlogged_job) {
464#ifdef CONFIG_REALTIME_AUX_TASKS
465 if (unlikely(tsk_rt(task)->is_aux_task && task->state != TASK_RUNNING && !tsk_rt(task)->aux_ready)) {
466 /* aux_task probably transitioned to real-time while it was blocked */
467 TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid);
468 tsk_rt(task)->aux_ready = 1; /* allow this to only happen once per aux task */
469 }
470 else
471#endif
472 __add_ready(&cluster->domain, task);
473 }
474 else {
475 TRACE_TASK(task, "not requeueing not-yet-released job\n");
476 }
477}
478
479#ifdef CONFIG_SCHED_CPU_AFFINITY
480static cpu_entry_t* crm_get_nearest_available_cpu(
481 crm_domain_t *cluster, cpu_entry_t *start)
482{
483 cpu_entry_t *affinity;
484
485 get_nearest_available_cpu(affinity, start, crm_cpu_entries,
486#ifdef CONFIG_RELEASE_MASTER
487 cluster->domain.release_master
488#else
489 NO_CPU
490#endif
491 );
492
493 /* make sure CPU is in our cluster */
494 if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
495 return(affinity);
496 else
497 return(NULL);
498}
499#endif
500
501
502/* check for any necessary preemptions */
503static void check_for_preemptions(crm_domain_t *cluster)
504{
505 struct task_struct *task;
506 cpu_entry_t *last;
507
508 for(last = lowest_prio_cpu(cluster);
509 rm_preemption_needed(&cluster->domain, last->linked);
510 last = lowest_prio_cpu(cluster)) {
511 /* preemption necessary */
512 task = __take_ready(&cluster->domain);
513 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
514 task->pid, last->cpu);
515#ifdef CONFIG_SCHED_CPU_AFFINITY
516 {
517 cpu_entry_t *affinity =
518 crm_get_nearest_available_cpu(cluster,
519 &per_cpu(crm_cpu_entries, task_cpu(task)));
520 if(affinity)
521 last = affinity;
522 else if(should_requeue_preempted_job(last->linked))
523 requeue(last->linked);
524 }
525#else
526 if (should_requeue_preempted_job(last->linked))
527 requeue(last->linked);
528#endif
529 link_task_to_cpu(task, last);
530 preempt(last);
531 }
532}
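
The loop above repeatedly pairs the weakest CPU with the strongest pending job until no pending job can preempt anyone, requeueing each displaced job along the way. A self-contained sketch of that loop over toy arrays (periods stand in for priorities; names and values are hypothetical):

#include <stdio.h>

#define NCPUS 2
#define NREADY 8
#define IDLE 0ULL	/* sentinel: no real-time job linked (lowest possible priority) */

/* periods of the jobs linked to each CPU; shorter period = higher priority */
static unsigned long long linked[NCPUS] = { 20, 30 };

/* toy ready "queue": unsorted array of pending job periods, 0 = empty slot */
static unsigned long long ready[NREADY] = { 10, 25 };

static int lowest_prio_cpu(void)
{
	int i, worst = 0;

	for (i = 1; i < NCPUS; i++)
		if (linked[worst] != IDLE &&
		    (linked[i] == IDLE || linked[i] > linked[worst]))
			worst = i;
	return worst;
}

static int take_ready(void)	/* index of the highest-priority pending job, -1 if none */
{
	int i, best = -1;

	for (i = 0; i < NREADY; i++)
		if (ready[i] != 0 && (best < 0 || ready[i] < ready[best]))
			best = i;
	return best;
}

int main(void)
{
	for (;;) {
		int cpu = lowest_prio_cpu();
		int job = take_ready();

		/* preemption needed iff a pending job beats the weakest linked job */
		if (job < 0 || (linked[cpu] != IDLE && ready[job] >= linked[cpu]))
			break;

		if (linked[cpu] != IDLE) {	/* requeue the displaced job */
			int slot;

			for (slot = 0; slot < NREADY && ready[slot] != 0; slot++)
				;
			if (slot < NREADY)
				ready[slot] = linked[cpu];
		}
		linked[cpu] = ready[job];
		ready[job] = 0;
		printf("linked job with period %llu to cpu %d\n", linked[cpu], cpu);
	}
	return 0;
}
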
533
534/* crm_job_arrival: task is either resumed or released */
535static noinline void crm_job_arrival(struct task_struct* task)
536{
537 crm_domain_t *cluster = task_cpu_cluster(task);
538 BUG_ON(!task);
539
540 requeue(task);
541 check_for_preemptions(cluster);
542}
543
544static void crm_track_on_release(struct bheap_node* n, void* dummy)
545{
546 struct task_struct* t = bheap2task(n);
547// TRACE_TASK(t, "released\n");
548
549 crm_track_in_top_m(t);
550}
551
552static void crm_release_jobs(rt_domain_t* rt, struct bheap* tasks)
553{
554 crm_domain_t* cluster = container_of(rt, crm_domain_t, domain);
555 unsigned long flags;
556
557 raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);
558
559 bheap_for_each(tasks, crm_track_on_release, NULL);
560
561 __merge_ready(&cluster->domain, tasks);
562 check_for_preemptions(cluster);
563
564 raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
565}
566
567/* caller holds cluster_lock */
568static noinline void job_completion(struct task_struct *t, int forced)
569{
570 int do_release = 0;
571 int backlogged = 0;
572 lt_t now;
573
574 BUG_ON(!t);
575
576 now = litmus_clock();
577
578 /* DO BACKLOG TRACKING */
579
580 /* job completed with budget remaining */
581 if (get_release_policy(t) != SPORADIC) {
582		/* only jobs that we know will call sleep_next_job() can use backlogging */
583 if (!forced) {
584 /* was it a backlogged job that completed? */
585 if (tsk_rt(t)->job_params.is_backlogged_job) {
586 TRACE_TASK(t, "completed backlogged job\n");
587 if (get_backlog(t)) {
588 --get_backlog(t);
589 /* is_backlogged_job remains asserted */
590 }
591 else {
592 /* caught up completely */
593 TRACE_TASK(t, "completely caught up.\n");
594 tsk_rt(t)->job_params.is_backlogged_job = 0;
595 /* we now look like a normally completing job. */
596 }
597 }
598 }
599 else {
600 ++get_backlog(t);
601 TRACE_TASK(t, "adding backlogged job\n");
602 }
603
604 backlogged = has_backlog(t);
605 TRACE_TASK(t, "number of backlogged jobs: %u\n",
606 get_backlog(t));
607 }
608
609 /* SETUP FOR THE NEXT JOB */
610
611 sched_trace_task_completion(t, forced);
612
613 TRACE_TASK(t, "job_completion() at %llu (forced = %d).\n", now, forced);
614
615 /* set flags */
616 tsk_rt(t)->completed = 0;
617
618#if 0
619 if (unlikely(!forced && backlogged)) {
620 /* Don't advance deadline/refresh budget. Use the remaining budget for
621 * the backlogged job.
622 *
623		 * NOTE: Allowing backlogged jobs to consume remaining budget may affect
624 * blocking bound analysis.
625 */
626 }
627 else if (unlikely(!forced && tsk_rt(t)->job_params.is_backlogged_job)) {
628 /* we've just about caught up, but we still have the job of this
629 * budget's allocation to do (even if it's for the future)... */
630 TRACE_TASK(t, "Releasing final catch-up job.\n");
631 backlogged = 1;
632 do_release = 1;
633 }
634 else {
635#endif
636 crm_untrack_in_top_m(t);
637 prepare_for_next_period(t);
638
639 do_release = (is_early_releasing(t) || is_released(t, now));
640
641 if (backlogged) {
642 TRACE_TASK(t, "refreshing budget with early "
643 "release for backlogged job.\n");
644 }
645 if (do_release || backlogged) {
646 /* log here to capture overheads */
647 sched_trace_task_release(t);
648 }
649// }
650
651 unlink(t);
652
653 /* release or arm next job */
654 if (is_running(t)) {
655 /* is our next job a backlogged job? */
656 if (backlogged) {
657 TRACE_TASK(t, "next job is a backlogged job.\n");
658 tsk_rt(t)->job_params.is_backlogged_job = 1;
659 }
660 else {
661 TRACE_TASK(t, "next job is a regular job.\n");
662 tsk_rt(t)->job_params.is_backlogged_job = 0;
663 }
664
665 if (do_release || backlogged) {
666 crm_track_in_top_m(t);
667 crm_job_arrival(t);
668 }
669 else {
670 add_release(&task_cpu_cluster(t)->domain, t);
671 }
672 }
673 else {
674 BUG_ON(!forced);
675 /* budget was refreshed and job early released */
676 TRACE_TASK(t, "job exhausted budget while sleeping\n");
677 crm_track_in_top_m(t);
678 }
679}
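
The backlog bookkeeping above is essentially a per-task counter: a forced completion (budget exhausted before the job signalled completion) adds one owed catch-up job, and each normally completed backlogged job pays one back. A tiny sketch of that counter logic (hypothetical names, simplified relative to the full state handling):

#include <stdio.h>

struct toy_task {
	unsigned int backlog;		/* forced completions not yet caught up on */
	int is_backlogged_job;		/* is the current job a catch-up job? */
};

/* Mirrors the backlog-tracking branch of job_completion(). */
static void toy_job_completion(struct toy_task *t, int forced)
{
	if (forced) {
		++t->backlog;		/* budget ran out: owe one catch-up job */
	} else if (t->is_backlogged_job) {
		if (t->backlog)
			--t->backlog;	/* one catch-up job done */
		if (!t->backlog)
			t->is_backlogged_job = 0;	/* caught up completely */
	}
	/* the next job is a catch-up job iff a backlog remains */
	if (t->backlog)
		t->is_backlogged_job = 1;
}

int main(void)
{
	struct toy_task t = { 0, 0 };

	toy_job_completion(&t, 1);	/* forced: backlog = 1 */
	toy_job_completion(&t, 1);	/* forced again: backlog = 2 */
	toy_job_completion(&t, 0);	/* catch-up job completes: backlog = 1 */
	toy_job_completion(&t, 0);	/* catch-up job completes: backlog = 0, caught up */
	printf("backlog = %u, backlogged next job = %d\n", t.backlog, t.is_backlogged_job);
	return 0;
}
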
680
681static enum hrtimer_restart crm_simple_on_exhausted(struct task_struct *t, int in_schedule)
682{
683 /* Assumption: t is scheduled on the CPU executing this callback */
684
685 if (in_schedule) {
686 BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id());
687 if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) < 0) {
688 TRACE_TASK(t, "raced with timer. deffering to timer.\n");
689 goto out;
690 }
691 }
692
693 if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) {
694 /* signal exhaustion */
695 send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */
696 }
697
698 if (budget_enforced(t) && !bt_flag_test_and_set(t, BTF_BUDGET_EXHAUSTED)) {
699 if (likely(!is_np(t))) {
700 /* np tasks will be preempted when they become
701 * preemptable again
702 */
703 if (!in_schedule) {
704 TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n");
705 litmus_reschedule_local();
706 set_will_schedule();
707 }
708 } else if (is_user_np(t)) {
709 TRACE_TASK(t, "is non-preemptable, preemption delayed.\n");
710 request_exit_np(t);
711 }
712 }
713
714out:
715 return HRTIMER_NORESTART;
716}
717
718
719static enum hrtimer_restart crm_simple_io_on_exhausted(struct task_struct *t, int in_schedule)
720{
721 enum hrtimer_restart restart = HRTIMER_NORESTART;
722
723 if (in_schedule) {
724 BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id());
725 if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) == -1) {
726			TRACE_TASK(t, "raced with timer. deferring to timer.\n");
727 goto out;
728 }
729 }
730
731 /* t may or may not be scheduled */
732
733 if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) {
734 /* signal exhaustion */
735
736 /* Tasks should block SIG_BUDGET if they cannot gracefully respond to
737 * the signal while suspended. SIG_BUDGET is an rt-signal, so it will
738 * be queued and received when SIG_BUDGET is unblocked */
739 send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */
740 }
741
742 if (budget_enforced(t) && !bt_flag_is_set(t, BTF_BUDGET_EXHAUSTED)) {
743 int cpu = (tsk_rt(t)->linked_on != NO_CPU) ?
744 tsk_rt(t)->linked_on : tsk_rt(t)->scheduled_on;
745
746 if (is_np(t) && is_user_np(t)) {
747 bt_flag_set(t, BTF_BUDGET_EXHAUSTED);
748 TRACE_TASK(t, "is non-preemptable, preemption delayed.\n");
749 request_exit_np(t);
750 }
751 /* where do we need to call resched? */
752 else if (cpu == smp_processor_id()) {
753 bt_flag_set(t, BTF_BUDGET_EXHAUSTED);
754 if (!in_schedule) {
755 TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n");
756 litmus_reschedule_local();
757 set_will_schedule();
758 }
759 }
760 else if (cpu != NO_CPU) {
761 bt_flag_set(t, BTF_BUDGET_EXHAUSTED);
762 if (!in_schedule) {
763 TRACE_TASK(t, "is preemptable on remote cpu (%d) => FORCE_RESCHED\n", cpu);
764 litmus_reschedule(cpu);
765 }
766 }
767 else if (unlikely(tsk_rt(t)->blocked_lock)) {
768 /* we shouldn't be draining while waiting for litmus lock, but we
769 * could have raced with the budget timer (?). */
770 WARN_ON(1);
771 }
772 else {
773 lt_t remaining;
774 crm_domain_t *cluster;
775 unsigned long flags, kludge_flags;
776
777 BUG_ON(in_schedule);
778
779 cluster = task_cpu_cluster(t);
780
781 // 1) refresh budget through job completion
782 // 2) if holds locks, tell the locking protocol to re-eval priority
783 // 3) -- the LP must undo any inheritance relations if appropriate
784
785 /* force job completion */
786 TRACE_TASK(t, "blocked, postponing deadline\n");
787
788 local_irq_save(kludge_flags);
789
790 /* Outermost lock of the cluster. Recursive lock calls are
791 * possible on this code path. This should be the _ONLY_
792 * scenario where recursive calls are made. */
793#ifdef CONFIG_LITMUS_DGL_SUPPORT
794 /* Unfortunately, we _might_ need to grab the DGL lock, so we
795			 * must grab it every time since it must be taken before the
796 * cluster lock. */
797 raw_spin_lock_irqsave(&cluster->dgl_lock, flags);
798 raw_readyq_lock(&cluster->cluster_lock);
799#else
800 raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);
801#endif
802
803 job_completion(t, 1); /* refreshes budget and pushes out deadline */
804
805#ifdef CONFIG_LITMUS_LOCKING
806 {
807 int i;
808				/* any linked task that inherits from 't' needs to have its
809 * cpu-position re-evaluated. we have to do this in two passes.
810 * pass 1: remove nodes from heap s.t. heap is in known good state.
811 * pass 2: re-add nodes.
812 *
813 */
814 for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots));
815 i < BITS_PER_LONG;
816 i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1))
817 {
818 struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i];
819 BUG_ON(!to_update);
820 if (tsk_rt(to_update)->linked_on != NO_CPU) {
821 cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on);
822 BUG_ON(!binheap_is_in_heap(&entry->hn));
823 binheap_delete(&entry->hn, &cluster->cpu_heap);
824 }
825 }
826 for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots));
827 i < BITS_PER_LONG;
828 i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1))
829 {
830 struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i];
831 BUG_ON(!to_update);
832 if (tsk_rt(to_update)->linked_on != NO_CPU) {
833 cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on);
834 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
835 }
836 }
837 }
838
839 /* Check our inheritance and propagate any changes forward. */
840 reevaluate_inheritance(t);
841#endif
842 /* No need to recheck priority of AUX tasks. They will always
843 * inherit from 't' if they are enabled. Their prio change was
844 * captured by the cpu-heap operations above. */
845
846#ifdef CONFIG_LITMUS_NVIDIA
847 /* Re-eval priority of GPU interrupt threads. */
848 if(tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu)
849 gpu_owner_decrease_priority(t);
850#endif
851
852#ifdef CONFIG_LITMUS_LOCKING
853 /* double-check that everything is okay */
854 check_for_preemptions(cluster);
855#endif
856
857 /* should be the outermost unlock call */
858#ifdef CONFIG_LITMUS_DGL_SUPPORT
859 raw_readyq_unlock(&cluster->cluster_lock);
860 raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags);
861#else
862 raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
863#endif
864 flush_pending_wakes();
865 local_irq_restore(kludge_flags);
866
867 /* we need to set up the budget timer since we're within the callback. */
868 hrtimer_forward_now(&get_budget_timer(t).timer.timer,
869 ns_to_ktime(budget_remaining(t)));
870 remaining = hrtimer_get_expires_ns(&get_budget_timer(t).timer.timer);
871
872 TRACE_TASK(t, "rearmed timer to %ld\n", remaining);
873 restart = HRTIMER_RESTART;
874 }
875 }
876
877out:
878 return restart;
879}
880
881
882#ifdef CONFIG_LITMUS_LOCKING
883static void __crm_trigger_vunlock(struct task_struct *t)
884{
885 TRACE_TASK(t, "triggering virtual unlock of lock %d\n",
886 tsk_rt(t)->outermost_lock->ident);
887 tsk_rt(t)->outermost_lock->ops->omlp_virtual_unlock(tsk_rt(t)->outermost_lock, t);
888}
889
890static void crm_trigger_vunlock(struct task_struct *t)
891{
892 crm_domain_t *cluster = task_cpu_cluster(t);
893#ifdef CONFIG_LITMUS_DGL_SUPPORT
894 unsigned long flags;
895
896 /* Unfortunately, we _might_ need to grab the DGL lock, so we
897	 * must grab it every time since it must be taken before the
898 * cluster lock. */
899 raw_spin_lock_irqsave(&cluster->dgl_lock, flags);
900#endif
901
902 __crm_trigger_vunlock(t);
903
904#ifdef CONFIG_LITMUS_DGL_SUPPORT
905 raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags);
906#endif
907}
908#endif
909
910static enum hrtimer_restart crm_sobliv_on_exhausted(struct task_struct *t, int in_schedule)
911{
912 enum hrtimer_restart restart = HRTIMER_NORESTART;
913
914 if (in_schedule) {
915 BUG_ON(tsk_rt(t)->scheduled_on != smp_processor_id());
916 if (budget_precisely_tracked(t) && cancel_enforcement_timer(t) == -1) {
917			TRACE_TASK(t, "raced with timer. deferring to timer.\n");
918 goto out;
919 }
920 }
921
922 /* t may or may not be scheduled */
923
924 if (budget_signalled(t) && !bt_flag_is_set(t, BTF_SIG_BUDGET_SENT)) {
925 /* signal exhaustion */
926
927 /* Tasks should block SIG_BUDGET if they cannot gracefully respond to
928 * the signal while suspended. SIG_BUDGET is an rt-signal, so it will
929 * be queued and received when SIG_BUDGET is unblocked */
930 send_sigbudget(t); /* will set BTF_SIG_BUDGET_SENT */
931 }
932
933 if (budget_enforced(t) && !bt_flag_is_set(t, BTF_BUDGET_EXHAUSTED)) {
934 int cpu = (tsk_rt(t)->linked_on != NO_CPU) ?
935 tsk_rt(t)->linked_on : tsk_rt(t)->scheduled_on;
936
937#ifdef CONFIG_LITMUS_LOCKING
938 /* if 't' running, trigger a virtual unlock of outermost held lock
939 * if supported. Case where 't' not running handled later in function.
940 */
941 if (cpu != NO_CPU &&
942 tsk_rt(t)->outermost_lock &&
943 tsk_rt(t)->outermost_lock->ops->is_omlp_family)
944 crm_trigger_vunlock(t);
945#endif
946
947 if (is_np(t) && is_user_np(t)) {
948 TRACE_TASK(t, "is non-preemptable, preemption delayed.\n");
949 bt_flag_set(t, BTF_BUDGET_EXHAUSTED);
950 request_exit_np(t);
951 }
952 /* where do we need to call resched? */
953 else if (cpu == smp_processor_id()) {
954 bt_flag_set(t, BTF_BUDGET_EXHAUSTED);
955 if (!in_schedule) {
956 TRACE_TASK(t, "is preemptable => FORCE_RESCHED\n");
957 litmus_reschedule_local();
958 set_will_schedule();
959 }
960 }
961 else if (cpu != NO_CPU) {
962 bt_flag_set(t, BTF_BUDGET_EXHAUSTED);
963 if (!in_schedule) {
964 litmus_reschedule(cpu);
965 TRACE_TASK(t, "is preemptable on remote cpu (%d) => FORCE_RESCHED\n", cpu);
966 }
967 }
968 else {
969 lt_t remaining;
970 crm_domain_t *cluster;
971 unsigned long flags, kludge_flags;
972
973 BUG_ON(in_schedule);
974
975 cluster = task_cpu_cluster(t);
976
977 // 1) refresh budget through job completion
978 // 2) if holds locks, tell the locking protocol to re-eval priority
979 // 3) -- the LP must undo any inheritance relations if appropriate
980
981 /* force job completion */
982 TRACE_TASK(t, "blocked, postponing deadline\n");
983
984 /* Outermost lock of the cluster. Recursive lock calls are
985 * possible on this code path. This should be the _ONLY_
986 * scenario where recursive calls are made. */
987 local_irq_save(kludge_flags);
988#ifdef CONFIG_LITMUS_DGL_SUPPORT
989 /* Unfortunately, we _might_ need to grab the DGL lock, so we
990			 * must grab it every time since it must be taken before the
991 * cluster lock. */
992 raw_spin_lock_irqsave(&cluster->dgl_lock, flags);
993 raw_readyq_lock(&cluster->cluster_lock);
994#else
995 raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);
996#endif
997
998 job_completion(t, 1); /* refreshes budget and pushes out deadline */
999
1000#ifdef CONFIG_LITMUS_LOCKING
1001 {
1002 int i;
1003				/* any linked task that inherits from 't' needs to have its
1004 * cpu-position re-evaluated. we have to do this in two passes.
1005 * pass 1: remove nodes from heap s.t. heap is in known good state.
1006 * pass 2: re-add nodes.
1007 *
1008 */
1009 for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots));
1010 i < BITS_PER_LONG;
1011 i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1))
1012 {
1013 struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i];
1014 BUG_ON(!to_update);
1015 if (tsk_rt(to_update)->linked_on != NO_CPU) {
1016 cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on);
1017 BUG_ON(!binheap_is_in_heap(&entry->hn));
1018 binheap_delete(&entry->hn, &cluster->cpu_heap);
1019 }
1020 }
1021 for (i = find_first_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots));
1022 i < BITS_PER_LONG;
1023 i = find_next_bit(&tsk_rt(t)->used_linkback_slots, BITS_PER_BYTE*sizeof(&tsk_rt(t)->used_linkback_slots), i+1))
1024 {
1025 struct task_struct *to_update = tsk_rt(t)->inh_task_linkbacks[i];
1026 BUG_ON(!to_update);
1027 if (tsk_rt(to_update)->linked_on != NO_CPU) {
1028 cpu_entry_t *entry = &per_cpu(crm_cpu_entries, tsk_rt(to_update)->linked_on);
1029 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
1030 }
1031 }
1032 }
1033
1034 /* Check our inheritance and propagate any changes forward. */
1035 reevaluate_inheritance(t);
1036
1037 if (tsk_rt(t)->outermost_lock && tsk_rt(t)->outermost_lock->ops->is_omlp_family)
1038 __crm_trigger_vunlock(t);
1039#endif
1040 /* No need to recheck priority of AUX tasks. They will always
1041 * inherit from 't' if they are enabled. Their prio change was
1042 * captured by the cpu-heap operations above. */
1043
1044#ifdef CONFIG_LITMUS_NVIDIA
1045 /* Re-eval priority of GPU interrupt threads. */
1046 if(tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu)
1047 gpu_owner_decrease_priority(t);
1048#endif
1049
1050#ifdef CONFIG_LITMUS_LOCKING
1051 /* double-check that everything is okay */
1052 check_for_preemptions(cluster);
1053#endif
1054
1055 /* should be the outermost unlock call */
1056#ifdef CONFIG_LITMUS_DGL_SUPPORT
1057 raw_readyq_unlock(&cluster->cluster_lock);
1058 raw_spin_unlock_irqrestore(&cluster->dgl_lock, flags);
1059#else
1060 raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
1061#endif
1062 flush_pending_wakes();
1063 local_irq_restore(kludge_flags);
1064
1065 /* we need to set up the budget timer since we're within the callback. */
1066 if (bt_flag_is_set(t, BTF_IS_TOP_M)) {
1067 hrtimer_forward_now(&get_budget_timer(t).timer.timer,
1068 ns_to_ktime(budget_remaining(t)));
1069 remaining = hrtimer_get_expires_ns(&get_budget_timer(t).timer.timer);
1070
1071 TRACE_TASK(t, "rearmed timer to %ld\n", remaining);
1072 restart = HRTIMER_RESTART;
1073 }
1074 }
1075 }
1076
1077out:
1078 return restart;
1079}
1080
1081
1082/* crm_tick - this function is called for every local timer
1083 * interrupt.
1084 *
1085 * checks whether the current task has expired and checks
1086 * whether we need to preempt it if it has not expired
1087 */
1088static void crm_tick(struct task_struct* t)
1089{
1090 if (is_realtime(t) &&
1091 tsk_rt(t)->budget.ops && budget_quantum_tracked(t) &&
1092 budget_exhausted(t)) {
1093 TRACE_TASK(t, "budget exhausted\n");
1094 budget_state_machine2(t,on_exhausted,!IN_SCHEDULE);
1095 }
1096}
1097
1098#ifdef CONFIG_LITMUS_LOCKING
1099static int __increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh);
1100#endif
1101
1102/* Getting schedule() right is a bit tricky. schedule() may not make any
1103 * assumptions on the state of the current task since it may be called for a
1104 * number of reasons. The reasons include a scheduler_tick() determined that it
1105 * was necessary, because sys_exit_np() was called, because some Linux
1106 * subsystem determined so, or even (in the worst case) because there is a bug
1107 * hidden somewhere. Thus, we must take extreme care to determine what the
1108 * current state is.
1109 *
1110 * The CPU could currently be scheduling a task (or not), be linked (or not).
1111 *
1112 * The following assertions for the scheduled task could hold:
1113 *
1114 * - !is_running(scheduled) // the job blocks
1115 * - scheduled->timeslice == 0 // the job completed (forcefully)
1116 * - is_completed() // the job completed (by syscall)
1117 * - linked != scheduled // we need to reschedule (for any reason)
1118 * - is_np(scheduled) // rescheduling must be delayed,
1119 * sys_exit_np must be requested
1120 *
1121 * Any of these can occur together.
1122 */
1123static struct task_struct* crm_schedule(struct task_struct * prev)
1124{
1125 cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
1126 crm_domain_t *cluster = entry->cluster;
1127 int out_of_time, sleep, preempt, np, exists, blocks;
1128 struct task_struct* next = NULL;
1129
1130#ifdef CONFIG_LITMUS_NESTED_LOCKING
1131 int recheck_inheritance;
1132#endif
1133
1134#ifdef CONFIG_RELEASE_MASTER
1135 /* Bail out early if we are the release master.
1136 * The release master never schedules any real-time tasks.
1137 */
1138 if (unlikely(cluster->domain.release_master == entry->cpu)) {
1139 sched_state_task_picked();
1140 return NULL;
1141 }
1142#endif
1143
1144 /* Detect and handle budget exhaustion if it hasn't already been done.
1145	 * Do this before acquiring any locks. */
1146 if (prev && is_realtime(prev) &&
1147 budget_exhausted(prev) &&
1148 !is_completed(prev) && /* don't bother with jobs on their way out */
1149 ((budget_enforced(prev) && !bt_flag_is_set(prev, BTF_BUDGET_EXHAUSTED)) ||
1150 (budget_signalled(prev) && !bt_flag_is_set(prev, BTF_SIG_BUDGET_SENT))) ) {
1151 TRACE_TASK(prev, "handling exhaustion in schedule() at %llu\n", litmus_clock());
1152 budget_state_machine2(prev,on_exhausted,IN_SCHEDULE);
1153 }
1154
1155#ifdef CONFIG_LITMUS_NESTED_LOCKING
1156 /* prevent updates to inheritance relations while we work with 'prev' */
1157 /* recheck inheritance if the task holds locks, is running, and will
1158 * have its deadline pushed out by job_completion() */
1159 recheck_inheritance =
1160 prev &&
1161 is_realtime(prev) &&
1162 holds_locks(prev) &&
1163 !is_np(prev) &&
1164 !is_completed(prev) &&
1165 is_running(prev) &&
1166 budget_enforced(prev) &&
1167 bt_flag_is_set(prev, BTF_BUDGET_EXHAUSTED);
1168 if (recheck_inheritance) {
1169#ifdef CONFIG_LITMUS_DGL_SUPPORT
1170 raw_spin_lock(&cluster->dgl_lock);
1171#endif
1172 raw_spin_lock(&tsk_rt(prev)->hp_blocked_tasks_lock);
1173 }
1174#endif
1175
1176 raw_readyq_lock(&cluster->cluster_lock);
1177 clear_will_schedule();
1178
1179 /* sanity checking */
1180 BUG_ON(entry->scheduled && entry->scheduled != prev);
1181 BUG_ON(entry->scheduled && !is_realtime(prev));
1182 BUG_ON(is_realtime(prev) && !entry->scheduled);
1183
1184 /* (0) Determine state */
1185 exists = entry->scheduled != NULL;
1186 blocks = exists && !is_running(entry->scheduled);
1187 out_of_time = exists &&
1188 budget_enforced(entry->scheduled) &&
1189 bt_flag_is_set(entry->scheduled, BTF_BUDGET_EXHAUSTED);
1190 np = exists && is_np(entry->scheduled);
1191 sleep = exists && is_completed(entry->scheduled);
1192 preempt = entry->scheduled != entry->linked;
1193
1194#ifdef WANT_ALL_SCHED_EVENTS
1195 TRACE_TASK(prev, "invoked crm_schedule.\n");
1196#endif
1197
1198 if (exists) {
1199 TRACE_TASK(prev,
1200 "blocks:%d out_of_time:%d np:%d completed:%d preempt:%d "
1201 "state:%d sig:%d\n",
1202 blocks, out_of_time, np, sleep, preempt,
1203 prev->state, signal_pending(prev));
1204 }
1205 if (entry->linked && preempt)
1206 TRACE_TASK(prev, "will be preempted by %s/%d\n",
1207 entry->linked->comm, entry->linked->pid);
1208
1209#ifdef CONFIG_REALTIME_AUX_TASKS
1210 if (tsk_rt(prev)->is_aux_task &&
1211 (prev->state == TASK_INTERRUPTIBLE) &&
1212 !blocks) {
1213 TRACE_TASK(prev, "Deferring descheduling of aux task %s/%d.\n",
1214 prev->comm, prev->pid);
1215 next = prev; /* allow prev to continue. */
1216 goto out_set_state;
1217 }
1218#endif
1219
1220 /* Do budget stuff */
1221 if (blocks) {
1222 if (likely(!bt_flag_is_set(prev, BTF_WAITING_FOR_RELEASE)))
1223 budget_state_machine(prev,on_blocked);
1224 else {
1225 /* waiting for release. 'exit' the scheduler. */
1226 crm_untrack_in_top_m(prev);
1227 budget_state_machine(prev,on_exit);
1228 }
1229 }
1230 else if (sleep)
1231 budget_state_machine(prev,on_sleep);
1232 else if (preempt)
1233 budget_state_machine(prev,on_preempt);
1234
1235 /* If a task blocks we have no choice but to reschedule.
1236 */
1237 if (blocks)
1238 unlink(entry->scheduled);
1239
1240#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
1241 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
1242 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
1243 // don't track preemptions or locking protocol suspensions.
1244 TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
1245 stop_gpu_tracker(entry->scheduled);
1246 }
1247 else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
1248 TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
1249 }
1250 }
1251#endif
1252
1253 /* Request a sys_exit_np() call if we would like to preempt but cannot.
1254 * We need to make sure to update the link structure anyway in case
1255 * that we are still linked. Multiple calls to request_exit_np() don't
1256 * hurt.
1257 */
1258 if (np && (out_of_time || preempt || sleep)) {
1259 unlink(entry->scheduled);
1260 request_exit_np(entry->scheduled);
1261 }
1262
1263 /* Any task that is preemptable and either exhausts its execution
1264 * budget or wants to sleep completes. We may have to reschedule after
1265 * this. Don't do a job completion if we block (can't have timers running
1266 * for blocked jobs).
1267 */
1268 if (!np && (out_of_time || sleep) && !blocks) {
1269 job_completion(entry->scheduled, !sleep);
1270#ifdef CONFIG_LITMUS_NESTED_LOCKING
1271 /* check if job completion enables an inheritance relation. no need to
1272 * recheck if task already inherits a priority since job_completion()
1273 * will not enable a higher-prio relation */
1274 if (unlikely(recheck_inheritance && !tsk_rt(entry->scheduled)->inh_task)) {
1275 struct task_struct *hp_blocked;
1276 TRACE_TASK(entry->scheduled, "rechecking inheritance.\n");
1277 hp_blocked = top_priority(&tsk_rt(entry->scheduled)->hp_blocked_tasks);
1278 /* hp_blocked_tasks_lock is held */
1279 if (rm_higher_prio(hp_blocked, entry->scheduled))
1280 __increase_priority_inheritance(entry->scheduled, effective_priority(hp_blocked));
1281 }
1282#endif
1283 }
1284
1285 /* Link pending task if we became unlinked.
1286 */
1287 if (!entry->linked)
1288 link_task_to_cpu(__take_ready(&cluster->domain), entry);
1289
1290 /* The final scheduling decision. Do we need to switch for some reason?
1291 * If linked is different from scheduled, then select linked as next.
1292 */
1293 if ((!np || blocks) &&
1294 entry->linked != entry->scheduled) {
1295 /* Schedule a linked job? */
1296 if (entry->linked) {
1297 entry->linked->rt_param.scheduled_on = entry->cpu;
1298 next = entry->linked;
1299 }
1300 if (entry->scheduled) {
1301 /* not gonna be scheduled soon */
1302 entry->scheduled->rt_param.scheduled_on = NO_CPU;
1303 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
1304 }
1305 }
1306 else {
1307 /* Only override Linux scheduler if we have a real-time task
1308 * scheduled that needs to continue.
1309 */
1310 if (exists) {
1311 next = prev;
1312 }
1313 }
1314
1315#ifdef CONFIG_REALTIME_AUX_TASKS
1316out_set_state:
1317#endif
1318
1319 sched_state_task_picked();
1320 raw_readyq_unlock(&cluster->cluster_lock);
1321
1322#ifdef CONFIG_LITMUS_NESTED_LOCKING
1323 if (recheck_inheritance) {
1324 raw_spin_unlock(&tsk_rt(prev)->hp_blocked_tasks_lock);
1325#ifdef CONFIG_LITMUS_DGL_SUPPORT
1326 raw_spin_unlock(&cluster->dgl_lock);
1327#endif
1328 }
1329#endif
1330
1331#ifdef WANT_ALL_SCHED_EVENTS
1332 TRACE("cluster_lock released, next=0x%p\n", next);
1333
1334 if (next)
1335 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
1336 else if (exists && !next)
1337 TRACE("becomes idle at %llu.\n", litmus_clock());
1338#endif
1339
1340 return next;
1341}
1342
1343
1344/* _finish_switch - we just finished the switch away from prev
1345 */
1346static void crm_finish_switch(struct task_struct *prev)
1347{
1348 cpu_entry_t* entry = &__get_cpu_var(crm_cpu_entries);
1349
1350 entry->scheduled = is_realtime(current) ? current : NULL;
1351#ifdef WANT_ALL_SCHED_EVENTS
1352 TRACE_TASK(prev, "switched away from\n");
1353#endif
1354}
1355
1356
1357/* Prepare a task for running in RT mode
1358 */
1359static void crm_task_new(struct task_struct * t, int on_rq, int running)
1360{
1361 unsigned long flags;
1362 cpu_entry_t* entry;
1363 crm_domain_t* cluster;
1364
1365 TRACE("c-fp: task new %d (param running = %d, is_running = %d)\n", t->pid, running, is_running(t));
1366
1367 /* the cluster doesn't change even if t is running */
1368 cluster = task_cpu_cluster(t);
1369
1370 raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);
1371
1372 /* setup job params */
1373 release_at(t, litmus_clock());
1374
1375 t->rt_param.linked_on = NO_CPU;
1376
1377 if (running) {
1378 entry = &per_cpu(crm_cpu_entries, task_cpu(t));
1379 BUG_ON(entry->scheduled);
1380
1381#ifdef CONFIG_RELEASE_MASTER
1382 if (entry->cpu != cluster->domain.release_master) {
1383#endif
1384 entry->scheduled = t;
1385 tsk_rt(t)->scheduled_on = task_cpu(t);
1386#ifdef CONFIG_RELEASE_MASTER
1387 } else {
1388 /* do not schedule on release master */
1389 preempt(entry); /* force resched */
1390 tsk_rt(t)->scheduled_on = NO_CPU;
1391 }
1392#endif
1393 } else {
1394 t->rt_param.scheduled_on = NO_CPU;
1395 }
1396
1397 if (is_running(t)) {
1398 crm_track_in_top_m(t);
1399 crm_job_arrival(t);
1400 }
1401
1402 raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
1403}
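/*
 * In short: a new real-time task gets its first job released at the current
 * time, its linked_on/scheduled_on bookkeeping initialized, and, if it is
 * runnable, it enters top-m tracking and the ready queue via
 * crm_job_arrival(). A release-master CPU never keeps the task; it forces a
 * preemption instead.
 */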
1404
1405static void crm_task_wake_up(struct task_struct *t)
1406{
1407 unsigned long flags;
1408 crm_domain_t *cluster;
1409 lt_t now;
1410
1411 cluster = task_cpu_cluster(t);
1412
1413 raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);
1414
1415 now = litmus_clock();
1416 TRACE_TASK(t, "wake_up at %llu\n", now);
1417
1418 if (is_sporadic(t) && is_tardy(t, now)) {
1419 release_at(t, now);
1420 sched_trace_task_release(t);
1421 }
1422 else {
1423 /* periodic task model. don't force job to end.
1424 * rely on user to say when jobs complete or when budget expires. */
1425 tsk_rt(t)->completed = 0;
1426 }
1427
1428#ifdef CONFIG_REALTIME_AUX_TASKS
1429 if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) {
1430 TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", t->comm, t->pid);
1431 disable_aux_task_owner(t);
1432 }
1433#endif
1434
1435#ifdef CONFIG_LITMUS_NVIDIA
1436 if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) {
1437 TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", t->comm, t->pid);
1438 disable_gpu_owner(t);
1439 }
1440#endif
1441
1442 budget_state_machine(t,on_wakeup);
1443 crm_job_arrival(t);
1444
1445 raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
1446}
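/*
 * Wake-up policy in brief: a sporadic task whose deadline has already passed
 * is treated as releasing a fresh job (release_at() plus a release trace
 * record), whereas a periodic task merely has its 'completed' flag cleared
 * and job boundaries are left to the user/budget machinery. Either way the
 * task re-enters the ready queue through crm_job_arrival().
 */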
1447
1448static void crm_task_block(struct task_struct *t)
1449{
1450 unsigned long flags;
1451 crm_domain_t *cluster;
1452
1453 TRACE_TASK(t, "block at %llu\n", litmus_clock());
1454
1455 cluster = task_cpu_cluster(t);
1456
1457 /* unlink if necessary */
1458 raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);
1459
1460 unlink(t);
1461
1462#ifdef CONFIG_REALTIME_AUX_TASKS
1463 if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) {
1464
1465 TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid);
1466 enable_aux_task_owner(t);
1467 }
1468#endif
1469
1470#ifdef CONFIG_LITMUS_NVIDIA
1471 if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) {
1472
1473 TRACE_CUR("%s/%d is blocked so klmirqd threads may inherit.\n", t->comm, t->pid);
1474 enable_gpu_owner(t);
1475 }
1476#endif
1477
1478 raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
1479
1480 BUG_ON(!is_realtime(t));
1481}
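/*
 * Blocking only unlinks the task from its CPU (and lets aux tasks and
 * klmirqd threads pick up its priority); re-insertion into the ready queue
 * happens later, on wake-up, via crm_job_arrival().
 */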
1482
1483
1484static void crm_task_exit(struct task_struct * t)
1485{
1486 unsigned long flags;
1487 crm_domain_t *cluster = task_cpu_cluster(t);
1488
1489 /* unlink if necessary */
1490 raw_readyq_lock_irqsave(&cluster->cluster_lock, flags);
1491
1492 if (tsk_rt(t)->inh_task) {
1493 WARN_ON(1);
1494 clear_inh_task_linkback(t, tsk_rt(t)->inh_task);
1495 }
1496
1497 /* disable budget enforcement */
1498 crm_untrack_in_top_m(t);
1499 budget_state_machine(t,on_exit);
1500
1501#ifdef CONFIG_REALTIME_AUX_TASKS
1502 /* make sure we clean up on our way out */
1503 if (unlikely(tsk_rt(t)->is_aux_task))
1504 exit_aux_task(t);
1505 else if(tsk_rt(t)->has_aux_tasks)
1506 disable_aux_task_owner(t);
1507#endif
1508
1509#ifdef CONFIG_LITMUS_NVIDIA
1510 /* make sure we clean up on our way out */
1511 if(tsk_rt(t)->held_gpus)
1512 disable_gpu_owner(t);
1513#endif
1514
1515 unlink(t);
1516 if (tsk_rt(t)->scheduled_on != NO_CPU) {
1517 cpu_entry_t *cpu;
1518 cpu = &per_cpu(crm_cpu_entries, tsk_rt(t)->scheduled_on);
1519 cpu->scheduled = NULL;
1520 tsk_rt(t)->scheduled_on = NO_CPU;
1521 }
1522 raw_readyq_unlock_irqrestore(&cluster->cluster_lock, flags);
1523
1524 BUG_ON(!is_realtime(t));
1525 TRACE_TASK(t, "RIP\n");
1526}
1527
1528
1529
1530
1531
1532
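/*
 * Three budget-draining flavors are supported, each with its own callback
 * table below. Judging by the callbacks they install: DRAIN_SIMPLE drains
 * budget while the job is scheduled; DRAIN_SIMPLE_IO additionally hooks
 * wakeups so I/O-style suspensions are handled; DRAIN_SOBLIV is
 * suspension-oblivious and also reacts to top-m membership and inheritance
 * changes. crm_admit_task() selects one of these tables per task.
 */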
1533static struct budget_tracker_ops crm_drain_simple_ops =
1534{
1535 .on_scheduled = simple_on_scheduled,
1536 .on_blocked = simple_on_blocked,
1537 .on_preempt = simple_on_preempt,
1538 .on_sleep = simple_on_sleep,
1539 .on_exit = simple_on_exit,
1540
1541 .on_wakeup = NULL,
1542 .on_inherit = NULL,
1543 .on_disinherit = NULL,
1544 .on_enter_top_m = NULL,
1545 .on_exit_top_m = NULL,
1546
1547 .on_exhausted = crm_simple_on_exhausted,
1548};
1549
1550static struct budget_tracker_ops crm_drain_simple_io_ops =
1551{
1552 .on_scheduled = simple_io_on_scheduled,
1553 .on_blocked = simple_io_on_blocked,
1554 .on_preempt = simple_io_on_preempt,
1555 .on_sleep = simple_io_on_sleep,
1556 .on_exit = simple_io_on_exit,
1557
1558 .on_wakeup = simple_io_on_wakeup,
1559 .on_inherit = NULL,
1560 .on_disinherit = NULL,
1561 .on_enter_top_m = NULL,
1562 .on_exit_top_m = NULL,
1563
1564 .on_exhausted = crm_simple_io_on_exhausted,
1565};
1566
1567static struct budget_tracker_ops crm_drain_sobliv_ops =
1568{
1569 .on_scheduled = NULL,
1570 .on_preempt = NULL,
1571 .on_sleep = NULL,
1572
1573 .on_blocked = sobliv_on_blocked,
1574 .on_wakeup = sobliv_on_wakeup,
1575 .on_exit = sobliv_on_exit,
1576 .on_inherit = sobliv_on_inherit,
1577 .on_disinherit = sobliv_on_disinherit,
1578 .on_enter_top_m = sobliv_on_enter_top_m,
1579 .on_exit_top_m = sobliv_on_exit_top_m,
1580
1581 .on_exhausted = crm_sobliv_on_exhausted,
1582};
1583
1584static long crm_admit_task(struct task_struct* tsk)
1585{
1586 struct budget_tracker_ops* ops = NULL;
1587
1588 if (remote_cluster(task_cpu(tsk)) != task_cpu_cluster(tsk)) {
1589// printk("rejected admit: incorrect cluster.\n");
1590// return -EINVAL;
1591 }
1592
1593 if (budget_enforced(tsk) || budget_signalled(tsk)) {
1594 switch(get_drain_policy(tsk)) {
1595 case DRAIN_SIMPLE:
1596 ops = &crm_drain_simple_ops;
1597 break;
1598 case DRAIN_SIMPLE_IO:
1599 ops = &crm_drain_simple_io_ops;
1600 break;
1601 case DRAIN_SOBLIV:
1602 /* budget_policy and budget_signal_policy cannot be quantum-based */
1603 if (!budget_quantum_tracked(tsk) && budget_precisely_tracked(tsk)) {
1604 ops = &crm_drain_sobliv_ops;
1605 }
1606 else {
1607				printk("rejected admit: QUANTUM_ENFORCEMENT and QUANTUM_SIGNALS are "
1608						"unsupported with DRAIN_SOBLIV.\n");
1609 return -EINVAL;
1610 }
1611 break;
1612 default:
1613 printk("rejected admit: Unsupported budget draining mode.\n");
1614 return -EINVAL;
1615 }
1616 }
1617
1618 /* always init the budget tracker, even if we're not using timers */
1619 init_budget_tracker(&tsk_rt(tsk)->budget, ops);
1620
1621#ifdef CONFIG_LITMUS_NESTED_LOCKING
1622 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1623 rm_max_heap_base_priority_order);
1624#endif
1625
1626 return 0;
1627}
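/*
 * Admission example (sketch): a task using precise, timer-based budget
 * enforcement together with DRAIN_SOBLIV is admitted and bound to
 * crm_drain_sobliv_ops; any quantum-based enforcement or signalling combined
 * with DRAIN_SOBLIV is rejected with -EINVAL. Tasks without budget
 * enforcement still get a budget tracker, just with a NULL ops table (no
 * timers armed).
 */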
1628
1629
1630
1631#ifdef CONFIG_LITMUS_LOCKING
1632
1633#include <litmus/fdso.h>
1634
1635/* called with IRQs off */
1636static int __increase_priority_inheritance(struct task_struct* t,
1637 struct task_struct* prio_inh)
1638{
1639 int success = 1;
1640 int linked_on;
1641 int check_preempt = 0;
1642 crm_domain_t* cluster;
1643 struct task_struct* old_prio_inh = tsk_rt(t)->inh_task;
1644
1645 if (prio_inh && prio_inh == effective_priority(t)) {
1646 /* relationship already established. */
1647 TRACE_TASK(t, "already has effective priority of %s/%d\n",
1648 prio_inh->comm, prio_inh->pid);
1649 goto out;
1650 }
1651
1652 if (prio_inh && (effective_priority(prio_inh) != prio_inh)) {
1653 TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n",
1654 prio_inh->comm, prio_inh->pid,
1655 effective_priority(prio_inh)->comm,
1656 effective_priority(prio_inh)->pid);
1657#ifndef CONFIG_LITMUS_NESTED_LOCKING
1658 /* Tasks should only inherit the base priority of a task.
1659 If 't' inherits a priority, then tsk_rt(t)->inh_task should
1660 be passed to this function instead. This includes transitive
1661 inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */
1662 BUG();
1663#else
1664 /* Not a bug with nested locking since inheritance propagation is
1665 not atomic. */
1666
1667 /* TODO: Is the following 'helping' short-cut safe?
1668 prio_inh = effective_priority(prio_inh);
1669 */
1670#endif
1671 }
1672
1673 cluster = task_cpu_cluster(t);
1674
1675#if 0
1676 if (prio_inh && task_cpu_cluster(prio_inh) != cluster) {
1677 WARN_ONCE(1, "Illegal to inherit between clusters. " \
1678 "target (%s/%d) on cluster w/ CPU %d and " \
1679 "inh_prio (%s/%d) on w/ CPU %d\n", \
1680 t->comm, t->pid, cluster->cpus[0]->cpu, \
1681 prio_inh->comm, prio_inh->pid, \
1682 task_cpu_cluster(prio_inh)->cpus[0]->cpu);
1683 return 1;
1684 }
1685#endif
1686
1687#ifdef CONFIG_LITMUS_NESTED_LOCKING
1688 /* this sanity check allows for weaker locking in protocols */
1689 /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */
1690 if(__rm_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1691#endif
1692 sched_trace_eff_prio_change(t, prio_inh);
1693
1694 /* clear out old inheritance relation */
1695 if (old_prio_inh) {
1696 budget_state_machine_chgprio(t,old_prio_inh,on_disinherit);
1697 clear_inh_task_linkback(t, old_prio_inh);
1698 }
1699
1700 TRACE_TASK(t, "inherits priority from %s/%d\n",
1701 prio_inh->comm, prio_inh->pid);
1702 tsk_rt(t)->inh_task = prio_inh;
1703
1704 /* update inheritance relation */
1705 if (prio_inh)
1706 budget_state_machine_chgprio(t,prio_inh,on_inherit);
1707
1708 linked_on = tsk_rt(t)->linked_on;
1709
1710 /* If it is scheduled, then we need to reorder the CPU heap. */
1711 if (linked_on != NO_CPU) {
1712 TRACE_TASK(t, "%s: linked on %d\n",
1713 __FUNCTION__, linked_on);
1714 /* Holder is scheduled; need to re-order CPUs.
1715 * We can't use heap_decrease() here since
1716 * the cpu_heap is ordered in reverse direction, so
1717 * it is actually an increase. */
1718 binheap_delete(&per_cpu(crm_cpu_entries, linked_on).hn,
1719 &cluster->cpu_heap);
1720 binheap_add(&per_cpu(crm_cpu_entries, linked_on).hn,
1721 &cluster->cpu_heap, cpu_entry_t, hn);
1722
1723 /* tell prio_inh that we're __running__ with its priority */
1724 set_inh_task_linkback(t, prio_inh);
1725 }
1726 else {
1727 /* holder may be queued: first stop queue changes */
1728 raw_spin_lock(&cluster->domain.release_lock);
1729 if (is_queued(t)) {
1730 TRACE_TASK(t, "%s: is queued\n",
1731 __FUNCTION__);
1732 /* We need to update the position of holder in some
1733			 * heap. Note that this could be a release heap if
1734 * budget enforcement is used and this job overran. */
1735 check_preempt =
1736 !bheap_decrease(rm_ready_order, tsk_rt(t)->heap_node);
1737 } else {
1738 /* Nothing to do: if it is not queued and not linked
1739 * then it is either sleeping or currently being moved
1740 * by other code (e.g., a timer interrupt handler) that
1741 * will use the correct priority when enqueuing the
1742 * task. */
1743 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1744 __FUNCTION__);
1745 }
1746 raw_spin_unlock(&cluster->domain.release_lock);
1747
1748#ifdef CONFIG_REALTIME_AUX_TASKS
1749 /* propagate to aux tasks */
1750 if (tsk_rt(t)->has_aux_tasks) {
1751 aux_task_owner_increase_priority(t);
1752 }
1753#endif
1754
1755#ifdef CONFIG_LITMUS_NVIDIA
1756 /* propagate to gpu klmirqd */
1757 if (tsk_rt(t)->held_gpus) {
1758 gpu_owner_increase_priority(t);
1759 }
1760#endif
1761
1762 /* If holder was enqueued in a release heap, then the following
1763 * preemption check is pointless, but we can't easily detect
1764 * that case. If you want to fix this, then consider that
1765 * simply adding a state flag requires O(n) time to update when
1766 * releasing n tasks, which conflicts with the goal to have
1767 * O(log n) merges. */
1768 if (check_preempt) {
1769 /* heap_decrease() hit the top level of the heap: make
1770 * sure preemption checks get the right task, not the
1771 * potentially stale cache. */
1772 bheap_uncache_min(rm_ready_order,
1773 &cluster->domain.ready_queue);
1774 check_for_preemptions(cluster);
1775 }
1776 }
1777#ifdef CONFIG_LITMUS_NESTED_LOCKING
1778 }
1779 else {
1780		/* Occurrence is okay under two scenarios:
1781		 * 1. Fine-grained nested locks (no compiled DGL support): Concurrent
1782		 * updates are chasing each other through the wait-for chain.
1783		 * 2. Budget exhaustion caused the HP waiter to lose its priority, but
1784 * the lock structure hasn't yet been updated (but soon will be).
1785 */
1786 TRACE_TASK(t, "Spurious invalid priority increase. "
1787			"Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d.\n"
1788			"Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1789 t->comm, t->pid,
1790 effective_priority(t)->comm, effective_priority(t)->pid,
1791 (prio_inh) ? prio_inh->comm : "null",
1792 (prio_inh) ? prio_inh->pid : 0);
1793 WARN_ON(!prio_inh);
1794 success = 0;
1795 }
1796#endif
1797
1798out:
1799 return success;
1800}
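/*
 * The sanity check above compares prio_inh's BASE priority with t's
 * EFFECTIVE priority, so inheritance is applied only when it would actually
 * raise t's effective priority. With nested locking the check may fail
 * benignly while chained updates are still propagating, which is why the
 * else-branch only logs a "spurious" request instead of BUG()ing.
 */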
1801
1802/* called with IRQs off */
1803static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1804{
1805 crm_domain_t* cluster = task_cpu_cluster(t);
1806
1807 raw_readyq_lock(&cluster->cluster_lock);
1808
1809 TRACE_TASK(t, "to inherit from %s/%d\n", prio_inh->comm, prio_inh->pid);
1810
1811 __increase_priority_inheritance(t, prio_inh);
1812
1813 raw_readyq_unlock(&cluster->cluster_lock);
1814}
1815
1816/* called with IRQs off */
1817static int __decrease_priority_inheritance(struct task_struct* t,
1818 struct task_struct* prio_inh,
1819 int budget_tiggered)
1820{
1821 crm_domain_t* cluster;
1822 int success = 1;
1823 struct task_struct* old_prio_inh = tsk_rt(t)->inh_task;
1824
1825 if (prio_inh == old_prio_inh) {
1826 /* relationship already established. */
1827 TRACE_TASK(t, "already inherits priority from %s/%d\n",
1828 (prio_inh) ? prio_inh->comm : "(null)",
1829 (prio_inh) ? prio_inh->pid : 0);
1830 goto out;
1831 }
1832
1833 if (prio_inh && (effective_priority(prio_inh) != prio_inh)) {
1834 TRACE_TASK(t, "Inheriting from %s/%d instead of the eff_prio = %s/%d!\n",
1835 prio_inh->comm, prio_inh->pid,
1836 effective_priority(prio_inh)->comm,
1837 effective_priority(prio_inh)->pid);
1838#ifndef CONFIG_LITMUS_NESTED_LOCKING
1839 /* Tasks should only inherit the base priority of a task.
1840 If 't' inherits a priority, then tsk_rt(t)->inh_task should
1841 be passed to this function instead. This includes transitive
1842 inheritance relations (tsk_rt(tsk_rt(...)->inh_task)->inh_task). */
1843 BUG();
1844#else
1845 /* Not a bug with nested locking since inheritance propagation is
1846 not atomic. */
1847
1848 /* TODO: Is the following 'helping' short-cut safe?
1849 prio_inh = effective_priority(prio_inh);
1850 */
1851#endif
1852 }
1853
1854 cluster = task_cpu_cluster(t);
1855
1856#if 0
1857 if (prio_inh && task_cpu_cluster(prio_inh) != cluster) {
1858 WARN_ONCE(1, "Illegal to inherit between clusters. " \
1859 "target (%s/%d) on cluster w/ CPU %d and " \
1860 "inh_prio (%s/%d) on w/ CPU %d\n", \
1861 t->comm, t->pid, cluster->cpus[0]->cpu, \
1862 prio_inh->comm, prio_inh->pid, \
1863 task_cpu_cluster(prio_inh)->cpus[0]->cpu);
1864 return 1;
1865 }
1866#endif
1867
1868#ifdef CONFIG_LITMUS_NESTED_LOCKING
1869 if(budget_tiggered || __rm_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
1870#endif
1871 sched_trace_eff_prio_change(t, prio_inh);
1872
1873 if (budget_tiggered) {
1874 BUG_ON(!old_prio_inh);
1875 TRACE_TASK(t, "budget-triggered 'decrease' in priority. "
1876				"%s/%d's budget should have just been exhausted.\n",
1877 old_prio_inh->comm, old_prio_inh->pid);
1878 }
1879
1880 /* clear out old inheritance relation */
1881 if (old_prio_inh) {
1882 budget_state_machine_chgprio(t,old_prio_inh,on_disinherit);
1883 clear_inh_task_linkback(t, old_prio_inh);
1884 }
1885
1886 /* A job only stops inheriting a priority when it releases a
1887		 * resource. Thus we can make the following assumption. */
1888 if(prio_inh)
1889 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1890 prio_inh->comm, prio_inh->pid);
1891 else
1892 TRACE_TASK(t, "base priority restored.\n");
1893
1894 /* set up new inheritance relation */
1895 tsk_rt(t)->inh_task = prio_inh;
1896
1897 if (prio_inh)
1898 budget_state_machine_chgprio(t,prio_inh,on_inherit);
1899
1900 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1901 TRACE_TASK(t, "is scheduled.\n");
1902
1903 /* link back to new inheritance */
1904 if (prio_inh)
1905 set_inh_task_linkback(t, prio_inh);
1906
1907 /* Check if rescheduling is necessary. We can't use heap_decrease()
1908 * since the priority was effectively lowered. */
1909 unlink(t);
1910 crm_job_arrival(t);
1911 }
1912 else {
1913 /* task is queued */
1914 raw_spin_lock(&cluster->domain.release_lock);
1915 if (is_queued(t)) {
1916 TRACE_TASK(t, "is queued.\n");
1917
1918 BUG_ON(
1919 !is_released(t, litmus_clock()) &&
1920 !tsk_rt(t)->job_params.is_backlogged_job &&
1921 !is_early_releasing(t));
1922
1923 unlink(t);
1924 crm_job_arrival(t);
1925 }
1926 else {
1927 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
1928 }
1929 raw_spin_unlock(&cluster->domain.release_lock);
1930 }
1931
1932#ifdef CONFIG_REALTIME_AUX_TASKS
1933 /* propagate to aux tasks */
1934 if (tsk_rt(t)->has_aux_tasks)
1935 aux_task_owner_decrease_priority(t);
1936#endif
1937
1938#ifdef CONFIG_LITMUS_NVIDIA
1939 /* propagate to gpu */
1940 if (tsk_rt(t)->held_gpus)
1941 gpu_owner_decrease_priority(t);
1942#endif
1943
1944#ifdef CONFIG_LITMUS_NESTED_LOCKING
1945 }
1946 else {
1947 TRACE_TASK(t, "Spurious invalid priority decrease. "
1948 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1949			"Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1950 t->comm, t->pid,
1951 effective_priority(t)->comm, effective_priority(t)->pid,
1952 (prio_inh) ? prio_inh->comm : "null",
1953 (prio_inh) ? prio_inh->pid : 0);
1954 success = 0;
1955 }
1956#endif
1957
1958out:
1959 return success;
1960}
1961
1962static void decrease_priority_inheritance(struct task_struct* t,
1963 struct task_struct* prio_inh,
1964 int budget_tiggered)
1965{
1966 crm_domain_t* cluster = task_cpu_cluster(t);
1967
1968 raw_readyq_lock(&cluster->cluster_lock);
1969
1970 TRACE_TASK(t, "to inherit from %s/%d (decrease)\n",
1971 (prio_inh) ? prio_inh->comm : "null",
1972 (prio_inh) ? prio_inh->pid : 0);
1973
1974 __decrease_priority_inheritance(t, prio_inh, budget_tiggered);
1975
1976 raw_readyq_unlock(&cluster->cluster_lock);
1977}
1978
1979
1980#ifdef CONFIG_LITMUS_NESTED_LOCKING
1981
1982/* called with IRQs off */
1983/* preconditions:
1984 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1985 (2) The lock 'to_unlock' is held.
1986 */
1987static void nested_increase_priority_inheritance(struct task_struct* t,
1988 struct task_struct* prio_inh,
1989 raw_spinlock_t *to_unlock,
1990 unsigned long irqflags)
1991{
1992 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1993
1994	if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1995 increase_priority_inheritance(t, prio_inh); // increase our prio.
1996 }
1997
1998 /* note: cluster lock is not held continuously during propagation, so there
1999 may be momentary inconsistencies while nested priority propagation 'chases'
2000 other updates. */
2001
2002	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
2003
2004 if(blocked_lock) {
2005 if(blocked_lock->ops->supports_nesting) {
2006 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
2007 blocked_lock->ident);
2008
2009 // beware: recursion
2010 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
2011 t, to_unlock,
2012 irqflags);
2013 }
2014 else {
2015 TRACE_TASK(t, "Inheritor is blocked on litmus lock (%d) that does not support nesting!\n",
2016 blocked_lock->ident);
2017 unlock_fine_irqrestore(to_unlock, irqflags);
2018 }
2019 }
2020 else {
2021 TRACE_TASK(t, "is not blocked on litmus lock. No propagation.\n");
2022 unlock_fine_irqrestore(to_unlock, irqflags);
2023 }
2024}
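/*
 * Propagation is hand-over-hand: t's hp_blocked_tasks_lock is dropped before
 * recursing into the lock that t is blocked on, so a long wait-for chain is
 * walked one lock at a time instead of with all locks held. This is also why
 * concurrent updates may briefly "chase" each other, as noted above.
 */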
2025
2026/* called with IRQs off */
2027/* preconditions:
2028 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
2029 (2) The lock 'to_unlock' is held.
2030 */
2031static void nested_decrease_priority_inheritance(struct task_struct* t,
2032 struct task_struct* prio_inh,
2033 raw_spinlock_t *to_unlock,
2034 unsigned long irqflags,
2035 int budget_tiggered)
2036{
2037 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
2038 decrease_priority_inheritance(t, prio_inh, budget_tiggered);
2039
2040	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
2041
2042 if(blocked_lock) {
2043 if(blocked_lock->ops->supports_nesting) {
2044 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
2045 blocked_lock->ident);
2046 // beware: recursion
2047 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
2048 to_unlock,
2049 irqflags,
2050 budget_tiggered);
2051 }
2052 else {
2053 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
2054 blocked_lock);
2055 unlock_fine_irqrestore(to_unlock, irqflags);
2056 }
2057 }
2058 else {
2059 TRACE_TASK(t, "is not blocked. No propagation.\n");
2060 unlock_fine_irqrestore(to_unlock, irqflags);
2061 }
2062}
2063
2064
2065/* ******************** FIFO MUTEX ********************** */
2066
2067static struct litmus_lock_ops crm_fifo_mutex_lock_ops = {
2068 .lock = fifo_mutex_lock,
2069 .unlock = fifo_mutex_unlock,
2070 .should_yield_lock = fifo_mutex_should_yield_lock,
2071 .close = fifo_mutex_close,
2072 .deallocate = fifo_mutex_free,
2073
2074 .budget_exhausted = fifo_mutex_budget_exhausted,
2075 .propagate_increase_inheritance = fifo_mutex_propagate_increase_inheritance,
2076 .propagate_decrease_inheritance = fifo_mutex_propagate_decrease_inheritance,
2077
2078#ifdef CONFIG_LITMUS_DGL_SUPPORT
2079 .dgl_lock = fifo_mutex_dgl_lock,
2080 .is_owner = fifo_mutex_is_owner,
2081 .get_owner = fifo_mutex_get_owner,
2082 .enable_priority = fifo_mutex_enable_priority,
2083
2084 .dgl_can_quick_lock = NULL,
2085 .dgl_quick_lock = NULL,
2086
2087 .supports_dgl = 1,
2088 .requires_atomic_dgl = 0,
2089#endif
2090 .supports_nesting = 1,
2091 .supports_budget_exhaustion = 1,
2092 .is_omlp_family = 0,
2093};
2094
2095static struct litmus_lock* crm_new_fifo_mutex(void)
2096{
2097 return fifo_mutex_new(&crm_fifo_mutex_lock_ops);
2098}
2099
2100/* ******************** PRIOQ MUTEX ********************** */
2101
2102static struct litmus_lock_ops crm_prioq_mutex_lock_ops = {
2103 .lock = prioq_mutex_lock,
2104 .unlock = prioq_mutex_unlock,
2105 .should_yield_lock = prioq_mutex_should_yield_lock,
2106 .close = prioq_mutex_close,
2107 .deallocate = prioq_mutex_free,
2108
2109 .budget_exhausted = prioq_mutex_budget_exhausted,
2110 .propagate_increase_inheritance = prioq_mutex_propagate_increase_inheritance,
2111 .propagate_decrease_inheritance = prioq_mutex_propagate_decrease_inheritance,
2112
2113#ifdef CONFIG_LITMUS_DGL_SUPPORT
2114 .dgl_lock = prioq_mutex_dgl_lock,
2115 .is_owner = prioq_mutex_is_owner,
2116 .get_owner = prioq_mutex_get_owner,
2117 .enable_priority = prioq_mutex_enable_priority,
2118
2119 .dgl_can_quick_lock = prioq_mutex_dgl_can_quick_lock,
2120 .dgl_quick_lock = prioq_mutex_dgl_quick_lock,
2121
2122 .supports_dgl = 1,
2123 .requires_atomic_dgl = 1,
2124#endif
2125 .supports_nesting = 1,
2126 .supports_budget_exhaustion = 1,
2127 .is_omlp_family = 0,
2128};
2129
2130static struct litmus_lock* crm_new_prioq_mutex(void)
2131{
2132 return prioq_mutex_new(&crm_prioq_mutex_lock_ops);
2133}
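/*
 * FIFO vs. PRIOQ in these tables: both support nesting, DGLs, and budget
 * exhaustion; as the names suggest they differ in wait-queue order, and
 * PRIOQ additionally requires DGL acquisitions to be atomic
 * (requires_atomic_dgl = 1) while FIFO does not.
 */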
2134
2135/* ******************** IKGLP ********************** */
2136
2137static struct litmus_lock_ops crm_ikglp_lock_ops = {
2138 .lock = ikglp_lock,
2139 .unlock = ikglp_unlock,
2140 .should_yield_lock = NULL,
2141 .close = ikglp_close,
2142 .deallocate = ikglp_free,
2143
2144 .budget_exhausted = ikglp_budget_exhausted,
2145 .omlp_virtual_unlock = ikglp_virtual_unlock,
2146
2147 // ikglp can only be an outer-most lock.
2148 .propagate_increase_inheritance = NULL,
2149 .propagate_decrease_inheritance = NULL,
2150
2151#ifdef CONFIG_LITMUS_DGL_SUPPORT
2152 .supports_dgl = 0,
2153 .requires_atomic_dgl = 0,
2154#endif
2155 .supports_nesting = 0,
2156 .supports_budget_exhaustion = 1,
2157 .is_omlp_family = 1,
2158};
2159
2160static struct litmus_lock* crm_new_ikglp(void* __user arg)
2161{
2162 // assumes clusters of uniform size.
2163 return ikglp_new(cluster_size, &crm_ikglp_lock_ops, arg);
2164}
2165
2166
2167/* ******************** KFMLP support ********************** */
2168
2169static struct litmus_lock_ops crm_kfmlp_lock_ops = {
2170 .lock = kfmlp_lock,
2171 .unlock = kfmlp_unlock,
2172 .should_yield_lock = NULL,
2173 .close = kfmlp_close,
2174 .deallocate = kfmlp_free,
2175
2176 // kfmlp can only be an outer-most lock.
2177 .propagate_increase_inheritance = NULL,
2178 .propagate_decrease_inheritance = NULL,
2179
2180#ifdef CONFIG_LITMUS_DGL_SUPPORT
2181 .supports_dgl = 0,
2182 .requires_atomic_dgl = 0,
2183#endif
2184 .supports_nesting = 0,
2185 .supports_budget_exhaustion = 0,
2186 .is_omlp_family = 0,
2187};
2188
2189
2190static struct litmus_lock* crm_new_kfmlp(void* __user arg)
2191{
2192 return kfmlp_new(&crm_kfmlp_lock_ops, arg);
2193}
2194
2195
2196/* **** lock constructor **** */
2197
2198static long crm_allocate_lock(struct litmus_lock **lock, int type,
2199 void* __user args)
2200{
2201 int err;
2202
2203 switch (type) {
2204#ifdef CONFIG_LITMUS_NESTED_LOCKING
2205 case FIFO_MUTEX:
2206 *lock = crm_new_fifo_mutex();
2207 break;
2208
2209 case PRIOQ_MUTEX:
2210 *lock = crm_new_prioq_mutex();
2211 break;
2212
2213 case IKGLP_SEM:
2214 *lock = crm_new_ikglp(args);
2215 break;
2216#endif
2217 case KFMLP_SEM:
2218 *lock = crm_new_kfmlp(args);
2219 break;
2220
2221 default:
2222 err = -ENXIO;
2223 goto UNSUPPORTED_LOCK;
2224 };
2225
2226 if (*lock)
2227 err = 0;
2228 else
2229 err = -ENOMEM;
2230
2231UNSUPPORTED_LOCK:
2232 return err;
2233}
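/*
 * crm_allocate_lock() is reached through the generic FDSO layer (see
 * <litmus/fdso.h>) when a task opens a lock object of the given type; the
 * plugin only constructs the protocol-specific lock, while reference
 * counting and file-descriptor plumbing stay in the LITMUS core.
 */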
2234
2235#endif // CONFIG_LITMUS_NESTED_LOCKING
2236
2237
2238#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
2239static struct affinity_observer_ops crm_kfmlp_affinity_ops __attribute__ ((unused)) = {
2240 .close = kfmlp_aff_obs_close,
2241 .deallocate = kfmlp_aff_obs_free,
2242};
2243
2244#ifdef CONFIG_LITMUS_NESTED_LOCKING
2245static struct affinity_observer_ops crm_ikglp_affinity_ops __attribute__ ((unused)) = {
2246 .close = ikglp_aff_obs_close,
2247 .deallocate = ikglp_aff_obs_free,
2248};
2249#endif
2250
2251static long crm_allocate_affinity_observer(struct affinity_observer **aff_obs,
2252 int type,
2253 void* __user args)
2254{
2255 int err;
2256
2257 switch (type) {
2258#ifdef CONFIG_LITMUS_NVIDIA
2259 case KFMLP_SIMPLE_GPU_AFF_OBS:
2260 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&crm_kfmlp_affinity_ops, args);
2261 break;
2262
2263 case KFMLP_GPU_AFF_OBS:
2264 *aff_obs = kfmlp_gpu_aff_obs_new(&crm_kfmlp_affinity_ops, args);
2265 break;
2266
2267#ifdef CONFIG_LITMUS_NESTED_LOCKING
2268 case IKGLP_SIMPLE_GPU_AFF_OBS:
2269 *aff_obs = ikglp_simple_gpu_aff_obs_new(&crm_ikglp_affinity_ops, args);
2270 break;
2271
2272 case IKGLP_GPU_AFF_OBS:
2273 *aff_obs = ikglp_gpu_aff_obs_new(&crm_ikglp_affinity_ops, args);
2274 break;
2275#endif
2276#endif
2277 default:
2278 err = -ENXIO;
2279 goto UNSUPPORTED_AFF_OBS;
2280 };
2281
2282 if (*aff_obs)
2283 err = 0;
2284 else
2285 err = -ENOMEM;
2286
2287UNSUPPORTED_AFF_OBS:
2288 return err;
2289}
2290#endif
2291
2292
2293
2294#endif // CONFIG_LITMUS_LOCKING
2295
2296
2297#ifdef VERBOSE_INIT
2298static void print_cluster_topology(cpumask_var_t mask, int cpu)
2299{
2300 int chk;
2301 char buf[255];
2302
2303 chk = cpulist_scnprintf(buf, 254, mask);
2304 buf[chk] = '\0';
2305 printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
2306
2307}
2308#endif
2309
2310static void cleanup_crm(void)
2311{
2312 int i;
2313
2314 if (clusters_allocated) {
2315 for (i = 0; i < num_clusters; i++) {
2316 kfree(crm[i].cpus);
2317 free_cpumask_var(crm[i].cpu_map);
2318 }
2319
2320 kfree(crm);
2321 }
2322}
2323
2324#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2325static int crm_map_gpu_to_cpu(int gpu)
2326{
2327 int default_cpu;
2328 int cpu_cluster = gpu / gpu_cluster_size;
2329
2330 /* bonham-specific hack for the fully partitioned case (both CPUs and GPUs partitioned) */
2331 /* TODO: Make this aware of the NUMA topology generically */
2332 if(num_clusters == 12 && num_gpu_clusters == 8) {
2333 if(gpu >= 4) {
2334 cpu_cluster += 2; // assign the GPU to a CPU on the same NUMA node
2335 }
2336 }
2337
2338 default_cpu = crm[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster
2339
2340 TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu);
2341
2342 return default_cpu;
2343}
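/*
 * Worked example, assuming eight online GPUs (so gpu_cluster_size == 1) and
 * the 12-CPU-cluster/8-GPU-cluster case handled above: GPU 5 first maps to
 * cluster 5, is shifted by two to cluster 7 because gpu >= 4, and its
 * interrupt threads default to the first CPU of that cluster.
 */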
2344#endif
2345
2346static long crm_activate_plugin(void)
2347{
2348 int i, j, cpu, ccpu, cpu_count;
2349 cpu_entry_t *entry;
2350
2351 cpumask_var_t mask;
2352 int chk = 0;
2353
2354 /* de-allocate old clusters, if any */
2355 cleanup_crm();
2356
2357
2358 printk(KERN_INFO "C-RM: Activate Plugin, cluster configuration = %d\n",
2359 cluster_config);
2360
2361 /* need to get cluster_size first */
2362 if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
2363 return -ENOMEM;
2364
2365 if (unlikely(cluster_config == GLOBAL_CLUSTER)) {
2366 cluster_size = num_online_cpus();
2367 } else {
2368 chk = get_shared_cpu_map(mask, 0, cluster_config);
2369 if (chk) {
2370 /* if chk != 0 then it is the max allowed index */
2371 printk(KERN_INFO "C-RM: Cluster configuration = %d "
2372 "is not supported on this hardware.\n",
2373 cluster_config);
2374 /* User should notice that the configuration failed, so
2375 * let's bail out. */
2376 return -EINVAL;
2377 }
2378
2379 cluster_size = cpumask_weight(mask);
2380 }
2381
2382 if ((num_online_cpus() % cluster_size) != 0) {
2383 /* this can't be right, some cpus are left out */
2384		printk(KERN_ERR "C-RM: cannot group %d CPUs into clusters of size %d!\n",
2385				num_online_cpus(), cluster_size);
2386		return -EINVAL;
2387 }
2388
2389 num_clusters = num_online_cpus() / cluster_size;
2390 printk(KERN_INFO "C-RM: %d cluster(s) of size = %d\n",
2391 num_clusters, cluster_size);
2392
2393
2394#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2395 num_gpu_clusters = min(num_clusters, num_online_gpus());
2396 gpu_cluster_size = num_online_gpus() / num_gpu_clusters;
2397
2398 if (((num_online_gpus() % gpu_cluster_size) != 0) ||
2399 (num_gpu_clusters != num_clusters)) {
2400 printk(KERN_WARNING "C-RM: GPUs not uniformly distributed among CPU clusters.\n");
2401 }
2402#endif
2403
2404 /* initialize clusters */
2405 crm = kmalloc(num_clusters * sizeof(crm_domain_t), GFP_ATOMIC);
2406 for (i = 0; i < num_clusters; i++) {
2407
2408 crm[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
2409 GFP_ATOMIC);
2410 INIT_BINHEAP_HANDLE(&(crm[i].cpu_heap), cpu_lower_prio);
2411 rm_domain_init(&(crm[i].domain), NULL, crm_release_jobs);
2412
2413 if(!zalloc_cpumask_var(&crm[i].cpu_map, GFP_ATOMIC))
2414 return -ENOMEM;
2415#ifdef CONFIG_RELEASE_MASTER
2416 crm[i].domain.release_master = atomic_read(&release_master_cpu);
2417#endif
2418 }
2419
2420 /* cycle through cluster and add cpus to them */
2421 for (i = 0; i < num_clusters; i++) {
2422
2423#ifdef CONFIG_LITMUS_DGL_SUPPORT
2424 raw_spin_lock_init(&crm[i].dgl_lock);
2425#endif
2426
2427#ifdef RECURSIVE_READY_QUEUE_LOCK
2428 crm[i].recursive_depth = 0;
2429 atomic_set(&crm[i].owner_cpu, NO_CPU);
2430#endif
2431
2432 crm[i].top_m_size = 0;
2433 INIT_BINHEAP_HANDLE(&crm[i].top_m, crm_min_heap_base_priority_order);
2434 INIT_BINHEAP_HANDLE(&crm[i].not_top_m, crm_max_heap_base_priority_order);
2435
2436 for_each_online_cpu(cpu) {
2437 /* check if the cpu is already in a cluster */
2438 for (j = 0; j < num_clusters; j++)
2439 if (cpumask_test_cpu(cpu, crm[j].cpu_map))
2440 break;
2441 /* if it is in a cluster go to next cpu */
2442 if (j < num_clusters &&
2443 cpumask_test_cpu(cpu, crm[j].cpu_map))
2444 continue;
2445
2446 /* this cpu isn't in any cluster */
2447 /* get the shared cpus */
2448 if (unlikely(cluster_config == GLOBAL_CLUSTER))
2449 cpumask_copy(mask, cpu_online_mask);
2450 else
2451 get_shared_cpu_map(mask, cpu, cluster_config);
2452
2453 cpumask_copy(crm[i].cpu_map, mask);
2454#ifdef VERBOSE_INIT
2455 print_cluster_topology(mask, cpu);
2456#endif
2457 /* add cpus to current cluster and init cpu_entry_t */
2458 cpu_count = 0;
2459 for_each_cpu(ccpu, crm[i].cpu_map) {
2460
2461 entry = &per_cpu(crm_cpu_entries, ccpu);
2462 crm[i].cpus[cpu_count] = entry;
2463
2464 memset(entry, 0, sizeof(*entry));
2465 entry->cpu = ccpu;
2466 entry->cluster = &crm[i];
2467 INIT_BINHEAP_NODE(&entry->hn);
2468 mb();
2469
2470 ++cpu_count;
2471
2472#ifdef CONFIG_RELEASE_MASTER
2473 /* only add CPUs that should schedule jobs */
2474 if (entry->cpu != entry->cluster->domain.release_master)
2475#endif
2476 update_cpu_position(entry);
2477 }
2478 /* done with this cluster */
2479 break;
2480 }
2481 }
2482
2483#ifdef CONFIG_LITMUS_SOFTIRQD
2484 init_klmirqd();
2485#endif
2486
2487#ifdef CONFIG_LITMUS_NVIDIA
2488 init_nvidia_info();
2489#endif
2490
2491 init_wake_queues();
2492
2493 free_cpumask_var(mask);
2494 clusters_allocated = 1;
2495 return 0;
2496}
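/*
 * Sizing sketch (hypothetical numbers): on a 16-CPU machine where
 * cluster_config selects a cache level shared by two CPUs,
 * get_shared_cpu_map() yields cluster_size = 2 and hence num_clusters = 8;
 * GLOBAL_CLUSTER instead collapses everything into one 16-CPU cluster.
 */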
2497
2498/* Plugin object */
2499static struct sched_plugin crm_plugin __cacheline_aligned_in_smp = {
2500 .plugin_name = "C-RM", // for now
2501 .finish_switch = crm_finish_switch,
2502 .tick = crm_tick,
2503 .task_new = crm_task_new,
2504 .complete_job = complete_job,
2505 .task_exit = crm_task_exit,
2506 .schedule = crm_schedule,
2507 .task_wake_up = crm_task_wake_up,
2508 .task_block = crm_task_block,
2509 .admit_task = crm_admit_task,
2510 .activate_plugin = crm_activate_plugin,
2511 .compare = rm_higher_prio,
2512#ifdef CONFIG_LITMUS_LOCKING
2513 .allocate_lock = crm_allocate_lock,
2514 .increase_prio = increase_priority_inheritance,
2515 .decrease_prio = decrease_priority_inheritance,
2516 .__increase_prio = __increase_priority_inheritance,
2517 .__decrease_prio = __decrease_priority_inheritance,
2518#endif
2519#ifdef CONFIG_LITMUS_NESTED_LOCKING
2520 .nested_increase_prio = nested_increase_priority_inheritance,
2521 .nested_decrease_prio = nested_decrease_priority_inheritance,
2522 .__compare = __rm_higher_prio,
2523#endif
2524#ifdef CONFIG_LITMUS_DGL_SUPPORT
2525 .get_dgl_spinlock = crm_get_dgl_spinlock,
2526#endif
2527#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
2528 .allocate_aff_obs = crm_allocate_affinity_observer,
2529#endif
2530#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2531 .map_gpu_to_cpu = crm_map_gpu_to_cpu,
2532#endif
2533};
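/*
 * Like other LITMUS^RT plugins, C-RM is typically selected at runtime by
 * writing its name to /proc/litmus/active_plugin, which ends up invoking
 * crm_activate_plugin() and (re)building the cluster structures above.
 */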
2534
2535static struct proc_dir_entry *cluster_file = NULL, *crm_dir = NULL;
2536
2537static int __init init_crm(void)
2538{
2539 int err, fs;
2540
2541 err = register_sched_plugin(&crm_plugin);
2542 if (!err) {
2543 fs = make_plugin_proc_dir(&crm_plugin, &crm_dir);
2544 if (!fs)
2545 cluster_file = create_cluster_file(crm_dir, &cluster_config);
2546 else
2547 printk(KERN_ERR "Could not allocate C-RM procfs dir.\n");
2548 }
2549 return err;
2550}
2551
2552static void clean_crm(void)
2553{
2554 cleanup_crm();
2555 if (cluster_file)
2556 remove_proc_entry("cluster", crm_dir);
2557 if (crm_dir)
2558 remove_plugin_proc_dir(&crm_plugin);
2559}
2560
2561module_init(init_crm);
2562module_exit(clean_crm);