path: root/litmus
Diffstat (limited to 'litmus')
-rw-r--r--  litmus/Kconfig                    212
-rw-r--r--  litmus/Makefile                     9
-rw-r--r--  litmus/affinity.c                   2
-rw-r--r--  litmus/aux_tasks.c                529
-rw-r--r--  litmus/budget.c                    16
-rw-r--r--  litmus/edf_common.c               264
-rw-r--r--  litmus/fdso.c                      15
-rw-r--r--  litmus/gpu_affinity.c             231
-rw-r--r--  litmus/ikglp_lock.c              2976
-rw-r--r--  litmus/jobs.c                       2
-rw-r--r--  litmus/kexclu_affinity.c           92
-rw-r--r--  litmus/kfmlp_lock.c              1003
-rw-r--r--  litmus/litmus.c                   280
-rw-r--r--  litmus/litmus_pai_softirq.c        64
-rw-r--r--  litmus/litmus_proc.c               17
-rw-r--r--  litmus/litmus_softirq.c          1205
-rw-r--r--  litmus/locking.c                  447
-rw-r--r--  litmus/nvidia_info.c             1137
-rw-r--r--  litmus/preempt.c                   32
-rw-r--r--  litmus/rsm_lock.c                 796
-rw-r--r--  litmus/rt_domain.c                 13
-rw-r--r--  litmus/sched_cedf.c              1138
-rw-r--r--  litmus/sched_gsn_edf.c           1195
-rw-r--r--  litmus/sched_litmus.c               4
-rw-r--r--  litmus/sched_pfp.c                 40
-rw-r--r--  litmus/sched_plugin.c             167
-rw-r--r--  litmus/sched_psn_edf.c             41
-rw-r--r--  litmus/sched_task_trace.c         282
-rw-r--r--  litmus/sched_trace_external.c      64
29 files changed, 11980 insertions, 293 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index bd6635c8de08..594c54342bdc 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -34,8 +34,70 @@ config RELEASE_MASTER
34 (http://www.cs.unc.edu/~anderson/papers.html). 34 (http://www.cs.unc.edu/~anderson/papers.html).
35 Currently only supported by GSN-EDF. 35 Currently only supported by GSN-EDF.
36 36
37config REALTIME_AUX_TASKS
38 bool "Real-Time Auxiliary Tasks"
39 depends on LITMUS_LOCKING
40 default n
41 help
42 Adds a system call that forces all non-real-time threads in a process
43 to become auxiliary real-time tasks. These tasks inherit the priority of
44 the highest-prio *BLOCKED* (but NOT blocked on a Litmus lock) real-time
45 task (non-auxiliary) in the process. This allows the integration of COTS
46 code that has background helper threads used primarily for message passing
47 and synchronization. If these background threads are NOT real-time scheduled,
48 then unbounded priority inversions may occur if a real-time task blocks on
49 a non-real-time thread.
50
51 Beware of the following pitfalls:
52 1) Auxiliary threads should not be CPU intensive. They should mostly
53 block on mutexes and condition variables. Violating this will
54 likely prevent meaningful analysis.
55 2) Since there may be more than one auxiliary thread per process,
56 priority inversions may occur with respect to single-threaded
57 task models if/when one of these threads is scheduled simultaneously
58 with another of the same identity.
59
60choice
61 prompt "Scheduling prioritization of AUX tasks."
62 default REALTIME_AUX_TASK_PRIORITY_BOOSTED
63 help
64 Select the prioritization method for auxiliary tasks.
65
66config REALTIME_AUX_TASK_PRIORITY_BOOSTED
67 bool "Boosted"
68 help
69 Run all auxiliary task threads at the maximum priority. Useful for
70 temporarily working around bugs during development.
71
72config REALTIME_AUX_TASK_PRIORITY_INHERITANCE
73 bool "Inheritance"
74 help
75 Auxiliary tasks inherit the maximum priority from blocked real-time
76 threads within the same process.
77
78 Additional pitfall:
79 3) Busy-wait deadlock is likely if normal real-time tasks and
80 auxiliary tasks synchronize using _preemptive_ spinlocks that do
81 not use priority inheritance.
82
83 These pitfalls are mitigated by the fact that auxiliary tasks only
84 inherit priorities from blocked tasks (blocking signifies that the
85 blocked task _may_ be waiting on an auxiliary task to perform some
86 work). Further, auxiliary tasks without an inherited priority are
87 _always_ scheduled with a priority less than any normal real-time task.
88
89 NOTE: Aux tasks do not _directly_ inherit a priority from rt tasks that
90 are blocked on Litmus locks. Aux tasks should be COTS code that knows nothing
91 of Litmus, so they won't hold Litmus locks. Nothing the aux task can do can
92 _directly_ unblock the rt task blocked on a Litmus lock. However, the lock
93 holder that blocks the rt task CAN block on I/O and contribute its priority
94 to the aux tasks. Aux tasks may still _indirectly_ inherit the priority of
95 the blocked rt task via the lock holder.
96endchoice
97
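As a hypothetical illustration of the system call described above, a process could mark its helper threads as auxiliary tasks roughly as sketched below. The flag names AUX_ENABLE, AUX_CURRENT, and AUX_FUTURE appear in aux_tasks.c later in this patch, but the flag values, the syscall number, and the wrapper name here are placeholders (the real entry point would normally come from the LITMUS^RT userspace headers), so treat this as a sketch of the intended usage rather than the actual interface.

    #define _GNU_SOURCE
    #include <unistd.h>
    #include <sys/syscall.h>

    /* Placeholder values -- the real definitions live in the LITMUS^RT headers. */
    #define AUX_CURRENT (1 << 0)
    #define AUX_FUTURE  (1 << 1)
    #define AUX_ENABLE  (1 << 2)
    #define __NR_set_aux_tasks 400    /* hypothetical syscall number */

    /* Turn all current and future non-real-time threads of the calling process
     * into auxiliary tasks, e.g. before a COTS runtime spawns helper threads. */
    static long enable_aux_tasks(void)
    {
            return syscall(__NR_set_aux_tasks, AUX_ENABLE | AUX_CURRENT | AUX_FUTURE);
    }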
37endmenu 98endmenu
38 99
100
39menu "Real-Time Synchronization" 101menu "Real-Time Synchronization"
40 102
41config NP_SECTION 103config NP_SECTION
@@ -60,6 +122,42 @@ config LITMUS_LOCKING
60 Say Yes if you want to include locking protocols such as the FMLP and 122 Say Yes if you want to include locking protocols such as the FMLP and
61 Baker's SRP. 123 Baker's SRP.
62 124
125config LITMUS_AFFINITY_LOCKING
126 bool "Enable affinity infrastructure in k-exclusion locking protocols."
127 depends on LITMUS_LOCKING
128 default n
129 help
130 Enable affinity tracking infrastructure in k-exclusion locking protocols.
131 This only enables the *infrastructure*, not actual affinity algorithms.
132
133 If unsure, say No.
134
135config LITMUS_NESTED_LOCKING
136 bool "Support for nested inheritance in locking protocols"
137 depends on LITMUS_LOCKING
138 default n
139 help
140 Enable nested priority inheritance.
141
142config LITMUS_DGL_SUPPORT
143 bool "Support for dynamic group locks"
144 depends on LITMUS_NESTED_LOCKING
145 default n
146 help
147 Enable dynamic group lock support.
148
149config LITMUS_MAX_DGL_SIZE
150 int "Maximum size of a dynamic group lock."
151 depends on LITMUS_DGL_SUPPORT
152 range 1 128
153 default "10"
154 help
155 Dynamic group lock data structures are allocated on the process
156 stack when a group is requested. We set a maximum number of
157 locks in a dynamic group lock to avoid dynamic allocation.
158
159 TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE.
160
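To make the stack-allocation argument above concrete, the hedged sketch below shows the general pattern a compile-time bound enables; the names are invented for illustration and are not the patch's actual DGL structures.

    #define MY_MAX_DGL_SIZE 10    /* analogous to CONFIG_LITMUS_MAX_DGL_SIZE */

    struct my_dgl_request {
            int nr_locks;
            void *locks[MY_MAX_DGL_SIZE];    /* fixed-size array: no dynamic allocation */
    };

    static int my_lock_group(void * const *locks, int n)
    {
            struct my_dgl_request req;    /* fits on the stack because of the bound */
            int i;

            if (n > MY_MAX_DGL_SIZE)
                    return -1;    /* cf. the TODO above about batching oversized requests */

            req.nr_locks = n;
            for (i = 0; i < n; i++)
                    req.locks[i] = locks[i];

            /* ... acquire all n locks as one atomic group request ... */
            return 0;
    }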
63endmenu 161endmenu
64 162
65menu "Performance Enhancements" 163menu "Performance Enhancements"
@@ -112,6 +210,14 @@ choice
112 Break ties between two jobs, A and B, with equal deadlines by using a 210 Break ties between two jobs, A and B, with equal deadlines by using a
113 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job 211 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
114 A has ~50% of winning a given tie-break. 212 A has ~50% of winning a given tie-break.
213
214 NOTES:
215 * This method doesn't work very well if a tied job has a low-valued
216 hash while the jobs it ties with do not make progress (that is,
217 they don't increment to new job numbers). The job with the low-valued
218 hash will lose most tie-breaks. This is usually not a problem
219 unless you are doing something funky in Litmus (e.g., worker threads
220 that do not increment job numbers).
115 221
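A minimal sketch of the hash tie-break described in the note above: two jobs with equal deadlines are ordered by comparing hash(pid, job_no), so each side wins roughly half the time. The mixing function below is a generic 64-bit finalizer chosen for illustration; the hash actually used by edf_hash() in this patch may differ.

    #include <stdint.h>

    static uint64_t mix64(uint64_t x)
    {
            /* splitmix64-style finalizer; any reasonable mixing function works */
            x ^= x >> 30; x *= 0xbf58476d1ce4e5b9ULL;
            x ^= x >> 27; x *= 0x94d049bb133111ebULL;
            x ^= x >> 31;
            return x;
    }

    static uint64_t job_hash(int pid, unsigned int job_no)
    {
            return mix64(((uint64_t)pid << 32) | job_no);
    }

    /* returns nonzero if job A wins the deadline tie against job B */
    static int hash_tie_break(int pid_a, unsigned int job_a, int pid_b, unsigned int job_b)
    {
            return job_hash(pid_a, job_a) < job_hash(pid_b, job_b);
    }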
116 config EDF_PID_TIE_BREAK 222 config EDF_PID_TIE_BREAK
117 bool "PID-based Tie Breaks" 223 bool "PID-based Tie Breaks"
@@ -167,7 +273,7 @@ config SCHED_TASK_TRACE
167config SCHED_TASK_TRACE_SHIFT 273config SCHED_TASK_TRACE_SHIFT
168 int "Buffer size for sched_trace_xxx() events" 274 int "Buffer size for sched_trace_xxx() events"
169 depends on SCHED_TASK_TRACE 275 depends on SCHED_TASK_TRACE
170 range 8 13 276 range 8 15
171 default 9 277 default 9
172 help 278 help
173 279
@@ -279,4 +385,108 @@ config PREEMPT_STATE_TRACE
279 385
280endmenu 386endmenu
281 387
388menu "Interrupt Handling"
389
390choice
391 prompt "Scheduling of interrupt bottom-halves in Litmus."
392 default LITMUS_SOFTIRQD_NONE
393 depends on LITMUS_LOCKING
394 help
395 Schedule tasklets with known priorities in Litmus.
396
397config LITMUS_SOFTIRQD_NONE
398 bool "No tasklet scheduling in Litmus."
399 help
400 Don't schedule tasklets in Litmus. Default.
401
402config LITMUS_SOFTIRQD
403 bool "Spawn klmirqd interrupt handling threads."
404 help
405 Create klmirqd interrupt handling threads. Work must be
406 specifically dispatched to these workers. (Softirqs for
407 Litmus tasks are not magically redirected to klmirqd.)
408
409 G-EDF, C-EDF ONLY for now!
410
411
412config LITMUS_PAI_SOFTIRQD
413 bool "Defer tasklets to context switch points."
414 help
415 Only execute scheduled tasklet bottom halves at
416 scheduling points. Reduces context-switch overhead
417 at the cost of non-preemptive durations of bottom-half
418 processing.
419
420 G-EDF, C-EDF ONLY for now!
421
422endchoice
423
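The PAI option above can be pictured with the small userspace model below: tasklets raised in interrupt context are only recorded, and the queued bottom halves run as a batch at the next scheduling point. This is a conceptual sketch of the deferral idea only, not the kernel implementation in litmus_pai_softirq.c.

    #define MAX_DEFERRED 32

    typedef void (*bottom_half_fn)(unsigned long data);

    struct deferred { bottom_half_fn fn; unsigned long data; };

    static struct deferred pending[MAX_DEFERRED];
    static int n_pending;

    /* called from "interrupt" context: just record the work */
    static void raise_tasklet(bottom_half_fn fn, unsigned long data)
    {
            if (n_pending < MAX_DEFERRED)
                    pending[n_pending++] = (struct deferred){ fn, data };
    }

    /* called at a scheduling point: run everything, non-preemptively */
    static void flush_at_schedule_point(void)
    {
            int i;
            for (i = 0; i < n_pending; i++)
                    pending[i].fn(pending[i].data);
            n_pending = 0;
    }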
424
425config LITMUS_NVIDIA
426 bool "Litmus handling of NVIDIA interrupts."
427 default n
428 help
429 Direct tasklets from NVIDIA devices to Litmus's klmirqd
430 or PAI interrupt handling routines.
431
432 If unsure, say No.
433
434config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT
435 bool "Enable affinity-aware heuristics to improve GPU assignment."
436 depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING
437 default n
438 help
439 Enable several heuristics to improve the assignment
440 of GPUs to real-time tasks to reduce the overheads
441 of memory migrations.
442
443 If unsure, say No.
444
445config NV_DEVICE_NUM
446 int "Number of NVIDIA GPUs."
447 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
448 range 1 16
449 default "1"
450 help
451 Should be no greater than the number of CPUs and
452 no greater than the number of GPUs in your system.
453
454choice
455 prompt "CUDA/Driver Version Support"
456 default CUDA_5_0
457 depends on LITMUS_NVIDIA
458 help
459 Select the version of CUDA/driver to support.
460
461config CUDA_5_0
462 bool "CUDA 5.0"
463 depends on LITMUS_NVIDIA && REALTIME_AUX_TASKS
464 help
465 Support CUDA 5.0 RCx (dev. driver version: x86_64-304.33)
466
467config CUDA_4_0
468 bool "CUDA 4.0"
469 depends on LITMUS_NVIDIA
470 help
471 Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
472
473config CUDA_3_2
474 bool "CUDA 3.2"
475 depends on LITMUS_NVIDIA
476 help
477 Support CUDA 3.2 (dev. driver version: x86_64-260.24)
478
479endchoice
480
481config LITMUS_NV_KLMIRQD_DEBUG
482 bool "Raise fake sporadic tasklets to test nv klmirqd threads."
483 depends on LITMUS_NVIDIA && LITMUS_SOFTIRQD
484 default n
485 help
486 Causes tasklets to be sporadically dispatched to waiting klmirqd
487 threads. WARNING! Kernel panic may occur if you switch between
488 LITMUS plugins!
489
490endmenu
491
282endmenu 492endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
index d26ca7076b62..67d8b8ee72bc 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -18,6 +18,7 @@ obj-y = sched_plugin.o litmus.o \
18 bheap.o \ 18 bheap.o \
19 binheap.o \ 19 binheap.o \
20 ctrldev.o \ 20 ctrldev.o \
21 aux_tasks.o \
21 sched_gsn_edf.o \ 22 sched_gsn_edf.o \
22 sched_psn_edf.o \ 23 sched_psn_edf.o \
23 sched_pfp.o 24 sched_pfp.o
@@ -30,3 +31,11 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
30obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 31obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
31obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 32obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
32obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 33obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
34
35obj-$(CONFIG_LITMUS_LOCKING) += kfmlp_lock.o
36obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o
37obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
38obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
39obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
40
41obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
index 3fa6dd789400..cd93249b5506 100644
--- a/litmus/affinity.c
+++ b/litmus/affinity.c
@@ -26,7 +26,7 @@ void init_topology(void) {
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); 26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 } 27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", 28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i, 29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i])); 30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 } 31 }
32 32
diff --git a/litmus/aux_tasks.c b/litmus/aux_tasks.c
new file mode 100644
index 000000000000..ef26bba3be77
--- /dev/null
+++ b/litmus/aux_tasks.c
@@ -0,0 +1,529 @@
1#include <litmus/sched_plugin.h>
2#include <litmus/trace.h>
3#include <litmus/litmus.h>
4
5#ifdef CONFIG_REALTIME_AUX_TASKS
6#include <litmus/rt_param.h>
7#include <litmus/aux_tasks.h>
8
9#include <linux/time.h>
10
11#define AUX_SLICE_NR_JIFFIES 1
12#define AUX_SLICE_NS ((NSEC_PER_SEC / HZ) * AUX_SLICE_NR_JIFFIES)
13
14static int admit_aux_task(struct task_struct *t)
15{
16 int retval = 0;
17 struct task_struct *leader = t->group_leader;
18
19 /* Budget enforcement increments job numbers, and job numbers are used in
20 * tie-breaking of aux_tasks. This method helps ensure:
21 * 1) aux threads with no inherited priority cannot starve one another (they
22 * share the CPUs equally).
23 * 2) aux threads that inherit the same priority cannot starve each other.
24 *
25 * Assuming aux threads are well-behaved (they do very little work and
26 * suspend), risk of starvation should not be an issue, but this is a
27 * fail-safe.
28 */
29 struct rt_task tp = {
30 .period = AUX_SLICE_NS,
31 .relative_deadline = AUX_SLICE_NS,
32 .exec_cost = AUX_SLICE_NS, /* allow full utilization with budget tracking */
33 .phase = 0,
34 .cpu = task_cpu(leader), /* take CPU of group leader */
35 .budget_policy = QUANTUM_ENFORCEMENT,
36 .budget_signal_policy = NO_SIGNALS,
37 .cls = RT_CLASS_BEST_EFFORT
38 };
39
40 struct sched_param param = { .sched_priority = 0};
41
42 tsk_rt(t)->task_params = tp;
43 retval = sched_setscheduler_nocheck(t, SCHED_LITMUS, &param);
44
45 return retval;
46}
47
48int exit_aux_task(struct task_struct *t)
49{
50 int retval = 0;
51
52 BUG_ON(!tsk_rt(t)->is_aux_task);
53
54 TRACE_CUR("Aux task %s/%d is exiting from %s/%d.\n", t->comm, t->pid, t->group_leader->comm, t->group_leader->pid);
55
56 tsk_rt(t)->is_aux_task = 0;
57
58#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
59 list_del(&tsk_rt(t)->aux_task_node);
60 if (tsk_rt(t)->inh_task) {
61 litmus->__decrease_prio(t, NULL);
62 }
63#endif
64
65 return retval;
66}
67
68static int aux_tasks_increase_priority(struct task_struct *leader, struct task_struct *hp)
69{
70 int retval = 0;
71
72#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
73 struct list_head *pos;
74
75 TRACE_CUR("Increasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid);
76
77 list_for_each(pos, &tsk_aux(leader)->aux_tasks) {
78 struct task_struct *aux =
79 container_of(list_entry(pos, struct rt_param, aux_task_node),
80 struct task_struct, rt_param);
81
82 if (!is_realtime(aux)) {
83 TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid);
84 }
85 else if(tsk_rt(aux)->inh_task == hp) {
86 TRACE_CUR("skipping real-time aux task %s/%d that already inherits from %s/%d\n", aux->comm, aux->pid, hp->comm, hp->pid);
87 }
88 else {
89 // aux tasks don't touch rt locks, so no nested call needed.
90 TRACE_CUR("increasing %s/%d.\n", aux->comm, aux->pid);
91 retval = litmus->__increase_prio(aux, hp);
92 }
93 }
94#endif
95
96 return retval;
97}
98
99static int aux_tasks_decrease_priority(struct task_struct *leader, struct task_struct *hp)
100{
101 int retval = 0;
102
103#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
104 struct list_head *pos;
105
106 TRACE_CUR("Decreasing priority of aux tasks in group %s/%d.\n", leader->comm, leader->pid);
107
108 list_for_each(pos, &tsk_aux(leader)->aux_tasks) {
109 struct task_struct *aux =
110 container_of(list_entry(pos, struct rt_param, aux_task_node),
111 struct task_struct, rt_param);
112
113 if (!is_realtime(aux)) {
114 TRACE_CUR("skipping non-real-time aux task %s/%d\n", aux->comm, aux->pid);
115 }
116 else {
117 TRACE_CUR("decreasing %s/%d.\n", aux->comm, aux->pid);
118 retval = litmus->__decrease_prio(aux, hp);
119 }
120 }
121#endif
122
123 return retval;
124}
125
126int aux_task_owner_increase_priority(struct task_struct *t)
127{
128 int retval = 0;
129
130#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
131 struct task_struct *leader;
132 struct task_struct *hp = NULL;
133 struct task_struct *hp_eff = NULL;
134
135 BUG_ON(!is_realtime(t));
136 BUG_ON(!tsk_rt(t)->has_aux_tasks);
137
138 leader = t->group_leader;
139
140 if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) {
141 WARN_ON(!is_running(t));
142 TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n",
143 t->comm, t->pid, leader->comm, leader->pid);
144 goto out;
145 }
146
147 TRACE_CUR("task %s/%d in group %s/%d increasing priority.\n", t->comm, t->pid, leader->comm, leader->pid);
148
149 hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
150 struct task_struct, rt_param);
151 hp_eff = effective_priority(hp);
152
153 if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */
154 binheap_decrease(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners);
155 }
156
157 hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
158 struct task_struct, rt_param);
159
160 if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */
161 hp_eff = effective_priority(hp);
162 TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid);
163 retval = aux_tasks_increase_priority(leader, hp_eff);
164 }
165#endif
166
167out:
168 return retval;
169}
170
171int aux_task_owner_decrease_priority(struct task_struct *t)
172{
173 int retval = 0;
174
175#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
176 struct task_struct *leader;
177 struct task_struct *hp = NULL;
178 struct task_struct *hp_eff = NULL;
179
180 BUG_ON(!is_realtime(t));
181 BUG_ON(!tsk_rt(t)->has_aux_tasks);
182
183 leader = t->group_leader;
184
185 if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) {
186 WARN_ON(!is_running(t));
187 TRACE_CUR("aux tasks may not inherit from %s/%d in group %s/%d\n",
188 t->comm, t->pid, leader->comm, leader->pid);
189 goto out;
190 }
191
192 TRACE_CUR("task %s/%d in group %s/%d decreasing priority.\n", t->comm, t->pid, leader->comm, leader->pid);
193
194 hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
195 struct task_struct, rt_param);
196 hp_eff = effective_priority(hp);
197 binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners);
198 binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners,
199 struct rt_param, aux_task_owner_node);
200
201 if (hp == t) { /* t was originally the hp */
202 struct task_struct *new_hp =
203 container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
204 struct task_struct, rt_param);
205 if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */
206 hp_eff = effective_priority(new_hp);
207 TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid);
208 retval = aux_tasks_decrease_priority(leader, hp_eff);
209 }
210 }
211#endif
212
213out:
214 return retval;
215}
216
217int make_aux_task_if_required(struct task_struct *t)
218{
219 struct task_struct *leader;
220 int retval = 0;
221
222 read_lock_irq(&tasklist_lock);
223
224 leader = t->group_leader;
225
226 if(!tsk_aux(leader)->initialized || !tsk_aux(leader)->aux_future) {
227 goto out;
228 }
229
230 TRACE_CUR("Making %s/%d in %s/%d an aux thread.\n", t->comm, t->pid, leader->comm, leader->pid);
231
232 INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node);
233 INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node);
234
235 retval = admit_aux_task(t);
236 if (retval == 0) {
237 tsk_rt(t)->is_aux_task = 1;
238
239#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
240 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks);
241
242 if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) {
243 struct task_struct *hp =
244 container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
245 struct task_struct, rt_param);
246
247 TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid);
248
249 retval = litmus->__increase_prio(t, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp);
250
251 if (retval != 0) {
252 /* don't know how to recover from bugs with prio inheritance. better just crash. */
253 read_unlock_irq(&tasklist_lock);
254 BUG();
255 }
256 }
257#endif
258 }
259
260out:
261 read_unlock_irq(&tasklist_lock);
262
263 return retval;
264}
265
266
267long enable_aux_task_owner(struct task_struct *t)
268{
269 long retval = 0;
270
271#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
272 struct task_struct *leader = t->group_leader;
273 struct task_struct *hp;
274
275 if (!tsk_rt(t)->has_aux_tasks) {
276 TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid);
277 return -1;
278 }
279
280 BUG_ON(!is_realtime(t));
281
282 if (binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) {
283 TRACE_CUR("task %s/%d is already active\n", t->comm, t->pid);
284 goto out;
285 }
286
287 binheap_add(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners,
288 struct rt_param, aux_task_owner_node);
289
290 hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
291 struct task_struct, rt_param);
292 if (hp == t) {
293 /* we're the new hp */
294 TRACE_CUR("%s/%d is new hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid);
295
296 retval = aux_tasks_increase_priority(leader,
297 (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp);
298 }
299#endif
300
301out:
302 return retval;
303}
304
305long disable_aux_task_owner(struct task_struct *t)
306{
307 long retval = 0;
308
309#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
310 struct task_struct *leader = t->group_leader;
311 struct task_struct *hp;
312 struct task_struct *new_hp = NULL;
313
314 if (!tsk_rt(t)->has_aux_tasks) {
315 TRACE_CUR("task %s/%d is not an aux owner\n", t->comm, t->pid);
316 return -1;
317 }
318
319 BUG_ON(!is_realtime(t));
320
321 if (!binheap_is_in_heap(&tsk_rt(t)->aux_task_owner_node)) {
322 TRACE_CUR("task %s/%d is already not active\n", t->comm, t->pid);
323 goto out;
324 }
325
326 TRACE_CUR("task %s/%d exiting from group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid);
327
328 hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
329 struct task_struct, rt_param);
330 binheap_delete(&tsk_rt(t)->aux_task_owner_node, &tsk_aux(leader)->aux_task_owners);
331
332 if (!binheap_empty(&tsk_aux(leader)->aux_task_owners)) {
333 new_hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
334 struct task_struct, rt_param);
335 }
336
337 if (hp == t && new_hp != t) {
338 struct task_struct *to_inh = NULL;
339
340 TRACE_CUR("%s/%d is no longer hp in group %s/%d.\n", t->comm, t->pid, leader->comm, leader->pid);
341
342 if (new_hp) {
343 to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp;
344 }
345
346 retval = aux_tasks_decrease_priority(leader, to_inh);
347 }
348#endif
349
350out:
351 return retval;
352}
353
354
355static int aux_task_owner_max_priority_order(struct binheap_node *a,
356 struct binheap_node *b)
357{
358 struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, aux_task_owner_node),
359 struct task_struct, rt_param);
360 struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, aux_task_owner_node),
361 struct task_struct, rt_param);
362
363 BUG_ON(!d_a);
364 BUG_ON(!d_b);
365
366 return litmus->compare(d_a, d_b);
367}
368
369
370static long __do_enable_aux_tasks(int flags)
371{
372 long retval = 0;
373 struct task_struct *leader;
374 struct task_struct *t;
375 int aux_tasks_added = 0;
376
377 leader = current->group_leader;
378
379 if (!tsk_aux(leader)->initialized) {
380 INIT_LIST_HEAD(&tsk_aux(leader)->aux_tasks);
381 INIT_BINHEAP_HANDLE(&tsk_aux(leader)->aux_task_owners, aux_task_owner_max_priority_order);
382 tsk_aux(leader)->initialized = 1;
383 }
384
385 if (flags & AUX_FUTURE) {
386 tsk_aux(leader)->aux_future = 1;
387 }
388
389 t = leader;
390 do {
391 if (!tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->is_aux_task) {
392 /* This may harmlessly reinit unused nodes. TODO: Don't reinit already init nodes. */
393 /* doesn't hurt to initialize both nodes */
394 INIT_LIST_HEAD(&tsk_rt(t)->aux_task_node);
395 INIT_BINHEAP_NODE(&tsk_rt(t)->aux_task_owner_node);
396 }
397
398 TRACE_CUR("Checking task in %s/%d: %s/%d = (p = %llu):\n",
399 leader->comm, leader->pid, t->comm, t->pid,
400 tsk_rt(t)->task_params.period);
401
402 /* inspect period to see if it is an rt task */
403 if (tsk_rt(t)->task_params.period == 0) {
404 if (flags & AUX_CURRENT) {
405 if (!tsk_rt(t)->is_aux_task) {
406 int admit_ret;
407
408 TRACE_CUR("AUX task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid);
409
410 admit_ret = admit_aux_task(t);
411
412 if (admit_ret == 0) {
413 /* hasn't been admitted into rt. make it an aux task. */
414 tsk_rt(t)->is_aux_task = 1;
415 aux_tasks_added = 1;
416
417#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
418 list_add_tail(&tsk_rt(t)->aux_task_node, &tsk_aux(leader)->aux_tasks);
419#endif
420 }
421 }
422 else {
423 TRACE_CUR("AUX task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid);
424 }
425 }
426 else {
427 TRACE_CUR("Not changing thread in %s/%d to AUX task: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid);
428 }
429 }
430 else if (!tsk_rt(t)->is_aux_task) { /* don't let aux tasks get aux tasks of their own */
431 if (!tsk_rt(t)->has_aux_tasks) {
432 TRACE_CUR("task in %s/%d: %s/%d:\n", leader->comm, leader->pid, t->comm, t->pid);
433 tsk_rt(t)->has_aux_tasks = 1;
434 }
435 else {
436 TRACE_CUR("task in %s/%d is already set up: %s/%d\n", leader->comm, leader->pid, t->comm, t->pid);
437 }
438 }
439
440 t = next_thread(t);
441 } while(t != leader);
442
443
444#ifdef CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE
445 if (aux_tasks_added && !binheap_empty(&tsk_aux(leader)->aux_task_owners)) {
446 struct task_struct *hp = container_of(binheap_top_entry(&tsk_aux(leader)->aux_task_owners, struct rt_param, aux_task_owner_node),
447 struct task_struct, rt_param);
448 TRACE_CUR("hp in group: %s/%d\n", hp->comm, hp->pid);
449 retval = aux_tasks_increase_priority(leader, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp);
450 }
451#endif
452
453 return retval;
454}
455
456static long __do_disable_aux_tasks(int flags)
457{
458 long retval = 0;
459 struct task_struct *leader;
460 struct task_struct *t;
461
462 leader = current->group_leader;
463
464 if (flags & AUX_FUTURE) {
465 tsk_aux(leader)->aux_future = 0;
466 }
467
468 if (flags & AUX_CURRENT) {
469 t = leader;
470 do {
471 if (tsk_rt(t)->is_aux_task) {
472
473 TRACE_CUR("%s/%d is an aux task.\n", t->comm, t->pid);
474
475 if (is_realtime(t)) {
476 long temp_retval;
477 struct sched_param param = { .sched_priority = 0};
478
479 TRACE_CUR("%s/%d is real-time. Changing policy to SCHED_NORMAL.\n", t->comm, t->pid);
480
481 temp_retval = sched_setscheduler_nocheck(t, SCHED_NORMAL, &param);
482
483 if (temp_retval != 0) {
484 TRACE_CUR("error changing policy of %s/%d to SCHED_NORMAL\n", t->comm, t->pid);
485 if (retval == 0) {
486 retval = temp_retval;
487 }
488 else {
489 TRACE_CUR("prior error (%d) masks new error (%d)\n", retval, temp_retval);
490 }
491 }
492 }
493
494 tsk_rt(t)->is_aux_task = 0;
495 }
496 t = next_thread(t);
497 } while(t != leader);
498 }
499
500 return retval;
501}
502
503asmlinkage long sys_set_aux_tasks(int flags)
504{
505 long retval;
506
507 read_lock_irq(&tasklist_lock);
508
509 if (flags & AUX_ENABLE) {
510 retval = __do_enable_aux_tasks(flags);
511 }
512 else {
513 retval = __do_disable_aux_tasks(flags);
514 }
515
516 read_unlock_irq(&tasklist_lock);
517
518 return retval;
519}
520
521#else
522
523asmlinkage long sys_set_aux_tasks(int flags)
524{
525 printk("Unsupported. Recompile with CONFIG_REALTIME_AUX_TASKS.\n");
526 return -EINVAL;
527}
528
529#endif
diff --git a/litmus/budget.c b/litmus/budget.c
index f7712be29adb..518174a37a3b 100644
--- a/litmus/budget.c
+++ b/litmus/budget.c
@@ -1,11 +1,13 @@
1#include <linux/sched.h> 1#include <linux/sched.h>
2#include <linux/percpu.h> 2#include <linux/percpu.h>
3#include <linux/hrtimer.h> 3#include <linux/hrtimer.h>
4#include <linux/signal.h>
4 5
5#include <litmus/litmus.h> 6#include <litmus/litmus.h>
6#include <litmus/preempt.h> 7#include <litmus/preempt.h>
7 8
8#include <litmus/budget.h> 9#include <litmus/budget.h>
10#include <litmus/signal.h>
9 11
10struct enforcement_timer { 12struct enforcement_timer {
11 /* The enforcement timer is used to accurately police 13 /* The enforcement timer is used to accurately police
@@ -64,7 +66,7 @@ static void arm_enforcement_timer(struct enforcement_timer* et,
64 66
65 /* Calling this when there is no budget left for the task 67 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */ 68 * makes no sense, unless the task is non-preemptive. */
67 BUG_ON(budget_exhausted(t) && (!is_np(t))); 69 BUG_ON(budget_exhausted(t) && !is_np(t));
68 70
69 /* __hrtimer_start_range_ns() cancels the timer 71 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */ 72 * anyway, so we don't have to check whether it is still armed */
@@ -86,7 +88,7 @@ void update_enforcement_timer(struct task_struct* t)
86{ 88{
87 struct enforcement_timer* et = &__get_cpu_var(budget_timer); 89 struct enforcement_timer* et = &__get_cpu_var(budget_timer);
88 90
89 if (t && budget_precisely_enforced(t)) { 91 if (t && budget_precisely_tracked(t) && !sigbudget_sent(t)) {
90 /* Make sure we call into the scheduler when this budget 92 /* Make sure we call into the scheduler when this budget
91 * expires. */ 93 * expires. */
92 arm_enforcement_timer(et, t); 94 arm_enforcement_timer(et, t);
@@ -96,6 +98,16 @@ void update_enforcement_timer(struct task_struct* t)
96 } 98 }
97} 99}
98 100
101void send_sigbudget(struct task_struct* t)
102{
103 if (!test_and_set_bit(RT_JOB_SIG_BUDGET_SENT, &tsk_rt(t)->job_params.flags)) {
104 /* signal has not yet been sent and we are responsible for sending
105 * since we just set the sent-bit when it was previously 0. */
106
107 TRACE_TASK(t, "SIG_BUDGET being sent!\n");
108 send_sig(SIG_BUDGET, t, 1); /* '1' denotes signal sent from kernel */
109 }
110}
99 111
100static int __init init_budget_enforcement(void) 112static int __init init_budget_enforcement(void)
101{ 113{
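For context on the new send_sigbudget() path: a task that selects signal-based budget policing would catch the signal in userspace roughly as sketched below. The actual SIG_BUDGET number comes from the LITMUS^RT headers; the value used here is only a placeholder, so treat this as a hypothetical usage sketch rather than the defined API.

    #include <signal.h>

    #ifndef SIG_BUDGET
    #define SIG_BUDGET (SIGRTMIN + 1)    /* placeholder; use the kernel's definition */
    #endif

    static volatile sig_atomic_t overruns;

    static void on_budget_exhausted(int sig)
    {
            (void)sig;
            overruns++;    /* the current job exceeded its declared exec_cost */
    }

    int main(void)
    {
            struct sigaction sa = { .sa_handler = on_budget_exhausted };

            sigaction(SIG_BUDGET, &sa, NULL);
            /* ... run real-time jobs; inspect 'overruns' between jobs ... */
            return 0;
    }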
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 5aca2934a7b5..441fbfddf0c2 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -12,6 +12,10 @@
12#include <litmus/sched_plugin.h> 12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h> 13#include <litmus/sched_trace.h>
14 14
15#ifdef CONFIG_LITMUS_NESTED_LOCKING
16#include <litmus/locking.h>
17#endif
18
15#include <litmus/edf_common.h> 19#include <litmus/edf_common.h>
16 20
17#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM 21#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
@@ -45,33 +49,158 @@ static inline long edf_hash(struct task_struct *t)
45 * 49 *
46 * both first and second may be NULL 50 * both first and second may be NULL
47 */ 51 */
48int edf_higher_prio(struct task_struct* first, 52#ifdef CONFIG_LITMUS_NESTED_LOCKING
49 struct task_struct* second) 53int __edf_higher_prio(
54 struct task_struct* first, comparison_mode_t first_mode,
55 struct task_struct* second, comparison_mode_t second_mode)
56#else
57int edf_higher_prio(struct task_struct* first, struct task_struct* second)
58#endif
50{ 59{
51 struct task_struct *first_task = first; 60 struct task_struct *first_task = first;
52 struct task_struct *second_task = second; 61 struct task_struct *second_task = second;
53 62
54 /* There is no point in comparing a task to itself. */ 63 /* There is no point in comparing a task to itself. */
55 if (first && first == second) { 64 if (first && first == second) {
56 TRACE_TASK(first, 65 TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid);
57 "WARNING: pointless edf priority comparison.\n"); 66 WARN_ON(1);
58 return 0; 67 return 0;
59 } 68 }
60 69
61 70
62 /* check for NULL tasks */ 71 /* check for NULL tasks */
63 if (!first || !second) 72 if (!first || !second) {
64 return first && !second; 73 return first && !second;
74 }
65 75
66#ifdef CONFIG_LITMUS_LOCKING 76 /* There is some goofy stuff in this code here. There are three subclasses
77 * within the SCHED_LITMUS scheduling class:
78 * 1) Auxiliary tasks: COTS helper threads from the application level that
79 * are forced to be real-time.
80 * 2) klmirqd interrupt threads: Litmus threaded interrupt handlers.
81 * 3) Normal Litmus tasks.
82 *
83 * At their base priorities, #3 > #2 > #1. However, #1 and #2 threads might
84 * inherit a priority from a task of #3.
85 *
86 * The code proceeds in the following manner:
87 * 1) Make aux and klmirqd threads with base-priorities have low priorities.
88 * 2) Determine effective priorities.
89 * 3) Perform priority comparison. Favor #3 over #1 and #2 in case of tie.
90 */
91
92
93#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_BOOSTED)
94 /* run aux tasks at max priority */
95 /* TODO: Actually use prio-boosting. */
96 if (first->rt_param.is_aux_task != second->rt_param.is_aux_task)
97 {
98 return (first->rt_param.is_aux_task > second->rt_param.is_aux_task);
99 }
100 else if(first->rt_param.is_aux_task && second->rt_param.is_aux_task)
101 {
102 if(first->group_leader == second->group_leader) {
103 TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks
104 goto aux_tie_break;
105 }
106 first = first->group_leader;
107 second = second->group_leader;
108 }
109#elif defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE)
110 {
111 int first_lo_aux = first->rt_param.is_aux_task && !first->rt_param.inh_task;
112 int second_lo_aux = second->rt_param.is_aux_task && !second->rt_param.inh_task;
113
114 /* prioritize aux tasks without inheritance below real-time tasks */
115 if (first_lo_aux || second_lo_aux) {
116 // one of these is an aux task without inheritance.
117 if(first_lo_aux && second_lo_aux) {
118 TRACE_CUR("aux tie break!\n"); // tie-break by BASE priority of the aux tasks
119 goto aux_tie_break;
120 }
121 else {
122
123 // make the aux thread lowest priority real-time task
124 int temp = 0;
125 if (first_lo_aux && is_realtime(second)) {
126// temp = 0;
127 }
128 else if(second_lo_aux && is_realtime(first)) {
129 temp = 1;
130 }
131 TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp);
132 return temp;
133 }
134 }
135
136 if (first->rt_param.is_aux_task && second->rt_param.is_aux_task &&
137 first->rt_param.inh_task == second->rt_param.inh_task) {
138 // inh_task is !NULL for both tasks since neither was a lo_aux task.
139 // Both aux tasks inherit from the same task, so tie-break
140 // by base priority of the aux tasks.
141 TRACE_CUR("aux tie break!\n");
142 goto aux_tie_break;
143 }
144 }
145#endif
146
147#ifdef CONFIG_LITMUS_SOFTIRQD
148 {
149 int first_lo_klmirqd = first->rt_param.is_interrupt_thread && !first->rt_param.inh_task;
150 int second_lo_klmirqd = second->rt_param.is_interrupt_thread && !second->rt_param.inh_task;
151
152 /* prioritize klmirqd threads without inheritance below real-time tasks */
153 if (first_lo_klmirqd || second_lo_klmirqd) {
154 // one of these is an klmirqd thread without inheritance.
155 if(first_lo_klmirqd && second_lo_klmirqd) {
156 TRACE_CUR("klmirqd tie break!\n"); // tie-break by BASE priority of the klmirqd threads
157 goto klmirqd_tie_break;
158 }
159 else {
160 // make the klmirqd thread the lowest-priority real-time task
161 // but (above low-prio aux tasks and Linux tasks)
162 int temp = 0;
163 if (first_lo_klmirqd && is_realtime(second)) {
164// temp = 0;
165 }
166 else if(second_lo_klmirqd && is_realtime(first)) {
167 temp = 1;
168 }
169 TRACE_CUR("%s/%d >> %s/%d --- %d\n", first->comm, first->pid, second->comm, second->pid, temp);
170 return temp;
171 }
172 }
173
174 if (first->rt_param.is_interrupt_thread && second->rt_param.is_interrupt_thread &&
175 first->rt_param.inh_task == second->rt_param.inh_task) {
176 // inh_task is !NULL for both tasks since neither was a lo_klmirqd task.
177 // Both klmirqd tasks inherit from the same task, so tie-break
178 // by base priority of the klmirqd tasks.
179 TRACE_CUR("klmirqd tie break!\n");
180 goto klmirqd_tie_break;
181 }
182 }
183#endif
67 184
68 /* Check for inherited priorities. Change task 185
186#ifdef CONFIG_LITMUS_LOCKING
187 /* Check for EFFECTIVE priorities. Change task
69 * used for comparison in such a case. 188 * used for comparison in such a case.
70 */ 189 */
71 if (unlikely(first->rt_param.inh_task)) 190 if (unlikely(first->rt_param.inh_task)
191#ifdef CONFIG_LITMUS_NESTED_LOCKING
192 && (first_mode == EFFECTIVE)
193#endif
194 ) {
72 first_task = first->rt_param.inh_task; 195 first_task = first->rt_param.inh_task;
73 if (unlikely(second->rt_param.inh_task)) 196 }
197 if (unlikely(second->rt_param.inh_task)
198#ifdef CONFIG_LITMUS_NESTED_LOCKING
199 && (second_mode == EFFECTIVE)
200#endif
201 ) {
74 second_task = second->rt_param.inh_task; 202 second_task = second->rt_param.inh_task;
203 }
75 204
76 /* Check for priority boosting. Tie-break by start of boosting. 205 /* Check for priority boosting. Tie-break by start of boosting.
77 */ 206 */
@@ -79,17 +208,31 @@ int edf_higher_prio(struct task_struct* first,
79 /* first_task is boosted, how about second_task? */ 208 /* first_task is boosted, how about second_task? */
80 if (!is_priority_boosted(second_task) || 209 if (!is_priority_boosted(second_task) ||
81 lt_before(get_boost_start(first_task), 210 lt_before(get_boost_start(first_task),
82 get_boost_start(second_task))) 211 get_boost_start(second_task))) {
83 return 1; 212 return 1;
84 else 213 }
214 else {
85 return 0; 215 return 0;
86 } else if (unlikely(is_priority_boosted(second_task))) 216 }
217 }
218 else if (unlikely(is_priority_boosted(second_task))) {
87 /* second_task is boosted, first is not*/ 219 /* second_task is boosted, first is not*/
88 return 0; 220 return 0;
221 }
222
223#endif
89 224
225#ifdef CONFIG_REALTIME_AUX_TASKS
226aux_tie_break:
227#endif
228#ifdef CONFIG_LITMUS_SOFTIRQD
229klmirqd_tie_break:
90#endif 230#endif
91 231
92 if (earlier_deadline(first_task, second_task)) { 232 if (!is_realtime(second_task)) {
233 return 1;
234 }
235 else if (earlier_deadline(first_task, second_task)) {
93 return 1; 236 return 1;
94 } 237 }
95 else if (get_deadline(first_task) == get_deadline(second_task)) { 238 else if (get_deadline(first_task) == get_deadline(second_task)) {
@@ -98,7 +241,6 @@ int edf_higher_prio(struct task_struct* first,
98 */ 241 */
99 int pid_break; 242 int pid_break;
100 243
101
102#if defined(CONFIG_EDF_TIE_BREAK_LATENESS) 244#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
103 /* Tie break by lateness. Jobs with greater lateness get 245 /* Tie break by lateness. Jobs with greater lateness get
104 * priority. This should spread tardiness across all tasks, 246 * priority. This should spread tardiness across all tasks,
@@ -154,18 +296,104 @@ int edf_higher_prio(struct task_struct* first,
154 return 1; 296 return 1;
155 } 297 }
156 else if (first_task->pid == second_task->pid) { 298 else if (first_task->pid == second_task->pid) {
157 /* If the PIDs are the same then the task with the 299#ifdef CONFIG_LITMUS_SOFTIRQD
158 * inherited priority wins. 300 if (first_task->rt_param.is_interrupt_thread < second_task->rt_param.is_interrupt_thread) {
159 */ 301 return 1;
160 if (!second->rt_param.inh_task) { 302 }
303 else if (first_task->rt_param.is_interrupt_thread == second_task->rt_param.is_interrupt_thread) {
304#endif
305
306#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE)
307 if (tsk_rt(first)->is_aux_task < tsk_rt(second)->is_aux_task) {
161 return 1; 308 return 1;
162 } 309 }
310 else if (tsk_rt(first)->is_aux_task == tsk_rt(second)->is_aux_task) {
311#endif
312
313 /* Something could be wrong if you get this far. */
314 if (unlikely(first->rt_param.inh_task == second->rt_param.inh_task)) {
315 /* Both tasks have the same inherited priority.
316 * Likely in a bug-condition.
317 */
318 if (first->pid < second->pid) {
319 return 1;
320 }
321 else if (first->pid == second->pid) {
322 //WARN_ON(1);
323 }
324 }
325 else {
326 /* At least one task must inherit */
327 BUG_ON(!first->rt_param.inh_task &&
328 !second->rt_param.inh_task);
329
330 /* The task withOUT the inherited priority wins. */
331 if (second->rt_param.inh_task) {
332 /*
333 * common with aux tasks.
334 TRACE_CUR("unusual comparison: "
335 "first = %s/%d first_task = %s/%d "
336 "second = %s/%d second_task = %s/%d\n",
337 first->comm, first->pid,
338 (first->rt_param.inh_task) ? first->rt_param.inh_task->comm : "(nil)",
339 (first->rt_param.inh_task) ? first->rt_param.inh_task->pid : 0,
340 second->comm, second->pid,
341 (second->rt_param.inh_task) ? second->rt_param.inh_task->comm : "(nil)",
342 (second->rt_param.inh_task) ? second->rt_param.inh_task->pid : 0);
343 */
344 return 1;
345 }
346 }
347#if defined(CONFIG_REALTIME_AUX_TASK_PRIORITY_INHERITANCE)
348 }
349#endif
350
351#ifdef CONFIG_LITMUS_SOFTIRQD
352 }
353#endif
354
163 } 355 }
164 } 356 }
165 } 357 }
358
166 return 0; /* fall-through. prio(second_task) > prio(first_task) */ 359 return 0; /* fall-through. prio(second_task) > prio(first_task) */
167} 360}
168 361
362
363#ifdef CONFIG_LITMUS_NESTED_LOCKING
364int edf_higher_prio(struct task_struct* first, struct task_struct* second)
365{
366 return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE);
367}
368
369int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b)
370{
371 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
372 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
373
374 return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE);
375}
376
377int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b)
378{
379 return edf_max_heap_order(b, a); // swap comparison
380}
381
382int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
383{
384 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
385 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
386
387 return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE);
388}
389
390int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
391{
392 return edf_max_heap_base_priority_order(b, a); // swap comparison
393}
394#endif
395
396
169int edf_ready_order(struct bheap_node* a, struct bheap_node* b) 397int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
170{ 398{
171 return edf_higher_prio(bheap2task(a), bheap2task(b)); 399 return edf_higher_prio(bheap2task(a), bheap2task(b));
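The subclass handling added to edf_higher_prio() above reduces to two rules spelled out in its comment block: aux and klmirqd threads with no inherited priority rank below every normal real-time task, and otherwise the comparison uses effective (possibly inherited) deadlines. The toy, userspace-compilable comparator below restates just those two rules; it deliberately omits the priority boosting, tie-breaking, and klmirqd-versus-aux details of the real function.

    #include <stdint.h>

    struct toy_task {
            uint64_t deadline;        /* base priority: absolute deadline */
            uint64_t inh_deadline;    /* inherited deadline, 0 if none */
            int      is_aux;          /* subclass #1 in the comment above */
            int      is_klmirqd;      /* subclass #2 */
    };

    /* returns nonzero if a has higher priority than b */
    static int toy_higher_prio(const struct toy_task *a, const struct toy_task *b)
    {
            /* rule 1: helper threads without an inherited priority sink below normal tasks */
            int a_low = (a->is_aux || a->is_klmirqd) && !a->inh_deadline;
            int b_low = (b->is_aux || b->is_klmirqd) && !b->inh_deadline;
            uint64_t da, db;

            if (a_low != b_low)
                    return b_low;

            /* rule 2: compare effective deadlines (the inherited one, if any) */
            da = a->inh_deadline ? a->inh_deadline : a->deadline;
            db = b->inh_deadline ? b->inh_deadline : b->deadline;
            return da < db;
    }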
diff --git a/litmus/fdso.c b/litmus/fdso.c
index 250377d184e7..709be3cc8992 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -20,13 +20,28 @@
20 20
21extern struct fdso_ops generic_lock_ops; 21extern struct fdso_ops generic_lock_ops;
22 22
23#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
24extern struct fdso_ops generic_affinity_ops;
25#endif
26
23static const struct fdso_ops* fdso_ops[] = { 27static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */ 28 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */ 29 &generic_lock_ops, /* SRP_SEM */
30
26 &generic_lock_ops, /* MPCP_SEM */ 31 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */ 32 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */ 33 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */ 34 &generic_lock_ops, /* PCP_SEM */
35
36 &generic_lock_ops, /* RSM_MUTEX */
37 &generic_lock_ops, /* IKGLP_SEM */
38 &generic_lock_ops, /* KFMLP_SEM */
39#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
40 &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
41 &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
42 &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
43 &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
44#endif
30}; 45};
31 46
32static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) 47static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
new file mode 100644
index 000000000000..7d73105b4181
--- /dev/null
+++ b/litmus/gpu_affinity.c
@@ -0,0 +1,231 @@
1
2#ifdef CONFIG_LITMUS_NVIDIA
3
4#include <linux/sched.h>
5#include <litmus/litmus.h>
6#include <litmus/gpu_affinity.h>
7
8#include <litmus/sched_trace.h>
9
10#define OBSERVATION_CAP ((lt_t)(2e9))
11
12// reason for skew: high outliers are less
13// frequent and way out of bounds
14//#define HI_THRESHOLD 2
15//#define LO_THRESHOLD 4
16
17#define NUM_STDEV_NUM 1
18#define NUM_STDEV_DENOM 2
19
20#define MIN(a, b) ((a < b) ? a : b)
21
22static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
23{
24 fp_t relative_err;
25 fp_t err, new;
26 fp_t actual = _integer_to_fp(observed);
27
28 err = _sub(actual, fb->est);
29 new = _add(_mul(a, err), _mul(b, fb->accum_err));
30
31 relative_err = _div(err, actual);
32
33 fb->est = new;
34 fb->accum_err = _add(fb->accum_err, err);
35
36 return relative_err;
37}
38
39lt_t varience(lt_t nums[], const lt_t avg, const uint16_t count)
40{
41 /* brute force: takes about as much time as incremental running methods when
42 * count < 50 (on Bonham). Brute force also less prone to overflow.
43 */
44 lt_t sqdeviations = 0;
45 uint16_t i;
46 for(i = 0; i < count; ++i)
47 {
48 lt_t temp = (int64_t)nums[i] - (int64_t)avg;
49 sqdeviations += temp * temp;
50 }
51 return sqdeviations/count;
52}
53
54lt_t isqrt(lt_t n)
55{
56 /* integer square root using babylonian method
57 * (algo taken from Wikipedia) */
58 lt_t res = 0;
59 lt_t bit = ((lt_t)1) << (sizeof(n)*8-2);
60 while (bit > n) {
61 bit >>= 2;
62 }
63
64 while (bit != 0) {
65 if (n >= res + bit) {
66 n -= res + bit;
67 res = (res >> 1) + bit;
68 }
69 else {
70 res >>= 1;
71 }
72 bit >>= 2;
73 }
74 return res;
75}
76
77void update_gpu_estimate(struct task_struct *t, lt_t observed)
78{
79 //feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
80 avg_est_t *est;
81 struct migration_info mig_info;
82
83 BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
84
85 est = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
86
87 if (unlikely(observed > OBSERVATION_CAP)) {
88 TRACE_TASK(t, "Crazy observation greater than cap was dropped: %llu > %llu\n",
89 observed,
90 OBSERVATION_CAP);
91 return;
92 }
93
94#if 0
95 // filter out values that are HI_THRESHOLDx or (1/LO_THRESHOLD)x out
96 // of range of the average, but only filter if enough samples
97 // have been taken.
98 if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
99 if (unlikely(observed < est->avg/LO_THRESHOLD)) {
100 TRACE_TASK(t, "Observation is too small: %llu\n",
101 observed);
102 return;
103 }
104 else if (unlikely(observed > est->avg*HI_THRESHOLD)) {
105 TRACE_TASK(t, "Observation is too large: %llu\n",
106 observed);
107 return;
108 }
109#endif
110 // filter values outside NUM_STDEVx the standard deviation,
111 // but only filter if enough samples have been taken.
112 if (likely((est->count > MIN(10, AVG_EST_WINDOW_SIZE/2)))) {
113 lt_t lower, upper;
114
115 lt_t range = (est->std*NUM_STDEV_NUM)/NUM_STDEV_DENOM;
116 lower = est->avg - MIN(range, est->avg); // no underflow.
117
118 if (unlikely(observed < lower)) {
119 TRACE_TASK(t, "Observation is too small: %llu\n", observed);
120 return;
121 }
122
123 upper = est->avg + range;
124 if (unlikely(observed > upper)) {
125 TRACE_TASK(t, "Observation is too large: %llu\n", observed);
126 return;
127 }
128 }
129
130
131
132 if (unlikely(est->count < AVG_EST_WINDOW_SIZE)) {
133 ++est->count;
134 }
135 else {
136 est->sum -= est->history[est->idx];
137 }
138
139 mig_info.observed = observed;
140 mig_info.estimated = est->avg;
141 mig_info.distance = tsk_rt(t)->gpu_migration;
142 sched_trace_migration(t, &mig_info);
143
144
145 est->history[est->idx] = observed;
146 est->sum += observed;
147 est->avg = est->sum/est->count;
148 est->std = isqrt(varience(est->history, est->avg, est->count));
149 est->idx = (est->idx + 1) % AVG_EST_WINDOW_SIZE;
150
151
152#if 0
153 if(unlikely(fb->est.val == 0)) {
154 // kludge-- cap observed values to prevent whacky estimations.
155 // whacky stuff happens during the first few jobs.
156 if(unlikely(observed > OBSERVATION_CAP)) {
157 TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
158 observed, OBSERVATION_CAP);
159 observed = OBSERVATION_CAP;
160 }
161
162 // take the first observation as our estimate
163 // (initial value of 0 was bogus anyhow)
164 fb->est = _integer_to_fp(observed);
165 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
166 }
167 else {
168 fp_t rel_err = update_estimate(fb,
169 tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
170 tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
171 observed);
172
173 if(unlikely(_fp_to_integer(fb->est) <= 0)) {
174 TRACE_TASK(t, "Invalid estimate. Patching.\n");
175 fb->est = _integer_to_fp(observed);
176 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
177 }
178 else {
179 struct migration_info mig_info;
180
181 sched_trace_prediction_err(t,
182 &(tsk_rt(t)->gpu_migration),
183 &rel_err);
184
185 mig_info.observed = observed;
186 mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
187 mig_info.distance = tsk_rt(t)->gpu_migration;
188
189 sched_trace_migration(t, &mig_info);
190 }
191 }
192#endif
193
194 TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %llu\n",
195 tsk_rt(t)->gpu_migration,
196 observed,
197 est->avg);
198}
199
200gpu_migration_dist_t gpu_migration_distance(int a, int b)
201{
202 // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs
203 int i;
204 int dist;
205
206 if(likely(a >= 0 && b >= 0)) {
207 for(i = 0; i <= MIG_FAR; ++i) {
208 if(a>>i == b>>i) {
209 dist = i;
210 goto out;
211 }
212 }
213 dist = MIG_NONE; // hopefully never reached.
214 TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
215 }
216 else {
217 dist = MIG_NONE;
218 }
219
220out:
221 TRACE_CUR("Distance %d -> %d is %d\n",
222 a, b, dist);
223
224 return dist;
225}
226
227
228
229
230#endif
231
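Since update_gpu_estimate() above interleaves the window bookkeeping with tracing and #if 0 remnants of the old feedback estimator, the stripped-down, userspace-compilable restatement below shows the estimator it now implements: a sliding window whose mean is the estimate, a brute-force variance with an integer square root, and rejection of samples more than half a standard deviation from the current mean once enough history exists. The window size and names are local to this sketch.

    #include <stdint.h>

    #define WINDOW 20    /* stands in for AVG_EST_WINDOW_SIZE */

    struct est {
            uint64_t hist[WINDOW];
            uint64_t sum, avg, std;
            unsigned int count, idx;
    };

    static uint64_t isqrt64(uint64_t n)
    {
            /* Babylonian/bitwise integer square root, as in the patch */
            uint64_t res = 0, bit = (uint64_t)1 << 62;
            while (bit > n) bit >>= 2;
            while (bit) {
                    if (n >= res + bit) { n -= res + bit; res = (res >> 1) + bit; }
                    else res >>= 1;
                    bit >>= 2;
            }
            return res;
    }

    static uint64_t window_variance(const uint64_t *x, uint64_t avg, unsigned int count)
    {
            uint64_t sq = 0;
            unsigned int i;
            for (i = 0; i < count; i++) {
                    int64_t d = (int64_t)x[i] - (int64_t)avg;
                    sq += (uint64_t)(d * d);
            }
            return sq / count;
    }

    static void add_sample(struct est *e, uint64_t obs)
    {
            if (e->count > 10) {                      /* only filter once there is history */
                    uint64_t range = e->std / 2;      /* half a standard deviation */
                    uint64_t lower = e->avg - (range < e->avg ? range : e->avg);
                    if (obs < lower || obs > e->avg + range)
                            return;                   /* treat as an outlier and drop it */
            }
            if (e->count < WINDOW)
                    e->count++;
            else
                    e->sum -= e->hist[e->idx];        /* evict the oldest sample */
            e->hist[e->idx] = obs;
            e->sum += obs;
            e->avg = e->sum / e->count;
            e->std = isqrt64(window_variance(e->hist, e->avg, e->count));
            e->idx = (e->idx + 1) % WINDOW;
    }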
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
new file mode 100644
index 000000000000..a4ae74331782
--- /dev/null
+++ b/litmus/ikglp_lock.c
@@ -0,0 +1,2976 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/ikglp_lock.h>
14
15// big signed value.
16#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF
17
18int ikglp_max_heap_base_priority_order(struct binheap_node *a,
19 struct binheap_node *b)
20{
21 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
22 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
23
24 BUG_ON(!d_a);
25 BUG_ON(!d_b);
26
27 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
28}
29
30int ikglp_min_heap_base_priority_order(struct binheap_node *a,
31 struct binheap_node *b)
32{
33 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
34 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
35
36 return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
37}
38
39int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
40 struct binheap_node *b)
41{
42 ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
43 ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
44
45 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
46}
47
48
49int ikglp_min_heap_donee_order(struct binheap_node *a,
50 struct binheap_node *b)
51{
52 struct task_struct *prio_a, *prio_b;
53
54 ikglp_donee_heap_node_t *d_a =
55 binheap_entry(a, ikglp_donee_heap_node_t, node);
56 ikglp_donee_heap_node_t *d_b =
57 binheap_entry(b, ikglp_donee_heap_node_t, node);
58
59 if(!d_a->donor_info) {
60 prio_a = d_a->task;
61 }
62 else {
63 prio_a = d_a->donor_info->task;
64 BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
65 }
66
67 if(!d_b->donor_info) {
68 prio_b = d_b->task;
69 }
70 else {
71 prio_b = d_b->donor_info->task;
72 BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
73 }
74
75 // note reversed order
76 return litmus->__compare(prio_b, BASE, prio_a, BASE);
77}
78
79
80
81static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
82 struct fifo_queue *queue)
83{
84 return (queue - &sem->fifo_queues[0]);
85}
86
87static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
88 struct task_struct *holder)
89{
90 int i;
91 for(i = 0; i < sem->nr_replicas; ++i)
92 if(sem->fifo_queues[i].owner == holder)
93 return(&sem->fifo_queues[i]);
94 return(NULL);
95}
96
97
98
99static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
100 struct task_struct *skip)
101{
102 struct list_head *pos;
103 struct task_struct *queued, *found = NULL;
104
105 list_for_each(pos, &kqueue->wait.task_list) {
106 queued = (struct task_struct*) list_entry(pos,
107 wait_queue_t, task_list)->private;
108
109 /* Compare task prios, find high prio task. */
110 if(queued != skip && litmus->compare(queued, found))
111 found = queued;
112 }
113 return found;
114}
115
116static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
117 struct fifo_queue *search_start)
118{
119 // we start our search at search_start instead of at the beginning of the
120 // queue list to load-balance across all resources.
121 struct fifo_queue* step = search_start;
122 struct fifo_queue* shortest = sem->shortest_fifo_queue;
123
124 do {
125 step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
126 step+1 : &sem->fifo_queues[0];
127
128 if(step->count < shortest->count) {
129 shortest = step;
130 if(step->count == 0)
131 break; /* can't get any shorter */
132 }
133
134 }while(step != search_start);
135
136 return(shortest);
137}
138
139static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
140{
141 return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
142}
143
144
145
146#if 0
147static void print_global_list(struct binheap_node* n, int depth)
148{
149 ikglp_heap_node_t *global_heap_node;
150 char padding[81] = " ";
151
152 if(n == NULL) {
153 TRACE_CUR("+-> %p\n", NULL);
154 return;
155 }
156
157 global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
158
159 if(depth*2 <= 80)
160 padding[depth*2] = '\0';
161
162 TRACE_CUR("%s+-> %s/%d\n",
163 padding,
164 global_heap_node->task->comm,
165 global_heap_node->task->pid);
166
167 if(n->left) print_global_list(n->left, depth+1);
168 if(n->right) print_global_list(n->right, depth+1);
169}
170
171static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
172{
173 ikglp_donee_heap_node_t *donee_node;
174 char padding[81] = " ";
175 struct task_struct* donor = NULL;
176
177 if(n == NULL) {
178 TRACE_CUR("+-> %p\n", NULL);
179 return;
180 }
181
182 donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
183
184 if(depth*2 <= 80)
185 padding[depth*2] = '\0';
186
187 if(donee_node->donor_info) {
188 donor = donee_node->donor_info->task;
189 }
190
191 TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
192 padding,
193 donee_node->task->comm,
194 donee_node->task->pid,
195 (donor) ? donor->comm : "nil",
196 (donor) ? donor->pid : -1,
197 ikglp_get_idx(sem, donee_node->fq));
198
199 if(n->left) print_donees(sem, n->left, depth+1);
200 if(n->right) print_donees(sem, n->right, depth+1);
201}
202
203static void print_donors(struct binheap_node *n, int depth)
204{
205 ikglp_wait_state_t *donor_node;
206 char padding[81] = " ";
207
208 if(n == NULL) {
209 TRACE_CUR("+-> %p\n", NULL);
210 return;
211 }
212
213 donor_node = binheap_entry(n, ikglp_wait_state_t, node);
214
215 if(depth*2 <= 80)
216 padding[depth*2] = '\0';
217
218
219 TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
220 padding,
221 donor_node->task->comm,
222 donor_node->task->pid,
223 donor_node->donee_info->task->comm,
224 donor_node->donee_info->task->pid);
225
226 if(n->left) print_donors(n->left, depth+1);
227 if(n->right) print_donors(n->right, depth+1);
228}
229#endif
230
231static void ikglp_add_global_list(struct ikglp_semaphore *sem,
232 struct task_struct *t,
233 ikglp_heap_node_t *node)
234{
235
236
237 node->task = t;
238 INIT_BINHEAP_NODE(&node->node);
239
240 if(sem->top_m_size < sem->m) {
241 TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
242 t->comm, t->pid);
243// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
244// print_global_list(sem->top_m.root, 1);
245
246 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
247 ++(sem->top_m_size);
248
249// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
250// print_global_list(sem->top_m.root, 1);
251 }
252 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
253 ikglp_heap_node_t *evicted =
254 binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
255
256 TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
257 t->comm, t->pid,
258 evicted->task->comm, evicted->task->pid);
259
260// TRACE_CUR("Not-Top-M Before:\n");
261// print_global_list(sem->not_top_m.root, 1);
262// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
263// print_global_list(sem->top_m.root, 1);
264
265
266 binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
267 INIT_BINHEAP_NODE(&evicted->node);
268 binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
269
270 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
271
272// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
273// print_global_list(sem->top_m.root, 1);
274// TRACE_CUR("Not-Top-M After:\n");
275// print_global_list(sem->not_top_m.root, 1);
276 }
277 else {
278 TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
279 t->comm, t->pid);
280// TRACE_CUR("Not-Top-M Before:\n");
281// print_global_list(sem->not_top_m.root, 1);
282
283 binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
284
285// TRACE_CUR("Not-Top-M After:\n");
286// print_global_list(sem->not_top_m.root, 1);
287 }
288}
289
290
291static void ikglp_del_global_list(struct ikglp_semaphore *sem,
292 struct task_struct *t,
293 ikglp_heap_node_t *node)
294{
295 BUG_ON(!binheap_is_in_heap(&node->node));
296
297 TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
298
299 if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
300 TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
301
302// TRACE_CUR("Not-Top-M Before:\n");
303// print_global_list(sem->not_top_m.root, 1);
304// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
305// print_global_list(sem->top_m.root, 1);
306
307
308 binheap_delete(&node->node, &sem->top_m);
309
310 if(!binheap_empty(&sem->not_top_m)) {
311 ikglp_heap_node_t *promoted =
312 binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
313
314 TRACE_CUR("Promoting %s/%d to top-m\n",
315 promoted->task->comm, promoted->task->pid);
316
317 binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
318 INIT_BINHEAP_NODE(&promoted->node);
319
320 binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
321 }
322 else {
323 TRACE_CUR("No one to promote to top-m.\n");
324 --(sem->top_m_size);
325 }
326
327// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
328// print_global_list(sem->top_m.root, 1);
329// TRACE_CUR("Not-Top-M After:\n");
330// print_global_list(sem->not_top_m.root, 1);
331 }
332 else {
333 TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
334// TRACE_CUR("Not-Top-M Before:\n");
335// print_global_list(sem->not_top_m.root, 1);
336
337 binheap_delete(&node->node, &sem->not_top_m);
338
339// TRACE_CUR("Not-Top-M After:\n");
340// print_global_list(sem->not_top_m.root, 1);
341 }
342}
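/*
 * Taken together, ikglp_add_global_list() and ikglp_del_global_list()
 * maintain a split of all outstanding requests into two heaps: top_m, a
 * min-heap holding the (at most) m highest-base-priority requests, and
 * not_top_m, a max-heap holding the rest. Adding a request that beats the
 * m-th highest evicts the current top_m minimum into not_top_m; removing a
 * top_m entry promotes the not_top_m maximum in its place, so top_m_size
 * only shrinks when not_top_m is empty.
 */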
343
344
345static void ikglp_add_donees(struct ikglp_semaphore *sem,
346 struct fifo_queue *fq,
347 struct task_struct *t,
348 ikglp_donee_heap_node_t* node)
349{
350// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
351// TRACE_CUR("donees Before:\n");
352// print_donees(sem, sem->donees.root, 1);
353
354 node->task = t;
355 node->donor_info = NULL;
356 node->fq = fq;
357 INIT_BINHEAP_NODE(&node->node);
358
359 binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
360
361// TRACE_CUR("donees After:\n");
362// print_donees(sem, sem->donees.root, 1);
363}
364
365
366static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
367 struct fifo_queue *fq,
368 struct ikglp_semaphore *sem,
369 unsigned long flags)
370{
371 // priority of 't' has increased (note: 't' might already be hp_waiter).
372 if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
373 struct task_struct *old_max_eff_prio;
374 struct task_struct *new_max_eff_prio;
375 struct task_struct *new_prio = NULL;
376 struct task_struct *owner = fq->owner;
377
378 if(fq->hp_waiter)
379 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
380 fq->hp_waiter->comm, fq->hp_waiter->pid);
381 else
382 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
383
384 if(owner)
385 {
386 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
387
388// TRACE_TASK(owner, "Heap Before:\n");
389// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
390
391 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
392
393 fq->hp_waiter = t;
394 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
395
396 binheap_decrease(&fq->nest.hp_binheap_node,
397 &tsk_rt(owner)->hp_blocked_tasks);
398
399// TRACE_TASK(owner, "Heap After:\n");
400// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
401
402 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
403
404 if(new_max_eff_prio != old_max_eff_prio) {
405 TRACE_TASK(t, "is new hp_waiter.\n");
406
407 if ((effective_priority(owner) == old_max_eff_prio) ||
408 (litmus->__compare(new_max_eff_prio, BASE,
409 owner, EFFECTIVE))){
410 new_prio = new_max_eff_prio;
411 }
412 }
413 else {
414 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
415 }
416
417 if(new_prio) {
418 // set new inheritance and propagate
419 TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
420 owner->comm, owner->pid,
421 new_prio->comm, new_prio->pid);
422 litmus->nested_increase_prio(owner, new_prio, &sem->lock,
423 flags); // unlocks lock.
424 }
425 else {
426 TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n",
427 new_max_eff_prio->comm, new_max_eff_prio->pid);
428 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
429 unlock_fine_irqrestore(&sem->lock, flags);
430 }
431 }
432 else {
433 fq->hp_waiter = t;
434 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
435
436 TRACE_TASK(t, "no owner.\n");
437 unlock_fine_irqrestore(&sem->lock, flags);
438 }
439 }
440 else {
441 TRACE_TASK(t, "hp_waiter is unaffected.\n");
442 unlock_fine_irqrestore(&sem->lock, flags);
443 }
444}
445
446// hp_waiter has decreased
447static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
448 struct ikglp_semaphore *sem,
449 unsigned long flags)
450{
451 struct task_struct *owner = fq->owner;
452
453 struct task_struct *old_max_eff_prio;
454 struct task_struct *new_max_eff_prio;
455
456 if(!owner) {
457 TRACE_CUR("No owner. Returning.\n");
458 unlock_fine_irqrestore(&sem->lock, flags);
459 return;
460 }
461
462 TRACE_CUR("ikglp_refresh_owners_prio_decrease\n");
463
464 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
465
466 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
467
468 binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
	469	fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ? effective_priority(fq->hp_waiter) : NULL;
470 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
471 struct nested_info, hp_binheap_node);
472
473 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
474
475 if((old_max_eff_prio != new_max_eff_prio) &&
476 (effective_priority(owner) == old_max_eff_prio))
477 {
478 // Need to set new effective_priority for owner
479 struct task_struct *decreased_prio;
480
481 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
482 ikglp_get_idx(sem, fq));
483
484 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
485 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
486 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
487 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
488 owner->comm,
489 owner->pid,
490 ikglp_get_idx(sem, fq));
491
492 decreased_prio = new_max_eff_prio;
493 }
494 else {
495 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
496 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
497 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
498 owner->comm,
499 owner->pid,
500 ikglp_get_idx(sem, fq));
501
502 decreased_prio = NULL;
503 }
504
505 // beware: recursion
506 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
507 }
508 else {
509 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
510 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
511 unlock_fine_irqrestore(&sem->lock, flags);
512 }
513}
514
515
516static void ikglp_remove_donation_from_owner(struct binheap_node *n,
517 struct fifo_queue *fq,
518 struct ikglp_semaphore *sem,
519 unsigned long flags)
520{
521 struct task_struct *owner = fq->owner;
522
523 struct task_struct *old_max_eff_prio;
524 struct task_struct *new_max_eff_prio;
525
526 BUG_ON(!owner);
527
528 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
529
530 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
531
532 binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
533
534 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
535
536 if((old_max_eff_prio != new_max_eff_prio) &&
537 (effective_priority(owner) == old_max_eff_prio))
538 {
539 // Need to set new effective_priority for owner
540 struct task_struct *decreased_prio;
541
542 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
543 ikglp_get_idx(sem, fq));
544
545 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
546 TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
547 ikglp_get_idx(sem, fq));
548 decreased_prio = new_max_eff_prio;
549 }
550 else {
551 TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
552 ikglp_get_idx(sem, fq));
553 decreased_prio = NULL;
554 }
555
556 // beware: recursion
557 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
558 }
559 else {
560 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
561 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
562 unlock_fine_irqrestore(&sem->lock, flags);
563 }
564}
565
566static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
567 struct binheap_node *n)
568{
569 struct task_struct *old_max_eff_prio;
570 struct task_struct *new_max_eff_prio;
571
572 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
573
574 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
575
576 binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
577
578 new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
579
580 if((old_max_eff_prio != new_max_eff_prio) &&
581 (effective_priority(t) == old_max_eff_prio))
582 {
583 // Need to set new effective_priority for owner
584 struct task_struct *decreased_prio;
585
586 if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
587 decreased_prio = new_max_eff_prio;
588 }
589 else {
590 decreased_prio = NULL;
591 }
592
593 tsk_rt(t)->inh_task = decreased_prio;
594 }
595
596 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
597}
598
599static void ikglp_get_immediate(struct task_struct* t,
600 struct fifo_queue *fq,
601 struct ikglp_semaphore *sem,
602 unsigned long flags)
603{
604 // resource available now
605 TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
606
607 fq->owner = t;
608
609 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
610 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
611 struct nested_info, hp_binheap_node);
612 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
613
614 ++(fq->count);
615
616 ikglp_add_global_list(sem, t, &fq->global_heap_node);
617 ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
618
619 sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
620
621#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
622 if(sem->aff_obs) {
623 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
624 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
625 }
626#endif
627
628 unlock_fine_irqrestore(&sem->lock, flags);
629}
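/*
 * Fast path, roughly: the replica behind 'fq' is idle, so the request is
 * granted without suspension. The acquiring task is still inserted into
 * the global top-m/not-top-m lists and the donee heap so that later donors
 * can select it, its hp_blocked_tasks heap gains this queue's nest node,
 * and the cached shortest_fifo_queue hint is refreshed.
 */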
630
631
632
633
634
635static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
636 struct fifo_queue* fq,
637 struct task_struct* t,
638 wait_queue_t *wait,
639 ikglp_heap_node_t *global_heap_node,
640 ikglp_donee_heap_node_t *donee_heap_node)
641{
642 /* resource is not free => must suspend and wait */
643 TRACE_TASK(t, "Enqueuing on fq %d.\n",
644 ikglp_get_idx(sem, fq));
645
646 init_waitqueue_entry(wait, t);
647
648 __add_wait_queue_tail_exclusive(&fq->wait, wait);
649
650 ++(fq->count);
651 ++(sem->nr_in_fifos);
652
653 // update global list.
654 if(likely(global_heap_node)) {
655 if(binheap_is_in_heap(&global_heap_node->node)) {
656 WARN_ON(1);
657 ikglp_del_global_list(sem, t, global_heap_node);
658 }
659 ikglp_add_global_list(sem, t, global_heap_node);
660 }
	661	// update donor eligibility list.
662 if(likely(donee_heap_node)) {
663// if(binheap_is_in_heap(&donee_heap_node->node)) {
664// WARN_ON(1);
665// }
666 ikglp_add_donees(sem, fq, t, donee_heap_node);
667 }
668
669 if(sem->shortest_fifo_queue == fq) {
670 sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
671 }
672
673#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
674 if(sem->aff_obs) {
675 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
676 }
677#endif
678
	679	TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, sem->shortest_fifo_queue));
680}
681
682
683static void ikglp_enqueue_on_fq(
684 struct ikglp_semaphore *sem,
685 struct fifo_queue *fq,
686 ikglp_wait_state_t *wait,
687 unsigned long flags)
688{
689 /* resource is not free => must suspend and wait */
690 TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
691 ikglp_get_idx(sem, fq));
692
693 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
694 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
695
696 __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
697 &wait->global_heap_node, &wait->donee_heap_node);
698
699 ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock
700}
701
702
703static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
704 ikglp_wait_state_t *wait)
705{
706 TRACE_TASK(wait->task, "goes to PQ.\n");
707
708 wait->pq_node.task = wait->task; // copy over task (little redundant...)
709
710 binheap_add(&wait->pq_node.node, &sem->priority_queue,
711 ikglp_heap_node_t, node);
712}
713
714static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
715 ikglp_wait_state_t *wait)
716{
717 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
718 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
719 INIT_BINHEAP_NODE(&wait->pq_node.node);
720
721 __ikglp_enqueue_on_pq(sem, wait);
722}
723
724static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
725 ikglp_wait_state_t* wait,
726 unsigned long flags)
727{
728 struct task_struct *t = wait->task;
729 ikglp_donee_heap_node_t *donee_node = NULL;
730 struct task_struct *donee;
731
732 struct task_struct *old_max_eff_prio;
733 struct task_struct *new_max_eff_prio;
734 struct task_struct *new_prio = NULL;
735
736 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
737 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
738 INIT_BINHEAP_NODE(&wait->pq_node.node);
739 INIT_BINHEAP_NODE(&wait->node);
740
741// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
742// TRACE_CUR("donors Before:\n");
743// print_donors(sem->donors.root, 1);
744
745 // Add donor to the global list.
746 ikglp_add_global_list(sem, t, &wait->global_heap_node);
747
748 // Select a donee
749#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
750 donee_node = (sem->aff_obs) ?
751 sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
752 binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
753#else
754 donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
755#endif
756
757 donee = donee_node->task;
758
759 TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
760
	761	TRACE_CUR("Temporarily removing %s/%d from donee list.\n",
762 donee->comm, donee->pid);
763// TRACE_CUR("donees Before:\n");
764// print_donees(sem, sem->donees.root, 1);
765
766 //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
767 binheap_delete(&donee_node->node, &sem->donees);
768
769// TRACE_CUR("donees After:\n");
770// print_donees(sem, sem->donees.root, 1);
771
772
773 wait->donee_info = donee_node;
774
775 // Add t to donor heap.
776 binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
777
778 // Now adjust the donee's priority.
779
780 // Lock the donee's inheritance heap.
781 raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
782
783 old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
784
785 if(donee_node->donor_info) {
786 // Steal donation relation. Evict old donor to PQ.
787
788 // Remove old donor from donor heap
789 ikglp_wait_state_t *old_wait = donee_node->donor_info;
790 struct task_struct *old_donor = old_wait->task;
791
792 TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n",
793 donee->comm, donee->pid, old_donor->comm, old_donor->pid);
794
795 binheap_delete(&old_wait->node, &sem->donors);
796
797 // Remove donation from donee's inheritance heap.
798 binheap_delete(&old_wait->prio_donation.hp_binheap_node,
799 &tsk_rt(donee)->hp_blocked_tasks);
800 // WARNING: have not updated inh_prio!
801
802 // Add old donor to PQ.
803 __ikglp_enqueue_on_pq(sem, old_wait);
804
805 // Remove old donor from the global heap.
806 ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
807 }
808
809 // Add back donee's node to the donees heap with increased prio
810 donee_node->donor_info = wait;
811 INIT_BINHEAP_NODE(&donee_node->node);
812
813
814 TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
815// TRACE_CUR("donees Before:\n");
816// print_donees(sem, sem->donees.root, 1);
817
818 binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
819
820// TRACE_CUR("donees After:\n");
821// print_donees(sem, sem->donees.root, 1);
822
823 // Add an inheritance/donation to the donee's inheritance heap.
824 wait->prio_donation.lock = (struct litmus_lock*)sem;
825 wait->prio_donation.hp_waiter_eff_prio = t;
826 wait->prio_donation.hp_waiter_ptr = NULL;
827 INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
828
829 binheap_add(&wait->prio_donation.hp_binheap_node,
830 &tsk_rt(donee)->hp_blocked_tasks,
831 struct nested_info, hp_binheap_node);
832
833 new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
834
835 if(new_max_eff_prio != old_max_eff_prio) {
836 if ((effective_priority(donee) == old_max_eff_prio) ||
837 (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
838 TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
839 donee->comm, donee->pid);
840 new_prio = new_max_eff_prio;
841 }
842// else {
843// // should be bug. donor would not be in top-m.
844// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
845// WARN_ON(1);
846// }
847// }
848// else {
849// // should be bug. donor would not be in top-m.
850// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
851// WARN_ON(1);
852 }
853
854 if(new_prio) {
855 struct fifo_queue *donee_fq = donee_node->fq;
856
857 if(donee != donee_fq->owner) {
858 TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
859 donee->comm, donee->pid,
860 donee_fq->owner->comm, donee_fq->owner->pid);
861
862 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
863 ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock
864 }
865 else {
	866			TRACE_TASK(t, "%s/%d is the owner. Propagating priority immediately.\n",
867 donee->comm, donee->pid);
868 litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock
869 }
870 }
871 else {
	872		TRACE_TASK(t, "No change in effective priority (it is %s/%d). BUG?\n",
873 new_max_eff_prio->comm, new_max_eff_prio->pid);
874 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
875 unlock_fine_irqrestore(&sem->lock, flags);
876 }
877
878
879// TRACE_CUR("donors After:\n");
880// print_donors(sem->donors.root, 1);
881}
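/*
 * Donor path, roughly: the new request joins the global list and the donor
 * heap, picks a donee (by default the minimum of sem->donees, or whatever
 * the affinity observer advises), displaces any donor that donee already
 * had to the PQ, and finally pushes the donated priority into the donee's
 * hp_blocked_tasks heap, propagating onward to the replica owner if the
 * donee is itself still waiting in a FIFO queue.
 */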
882
883int ikglp_lock(struct litmus_lock* l)
884{
885 struct task_struct* t = current;
886 struct ikglp_semaphore *sem = ikglp_from_lock(l);
887 unsigned long flags = 0, real_flags;
888 struct fifo_queue *fq = NULL;
889 int replica = -EINVAL;
890
891#ifdef CONFIG_LITMUS_DGL_SUPPORT
892 raw_spinlock_t *dgl_lock;
893#endif
894
895 ikglp_wait_state_t wait;
896
897 if (!is_realtime(t))
898 return -EPERM;
899
900#ifdef CONFIG_LITMUS_DGL_SUPPORT
901 dgl_lock = litmus->get_dgl_spinlock(t);
902#endif
903
904 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
905
906 lock_global_irqsave(dgl_lock, flags);
907 lock_fine_irqsave(&sem->lock, flags);
908
909 if(sem->nr_in_fifos < sem->m) {
	910		// enqueue somewhere
911#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
912 fq = (sem->aff_obs) ?
913 sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
914 sem->shortest_fifo_queue;
915#else
916 fq = sem->shortest_fifo_queue;
917#endif
918 if(fq->count == 0) {
919 // take available resource
920 replica = ikglp_get_idx(sem, fq);
921
922 ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
923
924 unlock_global_irqrestore(dgl_lock, flags);
925 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
926 goto acquired;
927 }
928 else {
929 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
930
931 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
932 mb();
933
934 /* FIXME: interruptible would be nice some day */
935 set_task_state(t, TASK_UNINTERRUPTIBLE);
936
937 ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
938 }
939 }
940 else {
941 // donor!
942 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
943
944 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
945 mb();
946
947 /* FIXME: interruptible would be nice some day */
948 set_task_state(t, TASK_UNINTERRUPTIBLE);
949
950 if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
951 // enqueue on PQ
952 ikglp_enqueue_on_pq(sem, &wait);
953 unlock_fine_irqrestore(&sem->lock, flags);
954 }
955 else {
956 // enqueue as donor
957 ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
958 }
959 }
960
961 unlock_global_irqrestore(dgl_lock, flags);
962 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
963
964 TS_LOCK_SUSPEND;
965
966 suspend_for_lock();
967
968 TS_LOCK_RESUME;
969
970 fq = ikglp_get_queue(sem, t);
971 BUG_ON(!fq);
972
973 replica = ikglp_get_idx(sem, fq);
974
975acquired:
976 TRACE_CUR("Acquired lock %d, queue %d\n",
977 l->ident, replica);
978
979#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
980 if(sem->aff_obs) {
981 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
982 }
983#endif
984
985 return replica;
986}
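/*
 * Summary of the admission logic above: a request goes to a FIFO queue as
 * long as fewer than m requests are already in FIFOs (taking the replica
 * immediately if that queue is idle); otherwise it becomes a donor if its
 * base priority exceeds the m-th highest known request, and parks in the
 * priority queue (PQ) if it does not. Illustrative sizing: with m = 4 and
 * k = 2 replicas, at most 4 requests occupy FIFO slots, nominally
 * ceil(4/2) = 2 per queue (see max_fifo_len in ikglp_new()).
 */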
987
988//int ikglp_lock(struct litmus_lock* l)
989//{
990// struct task_struct* t = current;
991// struct ikglp_semaphore *sem = ikglp_from_lock(l);
992// unsigned long flags = 0, real_flags;
993// struct fifo_queue *fq = NULL;
994// int replica = -EINVAL;
995//
996//#ifdef CONFIG_LITMUS_DGL_SUPPORT
997// raw_spinlock_t *dgl_lock;
998//#endif
999//
1000// ikglp_wait_state_t wait;
1001//
1002// if (!is_realtime(t))
1003// return -EPERM;
1004//
1005//#ifdef CONFIG_LITMUS_DGL_SUPPORT
1006// dgl_lock = litmus->get_dgl_spinlock(t);
1007//#endif
1008//
1009// raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1010//
1011// lock_global_irqsave(dgl_lock, flags);
1012// lock_fine_irqsave(&sem->lock, flags);
1013//
1014//
1015//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1016// fq = (sem->aff_obs) ?
1017// sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
1018// sem->shortest_fifo_queue;
1019//#else
1020// fq = sem->shortest_fifo_queue;
1021//#endif
1022//
1023// if(fq->count == 0) {
1024// // take available resource
1025// replica = ikglp_get_idx(sem, fq);
1026//
1027// ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
1028//
1029// unlock_global_irqrestore(dgl_lock, flags);
1030// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1031// }
1032// else
1033// {
1034// // we have to suspend.
1035//
1036// wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
1037//
1038// tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
1039// mb();
1040//
1041// /* FIXME: interruptible would be nice some day */
1042// set_task_state(t, TASK_UNINTERRUPTIBLE);
1043//
1044// if(fq->count < sem->max_fifo_len) {
1045// // enqueue on fq
1046// ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
1047// }
1048// else {
1049//
1050// TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
1051//
1052// // no room in fifos. Go to PQ or donors.
1053//
1054// if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
1055// // enqueue on PQ
1056// ikglp_enqueue_on_pq(sem, &wait);
1057// unlock_fine_irqrestore(&sem->lock, flags);
1058// }
1059// else {
1060// // enqueue as donor
1061// ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
1062// }
1063// }
1064//
1065// unlock_global_irqrestore(dgl_lock, flags);
1066// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1067//
1068// TS_LOCK_SUSPEND;
1069//
1070// schedule();
1071//
1072// TS_LOCK_RESUME;
1073//
1074// fq = ikglp_get_queue(sem, t);
1075// BUG_ON(!fq);
1076//
1077// replica = ikglp_get_idx(sem, fq);
1078// }
1079//
1080// TRACE_CUR("Acquired lock %d, queue %d\n",
1081// l->ident, replica);
1082//
1083//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1084// if(sem->aff_obs) {
1085// return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
1086// }
1087//#endif
1088//
1089// return replica;
1090//}
1091
1092static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
1093 struct fifo_queue *fq,
1094 ikglp_wait_state_t *donor_info)
1095{
1096 struct task_struct *t = donor_info->task;
1097
1098 TRACE_CUR("Donor %s/%d being moved to fq %d\n",
1099 t->comm,
1100 t->pid,
1101 ikglp_get_idx(sem, fq));
1102
1103 binheap_delete(&donor_info->node, &sem->donors);
1104
1105 __ikglp_enqueue_on_fq(sem, fq, t,
1106 &donor_info->fq_node,
1107 NULL, // already in global_list, so pass null to prevent adding 2nd time.
1108 &donor_info->donee_heap_node);
1109
1110 // warning:
1111 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1112}
1113
1114static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
1115 struct fifo_queue *fq,
1116 ikglp_wait_state_t *wait)
1117{
1118 struct task_struct *t = wait->task;
1119
1120 TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
1121 t->comm,
1122 t->pid,
1123 ikglp_get_idx(sem, fq));
1124
1125 binheap_delete(&wait->pq_node.node, &sem->priority_queue);
1126
1127 __ikglp_enqueue_on_fq(sem, fq, t,
1128 &wait->fq_node,
1129 &wait->global_heap_node,
1130 &wait->donee_heap_node);
1131 // warning:
1132 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1133}
1134
1135static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
1136 struct ikglp_semaphore* sem)
1137{
1138 /* must hold sem->lock */
1139
1140 struct fifo_queue *fq = NULL;
1141 struct list_head *pos;
1142 struct task_struct *queued;
1143 int i;
1144
1145 for(i = 0; i < sem->nr_replicas; ++i) {
1146 if( (sem->fifo_queues[i].count > 1) &&
1147 (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
1148
1149 TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
1150 ikglp_get_idx(sem, &sem->fifo_queues[i]),
1151 sem->fifo_queues[i].hp_waiter->comm,
1152 sem->fifo_queues[i].hp_waiter->pid,
1153 (fq) ? ikglp_get_idx(sem, fq) : -1,
1154 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
1155 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
1156
1157 fq = &sem->fifo_queues[i];
1158
1159 WARN_ON(!(fq->hp_waiter));
1160 }
1161 }
1162
1163 if(fq) {
1164 struct task_struct *max_hp = fq->hp_waiter;
1165 ikglp_wait_state_t* ret = NULL;
1166
1167 TRACE_CUR("Searching for %s/%d on fq %d\n",
1168 max_hp->comm,
1169 max_hp->pid,
1170 ikglp_get_idx(sem, fq));
1171
1172 BUG_ON(!max_hp);
1173
1174 list_for_each(pos, &fq->wait.task_list) {
1175 wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
1176
1177 queued = (struct task_struct*) wait->private;
1178
1179 TRACE_CUR("fq %d entry: %s/%d\n",
1180 ikglp_get_idx(sem, fq),
1181 queued->comm,
1182 queued->pid);
1183
1184 /* Compare task prios, find high prio task. */
1185 if (queued == max_hp) {
1186 TRACE_CUR("Found it!\n");
1187 ret = container_of(wait, ikglp_wait_state_t, fq_node);
1188 }
1189 }
1190
1191 WARN_ON(!ret);
1192 return ret;
1193 }
1194
1195 return(NULL);
1196}
1197
1198static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
1199 struct fifo_queue *fq,
1200 ikglp_wait_state_t *fq_wait)
1201{
1202 struct task_struct *t = fq_wait->task;
1203 struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
1204
1205 TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
1206 t->comm,
1207 t->pid,
1208 ikglp_get_idx(sem, fq));
1209
1210 fq_wait->donee_heap_node.fq = fq; // just to be safe
1211
1212
1213 __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
1214 --(fq_steal->count);
1215
1216#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1217 if(sem->aff_obs) {
1218 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
1219 }
1220#endif
1221
1222 if(t == fq_steal->hp_waiter) {
1223 fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
1224 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1225 ikglp_get_idx(sem, fq_steal),
1226 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
1227 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
1228 }
1229
1230
1231 // Update shortest.
1232 if(fq_steal->count < sem->shortest_fifo_queue->count) {
1233 sem->shortest_fifo_queue = fq_steal;
1234 }
1235
1236 __ikglp_enqueue_on_fq(sem, fq, t,
1237 &fq_wait->fq_node,
1238 NULL,
1239 NULL);
1240
1241 // warning: We have not checked the priority inheritance of fq's owner yet.
1242}
1243
1244
1245static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
1246 struct fifo_queue *fq,
1247 ikglp_wait_state_t *old_wait)
1248{
1249 struct task_struct *t = old_wait->task;
1250
1251 BUG_ON(old_wait->donee_heap_node.fq != fq);
1252
1253 TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
1254 ikglp_get_idx(sem, fq));
1255
1256 // need to migrate global_heap_node and donee_heap_node off of the stack
1257 // to the nodes allocated for the owner of this fq.
1258
1259 // TODO: Enhance binheap() to perform this operation in place.
1260
1261 ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
1262 fq->global_heap_node = old_wait->global_heap_node; // copy
1263 ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add
1264
1265 binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove
1266 fq->donee_heap_node = old_wait->donee_heap_node; // copy
1267
1268 if(fq->donee_heap_node.donor_info) {
1269 // let donor know that our location has changed
1270 BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link
1271 fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
1272 }
1273 INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
1274 binheap_add(&fq->donee_heap_node.node, &sem->donees,
1275 ikglp_donee_heap_node_t, node); // re-add
1276}
1277
1278int ikglp_unlock(struct litmus_lock* l)
1279{
1280 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1281 struct task_struct *t = current;
1282 struct task_struct *donee = NULL;
1283 struct task_struct *next = NULL;
1284 struct task_struct *new_on_fq = NULL;
1285 struct fifo_queue *fq_of_new_on_fq = NULL;
1286
1287 ikglp_wait_state_t *other_donor_info = NULL;
1288 struct fifo_queue *to_steal = NULL;
1289 int need_steal_prio_reeval = 0;
1290 struct fifo_queue *fq;
1291
1292#ifdef CONFIG_LITMUS_DGL_SUPPORT
1293 raw_spinlock_t *dgl_lock;
1294#endif
1295
1296 unsigned long flags = 0, real_flags;
1297
1298 int err = 0;
1299
1300 fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
1301
1302 if (!fq) {
1303 err = -EINVAL;
1304 goto out;
1305 }
1306
1307#ifdef CONFIG_LITMUS_DGL_SUPPORT
1308 dgl_lock = litmus->get_dgl_spinlock(t);
1309#endif
1310 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1311
1312 lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
1313 lock_fine_irqsave(&sem->lock, flags);
1314
1315 TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
1316
1317
1318 // Remove 't' from the heaps, but data in nodes will still be good.
1319 ikglp_del_global_list(sem, t, &fq->global_heap_node);
1320 binheap_delete(&fq->donee_heap_node.node, &sem->donees);
1321
1322 fq->owner = NULL; // no longer owned!!
1323 --(fq->count);
1324 if(fq->count < sem->shortest_fifo_queue->count) {
1325 sem->shortest_fifo_queue = fq;
1326 }
1327 --(sem->nr_in_fifos);
1328
1329#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1330 if(sem->aff_obs) {
1331 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
1332 sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
1333 }
1334#endif
1335
1336 // Move the next request into the FQ and update heaps as needed.
1337 // We defer re-evaluation of priorities to later in the function.
1338 if(fq->donee_heap_node.donor_info) { // move my donor to FQ
1339 ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
1340
1341 new_on_fq = donor_info->task;
1342
1343 // donor moved to FQ
1344 donee = t;
1345
1346#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1347 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1348 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1349 if(fq_of_new_on_fq->count == 0) {
1350 // ignore it?
1351// fq_of_new_on_fq = fq;
1352 }
1353 }
1354 else {
1355 fq_of_new_on_fq = fq;
1356 }
1357#else
1358 fq_of_new_on_fq = fq;
1359#endif
1360
1361 TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1362 new_on_fq->comm, new_on_fq->pid,
1363 ikglp_get_idx(sem, fq_of_new_on_fq),
1364 ikglp_get_idx(sem, fq));
1365
1366
1367 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info);
1368 }
	1369	else if(!binheap_empty(&sem->donors)) { // this owner had no donor of its own, so move another donor to the FQ
1370 // move other donor to FQ
1371 // Select a donor
1372#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1373 other_donor_info = (sem->aff_obs) ?
1374 sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
1375 binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1376#else
1377 other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1378#endif
1379
1380 new_on_fq = other_donor_info->task;
1381 donee = other_donor_info->donee_info->task;
1382
1383 // update the donee's heap position.
1384 other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
1385 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
1386
1387#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1388 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1389 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1390 if(fq_of_new_on_fq->count == 0) {
1391 // ignore it?
1392// fq_of_new_on_fq = fq;
1393 }
1394 }
1395 else {
1396 fq_of_new_on_fq = fq;
1397 }
1398#else
1399 fq_of_new_on_fq = fq;
1400#endif
1401
1402 TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1403 new_on_fq->comm, new_on_fq->pid,
1404 ikglp_get_idx(sem, fq_of_new_on_fq),
1405 ikglp_get_idx(sem, fq));
1406
1407 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info);
1408 }
1409 else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ
1410 ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
1411 ikglp_heap_node_t, node);
1412 ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
1413 pq_node);
1414
1415 new_on_fq = pq_wait->task;
1416
1417#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1418 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1419 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1420 if(fq_of_new_on_fq->count == 0) {
1421 // ignore it?
1422// fq_of_new_on_fq = fq;
1423 }
1424 }
1425 else {
1426 fq_of_new_on_fq = fq;
1427 }
1428#else
1429 fq_of_new_on_fq = fq;
1430#endif
1431
1432 TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1433 new_on_fq->comm, new_on_fq->pid,
1434 ikglp_get_idx(sem, fq_of_new_on_fq),
1435 ikglp_get_idx(sem, fq));
1436
1437 ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait);
1438 }
	1439	else if(fq->count == 0) { // No donors or PQ waiters, and this queue is empty, so steal.
1440 ikglp_wait_state_t *fq_wait;
1441
1442 TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
1443 ikglp_get_idx(sem, fq));
1444
1445#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1446 fq_wait = (sem->aff_obs) ?
1447 sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
1448 ikglp_find_hp_waiter_to_steal(sem);
1449#else
1450 fq_wait = ikglp_find_hp_waiter_to_steal(sem);
1451#endif
1452
1453 if(fq_wait) {
1454 to_steal = fq_wait->donee_heap_node.fq;
1455
1456 new_on_fq = fq_wait->task;
1457 fq_of_new_on_fq = fq;
1458 need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter);
1459
1460 TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
1461 new_on_fq->comm, new_on_fq->pid,
1462 ikglp_get_idx(sem, to_steal),
1463 ikglp_get_idx(sem, fq));
1464
1465 ikglp_steal_to_fq(sem, fq, fq_wait);
1466 }
1467 else {
1468 TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
1469 ikglp_get_idx(sem, fq));
1470 }
1471 }
1472 else { // move no one
1473 }
1474
1475 // 't' must drop all priority and clean up data structures before hand-off.
1476
1477 // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST
1478 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
1479 {
1480 int count = 0;
1481 while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
1482 binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
1483 struct nested_info, hp_binheap_node);
1484 ++count;
1485 }
1486 litmus->decrease_prio(t, NULL);
	1487		WARN_ON(count > 2); // should not be greater than 2: only local fq inheritance and a donation are possible.
1488 }
1489 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
1490
1491
1492
1493 // Now patch up other priorities.
1494 //
1495 // At most one of the following:
1496 // if(donee && donee != t), decrease prio, propagate to owner, or onward
1497 // if(to_steal), update owner's prio (hp_waiter has already been set)
1498 //
1499
1500 BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
1501
1502 if(other_donor_info) {
1503 struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
1504
1505 BUG_ON(!donee);
1506 BUG_ON(donee == t);
1507
1508 TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
1509 other_donor_info->task->comm, other_donor_info->task->pid,
1510 donee->comm, donee->pid);
1511
1512 // need to terminate donation relation.
1513 if(donee == other_fq->owner) {
1514 TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n",
1515 donee->comm, donee->pid,
1516 ikglp_get_idx(sem, other_fq));
1517
1518 ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
1519 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1520 }
1521 else {
	1522			TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n",
1523 donee->comm, donee->pid,
1524 ikglp_get_idx(sem, other_fq));
1525
1526 ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
1527 if(donee == other_fq->hp_waiter) {
1528 TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
1529 donee->comm, donee->pid,
1530 ikglp_get_idx(sem, other_fq));
1531
1532 other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
1533 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1534 ikglp_get_idx(sem, other_fq),
1535 (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
1536 (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
1537
1538 ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it.
1539 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1540 }
1541 }
1542 }
1543 else if(to_steal) {
1544 TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
1545 ikglp_get_idx(sem, to_steal));
1546
1547 if(need_steal_prio_reeval) {
1548 ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it.
1549 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1550 }
1551 }
1552
1553 // check for new HP waiter.
1554 if(new_on_fq) {
1555 if(fq == fq_of_new_on_fq) {
1556 // fq->owner is null, so just update the hp_waiter without locking.
1557 if(new_on_fq == fq->hp_waiter) {
	1558				TRACE_TASK(t, "new_on_fq (%s/%d) is already hp_waiter.\n",
1559 fq->hp_waiter->comm, fq->hp_waiter->pid);
1560 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
1561 }
1562 else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
1563 if(fq->hp_waiter)
1564 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
1565 fq->hp_waiter->comm, fq->hp_waiter->pid);
1566 else
1567 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
1568
1569 fq->hp_waiter = new_on_fq;
1570 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
1571
1572 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1573 ikglp_get_idx(sem, fq),
1574 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1575 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1576 }
1577 }
1578 else {
1579 ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it.
1580 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1581 }
1582 }
1583
1584wake_kludge:
1585 if(waitqueue_active(&fq->wait))
1586 {
1587 wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
1588 ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
1589 next = (struct task_struct*) wait->private;
1590
1591 __remove_wait_queue(&fq->wait, wait);
1592
1593 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1594 ikglp_get_idx(sem, fq),
1595 next->comm, next->pid);
1596
1597 // migrate wait-state to fifo-memory.
1598 ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
1599
	1600		/* next becomes the resource holder */
1601 fq->owner = next;
1602 tsk_rt(next)->blocked_lock = NULL;
1603
1604#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1605 if(sem->aff_obs) {
1606 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
1607 }
1608#endif
1609
1610 /* determine new hp_waiter if necessary */
1611 if (next == fq->hp_waiter) {
1612
1613 TRACE_TASK(next, "was highest-prio waiter\n");
1614 /* next has the highest priority --- it doesn't need to
1615 * inherit. However, we need to make sure that the
1616 * next-highest priority in the queue is reflected in
1617 * hp_waiter. */
1618 fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
1619 TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
1620 ikglp_get_idx(sem, fq),
1621 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1622 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1623
1624 fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
1625 effective_priority(fq->hp_waiter) : NULL;
1626
1627 if (fq->hp_waiter)
1628 TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
1629 else
1630 TRACE("no further waiters\n");
1631
1632 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1633
1634// TRACE_TASK(next, "Heap Before:\n");
1635// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1636
1637 binheap_add(&fq->nest.hp_binheap_node,
1638 &tsk_rt(next)->hp_blocked_tasks,
1639 struct nested_info,
1640 hp_binheap_node);
1641
1642// TRACE_TASK(next, "Heap After:\n");
1643// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1644
1645 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1646 }
1647 else {
1648 /* Well, if 'next' is not the highest-priority waiter,
1649 * then it (probably) ought to inherit the highest-priority
1650 * waiter's priority. */
1651 TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
1652 ikglp_get_idx(sem, fq),
1653 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1654 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1655
1656 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1657
1658 binheap_add(&fq->nest.hp_binheap_node,
1659 &tsk_rt(next)->hp_blocked_tasks,
1660 struct nested_info,
1661 hp_binheap_node);
1662
1663 /* It is possible that 'next' *should* be the hp_waiter, but isn't
1664 * because that update hasn't yet executed (update operation is
1665 * probably blocked on mutex->lock). So only inherit if the top of
1666 * 'next's top heap node is indeed the effective prio. of hp_waiter.
1667 * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
1668 * since the effective priority of hp_waiter can change (and the
1669 * update has not made it to this lock).)
1670 */
1671 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
1672 fq->nest.hp_waiter_eff_prio))
1673 {
1674 if(fq->nest.hp_waiter_eff_prio)
1675 litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
1676 else
1677 WARN_ON(1);
1678 }
1679
1680 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1681 }
1682
1683
1684 // wake up the new resource holder!
1685 wake_up_process(next);
1686 }
1687 if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
	1688		// The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
1689 // Wake up the new guy too.
1690
1691 BUG_ON(fq_of_new_on_fq->owner != NULL);
1692
1693 fq = fq_of_new_on_fq;
1694 fq_of_new_on_fq = NULL;
1695 goto wake_kludge;
1696 }
1697
1698 unlock_fine_irqrestore(&sem->lock, flags);
1699 unlock_global_irqrestore(dgl_lock, flags);
1700
1701 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1702
1703out:
1704 return err;
1705}
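/*
 * Replacement order on release, as implemented above: (1) this owner's own
 * donor, if any, moves into a FIFO queue; else (2) the highest-priority
 * donor overall; else (3) the head of the PQ; else (4) if this queue is now
 * empty, steal the highest-priority waiter from some queue holding more
 * than one request. The releasing task then drops all inheritance it
 * accumulated through this lock before the next waiter (if any) is woken
 * as the new owner.
 */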
1706
1707
1708
1709int ikglp_close(struct litmus_lock* l)
1710{
1711 struct task_struct *t = current;
1712 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1713 unsigned long flags;
1714
1715 int owner = 0;
1716 int i;
1717
1718 raw_spin_lock_irqsave(&sem->real_lock, flags);
1719
1720 for(i = 0; i < sem->nr_replicas; ++i) {
1721 if(sem->fifo_queues[i].owner == t) {
1722 owner = 1;
1723 break;
1724 }
1725 }
1726
1727 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1728
1729 if (owner)
1730 ikglp_unlock(l);
1731
1732 return 0;
1733}
1734
1735void ikglp_free(struct litmus_lock* l)
1736{
1737 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1738
1739 kfree(sem->fifo_queues);
1740 kfree(sem);
1741}
1742
1743
1744
1745struct litmus_lock* ikglp_new(int m,
1746 struct litmus_lock_ops* ops,
1747 void* __user arg)
1748{
1749 struct ikglp_semaphore* sem;
1750 int nr_replicas = 0;
1751 int i;
1752
1753 if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
1754 {
1755 return(NULL);
1756 }
1757 if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
1758 {
1759 return(NULL);
1760 }
1761 if(nr_replicas < 1)
1762 {
1763 return(NULL);
1764 }
1765
1766 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1767 if(!sem)
1768 {
1769 return NULL;
1770 }
1771
1772 sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
1773 if(!sem->fifo_queues)
1774 {
1775 kfree(sem);
1776 return NULL;
1777 }
1778
1779 sem->litmus_lock.ops = ops;
1780
1781#ifdef CONFIG_DEBUG_SPINLOCK
1782 {
1783 __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
1784 }
1785#else
1786 raw_spin_lock_init(&sem->lock);
1787#endif
1788
1789 raw_spin_lock_init(&sem->real_lock);
1790
1791 sem->nr_replicas = nr_replicas;
1792 sem->m = m;
1793 sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
1794 sem->nr_in_fifos = 0;
1795
1796 TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
1797 sem->m,
1798 sem->nr_replicas,
1799 sem->max_fifo_len);
1800
1801 for(i = 0; i < nr_replicas; ++i)
1802 {
1803 struct fifo_queue* q = &(sem->fifo_queues[i]);
1804
1805 q->owner = NULL;
1806 q->hp_waiter = NULL;
1807 init_waitqueue_head(&q->wait);
1808 q->count = 0;
1809
1810 q->global_heap_node.task = NULL;
1811 INIT_BINHEAP_NODE(&q->global_heap_node.node);
1812
1813 q->donee_heap_node.task = NULL;
1814 q->donee_heap_node.donor_info = NULL;
1815 q->donee_heap_node.fq = NULL;
1816 INIT_BINHEAP_NODE(&q->donee_heap_node.node);
1817
1818 q->nest.lock = (struct litmus_lock*)sem;
1819 q->nest.hp_waiter_eff_prio = NULL;
1820 q->nest.hp_waiter_ptr = &q->hp_waiter;
1821 INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
1822 }
1823
1824 sem->shortest_fifo_queue = &sem->fifo_queues[0];
1825
1826 sem->top_m_size = 0;
1827
1828 // init heaps
1829 INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
1830 INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
1831 INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
1832 INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
1833 INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
1834
1835#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1836 sem->aff_obs = NULL;
1837#endif
1838
1839 return &sem->litmus_lock;
1840}
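/*
 * Illustrative sizing: max_fifo_len above is the ceiling of m/k computed
 * with integer arithmetic, i.e. (m / k) + (m % k != 0). For m = 5 and
 * k = 2 replicas this gives 2 + 1 = 3; for m = 4 and k = 2 it gives
 * 2 + 0 = 2.
 */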
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1871
1872static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
1873{
1874 int gpu = replica % aff->nr_rsrc;
1875 return gpu;
1876}
1877
1878static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
1879{
1880 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
1881 return gpu;
1882}
1883
1884static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1885{
1886 int replica = gpu - aff->offset;
1887 return replica;
1888}
1889
1890static inline int same_gpu(struct ikglp_affinity* aff, int replica_a, int replica_b)
1891{
1892 return(replica_to_gpu(aff, replica_a) == replica_to_gpu(aff, replica_b));
1893}
1894
1895static inline int has_affinity(struct ikglp_affinity* aff, struct task_struct* t, int replica)
1896{
1897 if(tsk_rt(t)->last_gpu >= 0)
1898 {
1899 return (tsk_rt(t)->last_gpu == replica_to_gpu(aff, replica));
1900 }
1901 return 0;
1902}
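/*
 * Replica/GPU mapping example (illustrative values): with nr_replicas = 6
 * and nr_simult = 2, nr_rsrc = 3 GPUs back the 6 replicas, so replica r
 * maps to GPU (r % 3) + offset; e.g. replicas 1 and 4 both map to GPU
 * offset + 1. has_affinity() then reports whether that GPU matches the
 * task's last_gpu.
 */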
1903
1904int ikglp_aff_obs_close(struct affinity_observer* obs)
1905{
1906 return 0;
1907}
1908
1909void ikglp_aff_obs_free(struct affinity_observer* obs)
1910{
1911 struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
1912
1913 // make sure the thread destroying this semaphore will not
1914 // call the exit callback on a destroyed lock.
1915 struct task_struct *t = current;
1916 if (is_realtime(t) && tsk_rt(t)->rsrc_exit_cb_args == ikglp_aff)
1917 {
1918 tsk_rt(t)->rsrc_exit_cb = NULL;
1919 tsk_rt(t)->rsrc_exit_cb_args = NULL;
1920 }
1921
1922 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1923 kfree(ikglp_aff->nr_aff_on_rsrc);
1924 kfree(ikglp_aff->q_info);
1925 kfree(ikglp_aff);
1926}
1927
1928static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
1929 struct ikglp_affinity_ops* ikglp_ops,
1930 void* __user args)
1931{
1932 struct ikglp_affinity* ikglp_aff;
1933 struct gpu_affinity_observer_args aff_args;
1934 struct ikglp_semaphore* sem;
1935 int i;
1936 unsigned long flags;
1937
1938 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
1939 return(NULL);
1940 }
1941 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
1942 return(NULL);
1943 }
1944
1945 sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
1946
1947 if(sem->litmus_lock.type != IKGLP_SEM) {
1948 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
1949 return(NULL);
1950 }
1951
1952 if((aff_args.nr_simult_users <= 0) ||
1953 (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
1954 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
1955 "(%d) per replica. #replicas should be evenly divisible "
1956 "by #simult_users.\n",
1957 sem->litmus_lock.ident,
1958 sem->nr_replicas,
1959 aff_args.nr_simult_users);
1960 return(NULL);
1961 }
1962
1963// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1964// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1965// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1966//// return(NULL);
1967// }
1968
1969 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1970 if(!ikglp_aff) {
1971 return(NULL);
1972 }
1973
1974 ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
1975 if(!ikglp_aff->q_info) {
1976 kfree(ikglp_aff);
1977 return(NULL);
1978 }
1979
1980 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1981 if(!ikglp_aff->nr_cur_users_on_rsrc) {
1982 kfree(ikglp_aff->q_info);
1983 kfree(ikglp_aff);
1984 return(NULL);
1985 }
1986
1987 ikglp_aff->nr_aff_on_rsrc = kmalloc(sizeof(int64_t)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1988 if(!ikglp_aff->nr_aff_on_rsrc) {
1989 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1990 kfree(ikglp_aff->q_info);
1991 kfree(ikglp_aff);
1992 return(NULL);
1993 }
1994
1995 affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
1996
1997 ikglp_aff->ops = ikglp_ops;
1998 ikglp_aff->offset = aff_args.replica_to_gpu_offset;
1999 ikglp_aff->nr_simult = aff_args.nr_simult_users;
2000 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
2001 ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
2002
2003 TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
2004 "nr_rsrc = %d, relaxed_fifo_len = %d\n",
2005 ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc,
2006 ikglp_aff->relax_max_fifo_len);
2007
2008 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
2009 memset(ikglp_aff->nr_aff_on_rsrc, 0, sizeof(int64_t)*(ikglp_aff->nr_rsrc));
2010
2011 for(i = 0; i < sem->nr_replicas; ++i) {
2012 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
2013 ikglp_aff->q_info[i].estimated_len = 0;
2014
2015 // multiple q_info's will point to the same resource (aka GPU) if
2016 // aff_args.nr_simult_users > 1
2017 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
2018 ikglp_aff->q_info[i].nr_aff_users = &ikglp_aff->nr_aff_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
2019 }
2020
2021 // attach observer to the lock
2022 raw_spin_lock_irqsave(&sem->real_lock, flags);
2023 sem->aff_obs = ikglp_aff;
2024 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
2025
2026 return &ikglp_aff->obs;
2027}
2028
2029
2030
2031
2032static int gpu_replica_to_resource(struct ikglp_affinity* aff,
2033 struct fifo_queue* fq) {
2034 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2035 return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
2036}
2037
2038
2039// Smart IKGLP Affinity
2040
2041//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
2042//{
2043// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2044// struct ikglp_queue_info *shortest = &aff->q_info[0];
2045// int i;
2046//
2047// for(i = 1; i < sem->nr_replicas; ++i) {
2048// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
2049// shortest = &aff->q_info[i];
2050// }
2051// }
2052//
2053// return(shortest);
2054//}
2055
2056struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2057{
	2058	// advise_enqueue must be smart so as to not break IKGLP rules:
	2059	//  * No queue can be greater than ceil(m/k) in length. We may return
	2060	//    such a queue, but IKGLP will be smart enough to send requests
	2061	//    to donors or the PQ.
	2062	//  * Cannot let a queue idle if there exist waiting PQ/donors
	2063	//    -- needed to guarantee parallel progress of waiters.
	2064	//
	2065	// We may be able to relax some of these constraints, but this will have to
	2066	// be carefully evaluated.
	2067	//
	2068	// Heuristic strategy: find the shortest queue that is not full (a worked example of the cost comparison follows below).
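	// Worked example (illustrative numbers): suppose t last ran on GPU 1,
	// the GPU-1 queue has estimated_len 30us, and a remote queue has
	// estimated_len 10us. If get_gpu_estimate() returns 20us for MIG_LOCAL
	// and 35us after the needed migration, the local queue costs
	// 30 + 20 = 50us while the remote queue costs 10 + 35 = 45us, so the
	// remote queue wins despite the lost affinity. Exact ties are broken in
	// favor of affinity, then fewer affine users, then fewer current users.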
2069
2070 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2071 lt_t min_len;
2072 int min_nr_users, min_nr_aff_users;
2073 struct ikglp_queue_info *shortest, *aff_queue;
2074 struct fifo_queue *to_enqueue;
2075 int i;
2076 int affinity_gpu;
2077
2078 int max_fifo_len = (aff->relax_max_fifo_len) ?
2079 sem->m : sem->max_fifo_len;
2080
2081 // if we have no affinity, find the GPU with the least number of users
2082 // with active affinity
2083 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
2084 int temp_min = aff->nr_aff_on_rsrc[0];
2085 affinity_gpu = aff->offset;
2086
2087		for(i = 1; i < aff->nr_rsrc; ++i) {
2088			if(aff->nr_aff_on_rsrc[i] < temp_min) {
				temp_min = aff->nr_aff_on_rsrc[i]; /* track the running minimum so the least-loaded GPU wins */
2089				affinity_gpu = aff->offset + i;
2090			}
2091		}
2092
2093 TRACE_CUR("no affinity. defaulting to %d with %d aff users.\n",
2094 affinity_gpu, temp_min);
2095 }
2096 else {
2097 affinity_gpu = tsk_rt(t)->last_gpu;
2098 }
2099
2100 // all things being equal, let's start with the queue with which we have
2101 // affinity. this helps us maintain affinity even when we don't have
2102	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
2103 aff_queue = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
2104 shortest = aff_queue;
2105
2106 // if(shortest == aff->shortest_queue) {
2107 // TRACE_CUR("special case: have affinity with shortest queue\n");
2108 // goto out;
2109 // }
2110
2111 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
2112 min_nr_users = *(shortest->nr_cur_users);
2113 min_nr_aff_users = *(shortest->nr_aff_users);
2114
2115
2116 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2117 get_gpu_estimate(t, MIG_LOCAL),
2118 ikglp_get_idx(sem, shortest->q),
2119 shortest->q->count,
2120 min_len);
2121
2122 for(i = 0; i < sem->nr_replicas; ++i) {
2123 if(&aff->q_info[i] != shortest) {
2124 if(aff->q_info[i].q->count < max_fifo_len) {
2125 int want = 0;
2126
2127 lt_t migration =
2128 get_gpu_estimate(t,
2129 gpu_migration_distance(tsk_rt(t)->last_gpu,
2130 replica_to_gpu(aff, i)));
2131 lt_t est_len = aff->q_info[i].estimated_len + migration;
2132
2133 // queue is smaller, or they're equal and the other has a smaller number
2134 // of total users.
2135 //
2136	// tie-break on the smallest number of simultaneous users. this only kicks in
2137	// when there is more than one empty queue.
2138
2139 // TODO: Make "est_len < min_len" a fuzzy function that allows
2140 // queues "close enough" in length to be considered equal.
2141
2142 /* NOTE: 'shortest' starts out with affinity GPU */
2143 if(unlikely(shortest->q->count >= max_fifo_len)) { /* 'shortest' is full and i-th queue is not */
2144 want = 1;
2145 }
2146 else if(est_len < min_len) {
2147 want = 1; /* i-th queue has shortest length */
2148 }
2149 else if(unlikely(est_len == min_len)) { /* equal lengths */
2150 if(!has_affinity(aff, t, ikglp_get_idx(sem, shortest->q))) { /* don't sacrifice affinity on tie */
2151 if(has_affinity(aff, t, i)) {
2152 want = 1; /* switch to maintain affinity */
2153 }
2154 else if(*(aff->q_info[i].nr_aff_users) < min_nr_aff_users) { /* favor one with less affinity load */
2155 want = 1;
2156 }
2157 else if((*(aff->q_info[i].nr_aff_users) == min_nr_aff_users) && /* equal number of affinity */
2158 (*(aff->q_info[i].nr_cur_users) < min_nr_users)) { /* favor one with current fewer users */
2159 want = 1;
2160 }
2161 }
2162 }
2163
2164 if(want) {
2165 shortest = &aff->q_info[i];
2166 min_len = est_len;
2167 min_nr_users = *(aff->q_info[i].nr_cur_users);
2168 min_nr_aff_users = *(aff->q_info[i].nr_aff_users);
2169 }
2170
2171 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2172 get_gpu_estimate(t,
2173 gpu_migration_distance(tsk_rt(t)->last_gpu,
2174 replica_to_gpu(aff, i))),
2175 ikglp_get_idx(sem, aff->q_info[i].q),
2176 aff->q_info[i].q->count,
2177 est_len);
2178 }
2179 else {
2180 TRACE_CUR("queue %d is too long. ineligible for enqueue.\n",
2181 ikglp_get_idx(sem, aff->q_info[i].q));
2182 }
2183 }
2184 }
2185
2186 if(shortest->q->count >= max_fifo_len) {
2187 TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
2188 ikglp_get_idx(sem, shortest->q));
2189 }
2190
2191 to_enqueue = shortest->q;
2192 TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
2193 ikglp_get_idx(sem, to_enqueue),
2194 to_enqueue->count,
2195 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2196
2197 return to_enqueue;
2198
2199 //return(sem->shortest_fifo_queue);
2200}
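
A minimal user-space sketch of the decision rule above, under simplifying assumptions: queues are plain structs, the migration-cost numbers are invented, and the affinity/affinity-load/current-user tie-break chain is collapsed to a single affinity check. It is meant only to illustrate "shortest estimated queue that is not full, preferring the affinity replica on ties", not to reproduce the kernel code.

/* Simplified model of gpu_ikglp_advise_enqueue(): pick the non-full replica
 * minimizing (estimated queue length + migration-scaled CS estimate).
 */
#include <stdio.h>

struct q { unsigned long long est_len; int count; int gpu; };

static unsigned long long mig_cost(int last_gpu, int gpu)
{
	/* hypothetical cost model: staying local is cheapest */
	return (last_gpu == gpu) ? 100 : 250;
}

static int advise_enqueue(struct q *qs, int n, int max_len, int last_gpu)
{
	int best = -1, i;
	unsigned long long best_len = ~0ULL;

	for (i = 0; i < n; ++i) {
		unsigned long long len;
		if (qs[i].count >= max_len)
			continue;	/* respect the ceil(m/k) bound */
		len = qs[i].est_len + mig_cost(last_gpu, qs[i].gpu);
		if (len < best_len ||
		    (len == best_len && qs[i].gpu == last_gpu)) {
			best = i;	/* shorter, or equal with affinity */
			best_len = len;
		}
	}
	return best;	/* -1: all queues full; the real code returns one anyway */
}

int main(void)
{
	struct q qs[3] = { {300, 1, 0}, {200, 1, 1}, {250, 2, 2} };
	printf("enqueue on replica %d\n", advise_enqueue(qs, 3, 2, 0));
	return 0;
}
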
2201
2202
2203
2204
2205static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff,
2206 int dest_gpu,
2207 struct fifo_queue* fq)
2208{
2209 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2210 ikglp_wait_state_t *wait = NULL;
2211 int max_improvement = -(MIG_NONE+1);
2212 int replica = ikglp_get_idx(sem, fq);
2213
2214 if(waitqueue_active(&fq->wait)) {
2215 int this_gpu = replica_to_gpu(aff, replica);
2216 struct list_head *pos;
2217
2218 list_for_each(pos, &fq->wait.task_list) {
2219 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2220 ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2221
2222 int tmp_improvement =
2223 gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) -
2224 gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu);
2225
2226 if(tmp_improvement > max_improvement) {
2227 wait = tmp_wait;
2228 max_improvement = tmp_improvement;
2229
2230 if(max_improvement >= (MIG_NONE-1)) {
2231 goto out;
2232 }
2233 }
2234 }
2235
2236 BUG_ON(!wait);
2237 }
2238 else {
2239 TRACE_CUR("fq %d is empty!\n", replica);
2240 }
2241
2242out:
2243
2244 TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n",
2245 replica,
2246 (wait) ? wait->task->comm : "nil",
2247 (wait) ? wait->task->pid : -1,
2248 max_improvement);
2249
2250 return wait;
2251}
2252
2253
2254ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2255 struct fifo_queue* dst)
2256{
2257	// Heuristic strategy: Find the task with the greatest improvement in affinity (a sketch of the improvement metric follows this function).
2258 //
2259 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2260 ikglp_wait_state_t *to_steal_state = NULL;
2261// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem);
2262 int max_improvement = -(MIG_NONE+1);
2263 int replica, i;
2264 int dest_gpu;
2265
2266 replica = ikglp_get_idx(sem, dst);
2267 dest_gpu = replica_to_gpu(aff, replica);
2268
2269 for(i = 0; i < sem->nr_replicas; ++i) {
2270 ikglp_wait_state_t *tmp_to_steal_state =
2271 pick_steal(aff, dest_gpu, &sem->fifo_queues[i]);
2272
2273 if(tmp_to_steal_state) {
2274 int tmp_improvement =
2275 gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) -
2276 gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu);
2277
2278 if(tmp_improvement > max_improvement) {
2279 to_steal_state = tmp_to_steal_state;
2280 max_improvement = tmp_improvement;
2281
2282 if(max_improvement >= (MIG_NONE-1)) {
2283 goto out;
2284 }
2285 }
2286 }
2287 }
2288
2289out:
2290 if(!to_steal_state) {
2291 TRACE_CUR("Could not find anyone to steal.\n");
2292 }
2293 else {
2294 TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2295 to_steal_state->task->comm, to_steal_state->task->pid,
2296 ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq),
2297 replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)),
2298 ikglp_get_idx(sem, dst),
2299 dest_gpu,
2300 max_improvement);
2301
2302// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2303// default_to_steal_state->task->comm, default_to_steal_state->task->pid,
2304// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq),
2305// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2306// ikglp_get_idx(sem, dst),
2307// replica_to_gpu(aff, ikglp_get_idx(sem, dst)),
2308//
2309// gpu_migration_distance(
2310// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2311// tsk_rt(default_to_steal_state->task)->last_gpu) -
2312// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu));
2313 }
2314
2315 return(to_steal_state);
2316}
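
The improvement score used above is the difference between the victim's current migration distance and the distance it would have at the destination GPU. The sketch below restates that formula with an assumed distance ordering (MIG_LOCAL < MIG_NEAR < MIG_MED < MIG_FAR < MIG_NONE; the actual enum lives in the GPU-affinity headers and is not shown in this hunk) and shows why MIG_NONE - 1 is the natural early-exit bound.

/* improvement = dist(victim's current GPU, victim's last GPU)
 *             - dist(destination GPU,      victim's last GPU)
 */
#include <stdio.h>

enum mig { MIG_LOCAL = 0, MIG_NEAR, MIG_MED, MIG_FAR, MIG_NONE };

/* hypothetical distance model: any two distinct GPUs are equally far apart */
static int dist(int a, int b)
{
	if (a < 0 || b < 0) return MIG_NONE;
	if (a == b)         return MIG_LOCAL;
	return MIG_FAR;
}

int main(void)
{
	int victim_last = 2, victim_cur = 1, dest = 2;
	int improvement = dist(victim_cur, victim_last) - dist(dest, victim_last);

	printf("improvement = %d (max possible = %d)\n",
	       improvement, MIG_NONE - 1);
	return 0;
}
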
2317
2318
2319static inline int has_donor(wait_queue_t* fq_wait)
2320{
2321 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2322 return(wait->donee_heap_node.donor_info != NULL);
2323}
2324
2325static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
2326 struct fifo_queue* fq,
2327 int* dist_from_head)
2328{
2329 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2330 struct task_struct *donee;
2331 ikglp_donee_heap_node_t *donee_node;
2332 struct task_struct *mth_highest = ikglp_mth_highest(sem);
2333
2334// lt_t now = litmus_clock();
2335//
2336// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
2337// ikglp_get_idx(sem, fq),
2338// mth_highest->comm, mth_highest->pid,
2339// (int)get_deadline(mth_highest) - now);
2340
2341 if(fq->owner &&
2342 fq->donee_heap_node.donor_info == NULL &&
2343 mth_highest != fq->owner &&
2344 litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
2345 donee = fq->owner;
2346 donee_node = &(fq->donee_heap_node);
2347 *dist_from_head = 0;
2348
2349 BUG_ON(donee != donee_node->task);
2350
2351 TRACE_CUR("picked owner of fq %d as donee\n",
2352 ikglp_get_idx(sem, fq));
2353
2354 goto out;
2355 }
2356 else if(waitqueue_active(&fq->wait)) {
2357 struct list_head *pos;
2358
2359
2360// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
2361// "(mth_highest != fq->owner) = %d "
2362// "(mth_highest > fq->owner) = %d\n",
2363// ikglp_get_idx(sem, fq),
2364// (fq->owner) ? fq->owner->comm : "nil",
2365// (fq->owner) ? fq->owner->pid : -1,
2366// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
2367// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
2368// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
2369// (mth_highest != fq->owner),
2370// (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
2371
2372
2373 *dist_from_head = 1;
2374
2375 // iterating from the start of the queue is nice since this means
2376 // the donee will be closer to obtaining a resource.
2377 list_for_each(pos, &fq->wait.task_list) {
2378 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2379 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2380
2381// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
2382// "(mth_highest != wait->task) = %d "
2383// "(mth_highest > wait->task) = %d\n",
2384// ikglp_get_idx(sem, fq),
2385// dist_from_head,
2386// wait->task->comm, wait->task->pid,
2387// (int)get_deadline(wait->task) - now,
2388// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
2389// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
2390// (mth_highest != wait->task),
2391// (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
2392
2393
2394 if(!has_donor(fq_wait) &&
2395 mth_highest != wait->task &&
2396 litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
2397 donee = (struct task_struct*) fq_wait->private;
2398 donee_node = &wait->donee_heap_node;
2399
2400 BUG_ON(donee != donee_node->task);
2401
2402 TRACE_CUR("picked waiter in fq %d as donee\n",
2403 ikglp_get_idx(sem, fq));
2404
2405 goto out;
2406 }
2407 ++(*dist_from_head);
2408 }
2409 }
2410
2411 donee = NULL;
2412 donee_node = NULL;
2413 //*dist_from_head = sem->max_fifo_len + 1;
2414 *dist_from_head = IKGLP_INVAL_DISTANCE;
2415
2416 TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
2417
2418out:
2419
2420 TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
2421 ikglp_get_idx(sem, fq),
2422 (donee) ? (donee)->comm : "nil",
2423 (donee) ? (donee)->pid : -1,
2424 *dist_from_head);
2425
2426 return donee_node;
2427}
2428
2429ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
2430 struct ikglp_affinity* aff,
2431 struct task_struct* donor)
2432{
2433	// Heuristic strategy: Find the highest-priority donee that is waiting on
2434 // a queue closest to our affinity. (1) The donee CANNOT already have a
2435 // donor (exception: donee is the lowest-prio task in the donee heap).
2436 // (2) Requests in 'top_m' heap are ineligible.
2437 //
2438	// Further strategy: amongst eligible donees waiting for the same GPU, pick
2439 // the one closest to the head of the FIFO queue (including owners).
2440 //
2441 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2442 ikglp_donee_heap_node_t *donee_node;
2443 gpu_migration_dist_t distance;
2444 int start, i, j;
2445
2446 ikglp_donee_heap_node_t *default_donee;
2447 ikglp_wait_state_t *default_donee_donor_info;
2448
2449 if(tsk_rt(donor)->last_gpu < 0) {
2450 // no affinity. just return the min prio, like standard IKGLP
2451 // TODO: Find something closer to the head of the queue??
2452 donee_node = binheap_top_entry(&sem->donees,
2453 ikglp_donee_heap_node_t,
2454 node);
2455 goto out;
2456 }
2457
2458
2459	// Temporarily break any donation relation of the default donee (the lowest-
2460 // prio task in the FIFO queues) to make it eligible for selection below.
2461 //
2462 // NOTE: The original donor relation *must* be restored, even if we select
2463	// the default donee through affinity-aware selection, before returning
2464 // from this function so we don't screw up our heap ordering.
2465 // The standard IKGLP algorithm will steal the donor relationship if needed.
2466 default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2467 default_donee_donor_info = default_donee->donor_info; // back-up donor relation
2468 default_donee->donor_info = NULL; // temporarily break any donor relation.
2469
2470 // initialize our search
2471 donee_node = NULL;
2472 distance = MIG_NONE;
2473
2474 // TODO: The below search logic may work well for locating nodes to steal
2475 // when an FQ goes idle. Validate this code and apply it to stealing.
2476
2477 // begin search with affinity GPU.
2478 start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
2479 i = start;
2480 do { // "for each gpu" / "for each aff->nr_rsrc"
2481 gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
2482
2483 // only interested in queues that will improve our distance
2484 if(temp_distance < distance || donee_node == NULL) {
2485 int dist_from_head = IKGLP_INVAL_DISTANCE;
2486
2487 TRACE_CUR("searching for donor on GPU %d", i);
2488
2489 // visit each queue and pick a donee. bail as soon as we find
2490 // one for this class.
2491
2492 for(j = 0; j < aff->nr_simult; ++j) {
2493 int temp_dist_from_head;
2494 ikglp_donee_heap_node_t *temp_donee_node;
2495 struct fifo_queue *fq;
2496
2497 fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
2498 temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
2499
2500 if(temp_dist_from_head < dist_from_head)
2501 {
2502 // we check all the FQs for this GPU to spread priorities
2503 // out across the queues. does this decrease jitter?
2504 donee_node = temp_donee_node;
2505 dist_from_head = temp_dist_from_head;
2506 }
2507 }
2508
2509 if(dist_from_head != IKGLP_INVAL_DISTANCE) {
2510 TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n",
2511 donee_node->task->comm, donee_node->task->pid,
2512 dist_from_head);
2513 }
2514 else {
2515 TRACE_CUR("found no eligible donors from GPU %d\n", i);
2516 }
2517 }
2518 else {
2519 TRACE_CUR("skipping GPU %d (distance = %d, best donor "
2520 "distance = %d)\n", i, temp_distance, distance);
2521 }
2522
2523 i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around
2524 } while (i != start);
2525
2526
2527 // restore old donor info state.
2528 default_donee->donor_info = default_donee_donor_info;
2529
2530 if(!donee_node) {
2531 donee_node = default_donee;
2532
2533 TRACE_CUR("Could not find a donee. We have to steal one.\n");
2534 WARN_ON(default_donee->donor_info == NULL);
2535 }
2536
2537out:
2538
2539 TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
2540 donee_node->task->comm, donee_node->task->pid,
2541 ikglp_get_idx(sem, donee_node->fq),
2542 replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
2543 donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
2544
2545 return(donee_node);
2546}
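
The donee search above walks GPUs in wrap-around order starting from the donor's last GPU, and for each GPU inspects the nr_simult replicas that map to it via the i + j*nr_rsrc striding. The sketch below only prints that visitation order for made-up parameters; the real code additionally prunes GPUs whose migration distance cannot beat the best donee found so far.

/* Visitation order of the do/while loop above: GPUs starting at the
 * affinity GPU with wrap-around, then the replicas striped onto each GPU.
 */
#include <stdio.h>

int main(void)
{
	int nr_rsrc = 4, nr_simult = 2, start = 2;	/* affinity GPU = 2 */
	int i = start, j;

	do {
		printf("GPU %d: replicas", i);
		for (j = 0; j < nr_simult; ++j)
			printf(" %d", i + j * nr_rsrc);
		printf("\n");
		i = (i + 1 < nr_rsrc) ? i + 1 : 0;	/* increment with wrap-around */
	} while (i != start);
	return 0;
}
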
2547
2548
2549
2550static void __find_closest_donor(int target_gpu,
2551 struct binheap_node* donor_node,
2552 ikglp_wait_state_t** cur_closest,
2553 int* cur_dist)
2554{
2555 ikglp_wait_state_t *this_donor =
2556 binheap_entry(donor_node, ikglp_wait_state_t, node);
2557
2558 int this_dist =
2559 gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
2560
2561// TRACE_CUR("%s/%d: dist from target = %d\n",
2562// this_donor->task->comm,
2563// this_donor->task->pid,
2564// this_dist);
2565
2566 if(this_dist < *cur_dist) {
2567 // take this donor
2568 *cur_dist = this_dist;
2569 *cur_closest = this_donor;
2570 }
2571 else if(this_dist == *cur_dist) {
2572 // priority tie-break. Even though this is a pre-order traversal,
2573	// this is a heap, not a binary search tree, so we still need to do a priority
2574	// comparison.
2575 if(!(*cur_closest) ||
2576 litmus->compare(this_donor->task, (*cur_closest)->task)) {
2577 *cur_dist = this_dist;
2578 *cur_closest = this_donor;
2579 }
2580 }
2581
2582 if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
2583 if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
2584}
2585
2586ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2587{
2588	// Heuristic strategy: Find the donor with the closest affinity to fq.
2589 // Tie-break on priority.
2590
2591	// We need to iterate over all the donors to do this.  Unfortunately,
2592	// our donors are organized in a heap.  We'll visit each node with a
2593	// recursive call.  This is relatively safe since there are only sem->m
2594	// donors, at most.  We won't recurse deeply enough to have to worry about
2595	// our stack (even with 128 CPUs, the nesting depth is at most 7; a sketch of the bound follows this function).
2596
2597 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2598 ikglp_wait_state_t *donor = NULL;
2599 int distance = MIG_NONE;
2600 int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
2601
2602#ifdef CONFIG_SCHED_DEBUG_TRACE
2603 ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2604#endif
2605
2606 __find_closest_donor(gpu, sem->donors.root, &donor, &distance);
2607
2608 TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
2609 "(non-aff wanted %s/%d). differs = %d\n",
2610 donor->task->comm, donor->task->pid,
2611 distance,
2612 ikglp_get_idx(sem, fq),
2613 default_donor->task->comm, default_donor->task->pid,
2614 (donor->task != default_donor->task)
2615 );
2616
2617 return(donor);
2618}
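
A quick sanity check of the stack-depth argument above: the donor heap holds at most sem->m entries, so the recursive walk nests at most one frame per heap level, i.e. roughly floor(log2(m)) + 1 frames. The snippet below computes that bound for m = 128 (the figure used in the comment); it is a back-of-the-envelope check, not kernel code.

#include <stdio.h>

static int heap_levels(int m)
{
	int h = 0;
	while (m > 0) {	/* number of levels in a complete binary heap of m nodes */
		m >>= 1;
		++h;
	}
	return h;
}

int main(void)
{
	printf("m = 128 -> at most %d nested calls\n", heap_levels(128));
	return 0;
}
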
2619
2620
2621
2622void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2623{
2624 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2625 int replica = ikglp_get_idx(sem, fq);
2626 int gpu = replica_to_gpu(aff, replica);
2627 struct ikglp_queue_info *info = &aff->q_info[replica];
2628 lt_t est_time;
2629 lt_t est_len_before;
2630
2631 if(current == t) {
2632 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
2633 }
2634
2635 est_len_before = info->estimated_len;
2636 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2637 info->estimated_len += est_time;
2638
2639 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
2640 ikglp_get_idx(sem, info->q),
2641 est_len_before, est_time,
2642 info->estimated_len);
2643
2644 // if(aff->shortest_queue == info) {
2645 // // we may no longer be the shortest
2646 // aff->shortest_queue = ikglp_aff_find_shortest(aff);
2647 //
2648 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2649 // ikglp_get_idx(sem, aff->shortest_queue->q),
2650 // aff->shortest_queue->q->count,
2651 // aff->shortest_queue->estimated_len);
2652 // }
2653}
2654
2655void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2656{
2657 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2658 int replica = ikglp_get_idx(sem, fq);
2659 int gpu = replica_to_gpu(aff, replica);
2660 struct ikglp_queue_info *info = &aff->q_info[replica];
2661 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2662
2663 if(est_time > info->estimated_len) {
2664 WARN_ON(1);
2665 info->estimated_len = 0;
2666 }
2667 else {
2668 info->estimated_len -= est_time;
2669 }
2670
2671 TRACE_CUR("fq %d est len is now %llu\n",
2672 ikglp_get_idx(sem, info->q),
2673 info->estimated_len);
2674
2675 // check to see if we're the shortest queue now.
2676 // if((aff->shortest_queue != info) &&
2677 // (aff->shortest_queue->estimated_len > info->estimated_len)) {
2678 //
2679 // aff->shortest_queue = info;
2680 //
2681 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2682 // ikglp_get_idx(sem, info->q),
2683 // info->q->count,
2684 // info->estimated_len);
2685 // }
2686}
2687
2688int gpu_ikglp_notify_exit(struct ikglp_affinity* aff, struct task_struct* t)
2689{
2690 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2691 unsigned long flags = 0, real_flags;
2692 int aff_rsrc;
2693#ifdef CONFIG_LITMUS_DGL_SUPPORT
2694 raw_spinlock_t *dgl_lock;
2695
2696 dgl_lock = litmus->get_dgl_spinlock(t);
2697#endif
2698
2699 if (tsk_rt(t)->last_gpu < 0)
2700 return 0;
2701
2702 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
2703 lock_global_irqsave(dgl_lock, flags);
2704 lock_fine_irqsave(&sem->lock, flags);
2705
2706 // decrement affinity count on old GPU
2707 aff_rsrc = tsk_rt(t)->last_gpu - aff->offset;
2708 --(aff->nr_aff_on_rsrc[aff_rsrc]);
2709// aff->nr_aff_on_rsrc[aff_rsrc] -= ((uint64_t)1e9)/get_rt_period(t);
2710
2711 if(unlikely(aff->nr_aff_on_rsrc[aff_rsrc] < 0)) {
2712 WARN_ON(aff->nr_aff_on_rsrc[aff_rsrc] < 0);
2713 aff->nr_aff_on_rsrc[aff_rsrc] = 0;
2714 }
2715
2716 unlock_fine_irqrestore(&sem->lock, flags);
2717 unlock_global_irqrestore(dgl_lock, flags);
2718 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
2719
2720 return 0;
2721}
2722
2723int gpu_ikglp_notify_exit_trampoline(struct task_struct* t)
2724{
2725 struct ikglp_affinity* aff = (struct ikglp_affinity*)tsk_rt(t)->rsrc_exit_cb_args;
2726 if(likely(aff)) {
2727 return gpu_ikglp_notify_exit(aff, t);
2728 }
2729 else {
2730 return -1;
2731 }
2732}
2733
2734void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2735 struct fifo_queue* fq,
2736 struct task_struct* t)
2737{
2738 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2739 int replica = ikglp_get_idx(sem, fq);
2740 int gpu = replica_to_gpu(aff, replica);
2741 int last_gpu = tsk_rt(t)->last_gpu;
2742
2743 tsk_rt(t)->gpu_migration = gpu_migration_distance(last_gpu, gpu); // record the type of migration
2744
2745 TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
2746 t->comm, t->pid, gpu, last_gpu, tsk_rt(t)->gpu_migration);
2747
2748	// count the number of resource holders
2749 ++(*(aff->q_info[replica].nr_cur_users));
2750
2751 if(gpu != last_gpu) {
2752 if(last_gpu >= 0) {
2753 int old_rsrc = last_gpu - aff->offset;
2754 --(aff->nr_aff_on_rsrc[old_rsrc]);
2755// aff->nr_aff_on_rsrc[old_rsrc] -= ((uint64_t)(1e9)/get_rt_period(t));
2756 }
2757
2758 // increment affinity count on new GPU
2759 ++(aff->nr_aff_on_rsrc[gpu - aff->offset]);
2760// aff->nr_aff_on_rsrc[gpu - aff->offset] += ((uint64_t)(1e9)/get_rt_period(t));
2761 tsk_rt(t)->rsrc_exit_cb_args = aff;
2762 tsk_rt(t)->rsrc_exit_cb = gpu_ikglp_notify_exit_trampoline;
2763 }
2764
2765 reg_nv_device(gpu, 1, t); // register
2766
2767 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
2768 reset_gpu_tracker(t);
2769 start_gpu_tracker(t);
2770}
2771
2772void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
2773 struct fifo_queue* fq,
2774 struct task_struct* t)
2775{
2776 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2777 int replica = ikglp_get_idx(sem, fq);
2778 int gpu = replica_to_gpu(aff, replica);
2779 lt_t est_time;
2780
2781 stop_gpu_tracker(t); // stop the tracker before we do anything else.
2782
2783 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2784
2785	// count the number of resource holders
2786 --(*(aff->q_info[replica].nr_cur_users));
2787
2788 reg_nv_device(gpu, 0, t); // unregister
2789
2790 // update estimates
2791 update_gpu_estimate(t, get_gpu_time(t));
2792
2793 TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
2794 "estimated was %llu. diff is %d\n",
2795 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
2796 tsk_rt(t)->gpu_migration,
2797 get_gpu_time(t),
2798 est_time,
2799 (long long)get_gpu_time(t) - (long long)est_time);
2800
2801 tsk_rt(t)->last_gpu = gpu;
2802}
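
update_gpu_estimate() and get_gpu_time() are implemented in litmus/gpu_affinity.c and are not part of this hunk. The sketch below is only a plausible stand-in for the feedback step visible here -- fold the observed GPU hold time into a per-migration-distance estimate so later advise/notify calls see better queue lengths -- using a simple exponential moving average; the real estimator may use a different filter.

/* Hypothetical per-migration-class estimator (EMA with alpha = 1/4). */
#include <stdio.h>

#define NUM_DIST 5	/* assumed classes: LOCAL, NEAR, MED, FAR, NONE */

struct gpu_est { unsigned long long avg[NUM_DIST]; };

static void update_estimate(struct gpu_est *e, int d, unsigned long long observed)
{
	long long old = (long long)e->avg[d];
	long long diff = (long long)observed - old;
	e->avg[d] = (unsigned long long)(old + diff / 4);	/* new = old + (obs - old)/4 */
}

int main(void)
{
	struct gpu_est e = { { 1000, 1500, 2000, 3000, 0 } };
	update_estimate(&e, 1, 2300);	/* observed a NEAR-migration section */
	printf("new NEAR estimate = %llu\n", e.avg[1]);
	return 0;
}
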
2803
2804struct ikglp_affinity_ops gpu_ikglp_affinity =
2805{
2806 .advise_enqueue = gpu_ikglp_advise_enqueue,
2807 .advise_steal = gpu_ikglp_advise_steal,
2808 .advise_donee_selection = gpu_ikglp_advise_donee_selection,
2809 .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
2810
2811 .notify_enqueue = gpu_ikglp_notify_enqueue,
2812 .notify_dequeue = gpu_ikglp_notify_dequeue,
2813 .notify_acquired = gpu_ikglp_notify_acquired,
2814 .notify_freed = gpu_ikglp_notify_freed,
2815
2816 .notify_exit = gpu_ikglp_notify_exit,
2817
2818 .replica_to_resource = gpu_replica_to_resource,
2819};
2820
2821struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2822 void* __user args)
2823{
2824 return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
2825}
2826
2827
2828
2829
2830
2831
2832
2833
2834// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
2835
2836struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2837{
2838 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2839 int min_count;
2840 int min_nr_users;
2841 struct ikglp_queue_info *shortest;
2842 struct fifo_queue *to_enqueue;
2843 int i;
2844
2845 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
2846
2847 shortest = &aff->q_info[0];
2848 min_count = shortest->q->count;
2849 min_nr_users = *(shortest->nr_cur_users);
2850
2851 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2852 ikglp_get_idx(sem, shortest->q),
2853 shortest->q->count,
2854 min_nr_users);
2855
2856 for(i = 1; i < sem->nr_replicas; ++i) {
2857 int len = aff->q_info[i].q->count;
2858
2859 // queue is smaller, or they're equal and the other has a smaller number
2860 // of total users.
2861 //
2862	// tie-break on the smallest number of simultaneous users. this only kicks in
2863	// when there is more than one empty queue.
2864 if((len < min_count) ||
2865 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2866 shortest = &aff->q_info[i];
2867 min_count = shortest->q->count;
2868 min_nr_users = *(aff->q_info[i].nr_cur_users);
2869 }
2870
2871 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2872 ikglp_get_idx(sem, aff->q_info[i].q),
2873 aff->q_info[i].q->count,
2874 *(aff->q_info[i].nr_cur_users));
2875 }
2876
2877 to_enqueue = shortest->q;
2878 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
2879 ikglp_get_idx(sem, to_enqueue),
2880 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2881
2882 return to_enqueue;
2883}
2884
2885ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2886 struct fifo_queue* dst)
2887{
2888 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2889 // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
2890 return ikglp_find_hp_waiter_to_steal(sem);
2891}
2892
2893ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
2894{
2895 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2896 ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2897 return(donee);
2898}
2899
2900ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2901{
2902 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2903 ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2904 return(donor);
2905}
2906
2907void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2908{
2909 // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
2910}
2911
2912void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2913{
2914 // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
2915}
2916
2917void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2918{
2919 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2920 int replica = ikglp_get_idx(sem, fq);
2921 int gpu = replica_to_gpu(aff, replica);
2922
2923 // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
2924
2925	// count the number of resource holders
2926 ++(*(aff->q_info[replica].nr_cur_users));
2927
2928 reg_nv_device(gpu, 1, t); // register
2929}
2930
2931void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2932{
2933 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2934 int replica = ikglp_get_idx(sem, fq);
2935 int gpu = replica_to_gpu(aff, replica);
2936
2937 // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
2938	// count the number of resource holders
2939 --(*(aff->q_info[replica].nr_cur_users));
2940
2941 reg_nv_device(gpu, 0, t); // unregister
2942}
2943
2944struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
2945{
2946 .advise_enqueue = simple_gpu_ikglp_advise_enqueue,
2947 .advise_steal = simple_gpu_ikglp_advise_steal,
2948 .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
2949 .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
2950
2951 .notify_enqueue = simple_gpu_ikglp_notify_enqueue,
2952 .notify_dequeue = simple_gpu_ikglp_notify_dequeue,
2953 .notify_acquired = simple_gpu_ikglp_notify_acquired,
2954 .notify_freed = simple_gpu_ikglp_notify_freed,
2955
2956 .notify_exit = NULL,
2957
2958 .replica_to_resource = gpu_replica_to_resource,
2959};
2960
2961struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2962 void* __user args)
2963{
2964 return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
2965}
2966
2967#endif
2968
2969
2970
2971
2972
2973
2974
2975
2976
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 13a4ed4c9e93..e25854e1d143 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -13,6 +13,8 @@ static inline void setup_release(struct task_struct *t, lt_t release)
13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); 13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
14 t->rt_param.job_params.exec_time = 0; 14 t->rt_param.job_params.exec_time = 0;
15 15
16 clear_bit(RT_JOB_SIG_BUDGET_SENT, &t->rt_param.job_params.flags);
17
16 /* update job sequence number */ 18 /* update job sequence number */
17 t->rt_param.job_params.job_no++; 19 t->rt_param.job_params.job_no++;
18 20
diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c
new file mode 100644
index 000000000000..5ef5e54d600d
--- /dev/null
+++ b/litmus/kexclu_affinity.c
@@ -0,0 +1,92 @@
1#include <litmus/fdso.h>
2#include <litmus/sched_plugin.h>
3#include <litmus/trace.h>
4#include <litmus/litmus.h>
5#include <litmus/locking.h>
6
7#include <litmus/kexclu_affinity.h>
8
9static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg);
10static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg);
11static int close_generic_aff_obs(struct od_table_entry* entry);
12static void destroy_generic_aff_obs(obj_type_t type, void* sem);
13
14struct fdso_ops generic_affinity_ops = {
15 .create = create_generic_aff_obs,
16 .open = open_generic_aff_obs,
17 .close = close_generic_aff_obs,
18 .destroy = destroy_generic_aff_obs
19};
20
21static atomic_t aff_obs_id_gen = ATOMIC_INIT(0);
22
23static inline bool is_affinity_observer(struct od_table_entry *entry)
24{
25 return (entry->class == &generic_affinity_ops);
26}
27
28static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry)
29{
30 BUG_ON(!is_affinity_observer(entry));
31 return (struct affinity_observer*) entry->obj->obj;
32}
33
34static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg)
35{
36 struct affinity_observer* aff_obs;
37 int err;
38
39 err = litmus->allocate_aff_obs(&aff_obs, type, arg);
40 if (err == 0) {
41 BUG_ON(!aff_obs->lock);
42 aff_obs->type = type;
43 *obj_ref = aff_obs;
44 }
45 return err;
46}
47
48static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg)
49{
50 struct affinity_observer* aff_obs = get_affinity_observer(entry);
51 if (aff_obs->ops->open)
52 return aff_obs->ops->open(aff_obs, arg);
53 else
54 return 0; /* default: any task can open it */
55}
56
57static int close_generic_aff_obs(struct od_table_entry* entry)
58{
59 struct affinity_observer* aff_obs = get_affinity_observer(entry);
60 if (aff_obs->ops->close)
61 return aff_obs->ops->close(aff_obs);
62 else
63 return 0; /* default: closing succeeds */
64}
65
66static void destroy_generic_aff_obs(obj_type_t type, void* obj)
67{
68 struct affinity_observer* aff_obs = (struct affinity_observer*) obj;
69 aff_obs->ops->deallocate(aff_obs);
70}
71
72
73struct litmus_lock* get_lock_from_od(int od)
74{
75 extern struct fdso_ops generic_lock_ops;
76
77 struct od_table_entry *entry = get_entry_for_od(od);
78
79 if(entry && entry->class == &generic_lock_ops) {
80 return (struct litmus_lock*) entry->obj->obj;
81 }
82 return NULL;
83}
84
85void affinity_observer_new(struct affinity_observer* aff,
86 struct affinity_observer_ops* ops,
87 struct affinity_observer_args* args)
88{
89 aff->ops = ops;
90 aff->lock = get_lock_from_od(args->lock_od);
91 aff->ident = atomic_inc_return(&aff_obs_id_gen);
92} \ No newline at end of file
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
new file mode 100644
index 000000000000..785a095275e6
--- /dev/null
+++ b/litmus/kfmlp_lock.c
@@ -0,0 +1,1003 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/kfmlp_lock.h>
14
15static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
16 struct kfmlp_queue* queue)
17{
18 return (queue - &sem->queues[0]);
19}
20
21static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
22 struct task_struct* holder)
23{
24 int i;
25 for(i = 0; i < sem->num_resources; ++i)
26 if(sem->queues[i].owner == holder)
27 return(&sem->queues[i]);
28 return(NULL);
29}
30
31/* caller is responsible for locking */
32static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
33 struct task_struct *skip)
34{
35 struct list_head *pos;
36 struct task_struct *queued, *found = NULL;
37
38 list_for_each(pos, &kqueue->wait.task_list) {
39 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
40 task_list)->private;
41
42 /* Compare task prios, find high prio task. */
43 //if (queued != skip && edf_higher_prio(queued, found))
44 if (queued != skip && litmus->compare(queued, found))
45 found = queued;
46 }
47 return found;
48}
49
50static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
51 struct kfmlp_queue* search_start)
52{
53 // we start our search at search_start instead of at the beginning of the
54 // queue list to load-balance across all resources.
55 struct kfmlp_queue* step = search_start;
56 struct kfmlp_queue* shortest = sem->shortest_queue;
57
58 do
59 {
60 step = (step+1 != &sem->queues[sem->num_resources]) ?
61 step+1 : &sem->queues[0];
62
63 if(step->count < shortest->count)
64 {
65 shortest = step;
66 if(step->count == 0)
67 break; /* can't get any shorter */
68 }
69
70 }while(step != search_start);
71
72 return(shortest);
73}
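
A standalone restatement of the circular search above: scan all queues once starting just past search_start, keep the shortest seen so far, and stop early on an empty queue. Starting the scan at different points is what spreads otherwise-equal choices across the replicas. The values below are arbitrary.

#include <stdio.h>

static int find_shortest(const int *count, int n, int start, int cur_shortest)
{
	int step = start, shortest = cur_shortest;

	do {
		step = (step + 1 < n) ? step + 1 : 0;	/* wrap-around */
		if (count[step] < count[shortest]) {
			shortest = step;
			if (count[step] == 0)
				break;	/* can't get any shorter */
		}
	} while (step != start);

	return shortest;
}

int main(void)
{
	int count[4] = { 2, 1, 0, 3 };
	printf("shortest = %d\n", find_shortest(count, 4, 1, 1));
	return 0;
}
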
74
75
76static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
77 wait_queue_t** to_steal,
78 struct kfmlp_queue** to_steal_from)
79{
80 /* must hold sem->lock */
81
82 int i;
83
84 *to_steal = NULL;
85 *to_steal_from = NULL;
86
87 for(i = 0; i < sem->num_resources; ++i)
88 {
89 if( (sem->queues[i].count > 1) &&
90 ((*to_steal_from == NULL) ||
91 //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
92 (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
93 {
94 *to_steal_from = &sem->queues[i];
95 }
96 }
97
98 if(*to_steal_from)
99 {
100 struct list_head *pos;
101 struct task_struct *target = (*to_steal_from)->hp_waiter;
102
103 TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
104 target->comm,
105 target->pid,
106 kfmlp_get_idx(sem, *to_steal_from));
107
108 list_for_each(pos, &(*to_steal_from)->wait.task_list)
109 {
110 wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
111 struct task_struct *queued = (struct task_struct*) node->private;
112 /* Compare task prios, find high prio task. */
113 if (queued == target)
114 {
115 *to_steal = node;
116
117 TRACE_CUR("steal: selected %s/%d from queue %d\n",
118 queued->comm, queued->pid,
119 kfmlp_get_idx(sem, *to_steal_from));
120
121 return queued;
122 }
123 }
124
125 TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
126 target->comm,
127 target->pid,
128 kfmlp_get_idx(sem, *to_steal_from));
129 }
130
131 return NULL;
132}
133
134static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
135 struct kfmlp_queue *dst,
136 wait_queue_t *wait,
137 struct kfmlp_queue *src)
138{
139 struct task_struct* t = (struct task_struct*) wait->private;
140
141 __remove_wait_queue(&src->wait, wait);
142 --(src->count);
143
144 if(t == src->hp_waiter) {
145 src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
146
147 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
148 kfmlp_get_idx(sem, src),
149 (src->hp_waiter) ? src->hp_waiter->comm : "nil",
150 (src->hp_waiter) ? src->hp_waiter->pid : -1);
151
152 if(src->owner && tsk_rt(src->owner)->inh_task == t) {
153 litmus->decrease_prio(src->owner, src->hp_waiter);
154 }
155 }
156
157 if(sem->shortest_queue->count > src->count) {
158 sem->shortest_queue = src;
159 TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
160 }
161
162#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
163 if(sem->aff_obs) {
164 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
165 }
166#endif
167
168 init_waitqueue_entry(wait, t);
169 __add_wait_queue_tail_exclusive(&dst->wait, wait);
170 ++(dst->count);
171
172 if(litmus->compare(t, dst->hp_waiter)) {
173 dst->hp_waiter = t;
174
175 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
176 kfmlp_get_idx(sem, dst),
177 t->comm, t->pid);
178
179 if(dst->owner && litmus->compare(t, dst->owner))
180 {
181 litmus->increase_prio(dst->owner, t);
182 }
183 }
184
185#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
186 if(sem->aff_obs) {
187 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
188 }
189#endif
190}
191
192
193int kfmlp_lock(struct litmus_lock* l)
194{
195 struct task_struct* t = current;
196 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
197 struct kfmlp_queue* my_queue = NULL;
198 wait_queue_t wait;
199 unsigned long flags;
200
201 if (!is_realtime(t))
202 return -EPERM;
203
204 spin_lock_irqsave(&sem->lock, flags);
205
206#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
207 if(sem->aff_obs) {
208 my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
209 }
210 if(!my_queue) {
211 my_queue = sem->shortest_queue;
212 }
213#else
214 my_queue = sem->shortest_queue;
215#endif
216
217 if (my_queue->owner) {
218 /* resource is not free => must suspend and wait */
219 TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
220 kfmlp_get_idx(sem, my_queue),
221 my_queue->count);
222
223 init_waitqueue_entry(&wait, t);
224
225 /* FIXME: interruptible would be nice some day */
226 set_task_state(t, TASK_UNINTERRUPTIBLE);
227
228 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
229
230 TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
231 kfmlp_get_idx(sem, my_queue),
232 (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
233 (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
234
235 /* check if we need to activate priority inheritance */
236 //if (edf_higher_prio(t, my_queue->hp_waiter))
237 if (litmus->compare(t, my_queue->hp_waiter)) {
238 my_queue->hp_waiter = t;
239 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
240 kfmlp_get_idx(sem, my_queue),
241 t->comm, t->pid);
242
243 //if (edf_higher_prio(t, my_queue->owner))
244 if (litmus->compare(t, my_queue->owner)) {
245 litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
246 }
247 }
248
249 ++(my_queue->count);
250
251 if(my_queue == sem->shortest_queue) {
252 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
253 TRACE_CUR("queue %d is the shortest\n",
254 kfmlp_get_idx(sem, sem->shortest_queue));
255 }
256
257#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
258 if(sem->aff_obs) {
259 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
260 }
261#endif
262
263 /* release lock before sleeping */
264 spin_unlock_irqrestore(&sem->lock, flags);
265
266 /* We depend on the FIFO order. Thus, we don't need to recheck
267 * when we wake up; we are guaranteed to have the lock since
268 * there is only one wake up per release (or steal).
269 */
270 suspend_for_lock();
271
272
273 if(my_queue->owner == t) {
274 TRACE_CUR("queue %d: acquired through waiting\n",
275 kfmlp_get_idx(sem, my_queue));
276 }
277 else {
278 /* this case may happen if our wait entry was stolen
279 between queues. record where we went. */
280 my_queue = kfmlp_get_queue(sem, t);
281
282 BUG_ON(!my_queue);
283 TRACE_CUR("queue %d: acquired through stealing\n",
284 kfmlp_get_idx(sem, my_queue));
285 }
286 }
287 else {
288 TRACE_CUR("queue %d: acquired immediately\n",
289 kfmlp_get_idx(sem, my_queue));
290
291 my_queue->owner = t;
292
293 ++(my_queue->count);
294
295 if(my_queue == sem->shortest_queue) {
296 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
297 TRACE_CUR("queue %d is the shortest\n",
298 kfmlp_get_idx(sem, sem->shortest_queue));
299 }
300
301#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
302 if(sem->aff_obs) {
303 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
304 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
305 }
306#endif
307
308 spin_unlock_irqrestore(&sem->lock, flags);
309 }
310
311
312#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
313 if(sem->aff_obs) {
314 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
315 }
316#endif
317 return kfmlp_get_idx(sem, my_queue);
318}
319
320
321int kfmlp_unlock(struct litmus_lock* l)
322{
323 struct task_struct *t = current, *next;
324 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
325 struct kfmlp_queue *my_queue, *to_steal_from;
326 unsigned long flags;
327 int err = 0;
328
329 my_queue = kfmlp_get_queue(sem, t);
330
331 if (!my_queue) {
332 err = -EINVAL;
333 goto out;
334 }
335
336 spin_lock_irqsave(&sem->lock, flags);
337
338 TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
339
340 my_queue->owner = NULL; // clear ownership
341 --(my_queue->count);
342
343 if(my_queue->count < sem->shortest_queue->count)
344 {
345 sem->shortest_queue = my_queue;
346 TRACE_CUR("queue %d is the shortest\n",
347 kfmlp_get_idx(sem, sem->shortest_queue));
348 }
349
350#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
351 if(sem->aff_obs) {
352 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
353 sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
354 }
355#endif
356
357 /* we lose the benefit of priority inheritance (if any) */
358 if (tsk_rt(t)->inh_task)
359 litmus->decrease_prio(t, NULL);
360
361
362 /* check if there are jobs waiting for this resource */
363RETRY:
364 next = __waitqueue_remove_first(&my_queue->wait);
365 if (next) {
366		/* next becomes the resource holder */
367 my_queue->owner = next;
368
369#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
370 if(sem->aff_obs) {
371 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
372 }
373#endif
374
375 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
376 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
377
378 /* determine new hp_waiter if necessary */
379 if (next == my_queue->hp_waiter) {
380 TRACE_TASK(next, "was highest-prio waiter\n");
381 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
382 if (my_queue->hp_waiter)
383 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
384 else
385 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
386 } else {
387 /* Well, if next is not the highest-priority waiter,
388 * then it ought to inherit the highest-priority
389 * waiter's priority. */
390 litmus->increase_prio(next, my_queue->hp_waiter);
391 }
392
393 /* wake up next */
394 wake_up_process(next);
395 }
396 else {
397 // TODO: put this stealing logic before we attempt to release
398		// our resource. (simplifies the code and gets rid of the ugly goto RETRY.)
399 wait_queue_t *wait;
400
401 TRACE_CUR("queue %d: looking to steal someone...\n",
402 kfmlp_get_idx(sem, my_queue));
403
404#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
405 next = (sem->aff_obs) ?
406 sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
407 kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
408#else
409 next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
410#endif
411
412 if(next) {
413 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
414 kfmlp_get_idx(sem, my_queue),
415 next->comm, next->pid,
416 kfmlp_get_idx(sem, to_steal_from));
417
418 kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
419
420 goto RETRY; // will succeed this time.
421 }
422 else {
423 TRACE_CUR("queue %d: no one to steal.\n",
424 kfmlp_get_idx(sem, my_queue));
425 }
426 }
427
428 spin_unlock_irqrestore(&sem->lock, flags);
429
430out:
431 return err;
432}
433
434int kfmlp_close(struct litmus_lock* l)
435{
436 struct task_struct *t = current;
437 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
438 struct kfmlp_queue *my_queue;
439 unsigned long flags;
440
441 int owner;
442
443 spin_lock_irqsave(&sem->lock, flags);
444
445 my_queue = kfmlp_get_queue(sem, t);
446 owner = (my_queue) ? (my_queue->owner == t) : 0;
447
448 spin_unlock_irqrestore(&sem->lock, flags);
449
450 if (owner)
451 kfmlp_unlock(l);
452
453 return 0;
454}
455
456void kfmlp_free(struct litmus_lock* l)
457{
458 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
459 kfree(sem->queues);
460 kfree(sem);
461}
462
463
464
465struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
466{
467 struct kfmlp_semaphore* sem;
468 int num_resources = 0;
469 int i;
470
471 if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
472 {
473 return(NULL);
474 }
475 if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
476 {
477 return(NULL);
478 }
479 if(num_resources < 1)
480 {
481 return(NULL);
482 }
483
484 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
485 if(!sem)
486 {
487 return(NULL);
488 }
489
490 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
491 if(!sem->queues)
492 {
493 kfree(sem);
494 return(NULL);
495 }
496
497 sem->litmus_lock.ops = ops;
498 spin_lock_init(&sem->lock);
499 sem->num_resources = num_resources;
500
501 for(i = 0; i < num_resources; ++i)
502 {
503 sem->queues[i].owner = NULL;
504 sem->queues[i].hp_waiter = NULL;
505 init_waitqueue_head(&sem->queues[i].wait);
506 sem->queues[i].count = 0;
507 }
508
509 sem->shortest_queue = &sem->queues[0];
510
511#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
512 sem->aff_obs = NULL;
513#endif
514
515 return &sem->litmus_lock;
516}
517
518
519
520
521#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
522
523static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
524{
525 int gpu = replica % aff->nr_rsrc;
526 return gpu;
527}
528
529static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
530{
531 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
532 return gpu;
533}
534
535static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
536{
537 int replica = gpu - aff->offset;
538 return replica;
539}
540
541
542int kfmlp_aff_obs_close(struct affinity_observer* obs)
543{
544 return 0;
545}
546
547void kfmlp_aff_obs_free(struct affinity_observer* obs)
548{
549 struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
550 kfree(kfmlp_aff->nr_cur_users_on_rsrc);
551 kfree(kfmlp_aff->q_info);
552 kfree(kfmlp_aff);
553}
554
555static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
556 struct kfmlp_affinity_ops* kfmlp_ops,
557 void* __user args)
558{
559 struct kfmlp_affinity* kfmlp_aff;
560 struct gpu_affinity_observer_args aff_args;
561 struct kfmlp_semaphore* sem;
562 int i;
563 unsigned long flags;
564
565 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
566 return(NULL);
567 }
568 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
569 return(NULL);
570 }
571
572 sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
573
574 if(sem->litmus_lock.type != KFMLP_SEM) {
575 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
576 return(NULL);
577 }
578
579 if((aff_args.nr_simult_users <= 0) ||
580 (sem->num_resources%aff_args.nr_simult_users != 0)) {
581 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
582 "(%d) per replica. #replicas should be evenly divisible "
583 "by #simult_users.\n",
584 sem->litmus_lock.ident,
585 sem->num_resources,
586 aff_args.nr_simult_users);
587 return(NULL);
588 }
589
590// if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
591// TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
592// NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
593//// return(NULL);
594// }
595
596 kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
597 if(!kfmlp_aff) {
598 return(NULL);
599 }
600
601 kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
602 if(!kfmlp_aff->q_info) {
603 kfree(kfmlp_aff);
604 return(NULL);
605 }
606
607 kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
608 if(!kfmlp_aff->nr_cur_users_on_rsrc) {
609 kfree(kfmlp_aff->q_info);
610 kfree(kfmlp_aff);
611 return(NULL);
612 }
613
614 affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
615
616 kfmlp_aff->ops = kfmlp_ops;
617 kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
618 kfmlp_aff->nr_simult = aff_args.nr_simult_users;
619 kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
620
621	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(kfmlp_aff->nr_rsrc)); /* one counter per physical GPU, matching the allocation above */
622
623 for(i = 0; i < sem->num_resources; ++i) {
624 kfmlp_aff->q_info[i].q = &sem->queues[i];
625 kfmlp_aff->q_info[i].estimated_len = 0;
626
627 // multiple q_info's will point to the same resource (aka GPU) if
628 // aff_args.nr_simult_users > 1
629 kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
630 }
631
632 // attach observer to the lock
633 spin_lock_irqsave(&sem->lock, flags);
634 sem->aff_obs = kfmlp_aff;
635 spin_unlock_irqrestore(&sem->lock, flags);
636
637 return &kfmlp_aff->obs;
638}
639
640
641
642
643static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
644 struct kfmlp_queue* fq) {
645 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
646 return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
647}
648
649
650// Smart KFMLP Affinity
651
652//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
653//{
654// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
655// struct kfmlp_queue_info *shortest = &aff->q_info[0];
656// int i;
657//
658// for(i = 1; i < sem->num_resources; ++i) {
659// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
660// shortest = &aff->q_info[i];
661// }
662// }
663//
664// return(shortest);
665//}
666
667struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
668{
669 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
670 lt_t min_len;
671 int min_nr_users;
672 struct kfmlp_queue_info *shortest;
673 struct kfmlp_queue *to_enqueue;
674 int i;
675 int affinity_gpu;
676
677	// simply pick the shortest queue if we have no affinity, or if we have
678	// affinity with the shortest
679 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
680 affinity_gpu = aff->offset; // first gpu
681 TRACE_CUR("no affinity\n");
682 }
683 else {
684 affinity_gpu = tsk_rt(t)->last_gpu;
685 }
686
687 // all things being equal, let's start with the queue with which we have
688 // affinity. this helps us maintain affinity even when we don't have
689	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
690 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
691
692// if(shortest == aff->shortest_queue) {
693// TRACE_CUR("special case: have affinity with shortest queue\n");
694// goto out;
695// }
696
697 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
698 min_nr_users = *(shortest->nr_cur_users);
699
700 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
701 get_gpu_estimate(t, MIG_LOCAL),
702 kfmlp_get_idx(sem, shortest->q),
703 min_len);
704
705 for(i = 0; i < sem->num_resources; ++i) {
706 if(&aff->q_info[i] != shortest) {
707
708 lt_t est_len =
709 aff->q_info[i].estimated_len +
710 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
711
712 // queue is smaller, or they're equal and the other has a smaller number
713 // of total users.
714 //
715	// tie-break on the smallest number of simultaneous users. this only kicks in
716	// when there is more than one empty queue.
717 if((est_len < min_len) ||
718 ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
719 shortest = &aff->q_info[i];
720 min_len = est_len;
721 min_nr_users = *(aff->q_info[i].nr_cur_users);
722 }
723
724 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
725 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
726 kfmlp_get_idx(sem, aff->q_info[i].q),
727 est_len);
728 }
729 }
730
731 to_enqueue = shortest->q;
732 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
733 kfmlp_get_idx(sem, to_enqueue),
734 kfmlp_get_idx(sem, sem->shortest_queue));
735
736 return to_enqueue;
737}
738
739struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
740{
741 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
742
743 // For now, just steal highest priority waiter
744 // TODO: Implement affinity-aware stealing.
745
746 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
747}
748
749
750void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
751{
752 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
753 int replica = kfmlp_get_idx(sem, fq);
754 int gpu = replica_to_gpu(aff, replica);
755 struct kfmlp_queue_info *info = &aff->q_info[replica];
756 lt_t est_time;
757 lt_t est_len_before;
758
759 if(current == t) {
760 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
761 }
762
763 est_len_before = info->estimated_len;
764 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
765 info->estimated_len += est_time;
766
767 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
768 kfmlp_get_idx(sem, info->q),
769 est_len_before, est_time,
770 info->estimated_len);
771
772// if(aff->shortest_queue == info) {
773// // we may no longer be the shortest
774// aff->shortest_queue = kfmlp_aff_find_shortest(aff);
775//
776// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
777// kfmlp_get_idx(sem, aff->shortest_queue->q),
778// aff->shortest_queue->q->count,
779// aff->shortest_queue->estimated_len);
780// }
781}
782
783void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
784{
785 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
786 int replica = kfmlp_get_idx(sem, fq);
787 int gpu = replica_to_gpu(aff, replica);
788 struct kfmlp_queue_info *info = &aff->q_info[replica];
789 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
790
791 if(est_time > info->estimated_len) {
792 WARN_ON(1);
793 info->estimated_len = 0;
794 }
795 else {
796 info->estimated_len -= est_time;
797 }
798
799 TRACE_CUR("fq %d est len is now %llu\n",
800 kfmlp_get_idx(sem, info->q),
801 info->estimated_len);
802
803 // check to see if we're the shortest queue now.
804// if((aff->shortest_queue != info) &&
805// (aff->shortest_queue->estimated_len > info->estimated_len)) {
806//
807// aff->shortest_queue = info;
808//
809// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
810// kfmlp_get_idx(sem, info->q),
811// info->q->count,
812// info->estimated_len);
813// }
814}
815
816void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
817{
818 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
819 int replica = kfmlp_get_idx(sem, fq);
820 int gpu = replica_to_gpu(aff, replica);
821
822 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
823
824 TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
825 t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
826
827	// count the number of resource holders
828 ++(*(aff->q_info[replica].nr_cur_users));
829
830 reg_nv_device(gpu, 1, t); // register
831
832
833 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
834 reset_gpu_tracker(t);
835 start_gpu_tracker(t);
836}
837
838void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
839{
840 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
841 int replica = kfmlp_get_idx(sem, fq);
842 int gpu = replica_to_gpu(aff, replica);
843 lt_t est_time;
844
845 stop_gpu_tracker(t); // stop the tracker before we do anything else.
846
847 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
848
849 tsk_rt(t)->last_gpu = gpu;
850
851	// count the number of resource holders
852 --(*(aff->q_info[replica].nr_cur_users));
853
854 reg_nv_device(gpu, 0, t); // unregister
855
856 // update estimates
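	// feed the measured hold time back into this task's execution-time
	// estimator for the migration type recorded in notify_acquired()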
857 update_gpu_estimate(t, get_gpu_time(t));
858
859 TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n",
860 t->comm, t->pid, gpu,
861 get_gpu_time(t),
862 est_time,
863 (long long)get_gpu_time(t) - (long long)est_time);
864}
865
866struct kfmlp_affinity_ops gpu_kfmlp_affinity =
867{
868 .advise_enqueue = gpu_kfmlp_advise_enqueue,
869 .advise_steal = gpu_kfmlp_advise_steal,
870 .notify_enqueue = gpu_kfmlp_notify_enqueue,
871 .notify_dequeue = gpu_kfmlp_notify_dequeue,
872 .notify_acquired = gpu_kfmlp_notify_acquired,
873 .notify_freed = gpu_kfmlp_notify_freed,
874 .replica_to_resource = gpu_replica_to_resource,
875};
876
877struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
878 void* __user args)
879{
880 return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
881}
882
883
884
885
886
887
888
889
890// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
891
892struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
893{
894 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
895 int min_count;
896 int min_nr_users;
897 struct kfmlp_queue_info *shortest;
898 struct kfmlp_queue *to_enqueue;
899 int i;
900
901// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
902
903 shortest = &aff->q_info[0];
904 min_count = shortest->q->count;
905 min_nr_users = *(shortest->nr_cur_users);
906
907 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
908 kfmlp_get_idx(sem, shortest->q),
909 shortest->q->count,
910 min_nr_users);
911
912 for(i = 1; i < sem->num_resources; ++i) {
913 int len = aff->q_info[i].q->count;
914
915	// prefer this queue if it is shorter, or if the lengths are equal and it
916	// has fewer total users.
917	//
918	// tie-break on the smallest number of simultaneous users. this only kicks
919	// in when there is more than one empty queue.
920 if((len < min_count) ||
921 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
922 shortest = &aff->q_info[i];
923 min_count = shortest->q->count;
924 min_nr_users = *(aff->q_info[i].nr_cur_users);
925 }
926
927 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
928 kfmlp_get_idx(sem, aff->q_info[i].q),
929 aff->q_info[i].q->count,
930 *(aff->q_info[i].nr_cur_users));
931 }
932
933 to_enqueue = shortest->q;
934 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
935 kfmlp_get_idx(sem, to_enqueue),
936 kfmlp_get_idx(sem, sem->shortest_queue));
937
938 return to_enqueue;
939}
940
941struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
942{
943 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
944// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
945 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
946}
947
948void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
949{
950// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
951}
952
953void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
954{
955// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
956}
957
958void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
959{
960 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
961 int replica = kfmlp_get_idx(sem, fq);
962 int gpu = replica_to_gpu(aff, replica);
963
964// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
965
966	// count the number of resource holders
967 ++(*(aff->q_info[replica].nr_cur_users));
968
969 reg_nv_device(gpu, 1, t); // register
970}
971
972void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
973{
974 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
975 int replica = kfmlp_get_idx(sem, fq);
976 int gpu = replica_to_gpu(aff, replica);
977
978// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
979	// count the number of resource holders
980 --(*(aff->q_info[replica].nr_cur_users));
981
982 reg_nv_device(gpu, 0, t); // unregister
983}
984
985struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
986{
987 .advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
988 .advise_steal = simple_gpu_kfmlp_advise_steal,
989 .notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
990 .notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
991 .notify_acquired = simple_gpu_kfmlp_notify_acquired,
992 .notify_freed = simple_gpu_kfmlp_notify_freed,
993 .replica_to_resource = gpu_replica_to_resource,
994};
995
996struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
997 void* __user args)
998{
999 return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
1000}
1001
1002#endif
1003
diff --git a/litmus/litmus.c b/litmus/litmus.c
index dc94be71bfb6..2911e7ec7029 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -23,6 +23,14 @@
23#include <litmus/affinity.h> 23#include <litmus/affinity.h>
24#endif 24#endif
25 25
26#ifdef CONFIG_LITMUS_NVIDIA
27#include <litmus/nvidia_info.h>
28#endif
29
30#ifdef CONFIG_REALTIME_AUX_TASKS
31#include <litmus/aux_tasks.h>
32#endif
33
26/* Number of RT tasks that exist in the system */ 34/* Number of RT tasks that exist in the system */
27atomic_t rt_task_count = ATOMIC_INIT(0); 35atomic_t rt_task_count = ATOMIC_INIT(0);
28 36
@@ -135,6 +143,16 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
135 pid, tp.budget_policy); 143 pid, tp.budget_policy);
136 goto out_unlock; 144 goto out_unlock;
137 } 145 }
146 if (tp.budget_signal_policy != NO_SIGNALS &&
147 tp.budget_signal_policy != QUANTUM_SIGNALS &&
148 tp.budget_signal_policy != PRECISE_SIGNALS)
149 {
150 printk(KERN_INFO "litmus: real-time task %d rejected "
151 "because unsupported budget signalling policy "
152 "specified (%d)\n",
153 pid, tp.budget_signal_policy);
154 goto out_unlock;
155 }
138 156
139 target->rt_param.task_params = tp; 157 target->rt_param.task_params = tp;
140 158
@@ -272,6 +290,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
272 return retval; 290 return retval;
273} 291}
274 292
293
275/* sys_null_call() is only used for determining raw system call 294/* sys_null_call() is only used for determining raw system call
276 * overheads (kernel entry, kernel exit). It has no useful side effects. 295 * overheads (kernel entry, kernel exit). It has no useful side effects.
277 * If ts is non-NULL, then the current Feather-Trace time is recorded. 296 * If ts is non-NULL, then the current Feather-Trace time is recorded.
@@ -289,12 +308,117 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
289 return ret; 308 return ret;
290} 309}
291 310
311
312asmlinkage long sys_sched_trace_event(int event, struct st_inject_args __user *__args)
313{
314 long retval = 0;
315 struct task_struct* t = current;
316
317 struct st_inject_args args;
318
319 if (is_realtime(t)) {
320 printk(KERN_WARNING "Only non-real-time tasks may inject sched_trace events.\n");
321 retval = -EINVAL;
322 goto out;
323 }
324
325 if (__args && copy_from_user(&args, __args, sizeof(args))) {
326 retval = -EFAULT;
327 goto out;
328 }
329
330 switch(event) {
331 /*************************************/
332 /* events that don't need parameters */
333 /*************************************/
334 case ST_INJECT_NAME:
335 sched_trace_task_name(t);
336 break;
337 case ST_INJECT_PARAM:
338 /* presumes sporadic_task_ns() has already been called
339 * and valid data has been initialized even if the calling
340 * task is SCHED_NORMAL. */
341 sched_trace_task_param(t);
342 break;
343
344 /*******************************/
345 /* events that need parameters */
346 /*******************************/
347 case ST_INJECT_COMPLETION:
348 if (!__args) {
349 retval = -EINVAL;
350 goto out;
351 }
352
353 /* slam in the data */
354 t->rt_param.job_params.job_no = args.job_no;
355
356 sched_trace_task_completion(t, 0);
357 break;
358 case ST_INJECT_RELEASE:
359 if (!__args) {
360 retval = -EINVAL;
361 goto out;
362 }
363
364 /* slam in the data */
365 tsk_rt(t)->job_params.release = args.release;
366 tsk_rt(t)->job_params.deadline = args.deadline;
367
368 sched_trace_task_release(t);
369 break;
370
371 /**********************/
372 /* unsupported events */
373 /**********************/
374 default:
375 retval = -EINVAL;
376 break;
377 }
378
379out:
380 return retval;
381}
382
383
384#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
385void init_gpu_affinity_state(struct task_struct* p)
386{
387 // under-damped
388 //p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
389 //p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
390
391#if 0
392	// empirical
393 p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
394 p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
395
396 p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
397 p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
398
399 p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
400 p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
401
402 p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
403 p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
404#endif
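	/* defaults: no migration in flight and no GPU held yet;
	 * last_gpu == -1 means there is no affinity to preserve */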
405 p->rt_param.gpu_migration = MIG_NONE;
406 p->rt_param.last_gpu = -1;
407}
408#endif
409
292/* p is a real-time task. Re-init its state as a best-effort task. */ 410/* p is a real-time task. Re-init its state as a best-effort task. */
293static void reinit_litmus_state(struct task_struct* p, int restore) 411static void reinit_litmus_state(struct task_struct* p, int restore)
294{ 412{
295 struct rt_task user_config = {}; 413 struct rt_task user_config = {};
296 void* ctrl_page = NULL; 414 void* ctrl_page = NULL;
297 415
416#ifdef CONFIG_LITMUS_NESTED_LOCKING
417 binheap_order_t prio_order = NULL;
418#endif
419
420 TRACE_TASK(p, "reinit_litmus_state: restore = %d\n", restore);
421
298 if (restore) { 422 if (restore) {
299	/* Save user-space provided configuration data. 423	/* Save user-space provided configuration data.
300	 * and allocated page. */ 424	 * and allocated page. */
@@ -302,48 +426,57 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
302 ctrl_page = p->rt_param.ctrl_page; 426 ctrl_page = p->rt_param.ctrl_page;
303 } 427 }
304 428
429#ifdef CONFIG_LITMUS_NVIDIA
430 WARN_ON(p->rt_param.held_gpus != 0);
431#endif
432
433#ifdef CONFIG_LITMUS_LOCKING
305 /* We probably should not be inheriting any task's priority 434 /* We probably should not be inheriting any task's priority
306 * at this point in time. 435 * at this point in time.
307 */ 436 */
308 WARN_ON(p->rt_param.inh_task); 437 WARN_ON(p->rt_param.inh_task);
438#endif
439
440#ifdef CONFIG_LITMUS_NESTED_LOCKING
441 prio_order = p->rt_param.hp_blocked_tasks.compare;
442#endif
309 443
310 /* Cleanup everything else. */ 444 /* Cleanup everything else. */
311 memset(&p->rt_param, 0, sizeof(p->rt_param)); 445 memset(&p->rt_param, 0, sizeof(p->rt_param));
312 446
447#ifdef CONFIG_REALTIME_AUX_TASKS
448 /* also clear out the aux_data. the !restore case is only called on
449 * fork (initial thread creation). */
450 if (!restore) {
451 memset(&p->aux_data, 0, sizeof(p->aux_data));
452 }
453#endif
454
313 /* Restore preserved fields. */ 455 /* Restore preserved fields. */
314 if (restore) { 456 if (restore) {
315 p->rt_param.task_params = user_config; 457 p->rt_param.task_params = user_config;
316 p->rt_param.ctrl_page = ctrl_page; 458 p->rt_param.ctrl_page = ctrl_page;
317 } 459 }
318}
319 460
320long litmus_admit_task(struct task_struct* tsk) 461#ifdef CONFIG_LITMUS_NVIDIA
321{ 462 INIT_BINHEAP_NODE(&p->rt_param.gpu_owner_node);
322 long retval = 0; 463#endif
323 464
324 BUG_ON(is_realtime(tsk)); 465#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
466 init_gpu_affinity_state(p);
467#endif
325 468
326 tsk_rt(tsk)->heap_node = NULL; 469#ifdef CONFIG_LITMUS_NESTED_LOCKING
327 tsk_rt(tsk)->rel_heap = NULL; 470 INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
471 raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
472#endif
473}
328 474
329 if (get_rt_relative_deadline(tsk) == 0 ||
330 get_exec_cost(tsk) >
331 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
332 TRACE_TASK(tsk,
333 "litmus admit: invalid task parameters "
334 "(e = %lu, p = %lu, d = %lu)\n",
335 get_exec_cost(tsk), get_rt_period(tsk),
336 get_rt_relative_deadline(tsk));
337 retval = -EINVAL;
338 goto out;
339 }
340 475
341 if (!cpu_online(get_partition(tsk))) { 476
342 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", 477long __litmus_admit_task(struct task_struct* tsk)
343 get_partition(tsk)); 478{
344 retval = -EINVAL; 479 long retval = 0;
345 goto out;
346 }
347 480
348 INIT_LIST_HEAD(&tsk_rt(tsk)->list); 481 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
349 482
@@ -360,6 +493,17 @@ long litmus_admit_task(struct task_struct* tsk)
360 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); 493 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
361 } 494 }
362 495
496#ifdef CONFIG_LITMUS_NVIDIA
497 atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
498#endif
499#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
500 init_gpu_affinity_state(tsk);
501#endif
502#ifdef CONFIG_LITMUS_NESTED_LOCKING
503 tsk_rt(tsk)->blocked_lock = NULL;
504 raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
505#endif
506
363 preempt_disable(); 507 preempt_disable();
364 508
365 retval = litmus->admit_task(tsk); 509 retval = litmus->admit_task(tsk);
@@ -372,14 +516,56 @@ long litmus_admit_task(struct task_struct* tsk)
372 516
373 preempt_enable(); 517 preempt_enable();
374 518
375out:
376 if (retval) { 519 if (retval) {
377 bheap_node_free(tsk_rt(tsk)->heap_node); 520 bheap_node_free(tsk_rt(tsk)->heap_node);
378 release_heap_free(tsk_rt(tsk)->rel_heap); 521 release_heap_free(tsk_rt(tsk)->rel_heap);
379 } 522 }
523
524out:
525 return retval;
526}
527
528long litmus_admit_task(struct task_struct* tsk)
529{
530 long retval = 0;
531
532 BUG_ON(is_realtime(tsk));
533
534 if (get_rt_relative_deadline(tsk) == 0 ||
535 get_exec_cost(tsk) >
536 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
537 TRACE_TASK(tsk,
538 "litmus admit: invalid task parameters "
539 "(e = %lu, p = %lu, d = %lu)\n",
540 get_exec_cost(tsk), get_rt_period(tsk),
541 get_rt_relative_deadline(tsk));
542 retval = -EINVAL;
543 goto out;
544 }
545
546 if (!cpu_online(get_partition(tsk))) {
547 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
548 get_partition(tsk));
549 retval = -EINVAL;
550 goto out;
551 }
552
553 retval = __litmus_admit_task(tsk);
554
555out:
380 return retval; 556 return retval;
381} 557}
382 558
559void litmus_pre_exit_task(struct task_struct* tsk)
560{
561 if (is_realtime(tsk)) {
562 if (tsk_rt(tsk)->rsrc_exit_cb) {
563 int ret = tsk_rt(tsk)->rsrc_exit_cb(tsk);
564 WARN_ON(ret != 0);
565 }
566 }
567}
568
383void litmus_exit_task(struct task_struct* tsk) 569void litmus_exit_task(struct task_struct* tsk)
384{ 570{
385 if (is_realtime(tsk)) { 571 if (is_realtime(tsk)) {
@@ -388,7 +574,7 @@ void litmus_exit_task(struct task_struct* tsk)
388 litmus->task_exit(tsk); 574 litmus->task_exit(tsk);
389 575
390 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); 576 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
391 bheap_node_free(tsk_rt(tsk)->heap_node); 577 bheap_node_free(tsk_rt(tsk)->heap_node);
392 release_heap_free(tsk_rt(tsk)->rel_heap); 578 release_heap_free(tsk_rt(tsk)->rel_heap);
393 579
394 atomic_dec(&rt_task_count); 580 atomic_dec(&rt_task_count);
@@ -406,14 +592,19 @@ static int do_plugin_switch(void *_plugin)
406 ret = litmus->deactivate_plugin(); 592 ret = litmus->deactivate_plugin();
407 if (0 != ret) 593 if (0 != ret)
408 goto out; 594 goto out;
409 ret = plugin->activate_plugin(); 595
596 litmus = plugin; /* optimistic switch */
597 mb();
598
599 ret = litmus->activate_plugin();
410 if (0 != ret) { 600 if (0 != ret) {
411 printk(KERN_INFO "Can't activate %s (%d).\n", 601 printk(KERN_INFO "Can't activate %s (%d).\n",
412 plugin->plugin_name, ret); 602 litmus->plugin_name, ret);
413 plugin = &linux_sched_plugin; 603 litmus = &linux_sched_plugin; /* fail to Linux */
604 ret = litmus->activate_plugin();
605 BUG_ON(ret);
414 } 606 }
415 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); 607 printk(KERN_INFO "Switched to LITMUS^RT plugin %s.\n", litmus->plugin_name);
416 litmus = plugin;
417 } else 608 } else
418 ret = -EBUSY; 609 ret = -EBUSY;
419out: 610out:
@@ -429,6 +620,12 @@ int switch_sched_plugin(struct sched_plugin* plugin)
429{ 620{
430 BUG_ON(!plugin); 621 BUG_ON(!plugin);
431 622
623#ifdef CONFIG_LITMUS_SOFTIRQD
624 if (!klmirqd_is_dead()) {
625 kill_klmirqd();
626 }
627#endif
628
432 if (atomic_read(&rt_task_count) == 0) 629 if (atomic_read(&rt_task_count) == 0)
433 return stop_machine(do_plugin_switch, plugin, NULL); 630 return stop_machine(do_plugin_switch, plugin, NULL);
434 else 631 else
@@ -441,18 +638,33 @@ int switch_sched_plugin(struct sched_plugin* plugin)
441void litmus_fork(struct task_struct* p) 638void litmus_fork(struct task_struct* p)
442{ 639{
443 if (is_realtime(p)) { 640 if (is_realtime(p)) {
641 TRACE_TASK(p, "fork, is real-time\n");
642
444 /* clean out any litmus related state, don't preserve anything */ 643 /* clean out any litmus related state, don't preserve anything */
445 reinit_litmus_state(p, 0); 644 reinit_litmus_state(p, 0);
645
446 /* Don't let the child be a real-time task. */ 646 /* Don't let the child be a real-time task. */
447 p->sched_reset_on_fork = 1; 647 p->sched_reset_on_fork = 1;
448 } else 648
649 } else {
449 /* non-rt tasks might have ctrl_page set */ 650 /* non-rt tasks might have ctrl_page set */
450 tsk_rt(p)->ctrl_page = NULL; 651 tsk_rt(p)->ctrl_page = NULL;
451 652
653 reinit_litmus_state(p, 0);
654 }
655
452 /* od tables are never inherited across a fork */ 656 /* od tables are never inherited across a fork */
453 p->od_table = NULL; 657 p->od_table = NULL;
454} 658}
455 659
660/* Called right before copy_process() returns a forked thread. */
661void litmus_post_fork_thread(struct task_struct* p)
662{
663#ifdef CONFIG_REALTIME_AUX_TASKS
664 make_aux_task_if_required(p);
665#endif
666}
667
456/* Called upon execve(). 668/* Called upon execve().
457 * current is doing the exec. 669 * current is doing the exec.
458 * Don't let address space specific stuff leak. 670 * Don't let address space specific stuff leak.
@@ -486,8 +698,10 @@ void exit_litmus(struct task_struct *dead_tsk)
486 } 698 }
487 699
488 /* main cleanup only for RT tasks */ 700 /* main cleanup only for RT tasks */
489 if (is_realtime(dead_tsk)) 701 if (is_realtime(dead_tsk)) {
702 litmus_pre_exit_task(dead_tsk); /* todo: double check that no Linux rq lock is held */
490 litmus_exit_task(dead_tsk); 703 litmus_exit_task(dead_tsk);
704 }
491} 705}
492 706
493 707
diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
new file mode 100644
index 000000000000..300571a81bbd
--- /dev/null
+++ b/litmus/litmus_pai_softirq.c
@@ -0,0 +1,64 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19
20
21int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
22{
23 int ret = 0; /* assume failure */
24 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
25 {
26 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
27 BUG();
28 }
29
30 ret = litmus->enqueue_pai_tasklet(t);
31
32 return(ret);
33}
34
35EXPORT_SYMBOL(__litmus_tasklet_schedule);
36
37
38
39// failure causes default Linux handling.
40int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
41{
42 int ret = 0; /* assume failure */
43 return(ret);
44}
45EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
46
47
48// failure causes default Linux handling.
49int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
50{
51 int ret = 0; /* assume failure */
52 return(ret);
53}
54EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
55
56
57// failure causes default Linux handling.
58int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
59{
60 int ret = 0; /* assume failure */
61 return(ret);
62}
63EXPORT_SYMBOL(__litmus_schedule_work);
64
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
index 4bf725a36c9c..136fecfb0b8b 100644
--- a/litmus/litmus_proc.c
+++ b/litmus/litmus_proc.c
@@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
20#ifdef CONFIG_RELEASE_MASTER 20#ifdef CONFIG_RELEASE_MASTER
21 *release_master_file = NULL, 21 *release_master_file = NULL,
22#endif 22#endif
23#ifdef CONFIG_LITMUS_SOFTIRQD
24 *klmirqd_file = NULL,
25#endif
23 *plugs_file = NULL; 26 *plugs_file = NULL;
24 27
25/* in litmus/sync.c */ 28/* in litmus/sync.c */
26int count_tasks_waiting_for_release(void); 29int count_tasks_waiting_for_release(void);
27 30
31extern int proc_read_klmirqd_stats(char *page, char **start,
32 off_t off, int count,
33 int *eof, void *data);
34
28static int proc_read_stats(char *page, char **start, 35static int proc_read_stats(char *page, char **start,
29 off_t off, int count, 36 off_t off, int count,
30 int *eof, void *data) 37 int *eof, void *data)
@@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
161 release_master_file->write_proc = proc_write_release_master; 168 release_master_file->write_proc = proc_write_release_master;
162#endif 169#endif
163 170
171#ifdef CONFIG_LITMUS_SOFTIRQD
172 klmirqd_file =
173 create_proc_read_entry("klmirqd_stats", 0444, litmus_dir,
174 proc_read_klmirqd_stats, NULL);
175#endif
176
164 stat_file = create_proc_read_entry("stats", 0444, litmus_dir, 177 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
165 proc_read_stats, NULL); 178 proc_read_stats, NULL);
166 179
@@ -187,6 +200,10 @@ void exit_litmus_proc(void)
187 remove_proc_entry("stats", litmus_dir); 200 remove_proc_entry("stats", litmus_dir);
188 if (curr_file) 201 if (curr_file)
189 remove_proc_entry("active_plugin", litmus_dir); 202 remove_proc_entry("active_plugin", litmus_dir);
203#ifdef CONFIG_LITMUS_SOFTIRQD
204 if (klmirqd_file)
205 remove_proc_entry("klmirqd_stats", litmus_dir);
206#endif
190#ifdef CONFIG_RELEASE_MASTER 207#ifdef CONFIG_RELEASE_MASTER
191 if (release_master_file) 208 if (release_master_file)
192 remove_proc_entry("release_master", litmus_dir); 209 remove_proc_entry("release_master", litmus_dir);
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
new file mode 100644
index 000000000000..464a78d780ad
--- /dev/null
+++ b/litmus/litmus_softirq.c
@@ -0,0 +1,1205 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19/* TODO: Remove unneeded mb() and other barriers. */
20
21enum pending_flags
22{
23 LIT_TASKLET_LOW = 0x1,
24 LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
25 LIT_WORK = LIT_TASKLET_HI<<1
26};
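/* these bits are OR'd into klmirqd_info::pending to mark which of a thread's
 * queues (low/hi tasklets, work items) currently hold work */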
27
28struct klmirqd_registration
29{
30 raw_spinlock_t lock;
31 u32 nr_threads;
32 unsigned int initialized:1;
33 unsigned int shuttingdown:1;
34 struct list_head threads;
35};
36
37static atomic_t klmirqd_id_gen = ATOMIC_INIT(-1);
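/* starts at -1 so that the first atomic_inc_return() hands out id 0 */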
38
39static struct klmirqd_registration klmirqd_state;
40
41
42
43void init_klmirqd(void)
44{
45 raw_spin_lock_init(&klmirqd_state.lock);
46
47 klmirqd_state.nr_threads = 0;
48 klmirqd_state.initialized = 1;
49 klmirqd_state.shuttingdown = 0;
50 INIT_LIST_HEAD(&klmirqd_state.threads);
51}
52
53static int __klmirqd_is_ready(void)
54{
55 return (klmirqd_state.initialized == 1 && klmirqd_state.shuttingdown == 0);
56}
57
58int klmirqd_is_ready(void)
59{
60 unsigned long flags;
61 int ret;
62
63 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
64 ret = __klmirqd_is_ready();
65 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
66
67 return ret;
68}
69
70int klmirqd_is_dead(void)
71{
72 return(!klmirqd_is_ready());
73}
74
75
76void kill_klmirqd(void)
77{
78 if(!klmirqd_is_dead())
79 {
80 unsigned long flags;
81 struct list_head *pos;
82 struct list_head *q;
83
84 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
85
86 TRACE("%s: Killing all klmirqd threads! (%d of them)\n", __FUNCTION__, klmirqd_state.nr_threads);
87
88 klmirqd_state.shuttingdown = 1;
89
90 list_for_each_safe(pos, q, &klmirqd_state.threads) {
91 struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg);
92
93 if(info->terminating != 1)
94 {
95 info->terminating = 1;
96 mb(); /* just to be sure? */
97 flush_pending(info->klmirqd);
98
99 /* signal termination */
100 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
101 kthread_stop(info->klmirqd);
102 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
103 }
104 }
105
106 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
107 }
108}
109
110
111
112void kill_klmirqd_thread(struct task_struct* klmirqd_thread)
113{
114 unsigned long flags;
115 struct klmirqd_info* info;
116
117 if (!tsk_rt(klmirqd_thread)->is_interrupt_thread) {
118 TRACE("%s/%d is not a klmirqd thread\n", klmirqd_thread->comm, klmirqd_thread->pid);
119 return;
120 }
121
122 TRACE("%s: Killing klmirqd thread %s/%d\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
123
124 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
125
126 info = tsk_rt(klmirqd_thread)->klmirqd_info;
127
128 if(info->terminating != 1) {
129 info->terminating = 1;
130 mb();
131
132 flush_pending(klmirqd_thread);
133 kthread_stop(klmirqd_thread);
134 }
135
136 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
137}
138
139struct klmirqd_launch_data
140{
141 int cpu_affinity;
142 klmirqd_callback_t* cb;
143 char name[MAX_KLMIRQD_NAME_LEN+1];
144 struct work_struct work;
145};
146
147static int run_klmirqd(void* callback);
148
149
150/* executed by a kworker from workqueues */
151static void __launch_klmirqd_thread(struct work_struct *work)
152{
153 int id;
154 struct task_struct* thread = NULL;
155 struct klmirqd_launch_data* launch_data =
156 container_of(work, struct klmirqd_launch_data, work);
157
158 TRACE("Creating klmirqd thread\n");
159
160
161
162 if (launch_data->cpu_affinity != -1) {
163 if (launch_data->name[0] == '\0') {
164 id = atomic_inc_return(&klmirqd_id_gen);
165 TRACE("Launching klmirqd_th%d/%d\n", id, launch_data->cpu_affinity);
166
167 thread = kthread_create(
168 run_klmirqd,
169	/* the callback is passed as the kthread's data argument; run_klmirqd() casts it back */
170 (void*)launch_data->cb,
171 "klmirqd_th%d/%d",
172 id,
173 launch_data->cpu_affinity);
174 }
175 else {
176 TRACE("Launching %s/%d\n", launch_data->name, launch_data->cpu_affinity);
177
178 thread = kthread_create(
179 run_klmirqd,
180	/* the callback is passed as the kthread's data argument; run_klmirqd() casts it back */
181 (void*)launch_data->cb,
182 "%s/%d",
183 launch_data->name,
184 launch_data->cpu_affinity);
185 }
186
187	/* litmus will put us in the right cluster. */
188 kthread_bind(thread, launch_data->cpu_affinity);
189 }
190 else {
191 if (launch_data->name[0] == '\0') {
192 id = atomic_inc_return(&klmirqd_id_gen);
193 TRACE("Launching klmirqd_th%d\n", id);
194
195 thread = kthread_create(
196 run_klmirqd,
197	/* the callback is passed as the kthread's data argument; run_klmirqd() casts it back */
198 (void*)launch_data->cb,
199 "klmirqd_th%d",
200 id);
201
202 }
203 else {
204 TRACE("Launching %s\n", launch_data->name);
205
206 thread = kthread_create(
207 run_klmirqd,
208	/* the callback is passed as the kthread's data argument; run_klmirqd() casts it back */
209 (void*)launch_data->cb,
210 launch_data->name);
211 }
212
213
214 }
215
216 if (thread) {
217 wake_up_process(thread);
218 }
219 else {
220 TRACE("Could not create thread!\n");
221 }
222
223 kfree(launch_data);
224}
225
226
227int launch_klmirqd_thread(char* name, int cpu, klmirqd_callback_t* cb)
228{
229 struct klmirqd_launch_data* delayed_launch;
230
231 if (!klmirqd_is_ready()) {
232 TRACE("klmirqd is not ready. Check that it was initialized!\n");
233 return -1;
234 }
235
236	/* hand thread creation off to a workqueue: we can't make scheduling
237	   calls here since we may be in an atomic context. */
238	delayed_launch = kmalloc(sizeof(struct klmirqd_launch_data), GFP_ATOMIC);
	if (!delayed_launch)
		return -ENOMEM; /* GFP_ATOMIC allocations can fail */
239 delayed_launch->cpu_affinity = cpu;
240 delayed_launch->cb = cb;
241 INIT_WORK(&delayed_launch->work, __launch_klmirqd_thread);
242
243 if(name) {
244 snprintf(delayed_launch->name, MAX_KLMIRQD_NAME_LEN+1, "%s", name);
245 }
246 else {
247 delayed_launch->name[0] = '\0';
248 }
249
250 schedule_work(&delayed_launch->work);
251
252 return 0;
253}
254
255
256
257
258#define KLMIRQD_SLICE_NR_JIFFIES 1
259#define KLMIRQD_SLICE_NS ((NSEC_PER_SEC / HZ) * KLMIRQD_SLICE_NR_JIFFIES)
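/* one scheduler tick, expressed in nanoseconds; used below as the dummy
 * period/budget for klmirqd daemon threads admitted to LITMUS^RT */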
260
261static int become_litmus_daemon(struct task_struct* tsk)
262{
263 int ret = 0;
264
265 struct rt_task tp = {
266		.period = KLMIRQD_SLICE_NS, /* dummy one-jiffy period */
267 .relative_deadline = KLMIRQD_SLICE_NS,
268 .exec_cost = KLMIRQD_SLICE_NS,
269 .phase = 0,
270 .cpu = task_cpu(current),
271 .budget_policy = NO_ENFORCEMENT,
272 .budget_signal_policy = NO_SIGNALS,
273 .cls = RT_CLASS_BEST_EFFORT
274 };
275
276 struct sched_param param = { .sched_priority = 0};
277
278 TRACE_CUR("Setting %s/%d as daemon thread.\n", tsk->comm, tsk->pid);
279
280 /* set task params */
281 tsk_rt(tsk)->task_params = tp;
282 tsk_rt(tsk)->is_interrupt_thread = 1;
283
284 /* inform the OS we're SCHED_LITMUS --
285 sched_setscheduler_nocheck() calls litmus_admit_task(). */
286 sched_setscheduler_nocheck(tsk, SCHED_LITMUS, &param);
287
288 return ret;
289}
290
291static int become_normal_daemon(struct task_struct* tsk)
292{
293 int ret = 0;
294
295 struct sched_param param = { .sched_priority = 0};
296 sched_setscheduler_nocheck(tsk, SCHED_NORMAL, &param);
297
298 return ret;
299}
300
301static int register_klmirqd(struct task_struct* tsk)
302{
303 int retval = 0;
304 unsigned long flags;
305 struct klmirqd_info *info = NULL;
306
307 if (!tsk_rt(tsk)->is_interrupt_thread) {
308 TRACE("Only proxy threads already running in Litmus may become klmirqd threads!\n");
309 WARN_ON(1);
310 retval = -1;
311 goto out;
312 }
313
314 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
315
316 if (!__klmirqd_is_ready()) {
317 TRACE("klmirqd is not ready! Did you forget to initialize it?\n");
318 WARN_ON(1);
319 retval = -1;
320 goto out_unlock;
321 }
322
323 /* allocate and initialize klmirqd data for the thread */
324 info = kmalloc(sizeof(struct klmirqd_info), GFP_KERNEL);
325 if (!info) {
326 TRACE("Failed to allocate klmirqd_info struct!\n");
327 retval = -1; /* todo: pick better code */
328 goto out_unlock;
329 }
330 memset(info, 0, sizeof(struct klmirqd_info));
331 info->klmirqd = tsk;
332 info->pending_tasklets_hi.tail = &info->pending_tasklets_hi.head;
333 info->pending_tasklets.tail = &info->pending_tasklets.head;
334 INIT_LIST_HEAD(&info->worklist);
335 INIT_LIST_HEAD(&info->klmirqd_reg);
336 raw_spin_lock_init(&info->lock);
337
338
339 /* now register with klmirqd */
340 list_add_tail(&info->klmirqd_reg, &klmirqd_state.threads);
341 ++klmirqd_state.nr_threads;
342
343 /* update the task struct to point to klmirqd info */
344 tsk_rt(tsk)->klmirqd_info = info;
345
346out_unlock:
347 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
348
349out:
350 return retval;
351}
352
353static int unregister_klmirqd(struct task_struct* tsk)
354{
355 int retval = 0;
356 unsigned long flags;
357 struct klmirqd_info *info = tsk_rt(tsk)->klmirqd_info;
358
359 if (!tsk_rt(tsk)->is_interrupt_thread || !info) {
360 TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid);
361 WARN_ON(1);
362 retval = -1;
363 goto out;
364 }
365
366 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
367
368 /* remove the entry in the klmirqd thread list */
369 list_del(&info->klmirqd_reg);
370 mb();
371 --klmirqd_state.nr_threads;
372
373 /* remove link to klmirqd info from thread */
374 tsk_rt(tsk)->klmirqd_info = NULL;
375
376 /* clean up memory */
377 kfree(info);
378
379 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
380
381out:
382 return retval;
383}
384
385
386
387
388
389
390int proc_read_klmirqd_stats(char *page, char **start,
391 off_t off, int count,
392 int *eof, void *data)
393{
394 unsigned long flags;
395 int len;
396
397 raw_spin_lock_irqsave(&klmirqd_state.lock, flags);
398
399 if (klmirqd_state.initialized) {
400 if (!klmirqd_state.shuttingdown) {
401 struct list_head *pos;
402
403 len = snprintf(page, PAGE_SIZE,
404 "num ready klmirqds: %d\n\n",
405 klmirqd_state.nr_threads);
406
407 list_for_each(pos, &klmirqd_state.threads) {
408 struct klmirqd_info* info = list_entry(pos, struct klmirqd_info, klmirqd_reg);
409
410 len +=
411 snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
412 "klmirqd_thread: %s/%d\n"
413 "\tcurrent_owner: %s/%d\n"
414 "\tpending: %x\n"
415 "\tnum hi: %d\n"
416 "\tnum low: %d\n"
417 "\tnum work: %d\n\n",
418 info->klmirqd->comm, info->klmirqd->pid,
419 (info->current_owner != NULL) ?
420 info->current_owner->comm : "(null)",
421 (info->current_owner != NULL) ?
422 info->current_owner->pid : 0,
423 info->pending,
424 atomic_read(&info->num_hi_pending),
425 atomic_read(&info->num_low_pending),
426 atomic_read(&info->num_work_pending));
427 }
428 }
429 else {
430 len = snprintf(page, PAGE_SIZE, "klmirqd is shutting down\n");
431 }
432 }
433 else {
434 len = snprintf(page, PAGE_SIZE, "klmirqd is not initialized!\n");
435 }
436
437 raw_spin_unlock_irqrestore(&klmirqd_state.lock, flags);
438
439 return(len);
440}
441
442
443
444
445
446#if 0
447static atomic_t dump_id = ATOMIC_INIT(0);
448
449static void __dump_state(struct klmirqd_info* which, const char* caller)
450{
451 struct tasklet_struct* list;
452
453 int id = atomic_inc_return(&dump_id);
454
455 //if(in_interrupt())
456 {
457 if(which->current_owner)
458 {
459 TRACE("(id: %d caller: %s)\n"
460 "klmirqd: %s/%d\n"
461 "current owner: %s/%d\n"
462 "pending: %x\n",
463 id, caller,
464 which->klmirqd->comm, which->klmirqd->pid,
465 which->current_owner->comm, which->current_owner->pid,
466 which->pending);
467 }
468 else
469 {
470 TRACE("(id: %d caller: %s)\n"
471 "klmirqd: %s/%d\n"
472 "current owner: %p\n"
473 "pending: %x\n",
474 id, caller,
475 which->klmirqd->comm, which->klmirqd->pid,
476 NULL,
477 which->pending);
478 }
479
480 list = which->pending_tasklets.head;
481 while(list)
482 {
483 struct tasklet_struct *t = list;
484 list = list->next; /* advance */
485 if(t->owner)
486 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
487 else
488 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
489 }
490 }
491}
492
493static void dump_state(struct klmirqd_info* which, const char* caller)
494{
495 unsigned long flags;
496
497 raw_spin_lock_irqsave(&which->lock, flags);
498 __dump_state(which, caller);
499 raw_spin_unlock_irqrestore(&which->lock, flags);
500}
501#endif
502
503
504
505
506
507
508
509
510
511
512
513/* forward declarations */
514static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
515 struct klmirqd_info *which,
516 int wakeup);
517static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
518 struct klmirqd_info *which,
519 int wakeup);
520static void ___litmus_schedule_work(struct work_struct *w,
521 struct klmirqd_info *which,
522 int wakeup);
523
524
525inline static u32 litirq_pending_hi_irqoff(struct klmirqd_info* which)
526{
527 return (which->pending & LIT_TASKLET_HI);
528}
529
530inline static u32 litirq_pending_low_irqoff(struct klmirqd_info* which)
531{
532 return (which->pending & LIT_TASKLET_LOW);
533}
534
535inline static u32 litirq_pending_work_irqoff(struct klmirqd_info* which)
536{
537 return (which->pending & LIT_WORK);
538}
539
540inline static u32 litirq_pending_irqoff(struct klmirqd_info* which)
541{
542 return(which->pending);
543}
544
545
546inline static u32 litirq_pending(struct klmirqd_info* which)
547{
548 unsigned long flags;
549 u32 pending;
550
551 raw_spin_lock_irqsave(&which->lock, flags);
552 pending = litirq_pending_irqoff(which);
553 raw_spin_unlock_irqrestore(&which->lock, flags);
554
555 return pending;
556};
557
558static void wakeup_litirqd_locked(struct klmirqd_info* which)
559{
560 /* Interrupts are disabled: no need to stop preemption */
561 if (which && which->klmirqd)
562 {
563 if(which->klmirqd->state != TASK_RUNNING)
564 {
565 TRACE("%s: Waking up klmirqd: %s/%d\n", __FUNCTION__,
566 which->klmirqd->comm, which->klmirqd->pid);
567
568 wake_up_process(which->klmirqd);
569 }
570 }
571}
572
573
574static void do_lit_tasklet(struct klmirqd_info* which,
575 struct tasklet_head* pending_tasklets)
576{
577 unsigned long flags;
578 struct tasklet_struct *list;
579 atomic_t* count;
580
581 raw_spin_lock_irqsave(&which->lock, flags);
582
583 //__dump_state(which, "do_lit_tasklet: before steal");
584
585 /* copy out the tasklets for our private use. */
586 list = pending_tasklets->head;
587 pending_tasklets->head = NULL;
588 pending_tasklets->tail = &pending_tasklets->head;
589
590 /* remove pending flag */
591 which->pending &= (pending_tasklets == &which->pending_tasklets) ?
592 ~LIT_TASKLET_LOW :
593 ~LIT_TASKLET_HI;
594
595 count = (pending_tasklets == &which->pending_tasklets) ?
596 &which->num_low_pending:
597 &which->num_hi_pending;
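	/* 'count' mirrors how many tasklets of this class are still queued;
	 * it is decremented below as each tasklet is successfully run */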
598
599 //__dump_state(which, "do_lit_tasklet: after steal");
600
601 raw_spin_unlock_irqrestore(&which->lock, flags);
602
603
604 while(list)
605 {
606 struct tasklet_struct *t = list;
607
608 /* advance, lest we forget */
609 list = list->next;
610
611 /* execute tasklet if it has my priority and is free */
612 if (tasklet_trylock(t)) {
613 if (!atomic_read(&t->count)) {
614
615 sched_trace_tasklet_begin(t->owner);
616
617 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
618 {
619 BUG();
620 }
621 TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
622 t->func(t->data);
623 tasklet_unlock(t);
624
625 atomic_dec(count);
626
627 sched_trace_tasklet_end(t->owner, 0ul);
628
629 continue; /* process more tasklets */
630 }
631 tasklet_unlock(t);
632 }
633
634 TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
635
636 /* couldn't process tasklet. put it back at the end of the queue. */
637 if(pending_tasklets == &which->pending_tasklets)
638 ___litmus_tasklet_schedule(t, which, 0);
639 else
640 ___litmus_tasklet_hi_schedule(t, which, 0);
641 }
642}
643
644
645// Drain this thread's pending HI and LOW tasklet queues. Must be called
646// from the klmirqd thread itself and never from interrupt context.
647static void do_litirq(struct klmirqd_info* which)
648{
649 u32 pending;
650
651 if(in_interrupt())
652 {
653 TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
654 return;
655 }
656
657 if(which->klmirqd != current)
658 {
659 TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
660 __FUNCTION__, current->comm, current->pid,
661 which->klmirqd->comm, which->klmirqd->pid);
662 return;
663 }
664
665 if(!is_realtime(current))
666 {
667 TRACE_CUR("%s: exiting early: klmirqd is not real-time. Sched Policy = %d\n",
668 __FUNCTION__, current->policy);
669 return;
670 }
671
672
673 /* We only handle tasklets & work objects, no need for RCU triggers? */
674
675 pending = litirq_pending(which);
676 if(pending) {
677 /* extract the work to do and do it! */
678 if(pending & LIT_TASKLET_HI) {
679 TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
680 do_lit_tasklet(which, &which->pending_tasklets_hi);
681 }
682
683 if(pending & LIT_TASKLET_LOW) {
684 TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
685 do_lit_tasklet(which, &which->pending_tasklets);
686 }
687 }
688}
689
690
691static void do_work(struct klmirqd_info* which)
692{
693 unsigned long flags;
694 struct work_struct* work;
695 work_func_t f;
696
697 // only execute one work-queue item to yield to tasklets.
698 // ...is this a good idea, or should we just batch them?
699 raw_spin_lock_irqsave(&which->lock, flags);
700
701 if(!litirq_pending_work_irqoff(which))
702 {
703 raw_spin_unlock_irqrestore(&which->lock, flags);
704 goto no_work;
705 }
706
707 work = list_first_entry(&which->worklist, struct work_struct, entry);
708 list_del_init(&work->entry);
709
710 if(list_empty(&which->worklist))
711 {
712 which->pending &= ~LIT_WORK;
713 }
714
715 raw_spin_unlock_irqrestore(&which->lock, flags);
716
717
718 TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
719 // do the work!
720 work_clear_pending(work);
721 f = work->func;
722 f(work); /* can't touch 'work' after this point,
723 the user may have freed it. */
724
725 atomic_dec(&which->num_work_pending);
726
727no_work:
728 return;
729}
730
731
732
733/* main loop for a klmirqd thread */
734static int run_klmirqd(void* callback)
735{
736 int retval = 0;
737 struct klmirqd_info* info = NULL;
738 klmirqd_callback_t* cb = (klmirqd_callback_t*)(callback);
739
740 retval = become_litmus_daemon(current);
741 if (retval != 0) {
742 TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
743 goto failed;
744 }
745
746 retval = register_klmirqd(current);
747 if (retval != 0) {
748 TRACE_CUR("%s: Failed to become a klmirqd thread.\n", __FUNCTION__);
749 goto failed_sched_normal;
750 }
751
752 if (cb && cb->func) {
753 retval = cb->func(cb->arg);
754 if (retval != 0) {
755 TRACE_CUR("%s: klmirqd callback reported failure. retval = %d\n", __FUNCTION__, retval);
756 goto failed_unregister;
757 }
758 }
759
760 /* enter the interrupt handling workloop */
761
762 info = tsk_rt(current)->klmirqd_info;
763
764 set_current_state(TASK_INTERRUPTIBLE);
765
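	/* standard kthread sleep/wake pattern: we are marked TASK_INTERRUPTIBLE
	 * before each check for pending work, so a wakeup from
	 * wakeup_litirqd_locked() cannot be lost between the check and the call
	 * to schedule() */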
766 while (!kthread_should_stop())
767 {
768 preempt_disable();
769 if (!litirq_pending(info))
770 {
771 /* sleep for work */
772 TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
773 __FUNCTION__);
774 preempt_enable_no_resched();
775 schedule();
776
777 if(kthread_should_stop()) /* bail out */
778 {
779 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
780 continue;
781 }
782
783 preempt_disable();
784 }
785
786 __set_current_state(TASK_RUNNING);
787
788 while (litirq_pending(info))
789 {
790 preempt_enable_no_resched();
791
792 if(kthread_should_stop())
793 {
794 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
795 break;
796 }
797
798 preempt_disable();
799
800 /* Double check that there's still pending work and the owner hasn't
801 * changed. Pending items may have been flushed while we were sleeping.
802 */
803 if(litirq_pending(info))
804 {
805 TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
806 __FUNCTION__);
807
808 do_litirq(info);
809
810 preempt_enable_no_resched();
811
812 // work objects are preemptible.
813 do_work(info);
814 }
815 else
816 {
817 TRACE_CUR("%s: Pending work was flushed!\n", __FUNCTION__);
818
819 preempt_enable_no_resched();
820 }
821
822 cond_resched();
823 preempt_disable();
824 }
825 preempt_enable();
826 set_current_state(TASK_INTERRUPTIBLE);
827 }
828 __set_current_state(TASK_RUNNING);
829
830failed_unregister:
831 /* remove our registration from klmirqd */
832 unregister_klmirqd(current);
833
834failed_sched_normal:
835 become_normal_daemon(current);
836
837failed:
838 return retval;
839}
840
841
842void flush_pending(struct task_struct* tsk)
843{
844 unsigned long flags;
845 struct tasklet_struct *list;
846 u32 work_flushed = 0;
847
848 struct klmirqd_info *which;
849
850 if (!tsk_rt(tsk)->is_interrupt_thread) {
851 TRACE("%s/%d is not a proxy thread\n", tsk->comm, tsk->pid);
852 WARN_ON(1);
853 return;
854 }
855
856 which = tsk_rt(tsk)->klmirqd_info;
857 if (!which) {
858 TRACE("%s/%d is not a klmirqd thread!\n", tsk->comm, tsk->pid);
859 WARN_ON(1);
860 return;
861 }
862
863
864 raw_spin_lock_irqsave(&which->lock, flags);
865
866 //__dump_state(which, "flush_pending: before");
867
868 // flush hi tasklets.
869 if(litirq_pending_hi_irqoff(which))
870 {
871 which->pending &= ~LIT_TASKLET_HI;
872
873 list = which->pending_tasklets_hi.head;
874 which->pending_tasklets_hi.head = NULL;
875 which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
876
877 TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
878
879 while(list)
880 {
881 struct tasklet_struct *t = list;
882 list = list->next;
883
884 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
885 {
886 BUG();
887 }
888
889 work_flushed |= LIT_TASKLET_HI;
890
891 t->owner = NULL;
892
893			// hand the tasklet back to Linux: re-set the SCHED bit (it was just cleared above, so this should never fail) and requeue it with the stock hi-tasklet machinery
894 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
895 {
896 atomic_dec(&which->num_hi_pending);
897 ___tasklet_hi_schedule(t);
898 }
899 else
900 {
901 TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
902 BUG();
903 }
904
905 }
906 }
907
908 // flush low tasklets.
909 if(litirq_pending_low_irqoff(which))
910 {
911 which->pending &= ~LIT_TASKLET_LOW;
912
913 list = which->pending_tasklets.head;
914 which->pending_tasklets.head = NULL;
915 which->pending_tasklets.tail = &which->pending_tasklets.head;
916
917 TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
918
919 while(list)
920 {
921 struct tasklet_struct *t = list;
922 list = list->next;
923
924 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
925 {
926 BUG();
927 }
928
929 work_flushed |= LIT_TASKLET_LOW;
930
931 t->owner = NULL;
932// sched_trace_tasklet_end(owner, 1ul);
933
934 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
935 {
936 atomic_dec(&which->num_low_pending);
937 ___tasklet_schedule(t);
938 }
939 else
940 {
941 TRACE("%s: dropped tasklet??\n", __FUNCTION__);
942 BUG();
943 }
944 }
945 }
946
947 // flush work objects
948 if(litirq_pending_work_irqoff(which))
949 {
950 which->pending &= ~LIT_WORK;
951
952 TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
953
954 while(!list_empty(&which->worklist))
955 {
956 struct work_struct* work =
957 list_first_entry(&which->worklist, struct work_struct, entry);
958 list_del_init(&work->entry);
959
960 work_flushed |= LIT_WORK;
961 atomic_dec(&which->num_work_pending);
962
963 work->owner = NULL;
964// sched_trace_work_end(owner, current, 1ul);
965 __schedule_work(work);
966 }
967 }
968
969 //__dump_state(which, "flush_pending: after (before reeval prio)");
970
971
972 mb(); /* commit changes to pending flags */
973
974 raw_spin_unlock_irqrestore(&which->lock, flags);
975}
976
977
978
979
980static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
981 struct klmirqd_info *which,
982 int wakeup)
983{
984 unsigned long flags;
985 u32 old_pending;
986
987 t->next = NULL;
988
989 raw_spin_lock_irqsave(&which->lock, flags);
990
991 //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
992
993 *(which->pending_tasklets.tail) = t;
994 which->pending_tasklets.tail = &t->next;
995
996 old_pending = which->pending;
997 which->pending |= LIT_TASKLET_LOW;
998
999 atomic_inc(&which->num_low_pending);
1000
1001 mb();
1002
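	/* only wake the thread on a 0 -> non-zero transition of the pending
	 * mask; if work was already pending, the klmirqd thread is either
	 * running or will re-check before sleeping */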
1003 if(!old_pending && wakeup)
1004 {
1005 wakeup_litirqd_locked(which); /* wake up the klmirqd */
1006 }
1007
1008 //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
1009
1010 raw_spin_unlock_irqrestore(&which->lock, flags);
1011}
1012
1013
1014int __litmus_tasklet_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread)
1015{
1016 int ret = 0; /* assume failure */
1017 struct klmirqd_info* info;
1018
1019 if (unlikely(!is_realtime(klmirqd_thread) ||
1020 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1021 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1022		TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1023 return ret;
1024 }
1025
1026 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1027
1028 if (likely(!info->terminating)) {
1029 ret = 1;
1030 ___litmus_tasklet_schedule(t, info, 1);
1031 }
1032 else {
1033		TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1034 }
1035 return(ret);
1036}
1037
1038EXPORT_SYMBOL(__litmus_tasklet_schedule);
1039
1040
1041static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1042 struct klmirqd_info *which,
1043 int wakeup)
1044{
1045 unsigned long flags;
1046 u32 old_pending;
1047
1048 t->next = NULL;
1049
1050 raw_spin_lock_irqsave(&which->lock, flags);
1051
1052 *(which->pending_tasklets_hi.tail) = t;
1053 which->pending_tasklets_hi.tail = &t->next;
1054
1055 old_pending = which->pending;
1056 which->pending |= LIT_TASKLET_HI;
1057
1058 atomic_inc(&which->num_hi_pending);
1059
1060 mb();
1061
1062 if(!old_pending && wakeup)
1063 {
1064 wakeup_litirqd_locked(which); /* wake up the klmirqd */
1065 }
1066
1067 raw_spin_unlock_irqrestore(&which->lock, flags);
1068}
1069
1070int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, struct task_struct* klmirqd_thread)
1071{
1072 int ret = 0; /* assume failure */
1073 struct klmirqd_info* info;
1074
1075 if (unlikely(!is_realtime(klmirqd_thread) ||
1076 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1077 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1078		TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1079 return ret;
1080 }
1081
1082 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1083
1084 if (likely(!info->terminating)) {
1085 ret = 1;
1086 ___litmus_tasklet_hi_schedule(t, info, 1);
1087 }
1088 else {
1089		TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1090 }
1091
1092 return(ret);
1093}
1094
1095EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
1096
1097
1098int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, struct task_struct* klmirqd_thread)
1099{
1100 int ret = 0; /* assume failure */
1101 u32 old_pending;
1102 struct klmirqd_info* info;
1103
1104 BUG_ON(!irqs_disabled());
1105
1106 if (unlikely(!is_realtime(klmirqd_thread) ||
1107 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1108 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1109		TRACE("%s: %s/%d can't handle tasklets\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1110 return ret;
1111 }
1112
1113 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1114
1115 if (likely(!info->terminating)) {
1116
1117 raw_spin_lock(&info->lock);
1118
1119 ret = 1; // success!
1120
1121 t->next = info->pending_tasklets_hi.head;
1122 info->pending_tasklets_hi.head = t;
1123
1124 old_pending = info->pending;
1125 info->pending |= LIT_TASKLET_HI;
1126
1127 atomic_inc(&info->num_hi_pending);
1128
1129 mb();
1130
1131 if(!old_pending) {
1132 wakeup_litirqd_locked(info); /* wake up the klmirqd */
1133 }
1134
1135 raw_spin_unlock(&info->lock);
1136 }
1137 else {
1138		TRACE("%s: Tasklet rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1139 }
1140
1141 return(ret);
1142}
1143
1144EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
1145
1146
1147
1148static void ___litmus_schedule_work(struct work_struct *w,
1149 struct klmirqd_info *which,
1150 int wakeup)
1151{
1152 unsigned long flags;
1153 u32 old_pending;
1154
1155 raw_spin_lock_irqsave(&which->lock, flags);
1156
1157 work_pending(w);
1158 list_add_tail(&w->entry, &which->worklist);
1159
1160 old_pending = which->pending;
1161 which->pending |= LIT_WORK;
1162
1163 atomic_inc(&which->num_work_pending);
1164
1165 mb();
1166
1167 if(!old_pending && wakeup)
1168 {
1169 wakeup_litirqd_locked(which); /* wakeup the klmirqd */
1170 }
1171
1172 raw_spin_unlock_irqrestore(&which->lock, flags);
1173}
1174
1175int __litmus_schedule_work(struct work_struct *w, struct task_struct* klmirqd_thread)
1176{
1177 int ret = 1; /* assume success */
1178 struct klmirqd_info* info;
1179
1180 if (unlikely(!is_realtime(klmirqd_thread) ||
1181 !tsk_rt(klmirqd_thread)->is_interrupt_thread ||
1182 !tsk_rt(klmirqd_thread)->klmirqd_info)) {
1183		TRACE("%s: %s/%d can't handle work items\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1184 return ret;
1185 }
1186
1187 info = tsk_rt(klmirqd_thread)->klmirqd_info;
1188
1189
1190 if (likely(!info->terminating)) {
1191 ___litmus_schedule_work(w, info, 1);
1192 }
1193 else {
1194		TRACE("%s: Work rejected because %s/%d is terminating\n", __FUNCTION__, klmirqd_thread->comm, klmirqd_thread->pid);
1195 ret = 0;
1196 }
1197
1198 return(ret);
1199}
1200EXPORT_SYMBOL(__litmus_schedule_work);
1201
1202
1203
1204
1205
diff --git a/litmus/locking.c b/litmus/locking.c
index 43d9aece2e74..c21ec1ae36d7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -8,8 +8,17 @@
8#include <litmus/litmus.h> 8#include <litmus/litmus.h>
9#include <litmus/sched_plugin.h> 9#include <litmus/sched_plugin.h>
10#include <litmus/trace.h> 10#include <litmus/trace.h>
11#include <litmus/litmus.h>
11#include <litmus/wait.h> 12#include <litmus/wait.h>
12 13
14#ifdef CONFIG_LITMUS_DGL_SUPPORT
15#include <linux/uaccess.h>
16#endif
17
18#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
19#include <litmus/gpu_affinity.h>
20#endif
21
13static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); 22static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
14static int open_generic_lock(struct od_table_entry* entry, void* __user arg); 23static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
15static int close_generic_lock(struct od_table_entry* entry); 24static int close_generic_lock(struct od_table_entry* entry);
@@ -22,6 +31,9 @@ struct fdso_ops generic_lock_ops = {
22 .destroy = destroy_generic_lock 31 .destroy = destroy_generic_lock
23}; 32};
24 33
34static atomic_t lock_id_gen = ATOMIC_INIT(0);
35
36
25static inline bool is_lock(struct od_table_entry* entry) 37static inline bool is_lock(struct od_table_entry* entry)
26{ 38{
27 return entry->class == &generic_lock_ops; 39 return entry->class == &generic_lock_ops;
@@ -39,8 +51,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar
39 int err; 51 int err;
40 52
41 err = litmus->allocate_lock(&lock, type, arg); 53 err = litmus->allocate_lock(&lock, type, arg);
42 if (err == 0) 54 if (err == 0) {
55#ifdef CONFIG_LITMUS_NESTED_LOCKING
56 lock->nest.lock = lock;
57 lock->nest.hp_waiter_eff_prio = NULL;
58
59 INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node);
60 if(!lock->nest.hp_waiter_ptr) {
61 TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in "
62 "most uses. (exception: IKGLP donors)\n");
63 }
64#endif
65 lock->type = type;
66 lock->ident = atomic_inc_return(&lock_id_gen);
43 *obj_ref = lock; 67 *obj_ref = lock;
68 }
44 return err; 69 return err;
45} 70}
46 71
@@ -83,7 +108,8 @@ asmlinkage long sys_litmus_lock(int lock_od)
83 entry = get_entry_for_od(lock_od); 108 entry = get_entry_for_od(lock_od);
84 if (entry && is_lock(entry)) { 109 if (entry && is_lock(entry)) {
85 l = get_lock(entry); 110 l = get_lock(entry);
86 TRACE_CUR("attempts to lock 0x%p\n", l); 111 //TRACE_CUR("attempts to lock 0x%p\n", l);
112 TRACE_CUR("attempts to lock %d\n", l->ident);
87 err = l->ops->lock(l); 113 err = l->ops->lock(l);
88 } 114 }
89 115
@@ -111,7 +137,8 @@ asmlinkage long sys_litmus_unlock(int lock_od)
111 entry = get_entry_for_od(lock_od); 137 entry = get_entry_for_od(lock_od);
112 if (entry && is_lock(entry)) { 138 if (entry && is_lock(entry)) {
113 l = get_lock(entry); 139 l = get_lock(entry);
114 TRACE_CUR("attempts to unlock 0x%p\n", l); 140 //TRACE_CUR("attempts to unlock 0x%p\n", l);
141 TRACE_CUR("attempts to unlock %d\n", l->ident);
115 err = l->ops->unlock(l); 142 err = l->ops->unlock(l);
116 } 143 }
117 144
@@ -138,6 +165,365 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
138 return(t); 165 return(t);
139} 166}
140 167
168#ifdef CONFIG_LITMUS_NESTED_LOCKING
169
170void print_hp_waiters(struct binheap_node* n, int depth)
171{
172 struct litmus_lock *l;
173 struct nested_info *nest;
174 char padding[81] = " ";
175 struct task_struct *hp = NULL;
176 struct task_struct *hp_eff = NULL;
177 struct task_struct *node_prio = NULL;
178
179
180 if(n == NULL) {
181 TRACE("+-> %p\n", NULL);
182 return;
183 }
184
185 nest = binheap_entry(n, struct nested_info, hp_binheap_node);
186 l = nest->lock;
187
188 if(depth*2 <= 80)
189 padding[depth*2] = '\0';
190
191 if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) {
192 hp = *(nest->hp_waiter_ptr);
193
194 if(tsk_rt(hp)->inh_task) {
195 hp_eff = tsk_rt(hp)->inh_task;
196 }
197 }
198
199 node_prio = nest->hp_waiter_eff_prio;
200
201 TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n",
202 padding,
203 (node_prio) ? node_prio->comm : "nil",
204 (node_prio) ? node_prio->pid : -1,
205 (hp) ? hp->comm : "nil",
206 (hp) ? hp->pid : -1,
207 (hp_eff) ? hp_eff->comm : "nil",
208 (hp_eff) ? hp_eff->pid : -1,
209 l->ident);
210
211 if(n->left) print_hp_waiters(n->left, depth+1);
212 if(n->right) print_hp_waiters(n->right, depth+1);
213}
214#endif
215
216
217#ifdef CONFIG_LITMUS_DGL_SUPPORT
218
219void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
220{
221 /*
222 We pick the next lock in reverse order. This causes inheritance propagation
223 from locks received earlier to flow in the same direction as regular nested
224 locking. This might make fine-grain DGL easier in the future.
225 */
226
227 BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);
228
229 //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock);
230
231 // note reverse order
232 for(dgl_wait->last_primary = dgl_wait->last_primary - 1;
233 dgl_wait->last_primary >= 0;
234 --(dgl_wait->last_primary)){
235 if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner(
236 dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) {
237
238 tsk_rt(dgl_wait->task)->blocked_lock =
239 dgl_wait->locks[dgl_wait->last_primary];
240 mb();
241
242 TRACE_CUR("New blocked lock is %d\n",
243 dgl_wait->locks[dgl_wait->last_primary]->ident);
244
245 break;
246 }
247 }
248}
249
250int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
251{
252 // should never be called.
253 BUG();
254 return 1;
255}
256
257void __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
258 dgl_wait_state_t** dgl_wait,
259 struct task_struct **task)
260{
261 wait_queue_t *q;
262
263 *dgl_wait = NULL;
264 *task = NULL;
265
266 if (waitqueue_active(wq)) {
267 q = list_entry(wq->task_list.next,
268 wait_queue_t, task_list);
269
270 if(q->func == dgl_wake_up) {
271 *dgl_wait = (dgl_wait_state_t*) q->private;
272 }
273 else {
274 *task = (struct task_struct*) q->private;
275 }
276
277 __remove_wait_queue(wq, q);
278 }
279}
280
281void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait)
282{
283 init_waitqueue_entry(wq_node, dgl_wait->task);
284 wq_node->private = dgl_wait;
285 wq_node->func = dgl_wake_up;
286}
287
288
289static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
290{
291 int i;
292 unsigned long irqflags; //, dummyflags;
293 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
294
295 BUG_ON(dgl_wait->task != current);
296
297 raw_spin_lock_irqsave(dgl_lock, irqflags);
298
299
300 dgl_wait->nr_remaining = dgl_wait->size;
301
302 TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size);
303
304 // try to acquire each lock. enqueue (non-blocking) if it is unavailable.
305 for(i = 0; i < dgl_wait->size; ++i) {
306 struct litmus_lock *l = dgl_wait->locks[i];
307
308 // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.
309
310 if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) {
311 --(dgl_wait->nr_remaining);
312			TRACE_CUR("Acquired lock %d immediately.\n", l->ident);
313 }
314 }
315
316 if(dgl_wait->nr_remaining == 0) {
317		// acquired entire group immediately
318		TRACE_CUR("Acquired all locks in DGL immediately!\n");
319 }
320 else {
321
322 TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
323 dgl_wait->nr_remaining);
324
325#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
326		// KLUDGE: don't count this suspension as time in the
327		// gpu critical section
328 if(tsk_rt(dgl_wait->task)->held_gpus) {
329 tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
330 }
331#endif
332
333 // note reverse order. see comments in select_next_lock for reason.
334 for(i = dgl_wait->size - 1; i >= 0; --i) {
335 struct litmus_lock *l = dgl_wait->locks[i];
336 if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe
337
338 TRACE_CUR("Activating priority inheritance on lock %d\n",
339 l->ident);
340
341 TS_DGL_LOCK_SUSPEND;
342
343 l->ops->enable_priority(l, dgl_wait);
344 dgl_wait->last_primary = i;
345
346 TRACE_CUR("Suspending for lock %d\n", l->ident);
347
348 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
349
350 schedule(); // suspend!!!
351
352 TS_DGL_LOCK_RESUME;
353
354 TRACE_CUR("Woken up from DGL suspension.\n");
355
356 goto all_acquired; // we should hold all locks when we wake up.
357 }
358 }
359
360 TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n");
361 //BUG();
362 }
363
364 raw_spin_unlock_irqrestore(dgl_lock, irqflags);
365
366all_acquired:
367
368 // FOR SANITY CHECK FOR TESTING
369// for(i = 0; i < dgl_wait->size; ++i) {
370// struct litmus_lock *l = dgl_wait->locks[i];
371// BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
372// }
373
374 TRACE_CUR("Acquired entire DGL\n");
375
376 return 0;
377}
378
379static int supports_dgl(struct litmus_lock *l)
380{
381 struct litmus_lock_ops* ops = l->ops;
382
383 return (ops->dgl_lock &&
384 ops->is_owner &&
385 ops->enable_priority);
386}
387
388asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
389{
390 struct task_struct *t = current;
391 long err = -EINVAL;
392 int dgl_ods[MAX_DGL_SIZE];
393 int i;
394
395 dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held.
396
397 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
398 goto out;
399
400 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
401 goto out;
402
403 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
404 goto out;
405
406 if (!is_realtime(t)) {
407 err = -EPERM;
408 goto out;
409 }
410
411 for(i = 0; i < dgl_size; ++i) {
412 struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
413 if(entry && is_lock(entry)) {
414 dgl_wait_state.locks[i] = get_lock(entry);
415 if(!supports_dgl(dgl_wait_state.locks[i])) {
416 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
417 dgl_wait_state.locks[i]->ident);
418 goto out;
419 }
420 }
421 else {
422 TRACE_CUR("Invalid lock identifier\n");
423 goto out;
424 }
425 }
426
427 dgl_wait_state.task = t;
428 dgl_wait_state.size = dgl_size;
429
430 TS_DGL_LOCK_START;
431 err = do_litmus_dgl_lock(&dgl_wait_state);
432
433	/* Note: task may have been suspended or preempted in between! Take
434 * this into account when computing overheads. */
435 TS_DGL_LOCK_END;
436
437out:
438 return err;
439}
440
441static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size)
442{
443 int i;
444 long err = 0;
445
446	TRACE_CUR("Unlocking a DGL of size %d\n", dgl_size);
447
448 for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order
449
450 struct litmus_lock *l = dgl_locks[i];
451 long tmp_err;
452
453 TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident);
454
455 tmp_err = l->ops->unlock(l);
456
457 if(tmp_err) {
458			TRACE_CUR("There was an error unlocking %d: %ld.\n", l->ident, tmp_err);
459 err = tmp_err;
460 }
461 }
462
463	TRACE_CUR("DGL unlocked. err = %ld\n", err);
464
465 return err;
466}
467
468asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
469{
470 long err = -EINVAL;
471 int dgl_ods[MAX_DGL_SIZE];
472 struct od_table_entry* entry;
473 int i;
474
475 struct litmus_lock* dgl_locks[MAX_DGL_SIZE];
476
477 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
478 goto out;
479
480 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
481 goto out;
482
483 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
484 goto out;
485
486 for(i = 0; i < dgl_size; ++i) {
487 entry = get_entry_for_od(dgl_ods[i]);
488 if(entry && is_lock(entry)) {
489 dgl_locks[i] = get_lock(entry);
490 if(!supports_dgl(dgl_locks[i])) {
491 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
492 dgl_locks[i]->ident);
493 goto out;
494 }
495 }
496 else {
497 TRACE_CUR("Invalid lock identifier\n");
498 goto out;
499 }
500 }
501
502 TS_DGL_UNLOCK_START;
503 err = do_litmus_dgl_unlock(dgl_locks, dgl_size);
504
505	/* Note: task may have been suspended or preempted in between! Take
506 * this into account when computing overheads. */
507 TS_DGL_UNLOCK_END;
508
509out:
510 return err;
511}
512
513#else // CONFIG_LITMUS_DGL_SUPPORT
514
515asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
516{
517 return -ENOSYS;
518}
519
520asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
521{
522 return -ENOSYS;
523}
524
525#endif
526
141unsigned int __add_wait_queue_prio_exclusive( 527unsigned int __add_wait_queue_prio_exclusive(
142 wait_queue_head_t* head, 528 wait_queue_head_t* head,
143 prio_wait_queue_t *new) 529 prio_wait_queue_t *new)
@@ -171,7 +557,60 @@ out:
171} 557}
172 558
173 559
174#else 560void suspend_for_lock(void)
561{
562#if defined(CONFIG_REALTIME_AUX_TASKS) || defined(CONFIG_LITMUS_NVIDIA)
563 struct task_struct *t = current;
564#endif
565
566#ifdef CONFIG_REALTIME_AUX_TASKS
567 unsigned int aux_restore = 0;
568 unsigned int aux_hide;
569#endif
570
571#ifdef CONFIG_LITMUS_NVIDIA
572 unsigned int gpu_restore = 0;
573 unsigned int gpu_hide;
574#endif
575
576//#ifdef CONFIG_REALTIME_AUX_TASKS
577// if (tsk_rt(t)->has_aux_tasks) {
578// /* hide from aux tasks so they can't inherit our priority when we block
579// * for a litmus lock. inheritance is already going to a litmus lock
580// * holder. */
581// aux_hide = tsk_rt(t)->hide_from_aux_tasks;
582// aux_restore = 1;
583// tsk_rt(t)->hide_from_aux_tasks = 1;
584// }
585//#endif
586
587#ifdef CONFIG_LITMUS_NVIDIA
588 if (tsk_rt(t)->held_gpus) {
589 gpu_hide = tsk_rt(t)->hide_from_gpu;
590 gpu_restore = 1;
591 tsk_rt(t)->hide_from_gpu = 1;
592 }
593#endif
594
595 schedule();
596
597#ifdef CONFIG_LITMUS_NVIDIA
598 if (gpu_restore) {
599 /* restore our state */
600 tsk_rt(t)->hide_from_gpu = gpu_hide;
601 }
602#endif
603
604#ifdef CONFIG_REALTIME_AUX_TASKS
605 if (aux_restore) {
606 /* restore our state */
607 tsk_rt(t)->hide_from_aux_tasks = aux_hide;
608 }
609#endif
610}
611
612
613#else // CONFIG_LITMUS_LOCKING
175 614
176struct fdso_ops generic_lock_ops = {}; 615struct fdso_ops generic_lock_ops = {};
177 616
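From user space, the two DGL system calls above take a pointer to an array of lock object descriptors plus the array length (1 to MAX_DGL_SIZE); every descriptor must refer to a lock whose ops implement dgl_lock, is_owner, and enable_priority, or the call fails with -EINVAL. A rough sketch of the intended calling pattern, where litmus_dgl_lock()/litmus_dgl_unlock() stand in for whatever userspace wrappers issue sys_litmus_dgl_lock()/sys_litmus_dgl_unlock(), and od_a/od_b/od_c are descriptors obtained when the locks were opened (all of these names are assumptions):

	int dgl[3] = { od_a, od_b, od_c };

	if (litmus_dgl_lock(dgl, 3) == 0) {	/* assumed wrapper for sys_litmus_dgl_lock() */
		/* all three resources are held here, acquired as one atomic request */
		litmus_dgl_unlock(dgl, 3);	/* assumed wrapper for sys_litmus_dgl_unlock() */
	}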
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
new file mode 100644
index 000000000000..5a63fb732e8b
--- /dev/null
+++ b/litmus/nvidia_info.c
@@ -0,0 +1,1137 @@
1#include <linux/module.h>
2#include <linux/semaphore.h>
3#include <linux/pci.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/nvidia_info.h>
7#include <litmus/litmus.h>
8
9#include <litmus/sched_plugin.h>
10
11#include <litmus/binheap.h>
12
13#ifdef CONFIG_LITMUS_SOFTIRQD
14#include <litmus/litmus_softirq.h>
15#endif
16
17typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
18typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
19typedef unsigned char NvU8; /* 0 to 255 */
20typedef unsigned short NvU16; /* 0 to 65535 */
21typedef signed char NvS8; /* -128 to 127 */
22typedef signed short NvS16; /* -32768 to 32767 */
23typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
24typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
25typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
26typedef unsigned int NvU32; /* 0 to 4294967295 */
27typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
28typedef union
29{
30 volatile NvV8 Reg008[1];
31 volatile NvV16 Reg016[1];
32 volatile NvV32 Reg032[1];
33} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
34
35typedef struct
36{
37 NvU64 address;
38#ifdef CONFIG_CUDA_5_0
39 NvU64 strapped_size;
40#endif
41 NvU64 size;
42 NvU32 offset;
43 NvU32 *map;
44 litmus_nv_phwreg_t map_u;
45} litmus_nv_aperture_t;
46
47typedef struct
48{
49 void *priv; /* private data */
50 void *os_state; /* os-specific device state */
51
52#ifndef CONFIG_CUDA_5_0
53 int rmInitialized;
54#endif
55 int flags;
56
57 /* PCI config info */
58 NvU32 domain;
59 NvU16 bus;
60 NvU16 slot;
61 NvU16 vendor_id;
62 NvU16 device_id;
63 NvU16 subsystem_id;
64 NvU32 gpu_id;
65 void *handle;
66
67 NvU32 pci_cfg_space[16];
68
69 /* physical characteristics */
70 litmus_nv_aperture_t bars[3];
71 litmus_nv_aperture_t *regs;
72 litmus_nv_aperture_t *fb, ud;
73 litmus_nv_aperture_t agp;
74
75 NvU32 interrupt_line;
76
77 NvU32 agp_config;
78 NvU32 agp_status;
79
80 NvU32 primary_vga;
81
82 NvU32 sim_env;
83
84 NvU32 rc_timer_enabled;
85
86 /* list of events allocated for this device */
87 void *event_list;
88
89 void *kern_mappings;
90
91} litmus_nv_state_t;
92
93typedef struct work_struct litmus_nv_task_t;
94
95typedef struct litmus_nv_work_s {
96 litmus_nv_task_t task;
97 void *data;
98} litmus_nv_work_t;
99
100typedef struct litmus_nv_linux_state_s {
101 litmus_nv_state_t nv_state;
102 atomic_t usage_count;
103
104 struct pci_dev *dev;
105 void *agp_bridge;
106 void *alloc_queue;
107
108 void *timer_sp;
109 void *isr_sp;
110 void *pci_cfgchk_sp;
111 void *isr_bh_sp;
112
113#if defined(CONFIG_CUDA_4_0) || defined(CONFIG_CUDA_5_0)
114 char registry_keys[512];
115#endif
116
117	/* keep track of any pending bottom halves */
118 struct tasklet_struct tasklet;
119 litmus_nv_work_t work;
120
121 /* get a timer callback every second */
122 struct timer_list rc_timer;
123
124 /* lock for linux-specific data, not used by core rm */
125 struct semaphore ldata_lock;
126
127 /* lock for linux-specific alloc queue */
128 struct semaphore at_lock;
129
130#if 0
131#if defined(NV_USER_MAP)
132 /* list of user mappings */
133 struct nv_usermap_s *usermap_list;
134
135 /* lock for VMware-specific mapping list */
136 struct semaphore mt_lock;
137#endif /* defined(NV_USER_MAP) */
138#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
139 void *apm_nv_dev;
140#endif
141#endif
142
143 NvU32 device_num;
144 struct litmus_nv_linux_state_s *next;
145} litmus_nv_linux_state_t;
146
147void dump_nvidia_info(const struct tasklet_struct *t)
148{
149 litmus_nv_state_t* nvstate = NULL;
150 litmus_nv_linux_state_t* linuxstate = NULL;
151 struct pci_dev* pci = NULL;
152
153 nvstate = (litmus_nv_state_t*)(t->data);
154
155 if(nvstate)
156 {
157 TRACE("NV State:\n"
158 "\ttasklet ptr = %p\n"
159 "\tstate ptr = %p\n"
160 "\tprivate data ptr = %p\n"
161 "\tos state ptr = %p\n"
162 "\tdomain = %u\n"
163 "\tbus = %u\n"
164 "\tslot = %u\n"
165			"\tvendor_id = %u\n"
166 "\tdevice_id = %u\n"
167 "\tsubsystem_id = %u\n"
168 "\tgpu_id = %u\n"
169 "\tinterrupt_line = %u\n",
170 t,
171 nvstate,
172 nvstate->priv,
173 nvstate->os_state,
174 nvstate->domain,
175 nvstate->bus,
176 nvstate->slot,
177 nvstate->vendor_id,
178 nvstate->device_id,
179 nvstate->subsystem_id,
180 nvstate->gpu_id,
181 nvstate->interrupt_line);
182
183 linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
184 }
185 else
186 {
187 TRACE("INVALID NVSTATE????\n");
188 }
189
190 if(linuxstate)
191 {
192 int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
193 int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
194 int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
195
196
197 TRACE("LINUX NV State:\n"
198 "\tlinux nv state ptr: %p\n"
199 "\taddress of tasklet: %p\n"
200 "\taddress of work: %p\n"
201 "\tusage_count: %d\n"
202 "\tdevice_num: %u\n"
203 "\ttasklet addr == this tasklet: %d\n"
204 "\tpci: %p\n",
205 linuxstate,
206 &(linuxstate->tasklet),
207 &(linuxstate->work),
208 atomic_read(&(linuxstate->usage_count)),
209 linuxstate->device_num,
210 (t == &(linuxstate->tasklet)),
211 linuxstate->dev);
212
213 pci = linuxstate->dev;
214
215 TRACE("Offsets:\n"
216 "\tOffset from LinuxState: %d, %x\n"
217 "\tOffset from NVState: %d, %x\n"
218 "\tOffset from parameter: %d, %x\n"
219 "\tdevice_num: %u\n",
220 ls_offset, ls_offset,
221 ns_offset_raw, ns_offset_raw,
222 ns_offset_desired, ns_offset_desired,
223 *((u32*)((void*)nvstate + ns_offset_desired)));
224 }
225 else
226 {
227 TRACE("INVALID LINUXNVSTATE?????\n");
228 }
229
230#if 0
231 if(pci)
232 {
233 TRACE("PCI DEV Info:\n"
234 "pci device ptr: %p\n"
235 "\tdevfn = %d\n"
236 "\tvendor = %d\n"
237 "\tdevice = %d\n"
238 "\tsubsystem_vendor = %d\n"
239 "\tsubsystem_device = %d\n"
240 "\tslot # = %d\n",
241 pci,
242 pci->devfn,
243 pci->vendor,
244 pci->device,
245 pci->subsystem_vendor,
246 pci->subsystem_device,
247 pci->slot->number);
248 }
249 else
250 {
251 TRACE("INVALID PCIDEV PTR?????\n");
252 }
253#endif
254}
255
256
257
258static struct module* nvidia_mod = NULL;
259
260
261
262
263#if 0
264static int nvidia_ready_module_notify(struct notifier_block *self,
265 unsigned long val, void *data)
266{
267 mutex_lock(&module_mutex);
268 nvidia_mod = find_module("nvidia");
269 mutex_unlock(&module_mutex);
270
271 if(nvidia_mod != NULL)
272 {
273 TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
274 (void*)(nvidia_mod->module_core),
275 (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
276 init_nv_device_reg();
277 return(0);
278 }
279 else
280 {
281 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
282 }
283}
284
285static int nvidia_going_module_notify(struct notifier_block *self,
286 unsigned long val, void *data)
287{
288 nvidia_mod = NULL;
289 mb();
290
291 return 0;
292}
293
294static struct notifier_block nvidia_ready = {
295 .notifier_call = nvidia_ready_module_notify,
296 .priority = 1,
297};
298
299static struct notifier_block nvidia_going = {
300 .notifier_call = nvidia_going_module_notify,
301 .priority = 1,
302};
303#endif
304
305
306
307static int init_nv_device_reg(void);
308static int shutdown_nv_device_reg(void);
309
310
311int init_nvidia_info(void)
312{
313 mutex_lock(&module_mutex);
314 nvidia_mod = find_module("nvidia");
315 mutex_unlock(&module_mutex);
316 if(nvidia_mod != NULL)
317 {
318 TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
319 (void*)(nvidia_mod->module_core),
320 (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
321 init_nv_device_reg();
322 return(0);
323 }
324 else
325 {
326 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
327
328 init_nv_device_reg();
329 return(0);
330// return(-1);
331 }
332}
333
334void shutdown_nvidia_info(void)
335{
336 nvidia_mod = NULL;
337 mb();
338
339 shutdown_nv_device_reg();
340}
341
342/* works with pointers to static data inside the module too. */
343int is_nvidia_func(void* func_addr)
344{
345 int ret = 0;
346 if(nvidia_mod)
347 {
348 ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
349 /*
350 if(ret)
351 {
352 TRACE("%s : %p is in NVIDIA module: %d\n",
353 __FUNCTION__, func_addr, ret);
354 }*/
355 }
356
357 return(ret);
358}
359
360u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
361{
362 // life is too short to use hard-coded offsets. update this later.
363 litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
364 litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
365
366 BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
367
368 return(linuxstate->device_num);
369}
370
371u32 get_work_nv_device_num(const struct work_struct *t)
372{
373	// offset determined through observed behavior of the NV driver.
374 const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
375 void* state = (void*)(t);
376 void** device_num_ptr = state + DEVICE_NUM_OFFSET;
377 return(*((u32*)(*device_num_ptr)));
378}
379
380
381///////////////////////////////////////////////////////////////////////////////
382///////////////////////////////////////////////////////////////////////////////
383///////////////////////////////////////////////////////////////////////////////
384
385
386typedef struct {
387	raw_spinlock_t	lock;  /* not needed if GPU not shared between scheduling domains */
388 struct binheap owners;
389
390#ifdef CONFIG_LITMUS_SOFTIRQD
391 klmirqd_callback_t callback;
392 struct task_struct* thread;
393 int ready:1; /* todo: make threads check for the ready flag */
394#endif
395
396#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
397 struct tasklet_struct nv_klmirqd_dbg_tasklet;
398#endif
399} nv_device_registry_t;
400
401
402static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
403
404
405
406#ifdef CONFIG_LITMUS_SOFTIRQD
407static int nvidia_klmirqd_cb(void *arg)
408{
409 unsigned long flags;
410 int reg_device_id = (int)(long long)(arg);
411 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
412
413 TRACE("nv klmirqd callback for GPU %d\n", reg_device_id);
414
415 raw_spin_lock_irqsave(&reg->lock, flags);
416 reg->thread = current;
417 reg->ready = 1;
418 raw_spin_unlock_irqrestore(&reg->lock, flags);
419
420 return 0;
421}
422#endif
423
424#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
425struct nv_klmirqd_dbg_timer_struct
426{
427 struct hrtimer timer;
428};
429
430static struct nv_klmirqd_dbg_timer_struct nv_klmirqd_dbg_timer;
431
432static void nv_klmirqd_arm_dbg_timer(lt_t relative_time)
433{
434 lt_t when_to_fire = litmus_clock() + relative_time;
435
436	TRACE("next nv tasklet in %llu ns\n", relative_time);
437
438 __hrtimer_start_range_ns(&nv_klmirqd_dbg_timer.timer,
439 ns_to_ktime(when_to_fire),
440 0,
441 HRTIMER_MODE_ABS_PINNED,
442 0);
443}
444
445static void nv_klmirqd_dbg_tasklet_func(unsigned long arg)
446{
447 lt_t now = litmus_clock();
448 nv_device_registry_t *reg = (nv_device_registry_t*)arg;
449 int gpunum = reg - &NV_DEVICE_REG[0];
450
451 TRACE("nv klmirqd routine invoked for GPU %d!\n", gpunum);
452
453 /* set up the next timer */
454 nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
455}
456
457
458static enum hrtimer_restart nvklmirqd_timer_func(struct hrtimer *timer)
459{
460 lt_t now = litmus_clock();
461 int gpu = (int)(now % num_online_gpus());
462 nv_device_registry_t *reg;
463
464 TRACE("nvklmirqd_timer invoked!\n");
465
466 reg = &NV_DEVICE_REG[gpu];
467
468 if (reg->thread && reg->ready) {
469 TRACE("Adding a tasklet for GPU %d\n", gpu);
470 litmus_tasklet_schedule(&reg->nv_klmirqd_dbg_tasklet, reg->thread);
471 }
472 else {
473 TRACE("nv klmirqd is not ready!\n");
474 nv_klmirqd_arm_dbg_timer(now % (NSEC_PER_MSEC * 10)); // within the next 10ms.
475 }
476
477 return HRTIMER_NORESTART;
478}
479#endif
480
481
482static int gpu_owner_max_priority_order(struct binheap_node *a,
483 struct binheap_node *b)
484{
485 struct task_struct *d_a = container_of(binheap_entry(a, struct rt_param, gpu_owner_node),
486 struct task_struct, rt_param);
487 struct task_struct *d_b = container_of(binheap_entry(b, struct rt_param, gpu_owner_node),
488 struct task_struct, rt_param);
489
490 BUG_ON(!d_a);
491 BUG_ON(!d_b);
492
493 return litmus->compare(d_a, d_b);
494}
495
496static int init_nv_device_reg(void)
497{
498 int i;
499 char name[MAX_KLMIRQD_NAME_LEN+1];
500
501#ifdef CONFIG_LITMUS_SOFTIRQD
502 if (!klmirqd_is_ready()) {
503 TRACE("klmirqd is not ready!\n");
504 return 0;
505 }
506#endif
507
508 memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
509 mb();
510
511
512 for(i = 0; i < num_online_gpus(); ++i) {
513 raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
514 INIT_BINHEAP_HANDLE(&NV_DEVICE_REG[i].owners, gpu_owner_max_priority_order);
515
516#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
517 tasklet_init(&NV_DEVICE_REG[i].nv_klmirqd_dbg_tasklet, nv_klmirqd_dbg_tasklet_func, (unsigned long)&NV_DEVICE_REG[i]);
518#endif
519
520#ifdef CONFIG_LITMUS_SOFTIRQD
521 {
522 int default_cpu = litmus->map_gpu_to_cpu(i);
523
524 snprintf(name, MAX_KLMIRQD_NAME_LEN, "nvklmirqd%d", i);
525
526 NV_DEVICE_REG[i].callback.func = nvidia_klmirqd_cb;
527 NV_DEVICE_REG[i].callback.arg = (void*)(long long)(i);
528 mb();
529
530 if(launch_klmirqd_thread(name, default_cpu, &NV_DEVICE_REG[i].callback) != 0) {
531 TRACE("Failed to create klmirqd thread for GPU %d\n", i);
532 }
533 }
534#endif
535 }
536
537#ifdef CONFIG_LITMUS_NV_KLMIRQD_DEBUG
538 hrtimer_init(&nv_klmirqd_dbg_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
539 nv_klmirqd_dbg_timer.timer.function = nvklmirqd_timer_func;
540 nv_klmirqd_arm_dbg_timer(NSEC_PER_MSEC * 1000);
541#endif
542
543 return(1);
544}
545
546
547/* The following code is full of nasty race conditions... */
548/* spawning of klmirqd threads can race with init_nv_device_reg()!!!! */
549static int shutdown_nv_device_reg(void)
550{
551 TRACE("Shutting down nv device registration.\n");
552
553#ifdef CONFIG_LITMUS_SOFTIRQD
554 {
555 int i;
556 nv_device_registry_t *reg;
557
558 for (i = 0; i < num_online_gpus(); ++i) {
559
560 TRACE("Shutting down GPU %d.\n", i);
561
562 reg = &NV_DEVICE_REG[i];
563
564 if (reg->thread && reg->ready) {
565 kill_klmirqd_thread(reg->thread);
566
567 /* assume that all goes according to plan... */
568 reg->thread = NULL;
569 reg->ready = 0;
570 }
571
572 while (!binheap_empty(&reg->owners)) {
573 binheap_delete_root(&reg->owners, struct rt_param, gpu_owner_node);
574 }
575 }
576 }
577#endif
578
579 return(1);
580}
581
582
583/* use to get the owner of nv_device_id. */
584struct task_struct* get_nv_max_device_owner(u32 target_device_id)
585{
586 struct task_struct *owner = NULL;
587 nv_device_registry_t *reg;
588
589 BUG_ON(target_device_id >= NV_DEVICE_NUM);
590
591 reg = &NV_DEVICE_REG[target_device_id];
592
593 if (!binheap_empty(&reg->owners)) {
594 struct task_struct *hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
595 struct task_struct, rt_param);
596		owner = hp; TRACE_CUR("hp: %s/%d\n", hp->comm, hp->pid);
597 }
598
599 return(owner);
600}
601
602#ifdef CONFIG_LITMUS_SOFTIRQD
603struct task_struct* get_nv_klmirqd_thread(u32 target_device_id)
604{
605 struct task_struct *klmirqd = NULL;
606 nv_device_registry_t *reg;
607
608 BUG_ON(target_device_id >= NV_DEVICE_NUM);
609
610 reg = &NV_DEVICE_REG[target_device_id];
611
612 if(likely(reg->ready)) {
613 klmirqd = reg->thread;
614 }
615
616 return klmirqd;
617}
618#endif
619
620
621
622
623
624#ifdef CONFIG_LITMUS_SOFTIRQD
625static int gpu_klmirqd_increase_priority(struct task_struct *klmirqd, struct task_struct *hp)
626{
627 int retval = 0;
628
629 TRACE_CUR("Increasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid);
630
631	/* the klmirqd thread should never attempt to hold a litmus-level real-time lock,
632 * so nested support is not required */
633 retval = litmus->__increase_prio(klmirqd, hp);
634
635 return retval;
636}
637
638static int gpu_klmirqd_decrease_priority(struct task_struct *klmirqd, struct task_struct *hp)
639{
640 int retval = 0;
641
642 TRACE_CUR("Decreasing priority of nv klmirqd: %s/%d.\n", klmirqd->comm, klmirqd->pid);
643
644	/* the klmirqd thread should never attempt to hold a litmus-level real-time lock,
645 * so nested support is not required */
646 retval = litmus->__decrease_prio(klmirqd, hp);
647
648 return retval;
649}
650#endif
651
652
653
654
655/* call when a gpu owner becomes real-time */
656long enable_gpu_owner(struct task_struct *t)
657{
658 long retval = 0;
659// unsigned long flags;
660 int gpu;
661 nv_device_registry_t *reg;
662
663#ifdef CONFIG_LITMUS_SOFTIRQD
664 struct task_struct *hp;
665#endif
666
667 if (!tsk_rt(t)->held_gpus) {
668 TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
669 return -1;
670 }
671
672 BUG_ON(!is_realtime(t));
673
674 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
675
676 if (binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
677 TRACE_CUR("task %s/%d is already active on GPU %d\n", t->comm, t->pid, gpu);
678 goto out;
679 }
680
681 /* update the registration (and maybe klmirqd) */
682 reg = &NV_DEVICE_REG[gpu];
683
684// raw_spin_lock_irqsave(&reg->lock, flags);
685
686 binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
687 struct rt_param, gpu_owner_node);
688
689
690#ifdef CONFIG_LITMUS_SOFTIRQD
691 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
692 struct task_struct, rt_param);
693
694 if (hp == t) {
695 /* we're the new hp */
696 TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu);
697
698 retval = gpu_klmirqd_increase_priority(reg->thread, (tsk_rt(hp)->inh_task)? tsk_rt(hp)->inh_task : hp);
699 }
700#endif
701
702// raw_spin_unlock_irqsave(&reg->lock, flags);
703
704out:
705 return retval;
706}
707
708/* call when a gpu owner exits real-time */
709long disable_gpu_owner(struct task_struct *t)
710{
711 long retval = 0;
712// unsigned long flags;
713 int gpu;
714 nv_device_registry_t *reg;
715
716#ifdef CONFIG_LITMUS_SOFTIRQD
717 struct task_struct *hp;
718 struct task_struct *new_hp = NULL;
719#endif
720
721 if (!tsk_rt(t)->held_gpus) {
722 TRACE_CUR("task %s/%d does not hold any GPUs\n", t->comm, t->pid);
723 return -1;
724 }
725
726 BUG_ON(!is_realtime(t));
727
728 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
729
730 if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
731 TRACE_CUR("task %s/%d is not active on GPU %d\n", t->comm, t->pid, gpu);
732 goto out;
733 }
734
735 TRACE_CUR("task %s/%d exiting from GPU %d.\n", t->comm, t->pid, gpu);
736
737
738 reg = &NV_DEVICE_REG[gpu];
739
740// raw_spin_lock_irqsave(&reg->lock, flags);
741
742
743#ifdef CONFIG_LITMUS_SOFTIRQD
744 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
745 struct task_struct, rt_param);
746
747 binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
748
749
750 if (!binheap_empty(&reg->owners)) {
751 new_hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
752 struct task_struct, rt_param);
753 }
754
755 if (hp == t && new_hp != t) {
756 struct task_struct *to_inh = NULL;
757
758 TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu);
759
760 if (new_hp) {
761 to_inh = (tsk_rt(new_hp)->inh_task) ? tsk_rt(new_hp)->inh_task : new_hp;
762 }
763
764 retval = gpu_klmirqd_decrease_priority(reg->thread, to_inh);
765 }
766#else
767 binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
768#endif
769
770// raw_spin_unlock_irqsave(&reg->lock, flags);
771
772
773out:
774 return retval;
775}
776
777
778
779
780
781
782
783
784
785
786int gpu_owner_increase_priority(struct task_struct *t)
787{
788 int retval = 0;
789 int gpu;
790 nv_device_registry_t *reg;
791
792 struct task_struct *hp = NULL;
793 struct task_struct *hp_eff = NULL;
794
795 BUG_ON(!is_realtime(t));
796 BUG_ON(!tsk_rt(t)->held_gpus);
797
798 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
799
800 if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
801 WARN_ON(!is_running(t));
802 TRACE_CUR("gpu klmirqd may not inherit from %s/%d on GPU %d\n",
803 t->comm, t->pid, gpu);
804 goto out;
805 }
806
807
808
809
810 TRACE_CUR("task %s/%d on GPU %d increasing priority.\n", t->comm, t->pid, gpu);
811 reg = &NV_DEVICE_REG[gpu];
812
813 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
814 struct task_struct, rt_param);
815 hp_eff = effective_priority(hp);
816
817 if (hp != t) { /* our position in the heap may have changed. hp is already at the root. */
818 binheap_decrease(&tsk_rt(t)->gpu_owner_node, &reg->owners);
819 }
820
821 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
822 struct task_struct, rt_param);
823
824 if (effective_priority(hp) != hp_eff) { /* the eff. prio. of hp has changed */
825 hp_eff = effective_priority(hp);
826 TRACE_CUR("%s/%d is new hp on GPU %d.\n", t->comm, t->pid, gpu);
827
828 retval = gpu_klmirqd_increase_priority(reg->thread, hp_eff);
829 }
830
831out:
832 return retval;
833}
834
835
836int gpu_owner_decrease_priority(struct task_struct *t)
837{
838 int retval = 0;
839 int gpu;
840 nv_device_registry_t *reg;
841
842 struct task_struct *hp = NULL;
843 struct task_struct *hp_eff = NULL;
844
845 BUG_ON(!is_realtime(t));
846 BUG_ON(!tsk_rt(t)->held_gpus);
847
848 gpu = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
849
850 if (!binheap_is_in_heap(&tsk_rt(t)->gpu_owner_node)) {
851 WARN_ON(!is_running(t));
852 TRACE_CUR("nv klmirqd may not inherit from %s/%d on GPU %d\n",
853 t->comm, t->pid, gpu);
854 goto out;
855 }
856
857	TRACE_CUR("task %s/%d on GPU %d decreasing priority.\n", t->comm, t->pid, gpu);
858 reg = &NV_DEVICE_REG[gpu];
859
860 hp = container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
861 struct task_struct, rt_param);
862 hp_eff = effective_priority(hp);
863 binheap_delete(&tsk_rt(t)->gpu_owner_node, &reg->owners);
864 binheap_add(&tsk_rt(t)->gpu_owner_node, &reg->owners,
865 struct rt_param, gpu_owner_node);
866
867 if (hp == t) { /* t was originally the hp */
868 struct task_struct *new_hp =
869 container_of(binheap_top_entry(&reg->owners, struct rt_param, gpu_owner_node),
870 struct task_struct, rt_param);
871 if (effective_priority(new_hp) != hp_eff) { /* eff prio. of hp has changed */
872 hp_eff = effective_priority(new_hp);
873 TRACE_CUR("%s/%d is no longer hp on GPU %d.\n", t->comm, t->pid, gpu);
874 retval = gpu_klmirqd_decrease_priority(reg->thread, hp_eff);
875 }
876 }
877
878out:
879 return retval;
880}
881
882
883
884
885
886
887
888
889
890static int __reg_nv_device(int reg_device_id, struct task_struct *t)
891{
892 __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
893
894 return(0);
895}
896
897static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
898{
899 __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
900
901 return(0);
902}
903
904
905int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
906{
907 int ret;
908
909 if((reg_device_id < num_online_gpus()) && (reg_device_id >= 0))
910 {
911 if(reg_action)
912 ret = __reg_nv_device(reg_device_id, t);
913 else
914 ret = __clear_reg_nv_device(reg_device_id, t);
915 }
916 else
917 {
918 ret = -ENODEV;
919 }
920
921 return(ret);
922}
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
947//void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
948//{
949// unsigned long flags;
950// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
951//
952//
953//
954// if(reg->max_prio_owner != t) {
955//
956// raw_spin_lock_irqsave(&reg->lock, flags);
957//
958// if(reg->max_prio_owner != t) {
959// if(litmus->compare(t, reg->max_prio_owner)) {
960// litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
961// reg->max_prio_owner = t;
962// }
963// }
964//
965// raw_spin_unlock_irqrestore(&reg->lock, flags);
966// }
967//}
968//
969//
970//void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
971//{
972// unsigned long flags;
973// nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
974//
975// if(reg->max_prio_owner == t) {
976//
977// raw_spin_lock_irqsave(&reg->lock, flags);
978//
979// if(reg->max_prio_owner == t) {
980// reg->max_prio_owner = find_hp_owner(reg, NULL);
981// if(reg->max_prio_owner != t) {
982// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
983// }
984// }
985//
986// raw_spin_unlock_irqrestore(&reg->lock, flags);
987// }
988//}
989#endif
990
991
992
993
994
995//static int __reg_nv_device(int reg_device_id, struct task_struct *t)
996//{
997// int ret = 0;
998// int i;
999// struct task_struct *old_max = NULL;
1000//
1001//
1002// raw_spin_lock_irqsave(&reg->lock, flags);
1003//
1004// if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
1005// TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
1006// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
1007// if(reg->owners[i] == NULL) {
1008// reg->owners[i] = t;
1009//
1010// //if(edf_higher_prio(t, reg->max_prio_owner)) {
1011// if(litmus->compare(t, reg->max_prio_owner)) {
1012// old_max = reg->max_prio_owner;
1013// reg->max_prio_owner = t;
1014//
1015//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1016// litmus->change_prio_pai_tasklet(old_max, t);
1017//#endif
1018// }
1019//
1020//#ifdef CONFIG_LITMUS_SOFTIRQD
1021// down_and_set_stat(t, HELD, &tsk_rt(t)->klmirqd_sem);
1022//#endif
1023// ++(reg->nr_owners);
1024//
1025// break;
1026// }
1027// }
1028// }
1029// else
1030// {
1031// TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
1032// //ret = -EBUSY;
1033// }
1034//
1035// raw_spin_unlock_irqrestore(&reg->lock, flags);
1036//
1037// __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
1038//
1039// return(ret);
1040//}
1041//
1042//static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
1043//{
1044// int ret = 0;
1045// int i;
1046// unsigned long flags;
1047// nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
1048//
1049//#ifdef CONFIG_LITMUS_SOFTIRQD
1050// struct task_struct* klmirqd_th = get_klmirqd(de_reg_device_id);
1051//#endif
1052//
1053// if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
1054// return ret;
1055// }
1056//
1057// raw_spin_lock_irqsave(&reg->lock, flags);
1058//
1059// TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
1060//
1061// for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
1062// if(reg->owners[i] == t) {
1063//#ifdef CONFIG_LITMUS_SOFTIRQD
1064// flush_pending(klmirqd_th, t);
1065//#endif
1066// if(reg->max_prio_owner == t) {
1067// reg->max_prio_owner = find_hp_owner(reg, t);
1068//#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1069// litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
1070//#endif
1071// }
1072//
1073//#ifdef CONFIG_LITMUS_SOFTIRQD
1074// up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klmirqd_sem);
1075//#endif
1076//
1077// reg->owners[i] = NULL;
1078// --(reg->nr_owners);
1079//
1080// break;
1081// }
1082// }
1083//
1084// raw_spin_unlock_irqrestore(&reg->lock, flags);
1085//
1086// __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
1087//
1088// return(ret);
1089//}
1090//
1091//
1092//int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
1093//{
1094// int ret;
1095//
1096// if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
1097// {
1098// if(reg_action)
1099// ret = __reg_nv_device(reg_device_id, t);
1100// else
1101// ret = __clear_reg_nv_device(reg_device_id, t);
1102// }
1103// else
1104// {
1105// ret = -ENODEV;
1106// }
1107//
1108// return(ret);
1109//}
1110
1111
1112
1113//void lock_nv_registry(u32 target_device_id, unsigned long* flags)
1114//{
1115// BUG_ON(target_device_id >= NV_DEVICE_NUM);
1116//
1117// if(in_interrupt())
1118// TRACE("Locking registry for %d.\n", target_device_id);
1119// else
1120// TRACE_CUR("Locking registry for %d.\n", target_device_id);
1121//
1122// raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
1123//}
1124//
1125//void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
1126//{
1127// BUG_ON(target_device_id >= NV_DEVICE_NUM);
1128//
1129// if(in_interrupt())
1130// TRACE("Unlocking registry for %d.\n", target_device_id);
1131// else
1132// TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
1133//
1134// raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
1135//}
1136
1137
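The registry above relies on its callers to keep the per-GPU owner heap consistent with task state: enable_gpu_owner()/disable_gpu_owner() when a GPU-holding task enters or leaves real-time mode, and gpu_owner_increase_priority()/gpu_owner_decrease_priority() whenever such a task's effective priority changes, so the klmirqd thread always inherits from the highest-priority owner. A condensed sketch of the expected call site in a plugin's priority-change path (the hook name plugin_gpu_prio_changed() and its surrounding locking are assumptions):

	#include <litmus/litmus.h>
	#include <litmus/nvidia_info.h>

	/* hypothetical hook: t's effective priority was just raised or lowered */
	static void plugin_gpu_prio_changed(struct task_struct *t, int increased)
	{
		if (!tsk_rt(t)->held_gpus)
			return;		/* t owns no GPU; nothing to propagate */

		/* re-position t in its GPU's owner heap and, if the heap's maximum
		 * effective priority changed, update the klmirqd inheritance */
		if (increased)
			gpu_owner_increase_priority(t);
		else
			gpu_owner_decrease_priority(t);
	}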
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 6be2f26728b8..86ad2efb591a 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -27,10 +27,12 @@ void sched_state_will_schedule(struct task_struct* tsk)
27 set_sched_state(PICKED_WRONG_TASK); 27 set_sched_state(PICKED_WRONG_TASK);
28 else 28 else
29 set_sched_state(WILL_SCHEDULE); 29 set_sched_state(WILL_SCHEDULE);
30 } else 30 } else {
31 /* Litmus tasks should never be subject to a remote 31 /* Litmus tasks should never be subject to a remote
32 * set_tsk_need_resched(). */ 32 * set_tsk_need_resched(). */
33 BUG_ON(is_realtime(tsk)); 33 //BUG_ON(is_realtime(tsk));
34 }
35
34#ifdef CONFIG_PREEMPT_STATE_TRACE 36#ifdef CONFIG_PREEMPT_STATE_TRACE
35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 37 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
36 __builtin_return_address(0)); 38 __builtin_return_address(0));
@@ -46,14 +48,18 @@ void sched_state_ipi(void)
46 /* Cause scheduler to be invoked. 48 /* Cause scheduler to be invoked.
47 * This will cause a transition to WILL_SCHEDULE. */ 49 * This will cause a transition to WILL_SCHEDULE. */
48 set_tsk_need_resched(current); 50 set_tsk_need_resched(current);
51 /*
49 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", 52 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
50 current->comm, current->pid); 53 current->comm, current->pid);
54 */
51 TS_SEND_RESCHED_END; 55 TS_SEND_RESCHED_END;
52 } else { 56 } else {
53 /* ignore */ 57 /* ignore */
58 /*
54 TRACE_STATE("ignoring IPI in state %x (%s)\n", 59 TRACE_STATE("ignoring IPI in state %x (%s)\n",
55 get_sched_state(), 60 get_sched_state(),
56 sched_state_name(get_sched_state())); 61 sched_state_name(get_sched_state()));
62 */
57 } 63 }
58} 64}
59 65
@@ -70,23 +76,34 @@ void litmus_reschedule(int cpu)
70 * is not aware of the need to reschedule at this point. */ 76 * is not aware of the need to reschedule at this point. */
71 77
72 /* is a context switch in progress? */ 78 /* is a context switch in progress? */
73 if (cpu_is_in_sched_state(cpu, TASK_PICKED)) 79 if (cpu_is_in_sched_state(cpu, TASK_PICKED)) {
74 picked_transition_ok = sched_state_transition_on( 80 picked_transition_ok = sched_state_transition_on(
75 cpu, TASK_PICKED, PICKED_WRONG_TASK); 81 cpu, TASK_PICKED, PICKED_WRONG_TASK);
76 82
83 TRACE_CUR("cpu %d: picked_transition_ok = %d\n", cpu, picked_transition_ok);
84 }
85 else {
86 TRACE_CUR("cpu %d: picked_transition_ok = 0 (static)\n", cpu);
87 }
88
77 if (!picked_transition_ok && 89 if (!picked_transition_ok &&
78 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { 90 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
79 /* We either raced with the end of the context switch, or the 91 /* We either raced with the end of the context switch, or the
80 * CPU was in TASK_SCHEDULED anyway. */ 92 * CPU was in TASK_SCHEDULED anyway. */
81 scheduled_transition_ok = sched_state_transition_on( 93 scheduled_transition_ok = sched_state_transition_on(
82 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); 94 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
95 TRACE_CUR("cpu %d: scheduled_transition_ok = %d\n", cpu, scheduled_transition_ok);
96 }
97 else {
98 TRACE_CUR("cpu %d: scheduled_transition_ok = 0 (static)\n", cpu);
83 } 99 }
84 100
85 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the 101 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
86 * scheduler to be invoked. */ 102 * scheduler to be invoked. */
87 if (scheduled_transition_ok) { 103 if (scheduled_transition_ok) {
88 if (smp_processor_id() == cpu) 104 if (smp_processor_id() == cpu) {
89 set_tsk_need_resched(current); 105 set_tsk_need_resched(current);
106 }
90 else { 107 else {
91 TS_SEND_RESCHED_START(cpu); 108 TS_SEND_RESCHED_START(cpu);
92 smp_send_reschedule(cpu); 109 smp_send_reschedule(cpu);
@@ -101,11 +118,16 @@ void litmus_reschedule(int cpu)
101 118
102void litmus_reschedule_local(void) 119void litmus_reschedule_local(void)
103{ 120{
104 if (is_in_sched_state(TASK_PICKED)) 121 if (is_in_sched_state(TASK_PICKED)) {
105 set_sched_state(PICKED_WRONG_TASK); 122 set_sched_state(PICKED_WRONG_TASK);
123
124 TRACE_CUR("cpu %d: transitioned to PICKED_WRONG_TASK\n", smp_processor_id());
125 }
106 else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) { 126 else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
107 set_sched_state(WILL_SCHEDULE); 127 set_sched_state(WILL_SCHEDULE);
108 set_tsk_need_resched(current); 128 set_tsk_need_resched(current);
129
130 TRACE_CUR("cpu %d: transitioned to WILL_SCHEDULE\n", smp_processor_id());
109 } 131 }
110} 132}
111 133
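The transition logic above is what makes litmus_reschedule() cheap to call from plugin code: the IPI is only sent when the target CPU has already finished picking a task (TASK_SCHEDULED); otherwise the in-progress scheduling decision is merely flagged as stale. A minimal sketch of the intended call site after a higher-priority job arrives (the helper check_for_preemption() and its arguments are assumptions; litmus->compare() is the plugin's priority comparison used elsewhere in this patch):

	#include <linux/smp.h>
	#include <litmus/preempt.h>
	#include <litmus/sched_plugin.h>

	/* hypothetical plugin helper: a new job was released; preempt if needed */
	static void check_for_preemption(int cpu, struct task_struct *released,
					 struct task_struct *scheduled)
	{
		if (!scheduled || litmus->compare(released, scheduled)) {
			if (cpu == smp_processor_id())
				litmus_reschedule_local();
			else
				litmus_reschedule(cpu);
		}
	}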
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
new file mode 100644
index 000000000000..3dfd8ae9d221
--- /dev/null
+++ b/litmus/rsm_lock.c
@@ -0,0 +1,796 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/rsm_lock.h>
7
8//#include <litmus/edf_common.h>
9
10#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11#include <litmus/gpu_affinity.h>
12#endif
13
14
15/* caller is responsible for locking */
16static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex,
17 struct task_struct* skip)
18{
19 wait_queue_t *q;
20 struct list_head *pos;
21 struct task_struct *queued = NULL, *found = NULL;
22
23#ifdef CONFIG_LITMUS_DGL_SUPPORT
24 dgl_wait_state_t *dgl_wait = NULL;
25#endif
26
27 list_for_each(pos, &mutex->wait.task_list) {
28 q = list_entry(pos, wait_queue_t, task_list);
29
30#ifdef CONFIG_LITMUS_DGL_SUPPORT
31 if(q->func == dgl_wake_up) {
32 dgl_wait = (dgl_wait_state_t*) q->private;
33 if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) {
34 queued = dgl_wait->task;
35 }
36 else {
37 queued = NULL; // skip it.
38 }
39 }
40 else {
41 queued = (struct task_struct*) q->private;
42 }
43#else
44 queued = (struct task_struct*) q->private;
45#endif
46
47 /* Compare task prios, find high prio task. */
48 //if (queued && queued != skip && edf_higher_prio(queued, found)) {
49 if (queued && queued != skip && litmus->compare(queued, found)) {
50 found = queued;
51 }
52 }
53 return found;
54}
55
56
57#ifdef CONFIG_LITMUS_DGL_SUPPORT
58
59int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t)
60{
61 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
62 return(mutex->owner == t);
63}
64
65// return 1 if resource was immediately acquired.
66// Assumes mutex->lock is held.
67// Must set task state to TASK_UNINTERRUPTIBLE if task blocks.
68int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait,
69 wait_queue_t* wq_node)
70{
71 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
72 struct task_struct *t = dgl_wait->task;
73
74	int acquired_immediately = 0;
75
76 BUG_ON(t != current);
77
78 if (mutex->owner) {
79 TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident);
80
81 init_dgl_waitqueue_entry(wq_node, dgl_wait);
82
83 set_task_state(t, TASK_UNINTERRUPTIBLE);
84 __add_wait_queue_tail_exclusive(&mutex->wait, wq_node);
85 } else {
86 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
87
88 /* it's ours now */
89 mutex->owner = t;
90
91 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
92 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
93 struct nested_info, hp_binheap_node);
94 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
95
96		acquired_immediately = 1;
97 }
98
99	return acquired_immediately;
100}
101
102void rsm_mutex_enable_priority(struct litmus_lock *l,
103 dgl_wait_state_t* dgl_wait)
104{
105 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
106 struct task_struct *t = dgl_wait->task;
107 struct task_struct *owner = mutex->owner;
108 unsigned long flags = 0; // these are unused under DGL coarse-grain locking
109
110 BUG_ON(owner == t);
111
112 tsk_rt(t)->blocked_lock = l;
113 mb();
114
115 //if (edf_higher_prio(t, mutex->hp_waiter)) {
116 if (litmus->compare(t, mutex->hp_waiter)) {
117
118 struct task_struct *old_max_eff_prio;
119 struct task_struct *new_max_eff_prio;
120 struct task_struct *new_prio = NULL;
121
122 if(mutex->hp_waiter)
123 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
124 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
125 else
126 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
127
128 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
129
130 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
131 mutex->hp_waiter = t;
132 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
133 binheap_decrease(&l->nest.hp_binheap_node,
134 &tsk_rt(owner)->hp_blocked_tasks);
135 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
136
137 if(new_max_eff_prio != old_max_eff_prio) {
138 TRACE_TASK(t, "is new hp_waiter.\n");
139
140 if ((effective_priority(owner) == old_max_eff_prio) ||
141 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
142 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
143 new_prio = new_max_eff_prio;
144 }
145 }
146 else {
147 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
148 }
149
150 if(new_prio) {
151 litmus->nested_increase_prio(owner, new_prio,
152 &mutex->lock, flags); // unlocks lock.
153 }
154 else {
155 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
156 unlock_fine_irqrestore(&mutex->lock, flags);
157 }
158 }
159 else {
160 TRACE_TASK(t, "no change in hp_waiter.\n");
161 unlock_fine_irqrestore(&mutex->lock, flags);
162 }
163}
164
165static void select_next_lock_if_primary(struct litmus_lock *l,
166 dgl_wait_state_t *dgl_wait)
167{
168 if(tsk_rt(dgl_wait->task)->blocked_lock == l) {
169 TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n",
170 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
171 tsk_rt(dgl_wait->task)->blocked_lock = NULL;
172 mb();
173 select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on
174 }
175 else {
176 TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n",
177 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
178 }
179}
180#endif
181
182
183
184
185int rsm_mutex_lock(struct litmus_lock* l)
186{
187 struct task_struct *t = current;
188 struct task_struct *owner;
189 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
190 wait_queue_t wait;
191 unsigned long flags;
192
193#ifdef CONFIG_LITMUS_DGL_SUPPORT
194 raw_spinlock_t *dgl_lock;
195#endif
196
197 if (!is_realtime(t))
198 return -EPERM;
199
200#ifdef CONFIG_LITMUS_DGL_SUPPORT
201 dgl_lock = litmus->get_dgl_spinlock(t);
202#endif
203
204 lock_global_irqsave(dgl_lock, flags);
205 lock_fine_irqsave(&mutex->lock, flags);
206
207 if (mutex->owner) {
208 TRACE_TASK(t, "Blocking on lock %d.\n", l->ident);
209
210#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
211		// KLUDGE: don't count this suspension as time in the
212		// gpu critical section
213 if(tsk_rt(t)->held_gpus) {
214 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
215 }
216#endif
217
218 /* resource is not free => must suspend and wait */
219
220 owner = mutex->owner;
221
222 init_waitqueue_entry(&wait, t);
223
224 tsk_rt(t)->blocked_lock = l; /* record where we are blocked */
225 mb(); // needed?
226
227 /* FIXME: interruptible would be nice some day */
228 set_task_state(t, TASK_UNINTERRUPTIBLE);
229
230 __add_wait_queue_tail_exclusive(&mutex->wait, &wait);
231
232 /* check if we need to activate priority inheritance */
233 //if (edf_higher_prio(t, mutex->hp_waiter)) {
234 if (litmus->compare(t, mutex->hp_waiter)) {
235
236 struct task_struct *old_max_eff_prio;
237 struct task_struct *new_max_eff_prio;
238 struct task_struct *new_prio = NULL;
239
240 if(mutex->hp_waiter)
241 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
242 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
243 else
244 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
245
246 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
247
248 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
249 mutex->hp_waiter = t;
250 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
251 binheap_decrease(&l->nest.hp_binheap_node,
252 &tsk_rt(owner)->hp_blocked_tasks);
253 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
254
255 if(new_max_eff_prio != old_max_eff_prio) {
256 TRACE_TASK(t, "is new hp_waiter.\n");
257
258 if ((effective_priority(owner) == old_max_eff_prio) ||
259 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
260 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
261 new_prio = new_max_eff_prio;
262 }
263 }
264 else {
265 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
266 }
267
268 if(new_prio) {
269 litmus->nested_increase_prio(owner, new_prio, &mutex->lock,
270 flags); // unlocks lock.
271 }
272 else {
273 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
274 unlock_fine_irqrestore(&mutex->lock, flags);
275 }
276 }
277 else {
278 TRACE_TASK(t, "no change in hp_waiter.\n");
279
280 unlock_fine_irqrestore(&mutex->lock, flags);
281 }
282
283 unlock_global_irqrestore(dgl_lock, flags);
284
285 TS_LOCK_SUSPEND;
286
287 /* We depend on the FIFO order. Thus, we don't need to recheck
288 * when we wake up; we are guaranteed to have the lock since
289 * there is only one wake up per release.
290 */
291
292 suspend_for_lock();
293
294 TS_LOCK_RESUME;
295
296 /* Since we hold the lock, no other task will change
297 * ->owner. We can thus check it without acquiring the spin
298 * lock. */
299 BUG_ON(mutex->owner != t);
300
301 TRACE_TASK(t, "Acquired lock %d.\n", l->ident);
302
303 } else {
304 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
305
306 /* it's ours now */
307 mutex->owner = t;
308
309 raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
310 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
311 struct nested_info, hp_binheap_node);
312 raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
313
314
315 unlock_fine_irqrestore(&mutex->lock, flags);
316 unlock_global_irqrestore(dgl_lock, flags);
317 }
318
319 return 0;
320}
321
322
323
324int rsm_mutex_unlock(struct litmus_lock* l)
325{
326 struct task_struct *t = current, *next = NULL;
327 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
328 unsigned long flags;
329
330 struct task_struct *old_max_eff_prio;
331
332 int wake_up_task = 1;
333
334#ifdef CONFIG_LITMUS_DGL_SUPPORT
335 dgl_wait_state_t *dgl_wait = NULL;
336 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
337#endif
338
339 int err = 0;
340
341 if (mutex->owner != t) {
342 err = -EINVAL;
343 return err;
344 }
345
346 lock_global_irqsave(dgl_lock, flags);
347 lock_fine_irqsave(&mutex->lock, flags);
348
349 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
350
351 TRACE_TASK(t, "Freeing lock %d\n", l->ident);
352
353 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
354 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks);
355
356 if(tsk_rt(t)->inh_task){
357 struct task_struct *new_max_eff_prio =
358 top_priority(&tsk_rt(t)->hp_blocked_tasks);
359
360 if((new_max_eff_prio == NULL) ||
361 /* there was a change in eff prio */
362 ( (new_max_eff_prio != old_max_eff_prio) &&
363 /* and owner had the old eff prio */
364 (effective_priority(t) == old_max_eff_prio)) )
365 {
366 // old_max_eff_prio > new_max_eff_prio
367
368 //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) {
369 if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) {
370 TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n",
371 new_max_eff_prio->comm, new_max_eff_prio->pid,
372 t->comm, t->pid, tsk_rt(t)->inh_task->comm,
373 tsk_rt(t)->inh_task->pid);
374 WARN_ON(1);
375 }
376
377 litmus->decrease_prio(t, new_max_eff_prio);
378 }
379 }
380
381 if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) &&
382 tsk_rt(t)->inh_task != NULL)
383 {
384 WARN_ON(tsk_rt(t)->inh_task != NULL);
385 TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n",
386 tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid);
387 }
388
389 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
390
391
392 /* check if there are jobs waiting for this resource */
393#ifdef CONFIG_LITMUS_DGL_SUPPORT
394 __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next);
395 if(dgl_wait) {
396 next = dgl_wait->task;
397 //select_next_lock_if_primary(l, dgl_wait);
398 }
399#else
400 next = __waitqueue_remove_first(&mutex->wait);
401#endif
402 if (next) {
403		/* next becomes the resource holder */
404 mutex->owner = next;
405 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
406
407 /* determine new hp_waiter if necessary */
408 if (next == mutex->hp_waiter) {
409
410 TRACE_TASK(next, "was highest-prio waiter\n");
411 /* next has the highest priority --- it doesn't need to
412 * inherit. However, we need to make sure that the
413 * next-highest priority in the queue is reflected in
414 * hp_waiter. */
415 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next);
416 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
417 effective_priority(mutex->hp_waiter) :
418 NULL;
419
420 if (mutex->hp_waiter)
421 TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
422 else
423 TRACE("no further waiters\n");
424
425 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
426
427 binheap_add(&l->nest.hp_binheap_node,
428 &tsk_rt(next)->hp_blocked_tasks,
429 struct nested_info, hp_binheap_node);
430
431#ifdef CONFIG_LITMUS_DGL_SUPPORT
432 if(dgl_wait) {
433 select_next_lock_if_primary(l, dgl_wait);
434 //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining);
435 --(dgl_wait->nr_remaining);
436 wake_up_task = (dgl_wait->nr_remaining == 0);
437 }
438#endif
439 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
440 }
441 else {
442 /* Well, if 'next' is not the highest-priority waiter,
443 * then it (probably) ought to inherit the highest-priority
444 * waiter's priority. */
445 TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident);
446
447 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
448
449 binheap_add(&l->nest.hp_binheap_node,
450 &tsk_rt(next)->hp_blocked_tasks,
451 struct nested_info, hp_binheap_node);
452
453#ifdef CONFIG_LITMUS_DGL_SUPPORT
454 if(dgl_wait) {
455 select_next_lock_if_primary(l, dgl_wait);
456 --(dgl_wait->nr_remaining);
457 wake_up_task = (dgl_wait->nr_remaining == 0);
458 }
459#endif
460
461 /* It is possible that 'next' *should* be the hp_waiter, but isn't
462 * because that update hasn't yet executed (update operation is
463			 * probably blocked on mutex->lock). So only inherit if the top of
464			 * 'next's blocked-tasks heap is indeed the effective prio. of hp_waiter.
465 * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
466 * since the effective priority of hp_waiter can change (and the
467 * update has not made it to this lock).)
468 */
469#ifdef CONFIG_LITMUS_DGL_SUPPORT
470 if((l->nest.hp_waiter_eff_prio != NULL) &&
471 (top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
472 l->nest.hp_waiter_eff_prio))
473 {
474 if(dgl_wait && tsk_rt(next)->blocked_lock) {
475 BUG_ON(wake_up_task);
476 //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
477 if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
478 litmus->nested_increase_prio(next,
479 l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock.
480 goto out; // all spinlocks are released. bail out now.
481 }
482 }
483 else {
484 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
485 }
486 }
487
488 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
489#else
490 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
491 l->nest.hp_waiter_eff_prio))
492 {
493 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
494 }
495 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
496#endif
497 }
498
499 if(wake_up_task) {
500 TRACE_TASK(next, "waking up since it is no longer blocked.\n");
501
502 tsk_rt(next)->blocked_lock = NULL;
503 mb();
504
505#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
506 // re-enable tracking
507 if(tsk_rt(next)->held_gpus) {
508 tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
509 }
510#endif
511
512 wake_up_process(next);
513 }
514 else {
515 TRACE_TASK(next, "is still blocked.\n");
516 }
517 }
518 else {
519 /* becomes available */
520 mutex->owner = NULL;
521 }
522
523 unlock_fine_irqrestore(&mutex->lock, flags);
524
525#ifdef CONFIG_LITMUS_DGL_SUPPORT
526out:
527#endif
528 unlock_global_irqrestore(dgl_lock, flags);
529
530 return err;
531}
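
The unlock path relies on the FIFO wait queue: the releasing task picks the next owner, installs it in mutex->owner, and issues exactly one wake-up, so the woken task never re-contends for the lock. Below is a minimal userspace analogue of that hand-off discipline, assuming POSIX threads; the names (ho_lock, ho_unlock, struct waiter) are made up for illustration:

/* Sketch only: direct FIFO hand-off. The releaser designates the next owner
 * before waking it, so the woken thread holds the lock without re-checking. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct waiter { pthread_cond_t cv; int granted; struct waiter *next; };

struct handoff_lock {
	pthread_mutex_t m;             /* protects all fields below */
	int held;
	struct waiter *head, *tail;
};

static void ho_lock(struct handoff_lock *l)
{
	pthread_mutex_lock(&l->m);
	if (!l->held) {
		l->held = 1;           /* uncontended fast path */
	} else {
		struct waiter w = { PTHREAD_COND_INITIALIZER, 0, NULL };
		if (l->tail) l->tail->next = &w; else l->head = &w;
		l->tail = &w;
		while (!w.granted)     /* woken only once ownership is ours */
			pthread_cond_wait(&w.cv, &l->m);
	}
	pthread_mutex_unlock(&l->m);
}

static void ho_unlock(struct handoff_lock *l)
{
	pthread_mutex_lock(&l->m);
	if (l->head) {
		struct waiter *w = l->head;
		l->head = w->next;
		if (!l->head) l->tail = NULL;
		w->granted = 1;                /* pass ownership directly */
		pthread_cond_signal(&w->cv);   /* one wake-up per release */
	} else {
		l->held = 0;
	}
	pthread_mutex_unlock(&l->m);
}

static struct handoff_lock L = { PTHREAD_MUTEX_INITIALIZER, 0, NULL, NULL };

static void *worker(void *arg)
{
	ho_lock(&L);
	printf("thread %ld owns the lock\n", (long)arg);
	usleep(1000);
	ho_unlock(&L);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	for (long i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);
	for (int i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}
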
532
533
534void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
535 struct task_struct* t,
536 raw_spinlock_t* to_unlock,
537 unsigned long irqflags)
538{
539 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
540
541 // relay-style locking
542 lock_fine(&mutex->lock);
543 unlock_fine(to_unlock);
544
545	if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked_lock
546 struct task_struct *owner = mutex->owner;
547
548 struct task_struct *old_max_eff_prio;
549 struct task_struct *new_max_eff_prio;
550
551 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
552
553 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
554
555 //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) {
556 if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) {
557 TRACE_TASK(t, "is new highest-prio waiter by propagation.\n");
558 mutex->hp_waiter = t;
559 }
560 if(t == mutex->hp_waiter) {
561 // reflect the decreased priority in the heap node.
562 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
563
564 BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node));
565 BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node,
566 &tsk_rt(owner)->hp_blocked_tasks));
567
568 binheap_decrease(&l->nest.hp_binheap_node,
569 &tsk_rt(owner)->hp_blocked_tasks);
570 }
571
572 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
573
574
575 if(new_max_eff_prio != old_max_eff_prio) {
576 // new_max_eff_prio > old_max_eff_prio holds.
577 if ((effective_priority(owner) == old_max_eff_prio) ||
578 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
579 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
580 TRACE_CUR("Propagating inheritance to holder of lock %d.\n",
581 l->ident);
582
583 // beware: recursion
584 litmus->nested_increase_prio(owner, new_max_eff_prio,
585 &mutex->lock, irqflags); // unlocks mutex->lock
586 }
587 else {
588 TRACE_CUR("Lower priority than holder %s/%d. No propagation.\n",
589 owner->comm, owner->pid);
590 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
591 unlock_fine_irqrestore(&mutex->lock, irqflags);
592 }
593 }
594 else {
595			TRACE_TASK(mutex->owner, "No change in maximum effective priority.\n");
596 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
597 unlock_fine_irqrestore(&mutex->lock, irqflags);
598 }
599 }
600 else {
601 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
602
603 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
604 if(still_blocked) {
605 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
606 still_blocked->ident);
607 if(still_blocked->ops->propagate_increase_inheritance) {
608 /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B)
609 we know that task 't' has not released any locks behind us in this
610 chain. Propagation just needs to catch up with task 't'. */
611 still_blocked->ops->propagate_increase_inheritance(still_blocked,
612 t,
613 &mutex->lock,
614 irqflags);
615 }
616 else {
617 TRACE_TASK(t,
618 "Inheritor is blocked on lock (%p) that does not "
619 "support nesting!\n",
620 still_blocked);
621 unlock_fine_irqrestore(&mutex->lock, irqflags);
622 }
623 }
624 else {
625 unlock_fine_irqrestore(&mutex->lock, irqflags);
626 }
627 }
628}
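
Both propagate paths use relay-style ("hand-over-hand") fine-grained locking: the next lock in the blocking chain is acquired before the previous one is released, so the chain cannot change behind the traversal. A compact userspace sketch of the pattern, with hypothetical names and pthread spinlocks standing in for the kernel's fine-grained locks:

/* Sketch only: relay-style locking along a blocking chain. At every step the
 * next lock is taken before the previous one is dropped. */
#include <pthread.h>
#include <stdio.h>

struct chain_node {
	pthread_spinlock_t lock;
	int prio;                     /* value being propagated; smaller = higher */
	struct chain_node *next;      /* lock this task is blocked on, if any */
};

static void propagate(struct chain_node *start, int new_prio)
{
	pthread_spin_lock(&start->lock);
	struct chain_node *cur = start;

	while (cur) {
		if (new_prio < cur->prio)
			cur->prio = new_prio;
		struct chain_node *nxt = cur->next;
		if (nxt)
			pthread_spin_lock(&nxt->lock);   /* take the next hop first... */
		pthread_spin_unlock(&cur->lock);     /* ...then release the current one */
		cur = nxt;
	}
}

int main(void)
{
	struct chain_node c = { .prio = 40, .next = NULL };
	struct chain_node b = { .prio = 30, .next = &c };
	struct chain_node a = { .prio = 50, .next = &b };
	pthread_spin_init(&a.lock, 0);
	pthread_spin_init(&b.lock, 0);
	pthread_spin_init(&c.lock, 0);

	propagate(&a, 10);   /* a new high-priority waiter arrives at 'a' */
	printf("%d %d %d\n", a.prio, b.prio, c.prio);   /* 10 10 10 */
	return 0;
}
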
629
630
631void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
632 struct task_struct* t,
633 raw_spinlock_t* to_unlock,
634 unsigned long irqflags)
635{
636 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
637
638 // relay-style locking
639 lock_fine(&mutex->lock);
640 unlock_fine(to_unlock);
641
642	if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked_lock
643 if(t == mutex->hp_waiter) {
644 struct task_struct *owner = mutex->owner;
645
646 struct task_struct *old_max_eff_prio;
647 struct task_struct *new_max_eff_prio;
648
649 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
650
651 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
652
653 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
654 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL);
655 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
656 effective_priority(mutex->hp_waiter) : NULL;
657 binheap_add(&l->nest.hp_binheap_node,
658 &tsk_rt(owner)->hp_blocked_tasks,
659 struct nested_info, hp_binheap_node);
660
661 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
662
663 if((old_max_eff_prio != new_max_eff_prio) &&
664 (effective_priority(owner) == old_max_eff_prio))
665 {
666 // Need to set new effective_priority for owner
667
668 struct task_struct *decreased_prio;
669
670 TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n",
671 l->ident);
672
673 //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
674 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
675 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n",
676 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
677 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
678 owner->comm,
679 owner->pid,
680 l->ident);
681
682 decreased_prio = new_max_eff_prio;
683 }
684 else {
685 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n",
686 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
687 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
688 owner->comm,
689 owner->pid,
690 l->ident);
691
692 decreased_prio = NULL;
693 }
694
695 // beware: recursion
696 litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock
697 }
698 else {
699 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
700 unlock_fine_irqrestore(&mutex->lock, irqflags);
701 }
702 }
703 else {
704 TRACE_TASK(t, "is not hp_waiter. No propagation.\n");
705 unlock_fine_irqrestore(&mutex->lock, irqflags);
706 }
707 }
708 else {
709 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
710
711 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
712 if(still_blocked) {
713 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
714 still_blocked->ident);
715 if(still_blocked->ops->propagate_decrease_inheritance) {
716 /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B)
717 we know that task 't' has not released any locks behind us in this
718 chain. propagation just needs to catch up with task 't' */
719 still_blocked->ops->propagate_decrease_inheritance(still_blocked,
720 t,
721 &mutex->lock,
722 irqflags);
723 }
724 else {
725 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
726 still_blocked);
727 unlock_fine_irqrestore(&mutex->lock, irqflags);
728 }
729 }
730 else {
731 unlock_fine_irqrestore(&mutex->lock, irqflags);
732 }
733 }
734}
735
736
737int rsm_mutex_close(struct litmus_lock* l)
738{
739 struct task_struct *t = current;
740 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
741 unsigned long flags;
742
743 int owner;
744
745#ifdef CONFIG_LITMUS_DGL_SUPPORT
746 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
747#endif
748
749 lock_global_irqsave(dgl_lock, flags);
750 lock_fine_irqsave(&mutex->lock, flags);
751
752 owner = (mutex->owner == t);
753
754 unlock_fine_irqrestore(&mutex->lock, flags);
755 unlock_global_irqrestore(dgl_lock, flags);
756
757 if (owner)
758 rsm_mutex_unlock(l);
759
760 return 0;
761}
762
763void rsm_mutex_free(struct litmus_lock* lock)
764{
765 kfree(rsm_mutex_from_lock(lock));
766}
767
768struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops)
769{
770 struct rsm_mutex* mutex;
771
772 mutex = kmalloc(sizeof(*mutex), GFP_KERNEL);
773 if (!mutex)
774 return NULL;
775
776 mutex->litmus_lock.ops = ops;
777 mutex->owner = NULL;
778 mutex->hp_waiter = NULL;
779 init_waitqueue_head(&mutex->wait);
780
781
782#ifdef CONFIG_DEBUG_SPINLOCK
783 {
784 __raw_spin_lock_init(&mutex->lock,
785 ((struct litmus_lock*)mutex)->cheat_lockdep,
786 &((struct litmus_lock*)mutex)->key);
787 }
788#else
789 raw_spin_lock_init(&mutex->lock);
790#endif
791
792 ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter;
793
794 return &mutex->litmus_lock;
795}
796
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 1683d3847560..54322e278a1e 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -300,10 +300,15 @@ void rt_domain_init(rt_domain_t *rt,
300 */ 300 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new) 301void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{ 302{
303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " 303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) "
304	 "to ready queue at %llu\n", 304	 "[inh_task: %s/%d (%llu, %llu, %llu)] "
305 new->comm, new->pid, 305 "rel=%llu to ready queue at %llu\n",
306 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), 306 new->comm, new->pid, get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
307 (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->comm : "(nil)",
308 (tsk_rt(new)->inh_task) ? tsk_rt(new)->inh_task->pid : 0,
309 (tsk_rt(new)->inh_task) ? get_exec_cost(tsk_rt(new)->inh_task) : 0,
310 (tsk_rt(new)->inh_task) ? get_rt_period(tsk_rt(new)->inh_task) : 0,
311 (tsk_rt(new)->inh_task) ? get_rt_relative_deadline(tsk_rt(new)->inh_task) : 0,
307 get_release(new), litmus_clock()); 312 get_release(new), litmus_clock());
308 313
309 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); 314 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index b45b46fc4fca..db47f4413329 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -29,7 +29,7 @@
29#include <linux/percpu.h> 29#include <linux/percpu.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32 32#include <linux/uaccess.h>
33#include <linux/module.h> 33#include <linux/module.h>
34 34
35#include <litmus/litmus.h> 35#include <litmus/litmus.h>
@@ -43,14 +43,48 @@
43#include <litmus/clustered.h> 43#include <litmus/clustered.h>
44 44
45#include <litmus/bheap.h> 45#include <litmus/bheap.h>
46#include <litmus/binheap.h>
47#include <litmus/trace.h>
48
49#ifdef CONFIG_LITMUS_LOCKING
50#include <litmus/kfmlp_lock.h>
51#endif
52
53#ifdef CONFIG_LITMUS_NESTED_LOCKING
54#include <litmus/rsm_lock.h>
55#include <litmus/ikglp_lock.h>
56#endif
46 57
47#ifdef CONFIG_SCHED_CPU_AFFINITY 58#ifdef CONFIG_SCHED_CPU_AFFINITY
48#include <litmus/affinity.h> 59#include <litmus/affinity.h>
49#endif 60#endif
50 61
62#ifdef CONFIG_REALTIME_AUX_TASKS
63#include <litmus/aux_tasks.h>
64#endif
65
51/* to configure the cluster size */ 66/* to configure the cluster size */
52#include <litmus/litmus_proc.h> 67#include <litmus/litmus_proc.h>
53#include <linux/uaccess.h> 68
69#ifdef CONFIG_SCHED_CPU_AFFINITY
70#include <litmus/affinity.h>
71#endif
72
73#ifdef CONFIG_LITMUS_SOFTIRQD
74#include <litmus/litmus_softirq.h>
75#endif
76
77#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
78#include <linux/interrupt.h>
79#endif
80
81#ifdef CONFIG_LITMUS_NVIDIA
82#include <litmus/nvidia_info.h>
83#endif
84
85#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
86#include <litmus/gpu_affinity.h>
87#endif
54 88
55/* Reference configuration variable. Determines which cache level is used to 89/* Reference configuration variable. Determines which cache level is used to
56 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that 90 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
@@ -71,7 +105,7 @@ typedef struct {
71 struct task_struct* linked; /* only RT tasks */ 105 struct task_struct* linked; /* only RT tasks */
72 struct task_struct* scheduled; /* only RT tasks */ 106 struct task_struct* scheduled; /* only RT tasks */
73 atomic_t will_schedule; /* prevent unneeded IPIs */ 107 atomic_t will_schedule; /* prevent unneeded IPIs */
74 struct bheap_node* hn; 108 struct binheap_node hn;
75} cpu_entry_t; 109} cpu_entry_t;
76 110
77/* one cpu_entry_t per CPU */ 111/* one cpu_entry_t per CPU */
@@ -97,10 +131,17 @@ typedef struct clusterdomain {
97 /* map of this cluster cpus */ 131 /* map of this cluster cpus */
98 cpumask_var_t cpu_map; 132 cpumask_var_t cpu_map;
99 /* the cpus queue themselves according to priority in here */ 133 /* the cpus queue themselves according to priority in here */
100 struct bheap_node *heap_node; 134 struct binheap cpu_heap;
101 struct bheap cpu_heap;
102 /* lock for this cluster */ 135 /* lock for this cluster */
103#define cluster_lock domain.ready_lock 136#define cluster_lock domain.ready_lock
137
138#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
139 struct tasklet_head pending_tasklets;
140#endif
141
142#ifdef CONFIG_LITMUS_DGL_SUPPORT
143 raw_spinlock_t dgl_lock;
144#endif
104} cedf_domain_t; 145} cedf_domain_t;
105 146
106/* a cedf_domain per cluster; allocation is done at init/activation time */ 147/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -109,6 +150,29 @@ cedf_domain_t *cedf;
109#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) 150#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
110#define task_cpu_cluster(task) remote_cluster(get_partition(task)) 151#define task_cpu_cluster(task) remote_cluster(get_partition(task))
111 152
153/* total number of clusters */
154static int num_clusters;
155/* we do not support clusters of different sizes */
156static unsigned int cluster_size;
157
158static int clusters_allocated = 0;
159
160
161#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
162static int num_gpu_clusters;
163static unsigned int gpu_cluster_size;
164#endif
165
166
167#ifdef CONFIG_LITMUS_DGL_SUPPORT
168static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t)
169{
170 cedf_domain_t *cluster = task_cpu_cluster(t);
171 return(&cluster->dgl_lock);
172}
173#endif
174
175
112/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling 176/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
113 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose 177 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
114 * information during the initialization of the plugin (e.g., topology) 178 * information during the initialization of the plugin (e.g., topology)
@@ -116,11 +180,11 @@ cedf_domain_t *cedf;
116 */ 180 */
117#define VERBOSE_INIT 181#define VERBOSE_INIT
118 182
119static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 183static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
120{ 184{
121 cpu_entry_t *a, *b; 185 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
122 a = _a->value; 186 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
123 b = _b->value; 187
124 /* Note that a and b are inverted: we want the lowest-priority CPU at 188 /* Note that a and b are inverted: we want the lowest-priority CPU at
125 * the top of the heap. 189 * the top of the heap.
126 */ 190 */
@@ -134,20 +198,17 @@ static void update_cpu_position(cpu_entry_t *entry)
134{ 198{
135 cedf_domain_t *cluster = entry->cluster; 199 cedf_domain_t *cluster = entry->cluster;
136 200
137 if (likely(bheap_node_in_heap(entry->hn))) 201 if (likely(binheap_is_in_heap(&entry->hn))) {
138 bheap_delete(cpu_lower_prio, 202 binheap_delete(&entry->hn, &cluster->cpu_heap);
139 &cluster->cpu_heap, 203 }
140 entry->hn);
141 204
142 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); 205 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
143} 206}
144 207
145/* caller must hold cedf lock */ 208/* caller must hold cedf lock */
146static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) 209static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
147{ 210{
148 struct bheap_node* hn; 211 return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
149 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
150 return hn->value;
151} 212}
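
The comparator above is deliberately inverted so that the CPU running the lowest-priority job (or no job at all) ends up at the top of cpu_heap and is the natural preemption target. The sketch below shows only that ordering idea; a linear scan stands in for the binheap, priorities are plain integers (smaller = more urgent), an idle CPU is modelled with the worst possible key, and all names are hypothetical:

/* Sketch only: pick the CPU that every other CPU "beats", i.e. the one
 * running the least urgent work, as the preemption victim. */
#include <stdio.h>
#include <limits.h>

#define NCPU 4

struct cpu_entry { int cpu; int linked_prio; };   /* smaller prio = more urgent */

/* inverted comparison: a belongs above b if a is LESS urgent than b */
static int cpu_lower_prio(const struct cpu_entry *a, const struct cpu_entry *b)
{
	return a->linked_prio > b->linked_prio;
}

static struct cpu_entry *lowest_prio_cpu(struct cpu_entry *e, int n)
{
	struct cpu_entry *top = &e[0];
	for (int i = 1; i < n; i++)
		if (cpu_lower_prio(&e[i], top))
			top = &e[i];
	return top;
}

int main(void)
{
	struct cpu_entry e[NCPU] = {
		{ 0, 10 }, { 1, 40 }, { 2, INT_MAX /* idle */ }, { 3, 25 },
	};
	struct cpu_entry *victim = lowest_prio_cpu(e, NCPU);
	printf("preempt CPU %d (key %d)\n", victim->cpu, victim->linked_prio); /* CPU 2 */
	return 0;
}
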
152 213
153 214
@@ -209,7 +270,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
209} 270}
210 271
211/* unlink - Make sure a task is not linked any longer to an entry 272/* unlink - Make sure a task is not linked any longer to an entry
212 * where it was linked before. Must hold cedf_lock. 273 * where it was linked before. Must hold cluster_lock.
213 */ 274 */
214static noinline void unlink(struct task_struct* t) 275static noinline void unlink(struct task_struct* t)
215{ 276{
@@ -245,7 +306,7 @@ static void preempt(cpu_entry_t *entry)
245} 306}
246 307
247/* requeue - Put an unlinked task into gsn-edf domain. 308/* requeue - Put an unlinked task into gsn-edf domain.
248 * Caller must hold cedf_lock. 309 * Caller must hold cluster_lock.
249 */ 310 */
250static noinline void requeue(struct task_struct* task) 311static noinline void requeue(struct task_struct* task)
251{ 312{
@@ -255,7 +316,15 @@ static noinline void requeue(struct task_struct* task)
255 BUG_ON(is_queued(task)); 316 BUG_ON(is_queued(task));
256 317
257 if (is_released(task, litmus_clock())) 318 if (is_released(task, litmus_clock()))
258 __add_ready(&cluster->domain, task); 319#ifdef CONFIG_REALTIME_AUX_TASKS
320 if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) {
321 /* aux_task probably transitioned to real-time while it was blocked */
322 TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid);
323 unlink(task); /* really needed? */
324 }
325 else
326#endif
327 __add_ready(&cluster->domain, task);
259 else { 328 else {
260 /* it has got to wait */ 329 /* it has got to wait */
261 add_release(&cluster->domain, task); 330 add_release(&cluster->domain, task);
@@ -340,13 +409,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
340 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 409 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
341} 410}
342 411
343/* caller holds cedf_lock */ 412/* caller holds cluster_lock */
344static noinline void job_completion(struct task_struct *t, int forced) 413static noinline void job_completion(struct task_struct *t, int forced)
345{ 414{
346 BUG_ON(!t); 415 BUG_ON(!t);
347 416
348 sched_trace_task_completion(t, forced); 417 sched_trace_task_completion(t, forced);
349 418
419#ifdef CONFIG_LITMUS_NVIDIA
420 atomic_set(&tsk_rt(t)->nv_int_count, 0);
421#endif
422
350 TRACE_TASK(t, "job_completion().\n"); 423 TRACE_TASK(t, "job_completion().\n");
351 424
352 /* set flags */ 425 /* set flags */
@@ -371,25 +444,341 @@ static noinline void job_completion(struct task_struct *t, int forced)
371 */ 444 */
372static void cedf_tick(struct task_struct* t) 445static void cedf_tick(struct task_struct* t)
373{ 446{
374 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 447 if (is_realtime(t) && budget_exhausted(t))
375 if (!is_np(t)) { 448 {
376 /* np tasks will be preempted when they become 449 if (budget_signalled(t) && !sigbudget_sent(t)) {
377 * preemptable again 450 /* signal exhaustion */
378 */ 451 send_sigbudget(t);
379 litmus_reschedule_local(); 452 }
380 set_will_schedule(); 453
381 TRACE("cedf_scheduler_tick: " 454 if (budget_enforced(t)) {
382 "%d is preemptable " 455 if (!is_np(t)) {
383 " => FORCE_RESCHED\n", t->pid); 456 /* np tasks will be preempted when they become
384 } else if (is_user_np(t)) { 457 * preemptable again
385 TRACE("cedf_scheduler_tick: " 458 */
386 "%d is non-preemptable, " 459 litmus_reschedule_local();
387 "preemption delayed.\n", t->pid); 460 set_will_schedule();
388 request_exit_np(t); 461 TRACE("cedf_scheduler_tick: "
462 "%d is preemptable "
463 " => FORCE_RESCHED\n", t->pid);
464 } else if (is_user_np(t)) {
465 TRACE("cedf_scheduler_tick: "
466 "%d is non-preemptable, "
467 "preemption delayed.\n", t->pid);
468 request_exit_np(t);
469 }
389 } 470 }
390 } 471 }
391} 472}
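
The reworked tick handler separates budget signalling from budget enforcement: the signal is sent at most once per exhausted budget, and a reschedule is forced only if enforcement is enabled and the task is preemptable. A small userspace sketch of that decision logic, with made-up flag names:

/* Sketch only: two independent reactions to budget exhaustion. */
#include <stdbool.h>
#include <stdio.h>

struct task_state {
	bool budget_exhausted;
	bool budget_signalled;   /* user asked for a budget signal */
	bool sig_sent;           /* signal already delivered for this job */
	bool budget_enforced;    /* scheduler should preempt on exhaustion */
	bool non_preemptive;     /* inside an np-section */
};

static void on_tick(struct task_state *t)
{
	if (!t->budget_exhausted)
		return;

	if (t->budget_signalled && !t->sig_sent) {
		printf("send budget signal (once per exhaustion)\n");
		t->sig_sent = true;
	}

	if (t->budget_enforced) {
		if (!t->non_preemptive)
			printf("force reschedule now\n");
		else
			printf("request exit from np-section; preemption delayed\n");
	}
}

int main(void)
{
	struct task_state t = { true, true, false, true, false };
	on_tick(&t);   /* signals and forces a reschedule */
	on_tick(&t);   /* signal not repeated; reschedule requested again */
	return 0;
}
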
392 473
474
475
476
477
478
479
480
481
482
483
484
485
486#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
487
488
489static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
490{
491 if (!atomic_read(&tasklet->count)) {
492 if(tasklet->owner) {
493 sched_trace_tasklet_begin(tasklet->owner);
494 }
495
496 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
497 {
498 BUG();
499 }
500 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
501 __FUNCTION__,
502 (tasklet->owner) ? tasklet->owner->pid : -1,
503 (tasklet->owner) ? 0 : 1);
504 tasklet->func(tasklet->data);
505 tasklet_unlock(tasklet);
506
507 if(tasklet->owner) {
508 sched_trace_tasklet_end(tasklet->owner, flushed);
509 }
510 }
511 else {
512 BUG();
513 }
514}
515
516
517static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
518{
519 int work_to_do = 1;
520 struct tasklet_struct *tasklet = NULL;
521 unsigned long flags;
522
523 while(work_to_do) {
524
525 TS_NV_SCHED_BOTISR_START;
526
527 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
528
529 if(cluster->pending_tasklets.head != NULL) {
530 // remove tasklet at head.
531 struct tasklet_struct *prev = NULL;
532 tasklet = cluster->pending_tasklets.head;
533
534 // find a tasklet with prio to execute; skip ones where
535 // sched_task has a higher priority.
536			// We use the '!edf' test instead of swapping function arguments since
537 // both sched_task and owner could be NULL. In this case, we want to
538 // still execute the tasklet.
539 while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) {
540 prev = tasklet;
541 tasklet = tasklet->next;
542 }
543
544			if(tasklet) { // found something to execute
545 // remove the tasklet from the queue
546 if(prev) {
547 prev->next = tasklet->next;
548 if(prev->next == NULL) {
549 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
550						cluster->pending_tasklets.tail = &(prev->next);
551 }
552 }
553 else {
554 cluster->pending_tasklets.head = tasklet->next;
555 if(tasklet->next == NULL) {
556 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
557 cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
558 }
559 }
560 }
561 else {
562 TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__);
563 }
564 }
565 else {
566 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
567 }
568
569 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
570
571 if(tasklet) {
572 __do_lit_tasklet(tasklet, 0ul);
573 tasklet = NULL;
574 }
575 else {
576 work_to_do = 0;
577 }
578
579 TS_NV_SCHED_BOTISR_END;
580 }
581}
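
do_lit_tasklets() follows the usual drain pattern: dequeue one eligible tasklet while holding the cluster lock, drop the lock, run the tasklet, and loop until nothing eligible remains. A stripped-down userspace sketch of that pattern (hypothetical names, a pthread mutex in place of the cluster lock):

/* Sketch only: pop under the lock, execute outside the lock, repeat. */
#include <pthread.h>
#include <stdio.h>

struct work { void (*fn)(int); int arg; struct work *next; };

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct work *queue_head;

static void drain(void)
{
	for (;;) {
		pthread_mutex_lock(&queue_lock);
		struct work *w = queue_head;          /* take the head, if any */
		if (w)
			queue_head = w->next;
		pthread_mutex_unlock(&queue_lock);    /* never run work under the lock */

		if (!w)
			break;
		w->fn(w->arg);                        /* may itself take other locks */
	}
}

static void print_item(int x) { printf("ran item %d\n", x); }

int main(void)
{
	struct work b = { print_item, 2, NULL };
	struct work a = { print_item, 1, &b };
	queue_head = &a;
	drain();
	return 0;
}
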
582
583static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
584{
585 struct tasklet_struct* step;
586
587 tasklet->next = NULL; // make sure there are no old values floating around
588
589 step = cluster->pending_tasklets.head;
590 if(step == NULL) {
591 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
592 // insert at tail.
593 *(cluster->pending_tasklets.tail) = tasklet;
594 cluster->pending_tasklets.tail = &(tasklet->next);
595 }
596 else if((*(cluster->pending_tasklets.tail) != NULL) &&
597 edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
598 // insert at tail.
599 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
600
601 *(cluster->pending_tasklets.tail) = tasklet;
602 cluster->pending_tasklets.tail = &(tasklet->next);
603 }
604 else {
605
606 // insert the tasklet somewhere in the middle.
607
608 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
609
610 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
611 step = step->next;
612 }
613
614 // insert tasklet right before step->next.
615
616 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
617 tasklet->owner->pid,
618 (step->owner) ?
619 step->owner->pid :
620 -1,
621 (step->next) ?
622 ((step->next->owner) ?
623 step->next->owner->pid :
624 -1) :
625 -1);
626
627 tasklet->next = step->next;
628 step->next = tasklet;
629
630 // patch up the head if needed.
631 if(cluster->pending_tasklets.head == step)
632 {
633 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
634 cluster->pending_tasklets.head = tasklet;
635 }
636 }
637}
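
__add_pai_tasklet() keeps the pending list sorted by owner priority and maintains a pointer to the last element's next field, so the empty-list, middle, and append cases reduce to the same link manipulation plus a tail fix-up. A self-contained sketch of the same insertion discipline, using plain integers as priorities (smaller = higher) and hypothetical names:

/* Sketch only: priority-ordered insertion into a singly linked list with a
 * pointer-to-pointer tail. */
#include <stdio.h>
#include <stddef.h>

struct item { int prio; struct item *next; };

struct plist {
	struct item *head;
	struct item **tail;    /* points at the last 'next' field (or at head) */
};

static void plist_init(struct plist *q) { q->head = NULL; q->tail = &q->head; }

static void plist_insert(struct plist *q, struct item *it)
{
	struct item **link = &q->head;

	/* walk past entries with equal or better (smaller) priority */
	while (*link && (*link)->prio <= it->prio)
		link = &(*link)->next;

	it->next = *link;
	*link = it;
	if (!it->next)              /* inserted at the end: fix up the tail */
		q->tail = &it->next;
}

int main(void)
{
	struct plist q; plist_init(&q);
	struct item a = { 30 }, b = { 10 }, c = { 20 };
	plist_insert(&q, &a);
	plist_insert(&q, &b);
	plist_insert(&q, &c);
	for (struct item *i = q.head; i; i = i->next)
		printf("%d ", i->prio);   /* 10 20 30 */
	printf("\n");
	return 0;
}
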
638
639static void cedf_run_tasklets(struct task_struct* sched_task)
640{
641 cedf_domain_t* cluster;
642
643 preempt_disable();
644
645 cluster = (is_realtime(sched_task)) ?
646 task_cpu_cluster(sched_task) :
647 remote_cluster(smp_processor_id());
648
649 if(cluster && cluster->pending_tasklets.head != NULL) {
650 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
651 do_lit_tasklets(cluster, sched_task);
652 }
653
654 preempt_enable_no_resched();
655}
656
657
658
659static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
660{
661#if 0
662 cedf_domain_t *cluster = NULL;
663 cpu_entry_t *targetCPU = NULL;
664 int thisCPU;
665 int runLocal = 0;
666 int runNow = 0;
667 unsigned long flags;
668
669 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
670 {
671 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
672 return 0;
673 }
674
675 cluster = task_cpu_cluster(tasklet->owner);
676
677 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
678
679 thisCPU = smp_processor_id();
680
681#ifdef CONFIG_SCHED_CPU_AFFINITY
682 {
683 cpu_entry_t* affinity = NULL;
684
685 // use this CPU if it is in our cluster and isn't running any RT work.
686 if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
687 affinity = &(__get_cpu_var(cedf_cpu_entries));
688 }
689 else {
690 // this CPU is busy or shouldn't run tasklet in this cluster.
691 // look for available near by CPUs.
692 // NOTE: Affinity towards owner and not this CPU. Is this right?
693 affinity =
694 cedf_get_nearest_available_cpu(cluster,
695 &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
696 }
697
698 targetCPU = affinity;
699 }
700#endif
701
702 if (targetCPU == NULL) {
703 targetCPU = lowest_prio_cpu(cluster);
704 }
705
706 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
707 if (thisCPU == targetCPU->cpu) {
708 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
709 runLocal = 1;
710 runNow = 1;
711 }
712 else {
713 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
714 runLocal = 0;
715 runNow = 1;
716 }
717 }
718 else {
719 runLocal = 0;
720 runNow = 0;
721 }
722
723 if(!runLocal) {
724 // enqueue the tasklet
725 __add_pai_tasklet(tasklet, cluster);
726 }
727
728 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
729
730
731 if (runLocal /*&& runNow */) { // runNow == 1 is implied
732 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
733 __do_lit_tasklet(tasklet, 0ul);
734 }
735 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
736 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
737 preempt(targetCPU); // need to be protected by cluster_lock?
738 }
739 else {
740 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
741 }
742#else
743 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
744 __do_lit_tasklet(tasklet, 0ul);
745#endif
746 return(1); // success
747}
748
749static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
750 struct task_struct *new_prio)
751{
752 struct tasklet_struct* step;
753 unsigned long flags;
754 cedf_domain_t *cluster;
755 struct task_struct *probe;
756
757 // identify the cluster by the assignment of these tasks. one should
758 // be non-NULL.
759 probe = (old_prio) ? old_prio : new_prio;
760
761 if(probe) {
762 cluster = task_cpu_cluster(probe);
763
764 if(cluster->pending_tasklets.head != NULL) {
765 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
766 for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
767 if(step->owner == old_prio) {
768 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
769 step->owner = new_prio;
770 }
771 }
772 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
773 }
774 }
775 else {
776		TRACE("%s: Both priorities were NULL\n", __FUNCTION__);
777 }
778}
779
780#endif // PAI
781
393/* Getting schedule() right is a bit tricky. schedule() may not make any 782/* Getting schedule() right is a bit tricky. schedule() may not make any
394 * assumptions on the state of the current task since it may be called for a 783 * assumptions on the state of the current task since it may be called for a
395 * number of reasons. The reasons include a scheduler_tick() determined that it 784 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -415,7 +804,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
415{ 804{
416 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); 805 cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
417 cedf_domain_t *cluster = entry->cluster; 806 cedf_domain_t *cluster = entry->cluster;
418 int out_of_time, sleep, preempt, np, exists, blocks; 807 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks;
419 struct task_struct* next = NULL; 808 struct task_struct* next = NULL;
420 809
421#ifdef CONFIG_RELEASE_MASTER 810#ifdef CONFIG_RELEASE_MASTER
@@ -442,6 +831,10 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
442 out_of_time = exists && 831 out_of_time = exists &&
443 budget_enforced(entry->scheduled) && 832 budget_enforced(entry->scheduled) &&
444 budget_exhausted(entry->scheduled); 833 budget_exhausted(entry->scheduled);
834 signal_budget = exists &&
835 budget_signalled(entry->scheduled) &&
836 budget_exhausted(entry->scheduled) &&
837 !sigbudget_sent(entry->scheduled);
445 np = exists && is_np(entry->scheduled); 838 np = exists && is_np(entry->scheduled);
446 sleep = exists && is_completed(entry->scheduled); 839 sleep = exists && is_completed(entry->scheduled);
447 preempt = entry->scheduled != entry->linked; 840 preempt = entry->scheduled != entry->linked;
@@ -460,12 +853,28 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
460 TRACE_TASK(prev, "will be preempted by %s/%d\n", 853 TRACE_TASK(prev, "will be preempted by %s/%d\n",
461 entry->linked->comm, entry->linked->pid); 854 entry->linked->comm, entry->linked->pid);
462 855
856 /* Send the signal that the budget has been exhausted */
857 if (signal_budget)
858 send_sigbudget(entry->scheduled);
463 859
464 /* If a task blocks we have no choice but to reschedule. 860 /* If a task blocks we have no choice but to reschedule.
465 */ 861 */
466 if (blocks) 862 if (blocks)
467 unlink(entry->scheduled); 863 unlink(entry->scheduled);
468 864
865#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
866 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
867 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
868 // don't track preemptions or locking protocol suspensions.
869 TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
870 stop_gpu_tracker(entry->scheduled);
871 }
872 else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
873 TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
874 }
875 }
876#endif
877
469 /* Request a sys_exit_np() call if we would like to preempt but cannot. 878 /* Request a sys_exit_np() call if we would like to preempt but cannot.
470 * We need to make sure to update the link structure anyway in case 879 * We need to make sure to update the link structure anyway in case
471 * that we are still linked. Multiple calls to request_exit_np() don't 880 * that we are still linked. Multiple calls to request_exit_np() don't
@@ -515,7 +924,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
515 raw_spin_unlock(&cluster->cluster_lock); 924 raw_spin_unlock(&cluster->cluster_lock);
516 925
517#ifdef WANT_ALL_SCHED_EVENTS 926#ifdef WANT_ALL_SCHED_EVENTS
518 TRACE("cedf_lock released, next=0x%p\n", next); 927 TRACE("cluster_lock released, next=0x%p\n", next);
519 928
520 if (next) 929 if (next)
521 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 930 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
@@ -523,7 +932,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
523 TRACE("becomes idle at %llu.\n", litmus_clock()); 932 TRACE("becomes idle at %llu.\n", litmus_clock());
524#endif 933#endif
525 934
526
527 return next; 935 return next;
528} 936}
529 937
@@ -549,7 +957,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
549 cpu_entry_t* entry; 957 cpu_entry_t* entry;
550 cedf_domain_t* cluster; 958 cedf_domain_t* cluster;
551 959
552 TRACE("gsn edf: task new %d\n", t->pid); 960 TRACE("c-edf: task new %d\n", t->pid);
553 961
554 /* the cluster doesn't change even if t is running */ 962 /* the cluster doesn't change even if t is running */
555 cluster = task_cpu_cluster(t); 963 cluster = task_cpu_cluster(t);
@@ -587,7 +995,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
587static void cedf_task_wake_up(struct task_struct *task) 995static void cedf_task_wake_up(struct task_struct *task)
588{ 996{
589 unsigned long flags; 997 unsigned long flags;
590 lt_t now; 998 //lt_t now;
591 cedf_domain_t *cluster; 999 cedf_domain_t *cluster;
592 1000
593 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 1001 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
@@ -595,6 +1003,9 @@ static void cedf_task_wake_up(struct task_struct *task)
595 cluster = task_cpu_cluster(task); 1003 cluster = task_cpu_cluster(task);
596 1004
597 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1005 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1006
1007#if 0
1008 /* sporadic task model. will increment job numbers automatically */
598 now = litmus_clock(); 1009 now = litmus_clock();
599 if (is_tardy(task, now)) { 1010 if (is_tardy(task, now)) {
600 /* new sporadic release */ 1011 /* new sporadic release */
@@ -608,6 +1019,26 @@ static void cedf_task_wake_up(struct task_struct *task)
608 tsk_rt(task)->completed = 0; 1019 tsk_rt(task)->completed = 0;
609 } 1020 }
610 } 1021 }
1022#else
1023 /* periodic task model. don't force job to end.
1024 * rely on user to say when jobs complete or when budget expires. */
1025 tsk_rt(task)->completed = 0;
1026#endif
1027
1028#ifdef CONFIG_REALTIME_AUX_TASKS
1029 if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) {
1030 TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid);
1031 disable_aux_task_owner(task);
1032 }
1033#endif
1034
1035#ifdef CONFIG_LITMUS_NVIDIA
1036 if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) {
1037 TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid);
1038 disable_gpu_owner(task);
1039 }
1040#endif
1041
611 cedf_job_arrival(task); 1042 cedf_job_arrival(task);
612 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1043 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
613} 1044}
@@ -623,7 +1054,25 @@ static void cedf_task_block(struct task_struct *t)
623 1054
624 /* unlink if necessary */ 1055 /* unlink if necessary */
625 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1056 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1057
626 unlink(t); 1058 unlink(t);
1059
1060#ifdef CONFIG_REALTIME_AUX_TASKS
1061 if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) {
1062
1063 TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid);
1064 enable_aux_task_owner(t);
1065 }
1066#endif
1067
1068#ifdef CONFIG_LITMUS_NVIDIA
1069 if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) {
1070
1071		TRACE_CUR("%s/%d is blocked so gpu klmirqd tasks may inherit.\n", t->comm, t->pid);
1072 enable_gpu_owner(t);
1073 }
1074#endif
1075
627 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1076 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
628 1077
629 BUG_ON(!is_realtime(t)); 1078 BUG_ON(!is_realtime(t));
@@ -635,8 +1084,30 @@ static void cedf_task_exit(struct task_struct * t)
635 unsigned long flags; 1084 unsigned long flags;
636 cedf_domain_t *cluster = task_cpu_cluster(t); 1085 cedf_domain_t *cluster = task_cpu_cluster(t);
637 1086
1087#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1088 cedf_change_prio_pai_tasklet(t, NULL);
1089#endif
1090
638 /* unlink if necessary */ 1091 /* unlink if necessary */
639 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1092 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
1093
1094#ifdef CONFIG_REALTIME_AUX_TASKS
1095 /* make sure we clean up on our way out */
1096 if (unlikely(tsk_rt(t)->is_aux_task)) {
1097 exit_aux_task(t);
1098 }
1099 else if(tsk_rt(t)->has_aux_tasks) {
1100 disable_aux_task_owner(t);
1101 }
1102#endif
1103
1104#ifdef CONFIG_LITMUS_NVIDIA
1105 /* make sure we clean up on our way out */
1106 if(tsk_rt(t)->held_gpus) {
1107 disable_gpu_owner(t);
1108 }
1109#endif
1110
640 unlink(t); 1111 unlink(t);
641 if (tsk_rt(t)->scheduled_on != NO_CPU) { 1112 if (tsk_rt(t)->scheduled_on != NO_CPU) {
642 cpu_entry_t *cpu; 1113 cpu_entry_t *cpu;
@@ -652,13 +1123,505 @@ static void cedf_task_exit(struct task_struct * t)
652 1123
653static long cedf_admit_task(struct task_struct* tsk) 1124static long cedf_admit_task(struct task_struct* tsk)
654{ 1125{
1126#ifdef CONFIG_LITMUS_NESTED_LOCKING
1127 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1128 edf_max_heap_base_priority_order);
1129#endif
1130
655 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 1131 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
656} 1132}
657 1133
658/* total number of cluster */ 1134
659static int num_clusters; 1135
660/* we do not support cluster of different sizes */ 1136#ifdef CONFIG_LITMUS_LOCKING
661static unsigned int cluster_size; 1137
1138#include <litmus/fdso.h>
1139
1140
1141
1142/* called with IRQs off */
1143static int __increase_priority_inheritance(struct task_struct* t,
1144 struct task_struct* prio_inh)
1145{
1146 int success = 1;
1147 int linked_on;
1148 int check_preempt = 0;
1149 cedf_domain_t* cluster;
1150
1151 if (prio_inh && prio_inh == effective_priority(t)) {
1152 /* relationship already established. */
1153 TRACE_TASK(t, "already has effective priority of %s/%d\n",
1154 prio_inh->comm, prio_inh->pid);
1155 goto out;
1156 }
1157
1158 cluster = task_cpu_cluster(t);
1159
1160#ifdef CONFIG_LITMUS_NESTED_LOCKING
1161 /* this sanity check allows for weaker locking in protocols */
1162 /* TODO (klmirqd): Skip this check if 't' is a proxy thread (???) */
1163 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1164#endif
1165 TRACE_TASK(t, "inherits priority from %s/%d\n",
1166 prio_inh->comm, prio_inh->pid);
1167 tsk_rt(t)->inh_task = prio_inh;
1168
1169 linked_on = tsk_rt(t)->linked_on;
1170
1171 /* If it is scheduled, then we need to reorder the CPU heap. */
1172 if (linked_on != NO_CPU) {
1173 TRACE_TASK(t, "%s: linked on %d\n",
1174 __FUNCTION__, linked_on);
1175 /* Holder is scheduled; need to re-order CPUs.
1176 * We can't use heap_decrease() here since
1177 * the cpu_heap is ordered in reverse direction, so
1178 * it is actually an increase. */
1179 binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn,
1180 &cluster->cpu_heap);
1181 binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn,
1182 &cluster->cpu_heap, cpu_entry_t, hn);
1183
1184 } else {
1185 /* holder may be queued: first stop queue changes */
1186 raw_spin_lock(&cluster->domain.release_lock);
1187 if (is_queued(t)) {
1188 TRACE_TASK(t, "%s: is queued\n",
1189 __FUNCTION__);
1190 /* We need to update the position of holder in some
1191			 * heap. Note that this could be a release heap if
1192 * budget enforcement is used and this job overran. */
1193 check_preempt =
1194 !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
1195 } else {
1196 /* Nothing to do: if it is not queued and not linked
1197 * then it is either sleeping or currently being moved
1198 * by other code (e.g., a timer interrupt handler) that
1199 * will use the correct priority when enqueuing the
1200 * task. */
1201 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1202 __FUNCTION__);
1203 }
1204 raw_spin_unlock(&cluster->domain.release_lock);
1205
1206 /* If holder was enqueued in a release heap, then the following
1207 * preemption check is pointless, but we can't easily detect
1208 * that case. If you want to fix this, then consider that
1209 * simply adding a state flag requires O(n) time to update when
1210 * releasing n tasks, which conflicts with the goal to have
1211 * O(log n) merges. */
1212 if (check_preempt) {
1213 /* heap_decrease() hit the top level of the heap: make
1214 * sure preemption checks get the right task, not the
1215 * potentially stale cache. */
1216 bheap_uncache_min(edf_ready_order,
1217 &cluster->domain.ready_queue);
1218 check_for_preemptions(cluster);
1219 }
1220
1221#ifdef CONFIG_REALTIME_AUX_TASKS
1222 /* propagate to aux tasks */
1223 if (tsk_rt(t)->has_aux_tasks) {
1224 aux_task_owner_increase_priority(t);
1225 }
1226#endif
1227
1228#ifdef CONFIG_LITMUS_NVIDIA
1229 /* propagate to gpu klmirqd */
1230 if (tsk_rt(t)->held_gpus) {
1231 gpu_owner_increase_priority(t);
1232 }
1233#endif
1234 }
1235#ifdef CONFIG_LITMUS_NESTED_LOCKING
1236 }
1237 else {
1238 TRACE_TASK(t, "Spurious invalid priority increase. "
1239 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1240			   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1241 t->comm, t->pid,
1242 effective_priority(t)->comm, effective_priority(t)->pid,
1243 (prio_inh) ? prio_inh->comm : "nil",
1244 (prio_inh) ? prio_inh->pid : -1);
1245 WARN_ON(!prio_inh);
1246 success = 0;
1247 }
1248#endif
1249
1250out:
1251 return success;
1252}
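
__increase_priority_inheritance() only applies a request that is not already in effect and that actually raises the task's effective priority; anything else is logged as a (presumed harmless) spurious request. The guarded-update idea in isolation, as a userspace sketch with hypothetical names and integer priorities (smaller = higher):

/* Sketch only: apply an inheritance request only if it raises priority. */
#include <stdio.h>

struct rt_task { int base_prio; int inh_prio; };   /* inh_prio == 0: none */

static int effective_prio(const struct rt_task *t)
{
	return (t->inh_prio && t->inh_prio < t->base_prio) ? t->inh_prio
	                                                   : t->base_prio;
}

/* Returns 1 on success, 0 if the request was spurious. */
static int increase_inheritance(struct rt_task *t, int prio_inh)
{
	if (prio_inh == effective_prio(t))
		return 1;                       /* relationship already established */
	if (prio_inh >= effective_prio(t))
		return 0;                       /* would not raise priority: spurious */
	t->inh_prio = prio_inh;             /* record and act on the new priority */
	return 1;
}

int main(void)
{
	struct rt_task t = { .base_prio = 50, .inh_prio = 0 };
	printf("%d eff=%d\n", increase_inheritance(&t, 20), effective_prio(&t)); /* 1 eff=20 */
	printf("%d eff=%d\n", increase_inheritance(&t, 30), effective_prio(&t)); /* 0 eff=20 */
	return 0;
}
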
1253
1254/* called with IRQs off */
1255static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1256{
1257 cedf_domain_t* cluster = task_cpu_cluster(t);
1258
1259 raw_spin_lock(&cluster->cluster_lock);
1260
1261 __increase_priority_inheritance(t, prio_inh);
1262
1263 raw_spin_unlock(&cluster->cluster_lock);
1264
1265#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1266 if(tsk_rt(t)->held_gpus) {
1267 int i;
1268 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1269 i < NV_DEVICE_NUM;
1270 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1271 pai_check_priority_increase(t, i);
1272 }
1273 }
1274#endif
1275}
1276
1277/* called with IRQs off */
1278static int __decrease_priority_inheritance(struct task_struct* t,
1279 struct task_struct* prio_inh)
1280{
1281 int success = 1;
1282
1283 if (prio_inh == tsk_rt(t)->inh_task) {
1284 /* relationship already established. */
1285 TRACE_TASK(t, "already inherits priority from %s/%d\n",
1286 (prio_inh) ? prio_inh->comm : "(nil)",
1287 (prio_inh) ? prio_inh->pid : 0);
1288 goto out;
1289 }
1290
1291#ifdef CONFIG_LITMUS_NESTED_LOCKING
1292 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
1293#endif
1294 /* A job only stops inheriting a priority when it releases a
1295 * resource. Thus we can make the following assumption.*/
1296 if(prio_inh)
1297 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1298 prio_inh->comm, prio_inh->pid);
1299 else
1300 TRACE_TASK(t, "base priority restored.\n");
1301
1302 tsk_rt(t)->inh_task = prio_inh;
1303
1304 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1305 TRACE_TASK(t, "is scheduled.\n");
1306
1307 /* Check if rescheduling is necessary. We can't use heap_decrease()
1308 * since the priority was effectively lowered. */
1309 unlink(t);
1310 cedf_job_arrival(t);
1311 }
1312 else {
1313 cedf_domain_t* cluster = task_cpu_cluster(t);
1314 /* task is queued */
1315 raw_spin_lock(&cluster->domain.release_lock);
1316 if (is_queued(t)) {
1317 TRACE_TASK(t, "is queued.\n");
1318
1319 /* decrease in priority, so we have to re-add to binomial heap */
1320 unlink(t);
1321 cedf_job_arrival(t);
1322 }
1323 else {
1324 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
1325 }
1326 raw_spin_unlock(&cluster->domain.release_lock);
1327 }
1328
1329#ifdef CONFIG_REALTIME_AUX_TASKS
1330 /* propagate to aux tasks */
1331 if (tsk_rt(t)->has_aux_tasks) {
1332 aux_task_owner_decrease_priority(t);
1333 }
1334#endif
1335
1336#ifdef CONFIG_LITMUS_NVIDIA
1337 /* propagate to gpu */
1338 if (tsk_rt(t)->held_gpus) {
1339 gpu_owner_decrease_priority(t);
1340 }
1341#endif
1342
1343#ifdef CONFIG_LITMUS_NESTED_LOCKING
1344 }
1345 else {
1346 TRACE_TASK(t, "Spurious invalid priority decrease. "
1347 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1348			   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1349 t->comm, t->pid,
1350 effective_priority(t)->comm, effective_priority(t)->pid,
1351 (prio_inh) ? prio_inh->comm : "nil",
1352 (prio_inh) ? prio_inh->pid : -1);
1353 success = 0;
1354 }
1355#endif
1356
1357out:
1358 return success;
1359}
1360
1361static void decrease_priority_inheritance(struct task_struct* t,
1362 struct task_struct* prio_inh)
1363{
1364 cedf_domain_t* cluster = task_cpu_cluster(t);
1365
1366 raw_spin_lock(&cluster->cluster_lock);
1367 __decrease_priority_inheritance(t, prio_inh);
1368
1369 raw_spin_unlock(&cluster->cluster_lock);
1370
1371#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1372 if(tsk_rt(t)->held_gpus) {
1373 int i;
1374 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1375 i < NV_DEVICE_NUM;
1376 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1377 pai_check_priority_decrease(t, i);
1378 }
1379 }
1380#endif
1381}
1382
1383
1384#ifdef CONFIG_LITMUS_NESTED_LOCKING
1385
1386/* called with IRQs off */
1387/* preconditions:
1388 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1389 (2) The lock 'to_unlock' is held.
1390 */
1391static void nested_increase_priority_inheritance(struct task_struct* t,
1392 struct task_struct* prio_inh,
1393 raw_spinlock_t *to_unlock,
1394 unsigned long irqflags)
1395{
1396 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1397
1398	if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1399 increase_priority_inheritance(t, prio_inh); // increase our prio.
1400 }
1401
1402	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1403
1404
1405 if(blocked_lock) {
1406 if(blocked_lock->ops->propagate_increase_inheritance) {
1407 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1408 blocked_lock->ident);
1409
1410 // beware: recursion
1411 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1412 t, to_unlock,
1413 irqflags);
1414 }
1415 else {
1416 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1417 blocked_lock->ident);
1418 unlock_fine_irqrestore(to_unlock, irqflags);
1419 }
1420 }
1421 else {
1422 TRACE_TASK(t, "is not blocked. No propagation.\n");
1423 unlock_fine_irqrestore(to_unlock, irqflags);
1424 }
1425}
1426
1427/* called with IRQs off */
1428/* preconditions:
1429 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1430 (2) The lock 'to_unlock' is held.
1431 */
1432static void nested_decrease_priority_inheritance(struct task_struct* t,
1433 struct task_struct* prio_inh,
1434 raw_spinlock_t *to_unlock,
1435 unsigned long irqflags)
1436{
1437 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1438 decrease_priority_inheritance(t, prio_inh);
1439
1440	raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1441
1442 if(blocked_lock) {
1443 if(blocked_lock->ops->propagate_decrease_inheritance) {
1444 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1445 blocked_lock->ident);
1446
1447 // beware: recursion
1448 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1449 to_unlock,
1450 irqflags);
1451 }
1452 else {
1453 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1454 blocked_lock);
1455 unlock_fine_irqrestore(to_unlock, irqflags);
1456 }
1457 }
1458 else {
1459 TRACE_TASK(t, "is not blocked. No propagation.\n");
1460 unlock_fine_irqrestore(to_unlock, irqflags);
1461 }
1462}
1463
1464
1465/* ******************** RSM MUTEX ********************** */
1466
1467static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = {
1468 .lock = rsm_mutex_lock,
1469 .unlock = rsm_mutex_unlock,
1470 .close = rsm_mutex_close,
1471 .deallocate = rsm_mutex_free,
1472
1473 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1474 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1475
1476#ifdef CONFIG_LITMUS_DGL_SUPPORT
1477 .dgl_lock = rsm_mutex_dgl_lock,
1478 .is_owner = rsm_mutex_is_owner,
1479 .enable_priority = rsm_mutex_enable_priority,
1480#endif
1481};
1482
1483static struct litmus_lock* cedf_new_rsm_mutex(void)
1484{
1485 return rsm_mutex_new(&cedf_rsm_mutex_lock_ops);
1486}
1487
1488/* ******************** IKGLP ********************** */
1489
1490static struct litmus_lock_ops cedf_ikglp_lock_ops = {
1491 .lock = ikglp_lock,
1492 .unlock = ikglp_unlock,
1493 .close = ikglp_close,
1494 .deallocate = ikglp_free,
1495
1496 // ikglp can only be an outer-most lock.
1497 .propagate_increase_inheritance = NULL,
1498 .propagate_decrease_inheritance = NULL,
1499};
1500
1501static struct litmus_lock* cedf_new_ikglp(void* __user arg)
1502{
1503 // assumes clusters of uniform size.
1504 return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg);
1505}
1506
1507#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1508
1509
1510
1511
1512/* ******************** KFMLP support ********************** */
1513
1514static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
1515 .lock = kfmlp_lock,
1516 .unlock = kfmlp_unlock,
1517 .close = kfmlp_close,
1518 .deallocate = kfmlp_free,
1519
1520 // kfmlp can only be an outer-most lock.
1521 .propagate_increase_inheritance = NULL,
1522 .propagate_decrease_inheritance = NULL,
1523};
1524
1525
1526static struct litmus_lock* cedf_new_kfmlp(void* __user arg)
1527{
1528 return kfmlp_new(&cedf_kfmlp_lock_ops, arg);
1529}
1530
1531
1532/* **** lock constructor **** */
1533
1534static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1535 void* __user args)
1536{
1537 int err;
1538
1539 switch (type) {
1540#ifdef CONFIG_LITMUS_NESTED_LOCKING
1541 case RSM_MUTEX:
1542 *lock = cedf_new_rsm_mutex();
1543 break;
1544
1545 case IKGLP_SEM:
1546 *lock = cedf_new_ikglp(args);
1547 break;
1548#endif
1549 case KFMLP_SEM:
1550 *lock = cedf_new_kfmlp(args);
1551 break;
1552
1553 default:
1554 err = -ENXIO;
1555 goto UNSUPPORTED_LOCK;
1556 };
1557
1558 if (*lock)
1559 err = 0;
1560 else
1561 err = -ENOMEM;
1562
1563UNSUPPORTED_LOCK:
1564 return err;
1565}
1566
1567#endif // CONFIG_LITMUS_LOCKING
1568
1569
1570#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1571static struct affinity_observer_ops cedf_kfmlp_affinity_ops = {
1572 .close = kfmlp_aff_obs_close,
1573 .deallocate = kfmlp_aff_obs_free,
1574};
1575
1576#ifdef CONFIG_LITMUS_NESTED_LOCKING
1577static struct affinity_observer_ops cedf_ikglp_affinity_ops = {
1578 .close = ikglp_aff_obs_close,
1579 .deallocate = ikglp_aff_obs_free,
1580};
1581#endif
1582
1583static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs,
1584 int type,
1585 void* __user args)
1586{
1587 int err;
1588
1589 switch (type) {
1590
1591 case KFMLP_SIMPLE_GPU_AFF_OBS:
1592 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1593 break;
1594
1595 case KFMLP_GPU_AFF_OBS:
1596 *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1597 break;
1598
1599#ifdef CONFIG_LITMUS_NESTED_LOCKING
1600 case IKGLP_SIMPLE_GPU_AFF_OBS:
1601 *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1602 break;
1603
1604 case IKGLP_GPU_AFF_OBS:
1605 *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1606 break;
1607#endif
1608 default:
1609 err = -ENXIO;
1610 goto UNSUPPORTED_AFF_OBS;
1611 };
1612
1613 if (*aff_obs)
1614 err = 0;
1615 else
1616 err = -ENOMEM;
1617
1618UNSUPPORTED_AFF_OBS:
1619 return err;
1620}
1621#endif
1622
1623
1624
662 1625
663#ifdef VERBOSE_INIT 1626#ifdef VERBOSE_INIT
664static void print_cluster_topology(cpumask_var_t mask, int cpu) 1627static void print_cluster_topology(cpumask_var_t mask, int cpu)
@@ -673,16 +1636,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu)
673} 1636}
674#endif 1637#endif
675 1638
676static int clusters_allocated = 0;
677
678static void cleanup_cedf(void) 1639static void cleanup_cedf(void)
679{ 1640{
680 int i; 1641 int i;
681 1642
1643#ifdef CONFIG_LITMUS_NVIDIA
1644 shutdown_nvidia_info();
1645#endif
1646
682 if (clusters_allocated) { 1647 if (clusters_allocated) {
683 for (i = 0; i < num_clusters; i++) { 1648 for (i = 0; i < num_clusters; i++) {
684 kfree(cedf[i].cpus); 1649 kfree(cedf[i].cpus);
685 kfree(cedf[i].heap_node);
686 free_cpumask_var(cedf[i].cpu_map); 1650 free_cpumask_var(cedf[i].cpu_map);
687 } 1651 }
688 1652
@@ -690,6 +1654,18 @@ static void cleanup_cedf(void)
690 } 1654 }
691} 1655}
692 1656
1657#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1658static int cedf_map_gpu_to_cpu(int gpu)
1659{
1660 int cpu_cluster = gpu / gpu_cluster_size;
1661 int default_cpu = cedf[cpu_cluster].cpus[0]->cpu; // first CPU in given cluster
1662
1663 TRACE("CPU %d is default for GPU %d interrupt threads.\n", default_cpu, gpu);
1664
1665 return default_cpu;
1666}
1667#endif
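
The mapping above assigns GPUs to CPU clusters by integer division, so GPU interrupt (klmirqd) threads default to a CPU in the cluster that shares their GPUs. For example, with 8 online GPUs and 4 clusters, gpu_cluster_size is 2 and GPU 5 maps to cluster 2. A standalone illustration of the arithmetic (not part of the patch):

/* Illustration only: which CPU cluster services a given GPU's interrupt thread. */
static int example_gpu_to_cluster(int gpu, int num_gpus, int num_clusters)
{
	int gpu_cluster_size = num_gpus / num_clusters;	/* e.g., 8 / 4 = 2 */
	return gpu / gpu_cluster_size;			/* e.g., GPU 5 -> cluster 2 */
}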
1668
693static long cedf_activate_plugin(void) 1669static long cedf_activate_plugin(void)
694{ 1670{
695 int i, j, cpu, ccpu, cpu_count; 1671 int i, j, cpu, ccpu, cpu_count;
@@ -736,18 +1712,33 @@ static long cedf_activate_plugin(void)
736 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", 1712 printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
737 num_clusters, cluster_size); 1713 num_clusters, cluster_size);
738 1714
1715
1716#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1717 num_gpu_clusters = min(num_clusters, num_online_gpus());
1718 gpu_cluster_size = num_online_gpus() / num_gpu_clusters;
1719
1720 if (((num_online_gpus() % gpu_cluster_size) != 0) ||
1721 (num_gpu_clusters != num_clusters)) {
1722 printk(KERN_WARNING "C-EDF: GPUs not uniformly distributed among CPU clusters.\n");
1723 }
1724#endif
1725
739 /* initialize clusters */ 1726 /* initialize clusters */
740 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); 1727 cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
741 for (i = 0; i < num_clusters; i++) { 1728 for (i = 0; i < num_clusters; i++) {
742 1729
743 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), 1730 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
744 GFP_ATOMIC); 1731 GFP_ATOMIC);
745 cedf[i].heap_node = kmalloc( 1732 INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
746 cluster_size * sizeof(struct bheap_node),
747 GFP_ATOMIC);
748 bheap_init(&(cedf[i].cpu_heap));
749 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); 1733 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
750 1734
1735
1736#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1737 cedf[i].pending_tasklets.head = NULL;
1738 cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
1739#endif
1740
1741
751 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 1742 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
752 return -ENOMEM; 1743 return -ENOMEM;
753#ifdef CONFIG_RELEASE_MASTER 1744#ifdef CONFIG_RELEASE_MASTER
@@ -758,6 +1749,10 @@ static long cedf_activate_plugin(void)
758 /* cycle through cluster and add cpus to them */ 1749 /* cycle through cluster and add cpus to them */
759 for (i = 0; i < num_clusters; i++) { 1750 for (i = 0; i < num_clusters; i++) {
760 1751
1752#ifdef CONFIG_LITMUS_DGL_SUPPORT
1753 raw_spin_lock_init(&cedf[i].dgl_lock);
1754#endif
1755
761 for_each_online_cpu(cpu) { 1756 for_each_online_cpu(cpu) {
762 /* check if the cpu is already in a cluster */ 1757 /* check if the cpu is already in a cluster */
763 for (j = 0; j < num_clusters; j++) 1758 for (j = 0; j < num_clusters; j++)
@@ -788,8 +1783,8 @@ static long cedf_activate_plugin(void)
788 atomic_set(&entry->will_schedule, 0); 1783 atomic_set(&entry->will_schedule, 0);
789 entry->cpu = ccpu; 1784 entry->cpu = ccpu;
790 entry->cluster = &cedf[i]; 1785 entry->cluster = &cedf[i];
791 entry->hn = &(cedf[i].heap_node[cpu_count]); 1786
792 bheap_node_init(&entry->hn, entry); 1787 INIT_BINHEAP_NODE(&entry->hn);
793 1788
794 cpu_count++; 1789 cpu_count++;
795 1790
@@ -806,6 +1801,14 @@ static long cedf_activate_plugin(void)
806 } 1801 }
807 } 1802 }
808 1803
1804#ifdef CONFIG_LITMUS_SOFTIRQD
1805 init_klmirqd();
1806#endif
1807
1808#ifdef CONFIG_LITMUS_NVIDIA
1809 init_nvidia_info();
1810#endif
1811
809 free_cpumask_var(mask); 1812 free_cpumask_var(mask);
810 clusters_allocated = 1; 1813 clusters_allocated = 1;
811 return 0; 1814 return 0;
@@ -824,6 +1827,33 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
824 .task_block = cedf_task_block, 1827 .task_block = cedf_task_block,
825 .admit_task = cedf_admit_task, 1828 .admit_task = cedf_admit_task,
826 .activate_plugin = cedf_activate_plugin, 1829 .activate_plugin = cedf_activate_plugin,
1830 .compare = edf_higher_prio,
1831#ifdef CONFIG_LITMUS_LOCKING
1832 .allocate_lock = cedf_allocate_lock,
1833 .increase_prio = increase_priority_inheritance,
1834 .decrease_prio = decrease_priority_inheritance,
1835 .__increase_prio = __increase_priority_inheritance,
1836 .__decrease_prio = __decrease_priority_inheritance,
1837#endif
1838#ifdef CONFIG_LITMUS_NESTED_LOCKING
1839 .nested_increase_prio = nested_increase_priority_inheritance,
1840 .nested_decrease_prio = nested_decrease_priority_inheritance,
1841 .__compare = __edf_higher_prio,
1842#endif
1843#ifdef CONFIG_LITMUS_DGL_SUPPORT
1844 .get_dgl_spinlock = cedf_get_dgl_spinlock,
1845#endif
1846#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1847 .allocate_aff_obs = cedf_allocate_affinity_observer,
1848#endif
1849#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1850 .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
1851 .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
1852 .run_tasklets = cedf_run_tasklets,
1853#endif
1854#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1855 .map_gpu_to_cpu = cedf_map_gpu_to_cpu,
1856#endif
827}; 1857};
828 1858
829static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; 1859static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index b8548b885b35..01791a18e8f3 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -12,24 +12,54 @@
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/uaccess.h>
16#include <linux/module.h>
15 17
16#include <litmus/litmus.h> 18#include <litmus/litmus.h>
17#include <litmus/jobs.h> 19#include <litmus/jobs.h>
18#include <litmus/sched_plugin.h> 20#include <litmus/sched_plugin.h>
19#include <litmus/edf_common.h> 21#include <litmus/edf_common.h>
20#include <litmus/sched_trace.h> 22#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22 23
23#include <litmus/preempt.h> 24#include <litmus/preempt.h>
24#include <litmus/budget.h> 25#include <litmus/budget.h>
25 26
26#include <litmus/bheap.h> 27#include <litmus/bheap.h>
28#include <litmus/binheap.h>
29#include <litmus/trace.h>
30
31#ifdef CONFIG_LITMUS_LOCKING
32#include <litmus/kfmlp_lock.h>
33#endif
34
35#ifdef CONFIG_LITMUS_NESTED_LOCKING
36#include <litmus/rsm_lock.h>
37#include <litmus/ikglp_lock.h>
38#endif
27 39
28#ifdef CONFIG_SCHED_CPU_AFFINITY 40#ifdef CONFIG_SCHED_CPU_AFFINITY
29#include <litmus/affinity.h> 41#include <litmus/affinity.h>
30#endif 42#endif
31 43
32#include <linux/module.h> 44#ifdef CONFIG_REALTIME_AUX_TASKS
45#include <litmus/aux_tasks.h>
46#endif
47
48#ifdef CONFIG_LITMUS_SOFTIRQD
49#include <litmus/litmus_softirq.h>
50#endif
51
52#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
53#include <linux/interrupt.h>
54#endif
55
56#ifdef CONFIG_LITMUS_NVIDIA
57#include <litmus/nvidia_info.h>
58#endif
59
60#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
61#include <litmus/gpu_affinity.h>
62#endif
33 63
34/* Overview of GSN-EDF operations. 64/* Overview of GSN-EDF operations.
35 * 65 *
@@ -104,52 +134,64 @@ typedef struct {
104 int cpu; 134 int cpu;
105 struct task_struct* linked; /* only RT tasks */ 135 struct task_struct* linked; /* only RT tasks */
106 struct task_struct* scheduled; /* only RT tasks */ 136 struct task_struct* scheduled; /* only RT tasks */
107 struct bheap_node* hn; 137 struct binheap_node hn;
108} cpu_entry_t; 138} cpu_entry_t;
109DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); 139DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
110 140
111cpu_entry_t* gsnedf_cpus[NR_CPUS]; 141cpu_entry_t* gsnedf_cpus[NR_CPUS];
112 142
113/* the cpus queue themselves according to priority in here */ 143/* the cpus queue themselves according to priority in here */
114static struct bheap_node gsnedf_heap_node[NR_CPUS]; 144static struct binheap gsnedf_cpu_heap;
115static struct bheap gsnedf_cpu_heap;
116 145
117static rt_domain_t gsnedf; 146static rt_domain_t gsnedf;
118#define gsnedf_lock (gsnedf.ready_lock) 147#define gsnedf_lock (gsnedf.ready_lock)
119 148
149#ifdef CONFIG_LITMUS_DGL_SUPPORT
150static raw_spinlock_t dgl_lock;
151
152static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t)
153{
154 return(&dgl_lock);
155}
156#endif
157
158#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
159struct tasklet_head gsnedf_pending_tasklets;
160#endif
161
120 162
121/* Uncomment this if you want to see all scheduling decisions in the 163/* Uncomment this if you want to see all scheduling decisions in the
122 * TRACE() log. 164 * TRACE() log.
123#define WANT_ALL_SCHED_EVENTS 165#define WANT_ALL_SCHED_EVENTS
124 */ 166 */
125 167
126static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 168static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
127{ 169{
128 cpu_entry_t *a, *b; 170 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
129 a = _a->value; 171 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
130 b = _b->value; 172
131 /* Note that a and b are inverted: we want the lowest-priority CPU at 173 /* Note that a and b are inverted: we want the lowest-priority CPU at
132 * the top of the heap. 174 * the top of the heap.
133 */ 175 */
134 return edf_higher_prio(b->linked, a->linked); 176 return edf_higher_prio(b->linked, a->linked);
135} 177}
136 178
179
137/* update_cpu_position - Move the cpu entry to the correct place to maintain 180/* update_cpu_position - Move the cpu entry to the correct place to maintain
138 * order in the cpu queue. Caller must hold gsnedf lock. 181 * order in the cpu queue. Caller must hold gsnedf lock.
139 */ 182 */
140static void update_cpu_position(cpu_entry_t *entry) 183static void update_cpu_position(cpu_entry_t *entry)
141{ 184{
142 if (likely(bheap_node_in_heap(entry->hn))) 185 if (likely(binheap_is_in_heap(&entry->hn))) {
143 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 186 binheap_delete(&entry->hn, &gsnedf_cpu_heap);
144 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 187 }
188 binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn);
145} 189}
146 190
147/* caller must hold gsnedf lock */ 191/* caller must hold gsnedf lock */
148static cpu_entry_t* lowest_prio_cpu(void) 192static cpu_entry_t* lowest_prio_cpu(void)
149{ 193{
150 struct bheap_node* hn; 194 return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn);
151 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
152 return hn->value;
153} 195}
154 196
155 197
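
This hunk is part of the migration from the old pointer-based bheap to the intrusive binheap API: the node lives inside the element, and comparators recover the container with binheap_entry() instead of dereferencing a separate value pointer. A minimal sketch of the pattern using the same macros as above (example_entry_t is hypothetical, illustration only):

typedef struct {
	int key;
	struct binheap_node hn;		/* intrusive node, like cpu_entry_t::hn */
} example_entry_t;

/* Order function: nonzero when 'a' belongs above 'b' (here, a min-heap on key). */
static int example_min_order(struct binheap_node *a, struct binheap_node *b)
{
	example_entry_t *ea = binheap_entry(a, example_entry_t, hn);
	example_entry_t *eb = binheap_entry(b, example_entry_t, hn);
	return ea->key < eb->key;
}

static struct binheap example_heap;	/* once: INIT_BINHEAP_HANDLE(&example_heap, example_min_order); */

static void example_insert(example_entry_t *e)
{
	INIT_BINHEAP_NODE(&e->hn);
	binheap_add(&e->hn, &example_heap, example_entry_t, hn);
}

static example_entry_t* example_peek_min(void)
{
	return binheap_top_entry(&example_heap, example_entry_t, hn);
}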
@@ -164,8 +206,17 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
164 struct task_struct* tmp; 206 struct task_struct* tmp;
165 int on_cpu; 207 int on_cpu;
166 208
209 //int print = (linked != NULL || entry->linked != NULL);
210
167 BUG_ON(linked && !is_realtime(linked)); 211 BUG_ON(linked && !is_realtime(linked));
168 212
213 /*
214 if (print) {
215 TRACE_CUR("linked = %s/%d\n", (linked) ? linked->comm : "(nil)", (linked)? linked->pid : 0);
216 TRACE_CUR("entry->linked = %s/%d\n", (entry->linked) ? entry->linked->comm : "(nil)", (entry->linked)? entry->linked->pid : 0);
217 }
218 */
219
169 /* Currently linked task is set to be unlinked. */ 220 /* Currently linked task is set to be unlinked. */
170 if (entry->linked) { 221 if (entry->linked) {
171 entry->linked->rt_param.linked_on = NO_CPU; 222 entry->linked->rt_param.linked_on = NO_CPU;
@@ -201,12 +252,18 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
201 linked->rt_param.linked_on = entry->cpu; 252 linked->rt_param.linked_on = entry->cpu;
202 } 253 }
203 entry->linked = linked; 254 entry->linked = linked;
204#ifdef WANT_ALL_SCHED_EVENTS 255
205 if (linked) 256 /*
206 TRACE_TASK(linked, "linked to %d.\n", entry->cpu); 257 if (print) {
207 else 258 //#ifdef WANT_ALL_SCHED_EVENTS
208 TRACE("NULL linked to %d.\n", entry->cpu); 259 if (linked)
209#endif 260 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
261 else
262 TRACE("NULL linked to %d.\n", entry->cpu);
263 //#endif
264 }
265 */
266
210 update_cpu_position(entry); 267 update_cpu_position(entry);
211} 268}
212 269
@@ -251,8 +308,17 @@ static noinline void requeue(struct task_struct* task)
251 /* sanity check before insertion */ 308 /* sanity check before insertion */
252 BUG_ON(is_queued(task)); 309 BUG_ON(is_queued(task));
253 310
254 if (is_released(task, litmus_clock())) 311 if (is_released(task, litmus_clock())) {
255 __add_ready(&gsnedf, task); 312#ifdef CONFIG_REALTIME_AUX_TASKS
313 if (unlikely(tsk_rt(task)->is_aux_task && !is_running(task))) {
314 /* aux_task probably transitioned to real-time while it was blocked */
315 TRACE_CUR("aux task %s/%d is not ready!\n", task->comm, task->pid);
316 unlink(task); /* really needed? */
317 }
318 else
319#endif
320 __add_ready(&gsnedf, task);
321 }
256 else { 322 else {
257 /* it has got to wait */ 323 /* it has got to wait */
258 add_release(&gsnedf, task); 324 add_release(&gsnedf, task);
@@ -326,6 +392,7 @@ static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
326 raw_spin_lock_irqsave(&gsnedf_lock, flags); 392 raw_spin_lock_irqsave(&gsnedf_lock, flags);
327 393
328 __merge_ready(rt, tasks); 394 __merge_ready(rt, tasks);
395
329 check_for_preemptions(); 396 check_for_preemptions();
330 397
331 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 398 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
@@ -338,12 +405,17 @@ static noinline void job_completion(struct task_struct *t, int forced)
338 405
339 sched_trace_task_completion(t, forced); 406 sched_trace_task_completion(t, forced);
340 407
408#ifdef CONFIG_LITMUS_NVIDIA
409 atomic_set(&tsk_rt(t)->nv_int_count, 0);
410#endif
411
341 TRACE_TASK(t, "job_completion().\n"); 412 TRACE_TASK(t, "job_completion().\n");
342 413
343 /* set flags */ 414 /* set flags */
344 tsk_rt(t)->completed = 1; 415 tsk_rt(t)->completed = 1;
345 /* prepare for next period */ 416 /* prepare for next period */
346 prepare_for_next_period(t); 417 prepare_for_next_period(t);
418
347 if (is_released(t, litmus_clock())) 419 if (is_released(t, litmus_clock()))
348 sched_trace_task_release(t); 420 sched_trace_task_release(t);
349 /* unlink */ 421 /* unlink */
@@ -362,24 +434,350 @@ static noinline void job_completion(struct task_struct *t, int forced)
362 */ 434 */
363static void gsnedf_tick(struct task_struct* t) 435static void gsnedf_tick(struct task_struct* t)
364{ 436{
365 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 437 if (is_realtime(t) && budget_exhausted(t))
366 if (!is_np(t)) { 438 {
367 /* np tasks will be preempted when they become 439 if (budget_signalled(t) && !sigbudget_sent(t)) {
368 * preemptable again 440 /* signal exhaustion */
369 */ 441 send_sigbudget(t);
370 litmus_reschedule_local(); 442 }
371 TRACE("gsnedf_scheduler_tick: " 443
372 "%d is preemptable " 444 if (budget_enforced(t)) {
373 " => FORCE_RESCHED\n", t->pid); 445 if (!is_np(t)) {
374 } else if (is_user_np(t)) { 446 /* np tasks will be preempted when they become
375 TRACE("gsnedf_scheduler_tick: " 447 * preemptable again
376 "%d is non-preemptable, " 448 */
377 "preemption delayed.\n", t->pid); 449 litmus_reschedule_local();
378 request_exit_np(t); 450 TRACE("gsnedf_scheduler_tick: "
451 "%d is preemptable "
452 " => FORCE_RESCHED\n", t->pid);
453 } else if (is_user_np(t)) {
454 TRACE("gsnedf_scheduler_tick: "
455 "%d is non-preemptable, "
456 "preemption delayed.\n", t->pid);
457 request_exit_np(t);
458 }
459 }
460 }
461
462 /*
463 if(is_realtime(t)) {
464 TRACE_TASK(t, "tick %llu\n", litmus_clock());
465 }
466 */
467}
468
469
470
471
472#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
473
474
475static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
476{
477 if (!atomic_read(&tasklet->count)) {
478 if(tasklet->owner) {
479 sched_trace_tasklet_begin(tasklet->owner);
480 }
481
482 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
483 {
484 BUG();
485 }
486 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
487 __FUNCTION__,
488 (tasklet->owner) ? tasklet->owner->pid : -1,
489 (tasklet->owner) ? 0 : 1);
490 tasklet->func(tasklet->data);
491 tasklet_unlock(tasklet);
492
493 if(tasklet->owner) {
494 sched_trace_tasklet_end(tasklet->owner, flushed);
495 }
496 }
497 else {
498 BUG();
499 }
500}
501
502static void do_lit_tasklets(struct task_struct* sched_task)
503{
504 int work_to_do = 1;
505 struct tasklet_struct *tasklet = NULL;
506 unsigned long flags;
507
508 while(work_to_do) {
509
510 TS_NV_SCHED_BOTISR_START;
511
512 // execute one tasklet that has higher priority
513 raw_spin_lock_irqsave(&gsnedf_lock, flags);
514
515 if(gsnedf_pending_tasklets.head != NULL) {
516 struct tasklet_struct *prev = NULL;
517 tasklet = gsnedf_pending_tasklets.head;
518
519 while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) {
520 prev = tasklet;
521 tasklet = tasklet->next;
522 }
523
524 // remove the tasklet from the queue
525 if(prev) {
526 prev->next = tasklet->next;
527 if(prev->next == NULL) {
528 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
 529 					gsnedf_pending_tasklets.tail = &(prev->next);
530 }
531 }
532 else {
533 gsnedf_pending_tasklets.head = tasklet->next;
534 if(tasklet->next == NULL) {
535 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
536 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
537 }
538 }
539 }
540 else {
541 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
542 }
543
544 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
545
546 if(tasklet) {
547 __do_lit_tasklet(tasklet, 0ul);
548 tasklet = NULL;
549 }
550 else {
551 work_to_do = 0;
552 }
553
554 TS_NV_SCHED_BOTISR_END;
555 }
556}
557
558//static void do_lit_tasklets(struct task_struct* sched_task)
559//{
560// int work_to_do = 1;
561// struct tasklet_struct *tasklet = NULL;
562// //struct tasklet_struct *step;
563// unsigned long flags;
564//
565// while(work_to_do) {
566//
567// TS_NV_SCHED_BOTISR_START;
568//
569// // remove tasklet at head of list if it has higher priority.
570// raw_spin_lock_irqsave(&gsnedf_lock, flags);
571//
572// if(gsnedf_pending_tasklets.head != NULL) {
573// // remove tasklet at head.
574// tasklet = gsnedf_pending_tasklets.head;
575//
576// if(edf_higher_prio(tasklet->owner, sched_task)) {
577//
578// if(NULL == tasklet->next) {
579// // tasklet is at the head, list only has one element
580// TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
581// gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
582// }
583//
584// // remove the tasklet from the queue
585// gsnedf_pending_tasklets.head = tasklet->next;
586//
587// TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
588// }
589// else {
590// TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
591// tasklet = NULL;
592// }
593// }
594// else {
595// TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
596// }
597//
598// raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
599//
600// TS_NV_SCHED_BOTISR_END;
601//
602// if(tasklet) {
603// __do_lit_tasklet(tasklet, 0ul);
604// tasklet = NULL;
605// }
606// else {
607// work_to_do = 0;
608// }
609// }
610//
611// //TRACE("%s: exited.\n", __FUNCTION__);
612//}
613
614static void __add_pai_tasklet(struct tasklet_struct* tasklet)
615{
616 struct tasklet_struct* step;
617
618 tasklet->next = NULL; // make sure there are no old values floating around
619
620 step = gsnedf_pending_tasklets.head;
621 if(step == NULL) {
622 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
623 // insert at tail.
624 *(gsnedf_pending_tasklets.tail) = tasklet;
625 gsnedf_pending_tasklets.tail = &(tasklet->next);
626 }
627 else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
628 edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
629 // insert at tail.
630 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
631
632 *(gsnedf_pending_tasklets.tail) = tasklet;
633 gsnedf_pending_tasklets.tail = &(tasklet->next);
634 }
635 else {
636 // insert the tasklet somewhere in the middle.
637
638 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
639
640 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
641 step = step->next;
642 }
643
644 // insert tasklet right before step->next.
645
646 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
647
648 tasklet->next = step->next;
649 step->next = tasklet;
650
651 // patch up the head if needed.
652 if(gsnedf_pending_tasklets.head == step)
653 {
654 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
655 gsnedf_pending_tasklets.head = tasklet;
656 }
657 }
658}
659
660static void gsnedf_run_tasklets(struct task_struct* sched_task)
661{
662 preempt_disable();
663
664 if(gsnedf_pending_tasklets.head != NULL) {
665 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
666 do_lit_tasklets(sched_task);
667 }
668
669 preempt_enable_no_resched();
670}
671
672static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
673{
674 cpu_entry_t *targetCPU = NULL;
675 int thisCPU;
676 int runLocal = 0;
677 int runNow = 0;
678 unsigned long flags;
679
680 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
681 {
682 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
683 return 0;
684 }
685
686
687 raw_spin_lock_irqsave(&gsnedf_lock, flags);
688
689 thisCPU = smp_processor_id();
690
691#ifdef CONFIG_SCHED_CPU_AFFINITY
692 {
693 cpu_entry_t* affinity = NULL;
694
695 // use this CPU if it is in our cluster and isn't running any RT work.
696 if(
697#ifdef CONFIG_RELEASE_MASTER
698 (thisCPU != gsnedf.release_master) &&
699#endif
700 (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
701 affinity = &(__get_cpu_var(gsnedf_cpu_entries));
702 }
703 else {
704 // this CPU is busy or shouldn't run tasklet in this cluster.
705 // look for available near by CPUs.
706 // NOTE: Affinity towards owner and not this CPU. Is this right?
707 affinity =
708 gsnedf_get_nearest_available_cpu(
709 &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
710 }
711
712 targetCPU = affinity;
713 }
714#endif
715
716 if (targetCPU == NULL) {
717 targetCPU = lowest_prio_cpu();
718 }
719
720 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
721 if (thisCPU == targetCPU->cpu) {
722 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
723 runLocal = 1;
724 runNow = 1;
725 }
726 else {
727 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
728 runLocal = 0;
729 runNow = 1;
730 }
731 }
732 else {
733 runLocal = 0;
734 runNow = 0;
735 }
736
737 if(!runLocal) {
738 // enqueue the tasklet
739 __add_pai_tasklet(tasklet);
740 }
741
742 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
743
744
745 if (runLocal /*&& runNow */) { // runNow == 1 is implied
746 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
747 __do_lit_tasklet(tasklet, 0ul);
748 }
749 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
750 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
 751 		preempt(targetCPU);  // need to be protected by gsnedf_lock?
752 }
753 else {
754 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
755 }
756
757 return(1); // success
758}
759
760static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio,
761 struct task_struct *new_prio)
762{
763 struct tasklet_struct* step;
764 unsigned long flags;
765
766 if(gsnedf_pending_tasklets.head != NULL) {
767 raw_spin_lock_irqsave(&gsnedf_lock, flags);
768 for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) {
769 if(step->owner == old_prio) {
770 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
771 step->owner = new_prio;
772 }
379 } 773 }
774 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
380 } 775 }
381} 776}
382 777
778#endif // end PAI
779
780
383/* Getting schedule() right is a bit tricky. schedule() may not make any 781/* Getting schedule() right is a bit tricky. schedule() may not make any
384 * assumptions on the state of the current task since it may be called for a 782 * assumptions on the state of the current task since it may be called for a
385 * number of reasons. The reasons include a scheduler_tick() determined that it 783 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -404,9 +802,11 @@ static void gsnedf_tick(struct task_struct* t)
404static struct task_struct* gsnedf_schedule(struct task_struct * prev) 802static struct task_struct* gsnedf_schedule(struct task_struct * prev)
405{ 803{
406 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); 804 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
407 int out_of_time, sleep, preempt, np, exists, blocks; 805 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks;
408 struct task_struct* next = NULL; 806 struct task_struct* next = NULL;
409 807
808 //int completion = 0;
809
410#ifdef CONFIG_RELEASE_MASTER 810#ifdef CONFIG_RELEASE_MASTER
411 /* Bail out early if we are the release master. 811 /* Bail out early if we are the release master.
412 * The release master never schedules any real-time tasks. 812 * The release master never schedules any real-time tasks.
@@ -427,8 +827,13 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
427 /* (0) Determine state */ 827 /* (0) Determine state */
428 exists = entry->scheduled != NULL; 828 exists = entry->scheduled != NULL;
429 blocks = exists && !is_running(entry->scheduled); 829 blocks = exists && !is_running(entry->scheduled);
430 out_of_time = exists && budget_enforced(entry->scheduled) 830 out_of_time = exists &&
431 && budget_exhausted(entry->scheduled); 831 budget_enforced(entry->scheduled) &&
832 budget_exhausted(entry->scheduled);
833 signal_budget = exists &&
834 budget_signalled(entry->scheduled) &&
835 budget_exhausted(entry->scheduled) &&
836 !sigbudget_sent(entry->scheduled);
432 np = exists && is_np(entry->scheduled); 837 np = exists && is_np(entry->scheduled);
433 sleep = exists && is_completed(entry->scheduled); 838 sleep = exists && is_completed(entry->scheduled);
434 preempt = entry->scheduled != entry->linked; 839 preempt = entry->scheduled != entry->linked;
@@ -437,21 +842,36 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
437 TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); 842 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
438#endif 843#endif
439 844
440 if (exists) 845 if (exists) {
441 TRACE_TASK(prev, 846 TRACE_TASK(prev,
442 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " 847 "blocks:%d out_of_time:%d signal_budget: %d np:%d sleep:%d preempt:%d "
443 "state:%d sig:%d\n", 848 "state:%d sig:%d\n",
444 blocks, out_of_time, np, sleep, preempt, 849 blocks, out_of_time, signal_budget, np, sleep, preempt,
445 prev->state, signal_pending(prev)); 850 prev->state, signal_pending(prev));
851 }
852
446 if (entry->linked && preempt) 853 if (entry->linked && preempt)
447 TRACE_TASK(prev, "will be preempted by %s/%d\n", 854 TRACE_TASK(prev, "will be preempted by %s/%d\n",
448 entry->linked->comm, entry->linked->pid); 855 entry->linked->comm, entry->linked->pid);
449 856
857 /* Send the signal that the budget has been exhausted */
858 if (signal_budget) {
859 send_sigbudget(entry->scheduled);
860 }
450 861
451 /* If a task blocks we have no choice but to reschedule. 862 /* If a task blocks we have no choice but to reschedule.
452 */ 863 */
453 if (blocks) 864 if (blocks) {
454 unlink(entry->scheduled); 865 unlink(entry->scheduled);
866 }
867
868#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
869 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
870 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
871 stop_gpu_tracker(entry->scheduled);
872 }
873 }
874#endif
455 875
456 /* Request a sys_exit_np() call if we would like to preempt but cannot. 876 /* Request a sys_exit_np() call if we would like to preempt but cannot.
457 * We need to make sure to update the link structure anyway in case 877 * We need to make sure to update the link structure anyway in case
@@ -468,8 +888,10 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
468 * this. Don't do a job completion if we block (can't have timers running 888 * this. Don't do a job completion if we block (can't have timers running
469 * for blocked jobs). 889 * for blocked jobs).
470 */ 890 */
471 if (!np && (out_of_time || sleep) && !blocks) 891 if (!np && (out_of_time || sleep) && !blocks) {
472 job_completion(entry->scheduled, !sleep); 892 job_completion(entry->scheduled, !sleep);
893 //completion = 1;
894 }
473 895
474 /* Link pending task if we became unlinked. 896 /* Link pending task if we became unlinked.
475 */ 897 */
@@ -492,12 +914,21 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
492 entry->scheduled->rt_param.scheduled_on = NO_CPU; 914 entry->scheduled->rt_param.scheduled_on = NO_CPU;
493 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); 915 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
494 } 916 }
495 } else 917 }
918 else
919 {
496 /* Only override Linux scheduler if we have a real-time task 920 /* Only override Linux scheduler if we have a real-time task
497 * scheduled that needs to continue. 921 * scheduled that needs to continue.
498 */ 922 */
499 if (exists) 923 if (exists)
500 next = prev; 924 next = prev;
925 }
926
927#if 0
928 if (completion) {
929 TRACE_CUR("switching away from a completion\n");
930 }
931#endif
501 932
502 sched_state_task_picked(); 933 sched_state_task_picked();
503 934
@@ -512,7 +943,6 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
512 TRACE("becomes idle at %llu.\n", litmus_clock()); 943 TRACE("becomes idle at %llu.\n", litmus_clock());
513#endif 944#endif
514 945
515
516 return next; 946 return next;
517} 947}
518 948
@@ -524,6 +954,7 @@ static void gsnedf_finish_switch(struct task_struct *prev)
524 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); 954 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
525 955
526 entry->scheduled = is_realtime(current) ? current : NULL; 956 entry->scheduled = is_realtime(current) ? current : NULL;
957
527#ifdef WANT_ALL_SCHED_EVENTS 958#ifdef WANT_ALL_SCHED_EVENTS
528 TRACE_TASK(prev, "switched away from\n"); 959 TRACE_TASK(prev, "switched away from\n");
529#endif 960#endif
@@ -537,7 +968,7 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
537 unsigned long flags; 968 unsigned long flags;
538 cpu_entry_t* entry; 969 cpu_entry_t* entry;
539 970
540 TRACE("gsn edf: task new %d\n", t->pid); 971 TRACE("gsn edf: task new = %d on_rq = %d running = %d\n", t->pid, on_rq, running);
541 972
542 raw_spin_lock_irqsave(&gsnedf_lock, flags); 973 raw_spin_lock_irqsave(&gsnedf_lock, flags);
543 974
@@ -572,11 +1003,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
572static void gsnedf_task_wake_up(struct task_struct *task) 1003static void gsnedf_task_wake_up(struct task_struct *task)
573{ 1004{
574 unsigned long flags; 1005 unsigned long flags;
575 lt_t now; 1006 //lt_t now;
576 1007
577 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 1008 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
578 1009
579 raw_spin_lock_irqsave(&gsnedf_lock, flags); 1010 raw_spin_lock_irqsave(&gsnedf_lock, flags);
1011
1012#if 0
1013 /* sporadic task model. will increment job numbers automatically */
580 now = litmus_clock(); 1014 now = litmus_clock();
581 if (is_tardy(task, now)) { 1015 if (is_tardy(task, now)) {
582 /* new sporadic release */ 1016 /* new sporadic release */
@@ -590,6 +1024,25 @@ static void gsnedf_task_wake_up(struct task_struct *task)
590 tsk_rt(task)->completed = 0; 1024 tsk_rt(task)->completed = 0;
591 } 1025 }
592 } 1026 }
1027#else
1028 /* don't force job to end. rely on user to say when jobs complete */
1029 tsk_rt(task)->completed = 0;
1030#endif
1031
1032#ifdef CONFIG_REALTIME_AUX_TASKS
1033 if (tsk_rt(task)->has_aux_tasks && !tsk_rt(task)->hide_from_aux_tasks) {
1034 TRACE_CUR("%s/%d is ready so aux tasks may not inherit.\n", task->comm, task->pid);
1035 disable_aux_task_owner(task);
1036 }
1037#endif
1038
1039#ifdef CONFIG_LITMUS_NVIDIA
1040 if (tsk_rt(task)->held_gpus && !tsk_rt(task)->hide_from_gpu) {
1041 TRACE_CUR("%s/%d is ready so gpu klmirqd tasks may not inherit.\n", task->comm, task->pid);
1042 disable_gpu_owner(task);
1043 }
1044#endif
1045
593 gsnedf_job_arrival(task); 1046 gsnedf_job_arrival(task);
594 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 1047 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
595} 1048}
@@ -602,7 +1055,25 @@ static void gsnedf_task_block(struct task_struct *t)
602 1055
603 /* unlink if necessary */ 1056 /* unlink if necessary */
604 raw_spin_lock_irqsave(&gsnedf_lock, flags); 1057 raw_spin_lock_irqsave(&gsnedf_lock, flags);
1058
605 unlink(t); 1059 unlink(t);
1060
1061#ifdef CONFIG_REALTIME_AUX_TASKS
1062 if (tsk_rt(t)->has_aux_tasks && !tsk_rt(t)->hide_from_aux_tasks) {
1063
1064 TRACE_CUR("%s/%d is blocked so aux tasks may inherit.\n", t->comm, t->pid);
1065 enable_aux_task_owner(t);
1066 }
1067#endif
1068
1069#ifdef CONFIG_LITMUS_NVIDIA
1070 if (tsk_rt(t)->held_gpus && !tsk_rt(t)->hide_from_gpu) {
1071
1072 		TRACE_CUR("%s/%d is blocked so gpu klmirqd tasks may inherit.\n", t->comm, t->pid);
1073 enable_gpu_owner(t);
1074 }
1075#endif
1076
606 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 1077 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
607 1078
608 BUG_ON(!is_realtime(t)); 1079 BUG_ON(!is_realtime(t));
@@ -613,8 +1084,30 @@ static void gsnedf_task_exit(struct task_struct * t)
613{ 1084{
614 unsigned long flags; 1085 unsigned long flags;
615 1086
1087#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1088 gsnedf_change_prio_pai_tasklet(t, NULL);
1089#endif
1090
616 /* unlink if necessary */ 1091 /* unlink if necessary */
617 raw_spin_lock_irqsave(&gsnedf_lock, flags); 1092 raw_spin_lock_irqsave(&gsnedf_lock, flags);
1093
1094#ifdef CONFIG_REALTIME_AUX_TASKS
1095 /* make sure we clean up on our way out */
1096 if (unlikely(tsk_rt(t)->is_aux_task)) {
1097 exit_aux_task(t);
1098 }
1099 else if(tsk_rt(t)->has_aux_tasks) {
1100 disable_aux_task_owner(t);
1101 }
1102#endif
1103
1104#ifdef CONFIG_LITMUS_NVIDIA
1105 /* make sure we clean up on our way out */
1106 if(tsk_rt(t)->held_gpus) {
1107 disable_gpu_owner(t);
1108 }
1109#endif
1110
618 unlink(t); 1111 unlink(t);
619 if (tsk_rt(t)->scheduled_on != NO_CPU) { 1112 if (tsk_rt(t)->scheduled_on != NO_CPU) {
620 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL; 1113 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
@@ -623,106 +1116,413 @@ static void gsnedf_task_exit(struct task_struct * t)
623 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 1116 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
624 1117
625 BUG_ON(!is_realtime(t)); 1118 BUG_ON(!is_realtime(t));
626 TRACE_TASK(t, "RIP\n"); 1119 TRACE_TASK(t, "RIP\n");
627} 1120}
628 1121
629 1122
630static long gsnedf_admit_task(struct task_struct* tsk) 1123static long gsnedf_admit_task(struct task_struct* tsk)
631{ 1124{
1125#ifdef CONFIG_LITMUS_NESTED_LOCKING
1126 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1127 edf_max_heap_base_priority_order);
1128#endif
1129
632 return 0; 1130 return 0;
633} 1131}
634 1132
1133
1134
1135
1136
1137
635#ifdef CONFIG_LITMUS_LOCKING 1138#ifdef CONFIG_LITMUS_LOCKING
636 1139
637#include <litmus/fdso.h> 1140#include <litmus/fdso.h>
638 1141
639/* called with IRQs off */ 1142/* called with IRQs off */
640static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) 1143static int __increase_priority_inheritance(struct task_struct* t,
1144 struct task_struct* prio_inh)
641{ 1145{
1146 int success = 1;
642 int linked_on; 1147 int linked_on;
643 int check_preempt = 0; 1148 int check_preempt = 0;
644 1149
645 raw_spin_lock(&gsnedf_lock); 1150 if (prio_inh && prio_inh == effective_priority(t)) {
1151 /* relationship already established. */
1152 TRACE_TASK(t, "already has effective priority of %s/%d\n",
1153 prio_inh->comm, prio_inh->pid);
1154 goto out;
1155 }
646 1156
647 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); 1157#ifdef CONFIG_LITMUS_NESTED_LOCKING
648 tsk_rt(t)->inh_task = prio_inh; 1158 /* this sanity check allows for weaker locking in protocols */
649 1159 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
650 linked_on = tsk_rt(t)->linked_on; 1160#endif
651 1161 TRACE_TASK(t, "inherits priority from %s/%d\n",
652 /* If it is scheduled, then we need to reorder the CPU heap. */ 1162 prio_inh->comm, prio_inh->pid);
653 if (linked_on != NO_CPU) { 1163 tsk_rt(t)->inh_task = prio_inh;
654 TRACE_TASK(t, "%s: linked on %d\n", 1164
655 __FUNCTION__, linked_on); 1165 linked_on = tsk_rt(t)->linked_on;
656 /* Holder is scheduled; need to re-order CPUs. 1166
657 * We can't use heap_decrease() here since 1167 /* If it is scheduled, then we need to reorder the CPU heap. */
658 * the cpu_heap is ordered in reverse direction, so 1168 if (linked_on != NO_CPU) {
659 * it is actually an increase. */ 1169 TRACE_TASK(t, "%s: linked on %d\n",
660 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, 1170 __FUNCTION__, linked_on);
661 gsnedf_cpus[linked_on]->hn); 1171 /* Holder is scheduled; need to re-order CPUs.
662 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, 1172 * We can't use heap_decrease() here since
663 gsnedf_cpus[linked_on]->hn); 1173 * the cpu_heap is ordered in reverse direction, so
664 } else { 1174 * it is actually an increase. */
665 /* holder may be queued: first stop queue changes */ 1175 binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap);
666 raw_spin_lock(&gsnedf.release_lock); 1176 binheap_add(&gsnedf_cpus[linked_on]->hn,
667 if (is_queued(t)) { 1177 &gsnedf_cpu_heap, cpu_entry_t, hn);
668 TRACE_TASK(t, "%s: is queued\n",
669 __FUNCTION__);
670 /* We need to update the position of holder in some
671 * heap. Note that this could be a release heap if we
672 * budget enforcement is used and this job overran. */
673 check_preempt =
674 !bheap_decrease(edf_ready_order,
675 tsk_rt(t)->heap_node);
676 } else { 1178 } else {
677 /* Nothing to do: if it is not queued and not linked 1179 /* holder may be queued: first stop queue changes */
678 * then it is either sleeping or currently being moved 1180 raw_spin_lock(&gsnedf.release_lock);
679 * by other code (e.g., a timer interrupt handler) that 1181 if (is_queued(t)) {
680 * will use the correct priority when enqueuing the 1182 TRACE_TASK(t, "%s: is queued\n",
681 * task. */ 1183 __FUNCTION__);
682 TRACE_TASK(t, "%s: is NOT queued => Done.\n", 1184 /* We need to update the position of holder in some
683 __FUNCTION__); 1185 * heap. Note that this could be a release heap if we
684 } 1186 * budget enforcement is used and this job overran. */
685 raw_spin_unlock(&gsnedf.release_lock); 1187 check_preempt =
686 1188 !bheap_decrease(edf_ready_order,
687 /* If holder was enqueued in a release heap, then the following 1189 tsk_rt(t)->heap_node);
688 * preemption check is pointless, but we can't easily detect 1190 } else {
689 * that case. If you want to fix this, then consider that 1191 /* Nothing to do: if it is not queued and not linked
690 * simply adding a state flag requires O(n) time to update when 1192 * then it is either sleeping or currently being moved
691 * releasing n tasks, which conflicts with the goal to have 1193 * by other code (e.g., a timer interrupt handler) that
692 * O(log n) merges. */ 1194 * will use the correct priority when enqueuing the
693 if (check_preempt) { 1195 * task. */
694 /* heap_decrease() hit the top level of the heap: make 1196 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
695 * sure preemption checks get the right task, not the 1197 __FUNCTION__);
696 * potentially stale cache. */ 1198 }
697 bheap_uncache_min(edf_ready_order, 1199 raw_spin_unlock(&gsnedf.release_lock);
698 &gsnedf.ready_queue); 1200
699 check_for_preemptions(); 1201 /* If holder was enqueued in a release heap, then the following
1202 * preemption check is pointless, but we can't easily detect
1203 * that case. If you want to fix this, then consider that
1204 * simply adding a state flag requires O(n) time to update when
1205 * releasing n tasks, which conflicts with the goal to have
1206 * O(log n) merges. */
1207 if (check_preempt) {
1208 /* heap_decrease() hit the top level of the heap: make
1209 * sure preemption checks get the right task, not the
1210 * potentially stale cache. */
1211 bheap_uncache_min(edf_ready_order,
1212 &gsnedf.ready_queue);
1213 check_for_preemptions();
1214 }
1215
1216#ifdef CONFIG_REALTIME_AUX_TASKS
1217 /* propagate to aux tasks */
1218 if (tsk_rt(t)->has_aux_tasks) {
1219 aux_task_owner_increase_priority(t);
1220 }
1221#endif
1222
1223#ifdef CONFIG_LITMUS_NVIDIA
1224 /* propagate to gpu klmirqd */
1225 if (tsk_rt(t)->held_gpus) {
1226 gpu_owner_increase_priority(t);
1227 }
1228#endif
1229
700 } 1230 }
1231#ifdef CONFIG_LITMUS_NESTED_LOCKING
1232 }
1233 else {
1234 TRACE_TASK(t, "Spurious invalid priority increase. "
1235 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1236 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1237 t->comm, t->pid,
1238 effective_priority(t)->comm, effective_priority(t)->pid,
1239 (prio_inh) ? prio_inh->comm : "nil",
1240 (prio_inh) ? prio_inh->pid : -1);
1241 WARN_ON(!prio_inh);
1242 success = 0;
701 } 1243 }
1244#endif
702 1245
703 raw_spin_unlock(&gsnedf_lock); 1246out:
1247 return success;
704} 1248}
705 1249
706/* called with IRQs off */ 1250/* called with IRQs off */
707static void clear_priority_inheritance(struct task_struct* t) 1251static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
708{ 1252{
1253 int success;
1254
709 raw_spin_lock(&gsnedf_lock); 1255 raw_spin_lock(&gsnedf_lock);
710 1256
711 /* A job only stops inheriting a priority when it releases a 1257 success = __increase_priority_inheritance(t, prio_inh);
712 * resource. Thus we can make the following assumption.*/
713 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU);
714 1258
715 TRACE_TASK(t, "priority restored\n"); 1259 raw_spin_unlock(&gsnedf_lock);
716 tsk_rt(t)->inh_task = NULL;
717 1260
718 /* Check if rescheduling is necessary. We can't use heap_decrease() 1261#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
719 * since the priority was effectively lowered. */ 1262 if(tsk_rt(t)->held_gpus) {
720 unlink(t); 1263 int i;
721 gsnedf_job_arrival(t); 1264 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1265 i < NV_DEVICE_NUM;
1266 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1267 pai_check_priority_increase(t, i);
1268 }
1269 }
1270#endif
1271}
1272
1273
1274/* called with IRQs off */
1275static int __decrease_priority_inheritance(struct task_struct* t,
1276 struct task_struct* prio_inh)
1277{
1278 int success = 1;
1279
1280 if (prio_inh == tsk_rt(t)->inh_task) {
1281 /* relationship already established. */
1282 TRACE_TASK(t, "already inherits priority from %s/%d\n",
1283 (prio_inh) ? prio_inh->comm : "(nil)",
1284 (prio_inh) ? prio_inh->pid : 0);
1285 goto out;
1286 }
1287
1288#ifdef CONFIG_LITMUS_NESTED_LOCKING
1289 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
1290#endif
1291 /* A job only stops inheriting a priority when it releases a
1292 * resource. Thus we can make the following assumption.*/
1293 if(prio_inh)
1294 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1295 prio_inh->comm, prio_inh->pid);
1296 else
1297 TRACE_TASK(t, "base priority restored.\n");
1298
1299 tsk_rt(t)->inh_task = prio_inh;
1300
1301 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1302 TRACE_TASK(t, "is scheduled.\n");
1303
1304 /* Check if rescheduling is necessary. We can't use heap_decrease()
1305 * since the priority was effectively lowered. */
1306 unlink(t);
1307 gsnedf_job_arrival(t);
1308 }
1309 else {
1310 /* task is queued */
1311 raw_spin_lock(&gsnedf.release_lock);
1312 if (is_queued(t)) {
1313 TRACE_TASK(t, "is queued.\n");
1314
1315 /* decrease in priority, so we have to re-add to binomial heap */
1316 unlink(t);
1317 gsnedf_job_arrival(t);
1318 }
1319 else {
1320 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
1321 }
1322 raw_spin_unlock(&gsnedf.release_lock);
1323 }
1324
1325#ifdef CONFIG_REALTIME_AUX_TASKS
1326 /* propagate to aux tasks */
1327 if (tsk_rt(t)->has_aux_tasks) {
1328 aux_task_owner_decrease_priority(t);
1329 }
1330#endif
1331
1332#ifdef CONFIG_LITMUS_NVIDIA
1333 /* propagate to gpu */
1334 if (tsk_rt(t)->held_gpus) {
1335 gpu_owner_decrease_priority(t);
1336 }
1337#endif
1338
1339
1340#ifdef CONFIG_LITMUS_NESTED_LOCKING
1341 }
1342 else {
1343 TRACE_TASK(t, "Spurious invalid priority decrease. "
1344 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1345 				"Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1346 t->comm, t->pid,
1347 effective_priority(t)->comm, effective_priority(t)->pid,
1348 (prio_inh) ? prio_inh->comm : "nil",
1349 (prio_inh) ? prio_inh->pid : -1);
1350 success = 0;
1351 }
1352#endif
1353
1354out:
1355 return success;
1356}
1357
1358static void decrease_priority_inheritance(struct task_struct* t,
1359 struct task_struct* prio_inh)
1360{
1361 int success;
1362
1363 raw_spin_lock(&gsnedf_lock);
1364
1365 success = __decrease_priority_inheritance(t, prio_inh);
722 1366
723 raw_spin_unlock(&gsnedf_lock); 1367 raw_spin_unlock(&gsnedf_lock);
1368
1369#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1370 if(tsk_rt(t)->held_gpus) {
1371 int i;
1372 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1373 i < NV_DEVICE_NUM;
1374 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1375 pai_check_priority_decrease(t, i);
1376 }
1377 }
1378#endif
1379}
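
Both priority-change wrappers above walk the held_gpus bitmap with the kernel's generic bit iterators. For reference, find_first_bit() and find_next_bit() take the bitmap length in bits; a standalone loop over an NV_DEVICE_NUM-sized bitmap therefore looks like this (illustration only, not part of the patch):

#include <linux/bitops.h>

/* Illustration: visit every GPU index that is set in a small bitmap. */
static void example_for_each_held_gpu(unsigned long *held_gpus, int num_gpus)
{
	int i;
	for (i = find_first_bit(held_gpus, num_gpus);
	     i < num_gpus;
	     i = find_next_bit(held_gpus, num_gpus, i + 1)) {
		/* e.g., pai_check_priority_decrease(t, i); */
	}
}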
1380
1381
1382
1383#ifdef CONFIG_LITMUS_NESTED_LOCKING
1384
1385/* called with IRQs off */
1386/* preconditions:
1387 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1388 (2) The lock 'to_unlock' is held.
1389 */
1390static void nested_increase_priority_inheritance(struct task_struct* t,
1391 struct task_struct* prio_inh,
1392 raw_spinlock_t *to_unlock,
1393 unsigned long irqflags)
1394{
1395 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1396
1397 	if(tsk_rt(t)->inh_task != prio_inh) { 		// shield redundant calls.
1398 increase_priority_inheritance(t, prio_inh); // increase our prio.
1399 }
1400
1401 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1402
1403
1404 if(blocked_lock) {
1405 if(blocked_lock->ops->propagate_increase_inheritance) {
1406 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1407 blocked_lock->ident);
1408
1409 // beware: recursion
1410 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1411 t, to_unlock,
1412 irqflags);
1413 }
1414 else {
1415 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1416 blocked_lock->ident);
1417 unlock_fine_irqrestore(to_unlock, irqflags);
1418 }
1419 }
1420 else {
1421 TRACE_TASK(t, "is not blocked. No propagation.\n");
1422 unlock_fine_irqrestore(to_unlock, irqflags);
1423 }
1424}
1425
1426/* called with IRQs off */
1427/* preconditions:
1428 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1429 (2) The lock 'to_unlock' is held.
1430 */
1431static void nested_decrease_priority_inheritance(struct task_struct* t,
1432 struct task_struct* prio_inh,
1433 raw_spinlock_t *to_unlock,
1434 unsigned long irqflags)
1435{
1436 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1437 decrease_priority_inheritance(t, prio_inh);
1438
1439 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1440
1441 if(blocked_lock) {
1442 if(blocked_lock->ops->propagate_decrease_inheritance) {
1443 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1444 blocked_lock->ident);
1445
1446 // beware: recursion
1447 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1448 to_unlock,
1449 irqflags);
1450 }
1451 else {
1452 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1453 blocked_lock);
1454 unlock_fine_irqrestore(to_unlock, irqflags);
1455 }
1456 }
1457 else {
1458 TRACE_TASK(t, "is not blocked. No propagation.\n");
1459 unlock_fine_irqrestore(to_unlock, irqflags);
1460 }
1461}
1462
1463
1464/* ******************** RSM MUTEX ********************** */
1465
1466static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = {
1467 .lock = rsm_mutex_lock,
1468 .unlock = rsm_mutex_unlock,
1469 .close = rsm_mutex_close,
1470 .deallocate = rsm_mutex_free,
1471
1472 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1473 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1474
1475#ifdef CONFIG_LITMUS_DGL_SUPPORT
1476 .dgl_lock = rsm_mutex_dgl_lock,
1477 .is_owner = rsm_mutex_is_owner,
1478 .enable_priority = rsm_mutex_enable_priority,
1479#endif
1480};
1481
1482static struct litmus_lock* gsnedf_new_rsm_mutex(void)
1483{
1484 return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops);
724} 1485}
725 1486
1487/* ******************** IKGLP ********************** */
1488
1489static struct litmus_lock_ops gsnedf_ikglp_lock_ops = {
1490 .lock = ikglp_lock,
1491 .unlock = ikglp_unlock,
1492 .close = ikglp_close,
1493 .deallocate = ikglp_free,
1494
1495 // ikglp can only be an outer-most lock.
1496 .propagate_increase_inheritance = NULL,
1497 .propagate_decrease_inheritance = NULL,
1498};
1499
1500static struct litmus_lock* gsnedf_new_ikglp(void* __user arg)
1501{
1502 return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg);
1503}
1504
1505#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1506
1507
1508/* ******************** KFMLP support ********************** */
1509
1510static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
1511 .lock = kfmlp_lock,
1512 .unlock = kfmlp_unlock,
1513 .close = kfmlp_close,
1514 .deallocate = kfmlp_free,
1515
1516 // kfmlp can only be an outer-most lock.
1517 .propagate_increase_inheritance = NULL,
1518 .propagate_decrease_inheritance = NULL,
1519};
1520
1521
1522static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg)
1523{
1524 return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg);
1525}
726 1526
727/* ******************** FMLP support ********************** */ 1527/* ******************** FMLP support ********************** */
728 1528
@@ -789,7 +1589,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
789 if (edf_higher_prio(t, sem->hp_waiter)) { 1589 if (edf_higher_prio(t, sem->hp_waiter)) {
790 sem->hp_waiter = t; 1590 sem->hp_waiter = t;
791 if (edf_higher_prio(t, sem->owner)) 1591 if (edf_higher_prio(t, sem->owner))
792 set_priority_inheritance(sem->owner, sem->hp_waiter); 1592 increase_priority_inheritance(sem->owner, sem->hp_waiter);
793 } 1593 }
794 1594
795 TS_LOCK_SUSPEND; 1595 TS_LOCK_SUSPEND;
@@ -802,7 +1602,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
802 * there is only one wake up per release. 1602 * there is only one wake up per release.
803 */ 1603 */
804 1604
805 schedule(); 1605 suspend_for_lock();
806 1606
807 TS_LOCK_RESUME; 1607 TS_LOCK_RESUME;
808 1608
@@ -857,7 +1657,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
857 /* Well, if next is not the highest-priority waiter, 1657 /* Well, if next is not the highest-priority waiter,
858 * then it ought to inherit the highest-priority 1658 * then it ought to inherit the highest-priority
859 * waiter's priority. */ 1659 * waiter's priority. */
860 set_priority_inheritance(next, sem->hp_waiter); 1660 increase_priority_inheritance(next, sem->hp_waiter);
861 } 1661 }
862 1662
863 /* wake up next */ 1663 /* wake up next */
@@ -868,7 +1668,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
868 1668
869 /* we lose the benefit of priority inheritance (if any) */ 1669 /* we lose the benefit of priority inheritance (if any) */
870 if (tsk_rt(t)->inh_task) 1670 if (tsk_rt(t)->inh_task)
871 clear_priority_inheritance(t); 1671 decrease_priority_inheritance(t, NULL);
872 1672
873out: 1673out:
874 spin_unlock_irqrestore(&sem->wait.lock, flags); 1674 spin_unlock_irqrestore(&sem->wait.lock, flags);
@@ -906,6 +1706,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
906 .lock = gsnedf_fmlp_lock, 1706 .lock = gsnedf_fmlp_lock,
907 .unlock = gsnedf_fmlp_unlock, 1707 .unlock = gsnedf_fmlp_unlock,
908 .deallocate = gsnedf_fmlp_free, 1708 .deallocate = gsnedf_fmlp_free,
1709
1710#ifdef CONFIG_LITMUS_NESTED_LOCKING
1711 .propagate_increase_inheritance = NULL,
1712 .propagate_decrease_inheritance = NULL
1713#endif
909}; 1714};
910 1715
911static struct litmus_lock* gsnedf_new_fmlp(void) 1716static struct litmus_lock* gsnedf_new_fmlp(void)
@@ -924,31 +1729,110 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
924 return &sem->litmus_lock; 1729 return &sem->litmus_lock;
925} 1730}
926 1731
927/* **** lock constructor **** */
928
929 1732
930static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, 1733static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
931 void* __user unused) 1734 void* __user args)
932{ 1735{
933 int err = -ENXIO; 1736 int err;
934 1737
935 /* GSN-EDF currently only supports the FMLP for global resources. */
936 switch (type) { 1738 switch (type) {
937 1739
938 case FMLP_SEM: 1740 case FMLP_SEM:
939 /* Flexible Multiprocessor Locking Protocol */ 1741 /* Flexible Multiprocessor Locking Protocol */
940 *lock = gsnedf_new_fmlp(); 1742 *lock = gsnedf_new_fmlp();
941 if (*lock) 1743 break;
942 err = 0; 1744#ifdef CONFIG_LITMUS_NESTED_LOCKING
943 else 1745 case RSM_MUTEX:
944 err = -ENOMEM; 1746 *lock = gsnedf_new_rsm_mutex();
945 break; 1747 break;
946 1748
1749 case IKGLP_SEM:
1750 *lock = gsnedf_new_ikglp(args);
1751 break;
1752#endif
1753 case KFMLP_SEM:
1754 *lock = gsnedf_new_kfmlp(args);
1755 break;
1756 default:
1757 err = -ENXIO;
1758 goto UNSUPPORTED_LOCK;
1759 };
1760
1761 if (*lock)
1762 err = 0;
1763 else
1764 err = -ENOMEM;
1765
1766UNSUPPORTED_LOCK:
1767 return err;
1768}
1769
1770#endif // CONFIG_LITMUS_LOCKING
1771
1772
1773
1774
1775
1776#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1777static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = {
1778 .close = kfmlp_aff_obs_close,
1779 .deallocate = kfmlp_aff_obs_free,
1780};
1781
1782#ifdef CONFIG_LITMUS_NESTED_LOCKING
1783static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = {
1784 .close = ikglp_aff_obs_close,
1785 .deallocate = ikglp_aff_obs_free,
1786};
1787#endif
1788
1789static long gsnedf_allocate_affinity_observer(
1790 struct affinity_observer **aff_obs,
1791 int type,
1792 void* __user args)
1793{
1794 int err;
1795
1796 switch (type) {
1797
1798 case KFMLP_SIMPLE_GPU_AFF_OBS:
1799 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1800 break;
1801
1802 case KFMLP_GPU_AFF_OBS:
1803 *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1804 break;
1805
1806#ifdef CONFIG_LITMUS_NESTED_LOCKING
1807 case IKGLP_SIMPLE_GPU_AFF_OBS:
1808 *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1809 break;
1810
1811 case IKGLP_GPU_AFF_OBS:
1812 *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1813 break;
1814#endif
1815 default:
1816 err = -ENXIO;
1817 goto UNSUPPORTED_AFF_OBS;
947 }; 1818 };
948 1819
1820 if (*aff_obs)
1821 err = 0;
1822 else
1823 err = -ENOMEM;
1824
1825UNSUPPORTED_AFF_OBS:
949 return err; 1826 return err;
950} 1827}
1828#endif
1829
951 1830
1831#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1832static int gsnedf_map_gpu_to_cpu(int gpu)
1833{
1834 return -1; // No CPU affinity needed.
1835}
952#endif 1836#endif
953 1837
954 1838
@@ -957,14 +1841,14 @@ static long gsnedf_activate_plugin(void)
957 int cpu; 1841 int cpu;
958 cpu_entry_t *entry; 1842 cpu_entry_t *entry;
959 1843
960 bheap_init(&gsnedf_cpu_heap); 1844 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
961#ifdef CONFIG_RELEASE_MASTER 1845#ifdef CONFIG_RELEASE_MASTER
962 gsnedf.release_master = atomic_read(&release_master_cpu); 1846 gsnedf.release_master = atomic_read(&release_master_cpu);
963#endif 1847#endif
964 1848
965 for_each_online_cpu(cpu) { 1849 for_each_online_cpu(cpu) {
966 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1850 entry = &per_cpu(gsnedf_cpu_entries, cpu);
967 bheap_node_init(&entry->hn, entry); 1851 INIT_BINHEAP_NODE(&entry->hn);
968 entry->linked = NULL; 1852 entry->linked = NULL;
969 entry->scheduled = NULL; 1853 entry->scheduled = NULL;
970#ifdef CONFIG_RELEASE_MASTER 1854#ifdef CONFIG_RELEASE_MASTER
@@ -978,6 +1862,20 @@ static long gsnedf_activate_plugin(void)
978 } 1862 }
979#endif 1863#endif
980 } 1864 }
1865
1866#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1867 gsnedf_pending_tasklets.head = NULL;
1868 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
1869#endif
1870
1871#ifdef CONFIG_LITMUS_SOFTIRQD
1872 init_klmirqd();
1873#endif
1874
1875#ifdef CONFIG_LITMUS_NVIDIA
1876 init_nvidia_info();
1877#endif
1878
981 return 0; 1879 return 0;
982} 1880}
983 1881
@@ -994,8 +1892,32 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
994 .task_block = gsnedf_task_block, 1892 .task_block = gsnedf_task_block,
995 .admit_task = gsnedf_admit_task, 1893 .admit_task = gsnedf_admit_task,
996 .activate_plugin = gsnedf_activate_plugin, 1894 .activate_plugin = gsnedf_activate_plugin,
1895 .compare = edf_higher_prio,
997#ifdef CONFIG_LITMUS_LOCKING 1896#ifdef CONFIG_LITMUS_LOCKING
998 .allocate_lock = gsnedf_allocate_lock, 1897 .allocate_lock = gsnedf_allocate_lock,
1898 .increase_prio = increase_priority_inheritance,
1899 .decrease_prio = decrease_priority_inheritance,
1900 .__increase_prio = __increase_priority_inheritance,
1901 .__decrease_prio = __decrease_priority_inheritance,
1902#endif
1903#ifdef CONFIG_LITMUS_NESTED_LOCKING
1904 .nested_increase_prio = nested_increase_priority_inheritance,
1905 .nested_decrease_prio = nested_decrease_priority_inheritance,
1906 .__compare = __edf_higher_prio,
1907#endif
1908#ifdef CONFIG_LITMUS_DGL_SUPPORT
1909 .get_dgl_spinlock = gsnedf_get_dgl_spinlock,
1910#endif
1911#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1912 .allocate_aff_obs = gsnedf_allocate_affinity_observer,
1913#endif
1914#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1915 .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet,
1916 .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
1917 .run_tasklets = gsnedf_run_tasklets,
1918#endif
1919#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
1920 .map_gpu_to_cpu = gsnedf_map_gpu_to_cpu,
999#endif 1921#endif
1000}; 1922};
1001 1923
@@ -1005,15 +1927,20 @@ static int __init init_gsn_edf(void)
1005 int cpu; 1927 int cpu;
1006 cpu_entry_t *entry; 1928 cpu_entry_t *entry;
1007 1929
1008 bheap_init(&gsnedf_cpu_heap); 1930 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
1009 /* initialize CPU state */ 1931 /* initialize CPU state */
1010 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1932 for (cpu = 0; cpu < NR_CPUS; ++cpu) {
1011 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1933 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1012 gsnedf_cpus[cpu] = entry; 1934 gsnedf_cpus[cpu] = entry;
1013 entry->cpu = cpu; 1935 entry->cpu = cpu;
1014 entry->hn = &gsnedf_heap_node[cpu]; 1936
1015 bheap_node_init(&entry->hn, entry); 1937 INIT_BINHEAP_NODE(&entry->hn);
1016 } 1938 }
1939
1940#ifdef CONFIG_LITMUS_DGL_SUPPORT
1941 raw_spin_lock_init(&dgl_lock);
1942#endif
1943
1017 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); 1944 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1018 return register_sched_plugin(&gsn_edf_plugin); 1945 return register_sched_plugin(&gsn_edf_plugin);
1019} 1946}
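
The rewritten gsnedf_allocate_lock() above dispatches on the requested protocol and routes every unsupported type through a single error label, leaving one shared NULL check for -ENOMEM. Below is a minimal user-space sketch of that dispatch-and-shared-tail shape; the lock struct, constructor, and type constants are stand-ins for illustration, not the real LITMUS^RT API.

/* Minimal sketch of the dispatch-and-shared-tail shape of
 * gsnedf_allocate_lock(); the lock struct, constructor, and type
 * constants below are stand-ins, not the real LITMUS^RT API. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

enum lock_type { FMLP_SEM, RSM_MUTEX, IKGLP_SEM, KFMLP_SEM };

struct lock { enum lock_type type; };

/* stand-in for gsnedf_new_fmlp() and friends */
static struct lock *new_lock(enum lock_type type)
{
        struct lock *l = malloc(sizeof(*l));
        if (l)
                l->type = type;
        return l;
}

static long allocate_lock(struct lock **lock, int type)
{
        int err;

        switch (type) {
        case FMLP_SEM:
        case RSM_MUTEX:
        case IKGLP_SEM:
        case KFMLP_SEM:
                *lock = new_lock(type);
                break;
        default:
                err = -ENXIO;                   /* unknown protocol */
                goto UNSUPPORTED_LOCK;
        }

        /* shared tail: a constructor ran, so only allocation can have failed */
        err = (*lock) ? 0 : -ENOMEM;

UNSUPPORTED_LOCK:
        return err;
}

int main(void)
{
        struct lock *l = NULL;
        printf("FMLP_SEM: %ld\n", allocate_lock(&l, FMLP_SEM));
        printf("bogus:    %ld\n", allocate_lock(&l, 42));
        return 0;
}

The real function additionally hides RSM_MUTEX and IKGLP_SEM behind CONFIG_LITMUS_NESTED_LOCKING, which the sketch leaves out.
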
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 6b32cf09abbd..9de03c95b825 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -175,8 +175,10 @@ static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
175 litmus->task_wake_up(p); 175 litmus->task_wake_up(p);
176 176
177 rq->litmus.nr_running++; 177 rq->litmus.nr_running++;
178 } else 178 } else {
179 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n"); 179 TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
180 //WARN_ON(1);
181 }
180} 182}
181 183
182static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, 184static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
index 91e52391a173..a96c2b1aa26f 100644
--- a/litmus/sched_pfp.c
+++ b/litmus/sched_pfp.c
@@ -142,17 +142,25 @@ static void pfp_tick(struct task_struct *t)
142 */ 142 */
143 BUG_ON(is_realtime(t) && t != pfp->scheduled); 143 BUG_ON(is_realtime(t) && t != pfp->scheduled);
144 144
145 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 145 if (is_realtime(t) && budget_exhausted(t))
146 if (!is_np(t)) { 146 {
147 litmus_reschedule_local(); 147 if (budget_signalled(t) && !sigbudget_sent(t)) {
148 TRACE("pfp_scheduler_tick: " 148 /* signal exhaustion */
149 "%d is preemptable " 149 send_sigbudget(t);
150 " => FORCE_RESCHED\n", t->pid); 150 }
151 } else if (is_user_np(t)) { 151
152 TRACE("pfp_scheduler_tick: " 152 if (budget_enforced(t)) {
153 "%d is non-preemptable, " 153 if (!is_np(t)) {
154 "preemption delayed.\n", t->pid); 154 litmus_reschedule_local();
155 request_exit_np(t); 155 TRACE("pfp_scheduler_tick: "
156 "%d is preemptable "
157 " => FORCE_RESCHED\n", t->pid);
158 } else if (is_user_np(t)) {
159 TRACE("pfp_scheduler_tick: "
160 "%d is non-preemptable, "
161 "preemption delayed.\n", t->pid);
162 request_exit_np(t);
163 }
156 } 164 }
157 } 165 }
158} 166}
@@ -162,7 +170,7 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
162 pfp_domain_t* pfp = local_pfp; 170 pfp_domain_t* pfp = local_pfp;
163 struct task_struct* next; 171 struct task_struct* next;
164 172
165 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate; 173 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched, migrate;
166 174
167 raw_spin_lock(&pfp->slock); 175 raw_spin_lock(&pfp->slock);
168 176
@@ -179,6 +187,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
179 out_of_time = exists && 187 out_of_time = exists &&
180 budget_enforced(pfp->scheduled) && 188 budget_enforced(pfp->scheduled) &&
181 budget_exhausted(pfp->scheduled); 189 budget_exhausted(pfp->scheduled);
190 signal_budget = exists &&
191 budget_signalled(pfp->scheduled) &&
192 budget_exhausted(pfp->scheduled) &&
193 !sigbudget_sent(pfp->scheduled);
182 np = exists && is_np(pfp->scheduled); 194 np = exists && is_np(pfp->scheduled);
183 sleep = exists && is_completed(pfp->scheduled); 195 sleep = exists && is_completed(pfp->scheduled);
184 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; 196 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
@@ -190,6 +202,10 @@ static struct task_struct* pfp_schedule(struct task_struct * prev)
190 */ 202 */
191 resched = preempt; 203 resched = preempt;
192 204
205 /* Send the signal that the budget has been exhausted */
206 if (signal_budget)
207 send_sigbudget(pfp->scheduled);
208
193 /* If a task blocks we have no choice but to reschedule. 209 /* If a task blocks we have no choice but to reschedule.
194 */ 210 */
195 if (blocks) 211 if (blocks)
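
Both pfp_tick() and pfp_schedule() above derive signal_budget from budget_signalled() && budget_exhausted() && !sigbudget_sent(), so SIG_BUDGET is raised at most once per exhausted job, while enforcement (forcing a reschedule) remains a separate decision; sched_psn_edf.c below repeats the same pattern. A user-space sketch of that send-once latch, with hypothetical stand-ins for the task state and helpers:

/* User-space sketch of the send-once SIG_BUDGET latch; the task struct
 * and helpers are hypothetical stand-ins for the LITMUS^RT ones. */
#include <stdbool.h>
#include <stdio.h>

struct fake_task {
        long budget;            /* remaining budget */
        bool wants_signal;      /* budget_signalled(t) */
        bool wants_enforce;     /* budget_enforced(t) */
        bool sig_sent;          /* sigbudget_sent(t) */
};

static bool budget_exhausted(const struct fake_task *t)
{
        return t->budget <= 0;
}

static void send_sigbudget(struct fake_task *t)
{
        t->sig_sent = true;     /* the kernel raises SIG_BUDGET at the task */
        printf("SIG_BUDGET delivered\n");
}

static void tick(struct fake_task *t)
{
        if (!budget_exhausted(t))
                return;

        /* signal exhaustion exactly once per job */
        if (t->wants_signal && !t->sig_sent)
                send_sigbudget(t);

        /* enforcement (forcing a reschedule) stays a separate decision */
        if (t->wants_enforce)
                printf("would call litmus_reschedule_local()\n");
}

int main(void)
{
        struct fake_task t = { .budget = 0, .wants_signal = true,
                               .wants_enforce = true };
        tick(&t);       /* signals and would reschedule */
        tick(&t);       /* only reschedules; the signal was already sent */
        return 0;
}
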
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 00a1900d6457..76ff892122aa 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
13#include <litmus/preempt.h> 13#include <litmus/preempt.h>
14#include <litmus/jobs.h> 14#include <litmus/jobs.h>
15 15
16#ifdef CONFIG_LITMUS_NVIDIA
17#include <litmus/nvidia_info.h>
18#endif
19
16/* 20/*
17 * Generic function to trigger preemption on either local or remote cpu 21 * Generic function to trigger preemption on either local or remote cpu
18 * from scheduler plugins. The key feature is that this function is 22 * from scheduler plugins. The key feature is that this function is
@@ -27,11 +31,19 @@ void preempt_if_preemptable(struct task_struct* t, int cpu)
27 31
28 int reschedule = 0; 32 int reschedule = 0;
29 33
30 if (!t) 34 TRACE_CUR("preempt_if_preemptable: %s/%d\n",
35 (t) ? t->comm : "(nil)",
36 (t) ? t->pid : 0);
37
38 if (!t) {
 39 TRACE_CUR("unconditionally reschedule\n");
31 /* move non-real-time task out of the way */ 40 /* move non-real-time task out of the way */
32 reschedule = 1; 41 reschedule = 1;
42 }
33 else { 43 else {
34 if (smp_processor_id() == cpu) { 44 if (smp_processor_id() == cpu) {
45 TRACE_CUR("preempt local cpu.\n");
46
35 /* local CPU case */ 47 /* local CPU case */
36 /* check if we need to poke userspace */ 48 /* check if we need to poke userspace */
37 if (is_user_np(t)) 49 if (is_user_np(t))
@@ -43,14 +55,22 @@ void preempt_if_preemptable(struct task_struct* t, int cpu)
43 * currently-executing task */ 55 * currently-executing task */
44 reschedule = 1; 56 reschedule = 1;
45 } else { 57 } else {
58 int is_knp = is_kernel_np(t);
59 int reqexit = request_exit_np_atomic(t);
60 TRACE_CUR("preempt remote cpu: isknp = %d reqexit = %d\n", is_knp, reqexit);
61
46 /* Remote CPU case. Only notify if it's not a kernel 62 /* Remote CPU case. Only notify if it's not a kernel
47 * NP section and if we didn't set the userspace 63 * NP section and if we didn't set the userspace
48 * flag. */ 64 * flag. */
49 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); 65 //reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
66 reschedule = !(is_knp || reqexit);
50 } 67 }
51 } 68 }
52 if (likely(reschedule)) 69
70 if (likely(reschedule)) {
71 TRACE_CUR("calling litmus_reschedule()\n");
53 litmus_reschedule(cpu); 72 litmus_reschedule(cpu);
73 }
54} 74}
55 75
56 76
@@ -102,6 +122,9 @@ static long litmus_dummy_complete_job(void)
102 122
103static long litmus_dummy_activate_plugin(void) 123static long litmus_dummy_activate_plugin(void)
104{ 124{
125#ifdef CONFIG_LITMUS_NVIDIA
126 shutdown_nvidia_info();
127#endif
105 return 0; 128 return 0;
106} 129}
107 130
@@ -110,14 +133,100 @@ static long litmus_dummy_deactivate_plugin(void)
110 return 0; 133 return 0;
111} 134}
112 135
113#ifdef CONFIG_LITMUS_LOCKING 136static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b)
137{
138 TRACE_CUR("WARNING: Dummy compare function called!\n");
139 return 0;
140}
114 141
142#ifdef CONFIG_LITMUS_LOCKING
115static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, 143static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
116 void* __user config) 144 void* __user config)
117{ 145{
118 return -ENXIO; 146 return -ENXIO;
119} 147}
120 148
149static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh)
150{
151}
152
153static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
154{
155}
156
157static int litmus_dummy___increase_prio(struct task_struct* t, struct task_struct* prio_inh)
158{
159 TRACE_CUR("WARNING: Dummy litmus_dummy___increase_prio called!\n");
160 return 0;
161}
162
163static int litmus_dummy___decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
164{
165 TRACE_CUR("WARNING: Dummy litmus_dummy___decrease_prio called!\n");
166 return 0;
167}
168#endif
169
170
171#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
172static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
173{
174 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
175 return(0); // failure.
176}
177
178static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio,
179 struct task_struct *new_prio)
180{
181 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
182}
183
184static void litmus_dummy_run_tasklets(struct task_struct* t)
185{
186 //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
187}
188#endif
189
190#ifdef CONFIG_LITMUS_NESTED_LOCKING
191static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh,
192 raw_spinlock_t *to_unlock, unsigned long irqflags)
193{
194}
195
196static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh,
197 raw_spinlock_t *to_unlock, unsigned long irqflags)
198{
199}
200
201static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod,
202 struct task_struct* b, comparison_mode_t b_mode)
203{
204 TRACE_CUR("WARNING: Dummy compare function called!\n");
205 return 0;
206}
207#endif
208
209#ifdef CONFIG_LITMUS_DGL_SUPPORT
210static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t)
211{
212 return NULL;
213}
214#endif
215
216#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
217static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs,
218 int type,
219 void* __user config)
220{
221 return -ENXIO;
222}
223#endif
224
225#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
226static int litmus_dummy_map_gpu_to_cpu(int gpu)
227{
228 return 0;
229}
121#endif 230#endif
122 231
123 232
@@ -136,9 +245,34 @@ struct sched_plugin linux_sched_plugin = {
136 .finish_switch = litmus_dummy_finish_switch, 245 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin, 246 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin, 247 .deactivate_plugin = litmus_dummy_deactivate_plugin,
248 .compare = litmus_dummy_compare,
139#ifdef CONFIG_LITMUS_LOCKING 249#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock, 250 .allocate_lock = litmus_dummy_allocate_lock,
251 .increase_prio = litmus_dummy_increase_prio,
252 .decrease_prio = litmus_dummy_decrease_prio,
253 .__increase_prio = litmus_dummy___increase_prio,
254 .__decrease_prio = litmus_dummy___decrease_prio,
255#endif
256#ifdef CONFIG_LITMUS_NESTED_LOCKING
257 .nested_increase_prio = litmus_dummy_nested_increase_prio,
258 .nested_decrease_prio = litmus_dummy_nested_decrease_prio,
259 .__compare = litmus_dummy___compare,
260#endif
261#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
262 .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
263 .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet,
264 .run_tasklets = litmus_dummy_run_tasklets,
265#endif
266#ifdef CONFIG_LITMUS_DGL_SUPPORT
267 .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock,
141#endif 268#endif
269#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
270 .allocate_aff_obs = litmus_dummy_allocate_aff_obs,
271#endif
272#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
273 .map_gpu_to_cpu = litmus_dummy_map_gpu_to_cpu,
274#endif
275
142 .admit_task = litmus_dummy_admit_task 276 .admit_task = litmus_dummy_admit_task
143}; 277};
144 278
@@ -174,9 +308,34 @@ int register_sched_plugin(struct sched_plugin* plugin)
174 CHECK(complete_job); 308 CHECK(complete_job);
175 CHECK(activate_plugin); 309 CHECK(activate_plugin);
176 CHECK(deactivate_plugin); 310 CHECK(deactivate_plugin);
311 CHECK(compare);
177#ifdef CONFIG_LITMUS_LOCKING 312#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock); 313 CHECK(allocate_lock);
314 CHECK(increase_prio);
315 CHECK(decrease_prio);
316 CHECK(__increase_prio);
317 CHECK(__decrease_prio);
318#endif
319#ifdef CONFIG_LITMUS_NESTED_LOCKING
320 CHECK(nested_increase_prio);
321 CHECK(nested_decrease_prio);
322 CHECK(__compare);
323#endif
324#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
325 CHECK(enqueue_pai_tasklet);
326 CHECK(change_prio_pai_tasklet);
327 CHECK(run_tasklets);
179#endif 328#endif
329#ifdef CONFIG_LITMUS_DGL_SUPPORT
330 CHECK(get_dgl_spinlock);
331#endif
332#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
333 CHECK(allocate_aff_obs);
334#endif
335#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
336 CHECK(map_gpu_to_cpu);
337#endif
338
180 CHECK(admit_task); 339 CHECK(admit_task);
181 340
182 if (!plugin->release_at) 341 if (!plugin->release_at)
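
The new dummy callbacks pair with the CHECK() calls added to register_sched_plugin(): presumably CHECK(func) installs litmus_dummy_##func whenever a plugin leaves that hook NULL, so the new entry points (compare, increase_prio, PAI tasklets, DGL, affinity observers, GPU mapping) are always callable. A reduced stand-alone sketch of that fill-in-the-defaults pattern follows; the two-callback plugin struct and names are illustrative only.

/* Stand-alone sketch of the fill-in-the-defaults pattern behind the
 * litmus_dummy_* callbacks and the CHECK() calls in
 * register_sched_plugin(); the plugin struct and names are illustrative. */
#include <stdio.h>

struct plugin {
        const char *name;
        int  (*compare)(int a, int b);
        void (*increase_prio)(int task, int prio);
};

static int dummy_compare(int a, int b)
{
        printf("WARNING: dummy compare called!\n");
        return 0;
}

static void dummy_increase_prio(int task, int prio)
{
        /* no-op default */
}

/* install the dummy if the plugin left the hook unset */
#define CHECK(func) do { if (!p->func) p->func = dummy_##func; } while (0)

static void register_plugin(struct plugin *p)
{
        CHECK(compare);
        CHECK(increase_prio);
        printf("registered %s\n", p->name);
}

int main(void)
{
        struct plugin partial = { .name = "partial" };  /* both hooks unset */
        register_plugin(&partial);
        partial.compare(1, 2);          /* safe: dummy installed */
        partial.increase_prio(1, 99);   /* safe no-op */
        return 0;
}
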
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index 0e1675d2e572..63fa6103882a 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -174,17 +174,25 @@ static void psnedf_tick(struct task_struct *t)
174 */ 174 */
175 BUG_ON(is_realtime(t) && t != pedf->scheduled); 175 BUG_ON(is_realtime(t) && t != pedf->scheduled);
176 176
177 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { 177 if (is_realtime(t) && budget_exhausted(t))
178 if (!is_np(t)) { 178 {
179 litmus_reschedule_local(); 179 if (budget_signalled(t) && !sigbudget_sent(t)) {
180 TRACE("psnedf_scheduler_tick: " 180 /* signal exhaustion */
181 "%d is preemptable " 181 send_sigbudget(t);
182 " => FORCE_RESCHED\n", t->pid); 182 }
183 } else if (is_user_np(t)) { 183
184 TRACE("psnedf_scheduler_tick: " 184 if (budget_enforced(t)) {
185 "%d is non-preemptable, " 185 if (!is_np(t)) {
186 "preemption delayed.\n", t->pid); 186 litmus_reschedule_local();
187 request_exit_np(t); 187 TRACE("psnedf_scheduler_tick: "
188 "%d is preemptable "
189 " => FORCE_RESCHED\n", t->pid);
190 } else if (is_user_np(t)) {
191 TRACE("psnedf_scheduler_tick: "
192 "%d is non-preemptable, "
193 "preemption delayed.\n", t->pid);
194 request_exit_np(t);
195 }
188 } 196 }
189 } 197 }
190} 198}
@@ -195,8 +203,7 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
195 rt_domain_t* edf = &pedf->domain; 203 rt_domain_t* edf = &pedf->domain;
196 struct task_struct* next; 204 struct task_struct* next;
197 205
198 int out_of_time, sleep, preempt, 206 int out_of_time, signal_budget, sleep, preempt, np, exists, blocks, resched;
199 np, exists, blocks, resched;
200 207
201 raw_spin_lock(&pedf->slock); 208 raw_spin_lock(&pedf->slock);
202 209
@@ -213,6 +220,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
213 out_of_time = exists && 220 out_of_time = exists &&
214 budget_enforced(pedf->scheduled) && 221 budget_enforced(pedf->scheduled) &&
215 budget_exhausted(pedf->scheduled); 222 budget_exhausted(pedf->scheduled);
223 signal_budget = exists &&
224 budget_signalled(pedf->scheduled) &&
225 budget_exhausted(pedf->scheduled) &&
226 !sigbudget_sent(pedf->scheduled);
216 np = exists && is_np(pedf->scheduled); 227 np = exists && is_np(pedf->scheduled);
217 sleep = exists && is_completed(pedf->scheduled); 228 sleep = exists && is_completed(pedf->scheduled);
218 preempt = edf_preemption_needed(edf, prev); 229 preempt = edf_preemption_needed(edf, prev);
@@ -223,6 +234,10 @@ static struct task_struct* psnedf_schedule(struct task_struct * prev)
223 */ 234 */
224 resched = preempt; 235 resched = preempt;
225 236
237 /* Send the signal that the budget has been exhausted */
238 if (signal_budget)
239 send_sigbudget(pedf->scheduled);
240
226 /* If a task blocks we have no choice but to reschedule. 241 /* If a task blocks we have no choice but to reschedule.
227 */ 242 */
228 if (blocks) 243 if (blocks)
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 5ef8d09ab41f..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/hardirq.h>
10 11
11#include <litmus/ftdev.h> 12#include <litmus/ftdev.h>
12#include <litmus/litmus.h> 13#include <litmus/litmus.h>
@@ -16,13 +17,13 @@
16#include <litmus/ftdev.h> 17#include <litmus/ftdev.h>
17 18
18 19
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 20#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
20 21
21#define now() litmus_clock() 22#define now() litmus_clock()
22 23
23struct local_buffer { 24struct local_buffer {
24 struct st_event_record record[NO_EVENTS]; 25 struct st_event_record record[NUM_EVENTS];
25 char flag[NO_EVENTS]; 26 char flag[NUM_EVENTS];
26 struct ft_buffer ftbuf; 27 struct ft_buffer ftbuf;
27}; 28};
28 29
@@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
41 int i, ok = 0, err; 42 int i, ok = 0, err;
42 printk("Allocated %u sched_trace_xxx() events per CPU " 43 printk("Allocated %u sched_trace_xxx() events per CPU "
43 "(buffer size: %d bytes)\n", 44 "(buffer size: %d bytes)\n",
44 NO_EVENTS, (int) sizeof(struct local_buffer)); 45 NUM_EVENTS, (int) sizeof(struct local_buffer));
45 46
46 err = ftdev_init(&st_dev, THIS_MODULE, 47 err = ftdev_init(&st_dev, THIS_MODULE,
47 num_online_cpus(), "sched_trace"); 48 num_online_cpus(), "sched_trace");
@@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
50 51
51 for (i = 0; i < st_dev.minor_cnt; i++) { 52 for (i = 0; i < st_dev.minor_cnt; i++) {
52 buf = &per_cpu(st_event_buffer, i); 53 buf = &per_cpu(st_event_buffer, i);
53 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, 54 ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
54 sizeof(struct st_event_record), 55 sizeof(struct st_event_record),
55 buf->flag, 56 buf->flag,
56 buf->record); 57 buf->record);
@@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
154{ 155{
155 struct task_struct *t = (struct task_struct*) _task; 156 struct task_struct *t = (struct task_struct*) _task;
156 struct st_event_record* rec; 157 struct st_event_record* rec;
157 if (is_realtime(t)) { 158 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
159 {
158 rec = get_record(ST_SWITCH_TO, t); 160 rec = get_record(ST_SWITCH_TO, t);
159 if (rec) { 161 if (rec) {
160 rec->data.switch_to.when = now(); 162 rec->data.switch_to.when = now();
@@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
169{ 171{
170 struct task_struct *t = (struct task_struct*) _task; 172 struct task_struct *t = (struct task_struct*) _task;
171 struct st_event_record* rec; 173 struct st_event_record* rec;
172 if (is_realtime(t)) { 174 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
175 {
173 rec = get_record(ST_SWITCH_AWAY, t); 176 rec = get_record(ST_SWITCH_AWAY, t);
174 if (rec) { 177 if (rec) {
175 rec->data.switch_away.when = now(); 178 rec->data.switch_away.when = now();
@@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
188 if (rec) { 191 if (rec) {
189 rec->data.completion.when = now(); 192 rec->data.completion.when = now();
190 rec->data.completion.forced = forced; 193 rec->data.completion.forced = forced;
 194#ifdef CONFIG_LITMUS_NVIDIA
195 rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
196#endif
191 put_record(rec); 197 put_record(rec);
192 } 198 }
193} 199}
@@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id,
239 put_record(rec); 245 put_record(rec);
240 } 246 }
241} 247}
248
249
250
251
252feather_callback void do_sched_trace_prediction_err(unsigned long id,
253 unsigned long _task,
254 unsigned long _distance,
255 unsigned long _rel_err)
256{
257 struct task_struct *t = (struct task_struct*) _task;
258 struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
259
260 if (rec) {
261 gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
262 fp_t* rel_err = (fp_t*) _rel_err;
263
264 rec->data.prediction_err.distance = *distance;
265 rec->data.prediction_err.rel_err = rel_err->val;
266 put_record(rec);
267 }
268}
269
270
271feather_callback void do_sched_trace_migration(unsigned long id,
272 unsigned long _task,
273 unsigned long _mig_info)
274{
275 struct task_struct *t = (struct task_struct*) _task;
276 struct st_event_record *rec = get_record(ST_MIGRATION, t);
277
278 if (rec) {
279 struct migration_info* mig_info = (struct migration_info*) _mig_info;
280
281 rec->hdr.extra = mig_info->distance;
282 rec->data.migration.observed = mig_info->observed;
283 rec->data.migration.estimated = mig_info->estimated;
284
285 put_record(rec);
286 }
287}
288
289
290
291
292
293
294
295
296
297feather_callback void do_sched_trace_tasklet_release(unsigned long id,
298 unsigned long _owner)
299{
300 struct task_struct *t = (struct task_struct*) _owner;
301 struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
302
303 if (rec) {
304 rec->data.tasklet_release.when = now();
305 put_record(rec);
306 }
307}
308
309
310feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
311 unsigned long _owner)
312{
313 struct task_struct *t = (struct task_struct*) _owner;
314 struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
315
316 if (rec) {
317 rec->data.tasklet_begin.when = now();
318
319 if(!in_interrupt())
320 rec->data.tasklet_begin.exe_pid = current->pid;
321 else
322 rec->data.tasklet_begin.exe_pid = 0;
323
324 put_record(rec);
325 }
326}
327EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
328
329
330feather_callback void do_sched_trace_tasklet_end(unsigned long id,
331 unsigned long _owner,
332 unsigned long _flushed)
333{
334 struct task_struct *t = (struct task_struct*) _owner;
335 struct st_event_record *rec = get_record(ST_TASKLET_END, t);
336
337 if (rec) {
338 rec->data.tasklet_end.when = now();
339 rec->data.tasklet_end.flushed = _flushed;
340
341 if(!in_interrupt())
342 rec->data.tasklet_end.exe_pid = current->pid;
343 else
344 rec->data.tasklet_end.exe_pid = 0;
345
346 put_record(rec);
347 }
348}
349EXPORT_SYMBOL(do_sched_trace_tasklet_end);
350
351
352feather_callback void do_sched_trace_work_release(unsigned long id,
353 unsigned long _owner)
354{
355 struct task_struct *t = (struct task_struct*) _owner;
356 struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
357
358 if (rec) {
359 rec->data.work_release.when = now();
360 put_record(rec);
361 }
362}
363
364
365feather_callback void do_sched_trace_work_begin(unsigned long id,
366 unsigned long _owner,
367 unsigned long _exe)
368{
369 struct task_struct *t = (struct task_struct*) _owner;
370 struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
371
372 if (rec) {
373 struct task_struct *exe = (struct task_struct*) _exe;
374 rec->data.work_begin.exe_pid = exe->pid;
375 rec->data.work_begin.when = now();
376 put_record(rec);
377 }
378}
379EXPORT_SYMBOL(do_sched_trace_work_begin);
380
381
382feather_callback void do_sched_trace_work_end(unsigned long id,
383 unsigned long _owner,
384 unsigned long _exe,
385 unsigned long _flushed)
386{
387 struct task_struct *t = (struct task_struct*) _owner;
388 struct st_event_record *rec = get_record(ST_WORK_END, t);
389
390 if (rec) {
391 struct task_struct *exe = (struct task_struct*) _exe;
392 rec->data.work_end.exe_pid = exe->pid;
393 rec->data.work_end.flushed = _flushed;
394 rec->data.work_end.when = now();
395 put_record(rec);
396 }
397}
398EXPORT_SYMBOL(do_sched_trace_work_end);
399
400
401feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
402 unsigned long _task,
403 unsigned long _inh)
404{
405 struct task_struct *t = (struct task_struct*) _task;
406 struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
407
408 if (rec) {
409 struct task_struct *inh = (struct task_struct*) _inh;
410 rec->data.effective_priority_change.when = now();
411 rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
412 inh->pid :
413 0xffff;
414
415 put_record(rec);
416 }
417}
418
419/* pray for no nesting of nv interrupts on same CPU... */
420struct tracing_interrupt_map
421{
422 int active;
423 int count;
424 unsigned long data[128]; // assume nesting less than 128...
425 unsigned long serial[128];
426};
427DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
428
429
430DEFINE_PER_CPU(u32, intCounter);
431
432feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
433 unsigned long _device)
434{
435 struct st_event_record *rec;
436 u32 serialNum;
437
438 {
439 u32* serial;
440 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
 441 if(int_map->active == 0xcafebabe)
442 {
443 int_map->count++;
444 }
445 else
446 {
447 int_map->active = 0xcafebabe;
448 int_map->count = 1;
449 }
450 //int_map->data[int_map->count-1] = _device;
451
452 serial = &per_cpu(intCounter, smp_processor_id());
453 *serial += num_online_cpus();
454 serialNum = *serial;
455 int_map->serial[int_map->count-1] = serialNum;
456 }
457
458 rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
459 if(rec) {
460 u32 device = _device;
461 rec->data.nv_interrupt_begin.when = now();
462 rec->data.nv_interrupt_begin.device = device;
463 rec->data.nv_interrupt_begin.serialNumber = serialNum;
464 put_record(rec);
465 }
466}
467EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
468
469/*
470int is_interrupt_tracing_active(void)
471{
472 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
473 if(int_map->active == 0xcafebabe)
474 return 1;
475 return 0;
476}
477*/
478
479feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
480{
481 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
482 if(int_map->active == 0xcafebabe)
483 {
484 struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
485
486 int_map->count--;
487 if(int_map->count == 0)
488 int_map->active = 0;
489
490 if(rec) {
491 u32 device = _device;
492 rec->data.nv_interrupt_end.when = now();
493 //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
494 rec->data.nv_interrupt_end.device = device;
495 rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
496 put_record(rec);
497 }
498 }
499}
500EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
501
502
503
504
505
506
507
508
509
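
do_sched_trace_nv_interrupt_begin() above tracks nesting depth in tracing_interrupt_map.count and draws a serial number from a per-CPU intCounter that it advances by num_online_cpus(). As defined, every per-CPU counter starts at zero, so the streams stay disjoint across CPUs only if each counter is seeded at a distinct offset (for example its CPU id); the sketch below makes that seeding assumption explicit and simulates the per-CPU machinery with a plain array.

/* User-space sketch of the per-CPU serial numbers drawn in
 * do_sched_trace_nv_interrupt_begin(). ASSUMPTION: the streams are only
 * disjoint across CPUs if each per-CPU counter is seeded at a distinct
 * offset (e.g. its CPU id); the kernel code starts every counter at zero,
 * so that seeding is supplied here. CPU ids are simulated. */
#include <stdio.h>

#define NCPUS 4                         /* num_online_cpus() stand-in */

static unsigned int counter[NCPUS];     /* per_cpu(intCounter, cpu) */

static unsigned int next_serial(int cpu)
{
        /* seed each stream at its CPU id on first use (assumption, see above) */
        if (counter[cpu] == 0)
                counter[cpu] = cpu;
        counter[cpu] += NCPUS;          /* *serial += num_online_cpus(); */
        return counter[cpu];
}

int main(void)
{
        for (int round = 0; round < 3; round++)
                for (int cpu = 0; cpu < NCPUS; cpu++)
                        printf("cpu %d -> serial %u\n", cpu, next_serial(cpu));
        return 0;
}
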
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
new file mode 100644
index 000000000000..cf8e1d78aa77
--- /dev/null
+++ b/litmus/sched_trace_external.c
@@ -0,0 +1,64 @@
1#include <linux/module.h>
2
3#include <litmus/trace.h>
4#include <litmus/sched_trace.h>
5#include <litmus/litmus.h>
6
7void __sched_trace_tasklet_begin_external(struct task_struct* t)
8{
9 sched_trace_tasklet_begin(t);
10}
11EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
12
13void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
14{
15 sched_trace_tasklet_end(t, flushed);
16}
17EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
18
19
20
21void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
22{
23 sched_trace_work_begin(t, e);
24}
25EXPORT_SYMBOL(__sched_trace_work_begin_external);
26
27void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
28{
29 sched_trace_work_end(t, e, f);
30}
31EXPORT_SYMBOL(__sched_trace_work_end_external);
32
33
34
35void __sched_trace_nv_interrupt_begin_external(u32 device)
36{
37 //unsigned long _device = device;
38 sched_trace_nv_interrupt_begin((unsigned long)device);
39}
40EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
41
42void __sched_trace_nv_interrupt_end_external(u32 device)
43{
44 //unsigned long _device = device;
45 sched_trace_nv_interrupt_end((unsigned long)device);
46}
47EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
48
49
50#ifdef CONFIG_LITMUS_NVIDIA
51
52#define EXX_TS(evt) \
53void __##evt(void) { evt; } \
54EXPORT_SYMBOL(__##evt);
55
56EXX_TS(TS_NV_TOPISR_START)
57EXX_TS(TS_NV_TOPISR_END)
58EXX_TS(TS_NV_BOTISR_START)
59EXX_TS(TS_NV_BOTISR_END)
60EXX_TS(TS_NV_RELEASE_BOTISR_START)
61EXX_TS(TS_NV_RELEASE_BOTISR_END)
62
63#endif
64
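
sched_trace_external.c wraps each sched_trace_*() and TS_NV_*() trace point, which are macros, in a small exported function, apparently so that code built outside this file (e.g. the NVIDIA interrupt glue) can trigger them through real symbols; EXX_TS() stamps out the timestamp wrappers mechanically. A user-space sketch of the same macro-generated wrapper pattern, with EXPORT_SYMBOL and the timestamp macros stubbed out:

/* User-space sketch of the macro-generated wrapper pattern in
 * sched_trace_external.c; EXPORT_SYMBOL and the TS_NV_* timestamp macros
 * are stubbed so the example builds outside the kernel. */
#include <stdio.h>

#define EXPORT_SYMBOL(sym)              /* kernel-only; no-op here */

/* stand-ins for the feather-trace timestamp macros */
#define TS_NV_TOPISR_START      printf("timestamp: nv top-half ISR start\n")
#define TS_NV_TOPISR_END        printf("timestamp: nv top-half ISR end\n")

/* generates `void __TS_NV_TOPISR_START(void)' etc., one wrapper per macro */
#define EXX_TS(evt) \
void __##evt(void) { evt; } \
EXPORT_SYMBOL(__##evt)

EXX_TS(TS_NV_TOPISR_START)
EXX_TS(TS_NV_TOPISR_END)

int main(void)
{
        __TS_NV_TOPISR_START();         /* an external module would call these */
        __TS_NV_TOPISR_END();
        return 0;
}
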