Diffstat (limited to 'litmus')
-rw-r--r--  litmus/Kconfig                 |  148
-rw-r--r--  litmus/Makefile                |   11
-rw-r--r--  litmus/affinity.c              |    2
-rw-r--r--  litmus/binheap.c               |  443
-rw-r--r--  litmus/edf_common.c            |  147
-rw-r--r--  litmus/fdso.c                  |   13
-rw-r--r--  litmus/gpu_affinity.c          |  113
-rw-r--r--  litmus/ikglp_lock.c            | 2838
-rw-r--r--  litmus/jobs.c                  |   17
-rw-r--r--  litmus/kexclu_affinity.c       |   92
-rw-r--r--  litmus/kfmlp_lock.c            | 1002
-rw-r--r--  litmus/litmus.c                |  126
-rw-r--r--  litmus/litmus_pai_softirq.c    |   64
-rw-r--r--  litmus/litmus_proc.c           |   17
-rw-r--r--  litmus/litmus_softirq.c        | 1582
-rw-r--r--  litmus/locking.c               |  393
-rw-r--r--  litmus/nvidia_info.c           |  597
-rw-r--r--  litmus/preempt.c               |    5
-rw-r--r--  litmus/rsm_lock.c              |  796
-rw-r--r--  litmus/sched_cedf.c            | 1062
-rw-r--r--  litmus/sched_gsn_edf.c         | 1032
-rw-r--r--  litmus/sched_litmus.c          |    2
-rw-r--r--  litmus/sched_plugin.c          |  135
-rw-r--r--  litmus/sched_task_trace.c      |  282
-rw-r--r--  litmus/sched_trace_external.c  |   64
25 files changed, 10798 insertions(+), 185 deletions(-)
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 94b48e199577..8c156e4da528 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -60,6 +60,42 @@ config LITMUS_LOCKING
60 Say Yes if you want to include locking protocols such as the FMLP and
61 Baker's SRP.
62
63config LITMUS_AFFINITY_LOCKING
64 bool "Enable affinity infrastructure in k-exclusion locking protocols."
65 depends on LITMUS_LOCKING
66 default n
67 help
68 Enable affinity tracking infrastructure in k-exclusion locking protocols.
69 This only enables the *infrastructure*, not the actual affinity algorithms.
70
71 If unsure, say No.
72
73config LITMUS_NESTED_LOCKING
74 bool "Support for nested inheritance in locking protocols"
75 depends on LITMUS_LOCKING
76 default n
77 help
78 Enable nested priority inheritance.
79
80config LITMUS_DGL_SUPPORT
81 bool "Support for dynamic group locks"
82 depends on LITMUS_NESTED_LOCKING
83 default n
84 help
85 Enable dynamic group lock support.
86
87config LITMUS_MAX_DGL_SIZE
88 int "Maximum size of a dynamic group lock."
89 depends on LITMUS_DGL_SUPPORT
90 range 1 128
91 default "10"
92 help
93 Dynamic group lock data structures are allocated on the process
94 stack when a group is requested. We set a maximum number of
95 locks in a dynamic group lock to avoid dynamic allocation.
96
97 TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE.
98
99endmenu
100
101menu "Performance Enhancements"
@@ -121,7 +157,7 @@ config SCHED_TASK_TRACE
157config SCHED_TASK_TRACE_SHIFT
158 int "Buffer size for sched_trace_xxx() events"
159 depends on SCHED_TASK_TRACE
124 range 8 13 160 range 8 15
161 default 9
162 help
163
@@ -215,4 +251,114 @@ config PREEMPT_STATE_TRACE
251
252endmenu
253
254menu "Interrupt Handling"
255
256choice
257 prompt "Scheduling of interrupt bottom-halves in Litmus."
258 default LITMUS_SOFTIRQD_NONE
259 depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
260 help
261 Schedule tasklets with known priorities in Litmus.
262
263config LITMUS_SOFTIRQD_NONE
264 bool "No tasklet scheduling in Litmus."
265 help
266 Don't schedule tasklets in Litmus. Default.
267
268config LITMUS_SOFTIRQD
269 bool "Spawn klitirqd interrupt handling threads."
270 help
271 Create klitirqd interrupt handling threads. Work must be
272 specifically dispatched to these workers. (Softirqs for
273 Litmus tasks are not magically redirected to klitirqd.)
274
275 G-EDF/RM, C-EDF/RM ONLY for now!
276
277
278config LITMUS_PAI_SOFTIRQD
279 bool "Defer tasklets to context switch points."
280 help
281 Only execute scheduled tasklet bottom halves at
282 scheduling points. This trades reduced context-switch
283 overhead for non-preemptive durations of bottom-half
284 processing.
285
286 G-EDF/RM, C-EDF/RM ONLY for now!
287
288endchoice
289
290
291config NR_LITMUS_SOFTIRQD
292 int "Number of klitirqd."
293 depends on LITMUS_SOFTIRQD
294 range 1 4096
295 default "1"
296 help
297 Should be at most the number of CPUs in your system.
298
299config LITMUS_NVIDIA
300 bool "Litmus handling of NVIDIA interrupts."
301 default n
302 help
303 Direct tasklets from NVIDIA devices to Litmus's klitirqd
304 or PAI interrupt handling routines.
305
306 If unsure, say No.
307
308config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT
309 bool "Enable affinity-aware heuristics to improve GPU assignment."
310 depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING
311 default n
312 help
313 Enable several heuristics to improve the assignment
314 of GPUs to real-time tasks to reduce the overheads
315 of memory migrations.
316
317 If unsure, say No.
318
319config NV_DEVICE_NUM
320 int "Number of NVIDIA GPUs."
321 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
322 range 1 4096
323 default "1"
324 help
325 Should be at most the number of CPUs and
326 at most the number of GPUs in your system.
327
328config NV_MAX_SIMULT_USERS
329 int "Maximum number of threads sharing a GPU simultanously"
330 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
331 range 1 3
332 default "2"
333 help
334 Should equal the number of copy engines plus execution engines
335 of the GPUs in your system.
336
337 Scientific/professional GPUs = 3 (e.g., M2070, Quadro 6000?)
338 Consumer Fermi/Kepler GPUs = 2 (GTX-4xx through GTX-6xx)
339 Older GPUs = 1 (e.g., GTX-2xx)
340
341choice
342 prompt "CUDA/Driver Version Support"
343 default CUDA_4_0
344 depends on LITMUS_NVIDIA
345 help
346 Select the version of CUDA/driver to support.
347
348config CUDA_4_0
349 bool "CUDA 4.0"
350 depends on LITMUS_NVIDIA
351 help
352 Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
353
354config CUDA_3_2
355 bool "CUDA 3.2"
356 depends on LITMUS_NVIDIA
357 help
358 Support CUDA 3.2 (dev. driver version: x86_64-260.24)
359
360endchoice
361
362endmenu
363
364endmenu
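The LITMUS_MAX_DGL_SIZE option added above exists so that a dynamic-group-lock request never needs dynamic allocation on the locking fast path. A minimal sketch of the idea, assuming only that the Kconfig value is exported as CONFIG_LITMUS_MAX_DGL_SIZE; the structure and function names below are illustrative and not the API introduced by this patch:

#include <linux/errno.h>

/* Illustrative only: a group-lock request bounded by the Kconfig limit so
 * that it fits on the caller's stack instead of being kmalloc()'d. */
#define EXAMPLE_MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE

struct example_dgl_request {
	int size;                                        /* locks actually used */
	struct litmus_lock *locks[EXAMPLE_MAX_DGL_SIZE]; /* fixed-size array */
};

static int example_dgl_lock_all(struct example_dgl_request *req)
{
	int i;

	if (req->size > EXAMPLE_MAX_DGL_SIZE)
		return -EINVAL; /* would require batching (see the TODO above) */

	for (i = 0; i < req->size; i++) {
		/* acquire req->locks[i] while holding the global DGL lock ... */
	}
	return 0;
}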
diff --git a/litmus/Makefile b/litmus/Makefile
index 7338180f196f..080cbf694a41 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -15,9 +15,11 @@ obj-y = sched_plugin.o litmus.o \
15 locking.o \
16 srp.o \
17 bheap.o \
18 binheap.o \
19 ctrldev.o \
20 sched_gsn_edf.o \
20 sched_psn_edf.o 21 sched_psn_edf.o \
22 kfmlp_lock.o
23
24obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
25obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
@@ -27,3 +29,10 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
29obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
30obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
31obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
32
33obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o
34obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
35obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
36obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
37
38obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
index 3fa6dd789400..cd93249b5506 100644
--- a/litmus/affinity.c
+++ b/litmus/affinity.c
@@ -26,7 +26,7 @@ void init_topology(void) {
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 }
32
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..8d42403ad52c
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,443 @@
1#include <litmus/binheap.h>
2
3//extern void dump_node_data(struct binheap_node* parent, struct binheap_node* child);
4//extern void dump_node_data2(struct binheap_handle *handle, struct binheap_node* bad_node);
5
6int binheap_is_in_this_heap(struct binheap_node *node,
7 struct binheap_handle* heap)
8{
9 if(!binheap_is_in_heap(node)) {
10 return 0;
11 }
12
13 while(node->parent != NULL) {
14 node = node->parent;
15 }
16
17 return (node == heap->root);
18}
19
20/* Update the node reference pointers. Same logic as Litmus binomial heap. */
21static void __update_ref(struct binheap_node *parent,
22 struct binheap_node *child)
23{
24 *(parent->ref_ptr) = child;
25 *(child->ref_ptr) = parent;
26
27 swap(parent->ref_ptr, child->ref_ptr);
28}
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34// if(parent == BINHEAP_POISON || child == BINHEAP_POISON) {
35// dump_node_data(parent, child);
36// BUG();
37// }
38
39 swap(parent->data, child->data);
40 __update_ref(parent, child);
41}
42
43
44/* Swaps memory and data between two nodes. Actual nodes swap instead of
45 * just data. Needed when we delete nodes from the heap.
46 */
47static void __binheap_swap_safe(struct binheap_handle *handle,
48 struct binheap_node *a,
49 struct binheap_node *b)
50{
51 swap(a->data, b->data);
52 __update_ref(a, b);
53
54 if((a->parent != NULL) && (a->parent == b->parent)) {
55 /* special case: shared parent */
56 swap(a->parent->left, a->parent->right);
57 }
58 else {
59 /* Update pointers to swap parents. */
60
61 if(a->parent) {
62 if(a == a->parent->left) {
63 a->parent->left = b;
64 }
65 else {
66 a->parent->right = b;
67 }
68 }
69
70 if(b->parent) {
71 if(b == b->parent->left) {
72 b->parent->left = a;
73 }
74 else {
75 b->parent->right = a;
76 }
77 }
78
79 swap(a->parent, b->parent);
80 }
81
82 /* swap children */
83
84 if(a->left) {
85 a->left->parent = b;
86
87 if(a->right) {
88 a->right->parent = b;
89 }
90 }
91
92 if(b->left) {
93 b->left->parent = a;
94
95 if(b->right) {
96 b->right->parent = a;
97 }
98 }
99
100 swap(a->left, b->left);
101 swap(a->right, b->right);
102
103
104 /* update next/last/root pointers */
105
106 if(a == handle->next) {
107 handle->next = b;
108 }
109 else if(b == handle->next) {
110 handle->next = a;
111 }
112
113 if(a == handle->last) {
114 handle->last = b;
115 }
116 else if(b == handle->last) {
117 handle->last = a;
118 }
119
120 if(a == handle->root) {
121 handle->root = b;
122 }
123 else if(b == handle->root) {
124 handle->root = a;
125 }
126}
127
128
129/**
130 * Update the pointer to the last node in the complete binary tree.
131 * Called internally after the root node has been deleted.
132 */
133static void __binheap_update_last(struct binheap_handle *handle)
134{
135 struct binheap_node *temp = handle->last;
136
137 /* find a "bend" in the tree. */
138 while(temp->parent && (temp == temp->parent->left)) {
139 temp = temp->parent;
140 }
141
142 /* step over to sibling if we're not at root */
143 if(temp->parent != NULL) {
144 temp = temp->parent->left;
145 }
146
147 /* now travel right as far as possible. */
148 while(temp->right != NULL) {
149 temp = temp->right;
150 }
151
152 /* take one step to the left if we're not at the bottom-most level. */
153 if(temp->left != NULL) {
154 temp = temp->left;
155 }
156
157 //BUG_ON(!(temp->left == NULL && temp->right == NULL));
158
159 handle->last = temp;
160}
161
162/**
163 * Update the pointer to the node that will take the next inserted node.
164 * Called internally after a node has been inserted.
165 */
166static void __binheap_update_next(struct binheap_handle *handle)
167{
168 struct binheap_node *temp = handle->next;
169
170 /* find a "bend" in the tree. */
171 while(temp->parent && (temp == temp->parent->right)) {
172 temp = temp->parent;
173 }
174
175 /* step over to sibling if we're not at root */
176 if(temp->parent != NULL) {
177 temp = temp->parent->right;
178 }
179
180 /* now travel left as far as possible. */
181 while(temp->left != NULL) {
182 temp = temp->left;
183 }
184
185 handle->next = temp;
186}
187
188
189
190/* bubble node up towards root */
191static void __binheap_bubble_up(
192 struct binheap_handle *handle,
193 struct binheap_node *node)
194{
195 //BUG_ON(!binheap_is_in_heap(node));
196// if(!binheap_is_in_heap(node))
197// {
198// dump_node_data2(handle, node);
199// BUG();
200// }
201
202 while((node->parent != NULL) &&
203 ((node->data == BINHEAP_POISON) /* let BINHEAP_POISON data bubble to the top */ ||
204 handle->compare(node, node->parent))) {
205 __binheap_swap(node->parent, node);
206 node = node->parent;
207
208// if(!binheap_is_in_heap(node))
209// {
210// dump_node_data2(handle, node);
211// BUG();
212// }
213 }
214}
215
216
217/* bubble node down, swapping with min-child */
218static void __binheap_bubble_down(struct binheap_handle *handle)
219{
220 struct binheap_node *node = handle->root;
221
222 while(node->left != NULL) {
223 if(node->right && handle->compare(node->right, node->left)) {
224 if(handle->compare(node->right, node)) {
225 __binheap_swap(node, node->right);
226 node = node->right;
227 }
228 else {
229 break;
230 }
231 }
232 else {
233 if(handle->compare(node->left, node)) {
234 __binheap_swap(node, node->left);
235 node = node->left;
236 }
237 else {
238 break;
239 }
240 }
241 }
242}
243
244
245
246void __binheap_add(struct binheap_node *new_node,
247 struct binheap_handle *handle,
248 void *data)
249{
250// if(binheap_is_in_heap(new_node))
251// {
252// dump_node_data2(handle, new_node);
253// BUG();
254// }
255
256 new_node->data = data;
257 new_node->ref = new_node;
258 new_node->ref_ptr = &(new_node->ref);
259
260 if(!binheap_empty(handle)) {
261 /* insert left side first */
262 if(handle->next->left == NULL) {
263 handle->next->left = new_node;
264 new_node->parent = handle->next;
265 new_node->left = NULL;
266 new_node->right = NULL;
267
268 handle->last = new_node;
269
270 __binheap_bubble_up(handle, new_node);
271 }
272 else {
273 /* left occupied. insert right. */
274 handle->next->right = new_node;
275 new_node->parent = handle->next;
276 new_node->left = NULL;
277 new_node->right = NULL;
278
279 handle->last = new_node;
280
281 __binheap_update_next(handle);
282 __binheap_bubble_up(handle, new_node);
283 }
284 }
285 else {
286 /* first node in heap */
287
288 new_node->parent = NULL;
289 new_node->left = NULL;
290 new_node->right = NULL;
291
292 handle->root = new_node;
293 handle->next = new_node;
294 handle->last = new_node;
295 }
296}
297
298
299
300/**
301 * Removes the root node from the heap. The node is removed after coalescing
302 * the binheap_node with its original data pointer at the root of the tree.
303 *
304 * The 'last' node in the tree is then swapped up to the root and bubbled
305 * down.
306 */
307void __binheap_delete_root(struct binheap_handle *handle,
308 struct binheap_node *container)
309{
310 struct binheap_node *root = handle->root;
311
312// if(!binheap_is_in_heap(container))
313// {
314// dump_node_data2(handle, container);
315// BUG();
316// }
317
318 if(root != container) {
319 /* coalesce */
320 __binheap_swap_safe(handle, root, container);
321 root = container;
322 }
323
324 if(handle->last != root) {
325 /* swap 'last' node up to root and bubble it down. */
326
327 struct binheap_node *to_move = handle->last;
328
329 if(to_move->parent != root) {
330 handle->next = to_move->parent;
331
332 if(handle->next->right == to_move) {
333 /* disconnect from parent */
334 to_move->parent->right = NULL;
335 handle->last = handle->next->left;
336 }
337 else {
338 /* find new 'last' before we disconnect */
339 __binheap_update_last(handle);
340
341 /* disconnect from parent */
342 to_move->parent->left = NULL;
343 }
344 }
345 else {
346 /* 'last' is direct child of root */
347
348 handle->next = to_move;
349
350 if(to_move == to_move->parent->right) {
351 to_move->parent->right = NULL;
352 handle->last = to_move->parent->left;
353 }
354 else {
355 to_move->parent->left = NULL;
356 handle->last = to_move;
357 }
358 }
359 to_move->parent = NULL;
360
361 /* reconnect as root. We can't just swap data ptrs since root node
362 * may be freed after this function returns.
363 */
364 to_move->left = root->left;
365 to_move->right = root->right;
366 if(to_move->left != NULL) {
367 to_move->left->parent = to_move;
368 }
369 if(to_move->right != NULL) {
370 to_move->right->parent = to_move;
371 }
372
373 handle->root = to_move;
374
375 /* bubble down */
376 __binheap_bubble_down(handle);
377 }
378 else {
379 /* removing last node in tree */
380 handle->root = NULL;
381 handle->next = NULL;
382 handle->last = NULL;
383 }
384
385 /* mark as removed */
386 container->parent = BINHEAP_POISON;
387}
388
389
390/**
391 * Delete an arbitrary node. Bubble node to delete up to the root,
392 * and then delete the root.
393 */
394void __binheap_delete(struct binheap_node *node_to_delete,
395 struct binheap_handle *handle)
396{
397 struct binheap_node *target = node_to_delete->ref;
398 void *temp_data = target->data;
399
400// if(!binheap_is_in_heap(node_to_delete))
401// {
402// dump_node_data2(handle, node_to_delete);
403// BUG();
404// }
405//
406// if(!binheap_is_in_heap(target))
407// {
408// dump_node_data2(handle, target);
409// BUG();
410// }
411
412 /* temporarily set data to BINHEAP_POISON so the node bubbles up to the top. */
413 target->data = BINHEAP_POISON;
414
415 __binheap_bubble_up(handle, target);
416 __binheap_delete_root(handle, node_to_delete);
417
418 node_to_delete->data = temp_data; /* restore node data pointer */
419 //node_to_delete->parent = BINHEAP_POISON; /* poison the node */
420}
421
422/**
423 * Bubble up a node whose key has decreased in value.
424 */
425void __binheap_decrease(struct binheap_node *orig_node,
426 struct binheap_handle *handle)
427{
428 struct binheap_node *target = orig_node->ref;
429
430// if(!binheap_is_in_heap(orig_node))
431// {
432// dump_node_data2(handle, orig_node);
433// BUG();
434// }
435//
436// if(!binheap_is_in_heap(target))
437// {
438// dump_node_data2(handle, target);
439// BUG();
440// }
441//
442 __binheap_bubble_up(handle, target);
443}
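The heap above is manipulated through the wrapper macros used later in this patch (INIT_BINHEAP_NODE(), binheap_add(), binheap_top_entry(), binheap_delete_root(), binheap_delete(), binheap_entry()). A minimal usage sketch; the handle-initialization macro name and exact field layout come from litmus/binheap.h, which is not part of this excerpt, so treat them as assumptions:

#include <litmus/binheap.h>

/* Example payload with an embedded heap node. */
struct example_job {
	unsigned long long deadline;
	struct binheap_node node;
};

/* Comparator: return nonzero if 'a' belongs closer to the root than 'b'
 * (here: earlier deadline wins, i.e. a min-heap ordered by deadline). */
static int example_order(struct binheap_node *a, struct binheap_node *b)
{
	struct example_job *ja = binheap_entry(a, struct example_job, node);
	struct example_job *jb = binheap_entry(b, struct example_job, node);
	return ja->deadline < jb->deadline;
}

static void example_binheap_usage(void)
{
	struct binheap_handle heap;
	struct example_job j1 = { .deadline = 10 };
	struct example_job j2 = { .deadline = 5 };
	struct example_job *top;

	INIT_BINHEAP_HANDLE(&heap, example_order);  /* assumed init macro */
	INIT_BINHEAP_NODE(&j1.node);
	INIT_BINHEAP_NODE(&j2.node);

	binheap_add(&j1.node, &heap, struct example_job, node);
	binheap_add(&j2.node, &heap, struct example_job, node);

	top = binheap_top_entry(&heap, struct example_job, node); /* j2 */
	binheap_delete_root(&heap, struct example_job, node);     /* removes j2 */
	binheap_delete(&j1.node, &heap);           /* remove an arbitrary node */
	(void)top;
}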
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 9b44dc2d8d1e..b346bdd65b3b 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -12,40 +12,61 @@
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#ifdef CONFIG_LITMUS_NESTED_LOCKING
16#include <litmus/locking.h>
17#endif
18
19#include <litmus/edf_common.h>
20
21
22
23/* edf_higher_prio - returns true if first has a higher EDF priority
24 * than second. Deadline ties are broken by PID.
25 *
26 * both first and second may be NULL
27 */
22int edf_higher_prio(struct task_struct* first, 28#ifdef CONFIG_LITMUS_NESTED_LOCKING
23 struct task_struct* second) 29int __edf_higher_prio(
30 struct task_struct* first, comparison_mode_t first_mode,
31 struct task_struct* second, comparison_mode_t second_mode)
32#else
33int edf_higher_prio(struct task_struct* first, struct task_struct* second)
34#endif
35{
36 struct task_struct *first_task = first;
37 struct task_struct *second_task = second;
38
39 /* There is no point in comparing a task to itself. */
40 if (first && first == second) {
30 TRACE_TASK(first, 41 TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid);
31 "WARNING: pointless edf priority comparison.\n"); 42 WARN_ON(1);
43 return 0;
44 }
45
46
47 /* check for NULL tasks */
37 if (!first || !second) 48 if (!first || !second) {
49 return first && !second;
50 }
51
52#ifdef CONFIG_LITMUS_LOCKING
41 53 /* Check for EFFECTIVE priorities. Change task
42 /* Check for inherited priorities. Change task
54 * used for comparison in such a case.
55 */
45 if (unlikely(first->rt_param.inh_task)) 56 if (unlikely(first->rt_param.inh_task)
57#ifdef CONFIG_LITMUS_NESTED_LOCKING
58 && (first_mode == EFFECTIVE)
59#endif
60 ) {
61 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task)) 62 }
63 if (unlikely(second->rt_param.inh_task)
64#ifdef CONFIG_LITMUS_NESTED_LOCKING
65 && (second_mode == EFFECTIVE)
66#endif
67 ) {
68 second_task = second->rt_param.inh_task;
69 }
70
71 /* Check for priority boosting. Tie-break by start of boosting.
72 */
@@ -53,37 +74,109 @@ int edf_higher_prio(struct task_struct* first,
74 /* first_task is boosted, how about second_task? */
75 if (!is_priority_boosted(second_task) ||
76 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task))) 77 get_boost_start(second_task))) {
78 return 1;
58 else 79 }
80 else {
81 return 0;
60 } else if (unlikely(is_priority_boosted(second_task))) 82 }
83 }
84 else if (unlikely(is_priority_boosted(second_task))) {
85 /* second_task is boosted, first is not*/
86 return 0;
87 }
88
89#endif
90
91// // rate-monotonic for testing
92// if (!is_realtime(second_task)) {
93// return true;
94// }
95//
96// if (shorter_period(first_task, second_task)) {
97// return true;
98// }
99//
100// if (get_period(first_task) == get_period(second_task)) {
101// if (first_task->pid < second_task->pid) {
102// return true;
103// }
104// else if (first_task->pid == second_task->pid) {
105// return !second->rt_param.inh_task;
106// }
107// }
108
109 if (!is_realtime(second_task)) {
110 return true;
111 }
112
113 if (earlier_deadline(first_task, second_task)) {
114 return true;
115 }
116 if (get_deadline(first_task) == get_deadline(second_task)) {
117
118 if (shorter_period(first_task, second_task)) {
119 return true;
120 }
121 if (get_rt_period(first_task) == get_rt_period(second_task)) {
122 if (first_task->pid < second_task->pid) {
123 return true;
124 }
125 if (first_task->pid == second_task->pid) {
126#ifdef CONFIG_LITMUS_SOFTIRQD
127 if (first_task->rt_param.is_proxy_thread <
128 second_task->rt_param.is_proxy_thread) {
129 return true;
130 }
131 if(first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) {
132 return !second->rt_param.inh_task;
133 }
134#else
135 return !second->rt_param.inh_task;
136#endif
137 }
138
139 }
140 }
141
142 return false;
143}
144
145
146#ifdef CONFIG_LITMUS_NESTED_LOCKING
147int edf_higher_prio(struct task_struct* first, struct task_struct* second)
148{
149 return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE);
150}
151
152int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b)
153{
154 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
155 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
156
67 return !is_realtime(second_task) || 157 return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE);
158}
159
69 /* is the deadline of the first task earlier? 160int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b)
70 * Then it has higher priority. 161{
71 */ 162 return edf_max_heap_order(b, a); // swap comparison
72 earlier_deadline(first_task, second_task) || 163}
164
74 /* Do we have a deadline tie? 165int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
75 * Then break by PID. 166{
76 */ 167 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
77 (get_deadline(first_task) == get_deadline(second_task) && 168 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
78 (first_task->pid < second_task->pid ||
169
80 /* If the PIDs are the same then the task with the inherited 170 return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE);
81 * priority wins.
82 */
83 (first_task->pid == second_task->pid &&
84 !second->rt_param.inh_task)));
171}
172
173int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
174{
175 return edf_max_heap_base_priority_order(b, a); // swap comparison
176}
177#endif
178
179
180int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
181{
182 return edf_higher_prio(bheap2task(a), bheap2task(b));
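The BASE/EFFECTIVE comparison modes introduced in this hunk separate a task's own deadline from the priority it currently inherits through rt_param.inh_task. A small illustration of how a caller distinguishes the two; the wrapper function is hypothetical, and only __edf_higher_prio() and the mode constants come from the patch:

#include <litmus/litmus.h>
#include <litmus/edf_common.h>

/* Hypothetical helper, for illustration only. */
static void example_compare_modes(struct task_struct *a, struct task_struct *b)
{
	/* Scheduler-style comparison: honor priority inheritance on both sides. */
	if (__edf_higher_prio(a, EFFECTIVE, b, EFFECTIVE))
		TRACE_TASK(a, "would be scheduled before %s/%d\n", b->comm, b->pid);

	/* Lock-protocol bookkeeping (e.g. the IKGLP donee/donor heaps added by
	 * this patch) often needs the uninherited ordering instead. */
	if (__edf_higher_prio(a, BASE, b, BASE))
		TRACE_TASK(a, "has the higher base (uninherited) EDF priority\n");
}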
diff --git a/litmus/fdso.c b/litmus/fdso.c
index aa7b384264e3..18fc61b6414a 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -20,9 +20,22 @@
20
21extern struct fdso_ops generic_lock_ops;
22
23#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
24extern struct fdso_ops generic_affinity_ops;
25#endif
26
27static const struct fdso_ops* fdso_ops[] = {
28 &generic_lock_ops, /* FMLP_SEM */
29 &generic_lock_ops, /* SRP_SEM */
30 &generic_lock_ops, /* RSM_MUTEX */
31 &generic_lock_ops, /* IKGLP_SEM */
32 &generic_lock_ops, /* KFMLP_SEM */
33#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
34 &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
35 &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
36 &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
37 &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
38#endif
39};
40
41static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
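The new entries keep fdso_ops[] index-aligned with the obj_type_t enumeration, since the array is indexed by object type when user space opens a descriptor. For orientation, the ordering implied by the comments above would look roughly like the following; the authoritative enum lives in include/litmus/fdso.h, which this diff does not show, so this is an inference rather than the patch's definition:

/* Inferred from the fdso_ops[] comments above; illustrative only. */
typedef enum {
	FMLP_SEM                 = 0,
	SRP_SEM                  = 1,
	RSM_MUTEX                = 2,
	IKGLP_SEM                = 3,
	KFMLP_SEM                = 4,
	/* present only with CONFIG_LITMUS_AFFINITY_LOCKING: */
	IKGLP_SIMPLE_GPU_AFF_OBS = 5,
	IKGLP_GPU_AFF_OBS        = 6,
	KFMLP_SIMPLE_GPU_AFF_OBS = 7,
	KFMLP_GPU_AFF_OBS        = 8,
} example_obj_type_t;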
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
new file mode 100644
index 000000000000..9762be1a085e
--- /dev/null
+++ b/litmus/gpu_affinity.c
@@ -0,0 +1,113 @@
1
2#ifdef CONFIG_LITMUS_NVIDIA
3
4#include <linux/sched.h>
5#include <litmus/litmus.h>
6#include <litmus/gpu_affinity.h>
7
8#include <litmus/sched_trace.h>
9
10#define OBSERVATION_CAP 2*1e9
11
12static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
13{
14 fp_t relative_err;
15 fp_t err, new;
16 fp_t actual = _integer_to_fp(observed);
17
18 err = _sub(actual, fb->est);
19 new = _add(_mul(a, err), _mul(b, fb->accum_err));
20
21 relative_err = _div(err, actual);
22
23 fb->est = new;
24 fb->accum_err = _add(fb->accum_err, err);
25
26 return relative_err;
27}
28
29void update_gpu_estimate(struct task_struct *t, lt_t observed)
30{
31 feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
32
33 BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
34
35 if(unlikely(fb->est.val == 0)) {
36 // kludge-- cap observed values to prevent whacky estimations.
37 // whacky stuff happens during the first few jobs.
38 if(unlikely(observed > OBSERVATION_CAP)) {
39 TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
40 observed, OBSERVATION_CAP);
41 observed = OBSERVATION_CAP;
42 }
43
44 // take the first observation as our estimate
45 // (initial value of 0 was bogus anyhow)
46 fb->est = _integer_to_fp(observed);
47 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
48 }
49 else {
50 fp_t rel_err = update_estimate(fb,
51 tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
52 tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
53 observed);
54
55 if(unlikely(_fp_to_integer(fb->est) <= 0)) {
56 TRACE_TASK(t, "Invalid estimate. Patching.\n");
57 fb->est = _integer_to_fp(observed);
58 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
59 }
60 else {
61// struct migration_info mig_info;
62
63 sched_trace_prediction_err(t,
64 &(tsk_rt(t)->gpu_migration),
65 &rel_err);
66
67// mig_info.observed = observed;
68// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
69// mig_info.distance = tsk_rt(t)->gpu_migration;
70//
71// sched_trace_migration(t, &mig_info);
72 }
73 }
74
75 TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
76 tsk_rt(t)->gpu_migration,
77 observed,
78 _fp_to_integer(fb->est),
79 _point(fb->est));
80}
81
82gpu_migration_dist_t gpu_migration_distance(int a, int b)
83{
84 // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs
85 int i;
86 int dist;
87
88 if(likely(a >= 0 && b >= 0)) {
89 for(i = 0; i <= MIG_FAR; ++i) {
90 if(a>>i == b>>i) {
91 dist = i;
92 goto out;
93 }
94 }
95 dist = MIG_NONE; // hopefully never reached.
96 TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
97 }
98 else {
99 dist = MIG_NONE;
100 }
101
102out:
103 TRACE_CUR("Distance %d -> %d is %d\n",
104 a, b, dist);
105
106 return dist;
107}
108
109
110
111
112#endif
113
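gpu_migration_distance() above treats GPU indices as leaves of a binary tree and reports how many levels up two indices share an ancestor. A small sketch exercising it on a hypothetical 8-GPU system (indices 0-7); the assertions assume the usual LITMUS ordering MIG_LOCAL < MIG_NEAR < MIG_MED < MIG_FAR < MIG_NONE, which is defined in a header not shown in this diff:

#include <linux/bug.h>
#include <litmus/gpu_affinity.h>

/* Illustration only; not part of the patch. */
static void example_gpu_distances(void)
{
	BUG_ON(gpu_migration_distance(5, 5) != 0);  /* same GPU (5>>0 == 5>>0) */
	BUG_ON(gpu_migration_distance(2, 3) != 1);  /* siblings (2>>1 == 3>>1) */
	BUG_ON(gpu_migration_distance(1, 3) != 2);  /* share a grandparent     */
	BUG_ON(gpu_migration_distance(0, 7) != 3);  /* opposite halves of tree */
	BUG_ON(gpu_migration_distance(-1, 2) == 0); /* unknown source => MIG_NONE */
}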
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
new file mode 100644
index 000000000000..83b708ab85cb
--- /dev/null
+++ b/litmus/ikglp_lock.c
@@ -0,0 +1,2838 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/ikglp_lock.h>
14
15// big signed value.
16#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF
17
18int ikglp_max_heap_base_priority_order(struct binheap_node *a,
19 struct binheap_node *b)
20{
21 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
22 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
23
24 BUG_ON(!d_a);
25 BUG_ON(!d_b);
26
27 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
28}
29
30int ikglp_min_heap_base_priority_order(struct binheap_node *a,
31 struct binheap_node *b)
32{
33 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
34 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
35
36 return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
37}
38
39int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
40 struct binheap_node *b)
41{
42 ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
43 ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
44
45 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
46}
47
48
49int ikglp_min_heap_donee_order(struct binheap_node *a,
50 struct binheap_node *b)
51{
52 struct task_struct *prio_a, *prio_b;
53
54 ikglp_donee_heap_node_t *d_a =
55 binheap_entry(a, ikglp_donee_heap_node_t, node);
56 ikglp_donee_heap_node_t *d_b =
57 binheap_entry(b, ikglp_donee_heap_node_t, node);
58
59 if(!d_a->donor_info) {
60 prio_a = d_a->task;
61 }
62 else {
63 prio_a = d_a->donor_info->task;
64 BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
65 }
66
67 if(!d_b->donor_info) {
68 prio_b = d_b->task;
69 }
70 else {
71 prio_b = d_b->donor_info->task;
72 BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
73 }
74
75 // note reversed order
76 return litmus->__compare(prio_b, BASE, prio_a, BASE);
77}
78
79
80
81static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
82 struct fifo_queue *queue)
83{
84 return (queue - &sem->fifo_queues[0]);
85}
86
87static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
88 struct task_struct *holder)
89{
90 int i;
91 for(i = 0; i < sem->nr_replicas; ++i)
92 if(sem->fifo_queues[i].owner == holder)
93 return(&sem->fifo_queues[i]);
94 return(NULL);
95}
96
97
98
99static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
100 struct task_struct *skip)
101{
102 struct list_head *pos;
103 struct task_struct *queued, *found = NULL;
104
105 list_for_each(pos, &kqueue->wait.task_list) {
106 queued = (struct task_struct*) list_entry(pos,
107 wait_queue_t, task_list)->private;
108
109 /* Compare task prios, find high prio task. */
110 if(queued != skip && litmus->compare(queued, found))
111 found = queued;
112 }
113 return found;
114}
115
116static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
117 struct fifo_queue *search_start)
118{
119 // we start our search at search_start instead of at the beginning of the
120 // queue list to load-balance across all resources.
121 struct fifo_queue* step = search_start;
122 struct fifo_queue* shortest = sem->shortest_fifo_queue;
123
124 do {
125 step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
126 step+1 : &sem->fifo_queues[0];
127
128 if(step->count < shortest->count) {
129 shortest = step;
130 if(step->count == 0)
131 break; /* can't get any shorter */
132 }
133
134 }while(step != search_start);
135
136 return(shortest);
137}
138
139static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
140{
141 return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
142}
143
144
145
146#if 0
147static void print_global_list(struct binheap_node* n, int depth)
148{
149 ikglp_heap_node_t *global_heap_node;
150 char padding[81] = " ";
151
152 if(n == NULL) {
153 TRACE_CUR("+-> %p\n", NULL);
154 return;
155 }
156
157 global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
158
159 if(depth*2 <= 80)
160 padding[depth*2] = '\0';
161
162 TRACE_CUR("%s+-> %s/%d\n",
163 padding,
164 global_heap_node->task->comm,
165 global_heap_node->task->pid);
166
167 if(n->left) print_global_list(n->left, depth+1);
168 if(n->right) print_global_list(n->right, depth+1);
169}
170
171static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
172{
173 ikglp_donee_heap_node_t *donee_node;
174 char padding[81] = " ";
175 struct task_struct* donor = NULL;
176
177 if(n == NULL) {
178 TRACE_CUR("+-> %p\n", NULL);
179 return;
180 }
181
182 donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
183
184 if(depth*2 <= 80)
185 padding[depth*2] = '\0';
186
187 if(donee_node->donor_info) {
188 donor = donee_node->donor_info->task;
189 }
190
191 TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
192 padding,
193 donee_node->task->comm,
194 donee_node->task->pid,
195 (donor) ? donor->comm : "nil",
196 (donor) ? donor->pid : -1,
197 ikglp_get_idx(sem, donee_node->fq));
198
199 if(n->left) print_donees(sem, n->left, depth+1);
200 if(n->right) print_donees(sem, n->right, depth+1);
201}
202
203static void print_donors(struct binheap_node *n, int depth)
204{
205 ikglp_wait_state_t *donor_node;
206 char padding[81] = " ";
207
208 if(n == NULL) {
209 TRACE_CUR("+-> %p\n", NULL);
210 return;
211 }
212
213 donor_node = binheap_entry(n, ikglp_wait_state_t, node);
214
215 if(depth*2 <= 80)
216 padding[depth*2] = '\0';
217
218
219 TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
220 padding,
221 donor_node->task->comm,
222 donor_node->task->pid,
223 donor_node->donee_info->task->comm,
224 donor_node->donee_info->task->pid);
225
226 if(n->left) print_donors(n->left, depth+1);
227 if(n->right) print_donors(n->right, depth+1);
228}
229#endif
230
231static void ikglp_add_global_list(struct ikglp_semaphore *sem,
232 struct task_struct *t,
233 ikglp_heap_node_t *node)
234{
235
236
237 node->task = t;
238 INIT_BINHEAP_NODE(&node->node);
239
240 if(sem->top_m_size < sem->m) {
241 TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
242 t->comm, t->pid);
243// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
244// print_global_list(sem->top_m.root, 1);
245
246 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
247 ++(sem->top_m_size);
248
249// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
250// print_global_list(sem->top_m.root, 1);
251 }
252 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
253 ikglp_heap_node_t *evicted =
254 binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
255
256 TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
257 t->comm, t->pid,
258 evicted->task->comm, evicted->task->pid);
259
260// TRACE_CUR("Not-Top-M Before:\n");
261// print_global_list(sem->not_top_m.root, 1);
262// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
263// print_global_list(sem->top_m.root, 1);
264
265
266 binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
267 INIT_BINHEAP_NODE(&evicted->node);
268 binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
269
270 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
271
272// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
273// print_global_list(sem->top_m.root, 1);
274// TRACE_CUR("Not-Top-M After:\n");
275// print_global_list(sem->not_top_m.root, 1);
276 }
277 else {
278 TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
279 t->comm, t->pid);
280// TRACE_CUR("Not-Top-M Before:\n");
281// print_global_list(sem->not_top_m.root, 1);
282
283 binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
284
285// TRACE_CUR("Not-Top-M After:\n");
286// print_global_list(sem->not_top_m.root, 1);
287 }
288}
289
290
291static void ikglp_del_global_list(struct ikglp_semaphore *sem,
292 struct task_struct *t,
293 ikglp_heap_node_t *node)
294{
295 BUG_ON(!binheap_is_in_heap(&node->node));
296
297 TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
298
299 if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
300 TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
301
302// TRACE_CUR("Not-Top-M Before:\n");
303// print_global_list(sem->not_top_m.root, 1);
304// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
305// print_global_list(sem->top_m.root, 1);
306
307
308 binheap_delete(&node->node, &sem->top_m);
309
310 if(!binheap_empty(&sem->not_top_m)) {
311 ikglp_heap_node_t *promoted =
312 binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
313
314 TRACE_CUR("Promoting %s/%d to top-m\n",
315 promoted->task->comm, promoted->task->pid);
316
317 binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
318 INIT_BINHEAP_NODE(&promoted->node);
319
320 binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
321 }
322 else {
323 TRACE_CUR("No one to promote to top-m.\n");
324 --(sem->top_m_size);
325 }
326
327// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
328// print_global_list(sem->top_m.root, 1);
329// TRACE_CUR("Not-Top-M After:\n");
330// print_global_list(sem->not_top_m.root, 1);
331 }
332 else {
333 TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
334// TRACE_CUR("Not-Top-M Before:\n");
335// print_global_list(sem->not_top_m.root, 1);
336
337 binheap_delete(&node->node, &sem->not_top_m);
338
339// TRACE_CUR("Not-Top-M After:\n");
340// print_global_list(sem->not_top_m.root, 1);
341 }
342}
343
344
345static void ikglp_add_donees(struct ikglp_semaphore *sem,
346 struct fifo_queue *fq,
347 struct task_struct *t,
348 ikglp_donee_heap_node_t* node)
349{
350// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
351// TRACE_CUR("donees Before:\n");
352// print_donees(sem, sem->donees.root, 1);
353
354 node->task = t;
355 node->donor_info = NULL;
356 node->fq = fq;
357 INIT_BINHEAP_NODE(&node->node);
358
359 binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
360
361// TRACE_CUR("donees After:\n");
362// print_donees(sem, sem->donees.root, 1);
363}
364
365
366static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
367 struct fifo_queue *fq,
368 struct ikglp_semaphore *sem,
369 unsigned long flags)
370{
371 // priority of 't' has increased (note: 't' might already be hp_waiter).
372 if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
373 struct task_struct *old_max_eff_prio;
374 struct task_struct *new_max_eff_prio;
375 struct task_struct *new_prio = NULL;
376 struct task_struct *owner = fq->owner;
377
378 if(fq->hp_waiter)
379 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
380 fq->hp_waiter->comm, fq->hp_waiter->pid);
381 else
382 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
383
384 if(owner)
385 {
386 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
387
388// TRACE_TASK(owner, "Heap Before:\n");
389// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
390
391 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
392
393 fq->hp_waiter = t;
394 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
395
396 binheap_decrease(&fq->nest.hp_binheap_node,
397 &tsk_rt(owner)->hp_blocked_tasks);
398
399// TRACE_TASK(owner, "Heap After:\n");
400// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
401
402 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
403
404 if(new_max_eff_prio != old_max_eff_prio) {
405 TRACE_TASK(t, "is new hp_waiter.\n");
406
407 if ((effective_priority(owner) == old_max_eff_prio) ||
408 (litmus->__compare(new_max_eff_prio, BASE,
409 owner, EFFECTIVE))){
410 new_prio = new_max_eff_prio;
411 }
412 }
413 else {
414 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
415 }
416
417 if(new_prio) {
418 // set new inheritance and propagate
419 TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
420 owner->comm, owner->pid,
421 new_prio->comm, new_prio->pid);
422 litmus->nested_increase_prio(owner, new_prio, &sem->lock,
423 flags); // unlocks lock.
424 }
425 else {
426 TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n",
427 new_max_eff_prio->comm, new_max_eff_prio->pid);
428 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
429 unlock_fine_irqrestore(&sem->lock, flags);
430 }
431 }
432 else {
433 fq->hp_waiter = t;
434 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
435
436 TRACE_TASK(t, "no owner.\n");
437 unlock_fine_irqrestore(&sem->lock, flags);
438 }
439 }
440 else {
441 TRACE_TASK(t, "hp_waiter is unaffected.\n");
442 unlock_fine_irqrestore(&sem->lock, flags);
443 }
444}
445
446// hp_waiter has decreased
447static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
448 struct ikglp_semaphore *sem,
449 unsigned long flags)
450{
451 struct task_struct *owner = fq->owner;
452
453 struct task_struct *old_max_eff_prio;
454 struct task_struct *new_max_eff_prio;
455
456 if(!owner) {
457 TRACE_CUR("No owner. Returning.\n");
458 unlock_fine_irqrestore(&sem->lock, flags);
459 return;
460 }
461
462 TRACE_CUR("ikglp_refresh_owners_prio_decrease\n");
463
464 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
465
466 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
467
468 binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
469 fq->nest.hp_waiter_eff_prio = fq->hp_waiter;
470 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
471 struct nested_info, hp_binheap_node);
472
473 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
474
475 if((old_max_eff_prio != new_max_eff_prio) &&
476 (effective_priority(owner) == old_max_eff_prio))
477 {
478 // Need to set new effective_priority for owner
479 struct task_struct *decreased_prio;
480
481 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
482 ikglp_get_idx(sem, fq));
483
484 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
485 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
486 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
487 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
488 owner->comm,
489 owner->pid,
490 ikglp_get_idx(sem, fq));
491
492 decreased_prio = new_max_eff_prio;
493 }
494 else {
495 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
496 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
497 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
498 owner->comm,
499 owner->pid,
500 ikglp_get_idx(sem, fq));
501
502 decreased_prio = NULL;
503 }
504
505 // beware: recursion
506 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock sem->lock
507 }
508 else {
509 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
510 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
511 unlock_fine_irqrestore(&sem->lock, flags);
512 }
513}
514
515
516static void ikglp_remove_donation_from_owner(struct binheap_node *n,
517 struct fifo_queue *fq,
518 struct ikglp_semaphore *sem,
519 unsigned long flags)
520{
521 struct task_struct *owner = fq->owner;
522
523 struct task_struct *old_max_eff_prio;
524 struct task_struct *new_max_eff_prio;
525
526 BUG_ON(!owner);
527
528 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
529
530 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
531
532 binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
533
534 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
535
536 if((old_max_eff_prio != new_max_eff_prio) &&
537 (effective_priority(owner) == old_max_eff_prio))
538 {
539 // Need to set new effective_priority for owner
540 struct task_struct *decreased_prio;
541
542 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
543 ikglp_get_idx(sem, fq));
544
545 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
546 TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
547 ikglp_get_idx(sem, fq));
548 decreased_prio = new_max_eff_prio;
549 }
550 else {
551 TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
552 ikglp_get_idx(sem, fq));
553 decreased_prio = NULL;
554 }
555
556 // beware: recursion
557 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock sem->lock
558 }
559 else {
560 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
561 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
562 unlock_fine_irqrestore(&sem->lock, flags);
563 }
564}
565
566static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
567 struct binheap_node *n)
568{
569 struct task_struct *old_max_eff_prio;
570 struct task_struct *new_max_eff_prio;
571
572 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
573
574 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
575
576 binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
577
578 new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
579
580 if((old_max_eff_prio != new_max_eff_prio) &&
581 (effective_priority(t) == old_max_eff_prio))
582 {
583 // Need to set new effective_priority for owner
584 struct task_struct *decreased_prio;
585
586 if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
587 decreased_prio = new_max_eff_prio;
588 }
589 else {
590 decreased_prio = NULL;
591 }
592
593 tsk_rt(t)->inh_task = decreased_prio;
594 }
595
596 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
597}
598
599static void ikglp_get_immediate(struct task_struct* t,
600 struct fifo_queue *fq,
601 struct ikglp_semaphore *sem,
602 unsigned long flags)
603{
604 // resource available now
605 TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
606
607 fq->owner = t;
608
609 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
610 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
611 struct nested_info, hp_binheap_node);
612 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
613
614 ++(fq->count);
615
616 ikglp_add_global_list(sem, t, &fq->global_heap_node);
617 ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
618
619 sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
620
621#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
622 if(sem->aff_obs) {
623 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
624 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
625 }
626#endif
627
628 unlock_fine_irqrestore(&sem->lock, flags);
629}
630
631
632
633
634
635static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
636 struct fifo_queue* fq,
637 struct task_struct* t,
638 wait_queue_t *wait,
639 ikglp_heap_node_t *global_heap_node,
640 ikglp_donee_heap_node_t *donee_heap_node)
641{
642 /* resource is not free => must suspend and wait */
643 TRACE_TASK(t, "Enqueuing on fq %d.\n",
644 ikglp_get_idx(sem, fq));
645
646 init_waitqueue_entry(wait, t);
647
648 __add_wait_queue_tail_exclusive(&fq->wait, wait);
649
650 ++(fq->count);
651 ++(sem->nr_in_fifos);
652
653 // update global list.
654 if(likely(global_heap_node)) {
655 if(binheap_is_in_heap(&global_heap_node->node)) {
656 WARN_ON(1);
657 ikglp_del_global_list(sem, t, global_heap_node);
658 }
659 ikglp_add_global_list(sem, t, global_heap_node);
660 }
661 // update donor eligibility list.
662 if(likely(donee_heap_node)) {
663// if(binheap_is_in_heap(&donee_heap_node->node)) {
664// WARN_ON(1);
665// }
666 ikglp_add_donees(sem, fq, t, donee_heap_node);
667 }
668
669 if(sem->shortest_fifo_queue == fq) {
670 sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
671 }
672
673#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
674 if(sem->aff_obs) {
675 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
676 }
677#endif
678
679 TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
680}
681
682
683static void ikglp_enqueue_on_fq(
684 struct ikglp_semaphore *sem,
685 struct fifo_queue *fq,
686 ikglp_wait_state_t *wait,
687 unsigned long flags)
688{
689 /* resource is not free => must suspend and wait */
690 TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
691 ikglp_get_idx(sem, fq));
692
693 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
694 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
695
696 __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
697 &wait->global_heap_node, &wait->donee_heap_node);
698
699 ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock
700}
701
702
703static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
704 ikglp_wait_state_t *wait)
705{
706 TRACE_TASK(wait->task, "goes to PQ.\n");
707
708 wait->pq_node.task = wait->task; // copy over task (little redundant...)
709
710 binheap_add(&wait->pq_node.node, &sem->priority_queue,
711 ikglp_heap_node_t, node);
712}
713
714static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
715 ikglp_wait_state_t *wait)
716{
717 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
718 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
719 INIT_BINHEAP_NODE(&wait->pq_node.node);
720
721 __ikglp_enqueue_on_pq(sem, wait);
722}
723
724static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
725 ikglp_wait_state_t* wait,
726 unsigned long flags)
727{
728 struct task_struct *t = wait->task;
729 ikglp_donee_heap_node_t *donee_node = NULL;
730 struct task_struct *donee;
731
732 struct task_struct *old_max_eff_prio;
733 struct task_struct *new_max_eff_prio;
734 struct task_struct *new_prio = NULL;
735
736 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
737 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
738 INIT_BINHEAP_NODE(&wait->pq_node.node);
739 INIT_BINHEAP_NODE(&wait->node);
740
741// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
742// TRACE_CUR("donors Before:\n");
743// print_donors(sem->donors.root, 1);
744
745 // Add donor to the global list.
746 ikglp_add_global_list(sem, t, &wait->global_heap_node);
747
748 // Select a donee
749#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
750 donee_node = (sem->aff_obs) ?
751 sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
752 binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
753#else
754 donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
755#endif
756
757 donee = donee_node->task;
758
759 TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
760
761 TRACE_CUR("Temporarily removing %s/%d to donee list.\n",
762 donee->comm, donee->pid);
763// TRACE_CUR("donees Before:\n");
764// print_donees(sem, sem->donees.root, 1);
765
766 //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
767 binheap_delete(&donee_node->node, &sem->donees);
768
769// TRACE_CUR("donees After:\n");
770// print_donees(sem, sem->donees.root, 1);
771
772
773 wait->donee_info = donee_node;
774
775 // Add t to donor heap.
776 binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
777
778 // Now adjust the donee's priority.
779
780 // Lock the donee's inheritance heap.
781 raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
782
783 old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
784
785 if(donee_node->donor_info) {
786 // Steal donation relation. Evict old donor to PQ.
787
788 // Remove old donor from donor heap
789 ikglp_wait_state_t *old_wait = donee_node->donor_info;
790 struct task_struct *old_donor = old_wait->task;
791
792 TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n",
793 donee->comm, donee->pid, old_donor->comm, old_donor->pid);
794
795 binheap_delete(&old_wait->node, &sem->donors);
796
797 // Remove donation from donee's inheritance heap.
798 binheap_delete(&old_wait->prio_donation.hp_binheap_node,
799 &tsk_rt(donee)->hp_blocked_tasks);
800 // WARNING: have not updated inh_prio!
801
802 // Add old donor to PQ.
803 __ikglp_enqueue_on_pq(sem, old_wait);
804
805 // Remove old donor from the global heap.
806 ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
807 }
808
809 // Add back donee's node to the donees heap with increased prio
810 donee_node->donor_info = wait;
811 INIT_BINHEAP_NODE(&donee_node->node);
812
813
814 TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
815// TRACE_CUR("donees Before:\n");
816// print_donees(sem, sem->donees.root, 1);
817
818 binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
819
820// TRACE_CUR("donees After:\n");
821// print_donees(sem, sem->donees.root, 1);
822
823 // Add an inheritance/donation to the donee's inheritance heap.
824 wait->prio_donation.lock = (struct litmus_lock*)sem;
825 wait->prio_donation.hp_waiter_eff_prio = t;
826 wait->prio_donation.hp_waiter_ptr = NULL;
827 INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
828
829 binheap_add(&wait->prio_donation.hp_binheap_node,
830 &tsk_rt(donee)->hp_blocked_tasks,
831 struct nested_info, hp_binheap_node);
832
833 new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
834
835 if(new_max_eff_prio != old_max_eff_prio) {
836 if ((effective_priority(donee) == old_max_eff_prio) ||
837 (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
838 TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
839 donee->comm, donee->pid);
840 new_prio = new_max_eff_prio;
841 }
842// else {
843// // should be bug. donor would not be in top-m.
844// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
845// WARN_ON(1);
846// }
847// }
848// else {
849// // should be bug. donor would not be in top-m.
850// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
851// WARN_ON(1);
852 }
853
854 if(new_prio) {
855 struct fifo_queue *donee_fq = donee_node->fq;
856
857 if(donee != donee_fq->owner) {
858 TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
859 donee->comm, donee->pid,
860 donee_fq->owner->comm, donee_fq->owner->pid);
861
862 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
863 ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock
864 }
865 else {
866 TRACE_TASK(t, "%s/%d is the owner. Progatating priority immediatly.\n",
867 donee->comm, donee->pid);
868 litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock
869 }
870 }
871 else {
872 TRACE_TASK(t, "No change in effective priority (it is %d/%s). BUG?\n",
873 new_max_eff_prio->comm, new_max_eff_prio->pid);
874 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
875 unlock_fine_irqrestore(&sem->lock, flags);
876 }
877
878
879// TRACE_CUR("donors After:\n");
880// print_donors(sem->donors.root, 1);
881}
882
883int ikglp_lock(struct litmus_lock* l)
884{
885 struct task_struct* t = current;
886 struct ikglp_semaphore *sem = ikglp_from_lock(l);
887 unsigned long flags = 0, real_flags;
888 struct fifo_queue *fq = NULL;
889 int replica = -EINVAL;
890
891#ifdef CONFIG_LITMUS_DGL_SUPPORT
892 raw_spinlock_t *dgl_lock;
893#endif
894
895 ikglp_wait_state_t wait;
896
897 if (!is_realtime(t))
898 return -EPERM;
899
900#ifdef CONFIG_LITMUS_DGL_SUPPORT
901 dgl_lock = litmus->get_dgl_spinlock(t);
902#endif
903
904 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
905
906 lock_global_irqsave(dgl_lock, flags);
907 lock_fine_irqsave(&sem->lock, flags);
908
909 if(sem->nr_in_fifos < sem->m) {
		// enqueue somewhere
911#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
912 fq = (sem->aff_obs) ?
913 sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
914 sem->shortest_fifo_queue;
915#else
916 fq = sem->shortest_fifo_queue;
917#endif
918 if(fq->count == 0) {
919 // take available resource
920 replica = ikglp_get_idx(sem, fq);
921
922 ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
923
924 unlock_global_irqrestore(dgl_lock, flags);
925 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
926 goto acquired;
927 }
928 else {
929 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
930
931 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
932 mb();
933
934 /* FIXME: interruptible would be nice some day */
935 set_task_state(t, TASK_UNINTERRUPTIBLE);
936
937 ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
938 }
939 }
940 else {
941 // donor!
942 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
943
944 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
945 mb();
946
947 /* FIXME: interruptible would be nice some day */
948 set_task_state(t, TASK_UNINTERRUPTIBLE);
949
950 if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
951 // enqueue on PQ
952 ikglp_enqueue_on_pq(sem, &wait);
953 unlock_fine_irqrestore(&sem->lock, flags);
954 }
955 else {
956 // enqueue as donor
957 ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
958 }
959 }
960
961 unlock_global_irqrestore(dgl_lock, flags);
962 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
963
964 TS_LOCK_SUSPEND;
965
966 schedule();
967
968 TS_LOCK_RESUME;
969
970 fq = ikglp_get_queue(sem, t);
971 BUG_ON(!fq);
972
973 replica = ikglp_get_idx(sem, fq);
974
975acquired:
976 TRACE_CUR("Acquired lock %d, queue %d\n",
977 l->ident, replica);
978
979#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
980 if(sem->aff_obs) {
981 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
982 }
983#endif
984
985 return replica;
986}
987
1091
1092static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
1093 struct fifo_queue *fq,
1094 ikglp_wait_state_t *donor_info)
1095{
1096 struct task_struct *t = donor_info->task;
1097
1098 TRACE_CUR("Donor %s/%d being moved to fq %d\n",
1099 t->comm,
1100 t->pid,
1101 ikglp_get_idx(sem, fq));
1102
1103 binheap_delete(&donor_info->node, &sem->donors);
1104
1105 __ikglp_enqueue_on_fq(sem, fq, t,
1106 &donor_info->fq_node,
1107 NULL, // already in global_list, so pass null to prevent adding 2nd time.
1108 &donor_info->donee_heap_node);
1109
1110 // warning:
1111 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1112}
1113
1114static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
1115 struct fifo_queue *fq,
1116 ikglp_wait_state_t *wait)
1117{
1118 struct task_struct *t = wait->task;
1119
1120 TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
1121 t->comm,
1122 t->pid,
1123 ikglp_get_idx(sem, fq));
1124
1125 binheap_delete(&wait->pq_node.node, &sem->priority_queue);
1126
1127 __ikglp_enqueue_on_fq(sem, fq, t,
1128 &wait->fq_node,
1129 &wait->global_heap_node,
1130 &wait->donee_heap_node);
1131 // warning:
1132 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1133}
1134
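/* Scan all replicas for a FIFO queue holding more than one request and whose
 * hp_waiter has the highest priority among such queues, then return that
 * waiter's wait state so the caller can move it to an idle queue. Returns
 * NULL if no queue has a stealable waiter. */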
1135static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
1136 struct ikglp_semaphore* sem)
1137{
1138 /* must hold sem->lock */
1139
1140 struct fifo_queue *fq = NULL;
1141 struct list_head *pos;
1142 struct task_struct *queued;
1143 int i;
1144
1145 for(i = 0; i < sem->nr_replicas; ++i) {
1146 if( (sem->fifo_queues[i].count > 1) &&
1147 (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
1148
1149 TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
1150 ikglp_get_idx(sem, &sem->fifo_queues[i]),
1151 sem->fifo_queues[i].hp_waiter->comm,
1152 sem->fifo_queues[i].hp_waiter->pid,
1153 (fq) ? ikglp_get_idx(sem, fq) : -1,
1154 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
1155 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
1156
1157 fq = &sem->fifo_queues[i];
1158
1159 WARN_ON(!(fq->hp_waiter));
1160 }
1161 }
1162
1163 if(fq) {
1164 struct task_struct *max_hp = fq->hp_waiter;
1165 ikglp_wait_state_t* ret = NULL;
1166
1167 TRACE_CUR("Searching for %s/%d on fq %d\n",
1168 max_hp->comm,
1169 max_hp->pid,
1170 ikglp_get_idx(sem, fq));
1171
1172 BUG_ON(!max_hp);
1173
1174 list_for_each(pos, &fq->wait.task_list) {
1175 wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
1176
1177 queued = (struct task_struct*) wait->private;
1178
1179 TRACE_CUR("fq %d entry: %s/%d\n",
1180 ikglp_get_idx(sem, fq),
1181 queued->comm,
1182 queued->pid);
1183
1184 /* Compare task prios, find high prio task. */
1185 if (queued == max_hp) {
1186 TRACE_CUR("Found it!\n");
1187 ret = container_of(wait, ikglp_wait_state_t, fq_node);
1188 }
1189 }
1190
1191 WARN_ON(!ret);
1192 return ret;
1193 }
1194
1195 return(NULL);
1196}
1197
1198static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
1199 struct fifo_queue *fq,
1200 ikglp_wait_state_t *fq_wait)
1201{
1202 struct task_struct *t = fq_wait->task;
1203 struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
1204
1205 TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
1206 t->comm,
1207 t->pid,
1208 ikglp_get_idx(sem, fq));
1209
1210 fq_wait->donee_heap_node.fq = fq; // just to be safe
1211
1212
1213 __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
1214 --(fq_steal->count);
1215
1216#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1217 if(sem->aff_obs) {
1218 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
1219 }
1220#endif
1221
1222 if(t == fq_steal->hp_waiter) {
1223 fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
1224 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1225 ikglp_get_idx(sem, fq_steal),
1226 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
1227 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
1228 }
1229
1230
1231 // Update shortest.
1232 if(fq_steal->count < sem->shortest_fifo_queue->count) {
1233 sem->shortest_fifo_queue = fq_steal;
1234 }
1235
1236 __ikglp_enqueue_on_fq(sem, fq, t,
1237 &fq_wait->fq_node,
1238 NULL,
1239 NULL);
1240
1241 // warning: We have not checked the priority inheritance of fq's owner yet.
1242}
1243
1244
1245static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
1246 struct fifo_queue *fq,
1247 ikglp_wait_state_t *old_wait)
1248{
1249 struct task_struct *t = old_wait->task;
1250
1251 BUG_ON(old_wait->donee_heap_node.fq != fq);
1252
1253 TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
1254 ikglp_get_idx(sem, fq));
1255
1256 // need to migrate global_heap_node and donee_heap_node off of the stack
1257 // to the nodes allocated for the owner of this fq.
1258
1259 // TODO: Enhance binheap() to perform this operation in place.
1260
1261 ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
1262 fq->global_heap_node = old_wait->global_heap_node; // copy
1263 ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add
1264
1265 binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove
1266 fq->donee_heap_node = old_wait->donee_heap_node; // copy
1267
1268 if(fq->donee_heap_node.donor_info) {
1269 // let donor know that our location has changed
1270 BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link
1271 fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
1272 }
1273 INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
1274 binheap_add(&fq->donee_heap_node.node, &sem->donees,
1275 ikglp_donee_heap_node_t, node); // re-add
1276}
1277
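/* Release a replica. In outline, matching the code below: remove the owner
 * from the global and donee heaps, then pull at most one waiting request into
 * a FIFO queue -- the owner's own donor first, else the highest-priority
 * donor, else the head of the priority queue, else a request stolen from
 * another FIFO queue. The releasing task then drops all inheritance it holds,
 * the donation/inheritance state of any affected tasks is patched up, and the
 * request now at the head of the freed queue becomes the new owner and is
 * woken. */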
1278int ikglp_unlock(struct litmus_lock* l)
1279{
1280 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1281 struct task_struct *t = current;
1282 struct task_struct *donee = NULL;
1283 struct task_struct *next = NULL;
1284 struct task_struct *new_on_fq = NULL;
1285 struct fifo_queue *fq_of_new_on_fq = NULL;
1286
1287 ikglp_wait_state_t *other_donor_info = NULL;
1288 struct fifo_queue *to_steal = NULL;
1289 int need_steal_prio_reeval = 0;
1290 struct fifo_queue *fq;
1291
1292#ifdef CONFIG_LITMUS_DGL_SUPPORT
1293 raw_spinlock_t *dgl_lock;
1294#endif
1295
1296 unsigned long flags = 0, real_flags;
1297
1298 int err = 0;
1299
1300 fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
1301
1302 if (!fq) {
1303 err = -EINVAL;
1304 goto out;
1305 }
1306
1307#ifdef CONFIG_LITMUS_DGL_SUPPORT
1308 dgl_lock = litmus->get_dgl_spinlock(t);
1309#endif
1310 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1311
1312 lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
1313 lock_fine_irqsave(&sem->lock, flags);
1314
1315 TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
1316
1317
1318 // Remove 't' from the heaps, but data in nodes will still be good.
1319 ikglp_del_global_list(sem, t, &fq->global_heap_node);
1320 binheap_delete(&fq->donee_heap_node.node, &sem->donees);
1321
1322 fq->owner = NULL; // no longer owned!!
1323 --(fq->count);
1324 if(fq->count < sem->shortest_fifo_queue->count) {
1325 sem->shortest_fifo_queue = fq;
1326 }
1327 --(sem->nr_in_fifos);
1328
1329#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1330 if(sem->aff_obs) {
1331 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
1332 sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
1333 }
1334#endif
1335
1336 // Move the next request into the FQ and update heaps as needed.
1337 // We defer re-evaluation of priorities to later in the function.
1338 if(fq->donee_heap_node.donor_info) { // move my donor to FQ
1339 ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
1340
1341 new_on_fq = donor_info->task;
1342
1343 // donor moved to FQ
1344 donee = t;
1345
1346#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1347 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1348 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1349 if(fq_of_new_on_fq->count == 0) {
1350 // ignore it?
1351// fq_of_new_on_fq = fq;
1352 }
1353 }
1354 else {
1355 fq_of_new_on_fq = fq;
1356 }
1357#else
1358 fq_of_new_on_fq = fq;
1359#endif
1360
1361 TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1362 new_on_fq->comm, new_on_fq->pid,
1363 ikglp_get_idx(sem, fq_of_new_on_fq),
1364 ikglp_get_idx(sem, fq));
1365
1366
1367 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info);
1368 }
	else if(!binheap_empty(&sem->donors)) {  // I have no donor of my own, so move another donor to FQ
1370 // move other donor to FQ
1371 // Select a donor
1372#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1373 other_donor_info = (sem->aff_obs) ?
1374 sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
1375 binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1376#else
1377 other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1378#endif
1379
1380 new_on_fq = other_donor_info->task;
1381 donee = other_donor_info->donee_info->task;
1382
1383 // update the donee's heap position.
1384 other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
1385 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
1386
1387#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1388 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1389 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1390 if(fq_of_new_on_fq->count == 0) {
1391 // ignore it?
1392// fq_of_new_on_fq = fq;
1393 }
1394 }
1395 else {
1396 fq_of_new_on_fq = fq;
1397 }
1398#else
1399 fq_of_new_on_fq = fq;
1400#endif
1401
1402 TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1403 new_on_fq->comm, new_on_fq->pid,
1404 ikglp_get_idx(sem, fq_of_new_on_fq),
1405 ikglp_get_idx(sem, fq));
1406
1407 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info);
1408 }
1409 else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ
1410 ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
1411 ikglp_heap_node_t, node);
1412 ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
1413 pq_node);
1414
1415 new_on_fq = pq_wait->task;
1416
1417#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1418 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1419 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1420 if(fq_of_new_on_fq->count == 0) {
1421 // ignore it?
1422// fq_of_new_on_fq = fq;
1423 }
1424 }
1425 else {
1426 fq_of_new_on_fq = fq;
1427 }
1428#else
1429 fq_of_new_on_fq = fq;
1430#endif
1431
1432 TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1433 new_on_fq->comm, new_on_fq->pid,
1434 ikglp_get_idx(sem, fq_of_new_on_fq),
1435 ikglp_get_idx(sem, fq));
1436
1437 ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait);
1438 }
1439 else if(fq->count == 0) { // No PQ and this queue is empty, so steal.
1440 ikglp_wait_state_t *fq_wait;
1441
1442 TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
1443 ikglp_get_idx(sem, fq));
1444
1445#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1446 fq_wait = (sem->aff_obs) ?
1447 sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
1448 ikglp_find_hp_waiter_to_steal(sem);
1449#else
1450 fq_wait = ikglp_find_hp_waiter_to_steal(sem);
1451#endif
1452
1453 if(fq_wait) {
1454 to_steal = fq_wait->donee_heap_node.fq;
1455
1456 new_on_fq = fq_wait->task;
1457 fq_of_new_on_fq = fq;
1458 need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter);
1459
1460 TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
1461 new_on_fq->comm, new_on_fq->pid,
1462 ikglp_get_idx(sem, to_steal),
1463 ikglp_get_idx(sem, fq));
1464
1465 ikglp_steal_to_fq(sem, fq, fq_wait);
1466 }
1467 else {
1468 TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
1469 ikglp_get_idx(sem, fq));
1470 }
1471 }
1472 else { // move no one
1473 }
1474
1475 // 't' must drop all priority and clean up data structures before hand-off.
1476
1477 // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST
1478 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
1479 {
1480 int count = 0;
1481 while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
1482 binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
1483 struct nested_info, hp_binheap_node);
1484 ++count;
1485 }
1486 litmus->decrease_prio(t, NULL);
1487 WARN_ON(count > 2); // should not be greater than 2. only local fq inh and donation can be possible.
1488 }
1489 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
1490
1491
1492
1493 // Now patch up other priorities.
1494 //
1495 // At most one of the following:
1496 // if(donee && donee != t), decrease prio, propagate to owner, or onward
1497 // if(to_steal), update owner's prio (hp_waiter has already been set)
1498 //
1499
1500 BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
1501
1502 if(other_donor_info) {
1503 struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
1504
1505 BUG_ON(!donee);
1506 BUG_ON(donee == t);
1507
1508 TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
1509 other_donor_info->task->comm, other_donor_info->task->pid,
1510 donee->comm, donee->pid);
1511
1512 // need to terminate donation relation.
1513 if(donee == other_fq->owner) {
1514 TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n",
1515 donee->comm, donee->pid,
1516 ikglp_get_idx(sem, other_fq));
1517
1518 ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
1519 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1520 }
1521 else {
			TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n",
1523 donee->comm, donee->pid,
1524 ikglp_get_idx(sem, other_fq));
1525
1526 ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
1527 if(donee == other_fq->hp_waiter) {
1528 TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
1529 donee->comm, donee->pid,
1530 ikglp_get_idx(sem, other_fq));
1531
1532 other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
1533 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1534 ikglp_get_idx(sem, other_fq),
1535 (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
1536 (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
1537
1538 ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it.
1539 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1540 }
1541 }
1542 }
1543 else if(to_steal) {
1544 TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
1545 ikglp_get_idx(sem, to_steal));
1546
1547 if(need_steal_prio_reeval) {
1548 ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it.
1549 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1550 }
1551 }
1552
1553 // check for new HP waiter.
1554 if(new_on_fq) {
1555 if(fq == fq_of_new_on_fq) {
1556 // fq->owner is null, so just update the hp_waiter without locking.
1557 if(new_on_fq == fq->hp_waiter) {
				TRACE_TASK(t, "new_on_fq (%s/%d) is already hp_waiter.\n",
						   fq->hp_waiter->comm, fq->hp_waiter->pid);
1560 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
1561 }
1562 else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
1563 if(fq->hp_waiter)
1564 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
1565 fq->hp_waiter->comm, fq->hp_waiter->pid);
1566 else
1567 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
1568
1569 fq->hp_waiter = new_on_fq;
1570 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
1571
1572 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1573 ikglp_get_idx(sem, fq),
1574 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1575 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1576 }
1577 }
1578 else {
1579 ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it.
1580 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1581 }
1582 }
1583
1584wake_kludge:
1585 if(waitqueue_active(&fq->wait))
1586 {
1587 wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
1588 ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
1589 next = (struct task_struct*) wait->private;
1590
1591 __remove_wait_queue(&fq->wait, wait);
1592
1593 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1594 ikglp_get_idx(sem, fq),
1595 next->comm, next->pid);
1596
1597 // migrate wait-state to fifo-memory.
1598 ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
1599
		/* next becomes the resource holder */
1601 fq->owner = next;
1602 tsk_rt(next)->blocked_lock = NULL;
1603
1604#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1605 if(sem->aff_obs) {
1606 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
1607 }
1608#endif
1609
1610 /* determine new hp_waiter if necessary */
1611 if (next == fq->hp_waiter) {
1612
1613 TRACE_TASK(next, "was highest-prio waiter\n");
1614 /* next has the highest priority --- it doesn't need to
1615 * inherit. However, we need to make sure that the
1616 * next-highest priority in the queue is reflected in
1617 * hp_waiter. */
1618 fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
1619 TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
1620 ikglp_get_idx(sem, fq),
1621 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1622 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1623
1624 fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
1625 effective_priority(fq->hp_waiter) : NULL;
1626
1627 if (fq->hp_waiter)
1628 TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
1629 else
1630 TRACE("no further waiters\n");
1631
1632 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1633
1634// TRACE_TASK(next, "Heap Before:\n");
1635// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1636
1637 binheap_add(&fq->nest.hp_binheap_node,
1638 &tsk_rt(next)->hp_blocked_tasks,
1639 struct nested_info,
1640 hp_binheap_node);
1641
1642// TRACE_TASK(next, "Heap After:\n");
1643// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1644
1645 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1646 }
1647 else {
1648 /* Well, if 'next' is not the highest-priority waiter,
1649 * then it (probably) ought to inherit the highest-priority
1650 * waiter's priority. */
1651 TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
1652 ikglp_get_idx(sem, fq),
1653 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1654 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1655
1656 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1657
1658 binheap_add(&fq->nest.hp_binheap_node,
1659 &tsk_rt(next)->hp_blocked_tasks,
1660 struct nested_info,
1661 hp_binheap_node);
1662
1663 /* It is possible that 'next' *should* be the hp_waiter, but isn't
1664 * because that update hasn't yet executed (update operation is
			 * probably blocked on mutex->lock). So only inherit if the top of
			 * 'next's heap is indeed the effective prio. of hp_waiter.
1667 * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
1668 * since the effective priority of hp_waiter can change (and the
1669 * update has not made it to this lock).)
1670 */
1671 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
1672 fq->nest.hp_waiter_eff_prio))
1673 {
1674 if(fq->nest.hp_waiter_eff_prio)
1675 litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
1676 else
1677 WARN_ON(1);
1678 }
1679
1680 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1681 }
1682
1683
1684 // wake up the new resource holder!
1685 wake_up_process(next);
1686 }
1687 if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
		// The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
1689 // Wake up the new guy too.
1690
1691 BUG_ON(fq_of_new_on_fq->owner != NULL);
1692
1693 fq = fq_of_new_on_fq;
1694 fq_of_new_on_fq = NULL;
1695 goto wake_kludge;
1696 }
1697
1698 unlock_fine_irqrestore(&sem->lock, flags);
1699 unlock_global_irqrestore(dgl_lock, flags);
1700
1701 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1702
1703out:
1704 return err;
1705}
1706
1707
1708
1709int ikglp_close(struct litmus_lock* l)
1710{
1711 struct task_struct *t = current;
1712 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1713 unsigned long flags;
1714
1715 int owner = 0;
1716 int i;
1717
1718 raw_spin_lock_irqsave(&sem->real_lock, flags);
1719
1720 for(i = 0; i < sem->nr_replicas; ++i) {
1721 if(sem->fifo_queues[i].owner == t) {
1722 owner = 1;
1723 break;
1724 }
1725 }
1726
1727 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1728
1729 if (owner)
1730 ikglp_unlock(l);
1731
1732 return 0;
1733}
1734
1735void ikglp_free(struct litmus_lock* l)
1736{
1737 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1738
1739 kfree(sem->fifo_queues);
1740 kfree(sem);
1741}
1742
1743
1744
1745struct litmus_lock* ikglp_new(int m,
1746 struct litmus_lock_ops* ops,
1747 void* __user arg)
1748{
1749 struct ikglp_semaphore* sem;
1750 int nr_replicas = 0;
1751 int i;
1752
1753 if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
1754 {
1755 return(NULL);
1756 }
1757 if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
1758 {
1759 return(NULL);
1760 }
1761 if(nr_replicas < 1)
1762 {
1763 return(NULL);
1764 }
1765
1766 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1767 if(!sem)
1768 {
1769 return NULL;
1770 }
1771
1772 sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
1773 if(!sem->fifo_queues)
1774 {
1775 kfree(sem);
1776 return NULL;
1777 }
1778
1779 sem->litmus_lock.ops = ops;
1780
1781#ifdef CONFIG_DEBUG_SPINLOCK
1782 {
1783 __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
1784 }
1785#else
1786 raw_spin_lock_init(&sem->lock);
1787#endif
1788
1789 raw_spin_lock_init(&sem->real_lock);
1790
1791 sem->nr_replicas = nr_replicas;
1792 sem->m = m;
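	/* Each of the k FIFO queues admits at most ceil(m/k) requests; the
	 * expression below is integer ceiling division (e.g., m = 8 and k = 3
	 * give max_fifo_len = 3). */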
1793 sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
1794 sem->nr_in_fifos = 0;
1795
1796 TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
1797 sem->m,
1798 sem->nr_replicas,
1799 sem->max_fifo_len);
1800
1801 for(i = 0; i < nr_replicas; ++i)
1802 {
1803 struct fifo_queue* q = &(sem->fifo_queues[i]);
1804
1805 q->owner = NULL;
1806 q->hp_waiter = NULL;
1807 init_waitqueue_head(&q->wait);
1808 q->count = 0;
1809
1810 q->global_heap_node.task = NULL;
1811 INIT_BINHEAP_NODE(&q->global_heap_node.node);
1812
1813 q->donee_heap_node.task = NULL;
1814 q->donee_heap_node.donor_info = NULL;
1815 q->donee_heap_node.fq = NULL;
1816 INIT_BINHEAP_NODE(&q->donee_heap_node.node);
1817
1818 q->nest.lock = (struct litmus_lock*)sem;
1819 q->nest.hp_waiter_eff_prio = NULL;
1820 q->nest.hp_waiter_ptr = &q->hp_waiter;
1821 INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
1822 }
1823
1824 sem->shortest_fifo_queue = &sem->fifo_queues[0];
1825
1826 sem->top_m_size = 0;
1827
1828 // init heaps
1829 INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
1830 INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
1831 INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
1832 INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
1833 INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
1834
1835#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1836 sem->aff_obs = NULL;
1837#endif
1838
1839 return &sem->litmus_lock;
1840}
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1871
1872static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
1873{
1874 int gpu = replica % aff->nr_rsrc;
1875 return gpu;
1876}
1877
1878static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
1879{
1880 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
1881 return gpu;
1882}
1883
1884static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1885{
1886 int replica = gpu - aff->offset;
1887 return replica;
1888}
1889
1890
1891int ikglp_aff_obs_close(struct affinity_observer* obs)
1892{
1893 return 0;
1894}
1895
1896void ikglp_aff_obs_free(struct affinity_observer* obs)
1897{
1898 struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
1899 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1900 kfree(ikglp_aff->q_info);
1901 kfree(ikglp_aff);
1902}
1903
1904static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
1905 struct ikglp_affinity_ops* ikglp_ops,
1906 void* __user args)
1907{
1908 struct ikglp_affinity* ikglp_aff;
1909 struct gpu_affinity_observer_args aff_args;
1910 struct ikglp_semaphore* sem;
1911 int i;
1912 unsigned long flags;
1913
1914 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
1915 return(NULL);
1916 }
1917 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
1918 return(NULL);
1919 }
1920
1921 sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
1922
1923 if(sem->litmus_lock.type != IKGLP_SEM) {
1924 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
1925 return(NULL);
1926 }
1927
1928 if((aff_args.nr_simult_users <= 0) ||
1929 (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
1930 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
1931 "(%d) per replica. #replicas should be evenly divisible "
1932 "by #simult_users.\n",
1933 sem->litmus_lock.ident,
1934 sem->nr_replicas,
1935 aff_args.nr_simult_users);
1936 return(NULL);
1937 }
1938
1939 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1940 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1941 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1942// return(NULL);
1943 }
1944
1945 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1946 if(!ikglp_aff) {
1947 return(NULL);
1948 }
1949
1950 ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
1951 if(!ikglp_aff->q_info) {
1952 kfree(ikglp_aff);
1953 return(NULL);
1954 }
1955
1956 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1957 if(!ikglp_aff->nr_cur_users_on_rsrc) {
1958 kfree(ikglp_aff->q_info);
1959 kfree(ikglp_aff);
1960 return(NULL);
1961 }
1962
1963 affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
1964
1965 ikglp_aff->ops = ikglp_ops;
1966 ikglp_aff->offset = aff_args.replica_to_gpu_offset;
1967 ikglp_aff->nr_simult = aff_args.nr_simult_users;
1968 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
1969 ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
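	// Replica-to-GPU mapping used throughout: nr_rsrc GPUs each back nr_simult
	// replicas, and replica r maps to GPU (r % nr_rsrc) + offset (see
	// __replica_to_gpu()/replica_to_gpu() above).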
1970
1971 TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
1972 "nr_rsrc = %d, relaxed_fifo_len = %d\n",
1973 ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc,
1974 ikglp_aff->relax_max_fifo_len);
1975
1976 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
1977
1978 for(i = 0; i < sem->nr_replicas; ++i) {
1979 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
1980 ikglp_aff->q_info[i].estimated_len = 0;
1981
1982 // multiple q_info's will point to the same resource (aka GPU) if
1983 // aff_args.nr_simult_users > 1
1984 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
1985 }
1986
1987 // attach observer to the lock
1988 raw_spin_lock_irqsave(&sem->real_lock, flags);
1989 sem->aff_obs = ikglp_aff;
1990 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1991
1992 return &ikglp_aff->obs;
1993}
1994
1995
1996
1997
1998static int gpu_replica_to_resource(struct ikglp_affinity* aff,
1999 struct fifo_queue* fq) {
2000 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2001 return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
2002}
2003
2004
2005// Smart IKGLP Affinity
2006
2007//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
2008//{
2009// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2010// struct ikglp_queue_info *shortest = &aff->q_info[0];
2011// int i;
2012//
2013// for(i = 1; i < sem->nr_replicas; ++i) {
2014// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
2015// shortest = &aff->q_info[i];
2016// }
2017// }
2018//
2019// return(shortest);
2020//}
2021
2022struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2023{
	// advise_enqueue must be smart so as not to break IKGLP rules:
	//  * No queue can be greater than ceil(m/k) in length. We may return
	//    such a queue, but IKGLP will be smart enough to send requests
	//    to donors or PQ.
	//  * Cannot let a queue idle if there exist waiting PQ/donors
	//    -- needed to guarantee parallel progress of waiters.
	//
	// We may be able to relax some of these constraints, but this will have to
	// be carefully evaluated.
	//
	// Heuristic strategy: Find the shortest queue that is not full.
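	//
	// A worked example of the comparison below (illustrative numbers only):
	// if queue A's estimated length plus this task's migration-cost estimate
	// onto A's GPU totals 5 time units while queue B's totals 6, A is chosen;
	// equal totals are broken in favor of the queue whose backing GPU
	// currently has fewer users.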
2035
2036 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2037 lt_t min_len;
2038 int min_nr_users;
2039 struct ikglp_queue_info *shortest;
2040 struct fifo_queue *to_enqueue;
2041 int i;
2042 int affinity_gpu;
2043
2044 int max_fifo_len = (aff->relax_max_fifo_len) ?
2045 sem->m : sem->max_fifo_len;
2046
	// simply pick the shortest queue if we have no affinity, or if we have
	// affinity with the shortest
2049 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
2050 affinity_gpu = aff->offset; // first gpu
2051 TRACE_CUR("no affinity\n");
2052 }
2053 else {
2054 affinity_gpu = tsk_rt(t)->last_gpu;
2055 }
2056
2057 // all things being equal, let's start with the queue with which we have
2058 // affinity. this helps us maintain affinity even when we don't have
	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
2060 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
2061
2062 // if(shortest == aff->shortest_queue) {
2063 // TRACE_CUR("special case: have affinity with shortest queue\n");
2064 // goto out;
2065 // }
2066
2067 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
2068 min_nr_users = *(shortest->nr_cur_users);
2069
2070 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2071 get_gpu_estimate(t, MIG_LOCAL),
2072 ikglp_get_idx(sem, shortest->q),
2073 shortest->q->count,
2074 min_len);
2075
2076 for(i = 0; i < sem->nr_replicas; ++i) {
2077 if(&aff->q_info[i] != shortest) {
2078 if(aff->q_info[i].q->count < max_fifo_len) {
2079
2080 lt_t est_len =
2081 aff->q_info[i].estimated_len +
2082 get_gpu_estimate(t,
2083 gpu_migration_distance(tsk_rt(t)->last_gpu,
2084 replica_to_gpu(aff, i)));
2085
				// queue is smaller, or they're equal and the other has a
				// smaller number of total users.
				//
				// tie-break on the smallest number of simultaneous users. this
				// only kicks in when there is more than one empty queue.
2091 if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */
2092 (est_len < min_len) || /* i-th queue has shortest length */
2093 ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */
2094 (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2095
2096 shortest = &aff->q_info[i];
2097 min_len = est_len;
2098 min_nr_users = *(aff->q_info[i].nr_cur_users);
2099 }
2100
2101 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2102 get_gpu_estimate(t,
2103 gpu_migration_distance(tsk_rt(t)->last_gpu,
2104 replica_to_gpu(aff, i))),
2105 ikglp_get_idx(sem, aff->q_info[i].q),
2106 aff->q_info[i].q->count,
2107 est_len);
2108 }
2109 else {
2110 TRACE_CUR("queue %d is too long. ineligible for enqueue.\n",
2111 ikglp_get_idx(sem, aff->q_info[i].q));
2112 }
2113 }
2114 }
2115
2116 if(shortest->q->count >= max_fifo_len) {
2117 TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
2118 ikglp_get_idx(sem, shortest->q));
2119 }
2120
2121 to_enqueue = shortest->q;
2122 TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
2123 ikglp_get_idx(sem, to_enqueue),
2124 to_enqueue->count,
2125 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2126
2127 return to_enqueue;
2128
2129 //return(sem->shortest_fifo_queue);
2130}
2131
2132
2133
2134
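/* A candidate's "improvement" below is the migration distance it would incur
 * by staying on its current queue's GPU minus the distance it would incur on
 * the destination GPU (both measured from the task's last_gpu). A positive
 * value means the steal moves the task closer to its cached affinity; the
 * search stops early once the improvement reaches MIG_NONE - 1, the largest
 * improvement the early-exit check considers attainable. */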
2135static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff,
2136 int dest_gpu,
2137 struct fifo_queue* fq)
2138{
2139 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2140 ikglp_wait_state_t *wait = NULL;
2141 int max_improvement = -(MIG_NONE+1);
2142 int replica = ikglp_get_idx(sem, fq);
2143
2144 if(waitqueue_active(&fq->wait)) {
2145 int this_gpu = replica_to_gpu(aff, replica);
2146 struct list_head *pos;
2147
2148 list_for_each(pos, &fq->wait.task_list) {
2149 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2150 ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2151
2152 int tmp_improvement =
2153 gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) -
2154 gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu);
2155
2156 if(tmp_improvement > max_improvement) {
2157 wait = tmp_wait;
2158 max_improvement = tmp_improvement;
2159
2160 if(max_improvement >= (MIG_NONE-1)) {
2161 goto out;
2162 }
2163 }
2164 }
2165
2166 BUG_ON(!wait);
2167 }
2168 else {
2169 TRACE_CUR("fq %d is empty!\n", replica);
2170 }
2171
2172out:
2173
2174 TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n",
2175 replica,
2176 (wait) ? wait->task->comm : "nil",
2177 (wait) ? wait->task->pid : -1,
2178 max_improvement);
2179
2180 return wait;
2181}
2182
2183
2184ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2185 struct fifo_queue* dst)
2186{
	// Heuristic strategy: Find the task with the greatest improvement in affinity.
2188 //
2189 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2190 ikglp_wait_state_t *to_steal_state = NULL;
2191// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem);
2192 int max_improvement = -(MIG_NONE+1);
2193 int replica, i;
2194 int dest_gpu;
2195
2196 replica = ikglp_get_idx(sem, dst);
2197 dest_gpu = replica_to_gpu(aff, replica);
2198
2199 for(i = 0; i < sem->nr_replicas; ++i) {
2200 ikglp_wait_state_t *tmp_to_steal_state =
2201 pick_steal(aff, dest_gpu, &sem->fifo_queues[i]);
2202
2203 if(tmp_to_steal_state) {
2204 int tmp_improvement =
2205 gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) -
2206 gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu);
2207
2208 if(tmp_improvement > max_improvement) {
2209 to_steal_state = tmp_to_steal_state;
2210 max_improvement = tmp_improvement;
2211
2212 if(max_improvement >= (MIG_NONE-1)) {
2213 goto out;
2214 }
2215 }
2216 }
2217 }
2218
2219out:
2220 if(!to_steal_state) {
2221 TRACE_CUR("Could not find anyone to steal.\n");
2222 }
2223 else {
2224 TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2225 to_steal_state->task->comm, to_steal_state->task->pid,
2226 ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq),
2227 replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)),
2228 ikglp_get_idx(sem, dst),
2229 dest_gpu,
2230 max_improvement);
2231
2232// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2233// default_to_steal_state->task->comm, default_to_steal_state->task->pid,
2234// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq),
2235// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2236// ikglp_get_idx(sem, dst),
2237// replica_to_gpu(aff, ikglp_get_idx(sem, dst)),
2238//
2239// gpu_migration_distance(
2240// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2241// tsk_rt(default_to_steal_state->task)->last_gpu) -
2242// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu));
2243 }
2244
2245 return(to_steal_state);
2246}
2247
2248
2249static inline int has_donor(wait_queue_t* fq_wait)
2250{
2251 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2252 return(wait->donee_heap_node.donor_info != NULL);
2253}
2254
2255static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
2256 struct fifo_queue* fq,
2257 int* dist_from_head)
2258{
2259 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2260 struct task_struct *donee;
2261 ikglp_donee_heap_node_t *donee_node;
2262 struct task_struct *mth_highest = ikglp_mth_highest(sem);
2263
2264// lt_t now = litmus_clock();
2265//
2266// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
2267// ikglp_get_idx(sem, fq),
2268// mth_highest->comm, mth_highest->pid,
2269// (int)get_deadline(mth_highest) - now);
2270
2271 if(fq->owner &&
2272 fq->donee_heap_node.donor_info == NULL &&
2273 mth_highest != fq->owner &&
2274 litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
2275 donee = fq->owner;
2276 donee_node = &(fq->donee_heap_node);
2277 *dist_from_head = 0;
2278
2279 BUG_ON(donee != donee_node->task);
2280
2281 TRACE_CUR("picked owner of fq %d as donee\n",
2282 ikglp_get_idx(sem, fq));
2283
2284 goto out;
2285 }
2286 else if(waitqueue_active(&fq->wait)) {
2287 struct list_head *pos;
2288
2289
2290// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
2291// "(mth_highest != fq->owner) = %d "
2292// "(mth_highest > fq->owner) = %d\n",
2293// ikglp_get_idx(sem, fq),
2294// (fq->owner) ? fq->owner->comm : "nil",
2295// (fq->owner) ? fq->owner->pid : -1,
2296// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
2297// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
2298// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
2299// (mth_highest != fq->owner),
2300// (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
2301
2302
2303 *dist_from_head = 1;
2304
2305 // iterating from the start of the queue is nice since this means
2306 // the donee will be closer to obtaining a resource.
2307 list_for_each(pos, &fq->wait.task_list) {
2308 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2309 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2310
2311// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
2312// "(mth_highest != wait->task) = %d "
2313// "(mth_highest > wait->task) = %d\n",
2314// ikglp_get_idx(sem, fq),
2315// dist_from_head,
2316// wait->task->comm, wait->task->pid,
2317// (int)get_deadline(wait->task) - now,
2318// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
2319// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
2320// (mth_highest != wait->task),
2321// (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
2322
2323
2324 if(!has_donor(fq_wait) &&
2325 mth_highest != wait->task &&
2326 litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
2327 donee = (struct task_struct*) fq_wait->private;
2328 donee_node = &wait->donee_heap_node;
2329
2330 BUG_ON(donee != donee_node->task);
2331
2332 TRACE_CUR("picked waiter in fq %d as donee\n",
2333 ikglp_get_idx(sem, fq));
2334
2335 goto out;
2336 }
2337 ++(*dist_from_head);
2338 }
2339 }
2340
2341 donee = NULL;
2342 donee_node = NULL;
2343 //*dist_from_head = sem->max_fifo_len + 1;
2344 *dist_from_head = IKGLP_INVAL_DISTANCE;
2345
2346 TRACE_CUR("Found no one to be donee in fq %d!\n", ikglp_get_idx(sem, fq));
2347
2348out:
2349
2350 TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
2351 ikglp_get_idx(sem, fq),
2352 (donee) ? (donee)->comm : "nil",
2353 (donee) ? (donee)->pid : -1,
2354 *dist_from_head);
2355
2356 return donee_node;
2357}
2358
2359ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
2360 struct ikglp_affinity* aff,
2361 struct task_struct* donor)
2362{
	// Heuristic strategy: Find the highest-priority donee that is waiting on
	// a queue closest to our affinity. (1) The donee CANNOT already have a
	// donor (exception: donee is the lowest-prio task in the donee heap).
	// (2) Requests in the 'top_m' heap are ineligible.
	//
	// Further strategy: amongst eligible donees waiting for the same GPU, pick
	// the one closest to the head of the FIFO queue (including owners).
2370 //
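	// The search below starts at the GPU the donor last used and visits each
	// GPU once (with wrap-around), only inspecting a GPU's queues when its
	// migration distance improves on the best candidate found so far; within
	// a single GPU, the candidate closest to the head of its FIFO queue wins.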
2371 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2372 ikglp_donee_heap_node_t *donee_node;
2373 gpu_migration_dist_t distance;
2374 int start, i, j;
2375
2376 ikglp_donee_heap_node_t *default_donee;
2377 ikglp_wait_state_t *default_donee_donor_info;
2378
2379 if(tsk_rt(donor)->last_gpu < 0) {
2380 // no affinity. just return the min prio, like standard IKGLP
2381 // TODO: Find something closer to the head of the queue??
2382 donee_node = binheap_top_entry(&sem->donees,
2383 ikglp_donee_heap_node_t,
2384 node);
2385 goto out;
2386 }
2387
2388
	// Temporarily break any donation relation of the default donee (the lowest
	// prio task in the FIFO queues) to make it eligible for selection below.
	//
	// NOTE: The original donor relation *must* be restored, even if we select
	// the default donee through affinity-aware selection, before returning
	// from this function so we don't screw up our heap ordering.
	// The standard IKGLP algorithm will steal the donor relationship if needed.
2396 default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2397 default_donee_donor_info = default_donee->donor_info; // back-up donor relation
2398 default_donee->donor_info = NULL; // temporarily break any donor relation.
2399
2400 // initialize our search
2401 donee_node = NULL;
2402 distance = MIG_NONE;
2403
2404 // TODO: The below search logic may work well for locating nodes to steal
2405 // when an FQ goes idle. Validate this code and apply it to stealing.
2406
2407 // begin search with affinity GPU.
2408 start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
2409 i = start;
2410 do { // "for each gpu" / "for each aff->nr_rsrc"
2411 gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
2412
2413 // only interested in queues that will improve our distance
2414 if(temp_distance < distance || donee_node == NULL) {
2415 int dist_from_head = IKGLP_INVAL_DISTANCE;
2416
			TRACE_CUR("searching for a donee on GPU %d\n", i);
2418
2419 // visit each queue and pick a donee. bail as soon as we find
2420 // one for this class.
2421
2422 for(j = 0; j < aff->nr_simult; ++j) {
2423 int temp_dist_from_head;
2424 ikglp_donee_heap_node_t *temp_donee_node;
2425 struct fifo_queue *fq;
2426
2427 fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
2428 temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
2429
2430 if(temp_dist_from_head < dist_from_head)
2431 {
2432 // we check all the FQs for this GPU to spread priorities
2433 // out across the queues. does this decrease jitter?
2434 donee_node = temp_donee_node;
2435 dist_from_head = temp_dist_from_head;
2436 }
2437 }
2438
			if(dist_from_head != IKGLP_INVAL_DISTANCE) {
				// record how close this donee's GPU is so that only strictly
				// closer GPUs are examined from here on (see the check above)
				distance = temp_distance;

				TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n",
						  donee_node->task->comm, donee_node->task->pid,
						  dist_from_head);
			}
2444 else {
				TRACE_CUR("found no eligible donees on GPU %d\n", i);
2446 }
2447 }
2448 else {
2449 TRACE_CUR("skipping GPU %d (distance = %d, best donor "
2450 "distance = %d)\n", i, temp_distance, distance);
2451 }
2452
2453 i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around
2454 } while (i != start);
2455
2456
2457 // restore old donor info state.
2458 default_donee->donor_info = default_donee_donor_info;
2459
2460 if(!donee_node) {
2461 donee_node = default_donee;
2462
2463 TRACE_CUR("Could not find a donee. We have to steal one.\n");
2464 WARN_ON(default_donee->donor_info == NULL);
2465 }
2466
2467out:
2468
2469 TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
2470 donee_node->task->comm, donee_node->task->pid,
2471 ikglp_get_idx(sem, donee_node->fq),
2472 replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
2473 donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
2474
2475 return(donee_node);
2476}
2477
2478
2479
2480static void __find_closest_donor(int target_gpu,
2481 struct binheap_node* donor_node,
2482 ikglp_wait_state_t** cur_closest,
2483 int* cur_dist)
2484{
2485 ikglp_wait_state_t *this_donor =
2486 binheap_entry(donor_node, ikglp_wait_state_t, node);
2487
2488 int this_dist =
2489 gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
2490
2491// TRACE_CUR("%s/%d: dist from target = %d\n",
2492// this_donor->task->comm,
2493// this_donor->task->pid,
2494// this_dist);
2495
2496 if(this_dist < *cur_dist) {
2497 // take this donor
2498 *cur_dist = this_dist;
2499 *cur_closest = this_donor;
2500 }
2501 else if(this_dist == *cur_dist) {
		// priority tie-break. Even though this is a pre-order traversal,
		// this is a heap, not a binary search tree, so we still need to do a
		// priority comparison.
2505 if(!(*cur_closest) ||
2506 litmus->compare(this_donor->task, (*cur_closest)->task)) {
2507 *cur_dist = this_dist;
2508 *cur_closest = this_donor;
2509 }
2510 }
2511
2512 if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
2513 if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
2514}
2515
2516ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2517{
	// Heuristic strategy: Find the donor with the closest affinity to fq.
	// Tie-break on priority.

	// We need to iterate over all the donors to do this. Unfortunately,
	// our donors are organized in a heap. We'll visit each node with a
	// recursive call. This is relatively safe since there are only sem->m
	// donors, at most. We won't recurse too deeply to have to worry about
	// our stack. (Even with 128 CPUs, our nest depth is at most 7 deep.)
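	// (A binary heap with n nodes has height floor(log2(n)), so 128 donors
	// give a height of 7, which bounds the recursion below.)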
2526
2527 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2528 ikglp_wait_state_t *donor = NULL;
2529 int distance = MIG_NONE;
2530 int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
2531 ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2532
2533 __find_closest_donor(gpu, sem->donors.root, &donor, &distance);
2534
2535 TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
2536 "(non-aff wanted %s/%d). differs = %d\n",
2537 donor->task->comm, donor->task->pid,
2538 distance,
2539 ikglp_get_idx(sem, fq),
2540 default_donor->task->comm, default_donor->task->pid,
2541 (donor->task != default_donor->task)
2542 );
2543
2544 return(donor);
2545}
2546
2547
2548
2549void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2550{
2551 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2552 int replica = ikglp_get_idx(sem, fq);
2553 int gpu = replica_to_gpu(aff, replica);
2554 struct ikglp_queue_info *info = &aff->q_info[replica];
2555 lt_t est_time;
2556 lt_t est_len_before;
2557
2558 if(current == t) {
2559 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
2560 }
2561
2562 est_len_before = info->estimated_len;
2563 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2564 info->estimated_len += est_time;
2565
2566 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
2567 ikglp_get_idx(sem, info->q),
2568 est_len_before, est_time,
2569 info->estimated_len);
2570
2571 // if(aff->shortest_queue == info) {
2572 // // we may no longer be the shortest
2573 // aff->shortest_queue = ikglp_aff_find_shortest(aff);
2574 //
2575 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2576 // ikglp_get_idx(sem, aff->shortest_queue->q),
2577 // aff->shortest_queue->q->count,
2578 // aff->shortest_queue->estimated_len);
2579 // }
2580}
2581
2582void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2583{
2584 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2585 int replica = ikglp_get_idx(sem, fq);
2586 int gpu = replica_to_gpu(aff, replica);
2587 struct ikglp_queue_info *info = &aff->q_info[replica];
2588 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2589
2590 if(est_time > info->estimated_len) {
2591 WARN_ON(1);
2592 info->estimated_len = 0;
2593 }
2594 else {
2595 info->estimated_len -= est_time;
2596 }
2597
2598 TRACE_CUR("fq %d est len is now %llu\n",
2599 ikglp_get_idx(sem, info->q),
2600 info->estimated_len);
2601
2602 // check to see if we're the shortest queue now.
2603 // if((aff->shortest_queue != info) &&
2604 // (aff->shortest_queue->estimated_len > info->estimated_len)) {
2605 //
2606 // aff->shortest_queue = info;
2607 //
2608 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2609 // ikglp_get_idx(sem, info->q),
2610 // info->q->count,
2611 // info->estimated_len);
2612 // }
2613}
2614
2615void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2616 struct fifo_queue* fq,
2617 struct task_struct* t)
2618{
2619 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2620 int replica = ikglp_get_idx(sem, fq);
2621 int gpu = replica_to_gpu(aff, replica);
2622
2623 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
2624
2625 TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
2626 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
2627
	// count the number of resource holders
2629 ++(*(aff->q_info[replica].nr_cur_users));
2630
2631 reg_nv_device(gpu, 1, t); // register
2632
2633 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
2634 reset_gpu_tracker(t);
2635 start_gpu_tracker(t);
2636}
2637
2638void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
2639 struct fifo_queue* fq,
2640 struct task_struct* t)
2641{
2642 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2643 int replica = ikglp_get_idx(sem, fq);
2644 int gpu = replica_to_gpu(aff, replica);
2645 lt_t est_time;
2646
2647 stop_gpu_tracker(t); // stop the tracker before we do anything else.
2648
2649 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2650
	// count the number of resource holders
2652 --(*(aff->q_info[replica].nr_cur_users));
2653
2654 reg_nv_device(gpu, 0, t); // unregister
2655
2656 // update estimates
2657 update_gpu_estimate(t, get_gpu_time(t));
2658
2659 TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
2660 "estimated was %llu. diff is %d\n",
2661 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
2662 tsk_rt(t)->gpu_migration,
2663 get_gpu_time(t),
2664 est_time,
2665 (long long)get_gpu_time(t) - (long long)est_time);
2666
2667 tsk_rt(t)->last_gpu = gpu;
2668}
2669
2670struct ikglp_affinity_ops gpu_ikglp_affinity =
2671{
2672 .advise_enqueue = gpu_ikglp_advise_enqueue,
2673 .advise_steal = gpu_ikglp_advise_steal,
2674 .advise_donee_selection = gpu_ikglp_advise_donee_selection,
2675 .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
2676
2677 .notify_enqueue = gpu_ikglp_notify_enqueue,
2678 .notify_dequeue = gpu_ikglp_notify_dequeue,
2679 .notify_acquired = gpu_ikglp_notify_acquired,
2680 .notify_freed = gpu_ikglp_notify_freed,
2681
2682 .replica_to_resource = gpu_replica_to_resource,
2683};
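For orientation, the ops table above is consulted by the IKGLP implementation at fixed points in a request's lifetime (advise_* before a queueing decision, notify_* after the fact). Below is a minimal sketch of the dispatch pattern, including the fall-back to the plain shortest-queue heuristic; the sem->aff_obs field and the exact hook point are assumptions inferred from the analogous KFMLP code later in this patch, not verified against ikglp_lock.c.

/* Hedged sketch only; not part of the patch. */
static struct fifo_queue* choose_fq(struct ikglp_semaphore *sem,
				    struct task_struct *t)
{
	struct fifo_queue *fq = NULL;
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
	if (sem->aff_obs)	/* observer attached via ikglp_gpu_aff_obs_new() */
		fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
#endif
	if (!fq)
		fq = sem->shortest_fifo_queue;	/* non-affinity default */
	return fq;
}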
2684
2685struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2686 void* __user args)
2687{
2688 return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
2689}
2690
2691
2692
2693
2694
2695
2696
2697
2698// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
2699
2700struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2701{
2702 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2703 int min_count;
2704 int min_nr_users;
2705 struct ikglp_queue_info *shortest;
2706 struct fifo_queue *to_enqueue;
2707 int i;
2708
2709 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
2710
2711 shortest = &aff->q_info[0];
2712 min_count = shortest->q->count;
2713 min_nr_users = *(shortest->nr_cur_users);
2714
2715 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2716 ikglp_get_idx(sem, shortest->q),
2717 shortest->q->count,
2718 min_nr_users);
2719
2720 for(i = 1; i < sem->nr_replicas; ++i) {
2721 int len = aff->q_info[i].q->count;
2722
2723 // queue is smaller, or they're equal and the other has a smaller number
2724 // of total users.
2725 //
2726	// tie-break on the smallest number of simultaneous users. this only kicks in
2727	// when there is more than one empty queue.
2728 if((len < min_count) ||
2729 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2730 shortest = &aff->q_info[i];
2731 min_count = shortest->q->count;
2732 min_nr_users = *(aff->q_info[i].nr_cur_users);
2733 }
2734
2735 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2736 ikglp_get_idx(sem, aff->q_info[i].q),
2737 aff->q_info[i].q->count,
2738 *(aff->q_info[i].nr_cur_users));
2739 }
2740
2741 to_enqueue = shortest->q;
2742 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
2743 ikglp_get_idx(sem, to_enqueue),
2744 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2745
2746 return to_enqueue;
2747}
2748
2749ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2750 struct fifo_queue* dst)
2751{
2752 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2753 // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
2754 return ikglp_find_hp_waiter_to_steal(sem);
2755}
2756
2757ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
2758{
2759 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2760 ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2761 return(donee);
2762}
2763
2764ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2765{
2766 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2767 ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2768 return(donor);
2769}
2770
2771void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2772{
2773 // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
2774}
2775
2776void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2777{
2778 // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
2779}
2780
2781void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2782{
2783 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2784 int replica = ikglp_get_idx(sem, fq);
2785 int gpu = replica_to_gpu(aff, replica);
2786
2787 // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
2788
2789	// count the number of resource holders
2790 ++(*(aff->q_info[replica].nr_cur_users));
2791
2792 reg_nv_device(gpu, 1, t); // register
2793}
2794
2795void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2796{
2797 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2798 int replica = ikglp_get_idx(sem, fq);
2799 int gpu = replica_to_gpu(aff, replica);
2800
2801 // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
2802	// count the number of resource holders
2803 --(*(aff->q_info[replica].nr_cur_users));
2804
2805 reg_nv_device(gpu, 0, t); // unregister
2806}
2807
2808struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
2809{
2810 .advise_enqueue = simple_gpu_ikglp_advise_enqueue,
2811 .advise_steal = simple_gpu_ikglp_advise_steal,
2812 .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
2813 .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
2814
2815 .notify_enqueue = simple_gpu_ikglp_notify_enqueue,
2816 .notify_dequeue = simple_gpu_ikglp_notify_dequeue,
2817 .notify_acquired = simple_gpu_ikglp_notify_acquired,
2818 .notify_freed = simple_gpu_ikglp_notify_freed,
2819
2820 .replica_to_resource = gpu_replica_to_resource,
2821};
2822
2823struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2824 void* __user args)
2825{
2826 return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
2827}
2828
2829#endif
2830
2831
2832
2833
2834
2835
2836
2837
2838
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 36e314625d86..1d97462cc128 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
10{ 10{
11 BUG_ON(!t); 11 BUG_ON(!t);
12 /* prepare next release */ 12 /* prepare next release */
13 t->rt_param.job_params.release = t->rt_param.job_params.deadline; 13
14 t->rt_param.job_params.deadline += get_rt_period(t); 14 if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
15 /* allow the release point to slip if we've passed our deadline. */
16 lt_t now = litmus_clock();
17 t->rt_param.job_params.release =
18 (t->rt_param.job_params.deadline < now) ?
19 now : t->rt_param.job_params.deadline;
20 t->rt_param.job_params.deadline =
21 t->rt_param.job_params.release + get_rt_period(t);
22 }
23 else {
24 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
25 t->rt_param.job_params.deadline += get_rt_period(t);
26 }
27
15 t->rt_param.job_params.exec_time = 0; 28 t->rt_param.job_params.exec_time = 0;
16 /* update job sequence number */ 29 /* update job sequence number */
17 t->rt_param.job_params.job_no++; 30 t->rt_param.job_params.job_no++;
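The hunk above introduces two release policies in prepare_for_next_period(). A hedged restatement as a standalone helper (not part of the patch; lt_t is the LITMUS time type and 'now' stands in for litmus_clock()):

/* Sketch of the two policies: a RT_CLASS_SOFT_W_SLIP task that finishes
 * past its deadline re-anchors its next release at the current time,
 * while any other task releases strictly periodically and simply becomes
 * tardy. */
static void next_release(lt_t *release, lt_t *deadline, lt_t period,
			 lt_t now, int allow_slip)
{
	if (allow_slip && *deadline < now)
		*release = now;		/* late: slip the release point */
	else
		*release = *deadline;	/* strictly periodic release */
	*deadline = *release + period;
}

For example, with a 10ms period, a previous deadline at 100ms, and completion at 104ms, a slipping task gets release = 104ms and deadline = 114ms, whereas a non-slipping task gets release = 100ms and deadline = 110ms and starts its next job already tardy.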
diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c
new file mode 100644
index 000000000000..5ef5e54d600d
--- /dev/null
+++ b/litmus/kexclu_affinity.c
@@ -0,0 +1,92 @@
1#include <litmus/fdso.h>
2#include <litmus/sched_plugin.h>
3#include <litmus/trace.h>
4#include <litmus/litmus.h>
5#include <litmus/locking.h>
6
7#include <litmus/kexclu_affinity.h>
8
9static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg);
10static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg);
11static int close_generic_aff_obs(struct od_table_entry* entry);
12static void destroy_generic_aff_obs(obj_type_t type, void* sem);
13
14struct fdso_ops generic_affinity_ops = {
15 .create = create_generic_aff_obs,
16 .open = open_generic_aff_obs,
17 .close = close_generic_aff_obs,
18 .destroy = destroy_generic_aff_obs
19};
20
21static atomic_t aff_obs_id_gen = ATOMIC_INIT(0);
22
23static inline bool is_affinity_observer(struct od_table_entry *entry)
24{
25 return (entry->class == &generic_affinity_ops);
26}
27
28static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry)
29{
30 BUG_ON(!is_affinity_observer(entry));
31 return (struct affinity_observer*) entry->obj->obj;
32}
33
34static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg)
35{
36 struct affinity_observer* aff_obs;
37 int err;
38
39 err = litmus->allocate_aff_obs(&aff_obs, type, arg);
40 if (err == 0) {
41 BUG_ON(!aff_obs->lock);
42 aff_obs->type = type;
43 *obj_ref = aff_obs;
44 }
45 return err;
46}
47
48static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg)
49{
50 struct affinity_observer* aff_obs = get_affinity_observer(entry);
51 if (aff_obs->ops->open)
52 return aff_obs->ops->open(aff_obs, arg);
53 else
54 return 0; /* default: any task can open it */
55}
56
57static int close_generic_aff_obs(struct od_table_entry* entry)
58{
59 struct affinity_observer* aff_obs = get_affinity_observer(entry);
60 if (aff_obs->ops->close)
61 return aff_obs->ops->close(aff_obs);
62 else
63 return 0; /* default: closing succeeds */
64}
65
66static void destroy_generic_aff_obs(obj_type_t type, void* obj)
67{
68 struct affinity_observer* aff_obs = (struct affinity_observer*) obj;
69 aff_obs->ops->deallocate(aff_obs);
70}
71
72
73struct litmus_lock* get_lock_from_od(int od)
74{
75 extern struct fdso_ops generic_lock_ops;
76
77 struct od_table_entry *entry = get_entry_for_od(od);
78
79 if(entry && entry->class == &generic_lock_ops) {
80 return (struct litmus_lock*) entry->obj->obj;
81 }
82 return NULL;
83}
84
85void affinity_observer_new(struct affinity_observer* aff,
86 struct affinity_observer_ops* ops,
87 struct affinity_observer_args* args)
88{
89 aff->ops = ops;
90 aff->lock = get_lock_from_od(args->lock_od);
91 aff->ident = atomic_inc_return(&aff_obs_id_gen);
92} \ No newline at end of file
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
new file mode 100644
index 000000000000..bff857ed8d4e
--- /dev/null
+++ b/litmus/kfmlp_lock.c
@@ -0,0 +1,1002 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/kfmlp_lock.h>
14
15static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
16 struct kfmlp_queue* queue)
17{
18 return (queue - &sem->queues[0]);
19}
20
21static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
22 struct task_struct* holder)
23{
24 int i;
25 for(i = 0; i < sem->num_resources; ++i)
26 if(sem->queues[i].owner == holder)
27 return(&sem->queues[i]);
28 return(NULL);
29}
30
31/* caller is responsible for locking */
32static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
33 struct task_struct *skip)
34{
35 struct list_head *pos;
36 struct task_struct *queued, *found = NULL;
37
38 list_for_each(pos, &kqueue->wait.task_list) {
39 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
40 task_list)->private;
41
42 /* Compare task prios, find high prio task. */
43 //if (queued != skip && edf_higher_prio(queued, found))
44 if (queued != skip && litmus->compare(queued, found))
45 found = queued;
46 }
47 return found;
48}
49
50static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
51 struct kfmlp_queue* search_start)
52{
53 // we start our search at search_start instead of at the beginning of the
54 // queue list to load-balance across all resources.
55 struct kfmlp_queue* step = search_start;
56 struct kfmlp_queue* shortest = sem->shortest_queue;
57
58 do
59 {
60 step = (step+1 != &sem->queues[sem->num_resources]) ?
61 step+1 : &sem->queues[0];
62
63 if(step->count < shortest->count)
64 {
65 shortest = step;
66 if(step->count == 0)
67 break; /* can't get any shorter */
68 }
69
70 }while(step != search_start);
71
72 return(shortest);
73}
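The circular walk above starts just past search_start rather than at queue 0, so ties are spread across replicas instead of always favoring the first queue. A hedged distillation on a plain array of waiter counts (names invented for illustration):

/* Return the index of the shortest queue, scanning circularly from
 * 'start' and keeping 'cur_shortest' unless something shorter is found. */
static int find_shortest_idx(const int *count, int n, int start,
			     int cur_shortest)
{
	int i = start;
	do {
		i = (i + 1 == n) ? 0 : i + 1;
		if (count[i] < count[cur_shortest]) {
			cur_shortest = i;
			if (count[i] == 0)
				break;	/* cannot get any shorter */
		}
	} while (i != start);
	return cur_shortest;
}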
74
75
76static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
77 wait_queue_t** to_steal,
78 struct kfmlp_queue** to_steal_from)
79{
80 /* must hold sem->lock */
81
82 int i;
83
84 *to_steal = NULL;
85 *to_steal_from = NULL;
86
87 for(i = 0; i < sem->num_resources; ++i)
88 {
89 if( (sem->queues[i].count > 1) &&
90 ((*to_steal_from == NULL) ||
91 //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
92 (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
93 {
94 *to_steal_from = &sem->queues[i];
95 }
96 }
97
98 if(*to_steal_from)
99 {
100 struct list_head *pos;
101 struct task_struct *target = (*to_steal_from)->hp_waiter;
102
103 TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
104 target->comm,
105 target->pid,
106 kfmlp_get_idx(sem, *to_steal_from));
107
108 list_for_each(pos, &(*to_steal_from)->wait.task_list)
109 {
110 wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
111 struct task_struct *queued = (struct task_struct*) node->private;
112 /* Compare task prios, find high prio task. */
113 if (queued == target)
114 {
115 *to_steal = node;
116
117 TRACE_CUR("steal: selected %s/%d from queue %d\n",
118 queued->comm, queued->pid,
119 kfmlp_get_idx(sem, *to_steal_from));
120
121 return queued;
122 }
123 }
124
125 TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
126 target->comm,
127 target->pid,
128 kfmlp_get_idx(sem, *to_steal_from));
129 }
130
131 return NULL;
132}
133
134static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
135 struct kfmlp_queue *dst,
136 wait_queue_t *wait,
137 struct kfmlp_queue *src)
138{
139 struct task_struct* t = (struct task_struct*) wait->private;
140
141 __remove_wait_queue(&src->wait, wait);
142 --(src->count);
143
144 if(t == src->hp_waiter) {
145 src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
146
147 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
148 kfmlp_get_idx(sem, src),
149 (src->hp_waiter) ? src->hp_waiter->comm : "nil",
150 (src->hp_waiter) ? src->hp_waiter->pid : -1);
151
152 if(src->owner && tsk_rt(src->owner)->inh_task == t) {
153 litmus->decrease_prio(src->owner, src->hp_waiter);
154 }
155 }
156
157 if(sem->shortest_queue->count > src->count) {
158 sem->shortest_queue = src;
159 TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
160 }
161
162#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
163 if(sem->aff_obs) {
164 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
165 }
166#endif
167
168 init_waitqueue_entry(wait, t);
169 __add_wait_queue_tail_exclusive(&dst->wait, wait);
170 ++(dst->count);
171
172 if(litmus->compare(t, dst->hp_waiter)) {
173 dst->hp_waiter = t;
174
175 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
176 kfmlp_get_idx(sem, dst),
177 t->comm, t->pid);
178
179 if(dst->owner && litmus->compare(t, dst->owner))
180 {
181 litmus->increase_prio(dst->owner, t);
182 }
183 }
184
185#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
186 if(sem->aff_obs) {
187 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
188 }
189#endif
190}
191
192
193int kfmlp_lock(struct litmus_lock* l)
194{
195 struct task_struct* t = current;
196 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
197 struct kfmlp_queue* my_queue = NULL;
198 wait_queue_t wait;
199 unsigned long flags;
200
201 if (!is_realtime(t))
202 return -EPERM;
203
204 spin_lock_irqsave(&sem->lock, flags);
205
206#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
207 if(sem->aff_obs) {
208 my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
209 }
210 if(!my_queue) {
211 my_queue = sem->shortest_queue;
212 }
213#else
214 my_queue = sem->shortest_queue;
215#endif
216
217 if (my_queue->owner) {
218 /* resource is not free => must suspend and wait */
219 TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
220 kfmlp_get_idx(sem, my_queue),
221 my_queue->count);
222
223 init_waitqueue_entry(&wait, t);
224
225 /* FIXME: interruptible would be nice some day */
226 set_task_state(t, TASK_UNINTERRUPTIBLE);
227
228 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
229
230 TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
231 kfmlp_get_idx(sem, my_queue),
232 (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
233 (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
234
235 /* check if we need to activate priority inheritance */
236 //if (edf_higher_prio(t, my_queue->hp_waiter))
237 if (litmus->compare(t, my_queue->hp_waiter)) {
238 my_queue->hp_waiter = t;
239 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
240 kfmlp_get_idx(sem, my_queue),
241 t->comm, t->pid);
242
243 //if (edf_higher_prio(t, my_queue->owner))
244 if (litmus->compare(t, my_queue->owner)) {
245 litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
246 }
247 }
248
249 ++(my_queue->count);
250
251 if(my_queue == sem->shortest_queue) {
252 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
253 TRACE_CUR("queue %d is the shortest\n",
254 kfmlp_get_idx(sem, sem->shortest_queue));
255 }
256
257#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
258 if(sem->aff_obs) {
259 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
260 }
261#endif
262
263 /* release lock before sleeping */
264 spin_unlock_irqrestore(&sem->lock, flags);
265
266 /* We depend on the FIFO order. Thus, we don't need to recheck
267 * when we wake up; we are guaranteed to have the lock since
268 * there is only one wake up per release (or steal).
269 */
270 schedule();
271
272
273 if(my_queue->owner == t) {
274 TRACE_CUR("queue %d: acquired through waiting\n",
275 kfmlp_get_idx(sem, my_queue));
276 }
277 else {
278 /* this case may happen if our wait entry was stolen
279 between queues. record where we went. */
280 my_queue = kfmlp_get_queue(sem, t);
281
282 BUG_ON(!my_queue);
283 TRACE_CUR("queue %d: acquired through stealing\n",
284 kfmlp_get_idx(sem, my_queue));
285 }
286 }
287 else {
288 TRACE_CUR("queue %d: acquired immediately\n",
289 kfmlp_get_idx(sem, my_queue));
290
291 my_queue->owner = t;
292
293 ++(my_queue->count);
294
295 if(my_queue == sem->shortest_queue) {
296 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
297 TRACE_CUR("queue %d is the shortest\n",
298 kfmlp_get_idx(sem, sem->shortest_queue));
299 }
300
301#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
302 if(sem->aff_obs) {
303 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
304 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
305 }
306#endif
307
308 spin_unlock_irqrestore(&sem->lock, flags);
309 }
310
311
312#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
313 if(sem->aff_obs) {
314 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
315 }
316#endif
317 return kfmlp_get_idx(sem, my_queue);
318}
319
320
321int kfmlp_unlock(struct litmus_lock* l)
322{
323 struct task_struct *t = current, *next;
324 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
325 struct kfmlp_queue *my_queue, *to_steal_from;
326 unsigned long flags;
327 int err = 0;
328
329 my_queue = kfmlp_get_queue(sem, t);
330
331 if (!my_queue) {
332 err = -EINVAL;
333 goto out;
334 }
335
336 spin_lock_irqsave(&sem->lock, flags);
337
338 TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
339
340 my_queue->owner = NULL; // clear ownership
341 --(my_queue->count);
342
343 if(my_queue->count < sem->shortest_queue->count)
344 {
345 sem->shortest_queue = my_queue;
346 TRACE_CUR("queue %d is the shortest\n",
347 kfmlp_get_idx(sem, sem->shortest_queue));
348 }
349
350#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
351 if(sem->aff_obs) {
352 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
353 sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
354 }
355#endif
356
357 /* we lose the benefit of priority inheritance (if any) */
358 if (tsk_rt(t)->inh_task)
359 litmus->decrease_prio(t, NULL);
360
361
362 /* check if there are jobs waiting for this resource */
363RETRY:
364 next = __waitqueue_remove_first(&my_queue->wait);
365 if (next) {
366		/* next becomes the resource holder */
367 my_queue->owner = next;
368
369#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
370 if(sem->aff_obs) {
371 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
372 }
373#endif
374
375 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
376 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
377
378 /* determine new hp_waiter if necessary */
379 if (next == my_queue->hp_waiter) {
380 TRACE_TASK(next, "was highest-prio waiter\n");
381 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
382 if (my_queue->hp_waiter)
383 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
384 else
385 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
386 } else {
387 /* Well, if next is not the highest-priority waiter,
388 * then it ought to inherit the highest-priority
389 * waiter's priority. */
390 litmus->increase_prio(next, my_queue->hp_waiter);
391 }
392
393 /* wake up next */
394 wake_up_process(next);
395 }
396 else {
397 // TODO: put this stealing logic before we attempt to release
398		// our resource. (simplifies code and gets rid of the ugly goto RETRY.)
399 wait_queue_t *wait;
400
401 TRACE_CUR("queue %d: looking to steal someone...\n",
402 kfmlp_get_idx(sem, my_queue));
403
404#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
405 next = (sem->aff_obs) ?
406 sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
407 kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
408#else
409 next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
410#endif
411
412 if(next) {
413 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
414 kfmlp_get_idx(sem, my_queue),
415 next->comm, next->pid,
416 kfmlp_get_idx(sem, to_steal_from));
417
418 kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
419
420 goto RETRY; // will succeed this time.
421 }
422 else {
423 TRACE_CUR("queue %d: no one to steal.\n",
424 kfmlp_get_idx(sem, my_queue));
425 }
426 }
427
428 spin_unlock_irqrestore(&sem->lock, flags);
429
430out:
431 return err;
432}
433
434int kfmlp_close(struct litmus_lock* l)
435{
436 struct task_struct *t = current;
437 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
438 struct kfmlp_queue *my_queue;
439 unsigned long flags;
440
441 int owner;
442
443 spin_lock_irqsave(&sem->lock, flags);
444
445 my_queue = kfmlp_get_queue(sem, t);
446 owner = (my_queue) ? (my_queue->owner == t) : 0;
447
448 spin_unlock_irqrestore(&sem->lock, flags);
449
450 if (owner)
451 kfmlp_unlock(l);
452
453 return 0;
454}
455
456void kfmlp_free(struct litmus_lock* l)
457{
458 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
459 kfree(sem->queues);
460 kfree(sem);
461}
462
463
464
465struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
466{
467 struct kfmlp_semaphore* sem;
468 int num_resources = 0;
469 int i;
470
471 if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
472 {
473 return(NULL);
474 }
475 if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
476 {
477 return(NULL);
478 }
479 if(num_resources < 1)
480 {
481 return(NULL);
482 }
483
484 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
485 if(!sem)
486 {
487 return(NULL);
488 }
489
490 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
491 if(!sem->queues)
492 {
493 kfree(sem);
494 return(NULL);
495 }
496
497 sem->litmus_lock.ops = ops;
498 spin_lock_init(&sem->lock);
499 sem->num_resources = num_resources;
500
501 for(i = 0; i < num_resources; ++i)
502 {
503 sem->queues[i].owner = NULL;
504 sem->queues[i].hp_waiter = NULL;
505 init_waitqueue_head(&sem->queues[i].wait);
506 sem->queues[i].count = 0;
507 }
508
509 sem->shortest_queue = &sem->queues[0];
510
511#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
512 sem->aff_obs = NULL;
513#endif
514
515 return &sem->litmus_lock;
516}
517
518
519
520
521#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
522
523static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
524{
525 int gpu = replica % aff->nr_rsrc;
526 return gpu;
527}
528
529static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
530{
531 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
532 return gpu;
533}
534
535static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
536{
537 int replica = gpu - aff->offset;
538 return replica;
539}
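A worked example of this replica-to-GPU mapping (numbers invented for illustration):

/* Suppose a KFMLP lock with num_resources = 4 replicas is configured with
 * nr_simult = 2 holders per GPU and offset = 4; then nr_rsrc = 4 / 2 = 2
 * physical GPUs are covered:
 *
 *   replica 0 -> 0 % 2 = 0 -> gpu 4      replica 2 -> 2 % 2 = 0 -> gpu 4
 *   replica 1 -> 1 % 2 = 1 -> gpu 5      replica 3 -> 3 % 2 = 1 -> gpu 5
 *
 * Replicas are striped across the GPUs modulo nr_rsrc and then shifted
 * into this observer's device range by 'offset'.
 */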
540
541
542int kfmlp_aff_obs_close(struct affinity_observer* obs)
543{
544 return 0;
545}
546
547void kfmlp_aff_obs_free(struct affinity_observer* obs)
548{
549 struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
550 kfree(kfmlp_aff->nr_cur_users_on_rsrc);
551 kfree(kfmlp_aff->q_info);
552 kfree(kfmlp_aff);
553}
554
555static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
556 struct kfmlp_affinity_ops* kfmlp_ops,
557 void* __user args)
558{
559 struct kfmlp_affinity* kfmlp_aff;
560 struct gpu_affinity_observer_args aff_args;
561 struct kfmlp_semaphore* sem;
562 int i;
563 unsigned long flags;
564
565 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
566 return(NULL);
567 }
568 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
569 return(NULL);
570 }
571
572 sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
573
574 if(sem->litmus_lock.type != KFMLP_SEM) {
575 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
576 return(NULL);
577 }
578
579 if((aff_args.nr_simult_users <= 0) ||
580 (sem->num_resources%aff_args.nr_simult_users != 0)) {
581 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
582 "(%d) per replica. #replicas should be evenly divisible "
583 "by #simult_users.\n",
584 sem->litmus_lock.ident,
585 sem->num_resources,
586 aff_args.nr_simult_users);
587 return(NULL);
588 }
589
590 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
591 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
592 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
593// return(NULL);
594 }
595
596 kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
597 if(!kfmlp_aff) {
598 return(NULL);
599 }
600
601 kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
602 if(!kfmlp_aff->q_info) {
603 kfree(kfmlp_aff);
604 return(NULL);
605 }
606
607 kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
608 if(!kfmlp_aff->nr_cur_users_on_rsrc) {
609 kfree(kfmlp_aff->q_info);
610 kfree(kfmlp_aff);
611 return(NULL);
612 }
613
614 affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
615
616 kfmlp_aff->ops = kfmlp_ops;
617 kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
618 kfmlp_aff->nr_simult = aff_args.nr_simult_users;
619 kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
620
621	memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_simult));
622
623 for(i = 0; i < sem->num_resources; ++i) {
624 kfmlp_aff->q_info[i].q = &sem->queues[i];
625 kfmlp_aff->q_info[i].estimated_len = 0;
626
627 // multiple q_info's will point to the same resource (aka GPU) if
628 // aff_args.nr_simult_users > 1
629 kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
630 }
631
632 // attach observer to the lock
633 spin_lock_irqsave(&sem->lock, flags);
634 sem->aff_obs = kfmlp_aff;
635 spin_unlock_irqrestore(&sem->lock, flags);
636
637 return &kfmlp_aff->obs;
638}
639
640
641
642
643static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
644 struct kfmlp_queue* fq) {
645 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
646 return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
647}
648
649
650// Smart KFMLP Affinity
651
652//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
653//{
654// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
655// struct kfmlp_queue_info *shortest = &aff->q_info[0];
656// int i;
657//
658// for(i = 1; i < sem->num_resources; ++i) {
659// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
660// shortest = &aff->q_info[i];
661// }
662// }
663//
664// return(shortest);
665//}
666
667struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
668{
669 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
670 lt_t min_len;
671 int min_nr_users;
672 struct kfmlp_queue_info *shortest;
673 struct kfmlp_queue *to_enqueue;
674 int i;
675 int affinity_gpu;
676
677	// simply pick the shortest queue if we have no affinity, or if we have
678	// affinity with the shortest queue
679 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
680 affinity_gpu = aff->offset; // first gpu
681 TRACE_CUR("no affinity\n");
682 }
683 else {
684 affinity_gpu = tsk_rt(t)->last_gpu;
685 }
686
687 // all things being equal, let's start with the queue with which we have
688 // affinity. this helps us maintain affinity even when we don't have
689	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
690 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
691
692// if(shortest == aff->shortest_queue) {
693// TRACE_CUR("special case: have affinity with shortest queue\n");
694// goto out;
695// }
696
697 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
698 min_nr_users = *(shortest->nr_cur_users);
699
700 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
701 get_gpu_estimate(t, MIG_LOCAL),
702 kfmlp_get_idx(sem, shortest->q),
703 min_len);
704
705 for(i = 0; i < sem->num_resources; ++i) {
706 if(&aff->q_info[i] != shortest) {
707
708 lt_t est_len =
709 aff->q_info[i].estimated_len +
710 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
711
712 // queue is smaller, or they're equal and the other has a smaller number
713 // of total users.
714 //
715			// tie-break on the smallest number of simultaneous users. this only kicks in
716			// when there is more than one empty queue.
717 if((est_len < min_len) ||
718 ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
719 shortest = &aff->q_info[i];
720 min_len = est_len;
721 min_nr_users = *(aff->q_info[i].nr_cur_users);
722 }
723
724 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
725 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
726 kfmlp_get_idx(sem, aff->q_info[i].q),
727 est_len);
728 }
729 }
730
731 to_enqueue = shortest->q;
732 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
733 kfmlp_get_idx(sem, to_enqueue),
734 kfmlp_get_idx(sem, sem->shortest_queue));
735
736 return to_enqueue;
737}
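The comparison buried in the loop above is a two-key ordering; a hedged restatement (helper name invented, not part of the patch):

/* A candidate replica wins if its estimated completion time (current
 * backlog plus this task's migration-distance-dependent request length)
 * is strictly shorter; the number of tasks currently holding that GPU
 * breaks ties. */
static int replica_is_better(lt_t cand_len, int cand_users,
			     lt_t best_len, int best_users)
{
	return (cand_len < best_len) ||
	       ((cand_len == best_len) && (cand_users < best_users));
}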
738
739struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
740{
741 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
742
743 // For now, just steal highest priority waiter
744 // TODO: Implement affinity-aware stealing.
745
746 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
747}
748
749
750void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
751{
752 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
753 int replica = kfmlp_get_idx(sem, fq);
754 int gpu = replica_to_gpu(aff, replica);
755 struct kfmlp_queue_info *info = &aff->q_info[replica];
756 lt_t est_time;
757 lt_t est_len_before;
758
759 if(current == t) {
760 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
761 }
762
763 est_len_before = info->estimated_len;
764 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
765 info->estimated_len += est_time;
766
767 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
768 kfmlp_get_idx(sem, info->q),
769 est_len_before, est_time,
770 info->estimated_len);
771
772// if(aff->shortest_queue == info) {
773// // we may no longer be the shortest
774// aff->shortest_queue = kfmlp_aff_find_shortest(aff);
775//
776// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
777// kfmlp_get_idx(sem, aff->shortest_queue->q),
778// aff->shortest_queue->q->count,
779// aff->shortest_queue->estimated_len);
780// }
781}
782
783void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
784{
785 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
786 int replica = kfmlp_get_idx(sem, fq);
787 int gpu = replica_to_gpu(aff, replica);
788 struct kfmlp_queue_info *info = &aff->q_info[replica];
789 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
790
791 if(est_time > info->estimated_len) {
792 WARN_ON(1);
793 info->estimated_len = 0;
794 }
795 else {
796 info->estimated_len -= est_time;
797 }
798
799 TRACE_CUR("fq %d est len is now %llu\n",
800 kfmlp_get_idx(sem, info->q),
801 info->estimated_len);
802
803 // check to see if we're the shortest queue now.
804// if((aff->shortest_queue != info) &&
805// (aff->shortest_queue->estimated_len > info->estimated_len)) {
806//
807// aff->shortest_queue = info;
808//
809// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
810// kfmlp_get_idx(sem, info->q),
811// info->q->count,
812// info->estimated_len);
813// }
814}
815
816void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
817{
818 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
819 int replica = kfmlp_get_idx(sem, fq);
820 int gpu = replica_to_gpu(aff, replica);
821
822 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
823
824 TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
825 t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
826
827	// count the number of resource holders
828 ++(*(aff->q_info[replica].nr_cur_users));
829
830 reg_nv_device(gpu, 1, t); // register
831
832 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
833 reset_gpu_tracker(t);
834 start_gpu_tracker(t);
835}
836
837void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
838{
839 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
840 int replica = kfmlp_get_idx(sem, fq);
841 int gpu = replica_to_gpu(aff, replica);
842 lt_t est_time;
843
844 stop_gpu_tracker(t); // stop the tracker before we do anything else.
845
846 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
847
848 tsk_rt(t)->last_gpu = gpu;
849
850	// count the number of resource holders
851 --(*(aff->q_info[replica].nr_cur_users));
852
853 reg_nv_device(gpu, 0, t); // unregister
854
855 // update estimates
856 update_gpu_estimate(t, get_gpu_time(t));
857
858	TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %lld\n",
859 t->comm, t->pid, gpu,
860 get_gpu_time(t),
861 est_time,
862 (long long)get_gpu_time(t) - (long long)est_time);
863}
864
865struct kfmlp_affinity_ops gpu_kfmlp_affinity =
866{
867 .advise_enqueue = gpu_kfmlp_advise_enqueue,
868 .advise_steal = gpu_kfmlp_advise_steal,
869 .notify_enqueue = gpu_kfmlp_notify_enqueue,
870 .notify_dequeue = gpu_kfmlp_notify_dequeue,
871 .notify_acquired = gpu_kfmlp_notify_acquired,
872 .notify_freed = gpu_kfmlp_notify_freed,
873 .replica_to_resource = gpu_replica_to_resource,
874};
875
876struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
877 void* __user args)
878{
879 return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
880}
881
882
883
884
885
886
887
888
889// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
890
891struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
892{
893 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
894 int min_count;
895 int min_nr_users;
896 struct kfmlp_queue_info *shortest;
897 struct kfmlp_queue *to_enqueue;
898 int i;
899
900// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
901
902 shortest = &aff->q_info[0];
903 min_count = shortest->q->count;
904 min_nr_users = *(shortest->nr_cur_users);
905
906 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
907 kfmlp_get_idx(sem, shortest->q),
908 shortest->q->count,
909 min_nr_users);
910
911 for(i = 1; i < sem->num_resources; ++i) {
912 int len = aff->q_info[i].q->count;
913
914 // queue is smaller, or they're equal and the other has a smaller number
915 // of total users.
916 //
917		// tie-break on the smallest number of simultaneous users. this only kicks in
918		// when there is more than one empty queue.
919 if((len < min_count) ||
920 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
921 shortest = &aff->q_info[i];
922 min_count = shortest->q->count;
923 min_nr_users = *(aff->q_info[i].nr_cur_users);
924 }
925
926 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
927 kfmlp_get_idx(sem, aff->q_info[i].q),
928 aff->q_info[i].q->count,
929 *(aff->q_info[i].nr_cur_users));
930 }
931
932 to_enqueue = shortest->q;
933 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
934 kfmlp_get_idx(sem, to_enqueue),
935 kfmlp_get_idx(sem, sem->shortest_queue));
936
937 return to_enqueue;
938}
939
940struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
941{
942 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
943// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
944 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
945}
946
947void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
948{
949// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
950}
951
952void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
953{
954// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
955}
956
957void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
958{
959 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
960 int replica = kfmlp_get_idx(sem, fq);
961 int gpu = replica_to_gpu(aff, replica);
962
963// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
964
965	// count the number of resource holders
966 ++(*(aff->q_info[replica].nr_cur_users));
967
968 reg_nv_device(gpu, 1, t); // register
969}
970
971void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
972{
973 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
974 int replica = kfmlp_get_idx(sem, fq);
975 int gpu = replica_to_gpu(aff, replica);
976
977// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
978	// count the number of resource holders
979 --(*(aff->q_info[replica].nr_cur_users));
980
981 reg_nv_device(gpu, 0, t); // unregister
982}
983
984struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
985{
986 .advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
987 .advise_steal = simple_gpu_kfmlp_advise_steal,
988 .notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
989 .notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
990 .notify_acquired = simple_gpu_kfmlp_notify_acquired,
991 .notify_freed = simple_gpu_kfmlp_notify_freed,
992 .replica_to_resource = gpu_replica_to_resource,
993};
994
995struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
996 void* __user args)
997{
998 return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
999}
1000
1001#endif
1002
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 301390148d02..d1f836c8af6e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -21,6 +21,10 @@
21#include <litmus/affinity.h> 21#include <litmus/affinity.h>
22#endif 22#endif
23 23
24#ifdef CONFIG_LITMUS_NVIDIA
25#include <litmus/nvidia_info.h>
26#endif
27
24/* Number of RT tasks that exist in the system */ 28/* Number of RT tasks that exist in the system */
25atomic_t rt_task_count = ATOMIC_INIT(0); 29atomic_t rt_task_count = ATOMIC_INIT(0);
26static DEFINE_RAW_SPINLOCK(task_transition_lock); 30static DEFINE_RAW_SPINLOCK(task_transition_lock);
@@ -51,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
51struct release_heap* release_heap_alloc(int gfp_flags); 55struct release_heap* release_heap_alloc(int gfp_flags);
52void release_heap_free(struct release_heap* rh); 56void release_heap_free(struct release_heap* rh);
53 57
58#ifdef CONFIG_LITMUS_NVIDIA
59/*
60 * sys_register_nv_device
61 * @nv_device_id: The Nvidia device id that the task wants to register
62 * @reg_action: set to '1' to register the specified device, zero to unregister it.
63 * Syscall for registering the task's designated Nvidia device in the NV_DEVICE_REG array.
64 * Returns EFAULT if nv_device_id is out of range,
65 * 0 on success.
66 */
67asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
68{
69 /* register the device to caller (aka 'current') */
70 return(reg_nv_device(nv_device_id, reg_action, current));
71}
72#else
73asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
74{
75 return(-EINVAL);
76}
77#endif
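For context, user space reaches this through a syscall wrapper; the wrapper name below is hypothetical and only illustrates the reg_action semantics (1 registers the device to the calling task, 0 unregisters it). Note that the GPU affinity observers in this patch call reg_nv_device() automatically on acquire and release, so manual registration matters mainly when those observers are not in use.

/* Hedged usage sketch; register_nv_device() here is a hypothetical
 * user-space wrapper around the syscall above, not a documented API. */
static int run_on_gpu0(void)
{
	int err = register_nv_device(0, 1);	/* register GPU 0 */
	if (err)
		return err;
	/* ... issue GPU work ... */
	return register_nv_device(0, 0);	/* unregister when done */
}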
78
79
54/* 80/*
55 * sys_set_task_rt_param 81 * sys_set_task_rt_param
56 * @pid: Pid of the task which scheduling parameters must be changed 82 * @pid: Pid of the task which scheduling parameters must be changed
@@ -269,6 +295,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
269 return retval; 295 return retval;
270} 296}
271 297
298
272/* sys_null_call() is only used for determining raw system call 299/* sys_null_call() is only used for determining raw system call
273 * overheads (kernel entry, kernel exit). It has no useful side effects. 300 * overheads (kernel entry, kernel exit). It has no useful side effects.
274 * If ts is non-NULL, then the current Feather-Trace time is recorded. 301 * If ts is non-NULL, then the current Feather-Trace time is recorded.
@@ -286,12 +313,42 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
286 return ret; 313 return ret;
287} 314}
288 315
316
317#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
318void init_gpu_affinity_state(struct task_struct* p)
319{
320 // under-damped
321 //p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
322 //p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
323
324	// empirical
325 p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
326 p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
327
328 p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
329 p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
330
331 p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
332 p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
333
334 p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
335 p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
336
337 p->rt_param.gpu_migration = MIG_NONE;
338 p->rt_param.last_gpu = -1;
339}
340#endif
341
289/* p is a real-time task. Re-init its state as a best-effort task. */ 342/* p is a real-time task. Re-init its state as a best-effort task. */
290static void reinit_litmus_state(struct task_struct* p, int restore) 343static void reinit_litmus_state(struct task_struct* p, int restore)
291{ 344{
292 struct rt_task user_config = {}; 345 struct rt_task user_config = {};
293 void* ctrl_page = NULL; 346 void* ctrl_page = NULL;
294 347
348#ifdef CONFIG_LITMUS_NESTED_LOCKING
349 binheap_order_t prio_order = NULL;
350#endif
351
295 if (restore) { 352 if (restore) {
296 /* Safe user-space provided configuration data. 353 /* Safe user-space provided configuration data.
297 * and allocated page. */ 354 * and allocated page. */
@@ -299,11 +356,38 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
299 ctrl_page = p->rt_param.ctrl_page; 356 ctrl_page = p->rt_param.ctrl_page;
300 } 357 }
301 358
359#ifdef CONFIG_LITMUS_NESTED_LOCKING
360 prio_order = p->rt_param.hp_blocked_tasks.compare;
361#endif
362
302 /* We probably should not be inheriting any task's priority 363 /* We probably should not be inheriting any task's priority
303 * at this point in time. 364 * at this point in time.
304 */ 365 */
305 WARN_ON(p->rt_param.inh_task); 366 WARN_ON(p->rt_param.inh_task);
306 367
368#ifdef CONFIG_LITMUS_NESTED_LOCKING
369 WARN_ON(p->rt_param.blocked_lock);
370 WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks));
371#endif
372
373#ifdef CONFIG_LITMUS_SOFTIRQD
374 /* We probably should not have any tasklets executing for
375 * us at this time.
376 */
377 WARN_ON(p->rt_param.cur_klitirqd);
378 WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
379
380 if(p->rt_param.cur_klitirqd)
381 flush_pending(p->rt_param.cur_klitirqd, p);
382
383 if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
384 up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
385#endif
386
387#ifdef CONFIG_LITMUS_NVIDIA
388 WARN_ON(p->rt_param.held_gpus != 0);
389#endif
390
307 /* Cleanup everything else. */ 391 /* Cleanup everything else. */
308 memset(&p->rt_param, 0, sizeof(p->rt_param)); 392 memset(&p->rt_param, 0, sizeof(p->rt_param));
309 393
@@ -312,6 +396,15 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
312 p->rt_param.task_params = user_config; 396 p->rt_param.task_params = user_config;
313 p->rt_param.ctrl_page = ctrl_page; 397 p->rt_param.ctrl_page = ctrl_page;
314 } 398 }
399
400#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
401 init_gpu_affinity_state(p);
402#endif
403
404#ifdef CONFIG_LITMUS_NESTED_LOCKING
405 INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
406 raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
407#endif
315} 408}
316 409
317long litmus_admit_task(struct task_struct* tsk) 410long litmus_admit_task(struct task_struct* tsk)
@@ -358,6 +451,26 @@ long litmus_admit_task(struct task_struct* tsk)
358 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); 451 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
359 } 452 }
360 453
454
455#ifdef CONFIG_LITMUS_NVIDIA
456 atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
457#endif
458#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
459 init_gpu_affinity_state(tsk);
460#endif
461#ifdef CONFIG_LITMUS_NESTED_LOCKING
462 tsk_rt(tsk)->blocked_lock = NULL;
463 raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
464 //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler
465#endif
466#ifdef CONFIG_LITMUS_SOFTIRQD
467 /* proxy thread off by default */
468	tsk_rt(tsk)->is_proxy_thread = 0;
469	tsk_rt(tsk)->cur_klitirqd = NULL;
470 mutex_init(&tsk_rt(tsk)->klitirqd_sem);
471 atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD);
472#endif
473
361 retval = litmus->admit_task(tsk); 474 retval = litmus->admit_task(tsk);
362 475
363 if (!retval) { 476 if (!retval) {
@@ -403,7 +516,7 @@ static void synch_on_plugin_switch(void* info)
403 */ 516 */
404int switch_sched_plugin(struct sched_plugin* plugin) 517int switch_sched_plugin(struct sched_plugin* plugin)
405{ 518{
406 unsigned long flags; 519 //unsigned long flags;
407 int ret = 0; 520 int ret = 0;
408 521
409 BUG_ON(!plugin); 522 BUG_ON(!plugin);
@@ -417,8 +530,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
417 while (atomic_read(&cannot_use_plugin) < num_online_cpus()) 530 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
418 cpu_relax(); 531 cpu_relax();
419 532
533#ifdef CONFIG_LITMUS_SOFTIRQD
534 if(!klitirqd_is_dead())
535 {
536 kill_klitirqd();
537 }
538#endif
539
420 /* stop task transitions */ 540 /* stop task transitions */
421 raw_spin_lock_irqsave(&task_transition_lock, flags); 541 //raw_spin_lock_irqsave(&task_transition_lock, flags);
422 542
423 /* don't switch if there are active real-time tasks */ 543 /* don't switch if there are active real-time tasks */
424 if (atomic_read(&rt_task_count) == 0) { 544 if (atomic_read(&rt_task_count) == 0) {
@@ -436,7 +556,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
436 } else 556 } else
437 ret = -EBUSY; 557 ret = -EBUSY;
438out: 558out:
439 raw_spin_unlock_irqrestore(&task_transition_lock, flags); 559 //raw_spin_unlock_irqrestore(&task_transition_lock, flags);
440 atomic_set(&cannot_use_plugin, 0); 560 atomic_set(&cannot_use_plugin, 0);
441 return ret; 561 return ret;
442} 562}
diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
new file mode 100644
index 000000000000..300571a81bbd
--- /dev/null
+++ b/litmus/litmus_pai_softirq.c
@@ -0,0 +1,64 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19
20
21int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
22{
23 int ret = 0; /* assume failure */
24 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
25 {
26 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
27 BUG();
28 }
29
30 ret = litmus->enqueue_pai_tasklet(t);
31
32 return(ret);
33}
34
35EXPORT_SYMBOL(__litmus_tasklet_schedule);
36
37
38
39// failure causes default Linux handling.
40int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
41{
42 int ret = 0; /* assume failure */
43 return(ret);
44}
45EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
46
47
48// failure causes default Linux handling.
49int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
50{
51 int ret = 0; /* assume failure */
52 return(ret);
53}
54EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
55
56
57// failure causes default Linux handling.
58int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
59{
60 int ret = 0; /* assume failure */
61 return(ret);
62}
63EXPORT_SYMBOL(__litmus_schedule_work);
64
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
index 4bf725a36c9c..9ab7e015a3c1 100644
--- a/litmus/litmus_proc.c
+++ b/litmus/litmus_proc.c
@@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
20#ifdef CONFIG_RELEASE_MASTER 20#ifdef CONFIG_RELEASE_MASTER
21 *release_master_file = NULL, 21 *release_master_file = NULL,
22#endif 22#endif
23#ifdef CONFIG_LITMUS_SOFTIRQD
24 *klitirqd_file = NULL,
25#endif
23 *plugs_file = NULL; 26 *plugs_file = NULL;
24 27
25/* in litmus/sync.c */ 28/* in litmus/sync.c */
26int count_tasks_waiting_for_release(void); 29int count_tasks_waiting_for_release(void);
27 30
31extern int proc_read_klitirqd_stats(char *page, char **start,
32 off_t off, int count,
33 int *eof, void *data);
34
28static int proc_read_stats(char *page, char **start, 35static int proc_read_stats(char *page, char **start,
29 off_t off, int count, 36 off_t off, int count,
30 int *eof, void *data) 37 int *eof, void *data)
@@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
161 release_master_file->write_proc = proc_write_release_master; 168 release_master_file->write_proc = proc_write_release_master;
162#endif 169#endif
163 170
171#ifdef CONFIG_LITMUS_SOFTIRQD
172 klitirqd_file =
173 create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
174 proc_read_klitirqd_stats, NULL);
175#endif
176
164 stat_file = create_proc_read_entry("stats", 0444, litmus_dir, 177 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
165 proc_read_stats, NULL); 178 proc_read_stats, NULL);
166 179
@@ -187,6 +200,10 @@ void exit_litmus_proc(void)
187 remove_proc_entry("stats", litmus_dir); 200 remove_proc_entry("stats", litmus_dir);
188 if (curr_file) 201 if (curr_file)
189 remove_proc_entry("active_plugin", litmus_dir); 202 remove_proc_entry("active_plugin", litmus_dir);
203#ifdef CONFIG_LITMUS_SOFTIRQD
204 if (klitirqd_file)
205 remove_proc_entry("klitirqd_stats", litmus_dir);
206#endif
190#ifdef CONFIG_RELEASE_MASTER 207#ifdef CONFIG_RELEASE_MASTER
191 if (release_master_file) 208 if (release_master_file)
192 remove_proc_entry("release_master", litmus_dir); 209 remove_proc_entry("release_master", litmus_dir);
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
new file mode 100644
index 000000000000..9f7d9da5facb
--- /dev/null
+++ b/litmus/litmus_softirq.c
@@ -0,0 +1,1582 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19/* TODO: Remove unneeded mb() and other barriers. */
20
21
22/* counts number of daemons ready to handle litmus irqs. */
23static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
24
25enum pending_flags
26{
27 LIT_TASKLET_LOW = 0x1,
28 LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
29 LIT_WORK = LIT_TASKLET_HI<<1
30};
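The bit layout above encodes a strict service order that __reeval_prio() below depends on: high-priority tasklets first, then regular tasklets, then work items. A hedged restatement (helper name invented, not part of the patch):

/* Return the highest-priority class with work pending, or 0 if idle. */
static inline u32 highest_pending_class(u32 pending)
{
	if (pending & LIT_TASKLET_HI)
		return LIT_TASKLET_HI;		/* served first */
	if (pending & LIT_TASKLET_LOW)
		return LIT_TASKLET_LOW;
	if (pending & LIT_WORK)
		return LIT_WORK;		/* lowest priority */
	return 0;
}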
31
32/* only support tasklet processing for now. */
33struct tasklet_head
34{
35 struct tasklet_struct *head;
36 struct tasklet_struct **tail;
37};
38
39struct klitirqd_info
40{
41 struct task_struct* klitirqd;
42 struct task_struct* current_owner;
43 int terminating;
44
45
46 raw_spinlock_t lock;
47
48 u32 pending;
49 atomic_t num_hi_pending;
50 atomic_t num_low_pending;
51 atomic_t num_work_pending;
52
53 /* in order of priority */
54 struct tasklet_head pending_tasklets_hi;
55 struct tasklet_head pending_tasklets;
56 struct list_head worklist;
57};
58
59/* one list for each klitirqd */
60static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
61
62
63
64
65
66int proc_read_klitirqd_stats(char *page, char **start,
67 off_t off, int count,
68 int *eof, void *data)
69{
70 int len = snprintf(page, PAGE_SIZE,
71 "num ready klitirqds: %d\n\n",
72 atomic_read(&num_ready_klitirqds));
73
74 if(klitirqd_is_ready())
75 {
76 int i;
77 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
78 {
79 len +=
80			snprintf(page + len - 1, PAGE_SIZE - (len - 1), /* -1 to strip off \0 */
81 "klitirqd_th%d: %s/%d\n"
82 "\tcurrent_owner: %s/%d\n"
83 "\tpending: %x\n"
84 "\tnum hi: %d\n"
85 "\tnum low: %d\n"
86 "\tnum work: %d\n\n",
87 i,
88 klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
89 (klitirqds[i].current_owner != NULL) ?
90 klitirqds[i].current_owner->comm : "(null)",
91 (klitirqds[i].current_owner != NULL) ?
92 klitirqds[i].current_owner->pid : 0,
93 klitirqds[i].pending,
94 atomic_read(&klitirqds[i].num_hi_pending),
95 atomic_read(&klitirqds[i].num_low_pending),
96 atomic_read(&klitirqds[i].num_work_pending));
97 }
98 }
99
100 return(len);
101}
102
103
104
105
106
107#if 0
108static atomic_t dump_id = ATOMIC_INIT(0);
109
110static void __dump_state(struct klitirqd_info* which, const char* caller)
111{
112 struct tasklet_struct* list;
113
114 int id = atomic_inc_return(&dump_id);
115
116 //if(in_interrupt())
117 {
118 if(which->current_owner)
119 {
120 TRACE("(id: %d caller: %s)\n"
121 "klitirqd: %s/%d\n"
122 "current owner: %s/%d\n"
123 "pending: %x\n",
124 id, caller,
125 which->klitirqd->comm, which->klitirqd->pid,
126 which->current_owner->comm, which->current_owner->pid,
127 which->pending);
128 }
129 else
130 {
131 TRACE("(id: %d caller: %s)\n"
132 "klitirqd: %s/%d\n"
133 "current owner: %p\n"
134 "pending: %x\n",
135 id, caller,
136 which->klitirqd->comm, which->klitirqd->pid,
137 NULL,
138 which->pending);
139 }
140
141 list = which->pending_tasklets.head;
142 while(list)
143 {
144 struct tasklet_struct *t = list;
145 list = list->next; /* advance */
146 if(t->owner)
147 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
148 else
149 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
150 }
151 }
152}
153
154static void dump_state(struct klitirqd_info* which, const char* caller)
155{
156 unsigned long flags;
157
158 raw_spin_lock_irqsave(&which->lock, flags);
159 __dump_state(which, caller);
160 raw_spin_unlock_irqrestore(&which->lock, flags);
161}
162#endif
163
164
165/* forward declarations */
166static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
167 struct klitirqd_info *which,
168 int wakeup);
169static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
170 struct klitirqd_info *which,
171 int wakeup);
172static void ___litmus_schedule_work(struct work_struct *w,
173 struct klitirqd_info *which,
174 int wakeup);
175
176
177
178inline unsigned int klitirqd_id(struct task_struct* tsk)
179{
180 int i;
181 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
182 {
183 if(klitirqds[i].klitirqd == tsk)
184 {
185 return i;
186 }
187 }
188
189 BUG();
190
191 return 0;
192}
193
194
195inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
196{
197 return (which->pending & LIT_TASKLET_HI);
198}
199
200inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
201{
202 return (which->pending & LIT_TASKLET_LOW);
203}
204
205inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
206{
207 return (which->pending & LIT_WORK);
208}
209
210inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
211{
212 return(which->pending);
213}
214
215
216inline static u32 litirq_pending(struct klitirqd_info* which)
217{
218 unsigned long flags;
219 u32 pending;
220
221 raw_spin_lock_irqsave(&which->lock, flags);
222 pending = litirq_pending_irqoff(which);
223 raw_spin_unlock_irqrestore(&which->lock, flags);
224
225 return pending;
226};
227
228inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
229{
230 unsigned long flags;
231 u32 pending;
232
233 raw_spin_lock_irqsave(&which->lock, flags);
234 pending = litirq_pending_irqoff(which);
235 if(pending)
236 {
237 if(which->current_owner != owner)
238 {
239 pending = 0; // owner switch!
240 }
241 }
242 raw_spin_unlock_irqrestore(&which->lock, flags);
243
244 return pending;
245}
246
247
248inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
249 struct mutex** sem,
250 struct task_struct** t)
251{
252 unsigned long flags;
253 u32 pending;
254
255 /* init values */
256 *sem = NULL;
257 *t = NULL;
258
259 raw_spin_lock_irqsave(&which->lock, flags);
260
261 pending = litirq_pending_irqoff(which);
262 if(pending)
263 {
264 if(which->current_owner != NULL)
265 {
266 *t = which->current_owner;
267 *sem = &tsk_rt(which->current_owner)->klitirqd_sem;
268 }
269 else
270 {
271 BUG();
272 }
273 }
274 raw_spin_unlock_irqrestore(&which->lock, flags);
275
276 if(likely(*sem))
277 {
278 return pending;
279 }
280 else
281 {
282 return 0;
283 }
284}
285
286/* returns true if the next piece of work to do is from a different owner.
287 */
288static int tasklet_ownership_change(
289 struct klitirqd_info* which,
290 enum pending_flags taskletQ)
291{
292 /* this function doesn't have to look at work objects since they have
293 priority below tasklets. */
294
295 unsigned long flags;
296 int ret = 0;
297
298 raw_spin_lock_irqsave(&which->lock, flags);
299
300 switch(taskletQ)
301 {
302 case LIT_TASKLET_HI:
303 if(litirq_pending_hi_irqoff(which))
304 {
305 ret = (which->pending_tasklets_hi.head->owner !=
306 which->current_owner);
307 }
308 break;
309 case LIT_TASKLET_LOW:
310 if(litirq_pending_low_irqoff(which))
311 {
312 ret = (which->pending_tasklets.head->owner !=
313 which->current_owner);
314 }
315 break;
316 default:
317 break;
318 }
319
320 raw_spin_unlock_irqrestore(&which->lock, flags);
321
322 TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
323
324 return ret;
325}
326
327
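/* Re-evaluate which task the daemon should inherit from: scan the pending
 * queues in priority order (HI tasklets, then LOW tasklets, then work items)
 * and, if the head item's owner differs from current_owner, switch the
 * daemon's inherited priority accordingly. Caller must hold which->lock. */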
328static void __reeval_prio(struct klitirqd_info* which)
329{
330 struct task_struct* next_owner = NULL;
331 struct task_struct* klitirqd = which->klitirqd;
332
333 /* Check in prio-order */
334 u32 pending = litirq_pending_irqoff(which);
335
336 //__dump_state(which, "__reeval_prio: before");
337
338 if(pending)
339 {
340 if(pending & LIT_TASKLET_HI)
341 {
342 next_owner = which->pending_tasklets_hi.head->owner;
343 }
344 else if(pending & LIT_TASKLET_LOW)
345 {
346 next_owner = which->pending_tasklets.head->owner;
347 }
348 else if(pending & LIT_WORK)
349 {
350 struct work_struct* work =
351 list_first_entry(&which->worklist, struct work_struct, entry);
352 next_owner = work->owner;
353 }
354 }
355
356 if(next_owner != which->current_owner)
357 {
358 struct task_struct* old_owner = which->current_owner;
359
360 /* bind the next owner. */
361 which->current_owner = next_owner;
362 mb();
363
364 if(next_owner != NULL)
365 {
366 if(!in_interrupt())
367 {
368 TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
369 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
370 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
371 next_owner->comm, next_owner->pid);
372 }
373 else
374 {
375 TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
376 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
377 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
378 next_owner->comm, next_owner->pid);
379 }
380
381 litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner);
382 }
383 else
384 {
385 if(likely(!in_interrupt()))
386 {
387 TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
388 __FUNCTION__, klitirqd->comm, klitirqd->pid);
389 }
390 else
391 {
392 // is this a bug?
393 TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
394 __FUNCTION__, klitirqd->comm, klitirqd->pid);
395 }
396
397 BUG_ON(pending != 0);
398 litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL);
399 }
400 }
401
402 //__dump_state(which, "__reeval_prio: after");
403}
404
405static void reeval_prio(struct klitirqd_info* which)
406{
407 unsigned long flags;
408
409 raw_spin_lock_irqsave(&which->lock, flags);
410 __reeval_prio(which);
411 raw_spin_unlock_irqrestore(&which->lock, flags);
412}
413
414
415static void wakeup_litirqd_locked(struct klitirqd_info* which)
416{
417 /* Interrupts are disabled: no need to stop preemption */
418 if (which && which->klitirqd)
419 {
420 __reeval_prio(which); /* configure the proper priority */
421
422 if(which->klitirqd->state != TASK_RUNNING)
423 {
424 TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
425 which->klitirqd->comm, which->klitirqd->pid);
426
427 wake_up_process(which->klitirqd);
428 }
429 }
430}
431
432
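/* Drain one tasklet queue: atomically steal the whole pending list (clearing
 * the corresponding pending flag), then run each tasklet that belongs to
 * current_owner. Tasklets owned by someone else are re-queued without a
 * wakeup. */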
433static void do_lit_tasklet(struct klitirqd_info* which,
434 struct tasklet_head* pending_tasklets)
435{
436 unsigned long flags;
437 struct tasklet_struct *list;
438 atomic_t* count;
439
440 raw_spin_lock_irqsave(&which->lock, flags);
441
442 //__dump_state(which, "do_lit_tasklet: before steal");
443
444 /* copy out the tasklets for our private use. */
445 list = pending_tasklets->head;
446 pending_tasklets->head = NULL;
447 pending_tasklets->tail = &pending_tasklets->head;
448
449 /* remove pending flag */
450 which->pending &= (pending_tasklets == &which->pending_tasklets) ?
451 ~LIT_TASKLET_LOW :
452 ~LIT_TASKLET_HI;
453
454 count = (pending_tasklets == &which->pending_tasklets) ?
455 &which->num_low_pending:
456 &which->num_hi_pending;
457
458 //__dump_state(which, "do_lit_tasklet: after steal");
459
460 raw_spin_unlock_irqrestore(&which->lock, flags);
461
462
463 while(list)
464 {
465 struct tasklet_struct *t = list;
466
467 /* advance, lest we forget */
468 list = list->next;
469
470 /* execute tasklet if it has my priority and is free */
471 if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
472 if (!atomic_read(&t->count)) {
473
474 sched_trace_tasklet_begin(t->owner);
475
476 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
477 {
478 BUG();
479 }
480 TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
481 t->func(t->data);
482 tasklet_unlock(t);
483
484 atomic_dec(count);
485
486 sched_trace_tasklet_end(t->owner, 0ul);
487
488 continue; /* process more tasklets */
489 }
490 tasklet_unlock(t);
491 }
492
493 TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
494
495 /* couldn't process tasklet. put it back at the end of the queue. */
496 if(pending_tasklets == &which->pending_tasklets)
497 ___litmus_tasklet_schedule(t, which, 0);
498 else
499 ___litmus_tasklet_hi_schedule(t, which, 0);
500 }
501}
502
503
504// returns 1 if priorities need to be changed to continue processing
505// pending tasklets.
506static int do_litirq(struct klitirqd_info* which)
507{
508 u32 pending;
509 int resched = 0;
510
511 if(in_interrupt())
512 {
513 TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
514 return(0);
515 }
516
517 if(which->klitirqd != current)
518 {
519 TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
520 __FUNCTION__, current->comm, current->pid,
521 which->klitirqd->comm, which->klitirqd->pid);
522 return(0);
523 }
524
525 if(!is_realtime(current))
526 {
527 TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
528 __FUNCTION__, current->policy);
529 return(0);
530 }
531
532
533 /* We only handle tasklets & work objects, no need for RCU triggers? */
534
535 pending = litirq_pending(which);
536 if(pending)
537 {
538 /* extract the work to do and do it! */
539 if(pending & LIT_TASKLET_HI)
540 {
541 TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
542 do_lit_tasklet(which, &which->pending_tasklets_hi);
543 resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
544
545 if(resched)
546 {
547 TRACE_CUR("%s: HI tasklets of another owner remain. "
548 "Skipping any LOW tasklets.\n", __FUNCTION__);
549 }
550 }
551
552 if(!resched && (pending & LIT_TASKLET_LOW))
553 {
554 TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
555 do_lit_tasklet(which, &which->pending_tasklets);
556 resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
557
558 if(resched)
559 {
560 TRACE_CUR("%s: LOW tasklets of another owner remain. "
561 "Skipping any work objects.\n", __FUNCTION__);
562 }
563 }
564 }
565
566 return(resched);
567}
568
569
570static void do_work(struct klitirqd_info* which)
571{
572 unsigned long flags;
573 work_func_t f;
574 struct work_struct* work;
575
576 // only execute one work-queue item to yield to tasklets.
577 // ...is this a good idea, or should we just batch them?
578 raw_spin_lock_irqsave(&which->lock, flags);
579
580 if(!litirq_pending_work_irqoff(which))
581 {
582 raw_spin_unlock_irqrestore(&which->lock, flags);
583 goto no_work;
584 }
585
586 work = list_first_entry(&which->worklist, struct work_struct, entry);
587 list_del_init(&work->entry);
588
589 if(list_empty(&which->worklist))
590 {
591 which->pending &= ~LIT_WORK;
592 }
593
594 raw_spin_unlock_irqrestore(&which->lock, flags);
595
596
597
598 /* safe to read current_owner outside of lock since only this thread
599 may write to the pointer. */
600 if(work->owner == which->current_owner)
601 {
602 TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
603 // do the work!
604 work_clear_pending(work);
605 f = work->func;
606 f(work); /* can't touch 'work' after this point,
607 the user may have freed it. */
608
609 atomic_dec(&which->num_work_pending);
610 }
611 else
612 {
613 TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
614 __FUNCTION__);
615 ___litmus_schedule_work(work, which, 0);
616 }
617
618no_work:
619 return;
620}
621
622
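/* Configure the calling kthread as a best-effort LITMUS^RT proxy task.
 * The daemon has no budget of its own; it runs at the inherited priority
 * of whichever real-time task currently owns its pending work. */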
623static int set_litmus_daemon_sched(void)
624{
625 /* set up a daemon job that will never complete.
626 it should only ever run on behalf of another
627 real-time task.
628
629 TODO: Transition to a new job whenever a
630 new tasklet is handled */
631
632 int ret = 0;
633
634 struct rt_task tp = {
635 .exec_cost = 0,
636 .period = 1000000000, /* dummy 1 second period */
637 .phase = 0,
638 .cpu = task_cpu(current),
639 .budget_policy = NO_ENFORCEMENT,
640 .cls = RT_CLASS_BEST_EFFORT
641 };
642
643 struct sched_param param = { .sched_priority = 0};
644
645
646 /* set task params, mark as proxy thread, and init other data */
647 tsk_rt(current)->task_params = tp;
648 tsk_rt(current)->is_proxy_thread = 1;
649 tsk_rt(current)->cur_klitirqd = NULL;
650 mutex_init(&tsk_rt(current)->klitirqd_sem);
651 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
652
653 /* inform the OS we're SCHED_LITMUS --
654 sched_setscheduler_nocheck() calls litmus_admit_task(). */
655 sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);
656
657 return ret;
658}
659
660static void enter_execution_phase(struct klitirqd_info* which,
661 struct mutex* sem,
662 struct task_struct* t)
663{
664 TRACE_CUR("%s: Trying to enter execution phase. "
665 "Acquiring semaphore of %s/%d\n", __FUNCTION__,
666 t->comm, t->pid);
667 down_and_set_stat(current, HELD, sem);
668 TRACE_CUR("%s: Execution phase entered! "
669 "Acquired semaphore of %s/%d\n", __FUNCTION__,
670 t->comm, t->pid);
671}
672
673static void exit_execution_phase(struct klitirqd_info* which,
674 struct mutex* sem,
675 struct task_struct* t)
676{
677 TRACE_CUR("%s: Exiting execution phase. "
678 "Releasing semaphore of %s/%d\n", __FUNCTION__,
679 t->comm, t->pid);
680 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
681 {
682 up_and_set_stat(current, NOT_HELD, sem);
683 TRACE_CUR("%s: Execution phase exited! "
684 "Released semaphore of %s/%d\n", __FUNCTION__,
685 t->comm, t->pid);
686 }
687 else
688 {
689 TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
690 }
691}
692
693/* main loop for the klitirqd threads */
694static int run_klitirqd(void* unused)
695{
696 struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
697 struct mutex* sem;
698 struct task_struct* owner;
699
700 int rt_status = set_litmus_daemon_sched();
701
702 if(rt_status != 0)
703 {
704 TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
705 goto rt_failed;
706 }
707
708 atomic_inc(&num_ready_klitirqds);
709
710 set_current_state(TASK_INTERRUPTIBLE);
711
712 while (!kthread_should_stop())
713 {
714 preempt_disable();
715 if (!litirq_pending(which))
716 {
717 /* sleep for work */
718 TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
719 __FUNCTION__);
720 preempt_enable_no_resched();
721 schedule();
722
723 if(kthread_should_stop()) /* bail out */
724 {
725 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
726 continue;
727 }
728
729 preempt_disable();
730 }
731
732 __set_current_state(TASK_RUNNING);
733
734 while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
735 {
736 int needs_resched = 0;
737
738 preempt_enable_no_resched();
739
740 BUG_ON(sem == NULL);
741
742 // wait to enter execution phase; wait for 'current_owner' to block.
743 enter_execution_phase(which, sem, owner);
744
745 if(kthread_should_stop())
746 {
747 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
748 break;
749 }
750
751 preempt_disable();
752
753 /* Double check that there's still pending work and the owner hasn't
754 * changed. Pending items may have been flushed while we were sleeping.
755 */
756 if(litirq_pending_with_owner(which, owner))
757 {
758 TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
759 __FUNCTION__);
760
761 needs_resched = do_litirq(which);
762
763 preempt_enable_no_resched();
764
765 // work objects are preemptible.
766 if(!needs_resched)
767 {
768 do_work(which);
769 }
770
771 // exit execution phase.
772 exit_execution_phase(which, sem, owner);
773
774 TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
775 reeval_prio(which); /* check if we need to change priority here */
776 }
777 else
778 {
779 TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n",
780 __FUNCTION__,
781 owner->comm, owner->pid);
782 preempt_enable_no_resched();
783
784 // exit execution phase.
785 exit_execution_phase(which, sem, owner);
786 }
787
788 cond_resched();
789 preempt_disable();
790 }
791 preempt_enable();
792 set_current_state(TASK_INTERRUPTIBLE);
793 }
794 __set_current_state(TASK_RUNNING);
795
796 atomic_dec(&num_ready_klitirqds);
797
798rt_failed:
799 litmus_exit_task(current);
800
801 return rt_status;
802}
803
804
805struct klitirqd_launch_data
806{
807 int* cpu_affinity;
808 struct work_struct work;
809};
810
811/* executed by a kworker from workqueues */
812static void launch_klitirqd(struct work_struct *work)
813{
814 int i;
815
816 struct klitirqd_launch_data* launch_data =
817 container_of(work, struct klitirqd_launch_data, work);
818
819 TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
820
821 /* create the daemon threads */
822 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
823 {
824 if(launch_data->cpu_affinity)
825 {
826 klitirqds[i].klitirqd =
827 kthread_create(
828 run_klitirqd,
829 /* treat the affinity as a pointer, we'll cast it back later */
830 (void*)(long long)launch_data->cpu_affinity[i],
831 "klitirqd_th%d/%d",
832 i,
833 launch_data->cpu_affinity[i]);
834
835			/* litmus will put it in the right cluster. */
836 kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
837 }
838 else
839 {
840 klitirqds[i].klitirqd =
841 kthread_create(
842 run_klitirqd,
843 /* treat the affinity as a pointer, we'll cast it back later */
844 (void*)(long long)(-1),
845 "klitirqd_th%d",
846 i);
847 }
848 }
849
850 TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
851
852 /* unleash the daemons */
853 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
854 {
855 wake_up_process(klitirqds[i].klitirqd);
856 }
857
858 if(launch_data->cpu_affinity)
859 kfree(launch_data->cpu_affinity);
860 kfree(launch_data);
861}
862
863
864void spawn_klitirqd(int* affinity)
865{
866 int i;
867 struct klitirqd_launch_data* delayed_launch;
868
869 if(atomic_read(&num_ready_klitirqds) != 0)
870 {
871		TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n", __FUNCTION__);
872 return;
873 }
874
875 /* init the tasklet & work queues */
876 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
877 {
878 klitirqds[i].terminating = 0;
879 klitirqds[i].pending = 0;
880
881 klitirqds[i].num_hi_pending.counter = 0;
882 klitirqds[i].num_low_pending.counter = 0;
883 klitirqds[i].num_work_pending.counter = 0;
884
885 klitirqds[i].pending_tasklets_hi.head = NULL;
886 klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
887
888 klitirqds[i].pending_tasklets.head = NULL;
889 klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
890
891 INIT_LIST_HEAD(&klitirqds[i].worklist);
892
893 raw_spin_lock_init(&klitirqds[i].lock);
894 }
895
896 /* wait to flush the initializations to memory since other threads
897 will access it. */
898 mb();
899
900 /* tell a work queue to launch the threads. we can't make scheduling
901 calls since we're in an atomic state. */
902 TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
903 delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
904 if(affinity)
905 {
906 delayed_launch->cpu_affinity =
907 kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
908
909 memcpy(delayed_launch->cpu_affinity, affinity,
910 sizeof(int)*NR_LITMUS_SOFTIRQD);
911 }
912 else
913 {
914 delayed_launch->cpu_affinity = NULL;
915 }
916 INIT_WORK(&delayed_launch->work, launch_klitirqd);
917 schedule_work(&delayed_launch->work);
918}
919
920
921void kill_klitirqd(void)
922{
923 if(!klitirqd_is_dead())
924 {
925 int i;
926
927 TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
928
929 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
930 {
931 if(klitirqds[i].terminating != 1)
932 {
933 klitirqds[i].terminating = 1;
934 mb(); /* just to be sure? */
935 flush_pending(klitirqds[i].klitirqd, NULL);
936
937 /* signal termination */
938 kthread_stop(klitirqds[i].klitirqd);
939 }
940 }
941 }
942}
943
944
945int klitirqd_is_ready(void)
946{
947 return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
948}
949
950int klitirqd_is_dead(void)
951{
952 return(atomic_read(&num_ready_klitirqds) == 0);
953}
954
955
956struct task_struct* get_klitirqd(unsigned int k_id)
957{
958 return(klitirqds[k_id].klitirqd);
959}
960
961
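/* Hand pending tasklets and work items owned by 'owner' (or all items, if
 * owner == NULL) back to Linux's regular softirq/workqueue machinery and
 * re-evaluate the daemon's inherited priority. */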
962void flush_pending(struct task_struct* klitirqd_thread,
963 struct task_struct* owner)
964{
965 unsigned int k_id = klitirqd_id(klitirqd_thread);
966 struct klitirqd_info *which = &klitirqds[k_id];
967
968 unsigned long flags;
969 struct tasklet_struct *list;
970
971 u32 work_flushed = 0;
972
973 raw_spin_lock_irqsave(&which->lock, flags);
974
975 //__dump_state(which, "flush_pending: before");
976
977 // flush hi tasklets.
978 if(litirq_pending_hi_irqoff(which))
979 {
980 which->pending &= ~LIT_TASKLET_HI;
981
982 list = which->pending_tasklets_hi.head;
983 which->pending_tasklets_hi.head = NULL;
984 which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
985
986 TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
987
988 while(list)
989 {
990 struct tasklet_struct *t = list;
991 list = list->next;
992
993 if(likely((t->owner == owner) || (owner == NULL)))
994 {
995 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
996 {
997 BUG();
998 }
999
1000 work_flushed |= LIT_TASKLET_HI;
1001
1002 t->owner = NULL;
1003
1004				// re-set the SCHED bit so Linux's softirq layer will accept the tasklet
1005 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1006 {
1007 atomic_dec(&which->num_hi_pending);
1008 ___tasklet_hi_schedule(t);
1009 }
1010 else
1011 {
1012 TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
1013 BUG();
1014 }
1015 }
1016 else
1017 {
1018 TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
1019 // put back on queue.
1020 ___litmus_tasklet_hi_schedule(t, which, 0);
1021 }
1022 }
1023 }
1024
1025 // flush low tasklets.
1026 if(litirq_pending_low_irqoff(which))
1027 {
1028 which->pending &= ~LIT_TASKLET_LOW;
1029
1030 list = which->pending_tasklets.head;
1031 which->pending_tasklets.head = NULL;
1032 which->pending_tasklets.tail = &which->pending_tasklets.head;
1033
1034 TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
1035
1036 while(list)
1037 {
1038 struct tasklet_struct *t = list;
1039 list = list->next;
1040
1041 if(likely((t->owner == owner) || (owner == NULL)))
1042 {
1043 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
1044 {
1045 BUG();
1046 }
1047
1048 work_flushed |= LIT_TASKLET_LOW;
1049
1050 t->owner = NULL;
1051 sched_trace_tasklet_end(owner, 1ul);
1052
1053 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1054 {
1055 atomic_dec(&which->num_low_pending);
1056 ___tasklet_schedule(t);
1057 }
1058 else
1059 {
1060 TRACE("%s: dropped tasklet??\n", __FUNCTION__);
1061 BUG();
1062 }
1063 }
1064 else
1065 {
1066 TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
1067 // put back on queue
1068 ___litmus_tasklet_schedule(t, which, 0);
1069 }
1070 }
1071 }
1072
1073 // flush work objects
1074 if(litirq_pending_work_irqoff(which))
1075 {
1076 which->pending &= ~LIT_WORK;
1077
1078 TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
1079
1080 while(!list_empty(&which->worklist))
1081 {
1082 struct work_struct* work =
1083 list_first_entry(&which->worklist, struct work_struct, entry);
1084 list_del_init(&work->entry);
1085
1086 if(likely((work->owner == owner) || (owner == NULL)))
1087 {
1088 work_flushed |= LIT_WORK;
1089 atomic_dec(&which->num_work_pending);
1090
1091 work->owner = NULL;
1092 sched_trace_work_end(owner, current, 1ul);
1093 __schedule_work(work);
1094 }
1095 else
1096 {
1097 TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
1098 // put back on queue
1099 ___litmus_schedule_work(work, which, 0);
1100 }
1101 }
1102 }
1103
1104 //__dump_state(which, "flush_pending: after (before reeval prio)");
1105
1106
1107 mb(); /* commit changes to pending flags */
1108
1109 /* reset the scheduling priority */
1110 if(work_flushed)
1111 {
1112 __reeval_prio(which);
1113
1114 /* Try to offload flushed tasklets to Linux's ksoftirqd. */
1115 if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
1116 {
1117 wakeup_softirqd();
1118 }
1119 }
1120 else
1121 {
1122 TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
1123 }
1124
1125 raw_spin_unlock_irqrestore(&which->lock, flags);
1126}
1127
1128
1129
1130
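/* Append a tasklet to the LOW queue under the per-daemon lock, mark
 * LIT_TASKLET_LOW as pending, and wake the daemon if it had no prior
 * pending work (and 'wakeup' is set). */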
1131static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
1132 struct klitirqd_info *which,
1133 int wakeup)
1134{
1135 unsigned long flags;
1136 u32 old_pending;
1137
1138 t->next = NULL;
1139
1140 raw_spin_lock_irqsave(&which->lock, flags);
1141
1142 //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
1143
1144 *(which->pending_tasklets.tail) = t;
1145 which->pending_tasklets.tail = &t->next;
1146
1147 old_pending = which->pending;
1148 which->pending |= LIT_TASKLET_LOW;
1149
1150 atomic_inc(&which->num_low_pending);
1151
1152 mb();
1153
1154 if(!old_pending && wakeup)
1155 {
1156 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1157 }
1158
1159 //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
1160
1161 raw_spin_unlock_irqrestore(&which->lock, flags);
1162}
1163
1164int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
1165{
1166 int ret = 0; /* assume failure */
1167 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1168 {
1169 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1170 BUG();
1171 }
1172
1173 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1174 {
1175 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1176 BUG();
1177 }
1178
1179 if(likely(!klitirqds[k_id].terminating))
1180 {
1181 /* Can't accept tasklets while we're processing a workqueue
1182 because they're handled by the same thread. This case is
1183 very RARE.
1184
1185 TODO: Use a separate thread for work objects!!!!!!
1186 */
1187 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1188 {
1189 ret = 1;
1190 ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
1191 }
1192 else
1193 {
1194 TRACE("%s: rejected tasklet because of pending work.\n",
1195 __FUNCTION__);
1196 }
1197 }
1198 return(ret);
1199}
1200
1201EXPORT_SYMBOL(__litmus_tasklet_schedule);
1202
1203
1204static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1205 struct klitirqd_info *which,
1206 int wakeup)
1207{
1208 unsigned long flags;
1209 u32 old_pending;
1210
1211 t->next = NULL;
1212
1213 raw_spin_lock_irqsave(&which->lock, flags);
1214
1215 *(which->pending_tasklets_hi.tail) = t;
1216 which->pending_tasklets_hi.tail = &t->next;
1217
1218 old_pending = which->pending;
1219 which->pending |= LIT_TASKLET_HI;
1220
1221 atomic_inc(&which->num_hi_pending);
1222
1223 mb();
1224
1225 if(!old_pending && wakeup)
1226 {
1227 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1228 }
1229
1230 raw_spin_unlock_irqrestore(&which->lock, flags);
1231}
1232
1233int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
1234{
1235 int ret = 0; /* assume failure */
1236 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1237 {
1238 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1239 BUG();
1240 }
1241
1242 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1243 {
1244 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1245 BUG();
1246 }
1247
1248 if(unlikely(!klitirqd_is_ready()))
1249 {
1250		TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
1251 BUG();
1252 }
1253
1254 if(likely(!klitirqds[k_id].terminating))
1255 {
1256 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1257 {
1258 ret = 1;
1259 ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
1260 }
1261 else
1262 {
1263 TRACE("%s: rejected tasklet because of pending work.\n",
1264 __FUNCTION__);
1265 }
1266 }
1267 return(ret);
1268}
1269
1270EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
1271
1272
1273int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
1274{
1275 int ret = 0; /* assume failure */
1276 u32 old_pending;
1277
1278 BUG_ON(!irqs_disabled());
1279
1280 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1281 {
1282 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1283 BUG();
1284 }
1285
1286 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1287 {
1288 TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1289 BUG();
1290 }
1291
1292 if(unlikely(!klitirqd_is_ready()))
1293 {
1294		TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
1295 BUG();
1296 }
1297
1298 if(likely(!klitirqds[k_id].terminating))
1299 {
1300 raw_spin_lock(&klitirqds[k_id].lock);
1301
1302 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1303 {
1304 ret = 1; // success!
1305
1306 t->next = klitirqds[k_id].pending_tasklets_hi.head;
1307 klitirqds[k_id].pending_tasklets_hi.head = t;
1308
1309 old_pending = klitirqds[k_id].pending;
1310 klitirqds[k_id].pending |= LIT_TASKLET_HI;
1311
1312 atomic_inc(&klitirqds[k_id].num_hi_pending);
1313
1314 mb();
1315
1316 if(!old_pending)
1317 wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
1318 }
1319 else
1320 {
1321 TRACE("%s: rejected tasklet because of pending work.\n",
1322 __FUNCTION__);
1323 }
1324
1325 raw_spin_unlock(&klitirqds[k_id].lock);
1326 }
1327 return(ret);
1328}
1329
1330EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
1331
1332
1333
1334static void ___litmus_schedule_work(struct work_struct *w,
1335 struct klitirqd_info *which,
1336 int wakeup)
1337{
1338 unsigned long flags;
1339 u32 old_pending;
1340
1341 raw_spin_lock_irqsave(&which->lock, flags);
1342
1343 work_pending(w);
1344 list_add_tail(&w->entry, &which->worklist);
1345
1346 old_pending = which->pending;
1347 which->pending |= LIT_WORK;
1348
1349 atomic_inc(&which->num_work_pending);
1350
1351 mb();
1352
1353 if(!old_pending && wakeup)
1354 {
1355 wakeup_litirqd_locked(which); /* wakeup the klitirqd */
1356 }
1357
1358 raw_spin_unlock_irqrestore(&which->lock, flags);
1359}
1360
1361int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
1362{
1363 int ret = 1; /* assume success */
1364	if(unlikely((w->owner == NULL) || !is_realtime(w->owner)))
1365 {
1366 TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
1367 BUG();
1368 }
1369
1370 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1371 {
1372		TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1373 BUG();
1374 }
1375
1376 if(unlikely(!klitirqd_is_ready()))
1377 {
1378		TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
1379 BUG();
1380 }
1381
1382 if(likely(!klitirqds[k_id].terminating))
1383 ___litmus_schedule_work(w, &klitirqds[k_id], 1);
1384 else
1385 ret = 0;
1386 return(ret);
1387}
1388EXPORT_SYMBOL(__litmus_schedule_work);
1389
1390
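/* The helpers below track the state of a task's klitirqd_sem (HELD,
 * NOT_HELD, NEED_TO_REACQUIRE, REACQUIRING) as ownership passes back and
 * forth between the real-time task and its klitirqd. */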
1391static int set_klitirqd_sem_status(unsigned long stat)
1392{
1393 TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
1394 atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
1395 stat);
1396 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
1397 //mb();
1398
1399 return(0);
1400}
1401
1402static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
1403{
1404 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
1405 {
1406 return(set_klitirqd_sem_status(stat));
1407 }
1408 return(-1);
1409}
1410
1411
1412void __down_and_reset_and_set_stat(struct task_struct* t,
1413 enum klitirqd_sem_status to_reset,
1414 enum klitirqd_sem_status to_set,
1415 struct mutex* sem)
1416{
1417#if 0
1418 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1419 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1420
1421 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1422 __FUNCTION__, task->comm, task->pid);
1423#endif
1424
1425 mutex_lock_sfx(sem,
1426 set_klitirqd_sem_status_if_not_held, to_reset,
1427 set_klitirqd_sem_status, to_set);
1428#if 0
1429 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1430 __FUNCTION__, task->comm, task->pid);
1431#endif
1432}
1433
1434void down_and_set_stat(struct task_struct* t,
1435 enum klitirqd_sem_status to_set,
1436 struct mutex* sem)
1437{
1438#if 0
1439 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1440 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1441
1442 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1443 __FUNCTION__, task->comm, task->pid);
1444#endif
1445
1446 mutex_lock_sfx(sem,
1447 NULL, 0,
1448 set_klitirqd_sem_status, to_set);
1449
1450#if 0
1451 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1452 __FUNCTION__, task->comm, task->pid);
1453#endif
1454}
1455
1456
1457void up_and_set_stat(struct task_struct* t,
1458 enum klitirqd_sem_status to_set,
1459 struct mutex* sem)
1460{
1461#if 0
1462 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1463 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1464
1465 TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
1466 __FUNCTION__,
1467 task->comm, task->pid);
1468#endif
1469
1470 mutex_unlock_sfx(sem, NULL, 0,
1471 set_klitirqd_sem_status, to_set);
1472
1473#if 0
1474 TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
1475 __FUNCTION__,
1476 task->comm, task->pid);
1477#endif
1478}
1479
1480
1481
1482void release_klitirqd_lock(struct task_struct* t)
1483{
1484 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
1485 {
1486 struct mutex* sem;
1487 struct task_struct* owner = t;
1488
1489 if(t->state == TASK_RUNNING)
1490 {
1491 TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
1492 return;
1493 }
1494
1495 if(likely(!tsk_rt(t)->is_proxy_thread))
1496 {
1497 sem = &tsk_rt(t)->klitirqd_sem;
1498 }
1499 else
1500 {
1501 unsigned int k_id = klitirqd_id(t);
1502 owner = klitirqds[k_id].current_owner;
1503
1504 BUG_ON(t != klitirqds[k_id].klitirqd);
1505
1506 if(likely(owner))
1507 {
1508 sem = &tsk_rt(owner)->klitirqd_sem;
1509 }
1510 else
1511 {
1512 BUG();
1513
1514 // We had the rug pulled out from under us. Abort attempt
1515 // to reacquire the lock since our client no longer needs us.
1516 TRACE_CUR("HUH?! How did this happen?\n");
1517 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1518 return;
1519 }
1520 }
1521
1522 //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
1523 up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
1524 //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
1525 }
1526 /*
1527 else if(is_realtime(t))
1528 {
1529 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1530 }
1531 */
1532}
1533
1534int reacquire_klitirqd_lock(struct task_struct* t)
1535{
1536 int ret = 0;
1537
1538 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
1539 {
1540 struct mutex* sem;
1541 struct task_struct* owner = t;
1542
1543 if(likely(!tsk_rt(t)->is_proxy_thread))
1544 {
1545 sem = &tsk_rt(t)->klitirqd_sem;
1546 }
1547 else
1548 {
1549 unsigned int k_id = klitirqd_id(t);
1550 //struct task_struct* owner = klitirqds[k_id].current_owner;
1551 owner = klitirqds[k_id].current_owner;
1552
1553 BUG_ON(t != klitirqds[k_id].klitirqd);
1554
1555 if(likely(owner))
1556 {
1557 sem = &tsk_rt(owner)->klitirqd_sem;
1558 }
1559 else
1560 {
1561 // We had the rug pulled out from under us. Abort attempt
1562 // to reacquire the lock since our client no longer needs us.
1563 TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
1564 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1565 return(0);
1566 }
1567 }
1568
1569 //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
1570 __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
1571 //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
1572 }
1573 /*
1574 else if(is_realtime(t))
1575 {
1576 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1577 }
1578 */
1579
1580 return(ret);
1581}
1582
diff --git a/litmus/locking.c b/litmus/locking.c
index 0c1aa6aa40b7..718a5a3281d7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -4,6 +4,15 @@
4 4
5#include <litmus/sched_plugin.h> 5#include <litmus/sched_plugin.h>
6#include <litmus/trace.h> 6#include <litmus/trace.h>
7#include <litmus/litmus.h>
8
9#ifdef CONFIG_LITMUS_DGL_SUPPORT
10#include <linux/uaccess.h>
11#endif
12
13#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
14#include <litmus/gpu_affinity.h>
15#endif
7 16
8static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); 17static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
9static int open_generic_lock(struct od_table_entry* entry, void* __user arg); 18static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
@@ -17,6 +26,9 @@ struct fdso_ops generic_lock_ops = {
17 .destroy = destroy_generic_lock 26 .destroy = destroy_generic_lock
18}; 27};
19 28
29static atomic_t lock_id_gen = ATOMIC_INIT(0);
30
31
20static inline bool is_lock(struct od_table_entry* entry) 32static inline bool is_lock(struct od_table_entry* entry)
21{ 33{
22 return entry->class == &generic_lock_ops; 34 return entry->class == &generic_lock_ops;
@@ -34,8 +46,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar
34 int err; 46 int err;
35 47
36 err = litmus->allocate_lock(&lock, type, arg); 48 err = litmus->allocate_lock(&lock, type, arg);
37 if (err == 0) 49 if (err == 0) {
50#ifdef CONFIG_LITMUS_NESTED_LOCKING
51 lock->nest.lock = lock;
52 lock->nest.hp_waiter_eff_prio = NULL;
53
54 INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node);
55 if(!lock->nest.hp_waiter_ptr) {
56 TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in "
57 "most uses. (exception: IKGLP donors)\n");
58 }
59#endif
60 lock->type = type;
61 lock->ident = atomic_inc_return(&lock_id_gen);
38 *obj_ref = lock; 62 *obj_ref = lock;
63 }
39 return err; 64 return err;
40} 65}
41 66
@@ -74,7 +99,8 @@ asmlinkage long sys_litmus_lock(int lock_od)
74 entry = get_entry_for_od(lock_od); 99 entry = get_entry_for_od(lock_od);
75 if (entry && is_lock(entry)) { 100 if (entry && is_lock(entry)) {
76 l = get_lock(entry); 101 l = get_lock(entry);
77 TRACE_CUR("attempts to lock 0x%p\n", l); 102 //TRACE_CUR("attempts to lock 0x%p\n", l);
103 TRACE_CUR("attempts to lock %d\n", l->ident);
78 err = l->ops->lock(l); 104 err = l->ops->lock(l);
79 } 105 }
80 106
@@ -96,7 +122,8 @@ asmlinkage long sys_litmus_unlock(int lock_od)
96 entry = get_entry_for_od(lock_od); 122 entry = get_entry_for_od(lock_od);
97 if (entry && is_lock(entry)) { 123 if (entry && is_lock(entry)) {
98 l = get_lock(entry); 124 l = get_lock(entry);
99 TRACE_CUR("attempts to unlock 0x%p\n", l); 125 //TRACE_CUR("attempts to unlock 0x%p\n", l);
126 TRACE_CUR("attempts to unlock %d\n", l->ident);
100 err = l->ops->unlock(l); 127 err = l->ops->unlock(l);
101 } 128 }
102 129
@@ -121,8 +148,366 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
121 return(t); 148 return(t);
122} 149}
123 150
151#ifdef CONFIG_LITMUS_NESTED_LOCKING
152
153void print_hp_waiters(struct binheap_node* n, int depth)
154{
155 struct litmus_lock *l;
156 struct nested_info *nest;
157 char padding[81] = " ";
158 struct task_struct *hp = NULL;
159 struct task_struct *hp_eff = NULL;
160 struct task_struct *node_prio = NULL;
161
162
163 if(n == NULL) {
164 TRACE("+-> %p\n", NULL);
165 return;
166 }
167
168 nest = binheap_entry(n, struct nested_info, hp_binheap_node);
169 l = nest->lock;
170
171 if(depth*2 <= 80)
172 padding[depth*2] = '\0';
173
174 if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) {
175 hp = *(nest->hp_waiter_ptr);
176
177 if(tsk_rt(hp)->inh_task) {
178 hp_eff = tsk_rt(hp)->inh_task;
179 }
180 }
181
182 node_prio = nest->hp_waiter_eff_prio;
183
184 TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n",
185 padding,
186 (node_prio) ? node_prio->comm : "nil",
187 (node_prio) ? node_prio->pid : -1,
188 (hp) ? hp->comm : "nil",
189 (hp) ? hp->pid : -1,
190 (hp_eff) ? hp_eff->comm : "nil",
191 (hp_eff) ? hp_eff->pid : -1,
192 l->ident);
193
194 if(n->left) print_hp_waiters(n->left, depth+1);
195 if(n->right) print_hp_waiters(n->right, depth+1);
196}
197#endif
198
199
200#ifdef CONFIG_LITMUS_DGL_SUPPORT
201
202void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
203{
204 /*
205 We pick the next lock in reverse order. This causes inheritance propagation
206 from locks received earlier to flow in the same direction as regular nested
207	 locking. This might make fine-grained DGL easier in the future.
208 */
209
210 BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);
211
212 //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock);
213
214 // note reverse order
215 for(dgl_wait->last_primary = dgl_wait->last_primary - 1;
216 dgl_wait->last_primary >= 0;
217 --(dgl_wait->last_primary)){
218 if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner(
219 dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) {
220
221 tsk_rt(dgl_wait->task)->blocked_lock =
222 dgl_wait->locks[dgl_wait->last_primary];
223 mb();
224
225 TRACE_CUR("New blocked lock is %d\n",
226 dgl_wait->locks[dgl_wait->last_primary]->ident);
227
228 break;
229 }
230 }
231}
232
233int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
234{
235 // should never be called.
236 BUG();
237 return 1;
238}
239
240void __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
241 dgl_wait_state_t** dgl_wait,
242 struct task_struct **task)
243{
244 wait_queue_t *q;
245
246 *dgl_wait = NULL;
247 *task = NULL;
248
249 if (waitqueue_active(wq)) {
250 q = list_entry(wq->task_list.next,
251 wait_queue_t, task_list);
252
253 if(q->func == dgl_wake_up) {
254 *dgl_wait = (dgl_wait_state_t*) q->private;
255 }
256 else {
257 *task = (struct task_struct*) q->private;
258 }
259
260 __remove_wait_queue(wq, q);
261 }
262}
263
264void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait)
265{
266 init_waitqueue_entry(wq_node, dgl_wait->task);
267 wq_node->private = dgl_wait;
268 wq_node->func = dgl_wake_up;
269}
270
271
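/* Acquire an entire dynamic group lock: while holding the global DGL
 * spinlock, try every lock without blocking; if any remain unowned, enable
 * priority inheritance on the last such lock and suspend until the whole
 * group has been granted. */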
272static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
273{
274 int i;
275 unsigned long irqflags; //, dummyflags;
276 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
277
278 BUG_ON(dgl_wait->task != current);
279
280 raw_spin_lock_irqsave(dgl_lock, irqflags);
281
282
283 dgl_wait->nr_remaining = dgl_wait->size;
284
285 TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size);
286
287 // try to acquire each lock. enqueue (non-blocking) if it is unavailable.
288 for(i = 0; i < dgl_wait->size; ++i) {
289 struct litmus_lock *l = dgl_wait->locks[i];
290
291 // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.
292
293 if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) {
294 --(dgl_wait->nr_remaining);
295			TRACE_CUR("Acquired lock %d immediately.\n", l->ident);
296 }
297 }
298
299 if(dgl_wait->nr_remaining == 0) {
300		// acquired entire group immediately
301		TRACE_CUR("Acquired all locks in DGL immediately!\n");
302 }
303 else {
304
305 TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
306 dgl_wait->nr_remaining);
307
308#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
309		// KLUDGE: don't count this suspension as time spent in the
310		// GPU critical section
311 if(tsk_rt(dgl_wait->task)->held_gpus) {
312 tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
313 }
314#endif
315
316 // note reverse order. see comments in select_next_lock for reason.
317 for(i = dgl_wait->size - 1; i >= 0; --i) {
318 struct litmus_lock *l = dgl_wait->locks[i];
319 if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe
320
321 TRACE_CUR("Activating priority inheritance on lock %d\n",
322 l->ident);
323
324 TS_DGL_LOCK_SUSPEND;
325
326 l->ops->enable_priority(l, dgl_wait);
327 dgl_wait->last_primary = i;
328
329 TRACE_CUR("Suspending for lock %d\n", l->ident);
330
331 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
332
333 schedule(); // suspend!!!
334
335 TS_DGL_LOCK_RESUME;
336
337 TRACE_CUR("Woken up from DGL suspension.\n");
338
339 goto all_acquired; // we should hold all locks when we wake up.
340 }
341 }
342
343 TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n");
344 //BUG();
345 }
346
347 raw_spin_unlock_irqrestore(dgl_lock, irqflags);
348
349all_acquired:
350
351 // FOR SANITY CHECK FOR TESTING
352// for(i = 0; i < dgl_wait->size; ++i) {
353// struct litmus_lock *l = dgl_wait->locks[i];
354// BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
355// }
356
357 TRACE_CUR("Acquired entire DGL\n");
358
359 return 0;
360}
361
362static int supports_dgl(struct litmus_lock *l)
363{
364 struct litmus_lock_ops* ops = l->ops;
365
366 return (ops->dgl_lock &&
367 ops->is_owner &&
368 ops->enable_priority);
369}
370
371asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
372{
373 struct task_struct *t = current;
374 long err = -EINVAL;
375 int dgl_ods[MAX_DGL_SIZE];
376 int i;
377
378 dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held.
379
380 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
381 goto out;
382
383 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
384 goto out;
385
386 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
387 goto out;
388
389 if (!is_realtime(t)) {
390 err = -EPERM;
391 goto out;
392 }
393
394 for(i = 0; i < dgl_size; ++i) {
395 struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
396 if(entry && is_lock(entry)) {
397 dgl_wait_state.locks[i] = get_lock(entry);
398 if(!supports_dgl(dgl_wait_state.locks[i])) {
399 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
400 dgl_wait_state.locks[i]->ident);
401 goto out;
402 }
403 }
404 else {
405 TRACE_CUR("Invalid lock identifier\n");
406 goto out;
407 }
408 }
409
410 dgl_wait_state.task = t;
411 dgl_wait_state.size = dgl_size;
412
413 TS_DGL_LOCK_START;
414 err = do_litmus_dgl_lock(&dgl_wait_state);
415
416	/* Note: task may have been suspended or preempted in between! Take
417 * this into account when computing overheads. */
418 TS_DGL_LOCK_END;
419
420out:
421 return err;
422}
423
424static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size)
425{
426 int i;
427 long err = 0;
428
429 TRACE_CUR("Unlocking a DGL of %d size\n", dgl_size);
430
431 for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order
432
433 struct litmus_lock *l = dgl_locks[i];
434 long tmp_err;
435
436 TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident);
437
438 tmp_err = l->ops->unlock(l);
439
440 if(tmp_err) {
441			TRACE_CUR("There was an error unlocking %d: %ld.\n", l->ident, tmp_err);
442 err = tmp_err;
443 }
444 }
445
446	TRACE_CUR("DGL unlocked. err = %ld\n", err);
447
448 return err;
449}
450
451asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
452{
453 long err = -EINVAL;
454 int dgl_ods[MAX_DGL_SIZE];
455 struct od_table_entry* entry;
456 int i;
457
458 struct litmus_lock* dgl_locks[MAX_DGL_SIZE];
459
460 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
461 goto out;
462
463 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
464 goto out;
465
466 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
467 goto out;
468
469 for(i = 0; i < dgl_size; ++i) {
470 entry = get_entry_for_od(dgl_ods[i]);
471 if(entry && is_lock(entry)) {
472 dgl_locks[i] = get_lock(entry);
473 if(!supports_dgl(dgl_locks[i])) {
474 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
475 dgl_locks[i]->ident);
476 goto out;
477 }
478 }
479 else {
480 TRACE_CUR("Invalid lock identifier\n");
481 goto out;
482 }
483 }
484
485 TS_DGL_UNLOCK_START;
486 err = do_litmus_dgl_unlock(dgl_locks, dgl_size);
487
488	/* Note: task may have been suspended or preempted in between! Take
489 * this into account when computing overheads. */
490 TS_DGL_UNLOCK_END;
491
492out:
493 return err;
494}
495
496#else // CONFIG_LITMUS_DGL_SUPPORT
497
498asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
499{
500 return -ENOSYS;
501}
502
503asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
504{
505 return -ENOSYS;
506}
507
508#endif
124 509
125#else 510#else // CONFIG_LITMUS_LOCKING
126 511
127struct fdso_ops generic_lock_ops = {}; 512struct fdso_ops generic_lock_ops = {};
128 513
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
new file mode 100644
index 000000000000..4b86a50d3bd1
--- /dev/null
+++ b/litmus/nvidia_info.c
@@ -0,0 +1,597 @@
1#include <linux/module.h>
2#include <linux/semaphore.h>
3#include <linux/pci.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/nvidia_info.h>
7#include <litmus/litmus.h>
8
9#include <litmus/sched_plugin.h>
10
11#include <litmus/binheap.h>
12
13typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
14typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
15typedef unsigned char NvU8; /* 0 to 255 */
16typedef unsigned short NvU16; /* 0 to 65535 */
17typedef signed char NvS8; /* -128 to 127 */
18typedef signed short NvS16; /* -32768 to 32767 */
19typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
20typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
21typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
22typedef unsigned int NvU32; /* 0 to 4294967295 */
23typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
24typedef union
25{
26 volatile NvV8 Reg008[1];
27 volatile NvV16 Reg016[1];
28 volatile NvV32 Reg032[1];
29} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
30
31typedef struct
32{
33 NvU64 address;
34 NvU64 size;
35 NvU32 offset;
36 NvU32 *map;
37 litmus_nv_phwreg_t map_u;
38} litmus_nv_aperture_t;
39
40typedef struct
41{
42 void *priv; /* private data */
43 void *os_state; /* os-specific device state */
44
45 int rmInitialized;
46 int flags;
47
48 /* PCI config info */
49 NvU32 domain;
50 NvU16 bus;
51 NvU16 slot;
52 NvU16 vendor_id;
53 NvU16 device_id;
54 NvU16 subsystem_id;
55 NvU32 gpu_id;
56 void *handle;
57
58 NvU32 pci_cfg_space[16];
59
60 /* physical characteristics */
61 litmus_nv_aperture_t bars[3];
62 litmus_nv_aperture_t *regs;
63 litmus_nv_aperture_t *fb, ud;
64 litmus_nv_aperture_t agp;
65
66 NvU32 interrupt_line;
67
68 NvU32 agp_config;
69 NvU32 agp_status;
70
71 NvU32 primary_vga;
72
73 NvU32 sim_env;
74
75 NvU32 rc_timer_enabled;
76
77 /* list of events allocated for this device */
78 void *event_list;
79
80 void *kern_mappings;
81
82} litmus_nv_state_t;
83
84typedef struct work_struct litmus_nv_task_t;
85
86typedef struct litmus_nv_work_s {
87 litmus_nv_task_t task;
88 void *data;
89} litmus_nv_work_t;
90
91typedef struct litmus_nv_linux_state_s {
92 litmus_nv_state_t nv_state;
93 atomic_t usage_count;
94
95 struct pci_dev *dev;
96 void *agp_bridge;
97 void *alloc_queue;
98
99 void *timer_sp;
100 void *isr_sp;
101 void *pci_cfgchk_sp;
102 void *isr_bh_sp;
103
104#ifdef CONFIG_CUDA_4_0
105 char registry_keys[512];
106#endif
107
108	/* keep track of any pending bottom halves */
109 struct tasklet_struct tasklet;
110 litmus_nv_work_t work;
111
112 /* get a timer callback every second */
113 struct timer_list rc_timer;
114
115 /* lock for linux-specific data, not used by core rm */
116 struct semaphore ldata_lock;
117
118 /* lock for linux-specific alloc queue */
119 struct semaphore at_lock;
120
121#if 0
122#if defined(NV_USER_MAP)
123 /* list of user mappings */
124 struct nv_usermap_s *usermap_list;
125
126 /* lock for VMware-specific mapping list */
127 struct semaphore mt_lock;
128#endif /* defined(NV_USER_MAP) */
129#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
130 void *apm_nv_dev;
131#endif
132#endif
133
134 NvU32 device_num;
135 struct litmus_nv_linux_state_s *next;
136} litmus_nv_linux_state_t;
137
138void dump_nvidia_info(const struct tasklet_struct *t)
139{
140 litmus_nv_state_t* nvstate = NULL;
141 litmus_nv_linux_state_t* linuxstate = NULL;
142 struct pci_dev* pci = NULL;
143
144 nvstate = (litmus_nv_state_t*)(t->data);
145
146 if(nvstate)
147 {
148 TRACE("NV State:\n"
149 "\ttasklet ptr = %p\n"
150 "\tstate ptr = %p\n"
151 "\tprivate data ptr = %p\n"
152 "\tos state ptr = %p\n"
153 "\tdomain = %u\n"
154 "\tbus = %u\n"
155 "\tslot = %u\n"
156			  "\tvendor_id = %u\n"
157 "\tdevice_id = %u\n"
158 "\tsubsystem_id = %u\n"
159 "\tgpu_id = %u\n"
160 "\tinterrupt_line = %u\n",
161 t,
162 nvstate,
163 nvstate->priv,
164 nvstate->os_state,
165 nvstate->domain,
166 nvstate->bus,
167 nvstate->slot,
168 nvstate->vendor_id,
169 nvstate->device_id,
170 nvstate->subsystem_id,
171 nvstate->gpu_id,
172 nvstate->interrupt_line);
173
174 linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
175 }
176 else
177 {
178 TRACE("INVALID NVSTATE????\n");
179 }
180
181 if(linuxstate)
182 {
183 int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
184 int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
185 int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
186
187
188 TRACE("LINUX NV State:\n"
189 "\tlinux nv state ptr: %p\n"
190 "\taddress of tasklet: %p\n"
191 "\taddress of work: %p\n"
192 "\tusage_count: %d\n"
193 "\tdevice_num: %u\n"
194 "\ttasklet addr == this tasklet: %d\n"
195 "\tpci: %p\n",
196 linuxstate,
197 &(linuxstate->tasklet),
198 &(linuxstate->work),
199 atomic_read(&(linuxstate->usage_count)),
200 linuxstate->device_num,
201 (t == &(linuxstate->tasklet)),
202 linuxstate->dev);
203
204 pci = linuxstate->dev;
205
206 TRACE("Offsets:\n"
207 "\tOffset from LinuxState: %d, %x\n"
208 "\tOffset from NVState: %d, %x\n"
209 "\tOffset from parameter: %d, %x\n"
210 "\tdevice_num: %u\n",
211 ls_offset, ls_offset,
212 ns_offset_raw, ns_offset_raw,
213 ns_offset_desired, ns_offset_desired,
214 *((u32*)((void*)nvstate + ns_offset_desired)));
215 }
216 else
217 {
218 TRACE("INVALID LINUXNVSTATE?????\n");
219 }
220
221#if 0
222 if(pci)
223 {
224 TRACE("PCI DEV Info:\n"
225 "pci device ptr: %p\n"
226 "\tdevfn = %d\n"
227 "\tvendor = %d\n"
228 "\tdevice = %d\n"
229 "\tsubsystem_vendor = %d\n"
230 "\tsubsystem_device = %d\n"
231 "\tslot # = %d\n",
232 pci,
233 pci->devfn,
234 pci->vendor,
235 pci->device,
236 pci->subsystem_vendor,
237 pci->subsystem_device,
238 pci->slot->number);
239 }
240 else
241 {
242 TRACE("INVALID PCIDEV PTR?????\n");
243 }
244#endif
245}
246
247static struct module* nvidia_mod = NULL;
248int init_nvidia_info(void)
249{
250 mutex_lock(&module_mutex);
251 nvidia_mod = find_module("nvidia");
252 mutex_unlock(&module_mutex);
253 if(nvidia_mod != NULL)
254 {
255 TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
256 (void*)(nvidia_mod->module_core),
257 (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
258 init_nv_device_reg();
259 return(0);
260 }
261 else
262 {
263 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
264 return(-1);
265 }
266}
267
268void shutdown_nvidia_info(void)
269{
270 nvidia_mod = NULL;
271 mb();
272}
273
274/* works with pointers to static data inside the module too. */
275int is_nvidia_func(void* func_addr)
276{
277 int ret = 0;
278 if(nvidia_mod)
279 {
280 ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
281 /*
282 if(ret)
283 {
284 TRACE("%s : %p is in NVIDIA module: %d\n",
285 __FUNCTION__, func_addr, ret);
286 }*/
287 }
288
289 return(ret);
290}
291
292u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
293{
294 // life is too short to use hard-coded offsets. update this later.
295 litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
296 litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
297
298 BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
299
300 return(linuxstate->device_num);
301
302 //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
303
304#if 0
305	// offset determined through observed behavior of the NV driver.
306 //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
307 //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
308
309 void* state = (void*)(t->data);
310 void* device_num_ptr = state + DEVICE_NUM_OFFSET;
311
312 //dump_nvidia_info(t);
313 return(*((u32*)device_num_ptr));
314#endif
315}
316
317u32 get_work_nv_device_num(const struct work_struct *t)
318{
319	// offset determined through observed behavior of the NV driver.
320 const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
321 void* state = (void*)(t);
322 void** device_num_ptr = state + DEVICE_NUM_OFFSET;
323 return(*((u32*)(*device_num_ptr)));
324}
325
326
327typedef struct {
328 raw_spinlock_t lock;
329 int nr_owners;
330 struct task_struct* max_prio_owner;
331 struct task_struct* owners[NV_MAX_SIMULT_USERS];
332}nv_device_registry_t;
333
334static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
335
336int init_nv_device_reg(void)
337{
338 int i;
339
340 memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
341
342 for(i = 0; i < NV_DEVICE_NUM; ++i)
343 {
344 raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
345 }
346
347 return(1);
348}
349
350/* Used to get the nv_device_id for a given owner.
351   (Returns -1 if no associated device id can be found.) */
352/*
353int get_nv_device_id(struct task_struct* owner)
354{
355 int i;
356 if(!owner)
357 {
358 return(-1);
359 }
360 for(i = 0; i < NV_DEVICE_NUM; ++i)
361 {
362 if(NV_DEVICE_REG[i].device_owner == owner)
363 return(i);
364 }
365 return(-1);
366}
367*/
368
369static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) {
370 int i;
371 struct task_struct *found = NULL;
372 for(i = 0; i < reg->nr_owners; ++i) {
373 if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
374 found = reg->owners[i];
375 }
376 }
377 return found;
378}
379
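/* PAI support: when a registered task's priority changes, re-check whether
 * it should become (or stop being) the registry's max-priority owner and
 * propagate the change via change_prio_pai_tasklet(). */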
380#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
381void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
382{
383 unsigned long flags;
384 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
385
386 if(reg->max_prio_owner != t) {
387
388 raw_spin_lock_irqsave(&reg->lock, flags);
389
390 if(reg->max_prio_owner != t) {
391 if(litmus->compare(t, reg->max_prio_owner)) {
392 litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
393 reg->max_prio_owner = t;
394 }
395 }
396
397 raw_spin_unlock_irqrestore(&reg->lock, flags);
398 }
399}
400
401
402void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
403{
404 unsigned long flags;
405 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
406
407 if(reg->max_prio_owner == t) {
408
409 raw_spin_lock_irqsave(&reg->lock, flags);
410
411 if(reg->max_prio_owner == t) {
412 reg->max_prio_owner = find_hp_owner(reg, NULL);
413 if(reg->max_prio_owner != t) {
414 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
415 }
416 }
417
418 raw_spin_unlock_irqrestore(&reg->lock, flags);
419 }
420}
421#endif
422
423static int __reg_nv_device(int reg_device_id, struct task_struct *t)
424{
425 int ret = 0;
426 int i;
427 struct task_struct *old_max = NULL;
428 unsigned long flags;
429 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
430
431 if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) {
 432 // TODO: check if task is already registered.
433 return ret; // assume already registered.
434 }
435
436
437 raw_spin_lock_irqsave(&reg->lock, flags);
438
439 if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
440 TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
441 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
442 if(reg->owners[i] == NULL) {
443 reg->owners[i] = t;
444
445 //if(edf_higher_prio(t, reg->max_prio_owner)) {
446 if(litmus->compare(t, reg->max_prio_owner)) {
447 old_max = reg->max_prio_owner;
448 reg->max_prio_owner = t;
449
450#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
451 litmus->change_prio_pai_tasklet(old_max, t);
452#endif
453 }
454
455#ifdef CONFIG_LITMUS_SOFTIRQD
456 down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem);
457#endif
458 ++(reg->nr_owners);
459
460 break;
461 }
462 }
463 }
464 else
465 {
466 TRACE_CUR("%s: device %d is already in use!\n", __FUNCTION__, reg_device_id);
467 //ret = -EBUSY;
468 }
469
470 raw_spin_unlock_irqrestore(&reg->lock, flags);
471
472 __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
473
474 return(ret);
475}
476
477static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
478{
479 int ret = 0;
480 int i;
481 unsigned long flags;
482 nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
483
484#ifdef CONFIG_LITMUS_SOFTIRQD
485 struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
486#endif
487
488 if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
489 return ret;
490 }
491
492 raw_spin_lock_irqsave(&reg->lock, flags);
493
494 TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
495
496 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
497 if(reg->owners[i] == t) {
498#ifdef CONFIG_LITMUS_SOFTIRQD
499 flush_pending(klitirqd_th, t);
500#endif
501 if(reg->max_prio_owner == t) {
502 reg->max_prio_owner = find_hp_owner(reg, t);
503#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
504 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
505#endif
506 }
507
508#ifdef CONFIG_LITMUS_SOFTIRQD
509 up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem);
510#endif
511
512 reg->owners[i] = NULL;
513 --(reg->nr_owners);
514
515 break;
516 }
517 }
518
519 raw_spin_unlock_irqrestore(&reg->lock, flags);
520
521 __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
522
523 return(ret);
524}
525
526
527int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
528{
529 int ret;
530
531 if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
532 {
533 if(reg_action)
534 ret = __reg_nv_device(reg_device_id, t);
535 else
536 ret = __clear_reg_nv_device(reg_device_id, t);
537 }
538 else
539 {
540 ret = -ENODEV;
541 }
542
543 return(ret);
544}
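A hedged sketch of the caller side of reg_nv_device(); the wrapper is an assumption, only the reg_nv_device() signature above comes from this patch.

	/* Hypothetical wrapper: register (action != 0) or unregister
	 * (action == 0) the current task for a GPU.  reg_nv_device()
	 * returns -ENODEV for an out-of-range device id. */
	static long do_register_nv_device(int device, int action)
	{
		return (long) reg_nv_device(device, action, current);
	}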
545
 546/* use to get the highest-priority owner of nv_device_id. */
547struct task_struct* get_nv_max_device_owner(u32 target_device_id)
548{
549 struct task_struct *owner = NULL;
550 BUG_ON(target_device_id >= NV_DEVICE_NUM);
551 owner = NV_DEVICE_REG[target_device_id].max_prio_owner;
552 return(owner);
553}
554
555void lock_nv_registry(u32 target_device_id, unsigned long* flags)
556{
557 BUG_ON(target_device_id >= NV_DEVICE_NUM);
558
559 if(in_interrupt())
560 TRACE("Locking registry for %d.\n", target_device_id);
561 else
562 TRACE_CUR("Locking registry for %d.\n", target_device_id);
563
564 raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
565}
566
567void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
568{
569 BUG_ON(target_device_id >= NV_DEVICE_NUM);
570
571 if(in_interrupt())
572 TRACE("Unlocking registry for %d.\n", target_device_id);
573 else
574 TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
575
576 raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
577}
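The registry lock helpers take the flags word by address because the irqsave/irqrestore happens inside them; a short sketch of the expected caller pattern (the helper name is illustrative):

	/* Sketch only: read the current highest-priority owner under the
	 * per-device registry lock. */
	static struct task_struct* peek_max_prio_owner(u32 device)
	{
		unsigned long flags;
		struct task_struct *owner;

		lock_nv_registry(device, &flags);
		owner = NV_DEVICE_REG[device].max_prio_owner;
		unlock_nv_registry(device, &flags);

		return owner;
	}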
578
579
580//void increment_nv_int_count(u32 device)
581//{
582// unsigned long flags;
583// struct task_struct* owner;
584//
585// lock_nv_registry(device, &flags);
586//
587// owner = NV_DEVICE_REG[device].device_owner;
588// if(owner)
589// {
590// atomic_inc(&tsk_rt(owner)->nv_int_count);
591// }
592//
593// unlock_nv_registry(device, &flags);
594//}
595//EXPORT_SYMBOL(increment_nv_int_count);
596
597
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 5704d0bf4c0b..28368d5bc046 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -30,6 +30,7 @@ void sched_state_will_schedule(struct task_struct* tsk)
30 /* Litmus tasks should never be subject to a remote 30 /* Litmus tasks should never be subject to a remote
31 * set_tsk_need_resched(). */ 31 * set_tsk_need_resched(). */
32 BUG_ON(is_realtime(tsk)); 32 BUG_ON(is_realtime(tsk));
33
33#ifdef CONFIG_PREEMPT_STATE_TRACE 34#ifdef CONFIG_PREEMPT_STATE_TRACE
34 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
35 __builtin_return_address(0)); 36 __builtin_return_address(0));
@@ -45,13 +46,17 @@ void sched_state_ipi(void)
45 /* Cause scheduler to be invoked. 46 /* Cause scheduler to be invoked.
46 * This will cause a transition to WILL_SCHEDULE. */ 47 * This will cause a transition to WILL_SCHEDULE. */
47 set_tsk_need_resched(current); 48 set_tsk_need_resched(current);
49 /*
48 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", 50 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
49 current->comm, current->pid); 51 current->comm, current->pid);
52 */
50 } else { 53 } else {
51 /* ignore */ 54 /* ignore */
55 /*
52 TRACE_STATE("ignoring IPI in state %x (%s)\n", 56 TRACE_STATE("ignoring IPI in state %x (%s)\n",
53 get_sched_state(), 57 get_sched_state(),
54 sched_state_name(get_sched_state())); 58 sched_state_name(get_sched_state()));
59 */
55 } 60 }
56} 61}
57 62
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
new file mode 100644
index 000000000000..75ed87c5ed48
--- /dev/null
+++ b/litmus/rsm_lock.c
@@ -0,0 +1,796 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/rsm_lock.h>
7
8//#include <litmus/edf_common.h>
9
10#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11#include <litmus/gpu_affinity.h>
12#endif
13
14
15/* caller is responsible for locking */
16static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex,
17 struct task_struct* skip)
18{
19 wait_queue_t *q;
20 struct list_head *pos;
21 struct task_struct *queued = NULL, *found = NULL;
22
23#ifdef CONFIG_LITMUS_DGL_SUPPORT
24 dgl_wait_state_t *dgl_wait = NULL;
25#endif
26
27 list_for_each(pos, &mutex->wait.task_list) {
28 q = list_entry(pos, wait_queue_t, task_list);
29
30#ifdef CONFIG_LITMUS_DGL_SUPPORT
31 if(q->func == dgl_wake_up) {
32 dgl_wait = (dgl_wait_state_t*) q->private;
33 if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) {
34 queued = dgl_wait->task;
35 }
36 else {
37 queued = NULL; // skip it.
38 }
39 }
40 else {
41 queued = (struct task_struct*) q->private;
42 }
43#else
44 queued = (struct task_struct*) q->private;
45#endif
46
47 /* Compare task prios, find high prio task. */
48 //if (queued && queued != skip && edf_higher_prio(queued, found)) {
49 if (queued && queued != skip && litmus->compare(queued, found)) {
50 found = queued;
51 }
52 }
53 return found;
54}
55
56
57#ifdef CONFIG_LITMUS_DGL_SUPPORT
58
59int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t)
60{
61 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
62 return(mutex->owner == t);
63}
64
 65// return 1 if resource was immediately acquired.
66// Assumes mutex->lock is held.
67// Must set task state to TASK_UNINTERRUPTIBLE if task blocks.
68int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait,
69 wait_queue_t* wq_node)
70{
71 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
72 struct task_struct *t = dgl_wait->task;
73
 74 int acquired_immediately = 0;
75
76 BUG_ON(t != current);
77
78 if (mutex->owner) {
79 TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident);
80
81 init_dgl_waitqueue_entry(wq_node, dgl_wait);
82
83 set_task_state(t, TASK_UNINTERRUPTIBLE);
84 __add_wait_queue_tail_exclusive(&mutex->wait, wq_node);
85 } else {
86 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
87
88 /* it's ours now */
89 mutex->owner = t;
90
91 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
92 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
93 struct nested_info, hp_binheap_node);
94 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
95
 96 acquired_immediately = 1;
97 }
98
 99 return acquired_immediately;
100}
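For orientation, a hedged sketch of how a group-lock driver could use the return convention above (1 = acquired immediately, 0 = enqueued and set to TASK_UNINTERRUPTIBLE); the real DGL code lives in litmus/locking.c, and the function and array names here are illustrative.

	/* Sketch, assuming the caller already holds the coarse DGL spinlock
	 * and each mutex's fine-grained lock, as required above. */
	static int acquire_rsm_group(struct litmus_lock **locks,
				     wait_queue_t *wq_nodes,
				     int size, dgl_wait_state_t *dgl_wait)
	{
		int i, blocked = 0;

		for (i = 0; i < size; ++i)
			if (!rsm_mutex_dgl_lock(locks[i], dgl_wait, &wq_nodes[i]))
				++blocked;

		return blocked;	/* 0 => no suspension is needed */
	}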
101
102void rsm_mutex_enable_priority(struct litmus_lock *l,
103 dgl_wait_state_t* dgl_wait)
104{
105 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
106 struct task_struct *t = dgl_wait->task;
107 struct task_struct *owner = mutex->owner;
108 unsigned long flags = 0; // these are unused under DGL coarse-grain locking
109
110 BUG_ON(owner == t);
111
112 tsk_rt(t)->blocked_lock = l;
113 mb();
114
115 //if (edf_higher_prio(t, mutex->hp_waiter)) {
116 if (litmus->compare(t, mutex->hp_waiter)) {
117
118 struct task_struct *old_max_eff_prio;
119 struct task_struct *new_max_eff_prio;
120 struct task_struct *new_prio = NULL;
121
122 if(mutex->hp_waiter)
123 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
124 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
125 else
126 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
127
128 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
129
130 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
131 mutex->hp_waiter = t;
132 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
133 binheap_decrease(&l->nest.hp_binheap_node,
134 &tsk_rt(owner)->hp_blocked_tasks);
135 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
136
137 if(new_max_eff_prio != old_max_eff_prio) {
138 TRACE_TASK(t, "is new hp_waiter.\n");
139
140 if ((effective_priority(owner) == old_max_eff_prio) ||
141 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
142 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
143 new_prio = new_max_eff_prio;
144 }
145 }
146 else {
147 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
148 }
149
150 if(new_prio) {
151 litmus->nested_increase_prio(owner, new_prio,
152 &mutex->lock, flags); // unlocks lock.
153 }
154 else {
155 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
156 unlock_fine_irqrestore(&mutex->lock, flags);
157 }
158 }
159 else {
160 TRACE_TASK(t, "no change in hp_waiter.\n");
161 unlock_fine_irqrestore(&mutex->lock, flags);
162 }
163}
164
165static void select_next_lock_if_primary(struct litmus_lock *l,
166 dgl_wait_state_t *dgl_wait)
167{
168 if(tsk_rt(dgl_wait->task)->blocked_lock == l) {
169 TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n",
170 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
171 tsk_rt(dgl_wait->task)->blocked_lock = NULL;
172 mb();
173 select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on
174 }
175 else {
176 TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n",
177 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
178 }
179}
180#endif
181
182
183
184
185int rsm_mutex_lock(struct litmus_lock* l)
186{
187 struct task_struct *t = current;
188 struct task_struct *owner;
189 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
190 wait_queue_t wait;
191 unsigned long flags;
192
193#ifdef CONFIG_LITMUS_DGL_SUPPORT
194 raw_spinlock_t *dgl_lock;
195#endif
196
197 if (!is_realtime(t))
198 return -EPERM;
199
200#ifdef CONFIG_LITMUS_DGL_SUPPORT
201 dgl_lock = litmus->get_dgl_spinlock(t);
202#endif
203
204 lock_global_irqsave(dgl_lock, flags);
205 lock_fine_irqsave(&mutex->lock, flags);
206
207 if (mutex->owner) {
208 TRACE_TASK(t, "Blocking on lock %d.\n", l->ident);
209
210#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
 211 // KLUDGE: don't count this suspension as time in the
 212 // gpu critical section
213 if(tsk_rt(t)->held_gpus) {
214 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
215 }
216#endif
217
218 /* resource is not free => must suspend and wait */
219
220 owner = mutex->owner;
221
222 init_waitqueue_entry(&wait, t);
223
224 tsk_rt(t)->blocked_lock = l; /* record where we are blocked */
225 mb(); // needed?
226
227 /* FIXME: interruptible would be nice some day */
228 set_task_state(t, TASK_UNINTERRUPTIBLE);
229
230 __add_wait_queue_tail_exclusive(&mutex->wait, &wait);
231
232 /* check if we need to activate priority inheritance */
233 //if (edf_higher_prio(t, mutex->hp_waiter)) {
234 if (litmus->compare(t, mutex->hp_waiter)) {
235
236 struct task_struct *old_max_eff_prio;
237 struct task_struct *new_max_eff_prio;
238 struct task_struct *new_prio = NULL;
239
240 if(mutex->hp_waiter)
241 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
242 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
243 else
244 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
245
246 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
247
248 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
249 mutex->hp_waiter = t;
250 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
251 binheap_decrease(&l->nest.hp_binheap_node,
252 &tsk_rt(owner)->hp_blocked_tasks);
253 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
254
255 if(new_max_eff_prio != old_max_eff_prio) {
256 TRACE_TASK(t, "is new hp_waiter.\n");
257
258 if ((effective_priority(owner) == old_max_eff_prio) ||
259 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
260 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
261 new_prio = new_max_eff_prio;
262 }
263 }
264 else {
265 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
266 }
267
268 if(new_prio) {
269 litmus->nested_increase_prio(owner, new_prio, &mutex->lock,
270 flags); // unlocks lock.
271 }
272 else {
273 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
274 unlock_fine_irqrestore(&mutex->lock, flags);
275 }
276 }
277 else {
278 TRACE_TASK(t, "no change in hp_waiter.\n");
279
280 unlock_fine_irqrestore(&mutex->lock, flags);
281 }
282
283 unlock_global_irqrestore(dgl_lock, flags);
284
285 TS_LOCK_SUSPEND;
286
287 /* We depend on the FIFO order. Thus, we don't need to recheck
288 * when we wake up; we are guaranteed to have the lock since
289 * there is only one wake up per release.
290 */
291
292 schedule();
293
294 TS_LOCK_RESUME;
295
296 /* Since we hold the lock, no other task will change
297 * ->owner. We can thus check it without acquiring the spin
298 * lock. */
299 BUG_ON(mutex->owner != t);
300
301 TRACE_TASK(t, "Acquired lock %d.\n", l->ident);
302
303 } else {
304 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
305
306 /* it's ours now */
307 mutex->owner = t;
308
309 raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
310 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
311 struct nested_info, hp_binheap_node);
312 raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
313
314
315 unlock_fine_irqrestore(&mutex->lock, flags);
316 unlock_global_irqrestore(dgl_lock, flags);
317 }
318
319 return 0;
320}
321
322
323
324int rsm_mutex_unlock(struct litmus_lock* l)
325{
326 struct task_struct *t = current, *next = NULL;
327 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
328 unsigned long flags;
329
330 struct task_struct *old_max_eff_prio;
331
332 int wake_up_task = 1;
333
334#ifdef CONFIG_LITMUS_DGL_SUPPORT
335 dgl_wait_state_t *dgl_wait = NULL;
336 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
337#endif
338
339 int err = 0;
340
341 if (mutex->owner != t) {
342 err = -EINVAL;
343 return err;
344 }
345
346 lock_global_irqsave(dgl_lock, flags);
347 lock_fine_irqsave(&mutex->lock, flags);
348
349 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
350
351 TRACE_TASK(t, "Freeing lock %d\n", l->ident);
352
353 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
354 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks);
355
356 if(tsk_rt(t)->inh_task){
357 struct task_struct *new_max_eff_prio =
358 top_priority(&tsk_rt(t)->hp_blocked_tasks);
359
360 if((new_max_eff_prio == NULL) ||
361 /* there was a change in eff prio */
362 ( (new_max_eff_prio != old_max_eff_prio) &&
363 /* and owner had the old eff prio */
364 (effective_priority(t) == old_max_eff_prio)) )
365 {
366 // old_max_eff_prio > new_max_eff_prio
367
368 //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) {
369 if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) {
370 TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n",
371 new_max_eff_prio->comm, new_max_eff_prio->pid,
372 t->comm, t->pid, tsk_rt(t)->inh_task->comm,
373 tsk_rt(t)->inh_task->pid);
374 WARN_ON(1);
375 }
376
377 litmus->decrease_prio(t, new_max_eff_prio);
378 }
379 }
380
381 if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) &&
382 tsk_rt(t)->inh_task != NULL)
383 {
384 WARN_ON(tsk_rt(t)->inh_task != NULL);
385 TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n",
386 tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid);
387 }
388
389 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
390
391
392 /* check if there are jobs waiting for this resource */
393#ifdef CONFIG_LITMUS_DGL_SUPPORT
394 __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next);
395 if(dgl_wait) {
396 next = dgl_wait->task;
397 //select_next_lock_if_primary(l, dgl_wait);
398 }
399#else
400 next = __waitqueue_remove_first(&mutex->wait);
401#endif
402 if (next) {
 403 /* next becomes the resource holder */
404 mutex->owner = next;
405 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
406
407 /* determine new hp_waiter if necessary */
408 if (next == mutex->hp_waiter) {
409
410 TRACE_TASK(next, "was highest-prio waiter\n");
411 /* next has the highest priority --- it doesn't need to
412 * inherit. However, we need to make sure that the
413 * next-highest priority in the queue is reflected in
414 * hp_waiter. */
415 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next);
416 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
417 effective_priority(mutex->hp_waiter) :
418 NULL;
419
420 if (mutex->hp_waiter)
421 TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
422 else
423 TRACE("no further waiters\n");
424
425 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
426
427 binheap_add(&l->nest.hp_binheap_node,
428 &tsk_rt(next)->hp_blocked_tasks,
429 struct nested_info, hp_binheap_node);
430
431#ifdef CONFIG_LITMUS_DGL_SUPPORT
432 if(dgl_wait) {
433 select_next_lock_if_primary(l, dgl_wait);
434 //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining);
435 --(dgl_wait->nr_remaining);
436 wake_up_task = (dgl_wait->nr_remaining == 0);
437 }
438#endif
439 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
440 }
441 else {
442 /* Well, if 'next' is not the highest-priority waiter,
443 * then it (probably) ought to inherit the highest-priority
444 * waiter's priority. */
445 TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident);
446
447 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
448
449 binheap_add(&l->nest.hp_binheap_node,
450 &tsk_rt(next)->hp_blocked_tasks,
451 struct nested_info, hp_binheap_node);
452
453#ifdef CONFIG_LITMUS_DGL_SUPPORT
454 if(dgl_wait) {
455 select_next_lock_if_primary(l, dgl_wait);
456 --(dgl_wait->nr_remaining);
457 wake_up_task = (dgl_wait->nr_remaining == 0);
458 }
459#endif
460
461 /* It is possible that 'next' *should* be the hp_waiter, but isn't
462 * because that update hasn't yet executed (update operation is
463 * probably blocked on mutex->lock). So only inherit if the top of
 464 * 'next's heap is indeed the effective prio. of hp_waiter.
465 * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
466 * since the effective priority of hp_waiter can change (and the
467 * update has not made it to this lock).)
468 */
469#ifdef CONFIG_LITMUS_DGL_SUPPORT
470 if((l->nest.hp_waiter_eff_prio != NULL) &&
471 (top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
472 l->nest.hp_waiter_eff_prio))
473 {
474 if(dgl_wait && tsk_rt(next)->blocked_lock) {
475 BUG_ON(wake_up_task);
476 //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
477 if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
478 litmus->nested_increase_prio(next,
479 l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock.
480 goto out; // all spinlocks are released. bail out now.
481 }
482 }
483 else {
484 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
485 }
486 }
487
488 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
489#else
490 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
491 l->nest.hp_waiter_eff_prio))
492 {
493 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
494 }
495 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
496#endif
497 }
498
499 if(wake_up_task) {
500 TRACE_TASK(next, "waking up since it is no longer blocked.\n");
501
502 tsk_rt(next)->blocked_lock = NULL;
503 mb();
504
505#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
506 // re-enable tracking
507 if(tsk_rt(next)->held_gpus) {
508 tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
509 }
510#endif
511
512 wake_up_process(next);
513 }
514 else {
515 TRACE_TASK(next, "is still blocked.\n");
516 }
517 }
518 else {
519 /* becomes available */
520 mutex->owner = NULL;
521 }
522
523 unlock_fine_irqrestore(&mutex->lock, flags);
524
525#ifdef CONFIG_LITMUS_DGL_SUPPORT
526out:
527#endif
528 unlock_global_irqrestore(dgl_lock, flags);
529
530 return err;
531}
532
533
534void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
535 struct task_struct* t,
536 raw_spinlock_t* to_unlock,
537 unsigned long irqflags)
538{
539 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
540
541 // relay-style locking
542 lock_fine(&mutex->lock);
543 unlock_fine(to_unlock);
544
545 if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
546 struct task_struct *owner = mutex->owner;
547
548 struct task_struct *old_max_eff_prio;
549 struct task_struct *new_max_eff_prio;
550
551 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
552
553 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
554
555 //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) {
556 if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) {
557 TRACE_TASK(t, "is new highest-prio waiter by propagation.\n");
558 mutex->hp_waiter = t;
559 }
560 if(t == mutex->hp_waiter) {
561 // reflect the decreased priority in the heap node.
562 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
563
564 BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node));
565 BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node,
566 &tsk_rt(owner)->hp_blocked_tasks));
567
568 binheap_decrease(&l->nest.hp_binheap_node,
569 &tsk_rt(owner)->hp_blocked_tasks);
570 }
571
572 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
573
574
575 if(new_max_eff_prio != old_max_eff_prio) {
576 // new_max_eff_prio > old_max_eff_prio holds.
577 if ((effective_priority(owner) == old_max_eff_prio) ||
578 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
579 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
580 TRACE_CUR("Propagating inheritance to holder of lock %d.\n",
581 l->ident);
582
583 // beware: recursion
584 litmus->nested_increase_prio(owner, new_max_eff_prio,
585 &mutex->lock, irqflags); // unlocks mutex->lock
586 }
587 else {
588 TRACE_CUR("Lower priority than holder %s/%d. No propagation.\n",
589 owner->comm, owner->pid);
590 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
591 unlock_fine_irqrestore(&mutex->lock, irqflags);
592 }
593 }
594 else {
 595 TRACE_TASK(mutex->owner, "No change in maximum effective priority.\n");
596 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
597 unlock_fine_irqrestore(&mutex->lock, irqflags);
598 }
599 }
600 else {
601 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
602
603 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
604 if(still_blocked) {
605 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
606 still_blocked->ident);
607 if(still_blocked->ops->propagate_increase_inheritance) {
608 /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B)
609 we know that task 't' has not released any locks behind us in this
610 chain. Propagation just needs to catch up with task 't'. */
611 still_blocked->ops->propagate_increase_inheritance(still_blocked,
612 t,
613 &mutex->lock,
614 irqflags);
615 }
616 else {
617 TRACE_TASK(t,
618 "Inheritor is blocked on lock (%p) that does not "
619 "support nesting!\n",
620 still_blocked);
621 unlock_fine_irqrestore(&mutex->lock, irqflags);
622 }
623 }
624 else {
625 unlock_fine_irqrestore(&mutex->lock, irqflags);
626 }
627 }
628}
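The propagate paths walk a chain of blocked tasks with relay-style ("hand-over-hand") locking: the next fine-grained lock is taken before the previous one is released, so no waiter can slip out of the chain unobserved. A generic sketch of that pattern, independent of the lock_fine()/unlock_fine() wrappers used above:

	/* Generic hand-over-hand walk over a chain of spinlocks. */
	static void relay_walk(raw_spinlock_t **chain, int len)
	{
		int i;

		raw_spin_lock(chain[0]);
		for (i = 1; i < len; ++i) {
			raw_spin_lock(chain[i]);	/* acquire B ... */
			raw_spin_unlock(chain[i - 1]);	/* ... then release A */
		}
		raw_spin_unlock(chain[len - 1]);
	}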
629
630
631void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
632 struct task_struct* t,
633 raw_spinlock_t* to_unlock,
634 unsigned long irqflags)
635{
636 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
637
638 // relay-style locking
639 lock_fine(&mutex->lock);
640 unlock_fine(to_unlock);
641
642 if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
643 if(t == mutex->hp_waiter) {
644 struct task_struct *owner = mutex->owner;
645
646 struct task_struct *old_max_eff_prio;
647 struct task_struct *new_max_eff_prio;
648
649 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
650
651 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
652
653 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
654 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL);
655 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
656 effective_priority(mutex->hp_waiter) : NULL;
657 binheap_add(&l->nest.hp_binheap_node,
658 &tsk_rt(owner)->hp_blocked_tasks,
659 struct nested_info, hp_binheap_node);
660
661 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
662
663 if((old_max_eff_prio != new_max_eff_prio) &&
664 (effective_priority(owner) == old_max_eff_prio))
665 {
666 // Need to set new effective_priority for owner
667
668 struct task_struct *decreased_prio;
669
670 TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n",
671 l->ident);
672
673 //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
674 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
675 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n",
676 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
677 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
678 owner->comm,
679 owner->pid,
680 l->ident);
681
682 decreased_prio = new_max_eff_prio;
683 }
684 else {
685 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n",
686 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
687 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
688 owner->comm,
689 owner->pid,
690 l->ident);
691
692 decreased_prio = NULL;
693 }
694
695 // beware: recursion
696 litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock
697 }
698 else {
699 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
700 unlock_fine_irqrestore(&mutex->lock, irqflags);
701 }
702 }
703 else {
704 TRACE_TASK(t, "is not hp_waiter. No propagation.\n");
705 unlock_fine_irqrestore(&mutex->lock, irqflags);
706 }
707 }
708 else {
709 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
710
711 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
712 if(still_blocked) {
713 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
714 still_blocked->ident);
715 if(still_blocked->ops->propagate_decrease_inheritance) {
716 /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B)
717 we know that task 't' has not released any locks behind us in this
718 chain. propagation just needs to catch up with task 't' */
719 still_blocked->ops->propagate_decrease_inheritance(still_blocked,
720 t,
721 &mutex->lock,
722 irqflags);
723 }
724 else {
725 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
726 still_blocked);
727 unlock_fine_irqrestore(&mutex->lock, irqflags);
728 }
729 }
730 else {
731 unlock_fine_irqrestore(&mutex->lock, irqflags);
732 }
733 }
734}
735
736
737int rsm_mutex_close(struct litmus_lock* l)
738{
739 struct task_struct *t = current;
740 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
741 unsigned long flags;
742
743 int owner;
744
745#ifdef CONFIG_LITMUS_DGL_SUPPORT
746 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
747#endif
748
749 lock_global_irqsave(dgl_lock, flags);
750 lock_fine_irqsave(&mutex->lock, flags);
751
752 owner = (mutex->owner == t);
753
754 unlock_fine_irqrestore(&mutex->lock, flags);
755 unlock_global_irqrestore(dgl_lock, flags);
756
757 if (owner)
758 rsm_mutex_unlock(l);
759
760 return 0;
761}
762
763void rsm_mutex_free(struct litmus_lock* lock)
764{
765 kfree(rsm_mutex_from_lock(lock));
766}
767
768struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops)
769{
770 struct rsm_mutex* mutex;
771
772 mutex = kmalloc(sizeof(*mutex), GFP_KERNEL);
773 if (!mutex)
774 return NULL;
775
776 mutex->litmus_lock.ops = ops;
777 mutex->owner = NULL;
778 mutex->hp_waiter = NULL;
779 init_waitqueue_head(&mutex->wait);
780
781
782#ifdef CONFIG_DEBUG_SPINLOCK
783 {
784 __raw_spin_lock_init(&mutex->lock,
785 ((struct litmus_lock*)mutex)->cheat_lockdep,
786 &((struct litmus_lock*)mutex)->key);
787 }
788#else
789 raw_spin_lock_init(&mutex->lock);
790#endif
791
792 ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter;
793
794 return &mutex->litmus_lock;
795}
796
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 480c62bc895b..be14dbec6ed2 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -29,7 +29,7 @@
29#include <linux/percpu.h> 29#include <linux/percpu.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32 32#include <linux/uaccess.h>
33#include <linux/module.h> 33#include <linux/module.h>
34 34
35#include <litmus/litmus.h> 35#include <litmus/litmus.h>
@@ -42,6 +42,16 @@
42#include <litmus/clustered.h> 42#include <litmus/clustered.h>
43 43
44#include <litmus/bheap.h> 44#include <litmus/bheap.h>
45#include <litmus/binheap.h>
46
47#ifdef CONFIG_LITMUS_LOCKING
48#include <litmus/kfmlp_lock.h>
49#endif
50
51#ifdef CONFIG_LITMUS_NESTED_LOCKING
52#include <litmus/rsm_lock.h>
53#include <litmus/ikglp_lock.h>
54#endif
45 55
46#ifdef CONFIG_SCHED_CPU_AFFINITY 56#ifdef CONFIG_SCHED_CPU_AFFINITY
47#include <litmus/affinity.h> 57#include <litmus/affinity.h>
@@ -49,7 +59,27 @@
49 59
50/* to configure the cluster size */ 60/* to configure the cluster size */
51#include <litmus/litmus_proc.h> 61#include <litmus/litmus_proc.h>
52#include <linux/uaccess.h> 62
63#ifdef CONFIG_SCHED_CPU_AFFINITY
64#include <litmus/affinity.h>
65#endif
66
67#ifdef CONFIG_LITMUS_SOFTIRQD
68#include <litmus/litmus_softirq.h>
69#endif
70
71#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
72#include <linux/interrupt.h>
73#include <litmus/trace.h>
74#endif
75
76#ifdef CONFIG_LITMUS_NVIDIA
77#include <litmus/nvidia_info.h>
78#endif
79
80#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
81#include <litmus/gpu_affinity.h>
82#endif
53 83
54/* Reference configuration variable. Determines which cache level is used to 84/* Reference configuration variable. Determines which cache level is used to
55 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that 85 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
@@ -70,7 +100,7 @@ typedef struct {
70 struct task_struct* linked; /* only RT tasks */ 100 struct task_struct* linked; /* only RT tasks */
71 struct task_struct* scheduled; /* only RT tasks */ 101 struct task_struct* scheduled; /* only RT tasks */
72 atomic_t will_schedule; /* prevent unneeded IPIs */ 102 atomic_t will_schedule; /* prevent unneeded IPIs */
73 struct bheap_node* hn; 103 struct binheap_node hn;
74} cpu_entry_t; 104} cpu_entry_t;
75 105
76/* one cpu_entry_t per CPU */ 106/* one cpu_entry_t per CPU */
@@ -83,6 +113,14 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
83#define test_will_schedule(cpu) \ 113#define test_will_schedule(cpu) \
84 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) 114 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
85 115
116#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
117struct tasklet_head
118{
119 struct tasklet_struct *head;
120 struct tasklet_struct **tail;
121};
122#endif
123
86/* 124/*
87 * In C-EDF there is a cedf domain _per_ cluster 125 * In C-EDF there is a cedf domain _per_ cluster
88 * The number of clusters is dynamically determined accordingly to the 126 * The number of clusters is dynamically determined accordingly to the
@@ -96,10 +134,17 @@ typedef struct clusterdomain {
96 /* map of this cluster cpus */ 134 /* map of this cluster cpus */
97 cpumask_var_t cpu_map; 135 cpumask_var_t cpu_map;
98 /* the cpus queue themselves according to priority in here */ 136 /* the cpus queue themselves according to priority in here */
99 struct bheap_node *heap_node; 137 struct binheap_handle cpu_heap;
100 struct bheap cpu_heap;
101 /* lock for this cluster */ 138 /* lock for this cluster */
102#define cluster_lock domain.ready_lock 139#define cluster_lock domain.ready_lock
140
141#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
142 struct tasklet_head pending_tasklets;
143#endif
144
145#ifdef CONFIG_LITMUS_DGL_SUPPORT
146 raw_spinlock_t dgl_lock;
147#endif
103} cedf_domain_t; 148} cedf_domain_t;
104 149
105/* a cedf_domain per cluster; allocation is done at init/activation time */ 150/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -108,6 +153,22 @@ cedf_domain_t *cedf;
108#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) 153#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
109#define task_cpu_cluster(task) remote_cluster(get_partition(task)) 154#define task_cpu_cluster(task) remote_cluster(get_partition(task))
110 155
156/* total number of cluster */
157static int num_clusters;
158/* we do not support cluster of different sizes */
159static unsigned int cluster_size;
160
161static int clusters_allocated = 0;
162
163#ifdef CONFIG_LITMUS_DGL_SUPPORT
164static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t)
165{
166 cedf_domain_t *cluster = task_cpu_cluster(t);
167 return(&cluster->dgl_lock);
168}
169#endif
170
171
111/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling 172/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
112 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose 173 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
113 * information during the initialization of the plugin (e.g., topology) 174 * information during the initialization of the plugin (e.g., topology)
@@ -115,11 +176,11 @@ cedf_domain_t *cedf;
115 */ 176 */
116#define VERBOSE_INIT 177#define VERBOSE_INIT
117 178
118static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 179static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
119{ 180{
120 cpu_entry_t *a, *b; 181 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
121 a = _a->value; 182 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
122 b = _b->value; 183
123 /* Note that a and b are inverted: we want the lowest-priority CPU at 184 /* Note that a and b are inverted: we want the lowest-priority CPU at
124 * the top of the heap. 185 * the top of the heap.
125 */ 186 */
@@ -133,20 +194,17 @@ static void update_cpu_position(cpu_entry_t *entry)
133{ 194{
134 cedf_domain_t *cluster = entry->cluster; 195 cedf_domain_t *cluster = entry->cluster;
135 196
136 if (likely(bheap_node_in_heap(entry->hn))) 197 if (likely(binheap_is_in_heap(&entry->hn))) {
137 bheap_delete(cpu_lower_prio, 198 binheap_delete(&entry->hn, &cluster->cpu_heap);
138 &cluster->cpu_heap, 199 }
139 entry->hn);
140 200
141 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); 201 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
142} 202}
143 203
144/* caller must hold cedf lock */ 204/* caller must hold cedf lock */
145static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) 205static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
146{ 206{
147 struct bheap_node* hn; 207 return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
148 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
149 return hn->value;
150} 208}
151 209
152 210
@@ -208,7 +266,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
208} 266}
209 267
210/* unlink - Make sure a task is not linked any longer to an entry 268/* unlink - Make sure a task is not linked any longer to an entry
211 * where it was linked before. Must hold cedf_lock. 269 * where it was linked before. Must hold cluster_lock.
212 */ 270 */
213static noinline void unlink(struct task_struct* t) 271static noinline void unlink(struct task_struct* t)
214{ 272{
@@ -244,7 +302,7 @@ static void preempt(cpu_entry_t *entry)
244} 302}
245 303
246/* requeue - Put an unlinked task into gsn-edf domain. 304/* requeue - Put an unlinked task into gsn-edf domain.
247 * Caller must hold cedf_lock. 305 * Caller must hold cluster_lock.
248 */ 306 */
249static noinline void requeue(struct task_struct* task) 307static noinline void requeue(struct task_struct* task)
250{ 308{
@@ -339,13 +397,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
339 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 397 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
340} 398}
341 399
342/* caller holds cedf_lock */ 400/* caller holds cluster_lock */
343static noinline void job_completion(struct task_struct *t, int forced) 401static noinline void job_completion(struct task_struct *t, int forced)
344{ 402{
345 BUG_ON(!t); 403 BUG_ON(!t);
346 404
347 sched_trace_task_completion(t, forced); 405 sched_trace_task_completion(t, forced);
348 406
407#ifdef CONFIG_LITMUS_NVIDIA
408 atomic_set(&tsk_rt(t)->nv_int_count, 0);
409#endif
410
349 TRACE_TASK(t, "job_completion().\n"); 411 TRACE_TASK(t, "job_completion().\n");
350 412
351 /* set flags */ 413 /* set flags */
@@ -389,6 +451,314 @@ static void cedf_tick(struct task_struct* t)
389 } 451 }
390} 452}
391 453
454
455
456
457
458
459
460
461
462
463
464
465
466#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
467
468
469static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
470{
471 if (!atomic_read(&tasklet->count)) {
472 if(tasklet->owner) {
473 sched_trace_tasklet_begin(tasklet->owner);
474 }
475
476 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
477 {
478 BUG();
479 }
480 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
481 __FUNCTION__,
482 (tasklet->owner) ? tasklet->owner->pid : -1,
483 (tasklet->owner) ? 0 : 1);
484 tasklet->func(tasklet->data);
485 tasklet_unlock(tasklet);
486
487 if(tasklet->owner) {
488 sched_trace_tasklet_end(tasklet->owner, flushed);
489 }
490 }
491 else {
492 BUG();
493 }
494}
495
496
497static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
498{
499 int work_to_do = 1;
500 struct tasklet_struct *tasklet = NULL;
501 unsigned long flags;
502
503 while(work_to_do) {
504
505 TS_NV_SCHED_BOTISR_START;
506
507 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
508
509 if(cluster->pending_tasklets.head != NULL) {
510 // remove tasklet at head.
511 struct tasklet_struct *prev = NULL;
512 tasklet = cluster->pending_tasklets.head;
513
514 // find a tasklet with prio to execute; skip ones where
515 // sched_task has a higher priority.
 516 // We use the '!edf' test instead of swapping function arguments since
517 // both sched_task and owner could be NULL. In this case, we want to
518 // still execute the tasklet.
519 while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) {
520 prev = tasklet;
521 tasklet = tasklet->next;
522 }
523
 524 if(tasklet) { // found something to execute
525 // remove the tasklet from the queue
526 if(prev) {
527 prev->next = tasklet->next;
528 if(prev->next == NULL) {
529 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
 530 cluster->pending_tasklets.tail = &(prev->next);
531 }
532 }
533 else {
534 cluster->pending_tasklets.head = tasklet->next;
535 if(tasklet->next == NULL) {
536 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
537 cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
538 }
539 }
540 }
541 else {
542 TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__);
543 }
544 }
545 else {
546 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
547 }
548
549 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
550
551 if(tasklet) {
552 __do_lit_tasklet(tasklet, 0ul);
553 tasklet = NULL;
554 }
555 else {
556 work_to_do = 0;
557 }
558
559 TS_NV_SCHED_BOTISR_END;
560 }
561}
562
563static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
564{
565 struct tasklet_struct* step;
566
567 tasklet->next = NULL; // make sure there are no old values floating around
568
569 step = cluster->pending_tasklets.head;
570 if(step == NULL) {
571 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
572 // insert at tail.
573 *(cluster->pending_tasklets.tail) = tasklet;
574 cluster->pending_tasklets.tail = &(tasklet->next);
575 }
576 else if((*(cluster->pending_tasklets.tail) != NULL) &&
577 edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
578 // insert at tail.
579 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
580
581 *(cluster->pending_tasklets.tail) = tasklet;
582 cluster->pending_tasklets.tail = &(tasklet->next);
583 }
584 else {
585
586 // insert the tasklet somewhere in the middle.
587
588 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
589
590 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
591 step = step->next;
592 }
593
594 // insert tasklet right before step->next.
595
596 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
597 tasklet->owner->pid,
598 (step->owner) ?
599 step->owner->pid :
600 -1,
601 (step->next) ?
602 ((step->next->owner) ?
603 step->next->owner->pid :
604 -1) :
605 -1);
606
607 tasklet->next = step->next;
608 step->next = tasklet;
609
610 // patch up the head if needed.
611 if(cluster->pending_tasklets.head == step)
612 {
613 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
614 cluster->pending_tasklets.head = tasklet;
615 }
616 }
617}
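The queue above is a singly-linked list kept in priority order, with 'tail' stored as a pointer-to-pointer so appending never needs a special case for an empty list. A self-contained sketch of that insertion scheme using plain integer priorities (all names illustrative):

	struct node  { int prio; struct node *next; };
	struct queue { struct node *head; struct node **tail; };

	/* Insert in non-increasing priority order; q->tail always points at
	 * the final next-pointer (or at q->head while the list is empty). */
	static void insert_by_prio(struct queue *q, struct node *n)
	{
		struct node **link = &q->head;

		while (*link && (*link)->prio >= n->prio)
			link = &(*link)->next;

		n->next = *link;
		*link = n;
		if (n->next == NULL)
			q->tail = &n->next;	/* n is the new last element */
	}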
618
619static void cedf_run_tasklets(struct task_struct* sched_task)
620{
621 cedf_domain_t* cluster;
622
623 preempt_disable();
624
625 cluster = (is_realtime(sched_task)) ?
626 task_cpu_cluster(sched_task) :
627 remote_cluster(smp_processor_id());
628
629 if(cluster && cluster->pending_tasklets.head != NULL) {
630 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
631 do_lit_tasklets(cluster, sched_task);
632 }
633
634 preempt_enable_no_resched();
635}
636
637
638
639static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
640{
641#if 0
642 cedf_domain_t *cluster = NULL;
643 cpu_entry_t *targetCPU = NULL;
644 int thisCPU;
645 int runLocal = 0;
646 int runNow = 0;
647 unsigned long flags;
648
649 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
650 {
651 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
652 return 0;
653 }
654
655 cluster = task_cpu_cluster(tasklet->owner);
656
657 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
658
659 thisCPU = smp_processor_id();
660
661#ifdef CONFIG_SCHED_CPU_AFFINITY
662 {
663 cpu_entry_t* affinity = NULL;
664
665 // use this CPU if it is in our cluster and isn't running any RT work.
666 if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
667 affinity = &(__get_cpu_var(cedf_cpu_entries));
668 }
669 else {
670 // this CPU is busy or shouldn't run tasklet in this cluster.
 671 // look for available nearby CPUs.
672 // NOTE: Affinity towards owner and not this CPU. Is this right?
673 affinity =
674 cedf_get_nearest_available_cpu(cluster,
675 &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
676 }
677
678 targetCPU = affinity;
679 }
680#endif
681
682 if (targetCPU == NULL) {
683 targetCPU = lowest_prio_cpu(cluster);
684 }
685
686 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
687 if (thisCPU == targetCPU->cpu) {
688 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
689 runLocal = 1;
690 runNow = 1;
691 }
692 else {
693 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
694 runLocal = 0;
695 runNow = 1;
696 }
697 }
698 else {
699 runLocal = 0;
700 runNow = 0;
701 }
702
703 if(!runLocal) {
704 // enqueue the tasklet
705 __add_pai_tasklet(tasklet, cluster);
706 }
707
708 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
709
710
711 if (runLocal /*&& runNow */) { // runNow == 1 is implied
712 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
713 __do_lit_tasklet(tasklet, 0ul);
714 }
715 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
716 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
717 preempt(targetCPU); // need to be protected by cluster_lock?
718 }
719 else {
720 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
721 }
722#else
723 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
724 __do_lit_tasklet(tasklet, 0ul);
725#endif
726 return(1); // success
727}
728
729static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
730 struct task_struct *new_prio)
731{
732 struct tasklet_struct* step;
733 unsigned long flags;
734 cedf_domain_t *cluster;
735 struct task_struct *probe;
736
737 // identify the cluster by the assignment of these tasks. one should
738 // be non-NULL.
739 probe = (old_prio) ? old_prio : new_prio;
740
741 if(probe) {
742 cluster = task_cpu_cluster(probe);
743
744 if(cluster->pending_tasklets.head != NULL) {
745 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
746 for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
747 if(step->owner == old_prio) {
748 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
749 step->owner = new_prio;
750 }
751 }
752 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
753 }
754 }
755 else {
 756 TRACE("%s: Both priorities were NULL\n", __FUNCTION__);
757 }
758}
759
760#endif // PAI
761
392/* Getting schedule() right is a bit tricky. schedule() may not make any 762/* Getting schedule() right is a bit tricky. schedule() may not make any
393 * assumptions on the state of the current task since it may be called for a 763 * assumptions on the state of the current task since it may be called for a
394 * number of reasons. The reasons include a scheduler_tick() determined that it 764 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -465,6 +835,19 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
465 if (blocks) 835 if (blocks)
466 unlink(entry->scheduled); 836 unlink(entry->scheduled);
467 837
838#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
839 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
840 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
841 // don't track preemptions or locking protocol suspensions.
842 TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
843 stop_gpu_tracker(entry->scheduled);
844 }
845 else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
846 TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
847 }
848 }
849#endif
850
468 /* Request a sys_exit_np() call if we would like to preempt but cannot. 851 /* Request a sys_exit_np() call if we would like to preempt but cannot.
469 * We need to make sure to update the link structure anyway in case 852 * We need to make sure to update the link structure anyway in case
470 * that we are still linked. Multiple calls to request_exit_np() don't 853 * that we are still linked. Multiple calls to request_exit_np() don't
@@ -514,7 +897,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
514 raw_spin_unlock(&cluster->cluster_lock); 897 raw_spin_unlock(&cluster->cluster_lock);
515 898
516#ifdef WANT_ALL_SCHED_EVENTS 899#ifdef WANT_ALL_SCHED_EVENTS
517 TRACE("cedf_lock released, next=0x%p\n", next); 900 TRACE("cluster_lock released, next=0x%p\n", next);
518 901
519 if (next) 902 if (next)
520 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 903 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
@@ -522,7 +905,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
522 TRACE("becomes idle at %llu.\n", litmus_clock()); 905 TRACE("becomes idle at %llu.\n", litmus_clock());
523#endif 906#endif
524 907
525
526 return next; 908 return next;
527} 909}
528 910
@@ -548,7 +930,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
548 cpu_entry_t* entry; 930 cpu_entry_t* entry;
549 cedf_domain_t* cluster; 931 cedf_domain_t* cluster;
550 932
551 TRACE("gsn edf: task new %d\n", t->pid); 933 TRACE("c-edf: task new %d\n", t->pid);
552 934
553 /* the cluster doesn't change even if t is running */ 935 /* the cluster doesn't change even if t is running */
554 cluster = task_cpu_cluster(t); 936 cluster = task_cpu_cluster(t);
@@ -586,7 +968,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
586static void cedf_task_wake_up(struct task_struct *task) 968static void cedf_task_wake_up(struct task_struct *task)
587{ 969{
588 unsigned long flags; 970 unsigned long flags;
589 lt_t now; 971 //lt_t now;
590 cedf_domain_t *cluster; 972 cedf_domain_t *cluster;
591 973
592 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 974 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
@@ -594,6 +976,8 @@ static void cedf_task_wake_up(struct task_struct *task)
594 cluster = task_cpu_cluster(task); 976 cluster = task_cpu_cluster(task);
595 977
596 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 978 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
979
 980#if 0 // sporadic task model
597 /* We need to take suspensions because of semaphores into 981 /* We need to take suspensions because of semaphores into
598 * account! If a job resumes after being suspended due to acquiring 982 * account! If a job resumes after being suspended due to acquiring
599 * a semaphore, it should never be treated as a new job release. 983 * a semaphore, it should never be treated as a new job release.
@@ -615,7 +999,13 @@ static void cedf_task_wake_up(struct task_struct *task)
615 } 999 }
616 } 1000 }
617 } 1001 }
618 cedf_job_arrival(task); 1002#else
1003 set_rt_flags(task, RT_F_RUNNING); // periodic model
1004#endif
1005
1006 if(tsk_rt(task)->linked_on == NO_CPU)
1007 cedf_job_arrival(task);
1008
619 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1009 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
620} 1010}
621 1011
@@ -642,6 +1032,10 @@ static void cedf_task_exit(struct task_struct * t)
642 unsigned long flags; 1032 unsigned long flags;
643 cedf_domain_t *cluster = task_cpu_cluster(t); 1033 cedf_domain_t *cluster = task_cpu_cluster(t);
644 1034
1035#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1036 cedf_change_prio_pai_tasklet(t, NULL);
1037#endif
1038
645 /* unlink if necessary */ 1039 /* unlink if necessary */
646 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1040 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
647 unlink(t); 1041 unlink(t);
@@ -659,13 +1053,536 @@ static void cedf_task_exit(struct task_struct * t)
659 1053
660static long cedf_admit_task(struct task_struct* tsk) 1054static long cedf_admit_task(struct task_struct* tsk)
661{ 1055{
1056#ifdef CONFIG_LITMUS_NESTED_LOCKING
1057 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1058 edf_max_heap_base_priority_order);
1059#endif
1060
662 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 1061 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
663} 1062}
664 1063
665/* total number of cluster */ 1064
666static int num_clusters; 1065
667/* we do not support cluster of different sizes */ 1066#ifdef CONFIG_LITMUS_LOCKING
668static unsigned int cluster_size; 1067
1068#include <litmus/fdso.h>
1069
1070
1071
1072/* called with IRQs off */
1073static void __increase_priority_inheritance(struct task_struct* t,
1074 struct task_struct* prio_inh)
1075{
1076 int linked_on;
1077 int check_preempt = 0;
1078
1079 cedf_domain_t* cluster = task_cpu_cluster(t);
1080
1081#ifdef CONFIG_LITMUS_NESTED_LOCKING
1082 /* this sanity check allows for weaker locking in protocols */
1083 /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
1084 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1085#endif
1086 TRACE_TASK(t, "inherits priority from %s/%d\n",
1087 prio_inh->comm, prio_inh->pid);
1088 tsk_rt(t)->inh_task = prio_inh;
1089
1090 linked_on = tsk_rt(t)->linked_on;
1091
1092 /* If it is scheduled, then we need to reorder the CPU heap. */
1093 if (linked_on != NO_CPU) {
1094 TRACE_TASK(t, "%s: linked on %d\n",
1095 __FUNCTION__, linked_on);
1096 /* Holder is scheduled; need to re-order CPUs.
1097 * We can't use heap_decrease() here since
1098 * the cpu_heap is ordered in reverse direction, so
1099 * it is actually an increase. */
1100 binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn,
1101 &cluster->cpu_heap);
1102 binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn,
1103 &cluster->cpu_heap, cpu_entry_t, hn);
1104
1105 } else {
1106 /* holder may be queued: first stop queue changes */
1107 raw_spin_lock(&cluster->domain.release_lock);
1108 if (is_queued(t)) {
1109 TRACE_TASK(t, "%s: is queued\n",
1110 __FUNCTION__);
1111 /* We need to update the position of holder in some
 1112 * heap. Note that this could be a release heap if
1113 * budget enforcement is used and this job overran. */
1114 check_preempt =
1115 !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
1116 } else {
1117 /* Nothing to do: if it is not queued and not linked
1118 * then it is either sleeping or currently being moved
1119 * by other code (e.g., a timer interrupt handler) that
1120 * will use the correct priority when enqueuing the
1121 * task. */
1122 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1123 __FUNCTION__);
1124 }
1125 raw_spin_unlock(&cluster->domain.release_lock);
1126
1127 /* If holder was enqueued in a release heap, then the following
1128 * preemption check is pointless, but we can't easily detect
1129 * that case. If you want to fix this, then consider that
1130 * simply adding a state flag requires O(n) time to update when
1131 * releasing n tasks, which conflicts with the goal to have
1132 * O(log n) merges. */
1133 if (check_preempt) {
1134 /* heap_decrease() hit the top level of the heap: make
1135 * sure preemption checks get the right task, not the
1136 * potentially stale cache. */
1137 bheap_uncache_min(edf_ready_order,
1138 &cluster->domain.ready_queue);
1139 check_for_preemptions(cluster);
1140 }
1141 }
1142#ifdef CONFIG_LITMUS_NESTED_LOCKING
1143 }
1144 else {
1145 TRACE_TASK(t, "Spurious invalid priority increase. "
1146 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1147 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1148 t->comm, t->pid,
1149 effective_priority(t)->comm, effective_priority(t)->pid,
1150 (prio_inh) ? prio_inh->comm : "nil",
1151 (prio_inh) ? prio_inh->pid : -1);
1152 WARN_ON(!prio_inh);
1153 }
1154#endif
1155}
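
The guarded update above only takes effect when prio_inh is a strict improvement over t's current effective priority; otherwise the request is logged as spurious. Below is a minimal user-space sketch of that rule under EDF (smaller deadline means higher priority); every name in it is invented for illustration and is not a LITMUS API.

/* Minimal user-space model of the guarded inheritance update above (EDF:
 * a smaller deadline means higher priority).  None of these names are
 * LITMUS APIs; they only illustrate that a task adopts a donor's priority
 * only when that is a strict improvement over its current *effective*
 * priority. */
#include <stdio.h>
#include <stdbool.h>

struct task {
	const char *name;
	long base_deadline;            /* base priority */
	const struct task *inh_from;   /* priority donor, or NULL */
};

static long effective_deadline(const struct task *t)
{
	return t->inh_from ? t->inh_from->base_deadline : t->base_deadline;
}

/* returns true iff the inheritance was applied (and a re-queue is needed) */
static bool increase_inheritance(struct task *t, const struct task *donor)
{
	if (donor && donor->base_deadline < effective_deadline(t)) {
		t->inh_from = donor;
		return true;
	}
	return false;  /* spurious request: a concurrent update already won */
}

int main(void)
{
	struct task low  = { "low",  100, NULL };
	struct task mid  = { "mid",   50, NULL };
	struct task high = { "high",  10, NULL };

	printf("applied: %d, effective deadline now %ld\n",
	       increase_inheritance(&low, &high), effective_deadline(&low));
	printf("applied: %d (weaker donor is ignored)\n",
	       increase_inheritance(&low, &mid));
	return 0;
}

Returning whether the update took effect mirrors why the kernel code only touches the CPU heap or the ready queue inside the guarded branch.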
1156
1157/* called with IRQs off */
1158static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1159{
1160 cedf_domain_t* cluster = task_cpu_cluster(t);
1161
1162 raw_spin_lock(&cluster->cluster_lock);
1163
1164 __increase_priority_inheritance(t, prio_inh);
1165
1166#ifdef CONFIG_LITMUS_SOFTIRQD
1167 if(tsk_rt(t)->cur_klitirqd != NULL)
1168 {
1169 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
1170 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1171
1172 __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1173 }
1174#endif
1175
1176 raw_spin_unlock(&cluster->cluster_lock);
1177
1178#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1179 if(tsk_rt(t)->held_gpus) {
1180 int i;
1181 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1182 i < NV_DEVICE_NUM;
1183 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1184 pai_check_priority_increase(t, i);
1185 }
1186 }
1187#endif
1188}
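
After the cluster lock is dropped, the function walks the bitmask of GPUs the task currently holds and re-checks the PAI state per device. A small user-space model of that walk follows (invented names; MAX_GPUS stands in for NV_DEVICE_NUM). Note that the kernel helpers find_first_bit()/find_next_bit() take the bitmap length in bits, so a kernel-side loop would pass a bit count such as BITS_PER_LONG rather than a byte count.

/* User-space model of walking a "held GPUs" bitmask, one check per set bit. */
#include <stdio.h>

#define MAX_GPUS 32   /* stand-in for NV_DEVICE_NUM */

static void recheck_gpu(int gpu)
{
	printf("re-evaluate PAI priority state for GPU %d\n", gpu);
}

static void for_each_held_gpu(unsigned long held_gpus)
{
	for (int i = 0; i < MAX_GPUS; i++)
		if (held_gpus & (1UL << i))
			recheck_gpu(i);
}

int main(void)
{
	for_each_held_gpu((1UL << 0) | (1UL << 3));  /* task holds GPUs 0 and 3 */
	return 0;
}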
1189
1190/* called with IRQs off */
1191static void __decrease_priority_inheritance(struct task_struct* t,
1192 struct task_struct* prio_inh)
1193{
1194#ifdef CONFIG_LITMUS_NESTED_LOCKING
1195 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
1196#endif
1197 /* A job only stops inheriting a priority when it releases a
1198 * resource. Thus we can make the following assumption.*/
1199 if(prio_inh)
1200 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1201 prio_inh->comm, prio_inh->pid);
1202 else
1203 TRACE_TASK(t, "base priority restored.\n");
1204
1205 tsk_rt(t)->inh_task = prio_inh;
1206
1207 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1208 TRACE_TASK(t, "is scheduled.\n");
1209
1210 /* Check if rescheduling is necessary. We can't use heap_decrease()
1211 * since the priority was effectively lowered. */
1212 unlink(t);
1213 cedf_job_arrival(t);
1214 }
1215 else {
1216 cedf_domain_t* cluster = task_cpu_cluster(t);
1217 /* task is queued */
1218 raw_spin_lock(&cluster->domain.release_lock);
1219 if (is_queued(t)) {
1220 TRACE_TASK(t, "is queued.\n");
1221
1222 /* decrease in priority, so we have to re-add to binomial heap */
1223 unlink(t);
1224 cedf_job_arrival(t);
1225 }
1226 else {
1227 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
1228 }
1229 raw_spin_unlock(&cluster->domain.release_lock);
1230 }
1231#ifdef CONFIG_LITMUS_NESTED_LOCKING
1232 }
1233 else {
1234 TRACE_TASK(t, "Spurious invalid priority decrease. "
1235 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1236 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1237 t->comm, t->pid,
1238 effective_priority(t)->comm, effective_priority(t)->pid,
1239 (prio_inh) ? prio_inh->comm : "nil",
1240 (prio_inh) ? prio_inh->pid : -1);
1241 }
1242#endif
1243}
1244
1245static void decrease_priority_inheritance(struct task_struct* t,
1246 struct task_struct* prio_inh)
1247{
1248 cedf_domain_t* cluster = task_cpu_cluster(t);
1249
1250 raw_spin_lock(&cluster->cluster_lock);
1251 __decrease_priority_inheritance(t, prio_inh);
1252
1253#ifdef CONFIG_LITMUS_SOFTIRQD
1254 if(tsk_rt(t)->cur_klitirqd != NULL)
1255 {
1256 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1257 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1258
1259 __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1260 }
1261#endif
1262
1263 raw_spin_unlock(&cluster->cluster_lock);
1264
1265#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1266 if(tsk_rt(t)->held_gpus) {
1267 int i;
1268 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1269 i < NV_DEVICE_NUM;
1270 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1271 pai_check_priority_decrease(t, i);
1272 }
1273 }
1274#endif
1275}
1276
1277
1278
1279
1280
1281#ifdef CONFIG_LITMUS_SOFTIRQD
1282/* called with IRQs off */
1283static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1284 struct task_struct* old_owner,
1285 struct task_struct* new_owner)
1286{
1287 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1288
1289 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1290
1291 raw_spin_lock(&cluster->cluster_lock);
1292
1293 if(old_owner != new_owner)
1294 {
1295 if(old_owner)
1296 {
1297 // unreachable?
1298 tsk_rt(old_owner)->cur_klitirqd = NULL;
1299 }
1300
1301 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
1302 new_owner->comm, new_owner->pid);
1303
1304 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
1305 }
1306
1307 __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
1308
1309 __increase_priority_inheritance(klitirqd,
1310 (tsk_rt(new_owner)->inh_task == NULL) ?
1311 new_owner :
1312 tsk_rt(new_owner)->inh_task);
1313
1314 raw_spin_unlock(&cluster->cluster_lock);
1315}
1316
1317
1318/* called with IRQs off */
1319static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1320 struct task_struct* old_owner,
1321 struct task_struct* new_owner)
1322{
1323 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1324
1325 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1326
1327 raw_spin_lock(&cluster->cluster_lock);
1328
1329 TRACE_TASK(klitirqd, "priority restored\n");
1330
1331 __decrease_priority_inheritance(klitirqd, new_owner);
1332
1333 tsk_rt(old_owner)->cur_klitirqd = NULL;
1334
1335 raw_spin_unlock(&cluster->cluster_lock);
1336}
1337#endif // CONFIG_LITMUS_SOFTIRQD
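
The two helpers above re-target a klitirqd proxy thread: the daemon should execute at the effective priority of the task whose bottom halves it now serves, i.e. that task's current donor if it has one, otherwise the task itself. A tiny user-space sketch of that donor-selection rule (invented names, not LITMUS code):

/* Sketch of the donor-selection rule used when re-targeting a proxy thread. */
#include <stdio.h>
#include <stddef.h>

struct task {
	const char *name;
	struct task *inh_task;   /* current priority donor, or NULL */
};

static struct task *pick_donor_for_proxy(struct task *new_owner)
{
	return new_owner->inh_task ? new_owner->inh_task : new_owner;
}

int main(void)
{
	struct task hp    = { "hp",    NULL };
	struct task owner = { "owner", &hp };   /* owner already inherits from hp */

	printf("proxy inherits from %s\n", pick_donor_for_proxy(&owner)->name);
	return 0;
}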
1338
1339
1340
1341
1342
1343
1344
1345#ifdef CONFIG_LITMUS_NESTED_LOCKING
1346
1347/* called with IRQs off */
1348/* preconditions:
1349 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1350 (2) The lock 'to_unlock' is held.
1351 */
1352static void nested_increase_priority_inheritance(struct task_struct* t,
1353 struct task_struct* prio_inh,
1354 raw_spinlock_t *to_unlock,
1355 unsigned long irqflags)
1356{
1357 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1358
1359 if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1360 increase_priority_inheritance(t, prio_inh); // increase our prio.
1361 }
1362
1363 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1364
1365
1366 if(blocked_lock) {
1367 if(blocked_lock->ops->propagate_increase_inheritance) {
1368 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1369 blocked_lock->ident);
1370
1371 // beware: recursion
1372 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1373 t, to_unlock,
1374 irqflags);
1375 }
1376 else {
1377 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1378 blocked_lock->ident);
1379 unlock_fine_irqrestore(to_unlock, irqflags);
1380 }
1381 }
1382 else {
1383 TRACE_TASK(t, "is not blocked. No propagation.\n");
1384 unlock_fine_irqrestore(to_unlock, irqflags);
1385 }
1386}
1387
1388/* called with IRQs off */
1389/* preconditions:
1390 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1391 (2) The lock 'to_unlock' is held.
1392 */
1393static void nested_decrease_priority_inheritance(struct task_struct* t,
1394 struct task_struct* prio_inh,
1395 raw_spinlock_t *to_unlock,
1396 unsigned long irqflags)
1397{
1398 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1399 decrease_priority_inheritance(t, prio_inh);
1400
1401 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1402
1403 if(blocked_lock) {
1404 if(blocked_lock->ops->propagate_decrease_inheritance) {
1405 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1406 blocked_lock->ident);
1407
1408 // beware: recursion
1409 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1410 to_unlock,
1411 irqflags);
1412 }
1413 else {
1414 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1415 blocked_lock);
1416 unlock_fine_irqrestore(to_unlock, irqflags);
1417 }
1418 }
1419 else {
1420 TRACE_TASK(t, "is not blocked. No propagation.\n");
1421 unlock_fine_irqrestore(to_unlock, irqflags);
1422 }
1423}
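
Both nested_* helpers follow the same pattern: adjust this task's priority, drop its hp_blocked_tasks_lock, and, if the task is itself blocked, hand the change to the lock it waits on so the propagation continues at that lock's holder. The user-space sketch below walks the resulting chain iteratively, with no locking and invented names; the kernel code instead recurses through each lock's ops so every lock type can re-rank its own waiters before passing the change on.

/* User-space sketch of nested inheritance propagation along a blocking chain. */
#include <stdio.h>
#include <stddef.h>

struct lock;

struct task {
	const char *name;
	long eff_deadline;          /* smaller == higher priority */
	struct lock *blocked_on;    /* lock this task waits for, or NULL */
};

struct lock {
	int id;
	struct task *owner;
};

static void propagate_increase(struct task *t, long donor_deadline)
{
	while (t) {
		if (donor_deadline >= t->eff_deadline)
			break;                    /* no improvement: stop */
		t->eff_deadline = donor_deadline;
		printf("%s now runs with deadline %ld\n", t->name, t->eff_deadline);
		if (!t->blocked_on)
			break;                    /* not blocked: nothing to pass on */
		t = t->blocked_on->owner;         /* continue at the lock holder */
	}
}

int main(void)
{
	struct task c  = { "C", 300, NULL };
	struct lock l2 = { 2, &c };
	struct task b  = { "B", 200, &l2 };   /* B holds the contended lock and waits on l2 */

	propagate_increase(&b, 10);           /* a waiter with deadline 10 blocks behind B */
	return 0;
}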
1424
1425
1426/* ******************** RSM MUTEX ********************** */
1427
1428static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = {
1429 .lock = rsm_mutex_lock,
1430 .unlock = rsm_mutex_unlock,
1431 .close = rsm_mutex_close,
1432 .deallocate = rsm_mutex_free,
1433
1434 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1435 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1436
1437#ifdef CONFIG_LITMUS_DGL_SUPPORT
1438 .dgl_lock = rsm_mutex_dgl_lock,
1439 .is_owner = rsm_mutex_is_owner,
1440 .enable_priority = rsm_mutex_enable_priority,
1441#endif
1442};
1443
1444static struct litmus_lock* cedf_new_rsm_mutex(void)
1445{
1446 return rsm_mutex_new(&cedf_rsm_mutex_lock_ops);
1447}
1448
1449/* ******************** IKGLP ********************** */
1450
1451static struct litmus_lock_ops cedf_ikglp_lock_ops = {
1452 .lock = ikglp_lock,
1453 .unlock = ikglp_unlock,
1454 .close = ikglp_close,
1455 .deallocate = ikglp_free,
1456
1457 // ikglp can only be an outer-most lock.
1458 .propagate_increase_inheritance = NULL,
1459 .propagate_decrease_inheritance = NULL,
1460};
1461
1462static struct litmus_lock* cedf_new_ikglp(void* __user arg)
1463{
1464 // assumes clusters of uniform size.
1465 return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg);
1466}
1467
1468#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1469
1470
1471
1472
1473/* ******************** KFMLP support ********************** */
1474
1475static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
1476 .lock = kfmlp_lock,
1477 .unlock = kfmlp_unlock,
1478 .close = kfmlp_close,
1479 .deallocate = kfmlp_free,
1480
1481 // kfmlp can only be an outer-most lock.
1482 .propagate_increase_inheritance = NULL,
1483 .propagate_decrease_inheritance = NULL,
1484};
1485
1486
1487static struct litmus_lock* cedf_new_kfmlp(void* __user arg)
1488{
1489 return kfmlp_new(&cedf_kfmlp_lock_ops, arg);
1490}
1491
1492
1493/* **** lock constructor **** */
1494
1495static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1496 void* __user args)
1497{
1498 int err;
1499
1500 switch (type) {
1501#ifdef CONFIG_LITMUS_NESTED_LOCKING
1502 case RSM_MUTEX:
1503 *lock = cedf_new_rsm_mutex();
1504 break;
1505
1506 case IKGLP_SEM:
1507 *lock = cedf_new_ikglp(args);
1508 break;
1509#endif
1510 case KFMLP_SEM:
1511 *lock = cedf_new_kfmlp(args);
1512 break;
1513
1514 default:
1515 err = -ENXIO;
1516 goto UNSUPPORTED_LOCK;
1517 };
1518
1519 if (*lock)
1520 err = 0;
1521 else
1522 err = -ENOMEM;
1523
1524UNSUPPORTED_LOCK:
1525 return err;
1526}
1527
1528#endif // CONFIG_LITMUS_LOCKING
1529
1530
1531#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1532static struct affinity_observer_ops cedf_kfmlp_affinity_ops = {
1533 .close = kfmlp_aff_obs_close,
1534 .deallocate = kfmlp_aff_obs_free,
1535};
1536
1537#ifdef CONFIG_LITMUS_NESTED_LOCKING
1538static struct affinity_observer_ops cedf_ikglp_affinity_ops = {
1539 .close = ikglp_aff_obs_close,
1540 .deallocate = ikglp_aff_obs_free,
1541};
1542#endif
1543
1544static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs,
1545 int type,
1546 void* __user args)
1547{
1548 int err;
1549
1550 switch (type) {
1551
1552 case KFMLP_SIMPLE_GPU_AFF_OBS:
1553 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1554 break;
1555
1556 case KFMLP_GPU_AFF_OBS:
1557 *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1558 break;
1559
1560#ifdef CONFIG_LITMUS_NESTED_LOCKING
1561 case IKGLP_SIMPLE_GPU_AFF_OBS:
1562 *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1563 break;
1564
1565 case IKGLP_GPU_AFF_OBS:
1566 *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1567 break;
1568#endif
1569 default:
1570 err = -ENXIO;
1571 goto UNSUPPORTED_AFF_OBS;
1572 };
1573
1574 if (*aff_obs)
1575 err = 0;
1576 else
1577 err = -ENOMEM;
1578
1579UNSUPPORTED_AFF_OBS:
1580 return err;
1581}
1582#endif
1583
1584
1585
669 1586
670#ifdef VERBOSE_INIT 1587#ifdef VERBOSE_INIT
671static void print_cluster_topology(cpumask_var_t mask, int cpu) 1588static void print_cluster_topology(cpumask_var_t mask, int cpu)
@@ -680,16 +1597,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu)
680} 1597}
681#endif 1598#endif
682 1599
683static int clusters_allocated = 0;
684
685static void cleanup_cedf(void) 1600static void cleanup_cedf(void)
686{ 1601{
687 int i; 1602 int i;
688 1603
1604#ifdef CONFIG_LITMUS_NVIDIA
1605 shutdown_nvidia_info();
1606#endif
1607
689 if (clusters_allocated) { 1608 if (clusters_allocated) {
690 for (i = 0; i < num_clusters; i++) { 1609 for (i = 0; i < num_clusters; i++) {
691 kfree(cedf[i].cpus); 1610 kfree(cedf[i].cpus);
692 kfree(cedf[i].heap_node);
693 free_cpumask_var(cedf[i].cpu_map); 1611 free_cpumask_var(cedf[i].cpu_map);
694 } 1612 }
695 1613
@@ -749,12 +1667,16 @@ static long cedf_activate_plugin(void)
749 1667
750 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), 1668 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
751 GFP_ATOMIC); 1669 GFP_ATOMIC);
752 cedf[i].heap_node = kmalloc( 1670 INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
753 cluster_size * sizeof(struct bheap_node),
754 GFP_ATOMIC);
755 bheap_init(&(cedf[i].cpu_heap));
756 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); 1671 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
757 1672
1673
1674#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1675 cedf[i].pending_tasklets.head = NULL;
1676 cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
1677#endif
1678
1679
758 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 1680 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
759 return -ENOMEM; 1681 return -ENOMEM;
760#ifdef CONFIG_RELEASE_MASTER 1682#ifdef CONFIG_RELEASE_MASTER
@@ -765,6 +1687,10 @@ static long cedf_activate_plugin(void)
765 /* cycle through cluster and add cpus to them */ 1687 /* cycle through cluster and add cpus to them */
766 for (i = 0; i < num_clusters; i++) { 1688 for (i = 0; i < num_clusters; i++) {
767 1689
1690#ifdef CONFIG_LITMUS_DGL_SUPPORT
1691 raw_spin_lock_init(&cedf[i].dgl_lock);
1692#endif
1693
768 for_each_online_cpu(cpu) { 1694 for_each_online_cpu(cpu) {
769 /* check if the cpu is already in a cluster */ 1695 /* check if the cpu is already in a cluster */
770 for (j = 0; j < num_clusters; j++) 1696 for (j = 0; j < num_clusters; j++)
@@ -795,8 +1721,8 @@ static long cedf_activate_plugin(void)
795 atomic_set(&entry->will_schedule, 0); 1721 atomic_set(&entry->will_schedule, 0);
796 entry->cpu = ccpu; 1722 entry->cpu = ccpu;
797 entry->cluster = &cedf[i]; 1723 entry->cluster = &cedf[i];
798 entry->hn = &(cedf[i].heap_node[cpu_count]); 1724
799 bheap_node_init(&entry->hn, entry); 1725 INIT_BINHEAP_NODE(&entry->hn);
800 1726
801 cpu_count++; 1727 cpu_count++;
802 1728
@@ -813,6 +1739,40 @@ static long cedf_activate_plugin(void)
813 } 1739 }
814 } 1740 }
815 1741
1742#ifdef CONFIG_LITMUS_SOFTIRQD
1743 {
1744 /* distribute the daemons evenly across the clusters. */
1745 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1746 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1747 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1748
1749 int daemon = 0;
1750 for(i = 0; i < num_clusters; ++i)
1751 {
1752 int num_on_this_cluster = num_daemons_per_cluster;
1753 if(left_over)
1754 {
1755 ++num_on_this_cluster;
1756 --left_over;
1757 }
1758
1759 for(j = 0; j < num_on_this_cluster; ++j)
1760 {
1761 // first CPU of this cluster
1762 affinity[daemon++] = i*cluster_size;
1763 }
1764 }
1765
1766 spawn_klitirqd(affinity);
1767
1768 kfree(affinity);
1769 }
1770#endif
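
The block above spreads NR_LITMUS_SOFTIRQD daemons over the clusters as evenly as possible, giving one extra daemon to the first "remainder" clusters and pinning each daemon to the first CPU of its cluster. A stand-alone sketch of the same arithmetic (constants made up for illustration):

/* User-space model of the daemon placement loop above. */
#include <stdio.h>

int main(void)
{
	const int num_daemons = 7, num_clusters = 3, cluster_size = 4;
	int per_cluster = num_daemons / num_clusters;
	int left_over   = num_daemons % num_clusters;
	int daemon = 0;

	for (int i = 0; i < num_clusters; i++) {
		int here = per_cluster + (i < left_over ? 1 : 0);
		for (int j = 0; j < here; j++)
			printf("daemon %d -> CPU %d (cluster %d)\n",
			       daemon++, i * cluster_size, i);
	}
	return 0;
}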
1771
1772#ifdef CONFIG_LITMUS_NVIDIA
1773 init_nvidia_info();
1774#endif
1775
816 free_cpumask_var(mask); 1776 free_cpumask_var(mask);
817 clusters_allocated = 1; 1777 clusters_allocated = 1;
818 return 0; 1778 return 0;
@@ -831,6 +1791,32 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
831 .task_block = cedf_task_block, 1791 .task_block = cedf_task_block,
832 .admit_task = cedf_admit_task, 1792 .admit_task = cedf_admit_task,
833 .activate_plugin = cedf_activate_plugin, 1793 .activate_plugin = cedf_activate_plugin,
1794 .compare = edf_higher_prio,
1795#ifdef CONFIG_LITMUS_LOCKING
1796 .allocate_lock = cedf_allocate_lock,
1797 .increase_prio = increase_priority_inheritance,
1798 .decrease_prio = decrease_priority_inheritance,
1799#endif
1800#ifdef CONFIG_LITMUS_NESTED_LOCKING
1801 .nested_increase_prio = nested_increase_priority_inheritance,
1802 .nested_decrease_prio = nested_decrease_priority_inheritance,
1803 .__compare = __edf_higher_prio,
1804#endif
1805#ifdef CONFIG_LITMUS_DGL_SUPPORT
1806 .get_dgl_spinlock = cedf_get_dgl_spinlock,
1807#endif
1808#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1809 .allocate_aff_obs = cedf_allocate_affinity_observer,
1810#endif
1811#ifdef CONFIG_LITMUS_SOFTIRQD
1812 .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
1813 .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
1814#endif
1815#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1816 .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
1817 .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
1818 .run_tasklets = cedf_run_tasklets,
1819#endif
834}; 1820};
835 1821
836static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; 1822static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index 6ed504f4750e..8c48757fa86c 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -12,23 +12,49 @@
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/uaccess.h>
16#include <linux/module.h>
15 17
16#include <litmus/litmus.h> 18#include <litmus/litmus.h>
17#include <litmus/jobs.h> 19#include <litmus/jobs.h>
18#include <litmus/sched_plugin.h> 20#include <litmus/sched_plugin.h>
19#include <litmus/edf_common.h> 21#include <litmus/edf_common.h>
20#include <litmus/sched_trace.h> 22#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22 23
23#include <litmus/preempt.h> 24#include <litmus/preempt.h>
24 25
25#include <litmus/bheap.h> 26#include <litmus/bheap.h>
27#include <litmus/binheap.h>
28
29#ifdef CONFIG_LITMUS_LOCKING
30#include <litmus/kfmlp_lock.h>
31#endif
32
33#ifdef CONFIG_LITMUS_NESTED_LOCKING
34#include <litmus/rsm_lock.h>
35#include <litmus/ikglp_lock.h>
36#endif
26 37
27#ifdef CONFIG_SCHED_CPU_AFFINITY 38#ifdef CONFIG_SCHED_CPU_AFFINITY
28#include <litmus/affinity.h> 39#include <litmus/affinity.h>
29#endif 40#endif
30 41
31#include <linux/module.h> 42#ifdef CONFIG_LITMUS_SOFTIRQD
43#include <litmus/litmus_softirq.h>
44#endif
45
46#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
47#include <linux/interrupt.h>
48#include <litmus/trace.h>
49#endif
50
51#ifdef CONFIG_LITMUS_NVIDIA
52#include <litmus/nvidia_info.h>
53#endif
54
55#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
56#include <litmus/gpu_affinity.h>
57#endif
32 58
33/* Overview of GSN-EDF operations. 59/* Overview of GSN-EDF operations.
34 * 60 *
@@ -103,52 +129,70 @@ typedef struct {
103 int cpu; 129 int cpu;
104 struct task_struct* linked; /* only RT tasks */ 130 struct task_struct* linked; /* only RT tasks */
105 struct task_struct* scheduled; /* only RT tasks */ 131 struct task_struct* scheduled; /* only RT tasks */
106 struct bheap_node* hn; 132 struct binheap_node hn;
107} cpu_entry_t; 133} cpu_entry_t;
108DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); 134DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
109 135
110cpu_entry_t* gsnedf_cpus[NR_CPUS]; 136cpu_entry_t* gsnedf_cpus[NR_CPUS];
111 137
112/* the cpus queue themselves according to priority in here */ 138/* the cpus queue themselves according to priority in here */
113static struct bheap_node gsnedf_heap_node[NR_CPUS]; 139static struct binheap_handle gsnedf_cpu_heap;
114static struct bheap gsnedf_cpu_heap;
115 140
116static rt_domain_t gsnedf; 141static rt_domain_t gsnedf;
117#define gsnedf_lock (gsnedf.ready_lock) 142#define gsnedf_lock (gsnedf.ready_lock)
118 143
144#ifdef CONFIG_LITMUS_DGL_SUPPORT
145static raw_spinlock_t dgl_lock;
146
147static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t)
148{
149 return(&dgl_lock);
150}
151#endif
152
153#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
154struct tasklet_head
155{
156 struct tasklet_struct *head;
157 struct tasklet_struct **tail;
158};
159
160struct tasklet_head gsnedf_pending_tasklets;
161#endif
162
119 163
120/* Uncomment this if you want to see all scheduling decisions in the 164/* Uncomment this if you want to see all scheduling decisions in the
121 * TRACE() log. 165 * TRACE() log.
122#define WANT_ALL_SCHED_EVENTS 166#define WANT_ALL_SCHED_EVENTS
123 */ 167 */
124 168
125static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 169static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
126{ 170{
127 cpu_entry_t *a, *b; 171 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
128 a = _a->value; 172 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
129 b = _b->value; 173
130 /* Note that a and b are inverted: we want the lowest-priority CPU at 174 /* Note that a and b are inverted: we want the lowest-priority CPU at
131 * the top of the heap. 175 * the top of the heap.
132 */ 176 */
133 return edf_higher_prio(b->linked, a->linked); 177 return edf_higher_prio(b->linked, a->linked);
134} 178}
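
As the comment notes, the comparator is deliberately inverted so the binheap keeps the CPU running the least urgent work at the top, which is exactly the CPU lowest_prio_cpu() should hand back as a preemption target. A compilable user-space sketch of the same idea (a linear scan stands in for the heap; names are invented):

/* Ranking CPUs by "runs lower-priority work" puts the best preemption
 * target (an idle CPU, or the CPU with the least urgent linked task) first. */
#include <stdio.h>
#include <limits.h>

struct cpu { int id; long linked_deadline; };   /* LONG_MAX == idle */

/* analogous to cpu_lower_prio(): nonzero iff a's linked work is less urgent */
static int runs_lower_prio_work(const struct cpu *a, const struct cpu *b)
{
	return a->linked_deadline > b->linked_deadline;
}

static const struct cpu *preemption_target(const struct cpu *cpus, int n)
{
	const struct cpu *best = &cpus[0];
	for (int i = 1; i < n; i++)
		if (runs_lower_prio_work(&cpus[i], best))
			best = &cpus[i];
	return best;   /* the entry lowest_prio_cpu() would take off the heap top */
}

int main(void)
{
	struct cpu cpus[] = { { 0, 100 }, { 1, LONG_MAX /* idle */ }, { 2, 50 } };
	printf("preempt CPU %d\n", preemption_target(cpus, 3)->id);
	return 0;
}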
135 179
180
136/* update_cpu_position - Move the cpu entry to the correct place to maintain 181/* update_cpu_position - Move the cpu entry to the correct place to maintain
137 * order in the cpu queue. Caller must hold gsnedf lock. 182 * order in the cpu queue. Caller must hold gsnedf lock.
138 */ 183 */
139static void update_cpu_position(cpu_entry_t *entry) 184static void update_cpu_position(cpu_entry_t *entry)
140{ 185{
141 if (likely(bheap_node_in_heap(entry->hn))) 186 if (likely(binheap_is_in_heap(&entry->hn))) {
142 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 187 binheap_delete(&entry->hn, &gsnedf_cpu_heap);
143 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 188 }
189 binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn);
144} 190}
145 191
146/* caller must hold gsnedf lock */ 192/* caller must hold gsnedf lock */
147static cpu_entry_t* lowest_prio_cpu(void) 193static cpu_entry_t* lowest_prio_cpu(void)
148{ 194{
149 struct bheap_node* hn; 195 return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn);
150 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
151 return hn->value;
152} 196}
153 197
154 198
@@ -337,6 +381,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
337 381
338 sched_trace_task_completion(t, forced); 382 sched_trace_task_completion(t, forced);
339 383
384#ifdef CONFIG_LITMUS_NVIDIA
385 atomic_set(&tsk_rt(t)->nv_int_count, 0);
386#endif
387
340 TRACE_TASK(t, "job_completion().\n"); 388 TRACE_TASK(t, "job_completion().\n");
341 389
342 /* set flags */ 390 /* set flags */
@@ -379,6 +427,318 @@ static void gsnedf_tick(struct task_struct* t)
379 } 427 }
380} 428}
381 429
430
431
432
433#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
434
435
436static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
437{
438 if (!atomic_read(&tasklet->count)) {
439 if(tasklet->owner) {
440 sched_trace_tasklet_begin(tasklet->owner);
441 }
442
443 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
444 {
445 BUG();
446 }
447 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
448 __FUNCTION__,
449 (tasklet->owner) ? tasklet->owner->pid : -1,
450 (tasklet->owner) ? 0 : 1);
451 tasklet->func(tasklet->data);
452 tasklet_unlock(tasklet);
453
454 if(tasklet->owner) {
455 sched_trace_tasklet_end(tasklet->owner, flushed);
456 }
457 }
458 else {
459 BUG();
460 }
461}
462
463static void do_lit_tasklets(struct task_struct* sched_task)
464{
465 int work_to_do = 1;
466 struct tasklet_struct *tasklet = NULL;
467 unsigned long flags;
468
469 while(work_to_do) {
470
471 TS_NV_SCHED_BOTISR_START;
472
473 // execute one tasklet that has higher priority
474 raw_spin_lock_irqsave(&gsnedf_lock, flags);
475
476 if(gsnedf_pending_tasklets.head != NULL) {
477 struct tasklet_struct *prev = NULL;
478 tasklet = gsnedf_pending_tasklets.head;
479
480 while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) {
481 prev = tasklet;
482 tasklet = tasklet->next;
483 }
484
485 // remove the tasklet from the queue
486 if(prev) {
487 prev->next = tasklet->next;
488 if(prev->next == NULL) {
489 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
490 gsnedf_pending_tasklets.tail = &(prev->next);
491 }
492 }
493 else {
494 gsnedf_pending_tasklets.head = tasklet->next;
495 if(tasklet->next == NULL) {
496 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
497 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
498 }
499 }
500 }
501 else {
502 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
503 }
504
505 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
506
507 if(tasklet) {
508 __do_lit_tasklet(tasklet, 0ul);
509 tasklet = NULL;
510 }
511 else {
512 work_to_do = 0;
513 }
514
515 TS_NV_SCHED_BOTISR_END;
516 }
517}
518
519//static void do_lit_tasklets(struct task_struct* sched_task)
520//{
521// int work_to_do = 1;
522// struct tasklet_struct *tasklet = NULL;
523// //struct tasklet_struct *step;
524// unsigned long flags;
525//
526// while(work_to_do) {
527//
528// TS_NV_SCHED_BOTISR_START;
529//
530// // remove tasklet at head of list if it has higher priority.
531// raw_spin_lock_irqsave(&gsnedf_lock, flags);
532//
533// if(gsnedf_pending_tasklets.head != NULL) {
534// // remove tasklet at head.
535// tasklet = gsnedf_pending_tasklets.head;
536//
537// if(edf_higher_prio(tasklet->owner, sched_task)) {
538//
539// if(NULL == tasklet->next) {
540// // tasklet is at the head, list only has one element
541// TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
542// gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
543// }
544//
545// // remove the tasklet from the queue
546// gsnedf_pending_tasklets.head = tasklet->next;
547//
548// TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
549// }
550// else {
551// TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
552// tasklet = NULL;
553// }
554// }
555// else {
556// TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
557// }
558//
559// raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
560//
561// TS_NV_SCHED_BOTISR_END;
562//
563// if(tasklet) {
564// __do_lit_tasklet(tasklet, 0ul);
565// tasklet = NULL;
566// }
567// else {
568// work_to_do = 0;
569// }
570// }
571//
572// //TRACE("%s: exited.\n", __FUNCTION__);
573//}
574
575static void __add_pai_tasklet(struct tasklet_struct* tasklet)
576{
577 struct tasklet_struct* step;
578
579 tasklet->next = NULL; // make sure there are no old values floating around
580
581 step = gsnedf_pending_tasklets.head;
582 if(step == NULL) {
583 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
584 // insert at tail.
585 *(gsnedf_pending_tasklets.tail) = tasklet;
586 gsnedf_pending_tasklets.tail = &(tasklet->next);
587 }
588 else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
589 edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
590 // insert at tail.
591 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
592
593 *(gsnedf_pending_tasklets.tail) = tasklet;
594 gsnedf_pending_tasklets.tail = &(tasklet->next);
595 }
596 else {
597 // insert the tasklet somewhere in the middle.
598
599 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
600
601 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
602 step = step->next;
603 }
604
605 // insert tasklet right before step->next.
606
607 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
608
609 tasklet->next = step->next;
610 step->next = tasklet;
611
612 // patch up the head if needed.
613 if(gsnedf_pending_tasklets.head == step)
614 {
615 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
616 gsnedf_pending_tasklets.head = tasklet;
617 }
618 }
619}
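
__add_pai_tasklet() keeps the pending list in priority order with a head plus tail-of-next-pointer representation: 'tail' always points at the next field of the last node, or at 'head' when the list is empty, so appends stay O(1). The sketch below models that invariant in user space (invented names); it also shows why removing the final node must re-aim the tail at the surviving node's next field.

/* User-space model of a priority-ordered singly linked list with a
 * tail pointer that always addresses the last node's next field. */
#include <stdio.h>
#include <stdlib.h>

struct node { int prio; struct node *next; };   /* larger prio == more urgent */

static struct node *head;
static struct node **tail = &head;

static void insert_by_prio(struct node *n)
{
	struct node **pos = &head;
	while (*pos && (*pos)->prio >= n->prio)   /* keep existing order stable */
		pos = &(*pos)->next;
	n->next = *pos;
	*pos = n;
	if (!n->next)
		tail = &n->next;                  /* n is now the last node */
}

static struct node *pop_head(void)
{
	struct node *n = head;
	if (n) {
		head = n->next;
		if (!head)
			tail = &head;             /* list became empty */
	}
	return n;
}

int main(void)
{
	int prios[] = { 3, 7, 5 };
	for (int i = 0; i < 3; i++) {
		struct node *n = calloc(1, sizeof(*n));
		n->prio = prios[i];
		insert_by_prio(n);
	}
	for (struct node *n; (n = pop_head()); free(n))
		printf("run tasklet with prio %d\n", n->prio);
	return 0;
}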
620
621static void gsnedf_run_tasklets(struct task_struct* sched_task)
622{
623 preempt_disable();
624
625 if(gsnedf_pending_tasklets.head != NULL) {
626 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
627 do_lit_tasklets(sched_task);
628 }
629
630 preempt_enable_no_resched();
631}
632
633static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
634{
635 cpu_entry_t *targetCPU = NULL;
636 int thisCPU;
637 int runLocal = 0;
638 int runNow = 0;
639 unsigned long flags;
640
641 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
642 {
643 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
644 return 0;
645 }
646
647
648 raw_spin_lock_irqsave(&gsnedf_lock, flags);
649
650 thisCPU = smp_processor_id();
651
652#ifdef CONFIG_SCHED_CPU_AFFINITY
653 {
654 cpu_entry_t* affinity = NULL;
655
656 // use this CPU if it is in our cluster and isn't running any RT work.
657 if(
658#ifdef CONFIG_RELEASE_MASTER
659 (thisCPU != gsnedf.release_master) &&
660#endif
661 (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
662 affinity = &(__get_cpu_var(gsnedf_cpu_entries));
663 }
664 else {
665 // this CPU is busy or shouldn't run tasklet in this cluster.
666 // look for available near by CPUs.
667 // NOTE: Affinity towards owner and not this CPU. Is this right?
668 affinity =
669 gsnedf_get_nearest_available_cpu(
670 &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
671 }
672
673 targetCPU = affinity;
674 }
675#endif
676
677 if (targetCPU == NULL) {
678 targetCPU = lowest_prio_cpu();
679 }
680
681 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
682 if (thisCPU == targetCPU->cpu) {
683 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
684 runLocal = 1;
685 runNow = 1;
686 }
687 else {
688 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
689 runLocal = 0;
690 runNow = 1;
691 }
692 }
693 else {
694 runLocal = 0;
695 runNow = 0;
696 }
697
698 if(!runLocal) {
699 // enqueue the tasklet
700 __add_pai_tasklet(tasklet);
701 }
702
703 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
704
705
706 if (runLocal /*&& runNow */) { // runNow == 1 is implied
707 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
708 __do_lit_tasklet(tasklet, 0ul);
709 }
710 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
711 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
712 preempt(targetCPU); // need to be protected by gsnedf_lock?
713 }
714 else {
715 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
716 }
717
718 return(1); // success
719}
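
The enqueue path above reduces to a three-way decision: if the tasklet's owner does not beat the work linked to the chosen CPU it stays queued; otherwise it either runs immediately on this CPU or the target CPU is poked. A compact sketch of that decision (invented names, plain user-space C):

/* Dispatch decision for a pending tasklet. */
#include <stdio.h>
#include <stdbool.h>

enum dispatch { RUN_LOCAL, KICK_REMOTE, DEFER };

static enum dispatch decide(bool owner_beats_target, bool target_is_this_cpu)
{
	if (!owner_beats_target)
		return DEFER;                     /* stays in the priority queue */
	return target_is_this_cpu ? RUN_LOCAL : KICK_REMOTE;
}

int main(void)
{
	printf("%d %d %d\n",
	       decide(true,  true),    /* RUN_LOCAL   */
	       decide(true,  false),   /* KICK_REMOTE */
	       decide(false, true));   /* DEFER       */
	return 0;
}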
720
721static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio,
722 struct task_struct *new_prio)
723{
724 struct tasklet_struct* step;
725 unsigned long flags;
726
727 if(gsnedf_pending_tasklets.head != NULL) {
728 raw_spin_lock_irqsave(&gsnedf_lock, flags);
729 for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) {
730 if(step->owner == old_prio) {
731 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
732 step->owner = new_prio;
733 }
734 }
735 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
736 }
737}
738
739#endif // end PAI
740
741
382/* Getting schedule() right is a bit tricky. schedule() may not make any 742/* Getting schedule() right is a bit tricky. schedule() may not make any
383 * assumptions on the state of the current task since it may be called for a 743 * assumptions on the state of the current task since it may be called for a
384 * number of reasons. The reasons include a scheduler_tick() determined that it 744 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -437,21 +797,32 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
437 TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); 797 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
438#endif 798#endif
439 799
800 /*
440 if (exists) 801 if (exists)
441 TRACE_TASK(prev, 802 TRACE_TASK(prev,
442 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " 803 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
443 "state:%d sig:%d\n", 804 "state:%d sig:%d\n",
444 blocks, out_of_time, np, sleep, preempt, 805 blocks, out_of_time, np, sleep, preempt,
445 prev->state, signal_pending(prev)); 806 prev->state, signal_pending(prev));
807 */
808
446 if (entry->linked && preempt) 809 if (entry->linked && preempt)
447 TRACE_TASK(prev, "will be preempted by %s/%d\n", 810 TRACE_TASK(prev, "will be preempted by %s/%d\n",
448 entry->linked->comm, entry->linked->pid); 811 entry->linked->comm, entry->linked->pid);
449 812
450
451 /* If a task blocks we have no choice but to reschedule. 813 /* If a task blocks we have no choice but to reschedule.
452 */ 814 */
453 if (blocks) 815 if (blocks) {
454 unlink(entry->scheduled); 816 unlink(entry->scheduled);
817 }
818
819#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
820 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
821 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
822 stop_gpu_tracker(entry->scheduled);
823 }
824 }
825#endif
455 826
456 /* Request a sys_exit_np() call if we would like to preempt but cannot. 827 /* Request a sys_exit_np() call if we would like to preempt but cannot.
457 * We need to make sure to update the link structure anyway in case 828 * We need to make sure to update the link structure anyway in case
@@ -492,12 +863,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
492 entry->scheduled->rt_param.scheduled_on = NO_CPU; 863 entry->scheduled->rt_param.scheduled_on = NO_CPU;
493 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); 864 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
494 } 865 }
495 } else 866 }
867 else
868 {
496 /* Only override Linux scheduler if we have a real-time task 869 /* Only override Linux scheduler if we have a real-time task
497 * scheduled that needs to continue. 870 * scheduled that needs to continue.
498 */ 871 */
499 if (exists) 872 if (exists)
500 next = prev; 873 next = prev;
874 }
501 875
502 sched_state_task_picked(); 876 sched_state_task_picked();
503 877
@@ -524,6 +898,7 @@ static void gsnedf_finish_switch(struct task_struct *prev)
524 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); 898 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
525 899
526 entry->scheduled = is_realtime(current) ? current : NULL; 900 entry->scheduled = is_realtime(current) ? current : NULL;
901
527#ifdef WANT_ALL_SCHED_EVENTS 902#ifdef WANT_ALL_SCHED_EVENTS
528 TRACE_TASK(prev, "switched away from\n"); 903 TRACE_TASK(prev, "switched away from\n");
529#endif 904#endif
@@ -572,11 +947,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
572static void gsnedf_task_wake_up(struct task_struct *task) 947static void gsnedf_task_wake_up(struct task_struct *task)
573{ 948{
574 unsigned long flags; 949 unsigned long flags;
575 lt_t now; 950 //lt_t now;
576 951
577 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 952 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
578 953
579 raw_spin_lock_irqsave(&gsnedf_lock, flags); 954 raw_spin_lock_irqsave(&gsnedf_lock, flags);
955
956
957#if 0 // sporadic task model
580 /* We need to take suspensions because of semaphores into 958 /* We need to take suspensions because of semaphores into
581 * account! If a job resumes after being suspended due to acquiring 959 * account! If a job resumes after being suspended due to acquiring
582 * a semaphore, it should never be treated as a new job release. 960 * a semaphore, it should never be treated as a new job release.
@@ -598,19 +976,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
598 } 976 }
599 } 977 }
600 } 978 }
979#else // periodic task model
980 set_rt_flags(task, RT_F_RUNNING);
981#endif
982
601 gsnedf_job_arrival(task); 983 gsnedf_job_arrival(task);
602 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 984 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
603} 985}
604 986
605static void gsnedf_task_block(struct task_struct *t) 987static void gsnedf_task_block(struct task_struct *t)
606{ 988{
989 // TODO: is this called on preemption??
607 unsigned long flags; 990 unsigned long flags;
608 991
609 TRACE_TASK(t, "block at %llu\n", litmus_clock()); 992 TRACE_TASK(t, "block at %llu\n", litmus_clock());
610 993
611 /* unlink if necessary */ 994 /* unlink if necessary */
612 raw_spin_lock_irqsave(&gsnedf_lock, flags); 995 raw_spin_lock_irqsave(&gsnedf_lock, flags);
996
613 unlink(t); 997 unlink(t);
998
614 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 999 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
615 1000
616 BUG_ON(!is_realtime(t)); 1001 BUG_ON(!is_realtime(t));
@@ -621,6 +1006,10 @@ static void gsnedf_task_exit(struct task_struct * t)
621{ 1006{
622 unsigned long flags; 1007 unsigned long flags;
623 1008
1009#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1010 gsnedf_change_prio_pai_tasklet(t, NULL);
1011#endif
1012
624 /* unlink if necessary */ 1013 /* unlink if necessary */
625 raw_spin_lock_irqsave(&gsnedf_lock, flags); 1014 raw_spin_lock_irqsave(&gsnedf_lock, flags);
626 unlink(t); 1015 unlink(t);
@@ -637,101 +1026,423 @@ static void gsnedf_task_exit(struct task_struct * t)
637 1026
638static long gsnedf_admit_task(struct task_struct* tsk) 1027static long gsnedf_admit_task(struct task_struct* tsk)
639{ 1028{
1029#ifdef CONFIG_LITMUS_NESTED_LOCKING
1030 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1031 edf_max_heap_base_priority_order);
1032#endif
1033
640 return 0; 1034 return 0;
641} 1035}
642 1036
1037
1038
1039
1040
1041
643#ifdef CONFIG_LITMUS_LOCKING 1042#ifdef CONFIG_LITMUS_LOCKING
644 1043
645#include <litmus/fdso.h> 1044#include <litmus/fdso.h>
646 1045
647/* called with IRQs off */ 1046/* called with IRQs off */
648static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) 1047static void __increase_priority_inheritance(struct task_struct* t,
1048 struct task_struct* prio_inh)
649{ 1049{
650 int linked_on; 1050 int linked_on;
651 int check_preempt = 0; 1051 int check_preempt = 0;
652 1052
1053#ifdef CONFIG_LITMUS_NESTED_LOCKING
1054 /* this sanity check allows for weaker locking in protocols */
1055 /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
1056 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1057#endif
1058 TRACE_TASK(t, "inherits priority from %s/%d\n",
1059 prio_inh->comm, prio_inh->pid);
1060 tsk_rt(t)->inh_task = prio_inh;
1061
1062 linked_on = tsk_rt(t)->linked_on;
1063
1064 /* If it is scheduled, then we need to reorder the CPU heap. */
1065 if (linked_on != NO_CPU) {
1066 TRACE_TASK(t, "%s: linked on %d\n",
1067 __FUNCTION__, linked_on);
1068 /* Holder is scheduled; need to re-order CPUs.
1069 * We can't use heap_decrease() here since
1070 * the cpu_heap is ordered in reverse direction, so
1071 * it is actually an increase. */
1072 binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap);
1073 binheap_add(&gsnedf_cpus[linked_on]->hn,
1074 &gsnedf_cpu_heap, cpu_entry_t, hn);
1075 } else {
1076 /* holder may be queued: first stop queue changes */
1077 raw_spin_lock(&gsnedf.release_lock);
1078 if (is_queued(t)) {
1079 TRACE_TASK(t, "%s: is queued\n",
1080 __FUNCTION__);
1081 /* We need to update the position of holder in some
1082 * heap. Note that this could be a release heap if
1083 * budget enforcement is used and this job overran. */
1084 check_preempt =
1085 !bheap_decrease(edf_ready_order,
1086 tsk_rt(t)->heap_node);
1087 } else {
1088 /* Nothing to do: if it is not queued and not linked
1089 * then it is either sleeping or currently being moved
1090 * by other code (e.g., a timer interrupt handler) that
1091 * will use the correct priority when enqueuing the
1092 * task. */
1093 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1094 __FUNCTION__);
1095 }
1096 raw_spin_unlock(&gsnedf.release_lock);
1097
1098 /* If holder was enqueued in a release heap, then the following
1099 * preemption check is pointless, but we can't easily detect
1100 * that case. If you want to fix this, then consider that
1101 * simply adding a state flag requires O(n) time to update when
1102 * releasing n tasks, which conflicts with the goal to have
1103 * O(log n) merges. */
1104 if (check_preempt) {
1105 /* heap_decrease() hit the top level of the heap: make
1106 * sure preemption checks get the right task, not the
1107 * potentially stale cache. */
1108 bheap_uncache_min(edf_ready_order,
1109 &gsnedf.ready_queue);
1110 check_for_preemptions();
1111 }
1112 }
1113#ifdef CONFIG_LITMUS_NESTED_LOCKING
1114 }
1115 else {
1116 TRACE_TASK(t, "Spurious invalid priority increase. "
1117 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1118 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1119 t->comm, t->pid,
1120 effective_priority(t)->comm, effective_priority(t)->pid,
1121 (prio_inh) ? prio_inh->comm : "nil",
1122 (prio_inh) ? prio_inh->pid : -1);
1123 WARN_ON(!prio_inh);
1124 }
1125#endif
1126}
1127
1128/* called with IRQs off */
1129static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1130{
653 raw_spin_lock(&gsnedf_lock); 1131 raw_spin_lock(&gsnedf_lock);
654 1132
655 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); 1133 __increase_priority_inheritance(t, prio_inh);
656 tsk_rt(t)->inh_task = prio_inh; 1134
657 1135#ifdef CONFIG_LITMUS_SOFTIRQD
658 linked_on = tsk_rt(t)->linked_on; 1136 if(tsk_rt(t)->cur_klitirqd != NULL)
659 1137 {
660 /* If it is scheduled, then we need to reorder the CPU heap. */ 1138 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
661 if (linked_on != NO_CPU) { 1139 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
662 TRACE_TASK(t, "%s: linked on %d\n", 1140
663 __FUNCTION__, linked_on); 1141 __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
664 /* Holder is scheduled; need to re-order CPUs. 1142 }
665 * We can't use heap_decrease() here since 1143#endif
666 * the cpu_heap is ordered in reverse direction, so 1144
667 * it is actually an increase. */ 1145 raw_spin_unlock(&gsnedf_lock);
668 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, 1146
669 gsnedf_cpus[linked_on]->hn); 1147#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
670 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, 1148 if(tsk_rt(t)->held_gpus) {
671 gsnedf_cpus[linked_on]->hn); 1149 int i;
672 } else { 1150 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
673 /* holder may be queued: first stop queue changes */ 1151 i < NV_DEVICE_NUM;
674 raw_spin_lock(&gsnedf.release_lock); 1152 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
675 if (is_queued(t)) { 1153 pai_check_priority_increase(t, i);
676 TRACE_TASK(t, "%s: is queued\n", 1154 }
677 __FUNCTION__); 1155 }
678 /* We need to update the position of holder in some 1156#endif
679 * heap. Note that this could be a release heap if we 1157}
680 * budget enforcement is used and this job overran. */ 1158
681 check_preempt = 1159
682 !bheap_decrease(edf_ready_order, 1160/* called with IRQs off */
683 tsk_rt(t)->heap_node); 1161static void __decrease_priority_inheritance(struct task_struct* t,
684 } else { 1162 struct task_struct* prio_inh)
685 /* Nothing to do: if it is not queued and not linked 1163{
686 * then it is either sleeping or currently being moved 1164#ifdef CONFIG_LITMUS_NESTED_LOCKING
687 * by other code (e.g., a timer interrupt handler) that 1165 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
688 * will use the correct priority when enqueuing the 1166#endif
689 * task. */ 1167 /* A job only stops inheriting a priority when it releases a
690 TRACE_TASK(t, "%s: is NOT queued => Done.\n", 1168 * resource. Thus we can make the following assumption.*/
691 __FUNCTION__); 1169 if(prio_inh)
1170 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1171 prio_inh->comm, prio_inh->pid);
1172 else
1173 TRACE_TASK(t, "base priority restored.\n");
1174
1175 tsk_rt(t)->inh_task = prio_inh;
1176
1177 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1178 TRACE_TASK(t, "is scheduled.\n");
1179
1180 /* Check if rescheduling is necessary. We can't use heap_decrease()
1181 * since the priority was effectively lowered. */
1182 unlink(t);
1183 gsnedf_job_arrival(t);
692 } 1184 }
693 raw_spin_unlock(&gsnedf.release_lock); 1185 else {
694 1186 /* task is queued */
695 /* If holder was enqueued in a release heap, then the following 1187 raw_spin_lock(&gsnedf.release_lock);
696 * preemption check is pointless, but we can't easily detect 1188 if (is_queued(t)) {
697 * that case. If you want to fix this, then consider that 1189 TRACE_TASK(t, "is queued.\n");
698 * simply adding a state flag requires O(n) time to update when 1190
699 * releasing n tasks, which conflicts with the goal to have 1191 /* decrease in priority, so we have to re-add to binomial heap */
700 * O(log n) merges. */ 1192 unlink(t);
701 if (check_preempt) { 1193 gsnedf_job_arrival(t);
702 /* heap_decrease() hit the top level of the heap: make 1194 }
703 * sure preemption checks get the right task, not the 1195 else {
704 * potentially stale cache. */ 1196 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
705 bheap_uncache_min(edf_ready_order, 1197 }
706 &gsnedf.ready_queue); 1198 raw_spin_unlock(&gsnedf.release_lock);
707 check_for_preemptions();
708 } 1199 }
1200#ifdef CONFIG_LITMUS_NESTED_LOCKING
1201 }
1202 else {
1203 TRACE_TASK(t, "Spurious invalid priority decrease. "
1204 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1205 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1206 t->comm, t->pid,
1207 effective_priority(t)->comm, effective_priority(t)->pid,
1208 (prio_inh) ? prio_inh->comm : "nil",
1209 (prio_inh) ? prio_inh->pid : -1);
709 } 1210 }
1211#endif
1212}
1213
1214static void decrease_priority_inheritance(struct task_struct* t,
1215 struct task_struct* prio_inh)
1216{
1217 raw_spin_lock(&gsnedf_lock);
1218 __decrease_priority_inheritance(t, prio_inh);
1219
1220#ifdef CONFIG_LITMUS_SOFTIRQD
1221 if(tsk_rt(t)->cur_klitirqd != NULL)
1222 {
1223 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1224 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1225
1226 __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1227 }
1228#endif
710 1229
711 raw_spin_unlock(&gsnedf_lock); 1230 raw_spin_unlock(&gsnedf_lock);
1231
1232#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1233 if(tsk_rt(t)->held_gpus) {
1234 int i;
1235 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1236 i < NV_DEVICE_NUM;
1237 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1238 pai_check_priority_decrease(t, i);
1239 }
1240 }
1241#endif
712} 1242}
713 1243
1244
1245#ifdef CONFIG_LITMUS_SOFTIRQD
714/* called with IRQs off */ 1246/* called with IRQs off */
715static void clear_priority_inheritance(struct task_struct* t) 1247static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1248 struct task_struct* old_owner,
1249 struct task_struct* new_owner)
716{ 1250{
1251 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1252
717 raw_spin_lock(&gsnedf_lock); 1253 raw_spin_lock(&gsnedf_lock);
718 1254
719 /* A job only stops inheriting a priority when it releases a 1255 if(old_owner != new_owner)
720 * resource. Thus we can make the following assumption.*/ 1256 {
721 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); 1257 if(old_owner)
1258 {
1259 // unreachable?
1260 tsk_rt(old_owner)->cur_klitirqd = NULL;
1261 }
722 1262
723 TRACE_TASK(t, "priority restored\n"); 1263 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
724 tsk_rt(t)->inh_task = NULL; 1264 new_owner->comm, new_owner->pid);
725 1265
726 /* Check if rescheduling is necessary. We can't use heap_decrease() 1266 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
727 * since the priority was effectively lowered. */ 1267 }
728 unlink(t); 1268
729 gsnedf_job_arrival(t); 1269 __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
1270
1271 __increase_priority_inheritance(klitirqd,
1272 (tsk_rt(new_owner)->inh_task == NULL) ?
1273 new_owner :
1274 tsk_rt(new_owner)->inh_task);
730 1275
731 raw_spin_unlock(&gsnedf_lock); 1276 raw_spin_unlock(&gsnedf_lock);
732} 1277}
733 1278
734 1279
1280/* called with IRQs off */
1281static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1282 struct task_struct* old_owner,
1283 struct task_struct* new_owner)
1284{
1285 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1286
1287 raw_spin_lock(&gsnedf_lock);
1288
1289 TRACE_TASK(klitirqd, "priority restored\n");
1290
1291 __decrease_priority_inheritance(klitirqd, new_owner);
1292
1293 tsk_rt(old_owner)->cur_klitirqd = NULL;
1294
1295 raw_spin_unlock(&gsnedf_lock);
1296}
1297#endif
1298
1299
1300
1301
1302#ifdef CONFIG_LITMUS_NESTED_LOCKING
1303
1304/* called with IRQs off */
1305/* preconditions:
1306 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1307 (2) The lock 'to_unlock' is held.
1308 */
1309static void nested_increase_priority_inheritance(struct task_struct* t,
1310 struct task_struct* prio_inh,
1311 raw_spinlock_t *to_unlock,
1312 unsigned long irqflags)
1313{
1314 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1315
1316 if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1317 increase_priority_inheritance(t, prio_inh); // increase our prio.
1318 }
1319
1320 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1321
1322
1323 if(blocked_lock) {
1324 if(blocked_lock->ops->propagate_increase_inheritance) {
1325 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1326 blocked_lock->ident);
1327
1328 // beware: recursion
1329 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1330 t, to_unlock,
1331 irqflags);
1332 }
1333 else {
1334 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1335 blocked_lock->ident);
1336 unlock_fine_irqrestore(to_unlock, irqflags);
1337 }
1338 }
1339 else {
1340 TRACE_TASK(t, "is not blocked. No propagation.\n");
1341 unlock_fine_irqrestore(to_unlock, irqflags);
1342 }
1343}
1344
1345/* called with IRQs off */
1346/* preconditions:
1347 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1348 (2) The lock 'to_unlock' is held.
1349 */
1350static void nested_decrease_priority_inheritance(struct task_struct* t,
1351 struct task_struct* prio_inh,
1352 raw_spinlock_t *to_unlock,
1353 unsigned long irqflags)
1354{
1355 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1356 decrease_priority_inheritance(t, prio_inh);
1357
1358 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock t's heap.
1359
1360 if(blocked_lock) {
1361 if(blocked_lock->ops->propagate_decrease_inheritance) {
1362 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1363 blocked_lock->ident);
1364
1365 // beware: recursion
1366 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1367 to_unlock,
1368 irqflags);
1369 }
1370 else {
1371 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1372 blocked_lock);
1373 unlock_fine_irqrestore(to_unlock, irqflags);
1374 }
1375 }
1376 else {
1377 TRACE_TASK(t, "is not blocked. No propagation.\n");
1378 unlock_fine_irqrestore(to_unlock, irqflags);
1379 }
1380}
1381
1382
1383/* ******************** RSM MUTEX ********************** */
1384
1385static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = {
1386 .lock = rsm_mutex_lock,
1387 .unlock = rsm_mutex_unlock,
1388 .close = rsm_mutex_close,
1389 .deallocate = rsm_mutex_free,
1390
1391 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1392 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1393
1394#ifdef CONFIG_LITMUS_DGL_SUPPORT
1395 .dgl_lock = rsm_mutex_dgl_lock,
1396 .is_owner = rsm_mutex_is_owner,
1397 .enable_priority = rsm_mutex_enable_priority,
1398#endif
1399};
1400
1401static struct litmus_lock* gsnedf_new_rsm_mutex(void)
1402{
1403 return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops);
1404}
1405
1406/* ******************** IKGLP ********************** */
1407
1408static struct litmus_lock_ops gsnedf_ikglp_lock_ops = {
1409 .lock = ikglp_lock,
1410 .unlock = ikglp_unlock,
1411 .close = ikglp_close,
1412 .deallocate = ikglp_free,
1413
1414 // ikglp can only be an outer-most lock.
1415 .propagate_increase_inheritance = NULL,
1416 .propagate_decrease_inheritance = NULL,
1417};
1418
1419static struct litmus_lock* gsnedf_new_ikglp(void* __user arg)
1420{
1421 return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg);
1422}
1423
1424#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1425
1426
1427/* ******************** KFMLP support ********************** */
1428
1429static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
1430 .lock = kfmlp_lock,
1431 .unlock = kfmlp_unlock,
1432 .close = kfmlp_close,
1433 .deallocate = kfmlp_free,
1434
1435 // kfmlp can only be an outer-most lock.
1436 .propagate_increase_inheritance = NULL,
1437 .propagate_decrease_inheritance = NULL,
1438};
1439
1440
1441static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg)
1442{
1443 return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg);
1444}
1445
735/* ******************** FMLP support ********************** */ 1446/* ******************** FMLP support ********************** */
736 1447
737/* struct for semaphore with priority inheritance */ 1448/* struct for semaphore with priority inheritance */
@@ -797,7 +1508,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
797 if (edf_higher_prio(t, sem->hp_waiter)) { 1508 if (edf_higher_prio(t, sem->hp_waiter)) {
798 sem->hp_waiter = t; 1509 sem->hp_waiter = t;
799 if (edf_higher_prio(t, sem->owner)) 1510 if (edf_higher_prio(t, sem->owner))
800 set_priority_inheritance(sem->owner, sem->hp_waiter); 1511 increase_priority_inheritance(sem->owner, sem->hp_waiter);
801 } 1512 }
802 1513
803 TS_LOCK_SUSPEND; 1514 TS_LOCK_SUSPEND;
@@ -865,7 +1576,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
865 /* Well, if next is not the highest-priority waiter, 1576 /* Well, if next is not the highest-priority waiter,
866 * then it ought to inherit the highest-priority 1577 * then it ought to inherit the highest-priority
867 * waiter's priority. */ 1578 * waiter's priority. */
868 set_priority_inheritance(next, sem->hp_waiter); 1579 increase_priority_inheritance(next, sem->hp_waiter);
869 } 1580 }
870 1581
871 /* wake up next */ 1582 /* wake up next */
@@ -876,7 +1587,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
876 1587
877 /* we lose the benefit of priority inheritance (if any) */ 1588 /* we lose the benefit of priority inheritance (if any) */
878 if (tsk_rt(t)->inh_task) 1589 if (tsk_rt(t)->inh_task)
879 clear_priority_inheritance(t); 1590 decrease_priority_inheritance(t, NULL);
880 1591
881out: 1592out:
882 spin_unlock_irqrestore(&sem->wait.lock, flags); 1593 spin_unlock_irqrestore(&sem->wait.lock, flags);
@@ -914,6 +1625,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
914 .lock = gsnedf_fmlp_lock, 1625 .lock = gsnedf_fmlp_lock,
915 .unlock = gsnedf_fmlp_unlock, 1626 .unlock = gsnedf_fmlp_unlock,
916 .deallocate = gsnedf_fmlp_free, 1627 .deallocate = gsnedf_fmlp_free,
1628
1629#ifdef CONFIG_LITMUS_NESTED_LOCKING
1630 .propagate_increase_inheritance = NULL,
1631 .propagate_decrease_inheritance = NULL
1632#endif
917}; 1633};
918 1634
919static struct litmus_lock* gsnedf_new_fmlp(void) 1635static struct litmus_lock* gsnedf_new_fmlp(void)
@@ -932,47 +1648,121 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
932 return &sem->litmus_lock; 1648 return &sem->litmus_lock;
933} 1649}
934 1650
935/* **** lock constructor **** */
936
937 1651
938static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, 1652static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
939 void* __user unused) 1653 void* __user args)
940{ 1654{
941 int err = -ENXIO; 1655 int err;
942 1656
943 /* GSN-EDF currently only supports the FMLP for global resources. */
944 switch (type) { 1657 switch (type) {
945 1658
946 case FMLP_SEM: 1659 case FMLP_SEM:
947 /* Flexible Multiprocessor Locking Protocol */ 1660 /* Flexible Multiprocessor Locking Protocol */
948 *lock = gsnedf_new_fmlp(); 1661 *lock = gsnedf_new_fmlp();
949 if (*lock) 1662 break;
950 err = 0; 1663#ifdef CONFIG_LITMUS_NESTED_LOCKING
951 else 1664 case RSM_MUTEX:
952 err = -ENOMEM; 1665 *lock = gsnedf_new_rsm_mutex();
953 break; 1666 break;
954 1667
1668 case IKGLP_SEM:
1669 *lock = gsnedf_new_ikglp(args);
1670 break;
1671#endif
1672 case KFMLP_SEM:
1673 *lock = gsnedf_new_kfmlp(args);
1674 break;
1675 default:
1676 err = -ENXIO;
1677 goto UNSUPPORTED_LOCK;
955 }; 1678 };
956 1679
1680 if (*lock)
1681 err = 0;
1682 else
1683 err = -ENOMEM;
1684
1685UNSUPPORTED_LOCK:
957 return err; 1686 return err;
958} 1687}
959 1688
1689#endif // CONFIG_LITMUS_LOCKING
1690
1691
1692
1693
1694
1695#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1696static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = {
1697 .close = kfmlp_aff_obs_close,
1698 .deallocate = kfmlp_aff_obs_free,
1699};
1700
1701#ifdef CONFIG_LITMUS_NESTED_LOCKING
1702static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = {
1703 .close = ikglp_aff_obs_close,
1704 .deallocate = ikglp_aff_obs_free,
1705};
960#endif 1706#endif
961 1707
1708static long gsnedf_allocate_affinity_observer(
1709 struct affinity_observer **aff_obs,
1710 int type,
1711 void* __user args)
1712{
1713 int err;
1714
1715 switch (type) {
1716
1717 case KFMLP_SIMPLE_GPU_AFF_OBS:
1718 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1719 break;
1720
1721 case KFMLP_GPU_AFF_OBS:
1722 *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1723 break;
1724
1725#ifdef CONFIG_LITMUS_NESTED_LOCKING
1726 case IKGLP_SIMPLE_GPU_AFF_OBS:
1727 *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1728 break;
1729
1730 case IKGLP_GPU_AFF_OBS:
1731 *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1732 break;
1733#endif
1734 default:
1735 err = -ENXIO;
1736 goto UNSUPPORTED_AFF_OBS;
1737 };
1738
1739 if (*aff_obs)
1740 err = 0;
1741 else
1742 err = -ENOMEM;
1743
1744UNSUPPORTED_AFF_OBS:
1745 return err;
1746}
1747#endif
1748
1749
1750
1751
962 1752
963static long gsnedf_activate_plugin(void) 1753static long gsnedf_activate_plugin(void)
964{ 1754{
965 int cpu; 1755 int cpu;
966 cpu_entry_t *entry; 1756 cpu_entry_t *entry;
967 1757
968 bheap_init(&gsnedf_cpu_heap); 1758 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
969#ifdef CONFIG_RELEASE_MASTER 1759#ifdef CONFIG_RELEASE_MASTER
970 gsnedf.release_master = atomic_read(&release_master_cpu); 1760 gsnedf.release_master = atomic_read(&release_master_cpu);
971#endif 1761#endif
972 1762
973 for_each_online_cpu(cpu) { 1763 for_each_online_cpu(cpu) {
974 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1764 entry = &per_cpu(gsnedf_cpu_entries, cpu);
975 bheap_node_init(&entry->hn, entry); 1765 INIT_BINHEAP_NODE(&entry->hn);
976 entry->linked = NULL; 1766 entry->linked = NULL;
977 entry->scheduled = NULL; 1767 entry->scheduled = NULL;
978#ifdef CONFIG_RELEASE_MASTER 1768#ifdef CONFIG_RELEASE_MASTER
@@ -986,6 +1776,20 @@ static long gsnedf_activate_plugin(void)
986 } 1776 }
987#endif 1777#endif
988 } 1778 }
1779
1780#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1781 gsnedf_pending_tasklets.head = NULL;
1782 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
1783#endif
1784
1785#ifdef CONFIG_LITMUS_SOFTIRQD
1786 spawn_klitirqd(NULL);
1787#endif
1788
1789#ifdef CONFIG_LITMUS_NVIDIA
1790 init_nvidia_info();
1791#endif
1792
989 return 0; 1793 return 0;
990} 1794}
991 1795
@@ -1002,8 +1806,31 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
1002 .task_block = gsnedf_task_block, 1806 .task_block = gsnedf_task_block,
1003 .admit_task = gsnedf_admit_task, 1807 .admit_task = gsnedf_admit_task,
1004 .activate_plugin = gsnedf_activate_plugin, 1808 .activate_plugin = gsnedf_activate_plugin,
1809 .compare = edf_higher_prio,
1005#ifdef CONFIG_LITMUS_LOCKING 1810#ifdef CONFIG_LITMUS_LOCKING
1006 .allocate_lock = gsnedf_allocate_lock, 1811 .allocate_lock = gsnedf_allocate_lock,
1812 .increase_prio = increase_priority_inheritance,
1813 .decrease_prio = decrease_priority_inheritance,
1814#endif
1815#ifdef CONFIG_LITMUS_NESTED_LOCKING
1816 .nested_increase_prio = nested_increase_priority_inheritance,
1817 .nested_decrease_prio = nested_decrease_priority_inheritance,
1818 .__compare = __edf_higher_prio,
1819#endif
1820#ifdef CONFIG_LITMUS_DGL_SUPPORT
1821 .get_dgl_spinlock = gsnedf_get_dgl_spinlock,
1822#endif
1823#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1824 .allocate_aff_obs = gsnedf_allocate_affinity_observer,
1825#endif
1826#ifdef CONFIG_LITMUS_SOFTIRQD
1827 .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
1828 .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
1829#endif
1830#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1831 .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet,
1832 .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
1833 .run_tasklets = gsnedf_run_tasklets,
1007#endif 1834#endif
1008}; 1835};
1009 1836
@@ -1013,15 +1840,20 @@ static int __init init_gsn_edf(void)
1013 int cpu; 1840 int cpu;
1014 cpu_entry_t *entry; 1841 cpu_entry_t *entry;
1015 1842
1016 bheap_init(&gsnedf_cpu_heap); 1843 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
1017 /* initialize CPU state */ 1844 /* initialize CPU state */
1018 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1845 for (cpu = 0; cpu < NR_CPUS; ++cpu) {
1019 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1846 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1020 gsnedf_cpus[cpu] = entry; 1847 gsnedf_cpus[cpu] = entry;
1021 entry->cpu = cpu; 1848 entry->cpu = cpu;
1022 entry->hn = &gsnedf_heap_node[cpu]; 1849
1023 bheap_node_init(&entry->hn, entry); 1850 INIT_BINHEAP_NODE(&entry->hn);
1024 } 1851 }
1852
1853#ifdef CONFIG_LITMUS_DGL_SUPPORT
1854 raw_spin_lock_init(&dgl_lock);
1855#endif
1856
1025 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); 1857 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1026 return register_sched_plugin(&gsn_edf_plugin); 1858 return register_sched_plugin(&gsn_edf_plugin);
1027} 1859}
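Exporting compare, increase_prio and decrease_prio through struct sched_plugin lets protocol code that is shared between plugins route priority comparisons and inheritance through the active plugin instead of calling GSN-EDF helpers directly. A sketch of that indirection, assuming (as elsewhere in LITMUS^RT, though not shown in this patch) that the active plugin is reachable through a global litmus pointer:

/* Sketch only: plugin-agnostic use of the new callbacks; the global
 * 'litmus' plugin pointer is an assumption, not part of this hunk. */
static void grant_inheritance(struct task_struct *owner,
			      struct task_struct *waiter)
{
	/* .compare is edf_higher_prio under GSN-EDF */
	if (litmus->compare(waiter, owner))
		litmus->increase_prio(owner, waiter);
}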
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 5a15ce938984..9a6fe487718e 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
103 } 103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW 104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->oncpu) 105 if (next->oncpu)
106 {
106 TRACE_TASK(next, "waiting for !oncpu"); 107 TRACE_TASK(next, "waiting for !oncpu");
108 }
107 while (next->oncpu) { 109 while (next->oncpu) {
108 cpu_relax(); 110 cpu_relax();
109 mb(); 111 mb();
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 00a1900d6457..245e41c25a5d 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
13#include <litmus/preempt.h> 13#include <litmus/preempt.h>
14#include <litmus/jobs.h> 14#include <litmus/jobs.h>
15 15
16#ifdef CONFIG_LITMUS_NVIDIA
17#include <litmus/nvidia_info.h>
18#endif
19
16/* 20/*
17 * Generic function to trigger preemption on either local or remote cpu 21 * Generic function to trigger preemption on either local or remote cpu
18 * from scheduler plugins. The key feature is that this function is 22 * from scheduler plugins. The key feature is that this function is
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)
102 106
103static long litmus_dummy_activate_plugin(void) 107static long litmus_dummy_activate_plugin(void)
104{ 108{
109#ifdef CONFIG_LITMUS_NVIDIA
110 shutdown_nvidia_info();
111#endif
105 return 0; 112 return 0;
106} 113}
107 114
@@ -110,14 +117,93 @@ static long litmus_dummy_deactivate_plugin(void)
110 return 0; 117 return 0;
111} 118}
112 119
113#ifdef CONFIG_LITMUS_LOCKING 120static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b)
121{
122 TRACE_CUR("WARNING: Dummy compare function called!\n");
123 return 0;
124}
114 125
126#ifdef CONFIG_LITMUS_LOCKING
115static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, 127static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
116 void* __user config) 128 void* __user config)
117{ 129{
118 return -ENXIO; 130 return -ENXIO;
119} 131}
120 132
133static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh)
134{
135}
136
137static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
138{
139}
140#endif
141
142#ifdef CONFIG_LITMUS_SOFTIRQD
143static void litmus_dummy_increase_prio_klitirqd(struct task_struct* klitirqd,
144 struct task_struct* old_owner,
145 struct task_struct* new_owner)
146{
147}
148
149static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd,
150 struct task_struct* old_owner)
151{
152}
153#endif
154
155#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
156static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
157{
158 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
159 return(0); // failure.
160}
161
162static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio,
163 struct task_struct *new_prio)
164{
165 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
166}
167
168static void litmus_dummy_run_tasklets(struct task_struct* t)
169{
170 //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
171}
172#endif
173
174#ifdef CONFIG_LITMUS_NESTED_LOCKING
175static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh,
176 raw_spinlock_t *to_unlock, unsigned long irqflags)
177{
178}
179
180static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh,
181 raw_spinlock_t *to_unlock, unsigned long irqflags)
182{
183}
184
185static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mode,
186 struct task_struct* b, comparison_mode_t b_mode)
187{
188 TRACE_CUR("WARNING: Dummy compare function called!\n");
189 return 0;
190}
191#endif
192
193#ifdef CONFIG_LITMUS_DGL_SUPPORT
194static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t)
195{
196 return NULL;
197}
198#endif
199
200#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
201static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs,
202 int type,
203 void* __user config)
204{
205 return -ENXIO;
206}
121#endif 207#endif
122 208
123 209
@@ -136,9 +222,33 @@ struct sched_plugin linux_sched_plugin = {
136 .finish_switch = litmus_dummy_finish_switch, 222 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin, 223 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin, 224 .deactivate_plugin = litmus_dummy_deactivate_plugin,
225 .compare = litmus_dummy_compare,
139#ifdef CONFIG_LITMUS_LOCKING 226#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock, 227 .allocate_lock = litmus_dummy_allocate_lock,
228 .increase_prio = litmus_dummy_increase_prio,
229 .decrease_prio = litmus_dummy_decrease_prio,
230#endif
231#ifdef CONFIG_LITMUS_NESTED_LOCKING
232 .nested_increase_prio = litmus_dummy_nested_increase_prio,
233 .nested_decrease_prio = litmus_dummy_nested_decrease_prio,
234 .__compare = litmus_dummy___compare,
235#endif
236#ifdef CONFIG_LITMUS_SOFTIRQD
237 .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd,
238 .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd,
239#endif
240#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
241 .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
242 .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet,
243 .run_tasklets = litmus_dummy_run_tasklets,
244#endif
245#ifdef CONFIG_LITMUS_DGL_SUPPORT
246 .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock,
141#endif 247#endif
248#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
249 .allocate_aff_obs = litmus_dummy_allocate_aff_obs,
250#endif
251
142 .admit_task = litmus_dummy_admit_task 252 .admit_task = litmus_dummy_admit_task
143}; 253};
144 254
@@ -174,8 +284,31 @@ int register_sched_plugin(struct sched_plugin* plugin)
174 CHECK(complete_job); 284 CHECK(complete_job);
175 CHECK(activate_plugin); 285 CHECK(activate_plugin);
176 CHECK(deactivate_plugin); 286 CHECK(deactivate_plugin);
287 CHECK(compare);
177#ifdef CONFIG_LITMUS_LOCKING 288#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock); 289 CHECK(allocate_lock);
290 CHECK(increase_prio);
291 CHECK(decrease_prio);
292#endif
293#ifdef CONFIG_LITMUS_NESTED_LOCKING
294 CHECK(nested_increase_prio);
295 CHECK(nested_decrease_prio);
296 CHECK(__compare);
297#endif
298#ifdef CONFIG_LITMUS_SOFTIRQD
299 CHECK(increase_prio_klitirqd);
300 CHECK(decrease_prio_klitirqd);
301#endif
302#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
303 CHECK(enqueue_pai_tasklet);
304 CHECK(change_prio_pai_tasklet);
305 CHECK(run_tasklets);
306#endif
307#ifdef CONFIG_LITMUS_DGL_SUPPORT
308 CHECK(get_dgl_spinlock);
309#endif
310#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
311 CHECK(allocate_aff_obs);
179#endif 312#endif
180 CHECK(admit_task); 313 CHECK(admit_task);
181 314
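Every callback added to struct sched_plugin is backed by a litmus_dummy_* fallback plus a CHECK() line in register_sched_plugin(), so a plugin that does not implement a hook still gets a safe no-op. The CHECK() macro itself is not part of this hunk; it presumably follows the usual LITMUS^RT pattern of patching in the dummy:

/* Sketch of the CHECK() idiom assumed by the lines above: substitute the
 * dummy implementation for any hook the plugin left NULL. */
#define CHECK(func) do {					\
	if (!plugin->func)					\
		plugin->func = litmus_dummy_ ## func;		\
} while (0)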
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 5ef8d09ab41f..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/hardirq.h>
10 11
11#include <litmus/ftdev.h> 12#include <litmus/ftdev.h>
12#include <litmus/litmus.h> 13#include <litmus/litmus.h>
@@ -16,13 +17,13 @@
16#include <litmus/ftdev.h> 17#include <litmus/ftdev.h>
17 18
18 19
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 20#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
20 21
21#define now() litmus_clock() 22#define now() litmus_clock()
22 23
23struct local_buffer { 24struct local_buffer {
24 struct st_event_record record[NO_EVENTS]; 25 struct st_event_record record[NUM_EVENTS];
25 char flag[NO_EVENTS]; 26 char flag[NUM_EVENTS];
26 struct ft_buffer ftbuf; 27 struct ft_buffer ftbuf;
27}; 28};
28 29
@@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
41 int i, ok = 0, err; 42 int i, ok = 0, err;
42 printk("Allocated %u sched_trace_xxx() events per CPU " 43 printk("Allocated %u sched_trace_xxx() events per CPU "
43 "(buffer size: %d bytes)\n", 44 "(buffer size: %d bytes)\n",
44 NO_EVENTS, (int) sizeof(struct local_buffer)); 45 NUM_EVENTS, (int) sizeof(struct local_buffer));
45 46
46 err = ftdev_init(&st_dev, THIS_MODULE, 47 err = ftdev_init(&st_dev, THIS_MODULE,
47 num_online_cpus(), "sched_trace"); 48 num_online_cpus(), "sched_trace");
@@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
50 51
51 for (i = 0; i < st_dev.minor_cnt; i++) { 52 for (i = 0; i < st_dev.minor_cnt; i++) {
52 buf = &per_cpu(st_event_buffer, i); 53 buf = &per_cpu(st_event_buffer, i);
53 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, 54 ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
54 sizeof(struct st_event_record), 55 sizeof(struct st_event_record),
55 buf->flag, 56 buf->flag,
56 buf->record); 57 buf->record);
@@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
154{ 155{
155 struct task_struct *t = (struct task_struct*) _task; 156 struct task_struct *t = (struct task_struct*) _task;
156 struct st_event_record* rec; 157 struct st_event_record* rec;
157 if (is_realtime(t)) { 158 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
159 {
158 rec = get_record(ST_SWITCH_TO, t); 160 rec = get_record(ST_SWITCH_TO, t);
159 if (rec) { 161 if (rec) {
160 rec->data.switch_to.when = now(); 162 rec->data.switch_to.when = now();
@@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
169{ 171{
170 struct task_struct *t = (struct task_struct*) _task; 172 struct task_struct *t = (struct task_struct*) _task;
171 struct st_event_record* rec; 173 struct st_event_record* rec;
172 if (is_realtime(t)) { 174 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
175 {
173 rec = get_record(ST_SWITCH_AWAY, t); 176 rec = get_record(ST_SWITCH_AWAY, t);
174 if (rec) { 177 if (rec) {
175 rec->data.switch_away.when = now(); 178 rec->data.switch_away.when = now();
@@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
188 if (rec) { 191 if (rec) {
189 rec->data.completion.when = now(); 192 rec->data.completion.when = now();
190 rec->data.completion.forced = forced; 193 rec->data.completion.forced = forced;
194#ifdef CONFIG_LITMUS_NVIDIA
195 rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
196#endif
191 put_record(rec); 197 put_record(rec);
192 } 198 }
193} 199}
@@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id,
239 put_record(rec); 245 put_record(rec);
240 } 246 }
241} 247}
248
249
250
251
252feather_callback void do_sched_trace_prediction_err(unsigned long id,
253 unsigned long _task,
254 unsigned long _distance,
255 unsigned long _rel_err)
256{
257 struct task_struct *t = (struct task_struct*) _task;
258 struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
259
260 if (rec) {
261 gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
262 fp_t* rel_err = (fp_t*) _rel_err;
263
264 rec->data.prediction_err.distance = *distance;
265 rec->data.prediction_err.rel_err = rel_err->val;
266 put_record(rec);
267 }
268}
269
270
271feather_callback void do_sched_trace_migration(unsigned long id,
272 unsigned long _task,
273 unsigned long _mig_info)
274{
275 struct task_struct *t = (struct task_struct*) _task;
276 struct st_event_record *rec = get_record(ST_MIGRATION, t);
277
278 if (rec) {
279 struct migration_info* mig_info = (struct migration_info*) _mig_info;
280
281 rec->hdr.extra = mig_info->distance;
282 rec->data.migration.observed = mig_info->observed;
283 rec->data.migration.estimated = mig_info->estimated;
284
285 put_record(rec);
286 }
287}
288
289
290
291
292
293
294
295
296
297feather_callback void do_sched_trace_tasklet_release(unsigned long id,
298 unsigned long _owner)
299{
300 struct task_struct *t = (struct task_struct*) _owner;
301 struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
302
303 if (rec) {
304 rec->data.tasklet_release.when = now();
305 put_record(rec);
306 }
307}
308
309
310feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
311 unsigned long _owner)
312{
313 struct task_struct *t = (struct task_struct*) _owner;
314 struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
315
316 if (rec) {
317 rec->data.tasklet_begin.when = now();
318
319 if(!in_interrupt())
320 rec->data.tasklet_begin.exe_pid = current->pid;
321 else
322 rec->data.tasklet_begin.exe_pid = 0;
323
324 put_record(rec);
325 }
326}
327EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
328
329
330feather_callback void do_sched_trace_tasklet_end(unsigned long id,
331 unsigned long _owner,
332 unsigned long _flushed)
333{
334 struct task_struct *t = (struct task_struct*) _owner;
335 struct st_event_record *rec = get_record(ST_TASKLET_END, t);
336
337 if (rec) {
338 rec->data.tasklet_end.when = now();
339 rec->data.tasklet_end.flushed = _flushed;
340
341 if(!in_interrupt())
342 rec->data.tasklet_end.exe_pid = current->pid;
343 else
344 rec->data.tasklet_end.exe_pid = 0;
345
346 put_record(rec);
347 }
348}
349EXPORT_SYMBOL(do_sched_trace_tasklet_end);
350
351
352feather_callback void do_sched_trace_work_release(unsigned long id,
353 unsigned long _owner)
354{
355 struct task_struct *t = (struct task_struct*) _owner;
356 struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
357
358 if (rec) {
359 rec->data.work_release.when = now();
360 put_record(rec);
361 }
362}
363
364
365feather_callback void do_sched_trace_work_begin(unsigned long id,
366 unsigned long _owner,
367 unsigned long _exe)
368{
369 struct task_struct *t = (struct task_struct*) _owner;
370 struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
371
372 if (rec) {
373 struct task_struct *exe = (struct task_struct*) _exe;
374 rec->data.work_begin.exe_pid = exe->pid;
375 rec->data.work_begin.when = now();
376 put_record(rec);
377 }
378}
379EXPORT_SYMBOL(do_sched_trace_work_begin);
380
381
382feather_callback void do_sched_trace_work_end(unsigned long id,
383 unsigned long _owner,
384 unsigned long _exe,
385 unsigned long _flushed)
386{
387 struct task_struct *t = (struct task_struct*) _owner;
388 struct st_event_record *rec = get_record(ST_WORK_END, t);
389
390 if (rec) {
391 struct task_struct *exe = (struct task_struct*) _exe;
392 rec->data.work_end.exe_pid = exe->pid;
393 rec->data.work_end.flushed = _flushed;
394 rec->data.work_end.when = now();
395 put_record(rec);
396 }
397}
398EXPORT_SYMBOL(do_sched_trace_work_end);
399
400
401feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
402 unsigned long _task,
403 unsigned long _inh)
404{
405 struct task_struct *t = (struct task_struct*) _task;
406 struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
407
408 if (rec) {
409 struct task_struct *inh = (struct task_struct*) _inh;
410 rec->data.effective_priority_change.when = now();
411 rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
412 inh->pid :
413 0xffff;
414
415 put_record(rec);
416 }
417}
418
419/* pray for no nesting of nv interrupts on same CPU... */
420struct tracing_interrupt_map
421{
422 int active;
423 int count;
424 unsigned long data[128]; // assume nesting less than 128...
425 unsigned long serial[128];
426};
427DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
428
429
430DEFINE_PER_CPU(u32, intCounter);
431
432feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
433 unsigned long _device)
434{
435 struct st_event_record *rec;
436 u32 serialNum;
437
438 {
439 u32* serial;
440 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
441		if(int_map->active == 0xcafebabe)
442 {
443 int_map->count++;
444 }
445 else
446 {
447 int_map->active = 0xcafebabe;
448 int_map->count = 1;
449 }
450 //int_map->data[int_map->count-1] = _device;
451
452 serial = &per_cpu(intCounter, smp_processor_id());
453 *serial += num_online_cpus();
454 serialNum = *serial;
455 int_map->serial[int_map->count-1] = serialNum;
456 }
457
458 rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
459 if(rec) {
460 u32 device = _device;
461 rec->data.nv_interrupt_begin.when = now();
462 rec->data.nv_interrupt_begin.device = device;
463 rec->data.nv_interrupt_begin.serialNumber = serialNum;
464 put_record(rec);
465 }
466}
467EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
468
469/*
470int is_interrupt_tracing_active(void)
471{
472 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
473 if(int_map->active == 0xcafebabe)
474 return 1;
475 return 0;
476}
477*/
478
479feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
480{
481 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
482 if(int_map->active == 0xcafebabe)
483 {
484 struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
485
486 int_map->count--;
487 if(int_map->count == 0)
488 int_map->active = 0;
489
490 if(rec) {
491 u32 device = _device;
492 rec->data.nv_interrupt_end.when = now();
493 //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
494 rec->data.nv_interrupt_end.device = device;
495 rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
496 put_record(rec);
497 }
498 }
499}
500EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
501
502
503
504
505
506
507
508
509
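Replacing NO_EVENTS with NUM_EVENTS = 1 << (CONFIG_SCHED_TASK_TRACE_SHIFT + 11) grows each per-CPU trace buffer by a factor of 2^11 = 2048, which is what makes it feasible to record switch_to/switch_away for every task plus the new tasklet, work-item and NV-interrupt events above. Rough capacity arithmetic, using a shift of 9 purely as an example value:

/* Illustrative arithmetic only; 9 is an example shift value. */
#define EXAMPLE_SHIFT	9
#define OLD_CAPACITY	(1 << EXAMPLE_SHIFT)		/*     512 records per CPU */
#define NEW_CAPACITY	(1 << (EXAMPLE_SHIFT + 11))	/* 1048576 records per CPU */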
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
new file mode 100644
index 000000000000..cf8e1d78aa77
--- /dev/null
+++ b/litmus/sched_trace_external.c
@@ -0,0 +1,64 @@
1#include <linux/module.h>
2
3#include <litmus/trace.h>
4#include <litmus/sched_trace.h>
5#include <litmus/litmus.h>
6
7void __sched_trace_tasklet_begin_external(struct task_struct* t)
8{
9 sched_trace_tasklet_begin(t);
10}
11EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
12
13void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
14{
15 sched_trace_tasklet_end(t, flushed);
16}
17EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
18
19
20
21void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
22{
23 sched_trace_work_begin(t, e);
24}
25EXPORT_SYMBOL(__sched_trace_work_begin_external);
26
27void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
28{
29 sched_trace_work_end(t, e, f);
30}
31EXPORT_SYMBOL(__sched_trace_work_end_external);
32
33
34
35void __sched_trace_nv_interrupt_begin_external(u32 device)
36{
37 //unsigned long _device = device;
38 sched_trace_nv_interrupt_begin((unsigned long)device);
39}
40EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
41
42void __sched_trace_nv_interrupt_end_external(u32 device)
43{
44 //unsigned long _device = device;
45 sched_trace_nv_interrupt_end((unsigned long)device);
46}
47EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
48
49
50#ifdef CONFIG_LITMUS_NVIDIA
51
52#define EXX_TS(evt) \
53void __##evt(void) { evt; } \
54EXPORT_SYMBOL(__##evt);
55
56EXX_TS(TS_NV_TOPISR_START)
57EXX_TS(TS_NV_TOPISR_END)
58EXX_TS(TS_NV_BOTISR_START)
59EXX_TS(TS_NV_BOTISR_END)
60EXX_TS(TS_NV_RELEASE_BOTISR_START)
61EXX_TS(TS_NV_RELEASE_BOTISR_END)
62
63#endif
64
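The EXX_TS() macro wraps each timestamp macro in an exported function so that code outside this translation unit (presumably the NVIDIA interrupt-handling glue) can still emit these feather-trace timestamps. For the first entry, the expansion is simply:

/* Expansion of EXX_TS(TS_NV_TOPISR_START): */
void __TS_NV_TOPISR_START(void)
{
	TS_NV_TOPISR_START;
}
EXPORT_SYMBOL(__TS_NV_TOPISR_START);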