author     Glenn Elliott <gelliott@cs.unc.edu>    2012-05-26 17:29:58 -0400
committer  Glenn Elliott <gelliott@cs.unc.edu>    2012-05-26 17:29:58 -0400
commit     a463f9a9e04385f0729f7435a0a6dff7d89b25de (patch)
tree       00ff42c305926c800e18b13df8440a4de1a1a041
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
GPUSync patch for Litmus 2012.1.
-rw-r--r--  arch/x86/kernel/irq.c                  |    4
-rw-r--r--  arch/x86/kernel/syscall_table_32.S     |    1
-rw-r--r--  include/linux/completion.h             |    1
-rw-r--r--  include/linux/interrupt.h              |   10
-rw-r--r--  include/linux/mutex.h                  |   10
-rw-r--r--  include/linux/semaphore.h              |    9
-rw-r--r--  include/linux/workqueue.h              |   18
-rw-r--r--  include/litmus/binheap.h               |  207
-rw-r--r--  include/litmus/edf_common.h            |   12
-rw-r--r--  include/litmus/fdso.h                  |   14
-rw-r--r--  include/litmus/fpmath.h                |  145
-rw-r--r--  include/litmus/gpu_affinity.h          |   49
-rw-r--r--  include/litmus/ikglp_lock.h            |  160
-rw-r--r--  include/litmus/kexclu_affinity.h       |   35
-rw-r--r--  include/litmus/kfmlp_lock.h            |   97
-rw-r--r--  include/litmus/litmus.h                |    9
-rw-r--r--  include/litmus/litmus_softirq.h        |  199
-rw-r--r--  include/litmus/locking.h               |  142
-rw-r--r--  include/litmus/nvidia_info.h           |   46
-rw-r--r--  include/litmus/preempt.h               |    2
-rw-r--r--  include/litmus/rsm_lock.h              |   54
-rw-r--r--  include/litmus/rt_param.h              |  100
-rw-r--r--  include/litmus/sched_plugin.h          |   76
-rw-r--r--  include/litmus/sched_trace.h           |  218
-rw-r--r--  include/litmus/sched_trace_external.h  |   78
-rw-r--r--  include/litmus/trace.h                 |   34
-rw-r--r--  include/litmus/unistd_32.h             |    5
-rw-r--r--  include/litmus/unistd_64.h             |    9
-rw-r--r--  kernel/lockdep.c                       |    7
-rw-r--r--  kernel/mutex.c                         |  125
-rw-r--r--  kernel/sched.c                         |   27
-rw-r--r--  kernel/semaphore.c                     |   13
-rw-r--r--  kernel/softirq.c                       |  322
-rw-r--r--  kernel/workqueue.c                     |   71
-rw-r--r--  litmus/Kconfig                         |  148
-rw-r--r--  litmus/Makefile                        |   11
-rw-r--r--  litmus/affinity.c                      |    2
-rw-r--r--  litmus/binheap.c                       |  443
-rw-r--r--  litmus/edf_common.c                    |  147
-rw-r--r--  litmus/fdso.c                          |   13
-rw-r--r--  litmus/gpu_affinity.c                  |  113
-rw-r--r--  litmus/ikglp_lock.c                    | 2838
-rw-r--r--  litmus/jobs.c                          |   17
-rw-r--r--  litmus/kexclu_affinity.c               |   92
-rw-r--r--  litmus/kfmlp_lock.c                    | 1002
-rw-r--r--  litmus/litmus.c                        |  126
-rw-r--r--  litmus/litmus_pai_softirq.c            |   64
-rw-r--r--  litmus/litmus_proc.c                   |   17
-rw-r--r--  litmus/litmus_softirq.c                | 1582
-rw-r--r--  litmus/locking.c                       |  393
-rw-r--r--  litmus/nvidia_info.c                   |  597
-rw-r--r--  litmus/preempt.c                       |    5
-rw-r--r--  litmus/rsm_lock.c                      |  796
-rw-r--r--  litmus/sched_cedf.c                    | 1062
-rw-r--r--  litmus/sched_gsn_edf.c                 | 1032
-rw-r--r--  litmus/sched_litmus.c                  |    2
-rw-r--r--  litmus/sched_plugin.c                  |  135
-rw-r--r--  litmus/sched_task_trace.c              |  282
-rw-r--r--  litmus/sched_trace_external.c          |   64
59 files changed, 13012 insertions(+), 280 deletions(-)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6c0802eb2f7f..680a5cb4b585 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -10,6 +10,10 @@
 #include <linux/ftrace.h>
 #include <linux/delay.h>
 
+#ifdef CONFIG_LITMUS_NVIDIA
+#include <litmus/sched_trace.h>
+#endif
+
 #include <asm/apic.h>
 #include <asm/io_apic.h>
 #include <asm/irq.h>
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d0126222b394..0cb4373698e7 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -358,3 +358,4 @@ ENTRY(sys_call_table)
 	.long sys_wait_for_ts_release
 	.long sys_release_ts		/* +10 */
 	.long sys_null_call
+	.long sys_register_nv_device
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 9d727271c9fe..cff405c4dd3a 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -76,6 +76,7 @@ static inline void init_completion(struct completion *x)
 	init_waitqueue_head(&x->wait);
 }
 
+extern void __wait_for_completion_locked(struct completion *);
 extern void wait_for_completion(struct completion *);
 extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index f6efed0039ed..57a7bc8807be 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -445,6 +445,7 @@ static inline void __raise_softirq_irqoff(unsigned int nr)
 
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
+extern void wakeup_softirqd(void);
 
 /* This is the worklist that queues up per-cpu softirq work.
  *
@@ -500,6 +501,10 @@ struct tasklet_struct
 	atomic_t count;
 	void (*func)(unsigned long);
 	unsigned long data;
+
+#if defined(CONFIG_LITMUS_SOFTIRQD) || defined(CONFIG_LITMUS_PAI_SOFTIRQD)
+	struct task_struct *owner;
+#endif
 };
 
 #define DECLARE_TASKLET(name, func, data) \
@@ -537,6 +542,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t)
 #define tasklet_unlock(t) do { } while (0)
 #endif
 
+extern void ___tasklet_schedule(struct tasklet_struct *t);
 extern void __tasklet_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_schedule(struct tasklet_struct *t)
@@ -545,6 +551,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t)
 	__tasklet_schedule(t);
 }
 
+extern void ___tasklet_hi_schedule(struct tasklet_struct *t);
 extern void __tasklet_hi_schedule(struct tasklet_struct *t);
 
 static inline void tasklet_hi_schedule(struct tasklet_struct *t)
@@ -553,6 +560,7 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t)
 	__tasklet_hi_schedule(t);
 }
 
+extern void ___tasklet_hi_schedule_first(struct tasklet_struct *t);
 extern void __tasklet_hi_schedule_first(struct tasklet_struct *t);
 
 /*
@@ -582,7 +590,7 @@ static inline void tasklet_disable(struct tasklet_struct *t)
 }
 
 static inline void tasklet_enable(struct tasklet_struct *t)
 {
 	smp_mb__before_atomic_dec();
 	atomic_dec(&t->count);
 }
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index a940fe435aca..cb47debbf24d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -126,6 +126,15 @@ static inline int mutex_is_locked(struct mutex *lock)
 	return atomic_read(&lock->count) != 1;
 }
 
+/* return non-zero to abort.  only pre-side-effects may abort */
+typedef int (*side_effect_t)(unsigned long);
+extern void mutex_lock_sfx(struct mutex *lock,
+			   side_effect_t pre, unsigned long pre_arg,
+			   side_effect_t post, unsigned long post_arg);
+extern void mutex_unlock_sfx(struct mutex *lock,
+			     side_effect_t pre, unsigned long pre_arg,
+			     side_effect_t post, unsigned long post_arg);
+
 /*
  * See kernel/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/mutex-design.txt.
@@ -153,6 +162,7 @@ extern void mutex_lock(struct mutex *lock);
 extern int __must_check mutex_lock_interruptible(struct mutex *lock);
 extern int __must_check mutex_lock_killable(struct mutex *lock);
 
+
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
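
The side-effect variants declared above are implemented in kernel/mutex.c (also modified by this commit). The caller below is purely illustrative and not part of the patch: it only exercises the declared signatures; note_acquire, note_release, and example_mutex are made-up names, and the exact abort semantics of a non-zero return from a pre-side-effect are defined by the implementation.

/* Hypothetical caller, for illustration only. */
static int note_acquire(unsigned long arg)
{
	pr_debug("acquiring, arg=%lu\n", arg);
	return 0;	/* zero: do not abort the lock operation */
}

static int note_release(unsigned long arg)
{
	pr_debug("released, arg=%lu\n", arg);
	return 0;
}

static DEFINE_MUTEX(example_mutex);

static void example_critical_section(void)
{
	/* run note_acquire() as a pre-side-effect of locking ... */
	mutex_lock_sfx(&example_mutex, note_acquire, 1UL, NULL, 0UL);
	/* ... critical section ... */
	/* ... and note_release() as a post-side-effect of unlocking */
	mutex_unlock_sfx(&example_mutex, NULL, 0UL, note_release, 2UL);
}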
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 39fa04966aa8..c83fc2b65f01 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -43,4 +43,13 @@ extern int __must_check down_trylock(struct semaphore *sem);
 extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
 extern void up(struct semaphore *sem);
 
+extern void __down(struct semaphore *sem);
+extern void __up(struct semaphore *sem);
+
+struct semaphore_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	int up;
+};
+
 #endif /* __LINUX_SEMAPHORE_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f584aba78ca9..1ec2ec7d4e3b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -83,6 +83,9 @@ struct work_struct {
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map lockdep_map;
 #endif
+#ifdef CONFIG_LITMUS_SOFTIRQD
+	struct task_struct *owner;
+#endif
 };
 
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
@@ -115,11 +118,25 @@ struct execute_work {
 #define __WORK_INIT_LOCKDEP_MAP(n, k)
 #endif
 
+#ifdef CONFIG_LITMUS_SOFTIRQD
+#define __WORK_INIT_OWNER() \
+	.owner = NULL,
+
+#define PREPARE_OWNER(_work, _owner) \
+	do { \
+		(_work)->owner = (_owner); \
+	} while(0)
+#else
+#define __WORK_INIT_OWNER()
+#define PREPARE_OWNER(_work, _owner)
+#endif
+
 #define __WORK_INITIALIZER(n, f) {				\
 	.data = WORK_DATA_STATIC_INIT(),			\
 	.entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
 	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
+	__WORK_INIT_OWNER()					\
 	}
 
 #define __DELAYED_WORK_INITIALIZER(n, f) {			\
@@ -357,6 +374,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 extern void flush_workqueue(struct workqueue_struct *wq);
 extern void flush_scheduled_work(void);
 
+extern int __schedule_work(struct work_struct *work);
 extern int schedule_work(struct work_struct *work);
 extern int schedule_work_on(int cpu, struct work_struct *work);
 extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
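
For orientation, a sketch (not part of the patch) of how the new owner plumbing is meant to be used by callers: tag a work item with the real-time task it serves before queueing it. PREPARE_OWNER() compiles to a no-op unless CONFIG_LITMUS_SOFTIRQD is set; my_deferred_fn, my_work, and rt_owner are placeholder names.

static void my_deferred_fn(struct work_struct *w)
{
	/* deferred processing performed on behalf of the tagged owner */
}

static DECLARE_WORK(my_work, my_deferred_fn);

static void queue_on_behalf_of(struct task_struct *rt_owner)
{
	PREPARE_OWNER(&my_work, rt_owner);	/* no-op without CONFIG_LITMUS_SOFTIRQD */
	schedule_work(&my_work);
}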
diff --git a/include/litmus/binheap.h b/include/litmus/binheap.h
new file mode 100644
index 000000000000..9e966e3886cb
--- /dev/null
+++ b/include/litmus/binheap.h
@@ -0,0 +1,207 @@
1#ifndef LITMUS_BINARY_HEAP_H
2#define LITMUS_BINARY_HEAP_H
3
4#include <linux/kernel.h>
5
6/**
7 * Simple binary heap with add, arbitrary delete, delete_root, and top
8 * operations.
9 *
10 * Style meant to conform with list.h.
11 *
12 * Motivation: Linux's prio_heap.h is of fixed size. Litmus's binomial
13 * heap may be overkill (and perhaps not general enough) for some applications.
14 *
15 * Note: In order to make node swaps fast, a node inserted with a data pointer
16 * may not always hold said data pointer. This is similar to the binomial heap
17 * implementation. This does make node deletion tricky since we have to
18 * (1) locate the node that holds the data pointer to delete, and (2) the
19 * node that was originally inserted with said data pointer. These have to be
20 * coalesced into a single node before removal (see usage of
21 * __binheap_safe_swap()). We have to track node references to accomplish this.
22 */
23
24struct binheap_node {
25 void *data;
26 struct binheap_node *parent;
27 struct binheap_node *left;
28 struct binheap_node *right;
29
30 /* pointer to binheap_node that holds *data for which this binheap_node
31 * was originally inserted. (*data "owns" this node)
32 */
33 struct binheap_node *ref;
34 struct binheap_node **ref_ptr;
35};
36
37/**
38 * Signature of comparator function.  Assumed 'less-than' (min-heap).
39 * Pass in 'greater-than' for max-heap.
40 *
41 * TODO: Consider macro-based implementation that allows comparator to be
42 * inlined (similar to Linux red/black tree) for greater efficiency.
43 */
44typedef int (*binheap_order_t)(struct binheap_node *a,
45 struct binheap_node *b);
46
47
48struct binheap_handle {
49 struct binheap_node *root;
50
51 /* pointer to node to take next inserted child */
52 struct binheap_node *next;
53
54 /* pointer to last node in complete binary tree */
55 struct binheap_node *last;
56
57 /* comparator function pointer */
58 binheap_order_t compare;
59};
60
61
62#define BINHEAP_POISON ((void*)(0xdeadbeef))
63
64
65/**
66 * binheap_entry - get the struct for this heap node.
67 * Only valid when called upon heap nodes other than the root handle.
68 * @ptr: the heap node.
69 * @type: the type of struct pointed to by binheap_node::data.
70 * @member: unused.
71 */
72#define binheap_entry(ptr, type, member) \
73((type *)((ptr)->data))
74
75/**
76 * binheap_node_container - get the struct that contains this node.
77 * Only valid when called upon heap nodes other than the root handle.
78 * @ptr: the heap node.
79 * @type: the type of struct the node is embedded in.
80 * @member: the name of the binheap_struct within the (type) struct.
81 */
82#define binheap_node_container(ptr, type, member) \
83container_of((ptr), type, member)
84
85/**
86 * binheap_top_entry - get the struct for the node at the top of the heap.
87 * Only valid when called upon the heap handle node.
88 * @ptr: the special heap-handle node.
89 * @type: the type of the struct the head is embedded in.
90 * @member: the name of the binheap_struct within the (type) struct.
91 */
92#define binheap_top_entry(ptr, type, member) \
93binheap_entry((ptr)->root, type, member)
94
95/**
96 * binheap_delete_root - remove the root element from the heap.
97 * @handle: handle to the heap.
98 * @type: the type of the struct the head is embedded in.
99 * @member: the name of the binheap_struct within the (type) struct.
100 */
101#define binheap_delete_root(handle, type, member) \
102__binheap_delete_root((handle), &((type *)((handle)->root->data))->member)
103
104/**
105 * binheap_delete - remove an arbitrary element from the heap.
106 * @to_delete: pointer to node to be removed.
107 * @handle: handle to the heap.
108 */
109#define binheap_delete(to_delete, handle) \
110__binheap_delete((to_delete), (handle))
111
112/**
113 * binheap_add - insert an element to the heap
114 * new_node: node to add.
115 * @handle: handle to the heap.
116 * @type: the type of the struct the head is embedded in.
117 * @member: the name of the binheap_struct within the (type) struct.
118 */
119#define binheap_add(new_node, handle, type, member) \
120__binheap_add((new_node), (handle), container_of((new_node), type, member))
121
122/**
123 * binheap_decrease - re-eval the position of a node (based upon its
124 * original data pointer).
125 * @handle: handle to the heap.
126 * @orig_node: node that was associated with the data pointer
127 * (whose value has changed) when said pointer was
128 * added to the heap.
129 */
130#define binheap_decrease(orig_node, handle) \
131__binheap_decrease((orig_node), (handle))
132
133#define BINHEAP_NODE_INIT() { NULL, BINHEAP_POISON, NULL, NULL , NULL, NULL}
134
135#define BINHEAP_NODE(name) \
136 struct binheap_node name = BINHEAP_NODE_INIT()
137
138
139static inline void INIT_BINHEAP_NODE(struct binheap_node *n)
140{
141 n->data = NULL;
142 n->parent = BINHEAP_POISON;
143 n->left = NULL;
144 n->right = NULL;
145 n->ref = NULL;
146 n->ref_ptr = NULL;
147}
148
149static inline void INIT_BINHEAP_HANDLE(
150 struct binheap_handle *handle,
151 binheap_order_t compare)
152{
153 handle->root = NULL;
154 handle->next = NULL;
155 handle->last = NULL;
156 handle->compare = compare;
157}
158
159/* Returns true (1) if binheap is empty. */
160static inline int binheap_empty(struct binheap_handle *handle)
161{
162 return(handle->root == NULL);
163}
164
165/* Returns true (1) if binheap node is in a heap. */
166static inline int binheap_is_in_heap(struct binheap_node *node)
167{
168 return (node->parent != BINHEAP_POISON);
169}
170
171
172int binheap_is_in_this_heap(struct binheap_node *node, struct binheap_handle* heap);
173
174
175
176void __binheap_add(struct binheap_node *new_node,
177 struct binheap_handle *handle,
178 void *data);
179
180
181/**
182 * Removes the root node from the heap. The node is removed after coalescing
183 * the binheap_node with its original data pointer at the root of the tree.
184 *
185 * The 'last' node in the tree is then swapped up to the root and bubbled
186 * down.
187 */
188void __binheap_delete_root(struct binheap_handle *handle,
189 struct binheap_node *container);
190
191/**
192 * Delete an arbitrary node. Bubble node to delete up to the root,
193 * and then delete the root.
194 */
195void __binheap_delete(
196 struct binheap_node *node_to_delete,
197 struct binheap_handle *handle);
198
199/**
200 * Bubble up a node whose key has decreased in value.
201 */
202void __binheap_decrease(struct binheap_node *orig_node,
203 struct binheap_handle *handle);
204
205
206#endif
207
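
A brief usage sketch of the binheap API declared above (not from the patch): a min-heap of jobs ordered by deadline. The names struct demo_job, jobs, and earlier_deadline_order are invented for the example.

#include <litmus/binheap.h>

struct demo_job {
	unsigned long long deadline;
	struct binheap_node heap_node;	/* embedded heap linkage */
};

/* 'less-than' comparator => min-heap on deadline */
static int earlier_deadline_order(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_job *ja = binheap_entry(a, struct demo_job, heap_node);
	struct demo_job *jb = binheap_entry(b, struct demo_job, heap_node);
	return ja->deadline < jb->deadline;
}

static struct binheap_handle jobs;

static void demo(struct demo_job *j)
{
	INIT_BINHEAP_HANDLE(&jobs, earlier_deadline_order);

	INIT_BINHEAP_NODE(&j->heap_node);
	binheap_add(&j->heap_node, &jobs, struct demo_job, heap_node);

	if (!binheap_empty(&jobs)) {
		struct demo_job *top =
			binheap_top_entry(&jobs, struct demo_job, heap_node);
		/* ... use 'top', then remove it ... */
		binheap_delete_root(&jobs, struct demo_job, heap_node);
	}
}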
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
index bbaf22ea7f12..63dff7efe8fb 100644
--- a/include/litmus/edf_common.h
+++ b/include/litmus/edf_common.h
@@ -20,6 +20,18 @@ int edf_higher_prio(struct task_struct* first,
 
 int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
 
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+/* binheap_nodes must be embedded within 'struct litmus_lock' */
+int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b);
+int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b);
+int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
+int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b);
+
+int __edf_higher_prio(struct task_struct* first, comparison_mode_t first_mode,
+		      struct task_struct* second, comparison_mode_t second_mode);
+
+#endif
+
 int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
 
 #endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index caf2a1e6918c..1f5d3bd1a1db 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -20,7 +20,16 @@ typedef enum {
 	FMLP_SEM	= 0,
 	SRP_SEM		= 1,
 
-	MAX_OBJ_TYPE	= 1
+	RSM_MUTEX	= 2,
+	IKGLP_SEM	= 3,
+	KFMLP_SEM	= 4,
+
+	IKGLP_SIMPLE_GPU_AFF_OBS = 5,
+	IKGLP_GPU_AFF_OBS = 6,
+	KFMLP_SIMPLE_GPU_AFF_OBS = 7,
+	KFMLP_GPU_AFF_OBS = 8,
+
+	MAX_OBJ_TYPE = 8
 } obj_type_t;
 
 struct inode_obj_id {
@@ -64,8 +73,11 @@ static inline void* od_lookup(int od, obj_type_t type)
 }
 
 #define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
+#define lookup_kfmlp_sem(od)((struct pi_semaphore*) od_lookup(od, KFMLP_SEM))
 #define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
 #define lookup_ics(od)     ((struct ics*) od_lookup(od, ICS_ID))
 
+#define lookup_rsm_mutex(od)((struct litmus_lock*) od_lookup(od, FMLP_SEM))
+
 
 #endif
diff --git a/include/litmus/fpmath.h b/include/litmus/fpmath.h
new file mode 100644
index 000000000000..04d4bcaeae96
--- /dev/null
+++ b/include/litmus/fpmath.h
@@ -0,0 +1,145 @@
1#ifndef __FP_MATH_H__
2#define __FP_MATH_H__
3
4#ifndef __KERNEL__
5#include <stdint.h>
6#define abs(x) (((x) < 0) ? -(x) : x)
7#endif
8
9// Use 64-bit because we want to track things at the nanosecond scale.
10// This can lead to very large numbers.
11typedef int64_t fpbuf_t;
12typedef struct
13{
14 fpbuf_t val;
15} fp_t;
16
17#define FP_SHIFT 10
18#define ROUND_BIT (FP_SHIFT - 1)
19
20#define _fp(x) ((fp_t) {x})
21
22#ifdef __KERNEL__
23static const fp_t LITMUS_FP_ZERO = {.val = 0};
24static const fp_t LITMUS_FP_ONE = {.val = (1 << FP_SHIFT)};
25#endif
26
27static inline fp_t FP(fpbuf_t x)
28{
29 return _fp(((fpbuf_t) x) << FP_SHIFT);
30}
31
32/* divide two integers to obtain a fixed point value */
33static inline fp_t _frac(fpbuf_t a, fpbuf_t b)
34{
35 return _fp(FP(a).val / (b));
36}
37
38static inline fpbuf_t _point(fp_t x)
39{
40 return (x.val % (1 << FP_SHIFT));
41
42}
43
44#define fp2str(x) x.val
45/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
46#define _FP_ "%ld/1024"
47
48static inline fpbuf_t _floor(fp_t x)
49{
50 return x.val >> FP_SHIFT;
51}
52
53/* FIXME: negative rounding */
54static inline fpbuf_t _round(fp_t x)
55{
56 return _floor(x) + ((x.val >> ROUND_BIT) & 1);
57}
58
59/* multiply two fixed point values */
60static inline fp_t _mul(fp_t a, fp_t b)
61{
62 return _fp((a.val * b.val) >> FP_SHIFT);
63}
64
65static inline fp_t _div(fp_t a, fp_t b)
66{
67#if !defined(__KERNEL__) && !defined(unlikely)
68#define unlikely(x) (x)
69#define DO_UNDEF_UNLIKELY
70#endif
71 /* try not to overflow */
72 if (unlikely( a.val > (2l << ((sizeof(fpbuf_t)*8) - FP_SHIFT)) ))
73 return _fp((a.val / b.val) << FP_SHIFT);
74 else
75 return _fp((a.val << FP_SHIFT) / b.val);
76#ifdef DO_UNDEF_UNLIKELY
77#undef unlikely
78#undef DO_UNDEF_UNLIKELY
79#endif
80}
81
82static inline fp_t _add(fp_t a, fp_t b)
83{
84 return _fp(a.val + b.val);
85}
86
87static inline fp_t _sub(fp_t a, fp_t b)
88{
89 return _fp(a.val - b.val);
90}
91
92static inline fp_t _neg(fp_t x)
93{
94 return _fp(-x.val);
95}
96
97static inline fp_t _abs(fp_t x)
98{
99 return _fp(abs(x.val));
100}
101
102/* works the same as casting float/double to integer */
103static inline fpbuf_t _fp_to_integer(fp_t x)
104{
105 return _floor(_abs(x)) * ((x.val > 0) ? 1 : -1);
106}
107
108static inline fp_t _integer_to_fp(fpbuf_t x)
109{
110 return _frac(x,1);
111}
112
113static inline int _leq(fp_t a, fp_t b)
114{
115 return a.val <= b.val;
116}
117
118static inline int _geq(fp_t a, fp_t b)
119{
120 return a.val >= b.val;
121}
122
123static inline int _lt(fp_t a, fp_t b)
124{
125 return a.val < b.val;
126}
127
128static inline int _gt(fp_t a, fp_t b)
129{
130 return a.val > b.val;
131}
132
133static inline int _eq(fp_t a, fp_t b)
134{
135 return a.val == b.val;
136}
137
138static inline fp_t _max(fp_t a, fp_t b)
139{
140 if (a.val < b.val)
141 return b;
142 else
143 return a;
144}
145#endif
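
A small worked example (not from the patch) of the fixed-point helpers above. With FP_SHIFT = 10, one integer unit corresponds to 1024 raw units, so the raw values below follow directly from the definitions.

#include <litmus/fpmath.h>

static void fp_demo(void)
{
	fp_t three_halves = _frac(3, 2);	/* 3/2  -> raw value 1536 */
	fp_t quarter      = _frac(1, 4);	/* 1/4  -> raw value 256  */
	fp_t prod         = _mul(three_halves, quarter);	/* 3/8 -> raw 384 */

	/* _floor() truncates; _round() first adds the 0.5 bit (ROUND_BIT). */
	fpbuf_t f = _floor(prod);	/* 384 >> 10 = 0 */
	fpbuf_t r = _round(prod);	/* 0.375 rounds down to 0 */
	fpbuf_t i = _fp_to_integer(_add(prod, FP(2)));	/* floor(2.375) = 2 */

	(void)f; (void)r; (void)i;
}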
diff --git a/include/litmus/gpu_affinity.h b/include/litmus/gpu_affinity.h
new file mode 100644
index 000000000000..6b3fb8b28745
--- /dev/null
+++ b/include/litmus/gpu_affinity.h
@@ -0,0 +1,49 @@
1#ifndef LITMUS_GPU_AFFINITY_H
2#define LITMUS_GPU_AFFINITY_H
3
4#include <litmus/rt_param.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/litmus.h>
7
8void update_gpu_estimate(struct task_struct* t, lt_t observed);
9gpu_migration_dist_t gpu_migration_distance(int a, int b);
10
11static inline void reset_gpu_tracker(struct task_struct* t)
12{
13 t->rt_param.accum_gpu_time = 0;
14}
15
16static inline void start_gpu_tracker(struct task_struct* t)
17{
18 t->rt_param.gpu_time_stamp = litmus_clock();
19}
20
21static inline void stop_gpu_tracker(struct task_struct* t)
22{
23 lt_t now = litmus_clock();
24 t->rt_param.accum_gpu_time += (now - t->rt_param.gpu_time_stamp);
25}
26
27static inline lt_t get_gpu_time(struct task_struct* t)
28{
29 return t->rt_param.accum_gpu_time;
30}
31
32static inline lt_t get_gpu_estimate(struct task_struct* t, gpu_migration_dist_t dist)
33{
34 int i;
35 fpbuf_t temp = _fp_to_integer(t->rt_param.gpu_migration_est[dist].est);
36 lt_t val = (temp >= 0) ? temp : 0; // never allow negative estimates...
37
38 WARN_ON(temp < 0);
39
40 // lower-bound a distant migration to be at least equal to the level
41 // below it.
42 for(i = dist-1; (val == 0) && (i >= MIG_LOCAL); --i) {
43 val = _fp_to_integer(t->rt_param.gpu_migration_est[i].est);
44 }
45
46 return ((val > 0) ? val : dist+1);
47}
48
49#endif
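
A sketch (not from the patch; the real call sites are in the scheduler and locking code) of how the tracking helpers above fit together: measure one GPU critical section, then feed the observation into the per-distance feedback estimator so that later calls to get_gpu_estimate() reflect it.

#include <litmus/gpu_affinity.h>

static void track_one_gpu_access(struct task_struct *t)
{
	lt_t observed;

	reset_gpu_tracker(t);
	start_gpu_tracker(t);

	/* ... task uses the GPU here ... */

	stop_gpu_tracker(t);
	observed = get_gpu_time(t);

	/* update the feedback estimator for this task's current
	 * migration distance */
	update_gpu_estimate(t, observed);
}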
diff --git a/include/litmus/ikglp_lock.h b/include/litmus/ikglp_lock.h
new file mode 100644
index 000000000000..af6f15178cb1
--- /dev/null
+++ b/include/litmus/ikglp_lock.h
@@ -0,0 +1,160 @@
1#ifndef LITMUS_IKGLP_H
2#define LITMUS_IKGLP_H
3
4#include <litmus/litmus.h>
5#include <litmus/binheap.h>
6#include <litmus/locking.h>
7
8#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
9#include <litmus/kexclu_affinity.h>
10
11struct ikglp_affinity;
12#endif
13
14typedef struct ikglp_heap_node
15{
16 struct task_struct *task;
17 struct binheap_node node;
18} ikglp_heap_node_t;
19
20struct fifo_queue;
21struct ikglp_wait_state;
22
23typedef struct ikglp_donee_heap_node
24{
25 struct task_struct *task;
26 struct fifo_queue *fq;
27 struct ikglp_wait_state *donor_info; // cross-linked with ikglp_wait_state_t of donor
28
29 struct binheap_node node;
30} ikglp_donee_heap_node_t;
31
32// Maintains the state of a request as it goes through the IKGLP
33typedef struct ikglp_wait_state {
34 struct task_struct *task; // pointer back to the requesting task
35
36 // Data for while waiting in FIFO Queue
37 wait_queue_t fq_node;
38 ikglp_heap_node_t global_heap_node;
39 ikglp_donee_heap_node_t donee_heap_node;
40
41 // Data for while waiting in PQ
42 ikglp_heap_node_t pq_node;
43
44 // Data for while waiting as a donor
45 ikglp_donee_heap_node_t *donee_info; // cross-linked with donee's ikglp_donee_heap_node_t
46 struct nested_info prio_donation;
47 struct binheap_node node;
48} ikglp_wait_state_t;
49
50/* struct for semaphore with priority inheritance */
51struct fifo_queue
52{
53 wait_queue_head_t wait;
54 struct task_struct* owner;
55
56	// used for bookkeeping
57 ikglp_heap_node_t global_heap_node;
58 ikglp_donee_heap_node_t donee_heap_node;
59
60 struct task_struct* hp_waiter;
61 int count; /* number of waiters + holder */
62
63 struct nested_info nest;
64};
65
66struct ikglp_semaphore
67{
68 struct litmus_lock litmus_lock;
69
70 raw_spinlock_t lock;
71 raw_spinlock_t real_lock;
72
73 int nr_replicas; // AKA k
74 int m;
75
76 int max_fifo_len; // max len of a fifo queue
77 int nr_in_fifos;
78
79 struct binheap_handle top_m; // min heap, base prio
80 int top_m_size; // number of nodes in top_m
81
82 struct binheap_handle not_top_m; // max heap, base prio
83
84 struct binheap_handle donees; // min-heap, base prio
85 struct fifo_queue *shortest_fifo_queue; // pointer to shortest fifo queue
86
87 /* data structures for holding requests */
88 struct fifo_queue *fifo_queues; // array nr_replicas in length
89 struct binheap_handle priority_queue; // max-heap, base prio
90 struct binheap_handle donors; // max-heap, base prio
91
92#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
93 struct ikglp_affinity *aff_obs;
94#endif
95};
96
97static inline struct ikglp_semaphore* ikglp_from_lock(struct litmus_lock* lock)
98{
99 return container_of(lock, struct ikglp_semaphore, litmus_lock);
100}
101
102int ikglp_lock(struct litmus_lock* l);
103int ikglp_unlock(struct litmus_lock* l);
104int ikglp_close(struct litmus_lock* l);
105void ikglp_free(struct litmus_lock* l);
106struct litmus_lock* ikglp_new(int m, struct litmus_lock_ops*, void* __user arg);
107
108
109
110#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
111
112struct ikglp_queue_info
113{
114 struct fifo_queue* q;
115 lt_t estimated_len;
116 int *nr_cur_users;
117};
118
119struct ikglp_affinity_ops
120{
121 struct fifo_queue* (*advise_enqueue)(struct ikglp_affinity* aff, struct task_struct* t); // select FIFO
122 ikglp_wait_state_t* (*advise_steal)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select steal from FIFO
123 ikglp_donee_heap_node_t* (*advise_donee_selection)(struct ikglp_affinity* aff, struct task_struct* t); // select a donee
124 ikglp_wait_state_t* (*advise_donor_to_fq)(struct ikglp_affinity* aff, struct fifo_queue* dst); // select a donor to move to PQ
125
126 void (*notify_enqueue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo enqueue
127 void (*notify_dequeue)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // fifo dequeue
128 void (*notify_acquired)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica acquired
129 void (*notify_freed)(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t); // replica freed
130 int (*replica_to_resource)(struct ikglp_affinity* aff, struct fifo_queue* fq); // convert a replica # to a GPU (includes offsets and simult user folding)
131};
132
133struct ikglp_affinity
134{
135 struct affinity_observer obs;
136 struct ikglp_affinity_ops *ops;
137 struct ikglp_queue_info *q_info;
138 int *nr_cur_users_on_rsrc;
139 int offset;
140 int nr_simult;
141 int nr_rsrc;
142 int relax_max_fifo_len;
143};
144
145static inline struct ikglp_affinity* ikglp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
146{
147 return container_of(aff_obs, struct ikglp_affinity, obs);
148}
149
150int ikglp_aff_obs_close(struct affinity_observer*);
151void ikglp_aff_obs_free(struct affinity_observer*);
152struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops*,
153 void* __user arg);
154struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
155 void* __user arg);
156#endif
157
158
159
160#endif
diff --git a/include/litmus/kexclu_affinity.h b/include/litmus/kexclu_affinity.h
new file mode 100644
index 000000000000..f6355de49074
--- /dev/null
+++ b/include/litmus/kexclu_affinity.h
@@ -0,0 +1,35 @@
1#ifndef LITMUS_AFF_OBS_H
2#define LITMUS_AFF_OBS_H
3
4#include <litmus/locking.h>
5
6struct affinity_observer_ops;
7
8struct affinity_observer
9{
10 struct affinity_observer_ops* ops;
11 int type;
12 int ident;
13
14 struct litmus_lock* lock; // the lock under observation
15};
16
17typedef int (*aff_obs_open_t)(struct affinity_observer* aff_obs,
18 void* __user arg);
19typedef int (*aff_obs_close_t)(struct affinity_observer* aff_obs);
20typedef void (*aff_obs_free_t)(struct affinity_observer* aff_obs);
21
22struct affinity_observer_ops
23{
24 aff_obs_open_t open;
25 aff_obs_close_t close;
26 aff_obs_free_t deallocate;
27};
28
29struct litmus_lock* get_lock_from_od(int od);
30
31void affinity_observer_new(struct affinity_observer* aff,
32 struct affinity_observer_ops* ops,
33 struct affinity_observer_args* args);
34
35#endif
diff --git a/include/litmus/kfmlp_lock.h b/include/litmus/kfmlp_lock.h
new file mode 100644
index 000000000000..5f0aae6e6f42
--- /dev/null
+++ b/include/litmus/kfmlp_lock.h
@@ -0,0 +1,97 @@
1#ifndef LITMUS_KFMLP_H
2#define LITMUS_KFMLP_H
3
4#include <litmus/litmus.h>
5#include <litmus/locking.h>
6
7#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
8#include <litmus/kexclu_affinity.h>
9
10struct kfmlp_affinity;
11#endif
12
13/* struct for semaphore with priority inheritance */
14struct kfmlp_queue
15{
16 wait_queue_head_t wait;
17 struct task_struct* owner;
18 struct task_struct* hp_waiter;
19 int count; /* number of waiters + holder */
20};
21
22struct kfmlp_semaphore
23{
24 struct litmus_lock litmus_lock;
25
26 spinlock_t lock;
27
28 int num_resources; /* aka k */
29
30 struct kfmlp_queue *queues; /* array */
31 struct kfmlp_queue *shortest_queue; /* pointer to shortest queue */
32
33#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
34 struct kfmlp_affinity *aff_obs;
35#endif
36};
37
38static inline struct kfmlp_semaphore* kfmlp_from_lock(struct litmus_lock* lock)
39{
40 return container_of(lock, struct kfmlp_semaphore, litmus_lock);
41}
42
43int kfmlp_lock(struct litmus_lock* l);
44int kfmlp_unlock(struct litmus_lock* l);
45int kfmlp_close(struct litmus_lock* l);
46void kfmlp_free(struct litmus_lock* l);
47struct litmus_lock* kfmlp_new(struct litmus_lock_ops*, void* __user arg);
48
49#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
50
51struct kfmlp_queue_info
52{
53 struct kfmlp_queue* q;
54 lt_t estimated_len;
55 int *nr_cur_users;
56};
57
58struct kfmlp_affinity_ops
59{
60 struct kfmlp_queue* (*advise_enqueue)(struct kfmlp_affinity* aff, struct task_struct* t);
61 struct task_struct* (*advise_steal)(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from);
62 void (*notify_enqueue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
63 void (*notify_dequeue)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
64 void (*notify_acquired)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
65 void (*notify_freed)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t);
66 int (*replica_to_resource)(struct kfmlp_affinity* aff, struct kfmlp_queue* fq);
67};
68
69struct kfmlp_affinity
70{
71 struct affinity_observer obs;
72 struct kfmlp_affinity_ops *ops;
73 struct kfmlp_queue_info *q_info;
74 int *nr_cur_users_on_rsrc;
75 int offset;
76 int nr_simult;
77 int nr_rsrc;
78};
79
80static inline struct kfmlp_affinity* kfmlp_aff_obs_from_aff_obs(struct affinity_observer* aff_obs)
81{
82 return container_of(aff_obs, struct kfmlp_affinity, obs);
83}
84
85int kfmlp_aff_obs_close(struct affinity_observer*);
86void kfmlp_aff_obs_free(struct affinity_observer*);
87struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops*,
88 void* __user arg);
89struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops*,
90 void* __user arg);
91
92
93#endif
94
95#endif
96
97
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 0b071fd359f9..71df378236f5 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -26,6 +26,7 @@ static inline int in_list(struct list_head* list)
 		);
 }
 
+
 struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
 
 #define NO_CPU			0xffffffff
@@ -53,12 +54,16 @@ void litmus_exit_task(struct task_struct *tsk);
 #define get_rt_phase(t)		(tsk_rt(t)->task_params.phase)
 #define get_partition(t)	(tsk_rt(t)->task_params.cpu)
 #define get_deadline(t)		(tsk_rt(t)->job_params.deadline)
+#define get_period(t)		(tsk_rt(t)->task_params.period)
 #define get_release(t)		(tsk_rt(t)->job_params.release)
 #define get_class(t)		(tsk_rt(t)->task_params.cls)
 
 #define is_priority_boosted(t)	(tsk_rt(t)->priority_boosted)
 #define get_boost_start(t)	(tsk_rt(t)->boost_start_time)
 
+#define effective_priority(t) ((!(tsk_rt(t)->inh_task)) ? t : tsk_rt(t)->inh_task)
+#define base_priority(t) (t)
+
 inline static int budget_exhausted(struct task_struct* t)
 {
 	return get_exec_time(t) >= get_exec_cost(t);
@@ -114,10 +119,12 @@ static inline lt_t litmus_clock(void)
 #define earlier_deadline(a, b) (lt_before(\
 	(a)->rt_param.job_params.deadline,\
 	(b)->rt_param.job_params.deadline))
+#define shorter_period(a, b) (lt_before(\
+	(a)->rt_param.task_params.period,\
+	(b)->rt_param.task_params.period))
 #define earlier_release(a, b)  (lt_before(\
 	(a)->rt_param.job_params.release,\
 	(b)->rt_param.job_params.release))
-
 void preempt_if_preemptable(struct task_struct* t, int on_cpu);
 
 #ifdef CONFIG_LITMUS_LOCKING
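
As a reading aid (not part of the patch), the effective_priority()/base_priority() helpers added above make inheritance checks read as follows; demo_inherits_priority is a hypothetical name.

/* true iff a priority-inheritance relation is active, i.e. t's effective
 * priority is currently supplied by another task */
static inline int demo_inherits_priority(struct task_struct *t)
{
	return effective_priority(t) != base_priority(t);
}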
diff --git a/include/litmus/litmus_softirq.h b/include/litmus/litmus_softirq.h
new file mode 100644
index 000000000000..1eb5ea1a6c4b
--- /dev/null
+++ b/include/litmus/litmus_softirq.h
@@ -0,0 +1,199 @@
1#ifndef __LITMUS_SOFTIRQ_H
2#define __LITMUS_SOFTIRQ_H
3
4#include <linux/interrupt.h>
5#include <linux/workqueue.h>
6
7/*
8 Threaded tasklet handling for Litmus. Tasklets
9 are scheduled with the priority of the tasklet's
10  owner---that is, the RT task on whose behalf the tasklet
11 runs.
12
13  Tasklets are currently scheduled in FIFO order with
14 NO priority inheritance for "blocked" tasklets.
15
16 klitirqd assumes the priority of the owner of the
17 tasklet when the tasklet is next to execute.
18
19 Currently, hi-tasklets are scheduled before
20 low-tasklets, regardless of priority of low-tasklets.
21 And likewise, low-tasklets are scheduled before work
22 queue objects. This priority inversion probably needs
23  to be fixed, though it is not an issue in our work with
24  GPUs, since GPUs (and their associated klitirqds) are owned for
25  exclusive time periods, so no inversions can
26  occur.
27 */
28
29
30
31#define NR_LITMUS_SOFTIRQD CONFIG_NR_LITMUS_SOFTIRQD
32
33/* Spawns NR_LITMUS_SOFTIRQD klitirqd daemons.
34   Actual launch of threads is deferred to kworker's
35 workqueue, so daemons will likely not be immediately
36 running when this function returns, though the required
37 data will be initialized.
38
39 @affinity_set: an array expressing the processor affinity
40 for each of the NR_LITMUS_SOFTIRQD daemons. May be set
41 to NULL for global scheduling.
42
43 - Examples -
44 8-CPU system with two CPU clusters:
45 affinity[] = {0, 0, 0, 0, 3, 3, 3, 3}
46	NOTE: Daemons are not actually bound to the specified CPU, but rather
47	to the cluster in which the CPU resides.
48
49 8-CPU system, partitioned:
50 affinity[] = {0, 1, 2, 3, 4, 5, 6, 7}
51
52 FIXME: change array to a CPU topology or array of cpumasks
53
54 */
55void spawn_klitirqd(int* affinity);
56
57
58/* Raises a flag to tell klitirqds to terminate.
59 Termination is async, so some threads may be running
60 after function return. */
61void kill_klitirqd(void);
62
63
64/* Returns 1 if all NR_LITMUS_SOFTIRQD klitirqd daemons are ready
65 to handle tasklets. 0, otherwise.*/
66int klitirqd_is_ready(void);
67
68/* Returns 1 if no NR_LITMUS_SOFTIRQD klitirqd daemons are ready
69 to handle tasklets. 0, otherwise.*/
70int klitirqd_is_dead(void);
71
72/* Flushes all pending work out to the OS for regular
73 * tasklet/work processing of the specified 'owner'
74 *
75 * PRECOND: klitirqd_thread must have a clear entry
76 * in the GPU registry, otherwise this call will become
77 * a no-op as work will loop back to the klitirqd_thread.
78 *
79 * Pass NULL for owner to flush ALL pending items.
80 */
81void flush_pending(struct task_struct* klitirqd_thread,
82 struct task_struct* owner);
83
84struct task_struct* get_klitirqd(unsigned int k_id);
85
86
87extern int __litmus_tasklet_schedule(
88 struct tasklet_struct *t,
89 unsigned int k_id);
90
91/* schedule a tasklet on klitirqd #k_id */
92static inline int litmus_tasklet_schedule(
93 struct tasklet_struct *t,
94 unsigned int k_id)
95{
96 int ret = 0;
97 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
98 ret = __litmus_tasklet_schedule(t, k_id);
99 return(ret);
100}
101
102/* for use by __tasklet_schedule() */
103static inline int _litmus_tasklet_schedule(
104 struct tasklet_struct *t,
105 unsigned int k_id)
106{
107 return(__litmus_tasklet_schedule(t, k_id));
108}
109
110
111
112
113extern int __litmus_tasklet_hi_schedule(struct tasklet_struct *t,
114 unsigned int k_id);
115
116/* schedule a hi tasklet on klitirqd #k_id */
117static inline int litmus_tasklet_hi_schedule(struct tasklet_struct *t,
118 unsigned int k_id)
119{
120 int ret = 0;
121 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
122 ret = __litmus_tasklet_hi_schedule(t, k_id);
123 return(ret);
124}
125
126/* for use by __tasklet_hi_schedule() */
127static inline int _litmus_tasklet_hi_schedule(struct tasklet_struct *t,
128 unsigned int k_id)
129{
130 return(__litmus_tasklet_hi_schedule(t, k_id));
131}
132
133
134
135
136
137extern int __litmus_tasklet_hi_schedule_first(
138 struct tasklet_struct *t,
139 unsigned int k_id);
140
141/* schedule a hi tasklet on klitirqd #k_id on next go-around */
142/* PRECONDITION: Interrupts must be disabled. */
143static inline int litmus_tasklet_hi_schedule_first(
144 struct tasklet_struct *t,
145 unsigned int k_id)
146{
147 int ret = 0;
148 if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
149 ret = __litmus_tasklet_hi_schedule_first(t, k_id);
150 return(ret);
151}
152
153/* for use by __tasklet_hi_schedule_first() */
154static inline int _litmus_tasklet_hi_schedule_first(
155 struct tasklet_struct *t,
156 unsigned int k_id)
157{
158 return(__litmus_tasklet_hi_schedule_first(t, k_id));
159}
160
161
162
163//////////////
164
165extern int __litmus_schedule_work(
166 struct work_struct* w,
167 unsigned int k_id);
168
169static inline int litmus_schedule_work(
170 struct work_struct* w,
171 unsigned int k_id)
172{
173 return(__litmus_schedule_work(w, k_id));
174}
175
176
177
178///////////// mutex operations for client threads.
179
180void down_and_set_stat(struct task_struct* t,
181 enum klitirqd_sem_status to_set,
182 struct mutex* sem);
183
184void __down_and_reset_and_set_stat(struct task_struct* t,
185 enum klitirqd_sem_status to_reset,
186 enum klitirqd_sem_status to_set,
187 struct mutex* sem);
188
189void up_and_set_stat(struct task_struct* t,
190 enum klitirqd_sem_status to_set,
191 struct mutex* sem);
192
193
194
195void release_klitirqd_lock(struct task_struct* t);
196
197int reacquire_klitirqd_lock(struct task_struct* t);
198
199#endif
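
A hypothetical sketch (not from the patch) of routing a tasklet to a klitirqd daemon instead of the regular softirq path once the daemons are running. The choice of k_id (0) is arbitrary for illustration, and the owner field exists only with CONFIG_LITMUS_SOFTIRQD or CONFIG_LITMUS_PAI_SOFTIRQD.

#include <litmus/litmus_softirq.h>

static void my_bottom_half(unsigned long data)
{
	/* deferred interrupt work */
}

static DECLARE_TASKLET(my_tasklet, my_bottom_half, 0);

static void defer_to_klitirqd(struct task_struct *rt_owner)
{
	if (!klitirqd_is_ready())
		return;	/* could fall back to tasklet_schedule() instead */

	my_tasklet.owner = rt_owner;	/* priority the daemon will assume */
	litmus_tasklet_schedule(&my_tasklet, 0 /* k_id */);
}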
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
index 4d7b870cb443..36647fee03e4 100644
--- a/include/litmus/locking.h
+++ b/include/litmus/locking.h
@@ -1,28 +1,160 @@
 #ifndef LITMUS_LOCKING_H
 #define LITMUS_LOCKING_H
 
+#include <linux/list.h>
+
 struct litmus_lock_ops;
 
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+struct nested_info
+{
+	struct litmus_lock *lock;
+	struct task_struct *hp_waiter_eff_prio;
+	struct task_struct **hp_waiter_ptr;
+	struct binheap_node hp_binheap_node;
+};
+
+static inline struct task_struct* top_priority(struct binheap_handle* handle) {
+	if(!binheap_empty(handle)) {
+		return (struct task_struct*)(binheap_top_entry(handle, struct nested_info, hp_binheap_node)->hp_waiter_eff_prio);
+	}
+	return NULL;
+}
+
+void print_hp_waiters(struct binheap_node* n, int depth);
+#endif
+
+
 /* Generic base struct for LITMUS^RT userspace semaphores.
  * This structure should be embedded in protocol-specific semaphores.
  */
 struct litmus_lock {
 	struct litmus_lock_ops *ops;
 	int type;
+
+	int ident;
+
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+	struct nested_info nest;
+//#ifdef CONFIG_DEBUG_SPINLOCK
+	char cheat_lockdep[2];
+	struct lock_class_key key;
+//#endif
+#endif
 };
 
+#ifdef CONFIG_LITMUS_DGL_SUPPORT
+
+#define MAX_DGL_SIZE CONFIG_LITMUS_MAX_DGL_SIZE
+
+typedef struct dgl_wait_state {
+	struct task_struct *task;	/* task waiting on DGL */
+	struct litmus_lock *locks[MAX_DGL_SIZE];	/* requested locks in DGL */
+	int size;		/* size of the DGL */
+	int nr_remaining;	/* nr of locks remaining before DGL is complete */
+	int last_primary;	/* index of lock in locks[] that has active priority */
+	wait_queue_t wq_nodes[MAX_DGL_SIZE];
+} dgl_wait_state_t;
+
+void wake_or_wait_on_next_lock(dgl_wait_state_t *dgl_wait);
+void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/);
+
+void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait);
+int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key);
+void __waitqueue_dgl_remove_first(wait_queue_head_t *wq, dgl_wait_state_t** dgl_wait, struct task_struct **task);
+#endif
+
+typedef int (*lock_op_t)(struct litmus_lock *l);
+typedef lock_op_t lock_close_t;
+typedef lock_op_t lock_lock_t;
+typedef lock_op_t lock_unlock_t;
+
+typedef int (*lock_open_t)(struct litmus_lock *l, void* __user arg);
+typedef void (*lock_free_t)(struct litmus_lock *l);
+
 struct litmus_lock_ops {
 	/* Current task tries to obtain / drop a reference to a lock.
 	 * Optional methods, allowed by default. */
-	int (*open)(struct litmus_lock*, void* __user);
-	int (*close)(struct litmus_lock*);
+	lock_open_t open;
+	lock_close_t close;
 
 	/* Current tries to lock/unlock this lock (mandatory methods). */
-	int (*lock)(struct litmus_lock*);
-	int (*unlock)(struct litmus_lock*);
+	lock_lock_t lock;
+	lock_unlock_t unlock;
 
 	/* The lock is no longer being referenced (mandatory method). */
-	void (*deallocate)(struct litmus_lock*);
+	lock_free_t deallocate;
+
+#ifdef CONFIG_LITMUS_NESTED_LOCKING
+	void (*propagate_increase_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
+	void (*propagate_decrease_inheritance)(struct litmus_lock* l, struct task_struct* t, raw_spinlock_t* to_unlock, unsigned long irqflags);
+#endif
+
+#ifdef CONFIG_LITMUS_DGL_SUPPORT
+	raw_spinlock_t* (*get_dgl_spin_lock)(struct litmus_lock *l);
+	int (*dgl_lock)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
+	int (*is_owner)(struct litmus_lock *l, struct task_struct *t);
+	void (*enable_priority)(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
+#endif
 };
 
+
+/*
+  Nested inheritance can be achieved with fine-grain locking when there is
+  no need for DGL support, presuming locks are acquired in a partial order
+  (no cycles!).  However, DGLs allow locks to be acquired in any order.  This
+  makes nested inheritance very difficult to realize with fine-grain locks
+  (we don't yet know a solution), so we use a big lock instead.
+
+  Code contains both fine-grain and coarse-grain methods together, side-by-side.
+  Each lock operation *IS NOT* surrounded by ifdef/endif to help make the code
+  more readable.  However, this leads to the odd situation where both code paths
+  appear together in code as if they were both active together.
+
+  THIS IS NOT REALLY THE CASE!  ONLY ONE CODE PATH IS ACTUALLY ACTIVE!
+
+  Example:
+	lock_global_irqsave(coarseLock, flags);
+	lock_fine_irqsave(fineLock, flags);
+
+  Reality (coarse):
+	lock_global_irqsave(coarseLock, flags);
+	//lock_fine_irqsave(fineLock, flags);
+
+  Reality (fine):
+	//lock_global_irqsave(coarseLock, flags);
+	lock_fine_irqsave(fineLock, flags);
+
+  Be careful when you read code involving nested inheritance.
+ */
+#if defined(CONFIG_LITMUS_DGL_SUPPORT)
+/* DGL requires a big lock to implement nested inheritance */
+#define lock_global_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags))
+#define lock_global(lock) raw_spin_lock((lock))
+#define unlock_global_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags))
+#define unlock_global(lock) raw_spin_unlock((lock))
+
+/* fine-grain lock operations are no-ops with DGL support */
+#define lock_fine_irqsave(lock, flags)
+#define lock_fine(lock)
+#define unlock_fine_irqrestore(lock, flags)
+#define unlock_fine(lock)
+
+#elif defined(CONFIG_LITMUS_NESTED_LOCKING)
+/* Use fine-grain locking when DGLs are disabled. */
+/* global lock operations are no-ops without DGL support */
+#define lock_global_irqsave(lock, flags)
+#define lock_global(lock)
+#define unlock_global_irqrestore(lock, flags)
+#define unlock_global(lock)
+
+#define lock_fine_irqsave(lock, flags) raw_spin_lock_irqsave((lock), (flags))
+#define lock_fine(lock) raw_spin_lock((lock))
+#define unlock_fine_irqrestore(lock, flags) raw_spin_unlock_irqrestore((lock), (flags))
+#define unlock_fine(lock) raw_spin_unlock((lock))
+
 #endif
+
+
+#endif
+
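
For orientation, an illustrative (not from the patch) instantiation of the refactored litmus_lock_ops, showing how the new typedef'd members are filled in by a protocol; the demo_* functions are placeholders.

#include <litmus/locking.h>

static int demo_open(struct litmus_lock *l, void* __user arg) { return 0; }
static int demo_close(struct litmus_lock *l) { return 0; }
static int demo_lock(struct litmus_lock *l) { return 0; }
static int demo_unlock(struct litmus_lock *l) { return 0; }
static void demo_free(struct litmus_lock *l) { /* release protocol state */ }

static struct litmus_lock_ops demo_lock_ops = {
	.open		= demo_open,	/* lock_open_t */
	.close		= demo_close,	/* lock_close_t */
	.lock		= demo_lock,	/* lock_lock_t */
	.unlock		= demo_unlock,	/* lock_unlock_t */
	.deallocate	= demo_free,	/* lock_free_t */
	/* .propagate_increase_inheritance, .dgl_lock, etc. are supplied only
	 * by protocols that support nesting or DGLs. */
};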
diff --git a/include/litmus/nvidia_info.h b/include/litmus/nvidia_info.h
new file mode 100644
index 000000000000..97c9577141db
--- /dev/null
+++ b/include/litmus/nvidia_info.h
@@ -0,0 +1,46 @@
1#ifndef __LITMUS_NVIDIA_H
2#define __LITMUS_NVIDIA_H
3
4#include <linux/interrupt.h>
5
6
7#include <litmus/litmus_softirq.h>
8
9
10//#define NV_DEVICE_NUM NR_LITMUS_SOFTIRQD
11#define NV_DEVICE_NUM CONFIG_NV_DEVICE_NUM
12#define NV_MAX_SIMULT_USERS CONFIG_NV_MAX_SIMULT_USERS
13
14int init_nvidia_info(void);
15void shutdown_nvidia_info(void);
16
17int is_nvidia_func(void* func_addr);
18
19void dump_nvidia_info(const struct tasklet_struct *t);
20
21
22// Returns the Nvidia device # associated with provided tasklet and work_struct.
23u32 get_tasklet_nv_device_num(const struct tasklet_struct *t);
24u32 get_work_nv_device_num(const struct work_struct *t);
25
26
27int init_nv_device_reg(void);
28//int get_nv_device_id(struct task_struct* owner);
29
30
31int reg_nv_device(int reg_device_id, int register_device, struct task_struct *t);
32
33struct task_struct* get_nv_max_device_owner(u32 target_device_id);
34//int is_nv_device_owner(u32 target_device_id);
35
36void lock_nv_registry(u32 reg_device_id, unsigned long* flags);
37void unlock_nv_registry(u32 reg_device_id, unsigned long* flags);
38
39#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
40void pai_check_priority_increase(struct task_struct *t, int reg_device_id);
41void pai_check_priority_decrease(struct task_struct *t, int reg_device_id);
42#endif
43
44//void increment_nv_int_count(u32 device);
45
46#endif
diff --git a/include/litmus/preempt.h b/include/litmus/preempt.h
index 380b886d78ff..8f3a9ca2d4e3 100644
--- a/include/litmus/preempt.h
+++ b/include/litmus/preempt.h
@@ -26,12 +26,12 @@ const char* sched_state_name(int s);
 			(x), #x, __FUNCTION__);		\
 	} while (0);
 
+//#define TRACE_SCHED_STATE_CHANGE(x, y, cpu) /* ignore */
 #define TRACE_SCHED_STATE_CHANGE(x, y, cpu)	\
 	TRACE_STATE("[P%d] 0x%x (%s) -> 0x%x (%s)\n", \
 		cpu, (x), sched_state_name(x),	\
 		(y), sched_state_name(y))
 
-
 typedef enum scheduling_state {
 	TASK_SCHEDULED = (1 << 0),  /* The currently scheduled task is the one that
 	                             * should be scheduled, and the processor does not
diff --git a/include/litmus/rsm_lock.h b/include/litmus/rsm_lock.h
new file mode 100644
index 000000000000..a15189683de4
--- /dev/null
+++ b/include/litmus/rsm_lock.h
@@ -0,0 +1,54 @@
1#ifndef LITMUS_RSM_H
2#define LITMUS_RSM_H
3
4#include <litmus/litmus.h>
5#include <litmus/binheap.h>
6#include <litmus/locking.h>
7
8/* struct for semaphore with priority inheritance */
9struct rsm_mutex {
10 struct litmus_lock litmus_lock;
11
12 /* current resource holder */
13 struct task_struct *owner;
14
15 /* highest-priority waiter */
16 struct task_struct *hp_waiter;
17
18 /* FIFO queue of waiting tasks -- for now. time stamp in the future. */
19 wait_queue_head_t wait;
20
21 /* we do some nesting within spinlocks, so we can't use the normal
22 sleeplocks found in wait_queue_head_t. */
23 raw_spinlock_t lock;
24};
25
26static inline struct rsm_mutex* rsm_mutex_from_lock(struct litmus_lock* lock)
27{
28 return container_of(lock, struct rsm_mutex, litmus_lock);
29}
30
31#ifdef CONFIG_LITMUS_DGL_SUPPORT
32int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t);
33int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait, wait_queue_t* wq_node);
34void rsm_mutex_enable_priority(struct litmus_lock *l, dgl_wait_state_t* dgl_wait);
35#endif
36
37void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
38 struct task_struct* t,
39 raw_spinlock_t* to_unlock,
40 unsigned long irqflags);
41
42void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
43 struct task_struct* t,
44 raw_spinlock_t* to_unlock,
45 unsigned long irqflags);
46
47int rsm_mutex_lock(struct litmus_lock* l);
48int rsm_mutex_unlock(struct litmus_lock* l);
49int rsm_mutex_close(struct litmus_lock* l);
50void rsm_mutex_free(struct litmus_lock* l);
51struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops*);
52
53
54#endif \ No newline at end of file
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index d6d799174160..0198884eab86 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -5,6 +5,8 @@
 #ifndef _LINUX_RT_PARAM_H_
 #define _LINUX_RT_PARAM_H_
 
+#include <litmus/fpmath.h>
+
 /* Litmus time type. */
 typedef unsigned long long lt_t;
 
@@ -24,6 +26,7 @@ static inline int lt_after_eq(lt_t a, lt_t b)
 typedef enum {
 	RT_CLASS_HARD,
 	RT_CLASS_SOFT,
+	RT_CLASS_SOFT_W_SLIP,
 	RT_CLASS_BEST_EFFORT
 } task_class_t;
 
@@ -52,6 +55,19 @@ union np_flag {
 	} np;
 };
 
+struct affinity_observer_args
+{
+	int lock_od;
+};
+
+struct gpu_affinity_observer_args
+{
+	struct affinity_observer_args obs;
+	int replica_to_gpu_offset;
+	int nr_simult_users;
+	int relaxed_rules;
+};
+
 /* The definition of the data that is shared between the kernel and real-time
  * tasks via a shared page (see litmus/ctrldev.c).
  *
@@ -75,6 +91,9 @@ struct control_page {
 /* don't export internal data structures to user space (liblitmus) */
 #ifdef __KERNEL__
 
+#include <litmus/binheap.h>
+#include <linux/semaphore.h>
+
 struct _rt_domain;
 struct bheap_node;
 struct release_heap;
@@ -100,6 +119,31 @@ struct rt_job {
 
 struct pfair_param;
 
+enum klitirqd_sem_status
+{
+	NEED_TO_REACQUIRE,
+	REACQUIRING,
+	NOT_HELD,
+	HELD
+};
+
+typedef enum gpu_migration_dist
+{
+	// TODO: Make this variable against NR_NVIDIA_GPUS
+	MIG_LOCAL	= 0,
+	MIG_NEAR	= 1,
+	MIG_MED		= 2,
+	MIG_FAR		= 3,	// 8 GPUs in a binary tree hierarchy
+	MIG_NONE	= 4,
+
+	MIG_LAST = MIG_NONE
+} gpu_migration_dist_t;
+
+typedef struct feedback_est{
+	fp_t est;
+	fp_t accum_err;
+} feedback_est_t;
+
 /* RT task parameters for scheduling extensions
  * These parameters are inherited during clone and therefore must
  * be explicitly set up before the task set is launched.
@@ -114,6 +158,52 @@ struct rt_param {
114 /* is the task present? (true if it can be scheduled) */ 158 /* is the task present? (true if it can be scheduled) */
115 unsigned int present:1; 159 unsigned int present:1;
116 160
161#ifdef CONFIG_LITMUS_SOFTIRQD
162 /* proxy threads have minimum priority by default */
163 unsigned int is_proxy_thread:1;
164
165 /* pointer to klitirqd currently working on this
166 task_struct's behalf. only set by the task pointed
167 to by klitirqd.
168
169 ptr only valid if is_proxy_thread == 0
170 */
171 struct task_struct* cur_klitirqd;
172
173	/* Used to implement mutual exclusion of execution between
174	 * a job and its klitirqd. A job must always hold
175	 * its klitirqd_sem to execute. A klitirqd instance
176 * must hold the semaphore before executing on behalf
177 * of a job.
178 */
179 struct mutex klitirqd_sem;
180
181 /* status of held klitirqd_sem, even if the held klitirqd_sem is from
182 another task (only proxy threads do this though).
183 */
184 atomic_t klitirqd_sem_stat;
185#endif
186
187#ifdef CONFIG_LITMUS_NVIDIA
188 /* number of top-half interrupts handled on behalf of current job */
189 atomic_t nv_int_count;
190 long unsigned int held_gpus; // bitmap of held GPUs.
191
192#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
193 fp_t gpu_fb_param_a[MIG_LAST+1];
194 fp_t gpu_fb_param_b[MIG_LAST+1];
195
196 gpu_migration_dist_t gpu_migration;
197 int last_gpu;
198	feedback_est_t gpu_migration_est[MIG_LAST+1]; // local, near, med, far, none
199
200 lt_t accum_gpu_time;
201 lt_t gpu_time_stamp;
202
203 unsigned int suspend_gpu_tracker_on_block:1;
204#endif
205#endif
206
117#ifdef CONFIG_LITMUS_LOCKING 207#ifdef CONFIG_LITMUS_LOCKING
118 /* Is the task being priority-boosted by a locking protocol? */ 208 /* Is the task being priority-boosted by a locking protocol? */
119 unsigned int priority_boosted:1; 209 unsigned int priority_boosted:1;
@@ -133,7 +223,15 @@ struct rt_param {
133 * could point to self if PI does not result in 223 * could point to self if PI does not result in
134 * an increased task priority. 224 * an increased task priority.
135 */ 225 */
136 struct task_struct* inh_task; 226 struct task_struct* inh_task;
227
228#ifdef CONFIG_LITMUS_NESTED_LOCKING
229 raw_spinlock_t hp_blocked_tasks_lock;
230 struct binheap_handle hp_blocked_tasks;
231
232 /* pointer to lock upon which is currently blocked */
233 struct litmus_lock* blocked_lock;
234#endif
137 235
138#ifdef CONFIG_NP_SECTION 236#ifdef CONFIG_NP_SECTION
139 /* For the FMLP under PSN-EDF, it is required to make the task 237 /* For the FMLP under PSN-EDF, it is required to make the task
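
The feedback_est_t pair added above (an estimate plus an accumulated error, kept per gpu_migration_dist_t bucket) suggests a proportional-integral style estimator driven by the per-task gpu_fb_param_a/b coefficients. The update rule below is an assumed illustration in plain C with doubles; the kernel code uses the fp_t fixed-point type from litmus/fpmath.h, whose API is not shown in this hunk.

/* Illustrative only -- not the patch's arithmetic. */
struct feedback_est_example {
	double est;        /* current estimate of the migration cost */
	double accum_err;  /* integrated estimation error */
};

static void feedback_update(struct feedback_est_example *fb,
			    double observed, double a, double b)
{
	double err = observed - fb->est;         /* how far off was the estimate? */
	fb->accum_err += err;                    /* integral term */
	fb->est += a * err + b * fb->accum_err;  /* PI-style correction */
}
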
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 6e7cabdddae8..24a6858b4b0b 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -11,6 +11,12 @@
11#include <litmus/locking.h> 11#include <litmus/locking.h>
12#endif 12#endif
13 13
14#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
15#include <litmus/kexclu_affinity.h>
16#endif
17
18#include <linux/interrupt.h>
19
14/************************ setup/tear down ********************/ 20/************************ setup/tear down ********************/
15 21
16typedef long (*activate_plugin_t) (void); 22typedef long (*activate_plugin_t) (void);
@@ -29,7 +35,6 @@ typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
29 */ 35 */
30typedef void (*finish_switch_t)(struct task_struct *prev); 36typedef void (*finish_switch_t)(struct task_struct *prev);
31 37
32
33/********************* task state changes ********************/ 38/********************* task state changes ********************/
34 39
35/* Called to setup a new real-time task. 40/* Called to setup a new real-time task.
@@ -58,6 +63,47 @@ typedef void (*task_exit_t) (struct task_struct *);
58typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type, 63typedef long (*allocate_lock_t) (struct litmus_lock **lock, int type,
59 void* __user config); 64 void* __user config);
60 65
66struct affinity_observer;
67typedef long (*allocate_affinity_observer_t) (
68 struct affinity_observer **aff_obs, int type,
69 void* __user config);
70
71typedef void (*increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
72typedef void (*decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh);
73typedef void (*nested_increase_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
74 raw_spinlock_t *to_unlock, unsigned long irqflags);
75typedef void (*nested_decrease_prio_t)(struct task_struct* t, struct task_struct* prio_inh,
76 raw_spinlock_t *to_unlock, unsigned long irqflags);
77
78typedef void (*increase_prio_klitirq_t)(struct task_struct* klitirqd,
79 struct task_struct* old_owner,
80 struct task_struct* new_owner);
81typedef void (*decrease_prio_klitirqd_t)(struct task_struct* klitirqd,
82 struct task_struct* old_owner);
83
84
85typedef int (*enqueue_pai_tasklet_t)(struct tasklet_struct* tasklet);
86typedef void (*change_prio_pai_tasklet_t)(struct task_struct *old_prio,
87 struct task_struct *new_prio);
88typedef void (*run_tasklets_t)(struct task_struct* next);
89
90typedef raw_spinlock_t* (*get_dgl_spinlock_t) (struct task_struct *t);
91
92
93typedef int (*higher_prio_t)(struct task_struct* a, struct task_struct* b);
94
95#ifdef CONFIG_LITMUS_NESTED_LOCKING
96
97typedef enum
98{
99 BASE,
100 EFFECTIVE
101} comparison_mode_t;
102
103typedef int (*__higher_prio_t)(struct task_struct* a, comparison_mode_t a_mod,
104 struct task_struct* b, comparison_mode_t b_mod);
105#endif
106
61 107
62/********************* sys call backends ********************/ 108/********************* sys call backends ********************/
63/* This function causes the caller to sleep until the next release */ 109/* This function causes the caller to sleep until the next release */
@@ -88,14 +134,40 @@ struct sched_plugin {
88 /* task state changes */ 134 /* task state changes */
89 admit_task_t admit_task; 135 admit_task_t admit_task;
90 136
91 task_new_t task_new; 137 task_new_t task_new;
92 task_wake_up_t task_wake_up; 138 task_wake_up_t task_wake_up;
93 task_block_t task_block; 139 task_block_t task_block;
94 task_exit_t task_exit; 140 task_exit_t task_exit;
95 141
142 higher_prio_t compare;
143
96#ifdef CONFIG_LITMUS_LOCKING 144#ifdef CONFIG_LITMUS_LOCKING
97 /* locking protocols */ 145 /* locking protocols */
98 allocate_lock_t allocate_lock; 146 allocate_lock_t allocate_lock;
147 increase_prio_t increase_prio;
148 decrease_prio_t decrease_prio;
149#endif
150#ifdef CONFIG_LITMUS_NESTED_LOCKING
151 nested_increase_prio_t nested_increase_prio;
152 nested_decrease_prio_t nested_decrease_prio;
153 __higher_prio_t __compare;
154#endif
155#ifdef CONFIG_LITMUS_DGL_SUPPORT
156 get_dgl_spinlock_t get_dgl_spinlock;
157#endif
158
159#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
160 allocate_affinity_observer_t allocate_aff_obs;
161#endif
162
163#ifdef CONFIG_LITMUS_SOFTIRQD
164 increase_prio_klitirq_t increase_prio_klitirqd;
165 decrease_prio_klitirqd_t decrease_prio_klitirqd;
166#endif
167#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
168 enqueue_pai_tasklet_t enqueue_pai_tasklet;
169 change_prio_pai_tasklet_t change_prio_pai_tasklet;
170 run_tasklets_t run_tasklets;
99#endif 171#endif
100} __attribute__ ((__aligned__(SMP_CACHE_BYTES))); 172} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
101 173
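
A sketch of how a plugin might wire up the new comparison and priority-inheritance hooks. The demo_* names and the "DEMO" plugin are placeholders; edf_higher_prio() and tsk_rt() are existing LITMUS helpers, and only the recording of inh_task is shown (a real plugin would also requeue and reschedule the task).

#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
#include <litmus/litmus.h>

static int demo_compare(struct task_struct* a, struct task_struct* b)
{
	return edf_higher_prio(a, b);          /* reuse the stock EDF order */
}

static void demo_increase_prio(struct task_struct* t, struct task_struct* prio_inh)
{
	tsk_rt(t)->inh_task = prio_inh;        /* minimal: record the donor */
}

static void demo_decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
{
	tsk_rt(t)->inh_task = prio_inh;        /* NULL restores base priority */
}

static struct sched_plugin demo_plugin = {
	.plugin_name   = "DEMO",
	.compare       = demo_compare,
#ifdef CONFIG_LITMUS_LOCKING
	.increase_prio = demo_increase_prio,
	.decrease_prio = demo_decrease_prio,
#endif
};
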
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 7ca34cb13881..b1b71f6c5f0c 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -10,13 +10,14 @@ struct st_trace_header {
10 u8 type; /* Of what type is this record? */ 10 u8 type; /* Of what type is this record? */
11 u8 cpu; /* On which CPU was it recorded? */ 11 u8 cpu; /* On which CPU was it recorded? */
12 u16 pid; /* PID of the task. */ 12 u16 pid; /* PID of the task. */
13 u32 job; /* The job sequence number. */ 13 u32 job:24; /* The job sequence number. */
14}; 14 u8 extra;
15} __attribute__((packed));
15 16
16#define ST_NAME_LEN 16 17#define ST_NAME_LEN 16
17struct st_name_data { 18struct st_name_data {
18 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */ 19 char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
19}; 20} __attribute__((packed));
20 21
21struct st_param_data { /* regular params */ 22struct st_param_data { /* regular params */
22 u32 wcet; 23 u32 wcet;
@@ -25,30 +26,29 @@ struct st_param_data { /* regular params */
25 u8 partition; 26 u8 partition;
26 u8 class; 27 u8 class;
27 u8 __unused[2]; 28 u8 __unused[2];
28}; 29} __attribute__((packed));
29 30
30struct st_release_data { /* A job was/is going to be released. */ 31struct st_release_data { /* A job was/is going to be released. */
31 u64 release; /* What's the release time? */ 32 u64 release; /* What's the release time? */
32 u64 deadline; /* By when must it finish? */ 33 u64 deadline; /* By when must it finish? */
33}; 34} __attribute__((packed));
34 35
35struct st_assigned_data { /* A job was assigned to a CPU. */ 36struct st_assigned_data { /* A job was assigned to a CPU. */
36 u64 when; 37 u64 when;
37 u8 target; /* Where should it execute? */ 38 u8 target; /* Where should it execute? */
38 u8 __unused[7]; 39 u8 __unused[7];
39}; 40} __attribute__((packed));
40 41
41struct st_switch_to_data { /* A process was switched to on a given CPU. */ 42struct st_switch_to_data { /* A process was switched to on a given CPU. */
42 u64 when; /* When did this occur? */ 43 u64 when; /* When did this occur? */
43 u32 exec_time; /* Time the current job has executed. */ 44 u32 exec_time; /* Time the current job has executed. */
44 u8 __unused[4]; 45 u8 __unused[4];
45 46} __attribute__((packed));
46};
47 47
48struct st_switch_away_data { /* A process was switched away from on a given CPU. */ 48struct st_switch_away_data { /* A process was switched away from on a given CPU. */
49 u64 when; 49 u64 when;
50 u64 exec_time; 50 u64 exec_time;
51}; 51} __attribute__((packed));
52 52
53struct st_completion_data { /* A job completed. */ 53struct st_completion_data { /* A job completed. */
54 u64 when; 54 u64 when;
@@ -56,35 +56,108 @@ struct st_completion_data { /* A job completed. */
56 * next task automatically; set to 0 otherwise. 56 * next task automatically; set to 0 otherwise.
57 */ 57 */
58 u8 __uflags:7; 58 u8 __uflags:7;
59 u8 __unused[7]; 59 u16 nv_int_count;
60}; 60 u8 __unused[5];
61} __attribute__((packed));
61 62
62struct st_block_data { /* A task blocks. */ 63struct st_block_data { /* A task blocks. */
63 u64 when; 64 u64 when;
64 u64 __unused; 65 u64 __unused;
65}; 66} __attribute__((packed));
66 67
67struct st_resume_data { /* A task resumes. */ 68struct st_resume_data { /* A task resumes. */
68 u64 when; 69 u64 when;
69 u64 __unused; 70 u64 __unused;
70}; 71} __attribute__((packed));
71 72
72struct st_action_data { 73struct st_action_data {
73 u64 when; 74 u64 when;
74 u8 action; 75 u8 action;
75 u8 __unused[7]; 76 u8 __unused[7];
76}; 77} __attribute__((packed));
77 78
78struct st_sys_release_data { 79struct st_sys_release_data {
79 u64 when; 80 u64 when;
80 u64 release; 81 u64 release;
81}; 82} __attribute__((packed));
83
84
85struct st_tasklet_release_data {
86 u64 when;
87 u64 __unused;
88} __attribute__((packed));
89
90struct st_tasklet_begin_data {
91 u64 when;
92 u16 exe_pid;
93 u8 __unused[6];
94} __attribute__((packed));
95
96struct st_tasklet_end_data {
97 u64 when;
98 u16 exe_pid;
99 u8 flushed;
100 u8 __unused[5];
101} __attribute__((packed));
102
103
104struct st_work_release_data {
105 u64 when;
106 u64 __unused;
107} __attribute__((packed));
108
109struct st_work_begin_data {
110 u64 when;
111 u16 exe_pid;
112 u8 __unused[6];
113} __attribute__((packed));
114
115struct st_work_end_data {
116 u64 when;
117 u16 exe_pid;
118 u8 flushed;
119 u8 __unused[5];
120} __attribute__((packed));
121
122struct st_effective_priority_change_data {
123 u64 when;
124 u16 inh_pid;
125 u8 __unused[6];
126} __attribute__((packed));
127
128struct st_nv_interrupt_begin_data {
129 u64 when;
130 u32 device;
131 u32 serialNumber;
132} __attribute__((packed));
133
134struct st_nv_interrupt_end_data {
135 u64 when;
136 u32 device;
137 u32 serialNumber;
138} __attribute__((packed));
139
140struct st_prediction_err_data {
141 u64 distance;
142 u64 rel_err;
143} __attribute__((packed));
144
145struct st_migration_data {
146 u64 observed;
147 u64 estimated;
148} __attribute__((packed));
149
150struct migration_info {
151 u64 observed;
152 u64 estimated;
153 u8 distance;
154} __attribute__((packed));
82 155
83#define DATA(x) struct st_ ## x ## _data x; 156#define DATA(x) struct st_ ## x ## _data x;
84 157
85typedef enum { 158typedef enum {
86 ST_NAME = 1, /* Start at one, so that we can spot 159 ST_NAME = 1, /* Start at one, so that we can spot
87 * uninitialized records. */ 160 * uninitialized records. */
88 ST_PARAM, 161 ST_PARAM,
89 ST_RELEASE, 162 ST_RELEASE,
90 ST_ASSIGNED, 163 ST_ASSIGNED,
@@ -94,7 +167,19 @@ typedef enum {
94 ST_BLOCK, 167 ST_BLOCK,
95 ST_RESUME, 168 ST_RESUME,
96 ST_ACTION, 169 ST_ACTION,
97 ST_SYS_RELEASE 170 ST_SYS_RELEASE,
171 ST_TASKLET_RELEASE,
172 ST_TASKLET_BEGIN,
173 ST_TASKLET_END,
174 ST_WORK_RELEASE,
175 ST_WORK_BEGIN,
176 ST_WORK_END,
177 ST_EFF_PRIO_CHANGE,
178 ST_NV_INTERRUPT_BEGIN,
179 ST_NV_INTERRUPT_END,
180
181 ST_PREDICTION_ERR,
182 ST_MIGRATION,
98} st_event_record_type_t; 183} st_event_record_type_t;
99 184
100struct st_event_record { 185struct st_event_record {
@@ -113,8 +198,20 @@ struct st_event_record {
113 DATA(resume); 198 DATA(resume);
114 DATA(action); 199 DATA(action);
115 DATA(sys_release); 200 DATA(sys_release);
201 DATA(tasklet_release);
202 DATA(tasklet_begin);
203 DATA(tasklet_end);
204 DATA(work_release);
205 DATA(work_begin);
206 DATA(work_end);
207 DATA(effective_priority_change);
208 DATA(nv_interrupt_begin);
209 DATA(nv_interrupt_end);
210
211 DATA(prediction_err);
212 DATA(migration);
116 } data; 213 } data;
117}; 214} __attribute__((packed));
118 215
119#undef DATA 216#undef DATA
120 217
@@ -129,6 +226,8 @@ struct st_event_record {
129 ft_event1(id, callback, task) 226 ft_event1(id, callback, task)
130#define SCHED_TRACE2(id, callback, task, xtra) \ 227#define SCHED_TRACE2(id, callback, task, xtra) \
131 ft_event2(id, callback, task, xtra) 228 ft_event2(id, callback, task, xtra)
229#define SCHED_TRACE3(id, callback, task, xtra1, xtra2) \
230 ft_event3(id, callback, task, xtra1, xtra2)
132 231
133/* provide prototypes; needed on sparc64 */ 232/* provide prototypes; needed on sparc64 */
134#ifndef NO_TASK_TRACE_DECLS 233#ifndef NO_TASK_TRACE_DECLS
@@ -155,12 +254,58 @@ feather_callback void do_sched_trace_action(unsigned long id,
155feather_callback void do_sched_trace_sys_release(unsigned long id, 254feather_callback void do_sched_trace_sys_release(unsigned long id,
156 lt_t* start); 255 lt_t* start);
157 256
257
258feather_callback void do_sched_trace_tasklet_release(unsigned long id,
259 struct task_struct* owner);
260feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
261 struct task_struct* owner);
262feather_callback void do_sched_trace_tasklet_end(unsigned long id,
263 struct task_struct* owner,
264 unsigned long flushed);
265
266feather_callback void do_sched_trace_work_release(unsigned long id,
267 struct task_struct* owner);
268feather_callback void do_sched_trace_work_begin(unsigned long id,
269 struct task_struct* owner,
270 struct task_struct* exe);
271feather_callback void do_sched_trace_work_end(unsigned long id,
272 struct task_struct* owner,
273 struct task_struct* exe,
274 unsigned long flushed);
275
276feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
277 struct task_struct* task,
278 struct task_struct* inh);
279
280feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
281 u32 device);
282feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id,
283 unsigned long unused);
284
285feather_callback void do_sched_trace_prediction_err(unsigned long id,
286 struct task_struct* task,
287 gpu_migration_dist_t* distance,
288 fp_t* rel_err);
289
290
291
292
293
294feather_callback void do_sched_trace_migration(unsigned long id,
295 struct task_struct* task,
296 struct migration_info* mig_info);
297
298
299/* returns true if we're tracing an interrupt on current CPU */
300/* int is_interrupt_tracing_active(void); */
301
158#endif 302#endif
159 303
160#else 304#else
161 305
162#define SCHED_TRACE(id, callback, task) /* no tracing */ 306#define SCHED_TRACE(id, callback, task) /* no tracing */
163#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */ 307#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
308#define SCHED_TRACE3(id, callback, task, xtra1, xtra2)
164 309
165#endif 310#endif
166 311
@@ -193,6 +338,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
193 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when) 338 SCHED_TRACE(SCHED_TRACE_BASE_ID + 10, do_sched_trace_sys_release, when)
194 339
195 340
341#define sched_trace_tasklet_release(t) \
342 SCHED_TRACE(SCHED_TRACE_BASE_ID + 11, do_sched_trace_tasklet_release, t)
343
344#define sched_trace_tasklet_begin(t) \
345 SCHED_TRACE(SCHED_TRACE_BASE_ID + 12, do_sched_trace_tasklet_begin, t)
346
347#define sched_trace_tasklet_end(t, flushed) \
348 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 13, do_sched_trace_tasklet_end, t, flushed)
349
350
351#define sched_trace_work_release(t) \
352 SCHED_TRACE(SCHED_TRACE_BASE_ID + 14, do_sched_trace_work_release, t)
353
354#define sched_trace_work_begin(t, e) \
355 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 15, do_sched_trace_work_begin, t, e)
356
357#define sched_trace_work_end(t, e, flushed) \
358 SCHED_TRACE3(SCHED_TRACE_BASE_ID + 16, do_sched_trace_work_end, t, e, flushed)
359
360
361#define sched_trace_eff_prio_change(t, inh) \
362 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 17, do_sched_trace_eff_prio_change, t, inh)
363
364
365#define sched_trace_nv_interrupt_begin(d) \
366 SCHED_TRACE(SCHED_TRACE_BASE_ID + 18, do_sched_trace_nv_interrupt_begin, d)
367#define sched_trace_nv_interrupt_end(d) \
368 SCHED_TRACE(SCHED_TRACE_BASE_ID + 19, do_sched_trace_nv_interrupt_end, d)
369
370#define sched_trace_prediction_err(t, dist, rel_err) \
371 SCHED_TRACE3(SCHED_TRACE_BASE_ID + 20, do_sched_trace_prediction_err, t, dist, rel_err)
372
373#define sched_trace_migration(t, mig_info) \
374 SCHED_TRACE2(SCHED_TRACE_BASE_ID + 21, do_sched_trace_migration, t, mig_info)
375
196#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ 376#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
197 377
198#endif /* __KERNEL__ */ 378#endif /* __KERNEL__ */
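
As a usage illustration, a klitirqd-style worker could bracket the execution of a redirected tasklet with the new tracing macros. klitirqd_run_one() is a hypothetical name, t->owner is the owner field this patch adds to struct tasklet_struct, and flushed = 0 indicates the tasklet actually ran rather than being flushed on exit.

#include <linux/interrupt.h>
#include <litmus/sched_trace.h>

static void klitirqd_run_one(struct tasklet_struct *t)
{
	sched_trace_tasklet_begin(t->owner);
	t->func(t->data);                        /* run the deferred bottom half */
	sched_trace_tasklet_end(t->owner, 0ul);  /* 0 = executed, not flushed */
}
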
diff --git a/include/litmus/sched_trace_external.h b/include/litmus/sched_trace_external.h
new file mode 100644
index 000000000000..e70e45e4cf51
--- /dev/null
+++ b/include/litmus/sched_trace_external.h
@@ -0,0 +1,78 @@
1/*
2 * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
3 */
4#ifndef _LINUX_SCHED_TRACE_EXTERNAL_H_
5#define _LINUX_SCHED_TRACE_EXTERNAL_H_
6
7
8#ifdef CONFIG_SCHED_TASK_TRACE
9extern void __sched_trace_tasklet_begin_external(struct task_struct* t);
10static inline void sched_trace_tasklet_begin_external(struct task_struct* t)
11{
12 __sched_trace_tasklet_begin_external(t);
13}
14
15extern void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed);
16static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
17{
18 __sched_trace_tasklet_end_external(t, flushed);
19}
20
21extern void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e);
22static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
23{
24 __sched_trace_work_begin_external(t, e);
25}
26
27extern void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f);
28static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
29{
30 __sched_trace_work_end_external(t, e, f);
31}
32
33#ifdef CONFIG_LITMUS_NVIDIA
34extern void __sched_trace_nv_interrupt_begin_external(u32 device);
35static inline void sched_trace_nv_interrupt_begin_external(u32 device)
36{
37 __sched_trace_nv_interrupt_begin_external(device);
38}
39
40extern void __sched_trace_nv_interrupt_end_external(u32 device);
41static inline void sched_trace_nv_interrupt_end_external(u32 device)
42{
43 __sched_trace_nv_interrupt_end_external(device);
44}
45#endif
46
47#else
48
49// no tracing.
50static inline void sched_trace_tasklet_begin_external(struct task_struct* t){}
51static inline void sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed){}
52static inline void sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e){}
53static inline void sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f){}
54
55#ifdef CONFIG_LITMUS_NVIDIA
56static inline void sched_trace_nv_interrupt_begin_external(u32 device){}
57static inline void sched_trace_nv_interrupt_end_external(u32 device){}
58#endif
59
60#endif
61
62
63#ifdef CONFIG_LITMUS_NVIDIA
64
65#define EX_TS(evt) \
66extern void __##evt(void); \
67static inline void EX_##evt(void) { __##evt(); }
68
69EX_TS(TS_NV_TOPISR_START)
70EX_TS(TS_NV_TOPISR_END)
71EX_TS(TS_NV_BOTISR_START)
72EX_TS(TS_NV_BOTISR_END)
73EX_TS(TS_NV_RELEASE_BOTISR_START)
74EX_TS(TS_NV_RELEASE_BOTISR_END)
75
76#endif
77
78#endif
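
For reference, EX_TS(TS_NV_TOPISR_START) in the header above expands to roughly the following, giving non-LITMUS code an inline entry point that calls the out-of-line timestamp hook (presumably defined in litmus/sched_trace_external.c):

extern void __TS_NV_TOPISR_START(void);
static inline void EX_TS_NV_TOPISR_START(void)
{
	__TS_NV_TOPISR_START();   /* records the Feather-Trace timestamp */
}
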
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
index e809376d6487..e078aee4234d 100644
--- a/include/litmus/trace.h
+++ b/include/litmus/trace.h
@@ -103,14 +103,46 @@ feather_callback void save_task_latency(unsigned long event, unsigned long when_
103#define TS_LOCK_START TIMESTAMP(170) 103#define TS_LOCK_START TIMESTAMP(170)
104#define TS_LOCK_SUSPEND TIMESTAMP(171) 104#define TS_LOCK_SUSPEND TIMESTAMP(171)
105#define TS_LOCK_RESUME TIMESTAMP(172) 105#define TS_LOCK_RESUME TIMESTAMP(172)
106#define TS_LOCK_END TIMESTAMP(173) 106#define TS_LOCK_END TIMESTAMP(173)
107
108#ifdef CONFIG_LITMUS_DGL_SUPPORT
109#define TS_DGL_LOCK_START TIMESTAMP(175)
110#define TS_DGL_LOCK_SUSPEND TIMESTAMP(176)
111#define TS_DGL_LOCK_RESUME TIMESTAMP(177)
112#define TS_DGL_LOCK_END TIMESTAMP(178)
113#endif
107 114
108#define TS_UNLOCK_START TIMESTAMP(180) 115#define TS_UNLOCK_START TIMESTAMP(180)
109#define TS_UNLOCK_END TIMESTAMP(181) 116#define TS_UNLOCK_END TIMESTAMP(181)
110 117
118#ifdef CONFIG_LITMUS_DGL_SUPPORT
119#define TS_DGL_UNLOCK_START TIMESTAMP(185)
120#define TS_DGL_UNLOCK_END TIMESTAMP(186)
121#endif
122
111#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c) 123#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
112#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN) 124#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
113 125
114#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when)) 126#define TS_RELEASE_LATENCY(when) LTIMESTAMP(208, &(when))
115 127
128
129#ifdef CONFIG_LITMUS_NVIDIA
130
131#define TS_NV_TOPISR_START TIMESTAMP(200)
132#define TS_NV_TOPISR_END TIMESTAMP(201)
133
134#define TS_NV_BOTISR_START TIMESTAMP(202)
135#define TS_NV_BOTISR_END TIMESTAMP(203)
136
137#define TS_NV_RELEASE_BOTISR_START TIMESTAMP(204)
138#define TS_NV_RELEASE_BOTISR_END TIMESTAMP(205)
139
140#endif
141
142#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
143#define TS_NV_SCHED_BOTISR_START TIMESTAMP(206)
144#define TS_NV_SCHED_BOTISR_END TIMESTAMP(207)
145#endif
146
147
116#endif /* !_SYS_TRACE_H_ */ 148#endif /* !_SYS_TRACE_H_ */
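
A sketch of how the DGL timestamps above are meant to be used: they bracket a dynamic-group-lock request the same way TS_LOCK_START/END bracket a single lock request. The function name and the elided acquisition logic are assumptions.

#include <litmus/trace.h>

#ifdef CONFIG_LITMUS_DGL_SUPPORT
static long demo_dgl_lock_all(void)
{
	long err = 0;

	TS_DGL_LOCK_START;
	/* ... acquire every lock in the group; on suspension emit
	 * TS_DGL_LOCK_SUSPEND and, after wake-up, TS_DGL_LOCK_RESUME ... */
	TS_DGL_LOCK_END;

	return err;
}
#endif
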
diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
index 94264c27d9ac..4fa514c89605 100644
--- a/include/litmus/unistd_32.h
+++ b/include/litmus/unistd_32.h
@@ -17,5 +17,8 @@
17#define __NR_wait_for_ts_release __LSC(9) 17#define __NR_wait_for_ts_release __LSC(9)
18#define __NR_release_ts __LSC(10) 18#define __NR_release_ts __LSC(10)
19#define __NR_null_call __LSC(11) 19#define __NR_null_call __LSC(11)
20#define __NR_litmus_dgl_lock __LSC(12)
21#define __NR_litmus_dgl_unlock __LSC(13)
22#define __NR_register_nv_device __LSC(14)
20 23
21#define NR_litmus_syscalls 12 24#define NR_litmus_syscalls 15
diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
index d5ced0d2642c..f80dc45dc185 100644
--- a/include/litmus/unistd_64.h
+++ b/include/litmus/unistd_64.h
@@ -29,5 +29,12 @@ __SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
29__SYSCALL(__NR_release_ts, sys_release_ts) 29__SYSCALL(__NR_release_ts, sys_release_ts)
30#define __NR_null_call __LSC(11) 30#define __NR_null_call __LSC(11)
31__SYSCALL(__NR_null_call, sys_null_call) 31__SYSCALL(__NR_null_call, sys_null_call)
32#define __NR_litmus_dgl_lock __LSC(12)
33__SYSCALL(__NR_litmus_dgl_lock, sys_litmus_dgl_lock)
34#define __NR_litmus_dgl_unlock __LSC(13)
35__SYSCALL(__NR_litmus_dgl_unlock, sys_litmus_dgl_unlock)
36#define __NR_register_nv_device __LSC(14)
37__SYSCALL(__NR_register_nv_device, sys_register_nv_device)
32 38
33#define NR_litmus_syscalls 12 39
40#define NR_litmus_syscalls 15
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 298c9276dfdb..2bdcdc3691e5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -542,7 +542,7 @@ static void print_lock(struct held_lock *hlock)
542 print_ip_sym(hlock->acquire_ip); 542 print_ip_sym(hlock->acquire_ip);
543} 543}
544 544
545static void lockdep_print_held_locks(struct task_struct *curr) 545void lockdep_print_held_locks(struct task_struct *curr)
546{ 546{
547 int i, depth = curr->lockdep_depth; 547 int i, depth = curr->lockdep_depth;
548 548
@@ -558,6 +558,7 @@ static void lockdep_print_held_locks(struct task_struct *curr)
558 print_lock(curr->held_locks + i); 558 print_lock(curr->held_locks + i);
559 } 559 }
560} 560}
561EXPORT_SYMBOL(lockdep_print_held_locks);
561 562
562static void print_kernel_version(void) 563static void print_kernel_version(void)
563{ 564{
@@ -583,6 +584,10 @@ static int static_obj(void *obj)
583 end = (unsigned long) &_end, 584 end = (unsigned long) &_end,
584 addr = (unsigned long) obj; 585 addr = (unsigned long) obj;
585 586
587 // GLENN
588 return 1;
589
590
586 /* 591 /*
587 * static variable? 592 * static variable?
588 */ 593 */
diff --git a/kernel/mutex.c b/kernel/mutex.c
index d607ed5dd441..2f363b9bfc1f 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -498,3 +498,128 @@ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
498 return 1; 498 return 1;
499} 499}
500EXPORT_SYMBOL(atomic_dec_and_mutex_lock); 500EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
501
502
503
504
505void mutex_lock_sfx(struct mutex *lock,
506 side_effect_t pre, unsigned long pre_arg,
507 side_effect_t post, unsigned long post_arg)
508{
509 long state = TASK_UNINTERRUPTIBLE;
510
511 struct task_struct *task = current;
512 struct mutex_waiter waiter;
513 unsigned long flags;
514
515 preempt_disable();
516	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
517
518 spin_lock_mutex(&lock->wait_lock, flags);
519
520 if(pre)
521 {
522 if(unlikely(pre(pre_arg)))
523 {
524	// this will confuse lockdep's CONFIG_PROVE_LOCKING...
525 spin_unlock_mutex(&lock->wait_lock, flags);
526 preempt_enable();
527 return;
528 }
529 }
530
531 debug_mutex_lock_common(lock, &waiter);
532 debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
533
534 /* add waiting tasks to the end of the waitqueue (FIFO): */
535 list_add_tail(&waiter.list, &lock->wait_list);
536 waiter.task = task;
537
538 if (atomic_xchg(&lock->count, -1) == 1)
539 goto done;
540
541	lock_contended(&lock->dep_map, _RET_IP_);
542
543 for (;;) {
544 /*
545	 * Let's try to take the lock again - this is needed even if
546 * we get here for the first time (shortly after failing to
547 * acquire the lock), to make sure that we get a wakeup once
548 * it's unlocked. Later on, if we sleep, this is the
549 * operation that gives us the lock. We xchg it to -1, so
550 * that when we release the lock, we properly wake up the
551 * other waiters:
552 */
553 if (atomic_xchg(&lock->count, -1) == 1)
554 break;
555
556 __set_task_state(task, state);
557
558	/* didn't get the lock, go to sleep: */
559 spin_unlock_mutex(&lock->wait_lock, flags);
560 preempt_enable_no_resched();
561 schedule();
562 preempt_disable();
563 spin_lock_mutex(&lock->wait_lock, flags);
564 }
565
566done:
567	lock_acquired(&lock->dep_map, _RET_IP_);
568 /* got the lock - rejoice! */
569 mutex_remove_waiter(lock, &waiter, current_thread_info());
570 mutex_set_owner(lock);
571
572 /* set it to 0 if there are no waiters left: */
573 if (likely(list_empty(&lock->wait_list)))
574 atomic_set(&lock->count, 0);
575
576 if(post)
577 post(post_arg);
578
579 spin_unlock_mutex(&lock->wait_lock, flags);
580
581 debug_mutex_free_waiter(&waiter);
582 preempt_enable();
583}
584EXPORT_SYMBOL(mutex_lock_sfx);
585
586void mutex_unlock_sfx(struct mutex *lock,
587 side_effect_t pre, unsigned long pre_arg,
588 side_effect_t post, unsigned long post_arg)
589{
590 unsigned long flags;
591
592 spin_lock_mutex(&lock->wait_lock, flags);
593
594 if(pre)
595 pre(pre_arg);
596
597 //mutex_release(&lock->dep_map, nested, _RET_IP_);
598 mutex_release(&lock->dep_map, 1, _RET_IP_);
599 debug_mutex_unlock(lock);
600
601 /*
602 * some architectures leave the lock unlocked in the fastpath failure
603	 * case, others need to leave it locked. In the latter case we have to
604 * unlock it here
605 */
606 if (__mutex_slowpath_needs_to_unlock())
607 atomic_set(&lock->count, 1);
608
609 if (!list_empty(&lock->wait_list)) {
610 /* get the first entry from the wait-list: */
611 struct mutex_waiter *waiter =
612 list_entry(lock->wait_list.next,
613 struct mutex_waiter, list);
614
615 debug_mutex_wake_waiter(lock, waiter);
616
617 wake_up_process(waiter->task);
618 }
619
620 if(post)
621 post(post_arg);
622
623 spin_unlock_mutex(&lock->wait_lock, flags);
624}
625EXPORT_SYMBOL(mutex_unlock_sfx);
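
A usage sketch for the new hooks. The side_effect_t typedef lives in the patched include/linux/mutex.h (not shown in this excerpt); based on how pre()/post() are invoked above it is assumed to be unsigned long (*)(unsigned long), with a non-zero return from the pre-handler aborting the slow-path acquisition.

#include <linux/mutex.h>
#include <linux/atomic.h>

/* assumed: typedef unsigned long (*side_effect_t)(unsigned long); */

static unsigned long note_contention(unsigned long arg)
{
	atomic_t *flag = (atomic_t *) arg;
	atomic_set(flag, 1);   /* e.g. remember that we are about to block */
	return 0;              /* 0: continue with the normal acquisition */
}

static void demo_locked_section(struct mutex *m, atomic_t *flag)
{
	mutex_lock_sfx(m, note_contention, (unsigned long) flag, NULL, 0);
	/* ... critical section ... */
	mutex_unlock_sfx(m, NULL, 0, NULL, 0);
}
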
diff --git a/kernel/sched.c b/kernel/sched.c
index baaca61bc3a3..f3d9a69a3777 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -83,6 +83,10 @@
83#include <litmus/sched_trace.h> 83#include <litmus/sched_trace.h>
84#include <litmus/trace.h> 84#include <litmus/trace.h>
85 85
86#ifdef CONFIG_LITMUS_SOFTIRQD
87#include <litmus/litmus_softirq.h>
88#endif
89
86static void litmus_tick(struct rq*, struct task_struct*); 90static void litmus_tick(struct rq*, struct task_struct*);
87 91
88#define CREATE_TRACE_POINTS 92#define CREATE_TRACE_POINTS
@@ -4305,6 +4309,7 @@ pick_next_task(struct rq *rq)
4305 BUG(); /* the idle class will always have a runnable task */ 4309 BUG(); /* the idle class will always have a runnable task */
4306} 4310}
4307 4311
4312
4308/* 4313/*
4309 * schedule() is the main scheduler function. 4314 * schedule() is the main scheduler function.
4310 */ 4315 */
@@ -4323,6 +4328,10 @@ need_resched:
4323 rcu_note_context_switch(cpu); 4328 rcu_note_context_switch(cpu);
4324 prev = rq->curr; 4329 prev = rq->curr;
4325 4330
4331#ifdef CONFIG_LITMUS_SOFTIRQD
4332 release_klitirqd_lock(prev);
4333#endif
4334
4326 /* LITMUS^RT: quickly re-evaluate the scheduling decision 4335 /* LITMUS^RT: quickly re-evaluate the scheduling decision
4327 * if the previous one is no longer valid after CTX. 4336 * if the previous one is no longer valid after CTX.
4328 */ 4337 */
@@ -4411,13 +4420,24 @@ litmus_need_resched_nonpreemptible:
4411 goto litmus_need_resched_nonpreemptible; 4420 goto litmus_need_resched_nonpreemptible;
4412 4421
4413 preempt_enable_no_resched(); 4422 preempt_enable_no_resched();
4423
4414 if (need_resched()) 4424 if (need_resched())
4415 goto need_resched; 4425 goto need_resched;
4416 4426
4427#ifdef CONFIG_LITMUS_SOFTIRQD
4428 reacquire_klitirqd_lock(prev);
4429#endif
4430
4431#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
4432 litmus->run_tasklets(prev);
4433#endif
4434
4417 srp_ceiling_block(); 4435 srp_ceiling_block();
4418} 4436}
4419EXPORT_SYMBOL(schedule); 4437EXPORT_SYMBOL(schedule);
4420 4438
4439
4440
4421#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 4441#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
4422 4442
4423static inline bool owner_running(struct mutex *lock, struct task_struct *owner) 4443static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
@@ -4561,6 +4581,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
4561 } 4581 }
4562} 4582}
4563 4583
4584
4564/** 4585/**
4565 * __wake_up - wake up threads blocked on a waitqueue. 4586 * __wake_up - wake up threads blocked on a waitqueue.
4566 * @q: the waitqueue 4587 * @q: the waitqueue
@@ -4747,6 +4768,12 @@ void __sched wait_for_completion(struct completion *x)
4747} 4768}
4748EXPORT_SYMBOL(wait_for_completion); 4769EXPORT_SYMBOL(wait_for_completion);
4749 4770
4771void __sched __wait_for_completion_locked(struct completion *x)
4772{
4773 do_wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
4774}
4775EXPORT_SYMBOL(__wait_for_completion_locked);
4776
4750/** 4777/**
4751 * wait_for_completion_timeout: - waits for completion of a task (w/timeout) 4778 * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
4752 * @x: holds the state of this particular completion 4779 * @x: holds the state of this particular completion
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 94a62c0d4ade..c947a046a6d7 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -33,11 +33,11 @@
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/ftrace.h> 34#include <linux/ftrace.h>
35 35
36static noinline void __down(struct semaphore *sem); 36noinline void __down(struct semaphore *sem);
37static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
38static noinline int __down_killable(struct semaphore *sem); 38static noinline int __down_killable(struct semaphore *sem);
39static noinline int __down_timeout(struct semaphore *sem, long jiffies); 39static noinline int __down_timeout(struct semaphore *sem, long jiffies);
40static noinline void __up(struct semaphore *sem); 40noinline void __up(struct semaphore *sem);
41 41
42/** 42/**
43 * down - acquire the semaphore 43 * down - acquire the semaphore
@@ -190,11 +190,13 @@ EXPORT_SYMBOL(up);
190 190
191/* Functions for the contended case */ 191/* Functions for the contended case */
192 192
193/*
193struct semaphore_waiter { 194struct semaphore_waiter {
194 struct list_head list; 195 struct list_head list;
195 struct task_struct *task; 196 struct task_struct *task;
196 int up; 197 int up;
197}; 198};
199 */
198 200
199/* 201/*
200 * Because this function is inlined, the 'state' parameter will be 202 * Because this function is inlined, the 'state' parameter will be
@@ -233,10 +235,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
233 return -EINTR; 235 return -EINTR;
234} 236}
235 237
236static noinline void __sched __down(struct semaphore *sem) 238noinline void __sched __down(struct semaphore *sem)
237{ 239{
238 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); 240 __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
239} 241}
242EXPORT_SYMBOL(__down);
243
240 244
241static noinline int __sched __down_interruptible(struct semaphore *sem) 245static noinline int __sched __down_interruptible(struct semaphore *sem)
242{ 246{
@@ -253,7 +257,7 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies)
253 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); 257 return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies);
254} 258}
255 259
256static noinline void __sched __up(struct semaphore *sem) 260noinline void __sched __up(struct semaphore *sem)
257{ 261{
258 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, 262 struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
259 struct semaphore_waiter, list); 263 struct semaphore_waiter, list);
@@ -261,3 +265,4 @@ static noinline void __sched __up(struct semaphore *sem)
261 waiter->up = 1; 265 waiter->up = 1;
262 wake_up_process(waiter->task); 266 wake_up_process(waiter->task);
263} 267}
268EXPORT_SYMBOL(__up); \ No newline at end of file
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fca82c32042b..5ce271675662 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -29,6 +29,15 @@
29#include <trace/events/irq.h> 29#include <trace/events/irq.h>
30 30
31#include <asm/irq.h> 31#include <asm/irq.h>
32
33#include <litmus/litmus.h>
34#include <litmus/sched_trace.h>
35
36#ifdef CONFIG_LITMUS_NVIDIA
37#include <litmus/nvidia_info.h>
38#include <litmus/trace.h>
39#endif
40
32/* 41/*
33 - No shared variables, all the data are CPU local. 42 - No shared variables, all the data are CPU local.
34 - If a softirq needs serialization, let it serialize itself 43 - If a softirq needs serialization, let it serialize itself
@@ -67,7 +76,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
67 * to the pending events, so lets the scheduler to balance 76 * to the pending events, so lets the scheduler to balance
68 * the softirq load for us. 77 * the softirq load for us.
69 */ 78 */
70static void wakeup_softirqd(void) 79void wakeup_softirqd(void)
71{ 80{
72 /* Interrupts are disabled: no need to stop preemption */ 81 /* Interrupts are disabled: no need to stop preemption */
73 struct task_struct *tsk = __this_cpu_read(ksoftirqd); 82 struct task_struct *tsk = __this_cpu_read(ksoftirqd);
@@ -193,6 +202,7 @@ void local_bh_enable_ip(unsigned long ip)
193} 202}
194EXPORT_SYMBOL(local_bh_enable_ip); 203EXPORT_SYMBOL(local_bh_enable_ip);
195 204
205
196/* 206/*
197 * We restart softirq processing MAX_SOFTIRQ_RESTART times, 207 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
198 * and we fall back to softirqd after that. 208 * and we fall back to softirqd after that.
@@ -206,65 +216,65 @@ EXPORT_SYMBOL(local_bh_enable_ip);
206 216
207asmlinkage void __do_softirq(void) 217asmlinkage void __do_softirq(void)
208{ 218{
209 struct softirq_action *h; 219 struct softirq_action *h;
210 __u32 pending; 220 __u32 pending;
211 int max_restart = MAX_SOFTIRQ_RESTART; 221 int max_restart = MAX_SOFTIRQ_RESTART;
212 int cpu; 222 int cpu;
213 223
214 pending = local_softirq_pending(); 224 pending = local_softirq_pending();
215 account_system_vtime(current); 225 account_system_vtime(current);
216 226
217 __local_bh_disable((unsigned long)__builtin_return_address(0), 227 __local_bh_disable((unsigned long)__builtin_return_address(0),
218 SOFTIRQ_OFFSET); 228 SOFTIRQ_OFFSET);
219 lockdep_softirq_enter(); 229 lockdep_softirq_enter();
220 230
221 cpu = smp_processor_id(); 231 cpu = smp_processor_id();
222restart: 232restart:
223 /* Reset the pending bitmask before enabling irqs */ 233 /* Reset the pending bitmask before enabling irqs */
224 set_softirq_pending(0); 234 set_softirq_pending(0);
225 235
226 local_irq_enable(); 236 local_irq_enable();
227 237
228 h = softirq_vec; 238 h = softirq_vec;
229
230 do {
231 if (pending & 1) {
232 unsigned int vec_nr = h - softirq_vec;
233 int prev_count = preempt_count();
234
235 kstat_incr_softirqs_this_cpu(vec_nr);
236
237 trace_softirq_entry(vec_nr);
238 h->action(h);
239 trace_softirq_exit(vec_nr);
240 if (unlikely(prev_count != preempt_count())) {
241 printk(KERN_ERR "huh, entered softirq %u %s %p"
242 "with preempt_count %08x,"
243 " exited with %08x?\n", vec_nr,
244 softirq_to_name[vec_nr], h->action,
245 prev_count, preempt_count());
246 preempt_count() = prev_count;
247 }
248 239
249 rcu_bh_qs(cpu); 240 do {
250 } 241 if (pending & 1) {
251 h++; 242 unsigned int vec_nr = h - softirq_vec;
252 pending >>= 1; 243 int prev_count = preempt_count();
253 } while (pending);
254 244
255 local_irq_disable(); 245 kstat_incr_softirqs_this_cpu(vec_nr);
256 246
257 pending = local_softirq_pending(); 247 trace_softirq_entry(vec_nr);
258 if (pending && --max_restart) 248 h->action(h);
259 goto restart; 249 trace_softirq_exit(vec_nr);
250 if (unlikely(prev_count != preempt_count())) {
251 printk(KERN_ERR "huh, entered softirq %u %s %p"
252 "with preempt_count %08x,"
253 " exited with %08x?\n", vec_nr,
254 softirq_to_name[vec_nr], h->action,
255 prev_count, preempt_count());
256 preempt_count() = prev_count;
257 }
260 258
261 if (pending) 259 rcu_bh_qs(cpu);
262 wakeup_softirqd(); 260 }
261 h++;
262 pending >>= 1;
263 } while (pending);
263 264
264 lockdep_softirq_exit(); 265 local_irq_disable();
265 266
266 account_system_vtime(current); 267 pending = local_softirq_pending();
267 __local_bh_enable(SOFTIRQ_OFFSET); 268 if (pending && --max_restart)
269 goto restart;
270
271 if (pending)
272 wakeup_softirqd();
273
274 lockdep_softirq_exit();
275
276 account_system_vtime(current);
277 __local_bh_enable(SOFTIRQ_OFFSET);
268} 278}
269 279
270#ifndef __ARCH_HAS_DO_SOFTIRQ 280#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -402,8 +412,99 @@ struct tasklet_head
402static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); 412static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
403static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); 413static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
404 414
415#ifdef CONFIG_LITMUS_NVIDIA
416static int __do_nv_now(struct tasklet_struct* tasklet)
417{
418 int success = 1;
419
420 if(tasklet_trylock(tasklet)) {
421 if (!atomic_read(&tasklet->count)) {
422 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state)) {
423 BUG();
424 }
425 tasklet->func(tasklet->data);
427 }
428 else {
429 success = 0;
430 }
431
432 tasklet_unlock(tasklet);
433 }
434 else {
435 success = 0;
436 }
437
438 return success;
439}
440#endif
441
442
405void __tasklet_schedule(struct tasklet_struct *t) 443void __tasklet_schedule(struct tasklet_struct *t)
406{ 444{
445#ifdef CONFIG_LITMUS_NVIDIA
446 if(is_nvidia_func(t->func))
447 {
448#if 0
449 // do nvidia tasklets right away and return
450 if(__do_nv_now(t))
451 return;
452#else
453 u32 nvidia_device = get_tasklet_nv_device_num(t);
454 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
455 // __FUNCTION__, nvidia_device,litmus_clock());
456
457 unsigned long flags;
458 struct task_struct* device_owner;
459
460 lock_nv_registry(nvidia_device, &flags);
461
462 device_owner = get_nv_max_device_owner(nvidia_device);
463
464 if(device_owner==NULL)
465 {
466 t->owner = NULL;
467 }
468 else
469 {
470 if(is_realtime(device_owner))
471 {
472 TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
473 __FUNCTION__, nvidia_device,litmus_clock());
474 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
475 __FUNCTION__,device_owner->pid,nvidia_device);
476
477 t->owner = device_owner;
478 sched_trace_tasklet_release(t->owner);
479
480 if(likely(_litmus_tasklet_schedule(t,nvidia_device)))
481 {
482 unlock_nv_registry(nvidia_device, &flags);
483 return;
484 }
485 else
486 {
487 t->owner = NULL; /* fall through to normal scheduling */
488 }
489 }
490 else
491 {
492 t->owner = NULL;
493 }
494 }
495 unlock_nv_registry(nvidia_device, &flags);
496#endif
497 }
498
499#endif
500
501 ___tasklet_schedule(t);
502}
503EXPORT_SYMBOL(__tasklet_schedule);
504
505
506void ___tasklet_schedule(struct tasklet_struct *t)
507{
407 unsigned long flags; 508 unsigned long flags;
408 509
409 local_irq_save(flags); 510 local_irq_save(flags);
@@ -413,11 +514,65 @@ void __tasklet_schedule(struct tasklet_struct *t)
413 raise_softirq_irqoff(TASKLET_SOFTIRQ); 514 raise_softirq_irqoff(TASKLET_SOFTIRQ);
414 local_irq_restore(flags); 515 local_irq_restore(flags);
415} 516}
517EXPORT_SYMBOL(___tasklet_schedule);
416 518
417EXPORT_SYMBOL(__tasklet_schedule);
418 519
419void __tasklet_hi_schedule(struct tasklet_struct *t) 520void __tasklet_hi_schedule(struct tasklet_struct *t)
420{ 521{
522#ifdef CONFIG_LITMUS_NVIDIA
523 if(is_nvidia_func(t->func))
524 {
525 u32 nvidia_device = get_tasklet_nv_device_num(t);
526 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
527 // __FUNCTION__, nvidia_device,litmus_clock());
528
529 unsigned long flags;
530 struct task_struct* device_owner;
531
532 lock_nv_registry(nvidia_device, &flags);
533
534 device_owner = get_nv_max_device_owner(nvidia_device);
535
536 if(device_owner==NULL)
537 {
538 t->owner = NULL;
539 }
540 else
541 {
542 if( is_realtime(device_owner))
543 {
544 TRACE("%s: Handling NVIDIA tasklet for device %u\tat %llu\n",
545 __FUNCTION__, nvidia_device,litmus_clock());
546 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
547 __FUNCTION__,device_owner->pid,nvidia_device);
548
549 t->owner = device_owner;
550 sched_trace_tasklet_release(t->owner);
551 if(likely(_litmus_tasklet_hi_schedule(t,nvidia_device)))
552 {
553 unlock_nv_registry(nvidia_device, &flags);
554 return;
555 }
556 else
557 {
558 t->owner = NULL; /* fall through to normal scheduling */
559 }
560 }
561 else
562 {
563 t->owner = NULL;
564 }
565 }
566 unlock_nv_registry(nvidia_device, &flags);
567 }
568#endif
569
570 ___tasklet_hi_schedule(t);
571}
572EXPORT_SYMBOL(__tasklet_hi_schedule);
573
574void ___tasklet_hi_schedule(struct tasklet_struct* t)
575{
421 unsigned long flags; 576 unsigned long flags;
422 577
423 local_irq_save(flags); 578 local_irq_save(flags);
@@ -427,19 +582,72 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
427 raise_softirq_irqoff(HI_SOFTIRQ); 582 raise_softirq_irqoff(HI_SOFTIRQ);
428 local_irq_restore(flags); 583 local_irq_restore(flags);
429} 584}
430 585EXPORT_SYMBOL(___tasklet_hi_schedule);
431EXPORT_SYMBOL(__tasklet_hi_schedule);
432 586
433void __tasklet_hi_schedule_first(struct tasklet_struct *t) 587void __tasklet_hi_schedule_first(struct tasklet_struct *t)
434{ 588{
435 BUG_ON(!irqs_disabled()); 589 BUG_ON(!irqs_disabled());
590#ifdef CONFIG_LITMUS_NVIDIA
591 if(is_nvidia_func(t->func))
592 {
593 u32 nvidia_device = get_tasklet_nv_device_num(t);
594 // TRACE("%s: Handling NVIDIA tasklet for device\t%u\tat\t%llu\n",
595 // __FUNCTION__, nvidia_device,litmus_clock());
596 unsigned long flags;
597 struct task_struct* device_owner;
598
599 lock_nv_registry(nvidia_device, &flags);
600
601 device_owner = get_nv_max_device_owner(nvidia_device);
602
603 if(device_owner==NULL)
604 {
605 t->owner = NULL;
606 }
607 else
608 {
609 if(is_realtime(device_owner))
610 {
611 TRACE("%s: Handling NVIDIA tasklet for device %u at %llu\n",
612 __FUNCTION__, nvidia_device,litmus_clock());
613
614 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
615 __FUNCTION__,device_owner->pid,nvidia_device);
616
617 t->owner = device_owner;
618 sched_trace_tasklet_release(t->owner);
619 if(likely(_litmus_tasklet_hi_schedule_first(t,nvidia_device)))
620 {
621 unlock_nv_registry(nvidia_device, &flags);
622 return;
623 }
624 else
625 {
626 t->owner = NULL; /* fall through to normal scheduling */
627 }
628 }
629 else
630 {
631 t->owner = NULL;
632 }
633 }
634 unlock_nv_registry(nvidia_device, &flags);
635 }
636#endif
637
638 ___tasklet_hi_schedule_first(t);
639}
640EXPORT_SYMBOL(__tasklet_hi_schedule_first);
641
642void ___tasklet_hi_schedule_first(struct tasklet_struct* t)
643{
644 BUG_ON(!irqs_disabled());
436 645
437 t->next = __this_cpu_read(tasklet_hi_vec.head); 646 t->next = __this_cpu_read(tasklet_hi_vec.head);
438 __this_cpu_write(tasklet_hi_vec.head, t); 647 __this_cpu_write(tasklet_hi_vec.head, t);
439 __raise_softirq_irqoff(HI_SOFTIRQ); 648 __raise_softirq_irqoff(HI_SOFTIRQ);
440} 649}
441 650EXPORT_SYMBOL(___tasklet_hi_schedule_first);
442EXPORT_SYMBOL(__tasklet_hi_schedule_first);
443 651
444static void tasklet_action(struct softirq_action *a) 652static void tasklet_action(struct softirq_action *a)
445{ 653{
@@ -495,6 +703,7 @@ static void tasklet_hi_action(struct softirq_action *a)
495 if (!atomic_read(&t->count)) { 703 if (!atomic_read(&t->count)) {
496 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) 704 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
497 BUG(); 705 BUG();
706
498 t->func(t->data); 707 t->func(t->data);
499 tasklet_unlock(t); 708 tasklet_unlock(t);
500 continue; 709 continue;
@@ -518,8 +727,13 @@ void tasklet_init(struct tasklet_struct *t,
518 t->next = NULL; 727 t->next = NULL;
519 t->state = 0; 728 t->state = 0;
520 atomic_set(&t->count, 0); 729 atomic_set(&t->count, 0);
730
521 t->func = func; 731 t->func = func;
522 t->data = data; 732 t->data = data;
733
734#ifdef CONFIG_LITMUS_SOFTIRQD
735 t->owner = NULL;
736#endif
523} 737}
524 738
525EXPORT_SYMBOL(tasklet_init); 739EXPORT_SYMBOL(tasklet_init);
@@ -534,6 +748,7 @@ void tasklet_kill(struct tasklet_struct *t)
534 yield(); 748 yield();
535 } while (test_bit(TASKLET_STATE_SCHED, &t->state)); 749 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
536 } 750 }
751
537 tasklet_unlock_wait(t); 752 tasklet_unlock_wait(t);
538 clear_bit(TASKLET_STATE_SCHED, &t->state); 753 clear_bit(TASKLET_STATE_SCHED, &t->state);
539} 754}
@@ -808,6 +1023,7 @@ void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
808 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) { 1023 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
809 if (*i == t) { 1024 if (*i == t) {
810 *i = t->next; 1025 *i = t->next;
1026
811 /* If this was the tail element, move the tail ptr */ 1027 /* If this was the tail element, move the tail ptr */
812 if (*i == NULL) 1028 if (*i == NULL)
813 per_cpu(tasklet_vec, cpu).tail = i; 1029 per_cpu(tasklet_vec, cpu).tail = i;
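
The three scheduling entry points above repeat the same owner-lookup pattern. Below is a hypothetical consolidation (not part of the patch) that makes the control flow easier to follow: dispatch stands in for _litmus_tasklet_schedule() and its _hi variants, and the int return type of those functions is an assumption based on how their results are tested above.

#ifdef CONFIG_LITMUS_NVIDIA
static int try_litmus_dispatch(struct tasklet_struct *t,
			       int (*dispatch)(struct tasklet_struct *, u32))
{
	unsigned long flags;
	struct task_struct *owner;
	u32 dev;
	int handled = 0;

	if (!is_nvidia_func(t->func))
		return 0;                       /* not ours: use the normal path */

	dev = get_tasklet_nv_device_num(t);
	lock_nv_registry(dev, &flags);

	owner = get_nv_max_device_owner(dev);
	if (owner && is_realtime(owner)) {
		t->owner = owner;
		sched_trace_tasklet_release(t->owner);
		handled = dispatch(t, dev);     /* hand off to klitirqd */
		if (!handled)
			t->owner = NULL;        /* fall back to the softirq path */
	} else {
		t->owner = NULL;
	}

	unlock_nv_registry(dev, &flags);
	return handled;
}
#endif
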
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0400553f0d04..6b59d59ce3cf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -44,6 +44,13 @@
44 44
45#include "workqueue_sched.h" 45#include "workqueue_sched.h"
46 46
47#ifdef CONFIG_LITMUS_NVIDIA
48#include <litmus/litmus.h>
49#include <litmus/sched_trace.h>
50#include <litmus/nvidia_info.h>
51#endif
52
53
47enum { 54enum {
48 /* global_cwq flags */ 55 /* global_cwq flags */
49 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ 56 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
@@ -1047,9 +1054,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1047 work_flags |= WORK_STRUCT_DELAYED; 1054 work_flags |= WORK_STRUCT_DELAYED;
1048 worklist = &cwq->delayed_works; 1055 worklist = &cwq->delayed_works;
1049 } 1056 }
1050
1051 insert_work(cwq, work, worklist, work_flags); 1057 insert_work(cwq, work, worklist, work_flags);
1052
1053 spin_unlock_irqrestore(&gcwq->lock, flags); 1058 spin_unlock_irqrestore(&gcwq->lock, flags);
1054} 1059}
1055 1060
@@ -2687,10 +2692,70 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
2687 */ 2692 */
2688int schedule_work(struct work_struct *work) 2693int schedule_work(struct work_struct *work)
2689{ 2694{
2690 return queue_work(system_wq, work); 2695#if 0
2696#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_SOFTIRQD)
2697 if(is_nvidia_func(work->func))
2698 {
2699 u32 nvidiaDevice = get_work_nv_device_num(work);
2700
2701 //1) Ask Litmus which task owns GPU <nvidiaDevice>. (API to be defined.)
2702 unsigned long flags;
2703 struct task_struct* device_owner;
2704
2705 lock_nv_registry(nvidiaDevice, &flags);
2706
2707 device_owner = get_nv_max_device_owner(nvidiaDevice);
2708
2709 //2) If there is an owner, set work->owner to the owner's task struct.
2710 if(device_owner==NULL)
2711 {
2712 work->owner = NULL;
2713 //TRACE("%s: the owner task of NVIDIA Device %u is NULL\n",__FUNCTION__,nvidiaDevice);
2714 }
2715 else
2716 {
2717 if( is_realtime(device_owner))
2718 {
2719 TRACE("%s: Handling NVIDIA work for device\t%u\tat\t%llu\n",
2720 __FUNCTION__, nvidiaDevice,litmus_clock());
2721 TRACE("%s: the owner task %d of NVIDIA Device %u is RT-task\n",
2722 __FUNCTION__,
2723 device_owner->pid,
2724 nvidiaDevice);
2725
2726 //3) Call litmus_schedule_work() and return (don't execute the rest
2727 // of schedule_schedule()).
2728 work->owner = device_owner;
2729 sched_trace_work_release(work->owner);
2730 if(likely(litmus_schedule_work(work, nvidiaDevice)))
2731 {
2732 unlock_nv_registry(nvidiaDevice, &flags);
2733 return 1;
2734 }
2735 else
2736 {
2737 work->owner = NULL; /* fall through to normal work scheduling */
2738 }
2739 }
2740 else
2741 {
2742 work->owner = NULL;
2743 }
2744 }
2745 unlock_nv_registry(nvidiaDevice, &flags);
2746 }
2747#endif
2748#endif
2749 return(__schedule_work(work));
2691} 2750}
2692EXPORT_SYMBOL(schedule_work); 2751EXPORT_SYMBOL(schedule_work);
2693 2752
2753int __schedule_work(struct work_struct* work)
2754{
2755 return queue_work(system_wq, work);
2756}
2757EXPORT_SYMBOL(__schedule_work);
2758
2694/* 2759/*
2695 * schedule_work_on - put work task on a specific cpu 2760 * schedule_work_on - put work task on a specific cpu
2696 * @cpu: cpu to put the work task on 2761 * @cpu: cpu to put the work task on
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 94b48e199577..8c156e4da528 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -60,6 +60,42 @@ config LITMUS_LOCKING
60 Say Yes if you want to include locking protocols such as the FMLP and 60 Say Yes if you want to include locking protocols such as the FMLP and
61 Baker's SRP. 61 Baker's SRP.
62 62
63config LITMUS_AFFINITY_LOCKING
64 bool "Enable affinity infrastructure in k-exclusion locking protocols."
65 depends on LITMUS_LOCKING
66 default n
67 help
68 Enable affinity tracking infrastructure in k-exclusion locking protocols.
69	  This only enables the *infrastructure*, not actual affinity algorithms.
70
71 If unsure, say No.
72
73config LITMUS_NESTED_LOCKING
74 bool "Support for nested inheritance in locking protocols"
75 depends on LITMUS_LOCKING
76 default n
77 help
78 Enable nested priority inheritance.
79
80config LITMUS_DGL_SUPPORT
81 bool "Support for dynamic group locks"
82 depends on LITMUS_NESTED_LOCKING
83 default n
84 help
85 Enable dynamic group lock support.
86
87config LITMUS_MAX_DGL_SIZE
88 int "Maximum size of a dynamic group lock."
89 depends on LITMUS_DGL_SUPPORT
90 range 1 128
91 default "10"
92 help
93 Dynamic group lock data structures are allocated on the process
94	  stack when a group is requested. We cap the maximum number of
95	  locks in a dynamic group lock to avoid dynamic allocation.
96
97 TODO: Batch DGL requests exceeding LITMUS_MAX_DGL_SIZE.
98
63endmenu 99endmenu
64 100
65menu "Performance Enhancements" 101menu "Performance Enhancements"
@@ -121,7 +157,7 @@ config SCHED_TASK_TRACE
121config SCHED_TASK_TRACE_SHIFT 157config SCHED_TASK_TRACE_SHIFT
122 int "Buffer size for sched_trace_xxx() events" 158 int "Buffer size for sched_trace_xxx() events"
123 depends on SCHED_TASK_TRACE 159 depends on SCHED_TASK_TRACE
124 range 8 13 160 range 8 15
125 default 9 161 default 9
126 help 162 help
127 163
@@ -215,4 +251,114 @@ config PREEMPT_STATE_TRACE
215 251
216endmenu 252endmenu
217 253
254menu "Interrupt Handling"
255
256choice
257 prompt "Scheduling of interrupt bottom-halves in Litmus."
258 default LITMUS_SOFTIRQD_NONE
259 depends on LITMUS_LOCKING && !LITMUS_THREAD_ALL_SOFTIRQ
260 help
261 Schedule tasklets with known priorities in Litmus.
262
263config LITMUS_SOFTIRQD_NONE
264 bool "No tasklet scheduling in Litmus."
265 help
266 Don't schedule tasklets in Litmus. Default.
267
268config LITMUS_SOFTIRQD
269 bool "Spawn klitirqd interrupt handling threads."
270 help
271 Create klitirqd interrupt handling threads. Work must be
272 specifically dispatched to these workers. (Softirqs for
273 Litmus tasks are not magically redirected to klitirqd.)
274
275 G-EDF/RM, C-EDF/RM ONLY for now!
276
277
278config LITMUS_PAI_SOFTIRQD
279 bool "Defer tasklets to context switch points."
280 help
281 Only execute scheduled tasklet bottom halves at
282	  scheduling points. This reduces context-switch overhead
283	  at the cost of non-preemptive bottom-half processing
284	  durations.
285
286 G-EDF/RM, C-EDF/RM ONLY for now!
287
288endchoice
289
290
291config NR_LITMUS_SOFTIRQD
292 int "Number of klitirqd."
293 depends on LITMUS_SOFTIRQD
294 range 1 4096
295 default "1"
296 help
297	  Should be at most the number of CPUs in your system.
298
299config LITMUS_NVIDIA
300 bool "Litmus handling of NVIDIA interrupts."
301 default n
302 help
303 Direct tasklets from NVIDIA devices to Litmus's klitirqd
304 or PAI interrupt handling routines.
305
306 If unsure, say No.
307
308config LITMUS_AFFINITY_AWARE_GPU_ASSINGMENT
309 bool "Enable affinity-aware heuristics to improve GPU assignment."
310 depends on LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING
311 default n
312 help
313 Enable several heuristics to improve the assignment
314 of GPUs to real-time tasks to reduce the overheads
315 of memory migrations.
316
317 If unsure, say No.
318
319config NV_DEVICE_NUM
320 int "Number of NVIDIA GPUs."
321 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
322 range 1 4096
323 default "1"
324 help
325	  Should be <= the number of CPUs and
326	  <= the number of GPUs in your system.
327
328config NV_MAX_SIMULT_USERS
329	int "Maximum number of threads sharing a GPU simultaneously"
330 depends on LITMUS_SOFTIRQD || LITMUS_PAI_SOFTIRQD
331 range 1 3
332 default "2"
333 help
334 Should be equal to the #copy_engines + #execution_engines
335 of the GPUs in your system.
336
337 Scientific/Professional GPUs = 3 (ex. M2070, Quadro 6000?)
338 Consumer Fermi/Kepler GPUs = 2 (GTX-4xx thru -6xx)
339 Older = 1 (ex. GTX-2xx)
340
341choice
342 prompt "CUDA/Driver Version Support"
343 default CUDA_4_0
344 depends on LITMUS_NVIDIA
345 help
346 Select the version of CUDA/driver to support.
347
348config CUDA_4_0
349 bool "CUDA 4.0"
350 depends on LITMUS_NVIDIA
351 help
352 Support CUDA 4.0 RC2 (dev. driver version: x86_64-270.40)
353
354config CUDA_3_2
355 bool "CUDA 3.2"
356 depends on LITMUS_NVIDIA
357 help
358 Support CUDA 3.2 (dev. driver version: x86_64-260.24)
359
360endchoice
361
362endmenu
363
218endmenu 364endmenu
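
The LITMUS_MAX_DGL_SIZE option above exists so that dynamic-group-lock bookkeeping can live on the requesting task's stack with a compile-time bound. The following is a minimal sketch of that pattern; the helper name dgl_size_ok is purely illustrative and not part of this patch, only CONFIG_LITMUS_MAX_DGL_SIZE and struct litmus_lock come from the patch itself.

/* Sketch only: validate a requested group size against the Kconfig bound
 * so that per-request state can be a fixed-size on-stack array. */
static inline int dgl_size_ok(unsigned int dgl_size)
{
	return dgl_size >= 1 && dgl_size <= CONFIG_LITMUS_MAX_DGL_SIZE;
}

/* A caller can then safely declare, e.g.:
 *	struct litmus_lock *dgl_locks[CONFIG_LITMUS_MAX_DGL_SIZE];
 * and reject (or, per the TODO above, eventually batch) larger requests.
 */
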
diff --git a/litmus/Makefile b/litmus/Makefile
index 7338180f196f..080cbf694a41 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -15,9 +15,11 @@ obj-y = sched_plugin.o litmus.o \
15 locking.o \ 15 locking.o \
16 srp.o \ 16 srp.o \
17 bheap.o \ 17 bheap.o \
18 binheap.o \
18 ctrldev.o \ 19 ctrldev.o \
19 sched_gsn_edf.o \ 20 sched_gsn_edf.o \
20 sched_psn_edf.o 21 sched_psn_edf.o \
22 kfmlp_lock.o
21 23
22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 24obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 25obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
@@ -27,3 +29,10 @@ obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
27obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 29obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
28obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 30obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
29obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o 31obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
32
33obj-$(CONFIG_LITMUS_NESTED_LOCKING) += rsm_lock.o ikglp_lock.o
34obj-$(CONFIG_LITMUS_SOFTIRQD) += litmus_softirq.o
35obj-$(CONFIG_LITMUS_PAI_SOFTIRQD) += litmus_pai_softirq.o
36obj-$(CONFIG_LITMUS_NVIDIA) += nvidia_info.o sched_trace_external.o
37
38obj-$(CONFIG_LITMUS_AFFINITY_LOCKING) += kexclu_affinity.o gpu_affinity.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
index 3fa6dd789400..cd93249b5506 100644
--- a/litmus/affinity.c
+++ b/litmus/affinity.c
@@ -26,7 +26,7 @@ void init_topology(void) {
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); 26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 } 27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", 28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i, 29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i])); 30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 } 31 }
32 32
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..8d42403ad52c
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,443 @@
1#include <litmus/binheap.h>
2
3//extern void dump_node_data(struct binheap_node* parent, struct binheap_node* child);
4//extern void dump_node_data2(struct binheap_handle *handle, struct binheap_node* bad_node);
5
6int binheap_is_in_this_heap(struct binheap_node *node,
7 struct binheap_handle* heap)
8{
9 if(!binheap_is_in_heap(node)) {
10 return 0;
11 }
12
13 while(node->parent != NULL) {
14 node = node->parent;
15 }
16
17 return (node == heap->root);
18}
19
20/* Update the node reference pointers. Same logic as Litmus binomial heap. */
21static void __update_ref(struct binheap_node *parent,
22 struct binheap_node *child)
23{
24 *(parent->ref_ptr) = child;
25 *(child->ref_ptr) = parent;
26
27 swap(parent->ref_ptr, child->ref_ptr);
28}
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34// if(parent == BINHEAP_POISON || child == BINHEAP_POISON) {
35// dump_node_data(parent, child);
36// BUG();
37// }
38
39 swap(parent->data, child->data);
40 __update_ref(parent, child);
41}
42
43
44/* Swaps memory and data between two nodes. Actual nodes swap instead of
45 * just data. Needed when we delete nodes from the heap.
46 */
47static void __binheap_swap_safe(struct binheap_handle *handle,
48 struct binheap_node *a,
49 struct binheap_node *b)
50{
51 swap(a->data, b->data);
52 __update_ref(a, b);
53
54 if((a->parent != NULL) && (a->parent == b->parent)) {
55 /* special case: shared parent */
56 swap(a->parent->left, a->parent->right);
57 }
58 else {
59 /* Update pointers to swap parents. */
60
61 if(a->parent) {
62 if(a == a->parent->left) {
63 a->parent->left = b;
64 }
65 else {
66 a->parent->right = b;
67 }
68 }
69
70 if(b->parent) {
71 if(b == b->parent->left) {
72 b->parent->left = a;
73 }
74 else {
75 b->parent->right = a;
76 }
77 }
78
79 swap(a->parent, b->parent);
80 }
81
82 /* swap children */
83
84 if(a->left) {
85 a->left->parent = b;
86
87 if(a->right) {
88 a->right->parent = b;
89 }
90 }
91
92 if(b->left) {
93 b->left->parent = a;
94
95 if(b->right) {
96 b->right->parent = a;
97 }
98 }
99
100 swap(a->left, b->left);
101 swap(a->right, b->right);
102
103
104 /* update next/last/root pointers */
105
106 if(a == handle->next) {
107 handle->next = b;
108 }
109 else if(b == handle->next) {
110 handle->next = a;
111 }
112
113 if(a == handle->last) {
114 handle->last = b;
115 }
116 else if(b == handle->last) {
117 handle->last = a;
118 }
119
120 if(a == handle->root) {
121 handle->root = b;
122 }
123 else if(b == handle->root) {
124 handle->root = a;
125 }
126}
127
128
129/**
130 * Update the pointer to the last node in the complete binary tree.
131 * Called internally after the root node has been deleted.
132 */
133static void __binheap_update_last(struct binheap_handle *handle)
134{
135 struct binheap_node *temp = handle->last;
136
137 /* find a "bend" in the tree. */
138 while(temp->parent && (temp == temp->parent->left)) {
139 temp = temp->parent;
140 }
141
142 /* step over to sibling if we're not at root */
143 if(temp->parent != NULL) {
144 temp = temp->parent->left;
145 }
146
147 /* now travel right as far as possible. */
148 while(temp->right != NULL) {
149 temp = temp->right;
150 }
151
152 /* take one step to the left if we're not at the bottom-most level. */
153 if(temp->left != NULL) {
154 temp = temp->left;
155 }
156
157 //BUG_ON(!(temp->left == NULL && temp->right == NULL));
158
159 handle->last = temp;
160}
161
162/**
163 * Update the pointer to the node that will take the next inserted node.
164 * Called internally after a node has been inserted.
165 */
166static void __binheap_update_next(struct binheap_handle *handle)
167{
168 struct binheap_node *temp = handle->next;
169
170 /* find a "bend" in the tree. */
171 while(temp->parent && (temp == temp->parent->right)) {
172 temp = temp->parent;
173 }
174
175 /* step over to sibling if we're not at root */
176 if(temp->parent != NULL) {
177 temp = temp->parent->right;
178 }
179
180 /* now travel left as far as possible. */
181 while(temp->left != NULL) {
182 temp = temp->left;
183 }
184
185 handle->next = temp;
186}
187
188
189
190/* bubble node up towards root */
191static void __binheap_bubble_up(
192 struct binheap_handle *handle,
193 struct binheap_node *node)
194{
195 //BUG_ON(!binheap_is_in_heap(node));
196// if(!binheap_is_in_heap(node))
197// {
198// dump_node_data2(handle, node);
199// BUG();
200// }
201
202 while((node->parent != NULL) &&
203 ((node->data == BINHEAP_POISON) /* let BINHEAP_POISON data bubble to the top */ ||
204 handle->compare(node, node->parent))) {
205 __binheap_swap(node->parent, node);
206 node = node->parent;
207
208// if(!binheap_is_in_heap(node))
209// {
210// dump_node_data2(handle, node);
211// BUG();
212// }
213 }
214}
215
216
217/* bubble node down, swapping with min-child */
218static void __binheap_bubble_down(struct binheap_handle *handle)
219{
220 struct binheap_node *node = handle->root;
221
222 while(node->left != NULL) {
223 if(node->right && handle->compare(node->right, node->left)) {
224 if(handle->compare(node->right, node)) {
225 __binheap_swap(node, node->right);
226 node = node->right;
227 }
228 else {
229 break;
230 }
231 }
232 else {
233 if(handle->compare(node->left, node)) {
234 __binheap_swap(node, node->left);
235 node = node->left;
236 }
237 else {
238 break;
239 }
240 }
241 }
242}
243
244
245
246void __binheap_add(struct binheap_node *new_node,
247 struct binheap_handle *handle,
248 void *data)
249{
250// if(binheap_is_in_heap(new_node))
251// {
252// dump_node_data2(handle, new_node);
253// BUG();
254// }
255
256 new_node->data = data;
257 new_node->ref = new_node;
258 new_node->ref_ptr = &(new_node->ref);
259
260 if(!binheap_empty(handle)) {
261 /* insert left side first */
262 if(handle->next->left == NULL) {
263 handle->next->left = new_node;
264 new_node->parent = handle->next;
265 new_node->left = NULL;
266 new_node->right = NULL;
267
268 handle->last = new_node;
269
270 __binheap_bubble_up(handle, new_node);
271 }
272 else {
273 /* left occupied. insert right. */
274 handle->next->right = new_node;
275 new_node->parent = handle->next;
276 new_node->left = NULL;
277 new_node->right = NULL;
278
279 handle->last = new_node;
280
281 __binheap_update_next(handle);
282 __binheap_bubble_up(handle, new_node);
283 }
284 }
285 else {
286 /* first node in heap */
287
288 new_node->parent = NULL;
289 new_node->left = NULL;
290 new_node->right = NULL;
291
292 handle->root = new_node;
293 handle->next = new_node;
294 handle->last = new_node;
295 }
296}
297
298
299
300/**
301 * Removes the root node from the heap. The node is removed after coalescing
302 * the binheap_node with its original data pointer at the root of the tree.
303 *
304 * The 'last' node in the tree is then swapped up to the root and bubbled
305 * down.
306 */
307void __binheap_delete_root(struct binheap_handle *handle,
308 struct binheap_node *container)
309{
310 struct binheap_node *root = handle->root;
311
312// if(!binheap_is_in_heap(container))
313// {
314// dump_node_data2(handle, container);
315// BUG();
316// }
317
318 if(root != container) {
319 /* coalesce */
320 __binheap_swap_safe(handle, root, container);
321 root = container;
322 }
323
324 if(handle->last != root) {
325 /* swap 'last' node up to root and bubble it down. */
326
327 struct binheap_node *to_move = handle->last;
328
329 if(to_move->parent != root) {
330 handle->next = to_move->parent;
331
332 if(handle->next->right == to_move) {
333 /* disconnect from parent */
334 to_move->parent->right = NULL;
335 handle->last = handle->next->left;
336 }
337 else {
338 /* find new 'last' before we disconnect */
339 __binheap_update_last(handle);
340
341 /* disconnect from parent */
342 to_move->parent->left = NULL;
343 }
344 }
345 else {
346 /* 'last' is direct child of root */
347
348 handle->next = to_move;
349
350 if(to_move == to_move->parent->right) {
351 to_move->parent->right = NULL;
352 handle->last = to_move->parent->left;
353 }
354 else {
355 to_move->parent->left = NULL;
356 handle->last = to_move;
357 }
358 }
359 to_move->parent = NULL;
360
361 /* reconnect as root. We can't just swap data ptrs since root node
362 * may be freed after this function returns.
363 */
364 to_move->left = root->left;
365 to_move->right = root->right;
366 if(to_move->left != NULL) {
367 to_move->left->parent = to_move;
368 }
369 if(to_move->right != NULL) {
370 to_move->right->parent = to_move;
371 }
372
373 handle->root = to_move;
374
375 /* bubble down */
376 __binheap_bubble_down(handle);
377 }
378 else {
379 /* removing last node in tree */
380 handle->root = NULL;
381 handle->next = NULL;
382 handle->last = NULL;
383 }
384
385 /* mark as removed */
386 container->parent = BINHEAP_POISON;
387}
388
389
390/**
391 * Delete an arbitrary node. Bubble node to delete up to the root,
392 * and then delete the root.
393 */
394void __binheap_delete(struct binheap_node *node_to_delete,
395 struct binheap_handle *handle)
396{
397 struct binheap_node *target = node_to_delete->ref;
398 void *temp_data = target->data;
399
400// if(!binheap_is_in_heap(node_to_delete))
401// {
402// dump_node_data2(handle, node_to_delete);
403// BUG();
404// }
405//
406// if(!binheap_is_in_heap(target))
407// {
408// dump_node_data2(handle, target);
409// BUG();
410// }
411
412	/* temporarily set data to BINHEAP_POISON to allow node to bubble up to the top. */
413 target->data = BINHEAP_POISON;
414
415 __binheap_bubble_up(handle, target);
416 __binheap_delete_root(handle, node_to_delete);
417
418 node_to_delete->data = temp_data; /* restore node data pointer */
419 //node_to_delete->parent = BINHEAP_POISON; /* poison the node */
420}
421
422/**
423 * Bubble up a node whose key has decreased in value.
424 */
425void __binheap_decrease(struct binheap_node *orig_node,
426 struct binheap_handle *handle)
427{
428 struct binheap_node *target = orig_node->ref;
429
430// if(!binheap_is_in_heap(orig_node))
431// {
432// dump_node_data2(handle, orig_node);
433// BUG();
434// }
435//
436// if(!binheap_is_in_heap(target))
437// {
438// dump_node_data2(handle, target);
439// BUG();
440// }
441//
442 __binheap_bubble_up(handle, target);
443}
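
For orientation, here is a minimal usage sketch of the primitives defined above. It fills in the binheap_handle fields directly and calls the low-level __binheap_* functions; litmus/binheap.h (added elsewhere in this patch) presumably provides initializer and container_of-style wrapper macros (binheap_add, binheap_top_entry, ...) that real callers such as ikglp_lock.c use instead, so treat this purely as an illustration of the compare/ref mechanics.

#include <litmus/binheap.h>

/* Max-heap order on ints: nonzero if 'a' should sit above 'b'. */
static int int_max_order(struct binheap_node *a, struct binheap_node *b)
{
	return *(int *)(a->data) > *(int *)(b->data);
}

static void binheap_sketch(void)
{
	struct binheap_handle h = {
		.root = NULL, .next = NULL, .last = NULL,
		.compare = int_max_order,	/* the header likely offers an init macro */
	};
	struct binheap_node n1, n2;	/* INIT_BINHEAP_NODE() would normally be used */
	int five = 5, nine = 9;

	__binheap_add(&n1, &h, &five);	/* heap: {5} */
	__binheap_add(&n2, &h, &nine);	/* 9's data bubbles up; n2->ref tracks
					 * the node where it now lives */

	/* Remove the element inserted via n2, wherever its data migrated. */
	__binheap_delete(&n2, &h);	/* heap: {5} again */
}
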
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
index 9b44dc2d8d1e..b346bdd65b3b 100644
--- a/litmus/edf_common.c
+++ b/litmus/edf_common.c
@@ -12,40 +12,61 @@
12#include <litmus/sched_plugin.h> 12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h> 13#include <litmus/sched_trace.h>
14 14
15#ifdef CONFIG_LITMUS_NESTED_LOCKING
16#include <litmus/locking.h>
17#endif
18
15#include <litmus/edf_common.h> 19#include <litmus/edf_common.h>
16 20
21
22
17/* edf_higher_prio - returns true if first has a higher EDF priority 23/* edf_higher_prio - returns true if first has a higher EDF priority
18 * than second. Deadline ties are broken by PID. 24 * than second. Deadline ties are broken by PID.
19 * 25 *
20 * both first and second may be NULL 26 * both first and second may be NULL
21 */ 27 */
22int edf_higher_prio(struct task_struct* first, 28#ifdef CONFIG_LITMUS_NESTED_LOCKING
23 struct task_struct* second) 29int __edf_higher_prio(
30 struct task_struct* first, comparison_mode_t first_mode,
31 struct task_struct* second, comparison_mode_t second_mode)
32#else
33int edf_higher_prio(struct task_struct* first, struct task_struct* second)
34#endif
24{ 35{
25 struct task_struct *first_task = first; 36 struct task_struct *first_task = first;
26 struct task_struct *second_task = second; 37 struct task_struct *second_task = second;
27 38
28 /* There is no point in comparing a task to itself. */ 39 /* There is no point in comparing a task to itself. */
29 if (first && first == second) { 40 if (first && first == second) {
30 TRACE_TASK(first, 41 TRACE_CUR("WARNING: pointless edf priority comparison: %s/%d\n", first->comm, first->pid);
31 "WARNING: pointless edf priority comparison.\n"); 42 WARN_ON(1);
32 return 0; 43 return 0;
33 } 44 }
34 45
35 46
36 /* check for NULL tasks */ 47 /* check for NULL tasks */
37 if (!first || !second) 48 if (!first || !second) {
38 return first && !second; 49 return first && !second;
50 }
39 51
40#ifdef CONFIG_LITMUS_LOCKING 52#ifdef CONFIG_LITMUS_LOCKING
41 53 /* Check for EFFECTIVE priorities. Change task
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case. 54 * used for comparison in such a case.
44 */ 55 */
45 if (unlikely(first->rt_param.inh_task)) 56 if (unlikely(first->rt_param.inh_task)
57#ifdef CONFIG_LITMUS_NESTED_LOCKING
58 && (first_mode == EFFECTIVE)
59#endif
60 ) {
46 first_task = first->rt_param.inh_task; 61 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task)) 62 }
63 if (unlikely(second->rt_param.inh_task)
64#ifdef CONFIG_LITMUS_NESTED_LOCKING
65 && (second_mode == EFFECTIVE)
66#endif
67 ) {
48 second_task = second->rt_param.inh_task; 68 second_task = second->rt_param.inh_task;
69 }
49 70
50 /* Check for priority boosting. Tie-break by start of boosting. 71 /* Check for priority boosting. Tie-break by start of boosting.
51 */ 72 */
@@ -53,37 +74,109 @@ int edf_higher_prio(struct task_struct* first,
53 /* first_task is boosted, how about second_task? */ 74 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) || 75 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task), 76 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task))) 77 get_boost_start(second_task))) {
57 return 1; 78 return 1;
58 else 79 }
80 else {
59 return 0; 81 return 0;
60 } else if (unlikely(is_priority_boosted(second_task))) 82 }
83 }
84 else if (unlikely(is_priority_boosted(second_task))) {
61 /* second_task is boosted, first is not*/ 85 /* second_task is boosted, first is not*/
62 return 0; 86 return 0;
87 }
63 88
64#endif 89#endif
65 90
91// // rate-monotonic for testing
92// if (!is_realtime(second_task)) {
93// return true;
94// }
95//
96// if (shorter_period(first_task, second_task)) {
97// return true;
98// }
99//
100// if (get_period(first_task) == get_period(second_task)) {
101// if (first_task->pid < second_task->pid) {
102// return true;
103// }
104// else if (first_task->pid == second_task->pid) {
105// return !second->rt_param.inh_task;
106// }
107// }
108
109 if (!is_realtime(second_task)) {
110 return true;
111 }
112
113 if (earlier_deadline(first_task, second_task)) {
114 return true;
115 }
116 if (get_deadline(first_task) == get_deadline(second_task)) {
117
118 if (shorter_period(first_task, second_task)) {
119 return true;
120 }
121 if (get_rt_period(first_task) == get_rt_period(second_task)) {
122 if (first_task->pid < second_task->pid) {
123 return true;
124 }
125 if (first_task->pid == second_task->pid) {
126#ifdef CONFIG_LITMUS_SOFTIRQD
127 if (first_task->rt_param.is_proxy_thread <
128 second_task->rt_param.is_proxy_thread) {
129 return true;
130 }
131 if(first_task->rt_param.is_proxy_thread == second_task->rt_param.is_proxy_thread) {
132 return !second->rt_param.inh_task;
133 }
134#else
135 return !second->rt_param.inh_task;
136#endif
137 }
138
139 }
140 }
141
142 return false;
143}
144
145
146#ifdef CONFIG_LITMUS_NESTED_LOCKING
147int edf_higher_prio(struct task_struct* first, struct task_struct* second)
148{
149 return __edf_higher_prio(first, EFFECTIVE, second, EFFECTIVE);
150}
151
152int edf_max_heap_order(struct binheap_node *a, struct binheap_node *b)
153{
154 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
155 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
66 156
67 return !is_realtime(second_task) || 157 return __edf_higher_prio(l_a->hp_waiter_eff_prio, EFFECTIVE, l_b->hp_waiter_eff_prio, EFFECTIVE);
158}
68 159
69 /* is the deadline of the first task earlier? 160int edf_min_heap_order(struct binheap_node *a, struct binheap_node *b)
70 * Then it has higher priority. 161{
71 */ 162 return edf_max_heap_order(b, a); // swap comparison
72 earlier_deadline(first_task, second_task) || 163}
73 164
74 /* Do we have a deadline tie? 165int edf_max_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
75 * Then break by PID. 166{
76 */ 167 struct nested_info *l_a = (struct nested_info *)binheap_entry(a, struct nested_info, hp_binheap_node);
77 (get_deadline(first_task) == get_deadline(second_task) && 168 struct nested_info *l_b = (struct nested_info *)binheap_entry(b, struct nested_info, hp_binheap_node);
78 (first_task->pid < second_task->pid ||
79 169
80 /* If the PIDs are the same then the task with the inherited 170 return __edf_higher_prio(l_a->hp_waiter_eff_prio, BASE, l_b->hp_waiter_eff_prio, BASE);
81 * priority wins.
82 */
83 (first_task->pid == second_task->pid &&
84 !second->rt_param.inh_task)));
85} 171}
86 172
173int edf_min_heap_base_priority_order(struct binheap_node *a, struct binheap_node *b)
174{
175 return edf_max_heap_base_priority_order(b, a); // swap comparison
176}
177#endif
178
179
87int edf_ready_order(struct bheap_node* a, struct bheap_node* b) 180int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
88{ 181{
89 return edf_higher_prio(bheap2task(a), bheap2task(b)); 182 return edf_higher_prio(bheap2task(a), bheap2task(b));
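
The BASE/EFFECTIVE modes introduced above (comparison_mode_t, presumably declared in litmus/locking.h, which this hunk newly includes) let nested-locking code choose whether priority inheritance is honored in a comparison. The calls below are illustrative only, not taken from the patch.

/* a and b are real-time tasks; b may currently inherit a higher priority. */
static void edf_mode_examples(struct task_struct *a, struct task_struct *b)
{
	int r1 = __edf_higher_prio(a, BASE,      b, BASE);	/* ignore inheritance on both sides    */
	int r2 = __edf_higher_prio(a, BASE,      b, EFFECTIVE);	/* a's own prio vs. b's inherited prio */
	int r3 = __edf_higher_prio(a, EFFECTIVE, b, EFFECTIVE);	/* equivalent to edf_higher_prio(a, b) */
	(void)r1; (void)r2; (void)r3;
}
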
diff --git a/litmus/fdso.c b/litmus/fdso.c
index aa7b384264e3..18fc61b6414a 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -20,9 +20,22 @@
20 20
21extern struct fdso_ops generic_lock_ops; 21extern struct fdso_ops generic_lock_ops;
22 22
23#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
24extern struct fdso_ops generic_affinity_ops;
25#endif
26
23static const struct fdso_ops* fdso_ops[] = { 27static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */ 28 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */ 29 &generic_lock_ops, /* SRP_SEM */
30 &generic_lock_ops, /* RSM_MUTEX */
31 &generic_lock_ops, /* IKGLP_SEM */
32 &generic_lock_ops, /* KFMLP_SEM */
33#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
34 &generic_affinity_ops, /* IKGLP_SIMPLE_GPU_AFF_OBS */
35 &generic_affinity_ops, /* IKGLP_GPU_AFF_OBS */
36 &generic_affinity_ops, /* KFMLP_SIMPLE_GPU_AFF_OBS */
37 &generic_affinity_ops, /* KFMLP_GPU_AFF_OBS */
38#endif
26}; 39};
27 40
28static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) 41static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
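
Because fdso_ops[] is indexed by obj_type_t, the new entries must line up exactly with the enumerators in include/litmus/fdso.h (which this patch also modifies but which is not shown in this excerpt). Reconstructed from the comments above, the expected ordering would be along these lines; the header itself is authoritative.

typedef enum {
	FMLP_SEM,
	SRP_SEM,
	RSM_MUTEX,
	IKGLP_SEM,
	KFMLP_SEM,
	IKGLP_SIMPLE_GPU_AFF_OBS,
	IKGLP_GPU_AFF_OBS,
	KFMLP_SIMPLE_GPU_AFF_OBS,
	KFMLP_GPU_AFF_OBS,
} obj_type_t;
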
diff --git a/litmus/gpu_affinity.c b/litmus/gpu_affinity.c
new file mode 100644
index 000000000000..9762be1a085e
--- /dev/null
+++ b/litmus/gpu_affinity.c
@@ -0,0 +1,113 @@
1
2#ifdef CONFIG_LITMUS_NVIDIA
3
4#include <linux/sched.h>
5#include <litmus/litmus.h>
6#include <litmus/gpu_affinity.h>
7
8#include <litmus/sched_trace.h>
9
10#define OBSERVATION_CAP 2000000000ULL /* 2e9; integer constant avoids floating point in the kernel */
11
12static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
13{
14 fp_t relative_err;
15 fp_t err, new;
16 fp_t actual = _integer_to_fp(observed);
17
18 err = _sub(actual, fb->est);
19 new = _add(_mul(a, err), _mul(b, fb->accum_err));
20
21 relative_err = _div(err, actual);
22
23 fb->est = new;
24 fb->accum_err = _add(fb->accum_err, err);
25
26 return relative_err;
27}
28
29void update_gpu_estimate(struct task_struct *t, lt_t observed)
30{
31 feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);
32
33 BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);
34
35 if(unlikely(fb->est.val == 0)) {
36 // kludge-- cap observed values to prevent whacky estimations.
37 // whacky stuff happens during the first few jobs.
38 if(unlikely(observed > OBSERVATION_CAP)) {
39 TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
40 observed, OBSERVATION_CAP);
41 observed = OBSERVATION_CAP;
42 }
43
44 // take the first observation as our estimate
45 // (initial value of 0 was bogus anyhow)
46 fb->est = _integer_to_fp(observed);
47 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
48 }
49 else {
50 fp_t rel_err = update_estimate(fb,
51 tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
52 tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
53 observed);
54
55 if(unlikely(_fp_to_integer(fb->est) <= 0)) {
56 TRACE_TASK(t, "Invalid estimate. Patching.\n");
57 fb->est = _integer_to_fp(observed);
58 fb->accum_err = _div(fb->est, _integer_to_fp(2)); // ...seems to work.
59 }
60 else {
61// struct migration_info mig_info;
62
63 sched_trace_prediction_err(t,
64 &(tsk_rt(t)->gpu_migration),
65 &rel_err);
66
67// mig_info.observed = observed;
68// mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
69// mig_info.distance = tsk_rt(t)->gpu_migration;
70//
71// sched_trace_migration(t, &mig_info);
72 }
73 }
74
75 TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
76 tsk_rt(t)->gpu_migration,
77 observed,
78 _fp_to_integer(fb->est),
79 _point(fb->est));
80}
81
82gpu_migration_dist_t gpu_migration_distance(int a, int b)
83{
84 // GPUs organized in a binary hierarchy, no more than 2^MIG_FAR GPUs
85 int i;
86 int dist;
87
88 if(likely(a >= 0 && b >= 0)) {
89 for(i = 0; i <= MIG_FAR; ++i) {
90 if(a>>i == b>>i) {
91 dist = i;
92 goto out;
93 }
94 }
95 dist = MIG_NONE; // hopefully never reached.
96 TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
97 }
98 else {
99 dist = MIG_NONE;
100 }
101
102out:
103 TRACE_CUR("Distance %d -> %d is %d\n",
104 a, b, dist);
105
106 return dist;
107}
108
109
110
111
112#endif
113
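
Two reading aids derived directly from the code above: update_estimate() is a small PI-style feedback filter over observed GPU-related execution times, and gpu_migration_distance() returns the level of the lowest common ancestor when GPU IDs are viewed as leaves of a binary tree.

/* update_estimate(), restated:
 *	err        = observed - est;
 *	est'       = a * err + b * accum_err;	(accum_err prior to this update)
 *	accum_err' = accum_err + err;
 * a and b are the per-task, per-distance gains gpu_fb_param_a/_b.
 *
 * gpu_migration_distance(), worked examples (suppose MIG_FAR == 3,
 * i.e. up to 8 GPUs; the values are purely illustrative):
 *	distance(2, 3) -> 1	(2>>1 == 3>>1)
 *	distance(0, 4) -> 3	(IDs first agree after shifting by 3)
 *	distance(5, 5) -> 0	(same GPU)
 */
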
diff --git a/litmus/ikglp_lock.c b/litmus/ikglp_lock.c
new file mode 100644
index 000000000000..83b708ab85cb
--- /dev/null
+++ b/litmus/ikglp_lock.c
@@ -0,0 +1,2838 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/ikglp_lock.h>
14
15// big signed value.
16#define IKGLP_INVAL_DISTANCE 0x7FFFFFFF
17
18int ikglp_max_heap_base_priority_order(struct binheap_node *a,
19 struct binheap_node *b)
20{
21 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
22 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
23
24 BUG_ON(!d_a);
25 BUG_ON(!d_b);
26
27 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
28}
29
30int ikglp_min_heap_base_priority_order(struct binheap_node *a,
31 struct binheap_node *b)
32{
33 ikglp_heap_node_t *d_a = binheap_entry(a, ikglp_heap_node_t, node);
34 ikglp_heap_node_t *d_b = binheap_entry(b, ikglp_heap_node_t, node);
35
36 return litmus->__compare(d_b->task, BASE, d_a->task, BASE);
37}
38
39int ikglp_donor_max_heap_base_priority_order(struct binheap_node *a,
40 struct binheap_node *b)
41{
42 ikglp_wait_state_t *d_a = binheap_entry(a, ikglp_wait_state_t, node);
43 ikglp_wait_state_t *d_b = binheap_entry(b, ikglp_wait_state_t, node);
44
45 return litmus->__compare(d_a->task, BASE, d_b->task, BASE);
46}
47
48
49int ikglp_min_heap_donee_order(struct binheap_node *a,
50 struct binheap_node *b)
51{
52 struct task_struct *prio_a, *prio_b;
53
54 ikglp_donee_heap_node_t *d_a =
55 binheap_entry(a, ikglp_donee_heap_node_t, node);
56 ikglp_donee_heap_node_t *d_b =
57 binheap_entry(b, ikglp_donee_heap_node_t, node);
58
59 if(!d_a->donor_info) {
60 prio_a = d_a->task;
61 }
62 else {
63 prio_a = d_a->donor_info->task;
64 BUG_ON(d_a->task != d_a->donor_info->donee_info->task);
65 }
66
67 if(!d_b->donor_info) {
68 prio_b = d_b->task;
69 }
70 else {
71 prio_b = d_b->donor_info->task;
72 BUG_ON(d_b->task != d_b->donor_info->donee_info->task);
73 }
74
75 // note reversed order
76 return litmus->__compare(prio_b, BASE, prio_a, BASE);
77}
78
79
80
81static inline int ikglp_get_idx(struct ikglp_semaphore *sem,
82 struct fifo_queue *queue)
83{
84 return (queue - &sem->fifo_queues[0]);
85}
86
87static inline struct fifo_queue* ikglp_get_queue(struct ikglp_semaphore *sem,
88 struct task_struct *holder)
89{
90 int i;
91 for(i = 0; i < sem->nr_replicas; ++i)
92 if(sem->fifo_queues[i].owner == holder)
93 return(&sem->fifo_queues[i]);
94 return(NULL);
95}
96
97
98
99static struct task_struct* ikglp_find_hp_waiter(struct fifo_queue *kqueue,
100 struct task_struct *skip)
101{
102 struct list_head *pos;
103 struct task_struct *queued, *found = NULL;
104
105 list_for_each(pos, &kqueue->wait.task_list) {
106 queued = (struct task_struct*) list_entry(pos,
107 wait_queue_t, task_list)->private;
108
109 /* Compare task prios, find high prio task. */
110 if(queued != skip && litmus->compare(queued, found))
111 found = queued;
112 }
113 return found;
114}
115
116static struct fifo_queue* ikglp_find_shortest(struct ikglp_semaphore *sem,
117 struct fifo_queue *search_start)
118{
119 // we start our search at search_start instead of at the beginning of the
120 // queue list to load-balance across all resources.
121 struct fifo_queue* step = search_start;
122 struct fifo_queue* shortest = sem->shortest_fifo_queue;
123
124 do {
125 step = (step+1 != &sem->fifo_queues[sem->nr_replicas]) ?
126 step+1 : &sem->fifo_queues[0];
127
128 if(step->count < shortest->count) {
129 shortest = step;
130 if(step->count == 0)
131 break; /* can't get any shorter */
132 }
133
134 }while(step != search_start);
135
136 return(shortest);
137}
138
139static inline struct task_struct* ikglp_mth_highest(struct ikglp_semaphore *sem)
140{
141 return binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node)->task;
142}
143
144
145
146#if 0
147static void print_global_list(struct binheap_node* n, int depth)
148{
149 ikglp_heap_node_t *global_heap_node;
150 char padding[81] = " ";
151
152 if(n == NULL) {
153 TRACE_CUR("+-> %p\n", NULL);
154 return;
155 }
156
157 global_heap_node = binheap_entry(n, ikglp_heap_node_t, node);
158
159 if(depth*2 <= 80)
160 padding[depth*2] = '\0';
161
162 TRACE_CUR("%s+-> %s/%d\n",
163 padding,
164 global_heap_node->task->comm,
165 global_heap_node->task->pid);
166
167 if(n->left) print_global_list(n->left, depth+1);
168 if(n->right) print_global_list(n->right, depth+1);
169}
170
171static void print_donees(struct ikglp_semaphore *sem, struct binheap_node *n, int depth)
172{
173 ikglp_donee_heap_node_t *donee_node;
174 char padding[81] = " ";
175 struct task_struct* donor = NULL;
176
177 if(n == NULL) {
178 TRACE_CUR("+-> %p\n", NULL);
179 return;
180 }
181
182 donee_node = binheap_entry(n, ikglp_donee_heap_node_t, node);
183
184 if(depth*2 <= 80)
185 padding[depth*2] = '\0';
186
187 if(donee_node->donor_info) {
188 donor = donee_node->donor_info->task;
189 }
190
191 TRACE_CUR("%s+-> %s/%d (d: %s/%d) (fq: %d)\n",
192 padding,
193 donee_node->task->comm,
194 donee_node->task->pid,
195 (donor) ? donor->comm : "nil",
196 (donor) ? donor->pid : -1,
197 ikglp_get_idx(sem, donee_node->fq));
198
199 if(n->left) print_donees(sem, n->left, depth+1);
200 if(n->right) print_donees(sem, n->right, depth+1);
201}
202
203static void print_donors(struct binheap_node *n, int depth)
204{
205 ikglp_wait_state_t *donor_node;
206 char padding[81] = " ";
207
208 if(n == NULL) {
209 TRACE_CUR("+-> %p\n", NULL);
210 return;
211 }
212
213 donor_node = binheap_entry(n, ikglp_wait_state_t, node);
214
215 if(depth*2 <= 80)
216 padding[depth*2] = '\0';
217
218
219 TRACE_CUR("%s+-> %s/%d (donee: %s/%d)\n",
220 padding,
221 donor_node->task->comm,
222 donor_node->task->pid,
223 donor_node->donee_info->task->comm,
224 donor_node->donee_info->task->pid);
225
226 if(n->left) print_donors(n->left, depth+1);
227 if(n->right) print_donors(n->right, depth+1);
228}
229#endif
230
231static void ikglp_add_global_list(struct ikglp_semaphore *sem,
232 struct task_struct *t,
233 ikglp_heap_node_t *node)
234{
235
236
237 node->task = t;
238 INIT_BINHEAP_NODE(&node->node);
239
240 if(sem->top_m_size < sem->m) {
241 TRACE_CUR("Trivially adding %s/%d to top-m global list.\n",
242 t->comm, t->pid);
243// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
244// print_global_list(sem->top_m.root, 1);
245
246 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
247 ++(sem->top_m_size);
248
249// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
250// print_global_list(sem->top_m.root, 1);
251 }
252 else if(litmus->__compare(t, BASE, ikglp_mth_highest(sem), BASE)) {
253 ikglp_heap_node_t *evicted =
254 binheap_top_entry(&sem->top_m, ikglp_heap_node_t, node);
255
256 TRACE_CUR("Adding %s/%d to top-m and evicting %s/%d.\n",
257 t->comm, t->pid,
258 evicted->task->comm, evicted->task->pid);
259
260// TRACE_CUR("Not-Top-M Before:\n");
261// print_global_list(sem->not_top_m.root, 1);
262// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
263// print_global_list(sem->top_m.root, 1);
264
265
266 binheap_delete_root(&sem->top_m, ikglp_heap_node_t, node);
267 INIT_BINHEAP_NODE(&evicted->node);
268 binheap_add(&evicted->node, &sem->not_top_m, ikglp_heap_node_t, node);
269
270 binheap_add(&node->node, &sem->top_m, ikglp_heap_node_t, node);
271
272// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
273// print_global_list(sem->top_m.root, 1);
274// TRACE_CUR("Not-Top-M After:\n");
275// print_global_list(sem->not_top_m.root, 1);
276 }
277 else {
278 TRACE_CUR("Trivially adding %s/%d to not-top-m global list.\n",
279 t->comm, t->pid);
280// TRACE_CUR("Not-Top-M Before:\n");
281// print_global_list(sem->not_top_m.root, 1);
282
283 binheap_add(&node->node, &sem->not_top_m, ikglp_heap_node_t, node);
284
285// TRACE_CUR("Not-Top-M After:\n");
286// print_global_list(sem->not_top_m.root, 1);
287 }
288}
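
To make the top-m bookkeeping concrete: sem->top_m is a min-ordered heap holding the (at most) m highest-priority requests known to the IKGLP (m is sem->m, typically the processor count in the IKGLP analysis), and sem->not_top_m holds everyone else. A small worked scenario with m = 2 and purely illustrative task names:

/* add T1 (high prio):	top_m = {T1}		not_top_m = {}
 * add T2 (mid prio):	top_m = {T1, T2}	not_top_m = {}
 * add T3 (low prio):	T3 loses to the mth-highest (T2)
 *			top_m = {T1, T2}	not_top_m = {T3}
 * add T0 (highest):	T2, the current mth-highest, is evicted
 *			top_m = {T0, T1}	not_top_m = {T2, T3}
 */
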
289
290
291static void ikglp_del_global_list(struct ikglp_semaphore *sem,
292 struct task_struct *t,
293 ikglp_heap_node_t *node)
294{
295 BUG_ON(!binheap_is_in_heap(&node->node));
296
297 TRACE_CUR("Removing %s/%d from global list.\n", t->comm, t->pid);
298
299 if(binheap_is_in_this_heap(&node->node, &sem->top_m)) {
300 TRACE_CUR("%s/%d is in top-m\n", t->comm, t->pid);
301
302// TRACE_CUR("Not-Top-M Before:\n");
303// print_global_list(sem->not_top_m.root, 1);
304// TRACE_CUR("Top-M Before (size = %d):\n", sem->top_m_size);
305// print_global_list(sem->top_m.root, 1);
306
307
308 binheap_delete(&node->node, &sem->top_m);
309
310 if(!binheap_empty(&sem->not_top_m)) {
311 ikglp_heap_node_t *promoted =
312 binheap_top_entry(&sem->not_top_m, ikglp_heap_node_t, node);
313
314 TRACE_CUR("Promoting %s/%d to top-m\n",
315 promoted->task->comm, promoted->task->pid);
316
317 binheap_delete_root(&sem->not_top_m, ikglp_heap_node_t, node);
318 INIT_BINHEAP_NODE(&promoted->node);
319
320 binheap_add(&promoted->node, &sem->top_m, ikglp_heap_node_t, node);
321 }
322 else {
323 TRACE_CUR("No one to promote to top-m.\n");
324 --(sem->top_m_size);
325 }
326
327// TRACE_CUR("Top-M After (size = %d):\n", sem->top_m_size);
328// print_global_list(sem->top_m.root, 1);
329// TRACE_CUR("Not-Top-M After:\n");
330// print_global_list(sem->not_top_m.root, 1);
331 }
332 else {
333 TRACE_CUR("%s/%d is in not-top-m\n", t->comm, t->pid);
334// TRACE_CUR("Not-Top-M Before:\n");
335// print_global_list(sem->not_top_m.root, 1);
336
337 binheap_delete(&node->node, &sem->not_top_m);
338
339// TRACE_CUR("Not-Top-M After:\n");
340// print_global_list(sem->not_top_m.root, 1);
341 }
342}
343
344
345static void ikglp_add_donees(struct ikglp_semaphore *sem,
346 struct fifo_queue *fq,
347 struct task_struct *t,
348 ikglp_donee_heap_node_t* node)
349{
350// TRACE_CUR("Adding %s/%d to donee list.\n", t->comm, t->pid);
351// TRACE_CUR("donees Before:\n");
352// print_donees(sem, sem->donees.root, 1);
353
354 node->task = t;
355 node->donor_info = NULL;
356 node->fq = fq;
357 INIT_BINHEAP_NODE(&node->node);
358
359 binheap_add(&node->node, &sem->donees, ikglp_donee_heap_node_t, node);
360
361// TRACE_CUR("donees After:\n");
362// print_donees(sem, sem->donees.root, 1);
363}
364
365
366static void ikglp_refresh_owners_prio_increase(struct task_struct *t,
367 struct fifo_queue *fq,
368 struct ikglp_semaphore *sem,
369 unsigned long flags)
370{
371 // priority of 't' has increased (note: 't' might already be hp_waiter).
372 if ((t == fq->hp_waiter) || litmus->compare(t, fq->hp_waiter)) {
373 struct task_struct *old_max_eff_prio;
374 struct task_struct *new_max_eff_prio;
375 struct task_struct *new_prio = NULL;
376 struct task_struct *owner = fq->owner;
377
378 if(fq->hp_waiter)
379 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
380 fq->hp_waiter->comm, fq->hp_waiter->pid);
381 else
382 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
383
384 if(owner)
385 {
386 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
387
388// TRACE_TASK(owner, "Heap Before:\n");
389// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
390
391 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
392
393 fq->hp_waiter = t;
394 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
395
396 binheap_decrease(&fq->nest.hp_binheap_node,
397 &tsk_rt(owner)->hp_blocked_tasks);
398
399// TRACE_TASK(owner, "Heap After:\n");
400// print_hp_waiters(tsk_rt(owner)->hp_blocked_tasks.root, 0);
401
402 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
403
404 if(new_max_eff_prio != old_max_eff_prio) {
405 TRACE_TASK(t, "is new hp_waiter.\n");
406
407 if ((effective_priority(owner) == old_max_eff_prio) ||
408 (litmus->__compare(new_max_eff_prio, BASE,
409 owner, EFFECTIVE))){
410 new_prio = new_max_eff_prio;
411 }
412 }
413 else {
414 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
415 }
416
417 if(new_prio) {
418 // set new inheritance and propagate
419 TRACE_TASK(t, "Effective priority changed for owner %s/%d to %s/%d\n",
420 owner->comm, owner->pid,
421 new_prio->comm, new_prio->pid);
422 litmus->nested_increase_prio(owner, new_prio, &sem->lock,
423 flags); // unlocks lock.
424 }
425 else {
426 TRACE_TASK(t, "No change in effective priority (is %s/%d). Propagation halted.\n",
427 new_max_eff_prio->comm, new_max_eff_prio->pid);
428 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
429 unlock_fine_irqrestore(&sem->lock, flags);
430 }
431 }
432 else {
433 fq->hp_waiter = t;
434 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
435
436 TRACE_TASK(t, "no owner.\n");
437 unlock_fine_irqrestore(&sem->lock, flags);
438 }
439 }
440 else {
441 TRACE_TASK(t, "hp_waiter is unaffected.\n");
442 unlock_fine_irqrestore(&sem->lock, flags);
443 }
444}
445
446// hp_waiter has decreased
447static void ikglp_refresh_owners_prio_decrease(struct fifo_queue *fq,
448 struct ikglp_semaphore *sem,
449 unsigned long flags)
450{
451 struct task_struct *owner = fq->owner;
452
453 struct task_struct *old_max_eff_prio;
454 struct task_struct *new_max_eff_prio;
455
456 if(!owner) {
457 TRACE_CUR("No owner. Returning.\n");
458 unlock_fine_irqrestore(&sem->lock, flags);
459 return;
460 }
461
462 TRACE_CUR("ikglp_refresh_owners_prio_decrease\n");
463
464 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
465
466 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
467
468 binheap_delete(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
469 fq->nest.hp_waiter_eff_prio = fq->hp_waiter;
470 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks,
471 struct nested_info, hp_binheap_node);
472
473 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
474
475 if((old_max_eff_prio != new_max_eff_prio) &&
476 (effective_priority(owner) == old_max_eff_prio))
477 {
478 // Need to set new effective_priority for owner
479 struct task_struct *decreased_prio;
480
481 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
482 ikglp_get_idx(sem, fq));
483
484 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
485 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of fq %d.\n",
486 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
487 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
488 owner->comm,
489 owner->pid,
490 ikglp_get_idx(sem, fq));
491
492 decreased_prio = new_max_eff_prio;
493 }
494 else {
495 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of fq %d.\n",
496 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
497 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
498 owner->comm,
499 owner->pid,
500 ikglp_get_idx(sem, fq));
501
502 decreased_prio = NULL;
503 }
504
505 // beware: recursion
506 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
507 }
508 else {
509 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
510 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
511 unlock_fine_irqrestore(&sem->lock, flags);
512 }
513}
514
515
516static void ikglp_remove_donation_from_owner(struct binheap_node *n,
517 struct fifo_queue *fq,
518 struct ikglp_semaphore *sem,
519 unsigned long flags)
520{
521 struct task_struct *owner = fq->owner;
522
523 struct task_struct *old_max_eff_prio;
524 struct task_struct *new_max_eff_prio;
525
526 BUG_ON(!owner);
527
528 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
529
530 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
531
532 binheap_delete(n, &tsk_rt(owner)->hp_blocked_tasks);
533
534 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
535
536 if((old_max_eff_prio != new_max_eff_prio) &&
537 (effective_priority(owner) == old_max_eff_prio))
538 {
539 // Need to set new effective_priority for owner
540 struct task_struct *decreased_prio;
541
542 TRACE_CUR("Propagating decreased inheritance to holder of fq %d.\n",
543 ikglp_get_idx(sem, fq));
544
545 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
546 TRACE_CUR("has greater base priority than base priority of owner of fq %d.\n",
547 ikglp_get_idx(sem, fq));
548 decreased_prio = new_max_eff_prio;
549 }
550 else {
551 TRACE_CUR("has lesser base priority than base priority of owner of fq %d.\n",
552 ikglp_get_idx(sem, fq));
553 decreased_prio = NULL;
554 }
555
556 // beware: recursion
557 litmus->nested_decrease_prio(owner, decreased_prio, &sem->lock, flags); // will unlock mutex->lock
558 }
559 else {
560 TRACE_TASK(owner, "No need to propagate priority decrease forward.\n");
561 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
562 unlock_fine_irqrestore(&sem->lock, flags);
563 }
564}
565
566static void ikglp_remove_donation_from_fq_waiter(struct task_struct *t,
567 struct binheap_node *n)
568{
569 struct task_struct *old_max_eff_prio;
570 struct task_struct *new_max_eff_prio;
571
572 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
573
574 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
575
576 binheap_delete(n, &tsk_rt(t)->hp_blocked_tasks);
577
578 new_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
579
580 if((old_max_eff_prio != new_max_eff_prio) &&
581 (effective_priority(t) == old_max_eff_prio))
582 {
583 // Need to set new effective_priority for owner
584 struct task_struct *decreased_prio;
585
586 if(litmus->__compare(new_max_eff_prio, BASE, t, BASE)) {
587 decreased_prio = new_max_eff_prio;
588 }
589 else {
590 decreased_prio = NULL;
591 }
592
593 tsk_rt(t)->inh_task = decreased_prio;
594 }
595
596 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
597}
598
599static void ikglp_get_immediate(struct task_struct* t,
600 struct fifo_queue *fq,
601 struct ikglp_semaphore *sem,
602 unsigned long flags)
603{
604 // resource available now
605 TRACE_CUR("queue %d: acquired immediately\n", ikglp_get_idx(sem, fq));
606
607 fq->owner = t;
608
609 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
610 binheap_add(&fq->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
611 struct nested_info, hp_binheap_node);
612 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
613
614 ++(fq->count);
615
616 ikglp_add_global_list(sem, t, &fq->global_heap_node);
617 ikglp_add_donees(sem, fq, t, &fq->donee_heap_node);
618
619 sem->shortest_fifo_queue = ikglp_find_shortest(sem, sem->shortest_fifo_queue);
620
621#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
622 if(sem->aff_obs) {
623 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
624 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, t);
625 }
626#endif
627
628 unlock_fine_irqrestore(&sem->lock, flags);
629}
630
631
632
633
634
635static void __ikglp_enqueue_on_fq(struct ikglp_semaphore *sem,
636 struct fifo_queue* fq,
637 struct task_struct* t,
638 wait_queue_t *wait,
639 ikglp_heap_node_t *global_heap_node,
640 ikglp_donee_heap_node_t *donee_heap_node)
641{
642 /* resource is not free => must suspend and wait */
643 TRACE_TASK(t, "Enqueuing on fq %d.\n",
644 ikglp_get_idx(sem, fq));
645
646 init_waitqueue_entry(wait, t);
647
648 __add_wait_queue_tail_exclusive(&fq->wait, wait);
649
650 ++(fq->count);
651 ++(sem->nr_in_fifos);
652
653 // update global list.
654 if(likely(global_heap_node)) {
655 if(binheap_is_in_heap(&global_heap_node->node)) {
656 WARN_ON(1);
657 ikglp_del_global_list(sem, t, global_heap_node);
658 }
659 ikglp_add_global_list(sem, t, global_heap_node);
660 }
661	// update donor eligibility list.
662 if(likely(donee_heap_node)) {
663// if(binheap_is_in_heap(&donee_heap_node->node)) {
664// WARN_ON(1);
665// }
666 ikglp_add_donees(sem, fq, t, donee_heap_node);
667 }
668
669 if(sem->shortest_fifo_queue == fq) {
670 sem->shortest_fifo_queue = ikglp_find_shortest(sem, fq);
671 }
672
673#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
674 if(sem->aff_obs) {
675 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, fq, t);
676 }
677#endif
678
679 TRACE_TASK(t, "shortest queue is now %d\n", ikglp_get_idx(sem, fq));
680}
681
682
683static void ikglp_enqueue_on_fq(
684 struct ikglp_semaphore *sem,
685 struct fifo_queue *fq,
686 ikglp_wait_state_t *wait,
687 unsigned long flags)
688{
689 /* resource is not free => must suspend and wait */
690 TRACE_TASK(wait->task, "queue %d: Resource is not free => must suspend and wait.\n",
691 ikglp_get_idx(sem, fq));
692
693 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
694 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
695
696 __ikglp_enqueue_on_fq(sem, fq, wait->task, &wait->fq_node,
697 &wait->global_heap_node, &wait->donee_heap_node);
698
699 ikglp_refresh_owners_prio_increase(wait->task, fq, sem, flags); // unlocks sem->lock
700}
701
702
703static void __ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
704 ikglp_wait_state_t *wait)
705{
706 TRACE_TASK(wait->task, "goes to PQ.\n");
707
708 wait->pq_node.task = wait->task; // copy over task (little redundant...)
709
710 binheap_add(&wait->pq_node.node, &sem->priority_queue,
711 ikglp_heap_node_t, node);
712}
713
714static void ikglp_enqueue_on_pq(struct ikglp_semaphore *sem,
715 ikglp_wait_state_t *wait)
716{
717 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
718 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
719 INIT_BINHEAP_NODE(&wait->pq_node.node);
720
721 __ikglp_enqueue_on_pq(sem, wait);
722}
723
724static void ikglp_enqueue_on_donor(struct ikglp_semaphore *sem,
725 ikglp_wait_state_t* wait,
726 unsigned long flags)
727{
728 struct task_struct *t = wait->task;
729 ikglp_donee_heap_node_t *donee_node = NULL;
730 struct task_struct *donee;
731
732 struct task_struct *old_max_eff_prio;
733 struct task_struct *new_max_eff_prio;
734 struct task_struct *new_prio = NULL;
735
736 INIT_BINHEAP_NODE(&wait->global_heap_node.node);
737 INIT_BINHEAP_NODE(&wait->donee_heap_node.node);
738 INIT_BINHEAP_NODE(&wait->pq_node.node);
739 INIT_BINHEAP_NODE(&wait->node);
740
741// TRACE_CUR("Adding %s/%d as donor.\n", t->comm, t->pid);
742// TRACE_CUR("donors Before:\n");
743// print_donors(sem->donors.root, 1);
744
745 // Add donor to the global list.
746 ikglp_add_global_list(sem, t, &wait->global_heap_node);
747
748 // Select a donee
749#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
750 donee_node = (sem->aff_obs) ?
751 sem->aff_obs->ops->advise_donee_selection(sem->aff_obs, t) :
752 binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
753#else
754 donee_node = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
755#endif
756
757 donee = donee_node->task;
758
759 TRACE_TASK(t, "Donee selected: %s/%d\n", donee->comm, donee->pid);
760
761	TRACE_CUR("Temporarily removing %s/%d from donee list.\n",
762 donee->comm, donee->pid);
763// TRACE_CUR("donees Before:\n");
764// print_donees(sem, sem->donees.root, 1);
765
766 //binheap_delete_root(&sem->donees, ikglp_donee_heap_node_t, node); // will re-add it shortly
767 binheap_delete(&donee_node->node, &sem->donees);
768
769// TRACE_CUR("donees After:\n");
770// print_donees(sem, sem->donees.root, 1);
771
772
773 wait->donee_info = donee_node;
774
775 // Add t to donor heap.
776 binheap_add(&wait->node, &sem->donors, ikglp_wait_state_t, node);
777
778 // Now adjust the donee's priority.
779
780 // Lock the donee's inheritance heap.
781 raw_spin_lock(&tsk_rt(donee)->hp_blocked_tasks_lock);
782
783 old_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
784
785 if(donee_node->donor_info) {
786 // Steal donation relation. Evict old donor to PQ.
787
788 // Remove old donor from donor heap
789 ikglp_wait_state_t *old_wait = donee_node->donor_info;
790 struct task_struct *old_donor = old_wait->task;
791
792 TRACE_TASK(t, "Donee (%s/%d) had donor %s/%d. Moving old donor to PQ.\n",
793 donee->comm, donee->pid, old_donor->comm, old_donor->pid);
794
795 binheap_delete(&old_wait->node, &sem->donors);
796
797 // Remove donation from donee's inheritance heap.
798 binheap_delete(&old_wait->prio_donation.hp_binheap_node,
799 &tsk_rt(donee)->hp_blocked_tasks);
800 // WARNING: have not updated inh_prio!
801
802 // Add old donor to PQ.
803 __ikglp_enqueue_on_pq(sem, old_wait);
804
805 // Remove old donor from the global heap.
806 ikglp_del_global_list(sem, old_donor, &old_wait->global_heap_node);
807 }
808
809 // Add back donee's node to the donees heap with increased prio
810 donee_node->donor_info = wait;
811 INIT_BINHEAP_NODE(&donee_node->node);
812
813
814 TRACE_CUR("Adding %s/%d back to donee list.\n", donee->comm, donee->pid);
815// TRACE_CUR("donees Before:\n");
816// print_donees(sem, sem->donees.root, 1);
817
818 binheap_add(&donee_node->node, &sem->donees, ikglp_donee_heap_node_t, node);
819
820// TRACE_CUR("donees After:\n");
821// print_donees(sem, sem->donees.root, 1);
822
823 // Add an inheritance/donation to the donee's inheritance heap.
824 wait->prio_donation.lock = (struct litmus_lock*)sem;
825 wait->prio_donation.hp_waiter_eff_prio = t;
826 wait->prio_donation.hp_waiter_ptr = NULL;
827 INIT_BINHEAP_NODE(&wait->prio_donation.hp_binheap_node);
828
829 binheap_add(&wait->prio_donation.hp_binheap_node,
830 &tsk_rt(donee)->hp_blocked_tasks,
831 struct nested_info, hp_binheap_node);
832
833 new_max_eff_prio = top_priority(&tsk_rt(donee)->hp_blocked_tasks);
834
835 if(new_max_eff_prio != old_max_eff_prio) {
836 if ((effective_priority(donee) == old_max_eff_prio) ||
837 (litmus->__compare(new_max_eff_prio, BASE, donee, EFFECTIVE))){
838 TRACE_TASK(t, "Donation increases %s/%d's effective priority\n",
839 donee->comm, donee->pid);
840 new_prio = new_max_eff_prio;
841 }
842// else {
843// // should be bug. donor would not be in top-m.
844// TRACE_TASK(t, "Donation is not greater than base prio of %s/%d?\n", donee->comm, donee->pid);
845// WARN_ON(1);
846// }
847// }
848// else {
849// // should be bug. donor would not be in top-m.
850// TRACE_TASK(t, "No change in %s/%d's inheritance heap?\n", donee->comm, donee->pid);
851// WARN_ON(1);
852 }
853
854 if(new_prio) {
855 struct fifo_queue *donee_fq = donee_node->fq;
856
857 if(donee != donee_fq->owner) {
858 TRACE_TASK(t, "%s/%d is not the owner. Propagating priority to owner %s/%d.\n",
859 donee->comm, donee->pid,
860 donee_fq->owner->comm, donee_fq->owner->pid);
861
862 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
863 ikglp_refresh_owners_prio_increase(donee, donee_fq, sem, flags); // unlocks sem->lock
864 }
865 else {
866			TRACE_TASK(t, "%s/%d is the owner. Propagating priority immediately.\n",
867 donee->comm, donee->pid);
868 litmus->nested_increase_prio(donee, new_prio, &sem->lock, flags); // unlocks sem->lock and donee's heap lock
869 }
870 }
871 else {
872		TRACE_TASK(t, "No change in effective priority (it is %s/%d). BUG?\n",
873 new_max_eff_prio->comm, new_max_eff_prio->pid);
874 raw_spin_unlock(&tsk_rt(donee)->hp_blocked_tasks_lock);
875 unlock_fine_irqrestore(&sem->lock, flags);
876 }
877
878
879// TRACE_CUR("donors After:\n");
880// print_donors(sem->donors.root, 1);
881}
882
883int ikglp_lock(struct litmus_lock* l)
884{
885 struct task_struct* t = current;
886 struct ikglp_semaphore *sem = ikglp_from_lock(l);
887 unsigned long flags = 0, real_flags;
888 struct fifo_queue *fq = NULL;
889 int replica = -EINVAL;
890
891#ifdef CONFIG_LITMUS_DGL_SUPPORT
892 raw_spinlock_t *dgl_lock;
893#endif
894
895 ikglp_wait_state_t wait;
896
897 if (!is_realtime(t))
898 return -EPERM;
899
900#ifdef CONFIG_LITMUS_DGL_SUPPORT
901 dgl_lock = litmus->get_dgl_spinlock(t);
902#endif
903
904 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
905
906 lock_global_irqsave(dgl_lock, flags);
907 lock_fine_irqsave(&sem->lock, flags);
908
909 if(sem->nr_in_fifos < sem->m) {
910		// enqueue somewhere
911#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
912 fq = (sem->aff_obs) ?
913 sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
914 sem->shortest_fifo_queue;
915#else
916 fq = sem->shortest_fifo_queue;
917#endif
918 if(fq->count == 0) {
919 // take available resource
920 replica = ikglp_get_idx(sem, fq);
921
922 ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
923
924 unlock_global_irqrestore(dgl_lock, flags);
925 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
926 goto acquired;
927 }
928 else {
929 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
930
931 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
932 mb();
933
934 /* FIXME: interruptible would be nice some day */
935 set_task_state(t, TASK_UNINTERRUPTIBLE);
936
937 ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
938 }
939 }
940 else {
941 // donor!
942 wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
943
944 tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
945 mb();
946
947 /* FIXME: interruptible would be nice some day */
948 set_task_state(t, TASK_UNINTERRUPTIBLE);
949
950 if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
951 // enqueue on PQ
952 ikglp_enqueue_on_pq(sem, &wait);
953 unlock_fine_irqrestore(&sem->lock, flags);
954 }
955 else {
956 // enqueue as donor
957 ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
958 }
959 }
960
961 unlock_global_irqrestore(dgl_lock, flags);
962 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
963
964 TS_LOCK_SUSPEND;
965
966 schedule();
967
968 TS_LOCK_RESUME;
969
970 fq = ikglp_get_queue(sem, t);
971 BUG_ON(!fq);
972
973 replica = ikglp_get_idx(sem, fq);
974
975acquired:
976 TRACE_CUR("Acquired lock %d, queue %d\n",
977 l->ident, replica);
978
979#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
980 if(sem->aff_obs) {
981 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
982 }
983#endif
984
985 return replica;
986}
987
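
ikglp_lock() returns the index of the FIFO queue (replica) that was acquired, or, when an affinity observer is attached, whatever resource id replica_to_resource() maps that queue to (e.g., a GPU). From user space this is typically driven through liblitmus object descriptors; the sketch below is an assumption about that interface, not something defined in this patch, and the header/helper names may differ.

#include <litmus.h>	/* liblitmus user-space API (header name assumed) */

static int use_ikglp_replica(int od)
{
	/* 'od' is an object descriptor bound to the IKGLP via the FDSO
	 * interface (e.g. od_open()); litmus_lock()/litmus_unlock() are the
	 * generic lock syscall wrappers. The return-value convention
	 * (replica index) follows ikglp_lock() above. */
	int replica = litmus_lock(od);	/* blocks until a replica is granted */
	if (replica < 0)
		return replica;
	/* ... access the 'replica'-th resource (e.g., a GPU) ... */
	return litmus_unlock(od);
}
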
988//int ikglp_lock(struct litmus_lock* l)
989//{
990// struct task_struct* t = current;
991// struct ikglp_semaphore *sem = ikglp_from_lock(l);
992// unsigned long flags = 0, real_flags;
993// struct fifo_queue *fq = NULL;
994// int replica = -EINVAL;
995//
996//#ifdef CONFIG_LITMUS_DGL_SUPPORT
997// raw_spinlock_t *dgl_lock;
998//#endif
999//
1000// ikglp_wait_state_t wait;
1001//
1002// if (!is_realtime(t))
1003// return -EPERM;
1004//
1005//#ifdef CONFIG_LITMUS_DGL_SUPPORT
1006// dgl_lock = litmus->get_dgl_spinlock(t);
1007//#endif
1008//
1009// raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1010//
1011// lock_global_irqsave(dgl_lock, flags);
1012// lock_fine_irqsave(&sem->lock, flags);
1013//
1014//
1015//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1016// fq = (sem->aff_obs) ?
1017// sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t) :
1018// sem->shortest_fifo_queue;
1019//#else
1020// fq = sem->shortest_fifo_queue;
1021//#endif
1022//
1023// if(fq->count == 0) {
1024// // take available resource
1025// replica = ikglp_get_idx(sem, fq);
1026//
1027// ikglp_get_immediate(t, fq, sem, flags); // unlocks sem->lock
1028//
1029// unlock_global_irqrestore(dgl_lock, flags);
1030// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1031// }
1032// else
1033// {
1034// // we have to suspend.
1035//
1036// wait.task = t; // THIS IS CRITICALLY IMPORTANT!!!
1037//
1038// tsk_rt(t)->blocked_lock = (struct litmus_lock*)sem; // record where we are blocked
1039// mb();
1040//
1041// /* FIXME: interruptible would be nice some day */
1042// set_task_state(t, TASK_UNINTERRUPTIBLE);
1043//
1044// if(fq->count < sem->max_fifo_len) {
1045// // enqueue on fq
1046// ikglp_enqueue_on_fq(sem, fq, &wait, flags); // unlocks sem->lock
1047// }
1048// else {
1049//
1050// TRACE_CUR("IKGLP fifo queues are full (at least they better be).\n");
1051//
1052// // no room in fifos. Go to PQ or donors.
1053//
1054// if(litmus->__compare(ikglp_mth_highest(sem), BASE, t, BASE)) {
1055// // enqueue on PQ
1056// ikglp_enqueue_on_pq(sem, &wait);
1057// unlock_fine_irqrestore(&sem->lock, flags);
1058// }
1059// else {
1060// // enqueue as donor
1061// ikglp_enqueue_on_donor(sem, &wait, flags); // unlocks sem->lock
1062// }
1063// }
1064//
1065// unlock_global_irqrestore(dgl_lock, flags);
1066// raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1067//
1068// TS_LOCK_SUSPEND;
1069//
1070// schedule();
1071//
1072// TS_LOCK_RESUME;
1073//
1074// fq = ikglp_get_queue(sem, t);
1075// BUG_ON(!fq);
1076//
1077// replica = ikglp_get_idx(sem, fq);
1078// }
1079//
1080// TRACE_CUR("Acquired lock %d, queue %d\n",
1081// l->ident, replica);
1082//
1083//#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1084// if(sem->aff_obs) {
1085// return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, fq);
1086// }
1087//#endif
1088//
1089// return replica;
1090//}
1091
1092static void ikglp_move_donor_to_fq(struct ikglp_semaphore *sem,
1093 struct fifo_queue *fq,
1094 ikglp_wait_state_t *donor_info)
1095{
1096 struct task_struct *t = donor_info->task;
1097
1098 TRACE_CUR("Donor %s/%d being moved to fq %d\n",
1099 t->comm,
1100 t->pid,
1101 ikglp_get_idx(sem, fq));
1102
1103 binheap_delete(&donor_info->node, &sem->donors);
1104
1105 __ikglp_enqueue_on_fq(sem, fq, t,
1106 &donor_info->fq_node,
1107 NULL, // already in global_list, so pass null to prevent adding 2nd time.
1108 &donor_info->donee_heap_node);
1109
1110 // warning:
1111 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1112}
1113
1114static void ikglp_move_pq_to_fq(struct ikglp_semaphore *sem,
1115 struct fifo_queue *fq,
1116 ikglp_wait_state_t *wait)
1117{
1118 struct task_struct *t = wait->task;
1119
1120 TRACE_CUR("PQ request %s/%d being moved to fq %d\n",
1121 t->comm,
1122 t->pid,
1123 ikglp_get_idx(sem, fq));
1124
1125 binheap_delete(&wait->pq_node.node, &sem->priority_queue);
1126
1127 __ikglp_enqueue_on_fq(sem, fq, t,
1128 &wait->fq_node,
1129 &wait->global_heap_node,
1130 &wait->donee_heap_node);
1131 // warning:
1132 // ikglp_update_owners_prio(t, fq, sem, flags) has not been called.
1133}
1134
1135static ikglp_wait_state_t* ikglp_find_hp_waiter_to_steal(
1136 struct ikglp_semaphore* sem)
1137{
1138 /* must hold sem->lock */
1139
1140 struct fifo_queue *fq = NULL;
1141 struct list_head *pos;
1142 struct task_struct *queued;
1143 int i;
1144
1145 for(i = 0; i < sem->nr_replicas; ++i) {
1146 if( (sem->fifo_queues[i].count > 1) &&
1147 (!fq || litmus->compare(sem->fifo_queues[i].hp_waiter, fq->hp_waiter)) ) {
1148
1149 TRACE_CUR("hp_waiter on fq %d (%s/%d) has higher prio than hp_waiter on fq %d (%s/%d)\n",
1150 ikglp_get_idx(sem, &sem->fifo_queues[i]),
1151 sem->fifo_queues[i].hp_waiter->comm,
1152 sem->fifo_queues[i].hp_waiter->pid,
1153 (fq) ? ikglp_get_idx(sem, fq) : -1,
1154 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->comm : "nil") : "nilXX",
1155 (fq) ? ((fq->hp_waiter) ? fq->hp_waiter->pid : -1) : -2);
1156
1157 fq = &sem->fifo_queues[i];
1158
1159 WARN_ON(!(fq->hp_waiter));
1160 }
1161 }
1162
1163 if(fq) {
1164 struct task_struct *max_hp = fq->hp_waiter;
1165 ikglp_wait_state_t* ret = NULL;
1166
1167 TRACE_CUR("Searching for %s/%d on fq %d\n",
1168 max_hp->comm,
1169 max_hp->pid,
1170 ikglp_get_idx(sem, fq));
1171
1172 BUG_ON(!max_hp);
1173
1174 list_for_each(pos, &fq->wait.task_list) {
1175 wait_queue_t *wait = list_entry(pos, wait_queue_t, task_list);
1176
1177 queued = (struct task_struct*) wait->private;
1178
1179 TRACE_CUR("fq %d entry: %s/%d\n",
1180 ikglp_get_idx(sem, fq),
1181 queued->comm,
1182 queued->pid);
1183
1184 /* Compare task prios, find high prio task. */
1185 if (queued == max_hp) {
1186 TRACE_CUR("Found it!\n");
1187 ret = container_of(wait, ikglp_wait_state_t, fq_node);
1188 }
1189 }
1190
1191 WARN_ON(!ret);
1192 return ret;
1193 }
1194
1195 return(NULL);
1196}
1197
1198static void ikglp_steal_to_fq(struct ikglp_semaphore *sem,
1199 struct fifo_queue *fq,
1200 ikglp_wait_state_t *fq_wait)
1201{
1202 struct task_struct *t = fq_wait->task;
1203 struct fifo_queue *fq_steal = fq_wait->donee_heap_node.fq;
1204
1205 TRACE_CUR("FQ request %s/%d being moved to fq %d\n",
1206 t->comm,
1207 t->pid,
1208 ikglp_get_idx(sem, fq));
1209
1210 fq_wait->donee_heap_node.fq = fq; // just to be safe
1211
1212
1213 __remove_wait_queue(&fq_steal->wait, &fq_wait->fq_node);
1214 --(fq_steal->count);
1215
1216#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1217 if(sem->aff_obs) {
1218 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq_steal, t);
1219 }
1220#endif
1221
1222 if(t == fq_steal->hp_waiter) {
1223 fq_steal->hp_waiter = ikglp_find_hp_waiter(fq_steal, NULL);
1224 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1225 ikglp_get_idx(sem, fq_steal),
1226 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->comm : "nil",
1227 (fq_steal->hp_waiter) ? fq_steal->hp_waiter->pid : -1);
1228 }
1229
1230
1231 // Update shortest.
1232 if(fq_steal->count < sem->shortest_fifo_queue->count) {
1233 sem->shortest_fifo_queue = fq_steal;
1234 }
1235
1236 __ikglp_enqueue_on_fq(sem, fq, t,
1237 &fq_wait->fq_node,
1238 NULL,
1239 NULL);
1240
1241 // warning: We have not checked the priority inheritance of fq's owner yet.
1242}
1243
1244
1245static void ikglp_migrate_fq_to_owner_heap_nodes(struct ikglp_semaphore *sem,
1246 struct fifo_queue *fq,
1247 ikglp_wait_state_t *old_wait)
1248{
1249 struct task_struct *t = old_wait->task;
1250
1251 BUG_ON(old_wait->donee_heap_node.fq != fq);
1252
1253 TRACE_TASK(t, "Migrating wait_state to memory of queue %d.\n",
1254 ikglp_get_idx(sem, fq));
1255
1256 // need to migrate global_heap_node and donee_heap_node off of the stack
1257 // to the nodes allocated for the owner of this fq.
1258
1259 // TODO: Enhance binheap() to perform this operation in place.
1260
1261 ikglp_del_global_list(sem, t, &old_wait->global_heap_node); // remove
1262 fq->global_heap_node = old_wait->global_heap_node; // copy
1263 ikglp_add_global_list(sem, t, &fq->global_heap_node); // re-add
1264
1265 binheap_delete(&old_wait->donee_heap_node.node, &sem->donees); // remove
1266 fq->donee_heap_node = old_wait->donee_heap_node; // copy
1267
1268 if(fq->donee_heap_node.donor_info) {
1269 // let donor know that our location has changed
1270 BUG_ON(fq->donee_heap_node.donor_info->donee_info->task != t); // validate cross-link
1271 fq->donee_heap_node.donor_info->donee_info = &fq->donee_heap_node;
1272 }
1273 INIT_BINHEAP_NODE(&fq->donee_heap_node.node);
1274 binheap_add(&fq->donee_heap_node.node, &sem->donees,
1275 ikglp_donee_heap_node_t, node); // re-add
1276}
1277
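The release path below is long; the following summary comment is added for readability and only restates the steps implemented by ikglp_unlock(), it introduces no new logic.

/* Release protocol, in order:
 *  1. Remove the releasing owner from the global/donee heaps and its FQ.
 *  2. Refill the FQ: prefer the owner's own donor, else the best other
 *     donor, else the head of the PQ, else steal a waiter from another FQ.
 *  3. Strip all priority inherited by the releasing task.
 *  4. Re-evaluate inheritance disturbed by step 2 (a donation ended or a
 *     waiter was stolen), then for the FQ that received the new request.
 *  5. Hand the replica to the next waiter on this FQ and wake it up.
 */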
1278int ikglp_unlock(struct litmus_lock* l)
1279{
1280 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1281 struct task_struct *t = current;
1282 struct task_struct *donee = NULL;
1283 struct task_struct *next = NULL;
1284 struct task_struct *new_on_fq = NULL;
1285 struct fifo_queue *fq_of_new_on_fq = NULL;
1286
1287 ikglp_wait_state_t *other_donor_info = NULL;
1288 struct fifo_queue *to_steal = NULL;
1289 int need_steal_prio_reeval = 0;
1290 struct fifo_queue *fq;
1291
1292#ifdef CONFIG_LITMUS_DGL_SUPPORT
1293 raw_spinlock_t *dgl_lock;
1294#endif
1295
1296 unsigned long flags = 0, real_flags;
1297
1298 int err = 0;
1299
1300 fq = ikglp_get_queue(sem, t); // returns NULL if 't' is not owner.
1301
1302 if (!fq) {
1303 err = -EINVAL;
1304 goto out;
1305 }
1306
1307#ifdef CONFIG_LITMUS_DGL_SUPPORT
1308 dgl_lock = litmus->get_dgl_spinlock(t);
1309#endif
1310 raw_spin_lock_irqsave(&sem->real_lock, real_flags);
1311
1312 lock_global_irqsave(dgl_lock, flags); // TODO: Push this deeper
1313 lock_fine_irqsave(&sem->lock, flags);
1314
1315 TRACE_TASK(t, "Freeing replica %d.\n", ikglp_get_idx(sem, fq));
1316
1317
1318 // Remove 't' from the heaps, but data in nodes will still be good.
1319 ikglp_del_global_list(sem, t, &fq->global_heap_node);
1320 binheap_delete(&fq->donee_heap_node.node, &sem->donees);
1321
1322 fq->owner = NULL; // no longer owned!!
1323 --(fq->count);
1324 if(fq->count < sem->shortest_fifo_queue->count) {
1325 sem->shortest_fifo_queue = fq;
1326 }
1327 --(sem->nr_in_fifos);
1328
1329#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1330 if(sem->aff_obs) {
1331 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, fq, t);
1332 sem->aff_obs->ops->notify_freed(sem->aff_obs, fq, t);
1333 }
1334#endif
1335
1336 // Move the next request into the FQ and update heaps as needed.
1337 // We defer re-evaluation of priorities to later in the function.
1338 if(fq->donee_heap_node.donor_info) { // move my donor to FQ
1339 ikglp_wait_state_t *donor_info = fq->donee_heap_node.donor_info;
1340
1341 new_on_fq = donor_info->task;
1342
1343 // donor moved to FQ
1344 donee = t;
1345
1346#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1347 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1348 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1349 if(fq_of_new_on_fq->count == 0) {
1350 // ignore it?
1351// fq_of_new_on_fq = fq;
1352 }
1353 }
1354 else {
1355 fq_of_new_on_fq = fq;
1356 }
1357#else
1358 fq_of_new_on_fq = fq;
1359#endif
1360
1361 TRACE_TASK(t, "Moving MY donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1362 new_on_fq->comm, new_on_fq->pid,
1363 ikglp_get_idx(sem, fq_of_new_on_fq),
1364 ikglp_get_idx(sem, fq));
1365
1366
1367 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, donor_info);
1368 }
1369	else if(!binheap_empty(&sem->donors)) {  // no donor of my own, so move another donor to the FQ
1370 // move other donor to FQ
1371 // Select a donor
1372#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1373 other_donor_info = (sem->aff_obs) ?
1374 sem->aff_obs->ops->advise_donor_to_fq(sem->aff_obs, fq) :
1375 binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1376#else
1377 other_donor_info = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
1378#endif
1379
1380 new_on_fq = other_donor_info->task;
1381 donee = other_donor_info->donee_info->task;
1382
1383 // update the donee's heap position.
1384 other_donor_info->donee_info->donor_info = NULL; // clear the cross-link
1385 binheap_decrease(&other_donor_info->donee_info->node, &sem->donees);
1386
1387#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1388 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1389 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1390 if(fq_of_new_on_fq->count == 0) {
1391 // ignore it?
1392// fq_of_new_on_fq = fq;
1393 }
1394 }
1395 else {
1396 fq_of_new_on_fq = fq;
1397 }
1398#else
1399 fq_of_new_on_fq = fq;
1400#endif
1401
1402 TRACE_TASK(t, "Moving a donor (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1403 new_on_fq->comm, new_on_fq->pid,
1404 ikglp_get_idx(sem, fq_of_new_on_fq),
1405 ikglp_get_idx(sem, fq));
1406
1407 ikglp_move_donor_to_fq(sem, fq_of_new_on_fq, other_donor_info);
1408 }
1409 else if(!binheap_empty(&sem->priority_queue)) { // No donors, so move PQ
1410 ikglp_heap_node_t *pq_node = binheap_top_entry(&sem->priority_queue,
1411 ikglp_heap_node_t, node);
1412 ikglp_wait_state_t *pq_wait = container_of(pq_node, ikglp_wait_state_t,
1413 pq_node);
1414
1415 new_on_fq = pq_wait->task;
1416
1417#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1418 if(sem->aff_obs && sem->aff_obs->relax_max_fifo_len) {
1419 fq_of_new_on_fq = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, new_on_fq);
1420 if(fq_of_new_on_fq->count == 0) {
1421 // ignore it?
1422// fq_of_new_on_fq = fq;
1423 }
1424 }
1425 else {
1426 fq_of_new_on_fq = fq;
1427 }
1428#else
1429 fq_of_new_on_fq = fq;
1430#endif
1431
1432 TRACE_TASK(t, "Moving a pq waiter (%s/%d) to fq %d (non-aff wanted fq %d).\n",
1433 new_on_fq->comm, new_on_fq->pid,
1434 ikglp_get_idx(sem, fq_of_new_on_fq),
1435 ikglp_get_idx(sem, fq));
1436
1437 ikglp_move_pq_to_fq(sem, fq_of_new_on_fq, pq_wait);
1438 }
1439 else if(fq->count == 0) { // No PQ and this queue is empty, so steal.
1440 ikglp_wait_state_t *fq_wait;
1441
1442 TRACE_TASK(t, "Looking to steal a request for fq %d...\n",
1443 ikglp_get_idx(sem, fq));
1444
1445#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1446 fq_wait = (sem->aff_obs) ?
1447 sem->aff_obs->ops->advise_steal(sem->aff_obs, fq) :
1448 ikglp_find_hp_waiter_to_steal(sem);
1449#else
1450 fq_wait = ikglp_find_hp_waiter_to_steal(sem);
1451#endif
1452
1453 if(fq_wait) {
1454 to_steal = fq_wait->donee_heap_node.fq;
1455
1456 new_on_fq = fq_wait->task;
1457 fq_of_new_on_fq = fq;
1458 need_steal_prio_reeval = (new_on_fq == to_steal->hp_waiter);
1459
1460 TRACE_TASK(t, "Found %s/%d of fq %d to steal for fq %d...\n",
1461 new_on_fq->comm, new_on_fq->pid,
1462 ikglp_get_idx(sem, to_steal),
1463 ikglp_get_idx(sem, fq));
1464
1465 ikglp_steal_to_fq(sem, fq, fq_wait);
1466 }
1467 else {
1468 TRACE_TASK(t, "Found nothing to steal for fq %d.\n",
1469 ikglp_get_idx(sem, fq));
1470 }
1471 }
1472 else { // move no one
1473 }
1474
1475 // 't' must drop all priority and clean up data structures before hand-off.
1476
1477 // DROP ALL INHERITANCE. IKGLP MUST BE OUTER-MOST
1478 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
1479 {
1480 int count = 0;
1481 while(!binheap_empty(&tsk_rt(t)->hp_blocked_tasks)) {
1482 binheap_delete_root(&tsk_rt(t)->hp_blocked_tasks,
1483 struct nested_info, hp_binheap_node);
1484 ++count;
1485 }
1486 litmus->decrease_prio(t, NULL);
1487		WARN_ON(count > 2); // should not be greater than 2; only local fq inheritance and donation are possible.
1488 }
1489 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
1490
1491
1492
1493 // Now patch up other priorities.
1494 //
1495 // At most one of the following:
1496 // if(donee && donee != t), decrease prio, propagate to owner, or onward
1497 // if(to_steal), update owner's prio (hp_waiter has already been set)
1498 //
1499
1500 BUG_ON((other_donor_info != NULL) && (to_steal != NULL));
1501
1502 if(other_donor_info) {
1503 struct fifo_queue *other_fq = other_donor_info->donee_info->fq;
1504
1505 BUG_ON(!donee);
1506 BUG_ON(donee == t);
1507
1508 TRACE_TASK(t, "Terminating donation relation of donor %s/%d to donee %s/%d!\n",
1509 other_donor_info->task->comm, other_donor_info->task->pid,
1510 donee->comm, donee->pid);
1511
1512 // need to terminate donation relation.
1513 if(donee == other_fq->owner) {
1514 TRACE_TASK(t, "Donee %s/%d is an owner of fq %d.\n",
1515 donee->comm, donee->pid,
1516 ikglp_get_idx(sem, other_fq));
1517
1518 ikglp_remove_donation_from_owner(&other_donor_info->prio_donation.hp_binheap_node, other_fq, sem, flags);
1519 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1520 }
1521 else {
1522			TRACE_TASK(t, "Donee %s/%d is blocked in fq %d.\n",
1523 donee->comm, donee->pid,
1524 ikglp_get_idx(sem, other_fq));
1525
1526 ikglp_remove_donation_from_fq_waiter(donee, &other_donor_info->prio_donation.hp_binheap_node);
1527 if(donee == other_fq->hp_waiter) {
1528 TRACE_TASK(t, "Donee %s/%d was an hp_waiter of fq %d. Rechecking hp_waiter.\n",
1529 donee->comm, donee->pid,
1530 ikglp_get_idx(sem, other_fq));
1531
1532 other_fq->hp_waiter = ikglp_find_hp_waiter(other_fq, NULL);
1533 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1534 ikglp_get_idx(sem, other_fq),
1535 (other_fq->hp_waiter) ? other_fq->hp_waiter->comm : "nil",
1536 (other_fq->hp_waiter) ? other_fq->hp_waiter->pid : -1);
1537
1538 ikglp_refresh_owners_prio_decrease(other_fq, sem, flags); // unlocks sem->lock. reacquire it.
1539 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1540 }
1541 }
1542 }
1543 else if(to_steal) {
1544 TRACE_TASK(t, "Rechecking priority inheritance of fq %d, triggered by stealing.\n",
1545 ikglp_get_idx(sem, to_steal));
1546
1547 if(need_steal_prio_reeval) {
1548 ikglp_refresh_owners_prio_decrease(to_steal, sem, flags); // unlocks sem->lock. reacquire it.
1549 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1550 }
1551 }
1552
1553 // check for new HP waiter.
1554 if(new_on_fq) {
1555 if(fq == fq_of_new_on_fq) {
1556 // fq->owner is null, so just update the hp_waiter without locking.
1557 if(new_on_fq == fq->hp_waiter) {
1558				TRACE_TASK(t, "new_on_fq (%s/%d) is already hp_waiter.\n",
1559						   fq->hp_waiter->comm, fq->hp_waiter->pid);
1560 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter); // set this just to be sure...
1561 }
1562 else if(litmus->compare(new_on_fq, fq->hp_waiter)) {
1563 if(fq->hp_waiter)
1564 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
1565 fq->hp_waiter->comm, fq->hp_waiter->pid);
1566 else
1567 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
1568
1569 fq->hp_waiter = new_on_fq;
1570 fq->nest.hp_waiter_eff_prio = effective_priority(fq->hp_waiter);
1571
1572 TRACE_TASK(t, "New hp_waiter for fq %d is %s/%d!\n",
1573 ikglp_get_idx(sem, fq),
1574 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1575 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1576 }
1577 }
1578 else {
1579 ikglp_refresh_owners_prio_increase(new_on_fq, fq_of_new_on_fq, sem, flags); // unlocks sem->lock. reacquire it.
1580 lock_fine_irqsave(&sem->lock, flags); // there should be no contention!!!!
1581 }
1582 }
1583
1584wake_kludge:
1585 if(waitqueue_active(&fq->wait))
1586 {
1587 wait_queue_t *wait = list_entry(fq->wait.task_list.next, wait_queue_t, task_list);
1588 ikglp_wait_state_t *fq_wait = container_of(wait, ikglp_wait_state_t, fq_node);
1589 next = (struct task_struct*) wait->private;
1590
1591 __remove_wait_queue(&fq->wait, wait);
1592
1593 TRACE_CUR("queue %d: ASSIGNING %s/%d as owner - next\n",
1594 ikglp_get_idx(sem, fq),
1595 next->comm, next->pid);
1596
1597 // migrate wait-state to fifo-memory.
1598 ikglp_migrate_fq_to_owner_heap_nodes(sem, fq, fq_wait);
1599
1600		/* next becomes the resource holder */
1601 fq->owner = next;
1602 tsk_rt(next)->blocked_lock = NULL;
1603
1604#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1605 if(sem->aff_obs) {
1606 sem->aff_obs->ops->notify_acquired(sem->aff_obs, fq, next);
1607 }
1608#endif
1609
1610 /* determine new hp_waiter if necessary */
1611 if (next == fq->hp_waiter) {
1612
1613 TRACE_TASK(next, "was highest-prio waiter\n");
1614 /* next has the highest priority --- it doesn't need to
1615 * inherit. However, we need to make sure that the
1616 * next-highest priority in the queue is reflected in
1617 * hp_waiter. */
1618 fq->hp_waiter = ikglp_find_hp_waiter(fq, NULL);
1619 TRACE_TASK(next, "New hp_waiter for fq %d is %s/%d!\n",
1620 ikglp_get_idx(sem, fq),
1621 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1622 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1623
1624 fq->nest.hp_waiter_eff_prio = (fq->hp_waiter) ?
1625 effective_priority(fq->hp_waiter) : NULL;
1626
1627 if (fq->hp_waiter)
1628 TRACE_TASK(fq->hp_waiter, "is new highest-prio waiter\n");
1629 else
1630 TRACE("no further waiters\n");
1631
1632 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1633
1634// TRACE_TASK(next, "Heap Before:\n");
1635// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1636
1637 binheap_add(&fq->nest.hp_binheap_node,
1638 &tsk_rt(next)->hp_blocked_tasks,
1639 struct nested_info,
1640 hp_binheap_node);
1641
1642// TRACE_TASK(next, "Heap After:\n");
1643// print_hp_waiters(tsk_rt(next)->hp_blocked_tasks.root, 0);
1644
1645 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1646 }
1647 else {
1648 /* Well, if 'next' is not the highest-priority waiter,
1649 * then it (probably) ought to inherit the highest-priority
1650 * waiter's priority. */
1651 TRACE_TASK(next, "is not hp_waiter of replica %d. hp_waiter is %s/%d\n",
1652 ikglp_get_idx(sem, fq),
1653 (fq->hp_waiter) ? fq->hp_waiter->comm : "nil",
1654 (fq->hp_waiter) ? fq->hp_waiter->pid : -1);
1655
1656 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
1657
1658 binheap_add(&fq->nest.hp_binheap_node,
1659 &tsk_rt(next)->hp_blocked_tasks,
1660 struct nested_info,
1661 hp_binheap_node);
1662
1663 /* It is possible that 'next' *should* be the hp_waiter, but isn't
1664 * because that update hasn't yet executed (update operation is
1665			 * probably blocked on mutex->lock). So only inherit if the top of
1666			 * 'next's hp_blocked_tasks heap is indeed the effective prio. of hp_waiter.
1667 * (We use fq->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
1668 * since the effective priority of hp_waiter can change (and the
1669 * update has not made it to this lock).)
1670 */
1671 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
1672 fq->nest.hp_waiter_eff_prio))
1673 {
1674 if(fq->nest.hp_waiter_eff_prio)
1675 litmus->increase_prio(next, fq->nest.hp_waiter_eff_prio);
1676 else
1677 WARN_ON(1);
1678 }
1679
1680 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
1681 }
1682
1683
1684 // wake up the new resource holder!
1685 wake_up_process(next);
1686 }
1687 if(fq_of_new_on_fq && fq_of_new_on_fq != fq && fq_of_new_on_fq->count == 1) {
1688		// The task we promoted went to an empty FQ. (Why didn't stealing pick this up?)
1689		// Wake up the new task too.
1690
1691 BUG_ON(fq_of_new_on_fq->owner != NULL);
1692
1693 fq = fq_of_new_on_fq;
1694 fq_of_new_on_fq = NULL;
1695 goto wake_kludge;
1696 }
1697
1698 unlock_fine_irqrestore(&sem->lock, flags);
1699 unlock_global_irqrestore(dgl_lock, flags);
1700
1701 raw_spin_unlock_irqrestore(&sem->real_lock, real_flags);
1702
1703out:
1704 return err;
1705}
1706
1707
1708
1709int ikglp_close(struct litmus_lock* l)
1710{
1711 struct task_struct *t = current;
1712 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1713 unsigned long flags;
1714
1715 int owner = 0;
1716 int i;
1717
1718 raw_spin_lock_irqsave(&sem->real_lock, flags);
1719
1720 for(i = 0; i < sem->nr_replicas; ++i) {
1721 if(sem->fifo_queues[i].owner == t) {
1722 owner = 1;
1723 break;
1724 }
1725 }
1726
1727 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1728
1729 if (owner)
1730 ikglp_unlock(l);
1731
1732 return 0;
1733}
1734
1735void ikglp_free(struct litmus_lock* l)
1736{
1737 struct ikglp_semaphore *sem = ikglp_from_lock(l);
1738
1739 kfree(sem->fifo_queues);
1740 kfree(sem);
1741}
1742
1743
1744
1745struct litmus_lock* ikglp_new(int m,
1746 struct litmus_lock_ops* ops,
1747 void* __user arg)
1748{
1749 struct ikglp_semaphore* sem;
1750 int nr_replicas = 0;
1751 int i;
1752
1753 if(!access_ok(VERIFY_READ, arg, sizeof(nr_replicas)))
1754 {
1755 return(NULL);
1756 }
1757 if(__copy_from_user(&nr_replicas, arg, sizeof(nr_replicas)))
1758 {
1759 return(NULL);
1760 }
1761 if(nr_replicas < 1)
1762 {
1763 return(NULL);
1764 }
1765
1766 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1767 if(!sem)
1768 {
1769 return NULL;
1770 }
1771
1772 sem->fifo_queues = kmalloc(sizeof(struct fifo_queue)*nr_replicas, GFP_KERNEL);
1773 if(!sem->fifo_queues)
1774 {
1775 kfree(sem);
1776 return NULL;
1777 }
1778
1779 sem->litmus_lock.ops = ops;
1780
1781#ifdef CONFIG_DEBUG_SPINLOCK
1782 {
1783 __raw_spin_lock_init(&sem->lock, ((struct litmus_lock*)sem)->cheat_lockdep, &((struct litmus_lock*)sem)->key);
1784 }
1785#else
1786 raw_spin_lock_init(&sem->lock);
1787#endif
1788
1789 raw_spin_lock_init(&sem->real_lock);
1790
1791 sem->nr_replicas = nr_replicas;
1792 sem->m = m;
1793 sem->max_fifo_len = (sem->m/nr_replicas) + ((sem->m%nr_replicas) != 0);
1794 sem->nr_in_fifos = 0;
1795
1796 TRACE("New IKGLP Sem: m = %d, k = %d, max fifo_len = %d\n",
1797 sem->m,
1798 sem->nr_replicas,
1799 sem->max_fifo_len);
1800
1801 for(i = 0; i < nr_replicas; ++i)
1802 {
1803 struct fifo_queue* q = &(sem->fifo_queues[i]);
1804
1805 q->owner = NULL;
1806 q->hp_waiter = NULL;
1807 init_waitqueue_head(&q->wait);
1808 q->count = 0;
1809
1810 q->global_heap_node.task = NULL;
1811 INIT_BINHEAP_NODE(&q->global_heap_node.node);
1812
1813 q->donee_heap_node.task = NULL;
1814 q->donee_heap_node.donor_info = NULL;
1815 q->donee_heap_node.fq = NULL;
1816 INIT_BINHEAP_NODE(&q->donee_heap_node.node);
1817
1818 q->nest.lock = (struct litmus_lock*)sem;
1819 q->nest.hp_waiter_eff_prio = NULL;
1820 q->nest.hp_waiter_ptr = &q->hp_waiter;
1821 INIT_BINHEAP_NODE(&q->nest.hp_binheap_node);
1822 }
1823
1824 sem->shortest_fifo_queue = &sem->fifo_queues[0];
1825
1826 sem->top_m_size = 0;
1827
1828 // init heaps
1829 INIT_BINHEAP_HANDLE(&sem->top_m, ikglp_min_heap_base_priority_order);
1830 INIT_BINHEAP_HANDLE(&sem->not_top_m, ikglp_max_heap_base_priority_order);
1831 INIT_BINHEAP_HANDLE(&sem->donees, ikglp_min_heap_donee_order);
1832 INIT_BINHEAP_HANDLE(&sem->priority_queue, ikglp_max_heap_base_priority_order);
1833 INIT_BINHEAP_HANDLE(&sem->donors, ikglp_donor_max_heap_base_priority_order);
1834
1835#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1836 sem->aff_obs = NULL;
1837#endif
1838
1839 return &sem->litmus_lock;
1840}
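As an aside, max_fifo_len above is ceil(m/k) computed with the usual integer idiom. A minimal sketch follows; ceil_div() is a hypothetical helper for illustration only and is not part of this patch.

/* Hypothetical helper (illustration only): the integer ceiling used for
 * sem->max_fifo_len above. */
static inline int ceil_div(int m, int k)
{
	return (m / k) + ((m % k) != 0);
}
/* Example: m = 7 outstanding requests, k = 2 replicas -> ceil_div(7, 2) == 4,
 * so each FIFO queue is bounded at 4 requests. */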
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
1871
1872static inline int __replica_to_gpu(struct ikglp_affinity* aff, int replica)
1873{
1874 int gpu = replica % aff->nr_rsrc;
1875 return gpu;
1876}
1877
1878static inline int replica_to_gpu(struct ikglp_affinity* aff, int replica)
1879{
1880 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
1881 return gpu;
1882}
1883
1884static inline int gpu_to_base_replica(struct ikglp_affinity* aff, int gpu)
1885{
1886 int replica = gpu - aff->offset;
1887 return replica;
1888}
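The three helpers above define the replica-to-GPU layout: aff->nr_rsrc GPUs, aff->nr_simult replicas per GPU, plus a system-wide GPU offset. The concrete numbers in the sketch below are invented for illustration.

/* Illustration only (invented numbers): with nr_rsrc = 2, nr_simult = 2,
 * and offset = 4, the four replicas map to physical GPUs as
 *   replica 0 -> gpu 4,   replica 1 -> gpu 5,
 *   replica 2 -> gpu 4,   replica 3 -> gpu 5,
 * i.e., replicas i and i + nr_rsrc are copies of the same GPU, and
 * gpu_to_base_replica() undoes only the offset, not the modulo. */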
1889
1890
1891int ikglp_aff_obs_close(struct affinity_observer* obs)
1892{
1893 return 0;
1894}
1895
1896void ikglp_aff_obs_free(struct affinity_observer* obs)
1897{
1898 struct ikglp_affinity *ikglp_aff = ikglp_aff_obs_from_aff_obs(obs);
1899 kfree(ikglp_aff->nr_cur_users_on_rsrc);
1900 kfree(ikglp_aff->q_info);
1901 kfree(ikglp_aff);
1902}
1903
1904static struct affinity_observer* ikglp_aff_obs_new(struct affinity_observer_ops* ops,
1905 struct ikglp_affinity_ops* ikglp_ops,
1906 void* __user args)
1907{
1908 struct ikglp_affinity* ikglp_aff;
1909 struct gpu_affinity_observer_args aff_args;
1910 struct ikglp_semaphore* sem;
1911 int i;
1912 unsigned long flags;
1913
1914 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
1915 return(NULL);
1916 }
1917 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
1918 return(NULL);
1919 }
1920
1921 sem = (struct ikglp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
1922
1923 if(sem->litmus_lock.type != IKGLP_SEM) {
1924 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
1925 return(NULL);
1926 }
1927
1928 if((aff_args.nr_simult_users <= 0) ||
1929 (sem->nr_replicas%aff_args.nr_simult_users != 0)) {
1930 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
1931 "(%d) per replica. #replicas should be evenly divisible "
1932 "by #simult_users.\n",
1933 sem->litmus_lock.ident,
1934 sem->nr_replicas,
1935 aff_args.nr_simult_users);
1936 return(NULL);
1937 }
1938
1939 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
1940 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
1941 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
1942// return(NULL);
1943 }
1944
1945 ikglp_aff = kmalloc(sizeof(*ikglp_aff), GFP_KERNEL);
1946 if(!ikglp_aff) {
1947 return(NULL);
1948 }
1949
1950 ikglp_aff->q_info = kmalloc(sizeof(struct ikglp_queue_info)*sem->nr_replicas, GFP_KERNEL);
1951 if(!ikglp_aff->q_info) {
1952 kfree(ikglp_aff);
1953 return(NULL);
1954 }
1955
1956 ikglp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->nr_replicas / aff_args.nr_simult_users), GFP_KERNEL);
1957 if(!ikglp_aff->nr_cur_users_on_rsrc) {
1958 kfree(ikglp_aff->q_info);
1959 kfree(ikglp_aff);
1960 return(NULL);
1961 }
1962
1963 affinity_observer_new(&ikglp_aff->obs, ops, &aff_args.obs);
1964
1965 ikglp_aff->ops = ikglp_ops;
1966 ikglp_aff->offset = aff_args.replica_to_gpu_offset;
1967 ikglp_aff->nr_simult = aff_args.nr_simult_users;
1968 ikglp_aff->nr_rsrc = sem->nr_replicas / ikglp_aff->nr_simult;
1969 ikglp_aff->relax_max_fifo_len = (aff_args.relaxed_rules) ? 1 : 0;
1970
1971 TRACE_CUR("GPU affinity_observer: offset = %d, nr_simult = %d, "
1972 "nr_rsrc = %d, relaxed_fifo_len = %d\n",
1973 ikglp_aff->offset, ikglp_aff->nr_simult, ikglp_aff->nr_rsrc,
1974 ikglp_aff->relax_max_fifo_len);
1975
1976 memset(ikglp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(ikglp_aff->nr_rsrc));
1977
1978 for(i = 0; i < sem->nr_replicas; ++i) {
1979 ikglp_aff->q_info[i].q = &sem->fifo_queues[i];
1980 ikglp_aff->q_info[i].estimated_len = 0;
1981
1982 // multiple q_info's will point to the same resource (aka GPU) if
1983 // aff_args.nr_simult_users > 1
1984 ikglp_aff->q_info[i].nr_cur_users = &ikglp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(ikglp_aff,i)];
1985 }
1986
1987 // attach observer to the lock
1988 raw_spin_lock_irqsave(&sem->real_lock, flags);
1989 sem->aff_obs = ikglp_aff;
1990 raw_spin_unlock_irqrestore(&sem->real_lock, flags);
1991
1992 return &ikglp_aff->obs;
1993}
1994
1995
1996
1997
1998static int gpu_replica_to_resource(struct ikglp_affinity* aff,
1999 struct fifo_queue* fq) {
2000 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2001 return(replica_to_gpu(aff, ikglp_get_idx(sem, fq)));
2002}
2003
2004
2005// Smart IKGLP Affinity
2006
2007//static inline struct ikglp_queue_info* ikglp_aff_find_shortest(struct ikglp_affinity* aff)
2008//{
2009// struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2010// struct ikglp_queue_info *shortest = &aff->q_info[0];
2011// int i;
2012//
2013// for(i = 1; i < sem->nr_replicas; ++i) {
2014// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
2015// shortest = &aff->q_info[i];
2016// }
2017// }
2018//
2019// return(shortest);
2020//}
2021
2022struct fifo_queue* gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2023{
2024	// advise_enqueue must be smart so as not to break IKGLP rules:
2025	//  * No queue can be greater than ceil(m/k) in length. We may return
2026	//    such a queue, but IKGLP is smart enough to send requests to
2027	//    donors or the PQ instead.
2028	//  * Cannot let a queue idle if there exist waiting PQ requests/donors
2029	//    -- needed to guarantee parallel progress of waiters.
2030	//
2031	// We may be able to relax some of these constraints, but this will have to
2032	// be carefully evaluated.
2033	//
2034	// Heuristic strategy: Find the shortest queue that is not full.
2035
2036 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2037 lt_t min_len;
2038 int min_nr_users;
2039 struct ikglp_queue_info *shortest;
2040 struct fifo_queue *to_enqueue;
2041 int i;
2042 int affinity_gpu;
2043
2044 int max_fifo_len = (aff->relax_max_fifo_len) ?
2045 sem->m : sem->max_fifo_len;
2046
2047	// simply pick the shortest queue if we have no affinity, or if we have
2048	// affinity with the shortest
2049 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
2050 affinity_gpu = aff->offset; // first gpu
2051 TRACE_CUR("no affinity\n");
2052 }
2053 else {
2054 affinity_gpu = tsk_rt(t)->last_gpu;
2055 }
2056
2057 // all things being equal, let's start with the queue with which we have
2058 // affinity. this helps us maintain affinity even when we don't have
2059	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
2060 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
2061
2062 // if(shortest == aff->shortest_queue) {
2063 // TRACE_CUR("special case: have affinity with shortest queue\n");
2064 // goto out;
2065 // }
2066
2067 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
2068 min_nr_users = *(shortest->nr_cur_users);
2069
2070 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2071 get_gpu_estimate(t, MIG_LOCAL),
2072 ikglp_get_idx(sem, shortest->q),
2073 shortest->q->count,
2074 min_len);
2075
2076 for(i = 0; i < sem->nr_replicas; ++i) {
2077 if(&aff->q_info[i] != shortest) {
2078 if(aff->q_info[i].q->count < max_fifo_len) {
2079
2080 lt_t est_len =
2081 aff->q_info[i].estimated_len +
2082 get_gpu_estimate(t,
2083 gpu_migration_distance(tsk_rt(t)->last_gpu,
2084 replica_to_gpu(aff, i)));
2085
2086				// queue is smaller, or they're equal and the other has a smaller
2087				// number of total users.
2088				//
2089				// tie-break on the smallest number of simultaneous users. this only
2090				// kicks in when there is more than one empty queue.
2091 if((shortest->q->count >= max_fifo_len) || /* 'shortest' is full and i-th queue is not */
2092 (est_len < min_len) || /* i-th queue has shortest length */
2093 ((est_len == min_len) && /* equal lengths, but one has fewer over-all users */
2094 (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2095
2096 shortest = &aff->q_info[i];
2097 min_len = est_len;
2098 min_nr_users = *(aff->q_info[i].nr_cur_users);
2099 }
2100
2101 TRACE_CUR("cs is %llu on queue %d (count = %d): est len = %llu\n",
2102 get_gpu_estimate(t,
2103 gpu_migration_distance(tsk_rt(t)->last_gpu,
2104 replica_to_gpu(aff, i))),
2105 ikglp_get_idx(sem, aff->q_info[i].q),
2106 aff->q_info[i].q->count,
2107 est_len);
2108 }
2109 else {
2110 TRACE_CUR("queue %d is too long. ineligible for enqueue.\n",
2111 ikglp_get_idx(sem, aff->q_info[i].q));
2112 }
2113 }
2114 }
2115
2116 if(shortest->q->count >= max_fifo_len) {
2117 TRACE_CUR("selected fq %d is too long, but returning it anyway.\n",
2118 ikglp_get_idx(sem, shortest->q));
2119 }
2120
2121 to_enqueue = shortest->q;
2122 TRACE_CUR("enqueue on fq %d (count = %d) (non-aff wanted fq %d)\n",
2123 ikglp_get_idx(sem, to_enqueue),
2124 to_enqueue->count,
2125 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2126
2127 return to_enqueue;
2128
2129 //return(sem->shortest_fifo_queue);
2130}
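The loop above reduces to a three-way preference when a candidate queue is compared against the current best: a non-full candidate beats a full best, otherwise the smaller migration-weighted estimated length wins, with ties broken by the number of concurrent holders on the underlying GPU. A condensed sketch of that predicate follows; the helper name and parameters are illustrative only (lt_t is the litmus time type), and it is assumed to be called only for candidates that are themselves below max_fifo_len.

/* Illustrative sketch of the selection predicate used in the loop above.
 * Returns nonzero if the (non-full) candidate should become the new best. */
static int candidate_is_better(int best_is_full,
			       lt_t best_len, int best_users,
			       lt_t cand_len, int cand_users)
{
	if (best_is_full)
		return 1;	/* current best exceeds the FIFO bound */
	if (cand_len < best_len)
		return 1;	/* strictly shorter estimated length */
	if (cand_len == best_len && cand_users < best_users)
		return 1;	/* tie-break: fewer concurrent GPU users */
	return 0;
}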
2131
2132
2133
2134
2135static ikglp_wait_state_t* pick_steal(struct ikglp_affinity* aff,
2136 int dest_gpu,
2137 struct fifo_queue* fq)
2138{
2139 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2140 ikglp_wait_state_t *wait = NULL;
2141 int max_improvement = -(MIG_NONE+1);
2142 int replica = ikglp_get_idx(sem, fq);
2143
2144 if(waitqueue_active(&fq->wait)) {
2145 int this_gpu = replica_to_gpu(aff, replica);
2146 struct list_head *pos;
2147
2148 list_for_each(pos, &fq->wait.task_list) {
2149 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2150 ikglp_wait_state_t *tmp_wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2151
2152 int tmp_improvement =
2153 gpu_migration_distance(this_gpu, tsk_rt(tmp_wait->task)->last_gpu) -
2154 gpu_migration_distance(dest_gpu, tsk_rt(tmp_wait->task)->last_gpu);
2155
2156 if(tmp_improvement > max_improvement) {
2157 wait = tmp_wait;
2158 max_improvement = tmp_improvement;
2159
2160 if(max_improvement >= (MIG_NONE-1)) {
2161 goto out;
2162 }
2163 }
2164 }
2165
2166 BUG_ON(!wait);
2167 }
2168 else {
2169 TRACE_CUR("fq %d is empty!\n", replica);
2170 }
2171
2172out:
2173
2174 TRACE_CUR("Candidate victim from fq %d is %s/%d. aff improvement = %d.\n",
2175 replica,
2176 (wait) ? wait->task->comm : "nil",
2177 (wait) ? wait->task->pid : -1,
2178 max_improvement);
2179
2180 return wait;
2181}
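The "improvement" computed above is the migration distance a waiter saves by being stolen: its distance to the GPU it currently queues on minus its distance to the destination GPU. A worked example follows; the enum values are assumed for illustration and are not taken from this patch.

/* Worked example with assumed values MIG_LOCAL=0 < MIG_NEAR=1 < MIG_MED=2 <
 * MIG_FAR=3 < MIG_NONE=4: a waiter whose last GPU is FAR (3) from its current
 * replica's GPU but LOCAL (0) to the destination yields improvement
 * 3 - 0 = 3 == MIG_NONE - 1, the best possible value, which triggers the
 * early 'goto out' above. */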
2182
2183
2184ikglp_wait_state_t* gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2185 struct fifo_queue* dst)
2186{
2187	// Heuristic strategy: Find the task with the greatest improvement in affinity.
2188 //
2189 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2190 ikglp_wait_state_t *to_steal_state = NULL;
2191// ikglp_wait_state_t *default_to_steal_state = ikglp_find_hp_waiter_to_steal(sem);
2192 int max_improvement = -(MIG_NONE+1);
2193 int replica, i;
2194 int dest_gpu;
2195
2196 replica = ikglp_get_idx(sem, dst);
2197 dest_gpu = replica_to_gpu(aff, replica);
2198
2199 for(i = 0; i < sem->nr_replicas; ++i) {
2200 ikglp_wait_state_t *tmp_to_steal_state =
2201 pick_steal(aff, dest_gpu, &sem->fifo_queues[i]);
2202
2203 if(tmp_to_steal_state) {
2204 int tmp_improvement =
2205 gpu_migration_distance(replica_to_gpu(aff, i), tsk_rt(tmp_to_steal_state->task)->last_gpu) -
2206 gpu_migration_distance(dest_gpu, tsk_rt(tmp_to_steal_state->task)->last_gpu);
2207
2208 if(tmp_improvement > max_improvement) {
2209 to_steal_state = tmp_to_steal_state;
2210 max_improvement = tmp_improvement;
2211
2212 if(max_improvement >= (MIG_NONE-1)) {
2213 goto out;
2214 }
2215 }
2216 }
2217 }
2218
2219out:
2220 if(!to_steal_state) {
2221 TRACE_CUR("Could not find anyone to steal.\n");
2222 }
2223 else {
2224 TRACE_CUR("Selected victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2225 to_steal_state->task->comm, to_steal_state->task->pid,
2226 ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq),
2227 replica_to_gpu(aff, ikglp_get_idx(sem, to_steal_state->donee_heap_node.fq)),
2228 ikglp_get_idx(sem, dst),
2229 dest_gpu,
2230 max_improvement);
2231
2232// TRACE_CUR("Non-aff wanted to select victim %s/%d on fq %d (GPU %d) for fq %d (GPU %d): improvement = %d\n",
2233// default_to_steal_state->task->comm, default_to_steal_state->task->pid,
2234// ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq),
2235// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2236// ikglp_get_idx(sem, dst),
2237// replica_to_gpu(aff, ikglp_get_idx(sem, dst)),
2238//
2239// gpu_migration_distance(
2240// replica_to_gpu(aff, ikglp_get_idx(sem, default_to_steal_state->donee_heap_node.fq)),
2241// tsk_rt(default_to_steal_state->task)->last_gpu) -
2242// gpu_migration_distance(dest_gpu, tsk_rt(default_to_steal_state->task)->last_gpu));
2243 }
2244
2245 return(to_steal_state);
2246}
2247
2248
2249static inline int has_donor(wait_queue_t* fq_wait)
2250{
2251 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2252 return(wait->donee_heap_node.donor_info != NULL);
2253}
2254
2255static ikglp_donee_heap_node_t* pick_donee(struct ikglp_affinity* aff,
2256 struct fifo_queue* fq,
2257 int* dist_from_head)
2258{
2259 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2260 struct task_struct *donee;
2261 ikglp_donee_heap_node_t *donee_node;
2262 struct task_struct *mth_highest = ikglp_mth_highest(sem);
2263
2264// lt_t now = litmus_clock();
2265//
2266// TRACE_CUR("fq %d: mth_highest: %s/%d, deadline = %d: (donor) = ??? ",
2267// ikglp_get_idx(sem, fq),
2268// mth_highest->comm, mth_highest->pid,
2269// (int)get_deadline(mth_highest) - now);
2270
2271 if(fq->owner &&
2272 fq->donee_heap_node.donor_info == NULL &&
2273 mth_highest != fq->owner &&
2274 litmus->__compare(mth_highest, BASE, fq->owner, BASE)) {
2275 donee = fq->owner;
2276 donee_node = &(fq->donee_heap_node);
2277 *dist_from_head = 0;
2278
2279 BUG_ON(donee != donee_node->task);
2280
2281 TRACE_CUR("picked owner of fq %d as donee\n",
2282 ikglp_get_idx(sem, fq));
2283
2284 goto out;
2285 }
2286 else if(waitqueue_active(&fq->wait)) {
2287 struct list_head *pos;
2288
2289
2290// TRACE_CUR("fq %d: owner: %s/%d, deadline = %d: (donor) = %s/%d "
2291// "(mth_highest != fq->owner) = %d "
2292// "(mth_highest > fq->owner) = %d\n",
2293// ikglp_get_idx(sem, fq),
2294// (fq->owner) ? fq->owner->comm : "nil",
2295// (fq->owner) ? fq->owner->pid : -1,
2296// (fq->owner) ? (int)get_deadline(fq->owner) - now : -999,
2297// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->comm : "nil",
2298// (fq->donee_heap_node.donor_info) ? fq->donee_heap_node.donor_info->task->pid : -1,
2299// (mth_highest != fq->owner),
2300// (litmus->__compare(mth_highest, BASE, fq->owner, BASE)));
2301
2302
2303 *dist_from_head = 1;
2304
2305 // iterating from the start of the queue is nice since this means
2306 // the donee will be closer to obtaining a resource.
2307 list_for_each(pos, &fq->wait.task_list) {
2308 wait_queue_t *fq_wait = list_entry(pos, wait_queue_t, task_list);
2309 ikglp_wait_state_t *wait = container_of(fq_wait, ikglp_wait_state_t, fq_node);
2310
2311// TRACE_CUR("fq %d: waiter %d: %s/%d, deadline = %d (donor) = %s/%d "
2312// "(mth_highest != wait->task) = %d "
2313// "(mth_highest > wait->task) = %d\n",
2314// ikglp_get_idx(sem, fq),
2315// dist_from_head,
2316// wait->task->comm, wait->task->pid,
2317// (int)get_deadline(wait->task) - now,
2318// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->comm : "nil",
2319// (wait->donee_heap_node.donor_info) ? wait->donee_heap_node.donor_info->task->pid : -1,
2320// (mth_highest != wait->task),
2321// (litmus->__compare(mth_highest, BASE, wait->task, BASE)));
2322
2323
2324 if(!has_donor(fq_wait) &&
2325 mth_highest != wait->task &&
2326 litmus->__compare(mth_highest, BASE, wait->task, BASE)) {
2327 donee = (struct task_struct*) fq_wait->private;
2328 donee_node = &wait->donee_heap_node;
2329
2330 BUG_ON(donee != donee_node->task);
2331
2332 TRACE_CUR("picked waiter in fq %d as donee\n",
2333 ikglp_get_idx(sem, fq));
2334
2335 goto out;
2336 }
2337 ++(*dist_from_head);
2338 }
2339 }
2340
2341 donee = NULL;
2342 donee_node = NULL;
2343 //*dist_from_head = sem->max_fifo_len + 1;
2344 *dist_from_head = IKGLP_INVAL_DISTANCE;
2345
2346	TRACE_CUR("Found no one to be a donee in fq %d!\n", ikglp_get_idx(sem, fq));
2347
2348out:
2349
2350 TRACE_CUR("Candidate donee for fq %d is %s/%d (dist_from_head = %d)\n",
2351 ikglp_get_idx(sem, fq),
2352 (donee) ? (donee)->comm : "nil",
2353 (donee) ? (donee)->pid : -1,
2354 *dist_from_head);
2355
2356 return donee_node;
2357}
2358
2359ikglp_donee_heap_node_t* gpu_ikglp_advise_donee_selection(
2360 struct ikglp_affinity* aff,
2361 struct task_struct* donor)
2362{
2363	// Heuristic strategy: Find the highest-priority donee that is waiting on
2364 // a queue closest to our affinity. (1) The donee CANNOT already have a
2365 // donor (exception: donee is the lowest-prio task in the donee heap).
2366 // (2) Requests in 'top_m' heap are ineligible.
2367 //
2368	// Further strategy: among eligible donees waiting for the same GPU, pick
2369 // the one closest to the head of the FIFO queue (including owners).
2370 //
2371 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2372 ikglp_donee_heap_node_t *donee_node;
2373 gpu_migration_dist_t distance;
2374 int start, i, j;
2375
2376 ikglp_donee_heap_node_t *default_donee;
2377 ikglp_wait_state_t *default_donee_donor_info;
2378
2379 if(tsk_rt(donor)->last_gpu < 0) {
2380 // no affinity. just return the min prio, like standard IKGLP
2381 // TODO: Find something closer to the head of the queue??
2382 donee_node = binheap_top_entry(&sem->donees,
2383 ikglp_donee_heap_node_t,
2384 node);
2385 goto out;
2386 }
2387
2388
2389	// Temporarily break any donation relation held by the default donee (the
2390	// lowest-prio task in the FIFO queues) to make it eligible for selection below.
2391	//
2392	// NOTE: The original donor relation *must* be restored, even if we select
2393	// the default donee through affinity-aware selection, before returning
2394 // from this function so we don't screw up our heap ordering.
2395 // The standard IKGLP algorithm will steal the donor relationship if needed.
2396 default_donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2397 default_donee_donor_info = default_donee->donor_info; // back-up donor relation
2398 default_donee->donor_info = NULL; // temporarily break any donor relation.
2399
2400 // initialize our search
2401 donee_node = NULL;
2402 distance = MIG_NONE;
2403
2404 // TODO: The below search logic may work well for locating nodes to steal
2405 // when an FQ goes idle. Validate this code and apply it to stealing.
2406
2407 // begin search with affinity GPU.
2408 start = gpu_to_base_replica(aff, tsk_rt(donor)->last_gpu);
2409 i = start;
2410 do { // "for each gpu" / "for each aff->nr_rsrc"
2411 gpu_migration_dist_t temp_distance = gpu_migration_distance(start, i);
2412
2413 // only interested in queues that will improve our distance
2414 if(temp_distance < distance || donee_node == NULL) {
2415 int dist_from_head = IKGLP_INVAL_DISTANCE;
2416
2417			TRACE_CUR("searching for a donee on GPU %d\n", i);
2418
2419 // visit each queue and pick a donee. bail as soon as we find
2420 // one for this class.
2421
2422 for(j = 0; j < aff->nr_simult; ++j) {
2423 int temp_dist_from_head;
2424 ikglp_donee_heap_node_t *temp_donee_node;
2425 struct fifo_queue *fq;
2426
2427 fq = &(sem->fifo_queues[i + j*aff->nr_rsrc]);
2428 temp_donee_node = pick_donee(aff, fq, &temp_dist_from_head);
2429
2430 if(temp_dist_from_head < dist_from_head)
2431 {
2432 // we check all the FQs for this GPU to spread priorities
2433 // out across the queues. does this decrease jitter?
2434 donee_node = temp_donee_node;
2435 dist_from_head = temp_dist_from_head;
2436 }
2437 }
2438
2439 if(dist_from_head != IKGLP_INVAL_DISTANCE) {
2440 TRACE_CUR("found donee %s/%d and is the %d-th waiter.\n",
2441 donee_node->task->comm, donee_node->task->pid,
2442 dist_from_head);
2443 }
2444 else {
2445			TRACE_CUR("found no eligible donees on GPU %d\n", i);
2446 }
2447 }
2448 else {
2449 TRACE_CUR("skipping GPU %d (distance = %d, best donor "
2450 "distance = %d)\n", i, temp_distance, distance);
2451 }
2452
2453 i = (i+1 < aff->nr_rsrc) ? i+1 : 0; // increment with wrap-around
2454 } while (i != start);
2455
2456
2457 // restore old donor info state.
2458 default_donee->donor_info = default_donee_donor_info;
2459
2460 if(!donee_node) {
2461 donee_node = default_donee;
2462
2463 TRACE_CUR("Could not find a donee. We have to steal one.\n");
2464 WARN_ON(default_donee->donor_info == NULL);
2465 }
2466
2467out:
2468
2469 TRACE_CUR("Selected donee %s/%d on fq %d (GPU %d) for %s/%d with affinity for GPU %d\n",
2470 donee_node->task->comm, donee_node->task->pid,
2471 ikglp_get_idx(sem, donee_node->fq),
2472 replica_to_gpu(aff, ikglp_get_idx(sem, donee_node->fq)),
2473 donor->comm, donor->pid, tsk_rt(donor)->last_gpu);
2474
2475 return(donee_node);
2476}
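To recap the search above: GPUs are visited in wrap-around order starting from the donor's last GPU, a GPU is examined only if its migration distance improves on the best donee found so far, and within a GPU the eligible donee closest to the head of a FIFO queue wins (the owner counts as distance 0). The comment below merely restates the per-candidate eligibility test applied by pick_donee(); it introduces no new logic.

/* Eligibility test applied to each candidate donee 'cand':
 *  - cand has no donor already (its donee_heap_node.donor_info is NULL), and
 *  - cand is outside the top-m, i.e. the m-th highest base priority beats it:
 *    litmus->__compare(mth_highest, BASE, cand, BASE).
 * The FQ owner is tested first (dist_from_head == 0), then waiters in
 * FIFO order. */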
2477
2478
2479
2480static void __find_closest_donor(int target_gpu,
2481 struct binheap_node* donor_node,
2482 ikglp_wait_state_t** cur_closest,
2483 int* cur_dist)
2484{
2485 ikglp_wait_state_t *this_donor =
2486 binheap_entry(donor_node, ikglp_wait_state_t, node);
2487
2488 int this_dist =
2489 gpu_migration_distance(target_gpu, tsk_rt(this_donor->task)->last_gpu);
2490
2491// TRACE_CUR("%s/%d: dist from target = %d\n",
2492// this_donor->task->comm,
2493// this_donor->task->pid,
2494// this_dist);
2495
2496 if(this_dist < *cur_dist) {
2497 // take this donor
2498 *cur_dist = this_dist;
2499 *cur_closest = this_donor;
2500 }
2501 else if(this_dist == *cur_dist) {
2502		// priority tie-break. Even though this is a pre-order traversal,
2503		// this is a heap, not a binary search tree, so we still need to do a
2504		// priority comparison.
2505 if(!(*cur_closest) ||
2506 litmus->compare(this_donor->task, (*cur_closest)->task)) {
2507 *cur_dist = this_dist;
2508 *cur_closest = this_donor;
2509 }
2510 }
2511
2512 if(donor_node->left) __find_closest_donor(target_gpu, donor_node->left, cur_closest, cur_dist);
2513 if(donor_node->right) __find_closest_donor(target_gpu, donor_node->right, cur_closest, cur_dist);
2514}
2515
2516ikglp_wait_state_t* gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2517{
2518	// Heuristic strategy: Find the donor with the closest affinity to fq.
2519 // Tie-break on priority.
2520
2521	// We need to iterate over all the donors to do this. Unfortunately,
2522	// our donors are organized in a heap. We'll visit each node with a
2523	// recursive call. This is relatively safe since there are at most sem->m
2524	// donors. We won't recurse deeply enough to have to worry about our
2525	// stack. (Even with 128 CPUs, the recursion depth is at most about 7.)
2526
2527 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2528 ikglp_wait_state_t *donor = NULL;
2529 int distance = MIG_NONE;
2530 int gpu = replica_to_gpu(aff, ikglp_get_idx(sem, fq));
2531 ikglp_wait_state_t* default_donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2532
2533 __find_closest_donor(gpu, sem->donors.root, &donor, &distance);
2534
2535 TRACE_CUR("Selected donor %s/%d (distance = %d) to move to fq %d "
2536 "(non-aff wanted %s/%d). differs = %d\n",
2537 donor->task->comm, donor->task->pid,
2538 distance,
2539 ikglp_get_idx(sem, fq),
2540 default_donor->task->comm, default_donor->task->pid,
2541 (donor->task != default_donor->task)
2542 );
2543
2544 return(donor);
2545}
2546
2547
2548
2549void gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2550{
2551 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2552 int replica = ikglp_get_idx(sem, fq);
2553 int gpu = replica_to_gpu(aff, replica);
2554 struct ikglp_queue_info *info = &aff->q_info[replica];
2555 lt_t est_time;
2556 lt_t est_len_before;
2557
2558 if(current == t) {
2559 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
2560 }
2561
2562 est_len_before = info->estimated_len;
2563 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2564 info->estimated_len += est_time;
2565
2566 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
2567 ikglp_get_idx(sem, info->q),
2568 est_len_before, est_time,
2569 info->estimated_len);
2570
2571 // if(aff->shortest_queue == info) {
2572 // // we may no longer be the shortest
2573 // aff->shortest_queue = ikglp_aff_find_shortest(aff);
2574 //
2575 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2576 // ikglp_get_idx(sem, aff->shortest_queue->q),
2577 // aff->shortest_queue->q->count,
2578 // aff->shortest_queue->estimated_len);
2579 // }
2580}
2581
2582void gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2583{
2584 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2585 int replica = ikglp_get_idx(sem, fq);
2586 int gpu = replica_to_gpu(aff, replica);
2587 struct ikglp_queue_info *info = &aff->q_info[replica];
2588 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2589
2590 if(est_time > info->estimated_len) {
2591 WARN_ON(1);
2592 info->estimated_len = 0;
2593 }
2594 else {
2595 info->estimated_len -= est_time;
2596 }
2597
2598 TRACE_CUR("fq %d est len is now %llu\n",
2599 ikglp_get_idx(sem, info->q),
2600 info->estimated_len);
2601
2602 // check to see if we're the shortest queue now.
2603 // if((aff->shortest_queue != info) &&
2604 // (aff->shortest_queue->estimated_len > info->estimated_len)) {
2605 //
2606 // aff->shortest_queue = info;
2607 //
2608 // TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
2609 // ikglp_get_idx(sem, info->q),
2610 // info->q->count,
2611 // info->estimated_len);
2612 // }
2613}
2614
2615void gpu_ikglp_notify_acquired(struct ikglp_affinity* aff,
2616 struct fifo_queue* fq,
2617 struct task_struct* t)
2618{
2619 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2620 int replica = ikglp_get_idx(sem, fq);
2621 int gpu = replica_to_gpu(aff, replica);
2622
2623 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
2624
2625 TRACE_CUR("%s/%d acquired gpu %d (prev = %d). migration type = %d\n",
2626 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu, tsk_rt(t)->gpu_migration);
2627
2628	// count the number of resource holders
2629 ++(*(aff->q_info[replica].nr_cur_users));
2630
2631 reg_nv_device(gpu, 1, t); // register
2632
2633 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
2634 reset_gpu_tracker(t);
2635 start_gpu_tracker(t);
2636}
2637
2638void gpu_ikglp_notify_freed(struct ikglp_affinity* aff,
2639 struct fifo_queue* fq,
2640 struct task_struct* t)
2641{
2642 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2643 int replica = ikglp_get_idx(sem, fq);
2644 int gpu = replica_to_gpu(aff, replica);
2645 lt_t est_time;
2646
2647 stop_gpu_tracker(t); // stop the tracker before we do anything else.
2648
2649 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
2650
2651	// count the number of resource holders
2652 --(*(aff->q_info[replica].nr_cur_users));
2653
2654 reg_nv_device(gpu, 0, t); // unregister
2655
2656 // update estimates
2657 update_gpu_estimate(t, get_gpu_time(t));
2658
2659 TRACE_CUR("%s/%d freed gpu %d (prev = %d). mig type = %d. actual time was %llu. "
2660			  "estimated was %llu. diff is %lld\n",
2661 t->comm, t->pid, gpu, tsk_rt(t)->last_gpu,
2662 tsk_rt(t)->gpu_migration,
2663 get_gpu_time(t),
2664 est_time,
2665 (long long)get_gpu_time(t) - (long long)est_time);
2666
2667 tsk_rt(t)->last_gpu = gpu;
2668}
2669
2670struct ikglp_affinity_ops gpu_ikglp_affinity =
2671{
2672 .advise_enqueue = gpu_ikglp_advise_enqueue,
2673 .advise_steal = gpu_ikglp_advise_steal,
2674 .advise_donee_selection = gpu_ikglp_advise_donee_selection,
2675 .advise_donor_to_fq = gpu_ikglp_advise_donor_to_fq,
2676
2677 .notify_enqueue = gpu_ikglp_notify_enqueue,
2678 .notify_dequeue = gpu_ikglp_notify_dequeue,
2679 .notify_acquired = gpu_ikglp_notify_acquired,
2680 .notify_freed = gpu_ikglp_notify_freed,
2681
2682 .replica_to_resource = gpu_replica_to_resource,
2683};
2684
2685struct affinity_observer* ikglp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2686 void* __user args)
2687{
2688 return ikglp_aff_obs_new(ops, &gpu_ikglp_affinity, args);
2689}
2690
2691
2692
2693
2694
2695
2696
2697
2698// Simple ikglp Affinity (standard ikglp with auto-gpu registration)
2699
2700struct fifo_queue* simple_gpu_ikglp_advise_enqueue(struct ikglp_affinity* aff, struct task_struct* t)
2701{
2702 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2703 int min_count;
2704 int min_nr_users;
2705 struct ikglp_queue_info *shortest;
2706 struct fifo_queue *to_enqueue;
2707 int i;
2708
2709 // TRACE_CUR("Simple GPU ikglp advise_enqueue invoked\n");
2710
2711 shortest = &aff->q_info[0];
2712 min_count = shortest->q->count;
2713 min_nr_users = *(shortest->nr_cur_users);
2714
2715 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2716 ikglp_get_idx(sem, shortest->q),
2717 shortest->q->count,
2718 min_nr_users);
2719
2720 for(i = 1; i < sem->nr_replicas; ++i) {
2721 int len = aff->q_info[i].q->count;
2722
2723		// queue is smaller, or they're equal and the other has a smaller
2724		// number of total users.
2725		//
2726		// tie-break on the smallest number of simultaneous users. this only
2727		// kicks in when there is more than one empty queue.
2728 if((len < min_count) ||
2729 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
2730 shortest = &aff->q_info[i];
2731 min_count = shortest->q->count;
2732 min_nr_users = *(aff->q_info[i].nr_cur_users);
2733 }
2734
2735 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
2736 ikglp_get_idx(sem, aff->q_info[i].q),
2737 aff->q_info[i].q->count,
2738 *(aff->q_info[i].nr_cur_users));
2739 }
2740
2741 to_enqueue = shortest->q;
2742 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
2743 ikglp_get_idx(sem, to_enqueue),
2744 ikglp_get_idx(sem, sem->shortest_fifo_queue));
2745
2746 return to_enqueue;
2747}
2748
2749ikglp_wait_state_t* simple_gpu_ikglp_advise_steal(struct ikglp_affinity* aff,
2750 struct fifo_queue* dst)
2751{
2752 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2753 // TRACE_CUR("Simple GPU ikglp advise_steal invoked\n");
2754 return ikglp_find_hp_waiter_to_steal(sem);
2755}
2756
2757ikglp_donee_heap_node_t* simple_gpu_ikglp_advise_donee_selection(struct ikglp_affinity* aff, struct task_struct* donor)
2758{
2759 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2760 ikglp_donee_heap_node_t *donee = binheap_top_entry(&sem->donees, ikglp_donee_heap_node_t, node);
2761 return(donee);
2762}
2763
2764ikglp_wait_state_t* simple_gpu_ikglp_advise_donor_to_fq(struct ikglp_affinity* aff, struct fifo_queue* fq)
2765{
2766 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2767 ikglp_wait_state_t* donor = binheap_top_entry(&sem->donors, ikglp_wait_state_t, node);
2768 return(donor);
2769}
2770
2771void simple_gpu_ikglp_notify_enqueue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2772{
2773 // TRACE_CUR("Simple GPU ikglp notify_enqueue invoked\n");
2774}
2775
2776void simple_gpu_ikglp_notify_dequeue(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2777{
2778 // TRACE_CUR("Simple GPU ikglp notify_dequeue invoked\n");
2779}
2780
2781void simple_gpu_ikglp_notify_acquired(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2782{
2783 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2784 int replica = ikglp_get_idx(sem, fq);
2785 int gpu = replica_to_gpu(aff, replica);
2786
2787 // TRACE_CUR("Simple GPU ikglp notify_acquired invoked\n");
2788
2789	// count the number of resource holders
2790 ++(*(aff->q_info[replica].nr_cur_users));
2791
2792 reg_nv_device(gpu, 1, t); // register
2793}
2794
2795void simple_gpu_ikglp_notify_freed(struct ikglp_affinity* aff, struct fifo_queue* fq, struct task_struct* t)
2796{
2797 struct ikglp_semaphore *sem = ikglp_from_lock(aff->obs.lock);
2798 int replica = ikglp_get_idx(sem, fq);
2799 int gpu = replica_to_gpu(aff, replica);
2800
2801 // TRACE_CUR("Simple GPU ikglp notify_freed invoked\n");
2802	// count the number of resource holders
2803 --(*(aff->q_info[replica].nr_cur_users));
2804
2805 reg_nv_device(gpu, 0, t); // unregister
2806}
2807
2808struct ikglp_affinity_ops simple_gpu_ikglp_affinity =
2809{
2810 .advise_enqueue = simple_gpu_ikglp_advise_enqueue,
2811 .advise_steal = simple_gpu_ikglp_advise_steal,
2812 .advise_donee_selection = simple_gpu_ikglp_advise_donee_selection,
2813 .advise_donor_to_fq = simple_gpu_ikglp_advise_donor_to_fq,
2814
2815 .notify_enqueue = simple_gpu_ikglp_notify_enqueue,
2816 .notify_dequeue = simple_gpu_ikglp_notify_dequeue,
2817 .notify_acquired = simple_gpu_ikglp_notify_acquired,
2818 .notify_freed = simple_gpu_ikglp_notify_freed,
2819
2820 .replica_to_resource = gpu_replica_to_resource,
2821};
2822
2823struct affinity_observer* ikglp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
2824 void* __user args)
2825{
2826 return ikglp_aff_obs_new(ops, &simple_gpu_ikglp_affinity, args);
2827}
2828
2829#endif
2830
2831
2832
2833
2834
2835
2836
2837
2838
diff --git a/litmus/jobs.c b/litmus/jobs.c
index 36e314625d86..1d97462cc128 100644
--- a/litmus/jobs.c
+++ b/litmus/jobs.c
@@ -10,8 +10,21 @@ void prepare_for_next_period(struct task_struct *t)
10{ 10{
11 BUG_ON(!t); 11 BUG_ON(!t);
12 /* prepare next release */ 12 /* prepare next release */
13 t->rt_param.job_params.release = t->rt_param.job_params.deadline; 13
14 t->rt_param.job_params.deadline += get_rt_period(t); 14 if(tsk_rt(t)->task_params.cls == RT_CLASS_SOFT_W_SLIP) {
15 /* allow the release point to slip if we've passed our deadline. */
16 lt_t now = litmus_clock();
17 t->rt_param.job_params.release =
18 (t->rt_param.job_params.deadline < now) ?
19 now : t->rt_param.job_params.deadline;
20 t->rt_param.job_params.deadline =
21 t->rt_param.job_params.release + get_rt_period(t);
22 }
23 else {
24 t->rt_param.job_params.release = t->rt_param.job_params.deadline;
25 t->rt_param.job_params.deadline += get_rt_period(t);
26 }
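	/* Worked example (illustrative, assumed values): a RT_CLASS_SOFT_W_SLIP
	 * task with a 10ms period whose deadline was t = 100ms but that is only
	 * prepared for its next job at now = 103ms gets release = 103ms and
	 * deadline = 113ms, instead of the back-to-back release = 100ms and
	 * deadline = 110ms computed by the non-slipping branch above. */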
27
15 t->rt_param.job_params.exec_time = 0; 28 t->rt_param.job_params.exec_time = 0;
16 /* update job sequence number */ 29 /* update job sequence number */
17 t->rt_param.job_params.job_no++; 30 t->rt_param.job_params.job_no++;
diff --git a/litmus/kexclu_affinity.c b/litmus/kexclu_affinity.c
new file mode 100644
index 000000000000..5ef5e54d600d
--- /dev/null
+++ b/litmus/kexclu_affinity.c
@@ -0,0 +1,92 @@
1#include <litmus/fdso.h>
2#include <litmus/sched_plugin.h>
3#include <litmus/trace.h>
4#include <litmus/litmus.h>
5#include <litmus/locking.h>
6
7#include <litmus/kexclu_affinity.h>
8
9static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg);
10static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg);
11static int close_generic_aff_obs(struct od_table_entry* entry);
12static void destroy_generic_aff_obs(obj_type_t type, void* sem);
13
14struct fdso_ops generic_affinity_ops = {
15 .create = create_generic_aff_obs,
16 .open = open_generic_aff_obs,
17 .close = close_generic_aff_obs,
18 .destroy = destroy_generic_aff_obs
19};
20
21static atomic_t aff_obs_id_gen = ATOMIC_INIT(0);
22
23static inline bool is_affinity_observer(struct od_table_entry *entry)
24{
25 return (entry->class == &generic_affinity_ops);
26}
27
28static inline struct affinity_observer* get_affinity_observer(struct od_table_entry* entry)
29{
30 BUG_ON(!is_affinity_observer(entry));
31 return (struct affinity_observer*) entry->obj->obj;
32}
33
34static int create_generic_aff_obs(void** obj_ref, obj_type_t type, void* __user arg)
35{
36 struct affinity_observer* aff_obs;
37 int err;
38
39 err = litmus->allocate_aff_obs(&aff_obs, type, arg);
40 if (err == 0) {
41 BUG_ON(!aff_obs->lock);
42 aff_obs->type = type;
43 *obj_ref = aff_obs;
44 }
45 return err;
46}
47
48static int open_generic_aff_obs(struct od_table_entry* entry, void* __user arg)
49{
50 struct affinity_observer* aff_obs = get_affinity_observer(entry);
51 if (aff_obs->ops->open)
52 return aff_obs->ops->open(aff_obs, arg);
53 else
54 return 0; /* default: any task can open it */
55}
56
57static int close_generic_aff_obs(struct od_table_entry* entry)
58{
59 struct affinity_observer* aff_obs = get_affinity_observer(entry);
60 if (aff_obs->ops->close)
61 return aff_obs->ops->close(aff_obs);
62 else
63 return 0; /* default: closing succeeds */
64}
65
66static void destroy_generic_aff_obs(obj_type_t type, void* obj)
67{
68 struct affinity_observer* aff_obs = (struct affinity_observer*) obj;
69 aff_obs->ops->deallocate(aff_obs);
70}
71
72
73struct litmus_lock* get_lock_from_od(int od)
74{
75 extern struct fdso_ops generic_lock_ops;
76
77 struct od_table_entry *entry = get_entry_for_od(od);
78
79 if(entry && entry->class == &generic_lock_ops) {
80 return (struct litmus_lock*) entry->obj->obj;
81 }
82 return NULL;
83}
84
85void affinity_observer_new(struct affinity_observer* aff,
86 struct affinity_observer_ops* ops,
87 struct affinity_observer_args* args)
88{
89 aff->ops = ops;
90 aff->lock = get_lock_from_od(args->lock_od);
91 aff->ident = atomic_inc_return(&aff_obs_id_gen);
92} \ No newline at end of file
diff --git a/litmus/kfmlp_lock.c b/litmus/kfmlp_lock.c
new file mode 100644
index 000000000000..bff857ed8d4e
--- /dev/null
+++ b/litmus/kfmlp_lock.c
@@ -0,0 +1,1002 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/fdso.h>
7
8#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
9#include <litmus/gpu_affinity.h>
10#include <litmus/nvidia_info.h>
11#endif
12
13#include <litmus/kfmlp_lock.h>
14
15static inline int kfmlp_get_idx(struct kfmlp_semaphore* sem,
16 struct kfmlp_queue* queue)
17{
18 return (queue - &sem->queues[0]);
19}
20
21static inline struct kfmlp_queue* kfmlp_get_queue(struct kfmlp_semaphore* sem,
22 struct task_struct* holder)
23{
24 int i;
25 for(i = 0; i < sem->num_resources; ++i)
26 if(sem->queues[i].owner == holder)
27 return(&sem->queues[i]);
28 return(NULL);
29}
30
31/* caller is responsible for locking */
32static struct task_struct* kfmlp_find_hp_waiter(struct kfmlp_queue *kqueue,
33 struct task_struct *skip)
34{
35 struct list_head *pos;
36 struct task_struct *queued, *found = NULL;
37
38 list_for_each(pos, &kqueue->wait.task_list) {
39 queued = (struct task_struct*) list_entry(pos, wait_queue_t,
40 task_list)->private;
41
42 /* Compare task prios, find high prio task. */
43 //if (queued != skip && edf_higher_prio(queued, found))
44 if (queued != skip && litmus->compare(queued, found))
45 found = queued;
46 }
47 return found;
48}
49
50static inline struct kfmlp_queue* kfmlp_find_shortest(struct kfmlp_semaphore* sem,
51 struct kfmlp_queue* search_start)
52{
53 // we start our search at search_start instead of at the beginning of the
54 // queue list to load-balance across all resources.
55 struct kfmlp_queue* step = search_start;
56 struct kfmlp_queue* shortest = sem->shortest_queue;
57
58 do
59 {
60 step = (step+1 != &sem->queues[sem->num_resources]) ?
61 step+1 : &sem->queues[0];
62
63 if(step->count < shortest->count)
64 {
65 shortest = step;
66 if(step->count == 0)
67 break; /* can't get any shorter */
68 }
69
70 }while(step != search_start);
71
72 return(shortest);
73}
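/* Illustrative example, with assumed counts: for num_resources == 3 and
 * queue counts {2, 1, 0}, a search started at queue 0 steps to queue 1
 * (count 1, new shortest) and then to queue 2, where count == 0 ends the
 * scan early. Starting the scan at the caller's queue rather than at index
 * 0 spreads ties across replicas instead of always favoring low indices. */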
74
75
76static struct task_struct* kfmlp_select_hp_steal(struct kfmlp_semaphore* sem,
77 wait_queue_t** to_steal,
78 struct kfmlp_queue** to_steal_from)
79{
80 /* must hold sem->lock */
81
82 int i;
83
84 *to_steal = NULL;
85 *to_steal_from = NULL;
86
87 for(i = 0; i < sem->num_resources; ++i)
88 {
89 if( (sem->queues[i].count > 1) &&
90 ((*to_steal_from == NULL) ||
91 //(edf_higher_prio(sem->queues[i].hp_waiter, my_queue->hp_waiter))) )
92 (litmus->compare(sem->queues[i].hp_waiter, (*to_steal_from)->hp_waiter))) )
93 {
94 *to_steal_from = &sem->queues[i];
95 }
96 }
97
98 if(*to_steal_from)
99 {
100 struct list_head *pos;
101 struct task_struct *target = (*to_steal_from)->hp_waiter;
102
103 TRACE_CUR("want to steal hp_waiter (%s/%d) from queue %d\n",
104 target->comm,
105 target->pid,
106 kfmlp_get_idx(sem, *to_steal_from));
107
108 list_for_each(pos, &(*to_steal_from)->wait.task_list)
109 {
110 wait_queue_t *node = list_entry(pos, wait_queue_t, task_list);
111 struct task_struct *queued = (struct task_struct*) node->private;
112 /* Compare task prios, find high prio task. */
113 if (queued == target)
114 {
115 *to_steal = node;
116
117 TRACE_CUR("steal: selected %s/%d from queue %d\n",
118 queued->comm, queued->pid,
119 kfmlp_get_idx(sem, *to_steal_from));
120
121 return queued;
122 }
123 }
124
125 TRACE_CUR("Could not find %s/%d in queue %d!!! THIS IS A BUG!\n",
126 target->comm,
127 target->pid,
128 kfmlp_get_idx(sem, *to_steal_from));
129 }
130
131 return NULL;
132}
133
134static void kfmlp_steal_node(struct kfmlp_semaphore *sem,
135 struct kfmlp_queue *dst,
136 wait_queue_t *wait,
137 struct kfmlp_queue *src)
138{
139 struct task_struct* t = (struct task_struct*) wait->private;
140
141 __remove_wait_queue(&src->wait, wait);
142 --(src->count);
143
144 if(t == src->hp_waiter) {
145 src->hp_waiter = kfmlp_find_hp_waiter(src, NULL);
146
147 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
148 kfmlp_get_idx(sem, src),
149 (src->hp_waiter) ? src->hp_waiter->comm : "nil",
150 (src->hp_waiter) ? src->hp_waiter->pid : -1);
151
152 if(src->owner && tsk_rt(src->owner)->inh_task == t) {
153 litmus->decrease_prio(src->owner, src->hp_waiter);
154 }
155 }
156
157 if(sem->shortest_queue->count > src->count) {
158 sem->shortest_queue = src;
159 TRACE_CUR("queue %d is the shortest\n", kfmlp_get_idx(sem, sem->shortest_queue));
160 }
161
162#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
163 if(sem->aff_obs) {
164 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, src, t);
165 }
166#endif
167
168 init_waitqueue_entry(wait, t);
169 __add_wait_queue_tail_exclusive(&dst->wait, wait);
170 ++(dst->count);
171
172 if(litmus->compare(t, dst->hp_waiter)) {
173 dst->hp_waiter = t;
174
175 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
176 kfmlp_get_idx(sem, dst),
177 t->comm, t->pid);
178
179 if(dst->owner && litmus->compare(t, dst->owner))
180 {
181 litmus->increase_prio(dst->owner, t);
182 }
183 }
184
185#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
186 if(sem->aff_obs) {
187 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, dst, t);
188 }
189#endif
190}
191
192
193int kfmlp_lock(struct litmus_lock* l)
194{
195 struct task_struct* t = current;
196 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
197 struct kfmlp_queue* my_queue = NULL;
198 wait_queue_t wait;
199 unsigned long flags;
200
201 if (!is_realtime(t))
202 return -EPERM;
203
204 spin_lock_irqsave(&sem->lock, flags);
205
206#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
207 if(sem->aff_obs) {
208 my_queue = sem->aff_obs->ops->advise_enqueue(sem->aff_obs, t);
209 }
210 if(!my_queue) {
211 my_queue = sem->shortest_queue;
212 }
213#else
214 my_queue = sem->shortest_queue;
215#endif
216
217 if (my_queue->owner) {
218 /* resource is not free => must suspend and wait */
219 TRACE_CUR("queue %d: Resource is not free => must suspend and wait. (queue size = %d)\n",
220 kfmlp_get_idx(sem, my_queue),
221 my_queue->count);
222
223 init_waitqueue_entry(&wait, t);
224
225 /* FIXME: interruptible would be nice some day */
226 set_task_state(t, TASK_UNINTERRUPTIBLE);
227
228 __add_wait_queue_tail_exclusive(&my_queue->wait, &wait);
229
230 TRACE_CUR("queue %d: hp_waiter is currently %s/%d\n",
231 kfmlp_get_idx(sem, my_queue),
232 (my_queue->hp_waiter) ? my_queue->hp_waiter->comm : "nil",
233 (my_queue->hp_waiter) ? my_queue->hp_waiter->pid : -1);
234
235 /* check if we need to activate priority inheritance */
236 //if (edf_higher_prio(t, my_queue->hp_waiter))
237 if (litmus->compare(t, my_queue->hp_waiter)) {
238 my_queue->hp_waiter = t;
239 TRACE_CUR("queue %d: %s/%d is new hp_waiter\n",
240 kfmlp_get_idx(sem, my_queue),
241 t->comm, t->pid);
242
243 //if (edf_higher_prio(t, my_queue->owner))
244 if (litmus->compare(t, my_queue->owner)) {
245 litmus->increase_prio(my_queue->owner, my_queue->hp_waiter);
246 }
247 }
248
249 ++(my_queue->count);
250
251 if(my_queue == sem->shortest_queue) {
252 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
253 TRACE_CUR("queue %d is the shortest\n",
254 kfmlp_get_idx(sem, sem->shortest_queue));
255 }
256
257#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
258 if(sem->aff_obs) {
259 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
260 }
261#endif
262
263 /* release lock before sleeping */
264 spin_unlock_irqrestore(&sem->lock, flags);
265
266 /* We depend on the FIFO order. Thus, we don't need to recheck
267 * when we wake up; we are guaranteed to have the lock since
268 * there is only one wake up per release (or steal).
269 */
270 schedule();
271
272
273 if(my_queue->owner == t) {
274 TRACE_CUR("queue %d: acquired through waiting\n",
275 kfmlp_get_idx(sem, my_queue));
276 }
277 else {
278 /* this case may happen if our wait entry was stolen
279 between queues. record where we went. */
280 my_queue = kfmlp_get_queue(sem, t);
281
282 BUG_ON(!my_queue);
283 TRACE_CUR("queue %d: acquired through stealing\n",
284 kfmlp_get_idx(sem, my_queue));
285 }
286 }
287 else {
288 TRACE_CUR("queue %d: acquired immediately\n",
289 kfmlp_get_idx(sem, my_queue));
290
291 my_queue->owner = t;
292
293 ++(my_queue->count);
294
295 if(my_queue == sem->shortest_queue) {
296 sem->shortest_queue = kfmlp_find_shortest(sem, my_queue);
297 TRACE_CUR("queue %d is the shortest\n",
298 kfmlp_get_idx(sem, sem->shortest_queue));
299 }
300
301#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
302 if(sem->aff_obs) {
303 sem->aff_obs->ops->notify_enqueue(sem->aff_obs, my_queue, t);
304 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, t);
305 }
306#endif
307
308 spin_unlock_irqrestore(&sem->lock, flags);
309 }
310
311
312#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
313 if(sem->aff_obs) {
314 return sem->aff_obs->ops->replica_to_resource(sem->aff_obs, my_queue);
315 }
316#endif
317 return kfmlp_get_idx(sem, my_queue);
318}
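/* Return-value sketch (illustrative): with no affinity observer attached,
 * the caller receives the raw replica index, e.g. 1 for sem->queues[1].
 * With a GPU observer attached, the same acquisition instead returns the
 * physical GPU id via the replica_to_resource() op; e.g., assuming
 * nr_rsrc == 2 and offset == 2 (see replica_to_gpu() below), replica 1
 * maps to GPU (1 % 2) + 2 == 3. */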
319
320
321int kfmlp_unlock(struct litmus_lock* l)
322{
323 struct task_struct *t = current, *next;
324 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
325 struct kfmlp_queue *my_queue, *to_steal_from;
326 unsigned long flags;
327 int err = 0;
328
329 my_queue = kfmlp_get_queue(sem, t);
330
331 if (!my_queue) {
332 err = -EINVAL;
333 goto out;
334 }
335
336 spin_lock_irqsave(&sem->lock, flags);
337
338 TRACE_CUR("queue %d: unlocking\n", kfmlp_get_idx(sem, my_queue));
339
340 my_queue->owner = NULL; // clear ownership
341 --(my_queue->count);
342
343 if(my_queue->count < sem->shortest_queue->count)
344 {
345 sem->shortest_queue = my_queue;
346 TRACE_CUR("queue %d is the shortest\n",
347 kfmlp_get_idx(sem, sem->shortest_queue));
348 }
349
350#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
351 if(sem->aff_obs) {
352 sem->aff_obs->ops->notify_dequeue(sem->aff_obs, my_queue, t);
353 sem->aff_obs->ops->notify_freed(sem->aff_obs, my_queue, t);
354 }
355#endif
356
357 /* we lose the benefit of priority inheritance (if any) */
358 if (tsk_rt(t)->inh_task)
359 litmus->decrease_prio(t, NULL);
360
361
362 /* check if there are jobs waiting for this resource */
363RETRY:
364 next = __waitqueue_remove_first(&my_queue->wait);
365 if (next) {
366		/* next becomes the resource holder */
367 my_queue->owner = next;
368
369#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
370 if(sem->aff_obs) {
371 sem->aff_obs->ops->notify_acquired(sem->aff_obs, my_queue, next);
372 }
373#endif
374
375 TRACE_CUR("queue %d: lock ownership passed to %s/%d\n",
376 kfmlp_get_idx(sem, my_queue), next->comm, next->pid);
377
378 /* determine new hp_waiter if necessary */
379 if (next == my_queue->hp_waiter) {
380 TRACE_TASK(next, "was highest-prio waiter\n");
381 my_queue->hp_waiter = kfmlp_find_hp_waiter(my_queue, next);
382 if (my_queue->hp_waiter)
383 TRACE_TASK(my_queue->hp_waiter, "queue %d: is new highest-prio waiter\n", kfmlp_get_idx(sem, my_queue));
384 else
385 TRACE("queue %d: no further waiters\n", kfmlp_get_idx(sem, my_queue));
386 } else {
387 /* Well, if next is not the highest-priority waiter,
388 * then it ought to inherit the highest-priority
389 * waiter's priority. */
390 litmus->increase_prio(next, my_queue->hp_waiter);
391 }
392
393 /* wake up next */
394 wake_up_process(next);
395 }
396 else {
397 // TODO: put this stealing logic before we attempt to release
398		// our resource. (This simplifies the code and gets rid of the ugly goto RETRY.)
399 wait_queue_t *wait;
400
401 TRACE_CUR("queue %d: looking to steal someone...\n",
402 kfmlp_get_idx(sem, my_queue));
403
404#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
405 next = (sem->aff_obs) ?
406 sem->aff_obs->ops->advise_steal(sem->aff_obs, &wait, &to_steal_from) :
407 kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
408#else
409 next = kfmlp_select_hp_steal(sem, &wait, &to_steal_from);
410#endif
411
412 if(next) {
413 TRACE_CUR("queue %d: stealing %s/%d from queue %d\n",
414 kfmlp_get_idx(sem, my_queue),
415 next->comm, next->pid,
416 kfmlp_get_idx(sem, to_steal_from));
417
418 kfmlp_steal_node(sem, my_queue, wait, to_steal_from);
419
420 goto RETRY; // will succeed this time.
421 }
422 else {
423 TRACE_CUR("queue %d: no one to steal.\n",
424 kfmlp_get_idx(sem, my_queue));
425 }
426 }
427
428 spin_unlock_irqrestore(&sem->lock, flags);
429
430out:
431 return err;
432}
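/* Steal-path sketch (illustrative): if the releasing task's queue is empty
 * but another queue has waiters queued behind its holder (count > 1), the
 * highest-priority such waiter is moved into the now-empty queue by
 * kfmlp_steal_node() and the RETRY pass hands it the lock immediately, so
 * no replica sits idle while waiters exist elsewhere. */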
433
434int kfmlp_close(struct litmus_lock* l)
435{
436 struct task_struct *t = current;
437 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
438 struct kfmlp_queue *my_queue;
439 unsigned long flags;
440
441 int owner;
442
443 spin_lock_irqsave(&sem->lock, flags);
444
445 my_queue = kfmlp_get_queue(sem, t);
446 owner = (my_queue) ? (my_queue->owner == t) : 0;
447
448 spin_unlock_irqrestore(&sem->lock, flags);
449
450 if (owner)
451 kfmlp_unlock(l);
452
453 return 0;
454}
455
456void kfmlp_free(struct litmus_lock* l)
457{
458 struct kfmlp_semaphore *sem = kfmlp_from_lock(l);
459 kfree(sem->queues);
460 kfree(sem);
461}
462
463
464
465struct litmus_lock* kfmlp_new(struct litmus_lock_ops* ops, void* __user args)
466{
467 struct kfmlp_semaphore* sem;
468 int num_resources = 0;
469 int i;
470
471 if(!access_ok(VERIFY_READ, args, sizeof(num_resources)))
472 {
473 return(NULL);
474 }
475 if(__copy_from_user(&num_resources, args, sizeof(num_resources)))
476 {
477 return(NULL);
478 }
479 if(num_resources < 1)
480 {
481 return(NULL);
482 }
483
484 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
485 if(!sem)
486 {
487 return(NULL);
488 }
489
490 sem->queues = kmalloc(sizeof(struct kfmlp_queue)*num_resources, GFP_KERNEL);
491 if(!sem->queues)
492 {
493 kfree(sem);
494 return(NULL);
495 }
496
497 sem->litmus_lock.ops = ops;
498 spin_lock_init(&sem->lock);
499 sem->num_resources = num_resources;
500
501 for(i = 0; i < num_resources; ++i)
502 {
503 sem->queues[i].owner = NULL;
504 sem->queues[i].hp_waiter = NULL;
505 init_waitqueue_head(&sem->queues[i].wait);
506 sem->queues[i].count = 0;
507 }
508
509 sem->shortest_queue = &sem->queues[0];
510
511#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
512 sem->aff_obs = NULL;
513#endif
514
515 return &sem->litmus_lock;
516}
517
518
519
520
521#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
522
523static inline int __replica_to_gpu(struct kfmlp_affinity* aff, int replica)
524{
525 int gpu = replica % aff->nr_rsrc;
526 return gpu;
527}
528
529static inline int replica_to_gpu(struct kfmlp_affinity* aff, int replica)
530{
531 int gpu = __replica_to_gpu(aff, replica) + aff->offset;
532 return gpu;
533}
534
535static inline int gpu_to_base_replica(struct kfmlp_affinity* aff, int gpu)
536{
537 int replica = gpu - aff->offset;
538 return replica;
539}
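/* Mapping example, with assumed parameters: for nr_rsrc == 2 GPUs,
 * nr_simult == 2 replicas per GPU, and offset == 4, replicas {0, 1, 2, 3}
 * map to GPUs {4, 5, 4, 5}: __replica_to_gpu() folds replicas onto base
 * GPUs via (replica % nr_rsrc), replica_to_gpu() adds the offset, and
 * gpu_to_base_replica() subtracts it again. */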
540
541
542int kfmlp_aff_obs_close(struct affinity_observer* obs)
543{
544 return 0;
545}
546
547void kfmlp_aff_obs_free(struct affinity_observer* obs)
548{
549 struct kfmlp_affinity *kfmlp_aff = kfmlp_aff_obs_from_aff_obs(obs);
550 kfree(kfmlp_aff->nr_cur_users_on_rsrc);
551 kfree(kfmlp_aff->q_info);
552 kfree(kfmlp_aff);
553}
554
555static struct affinity_observer* kfmlp_aff_obs_new(struct affinity_observer_ops* ops,
556 struct kfmlp_affinity_ops* kfmlp_ops,
557 void* __user args)
558{
559 struct kfmlp_affinity* kfmlp_aff;
560 struct gpu_affinity_observer_args aff_args;
561 struct kfmlp_semaphore* sem;
562 int i;
563 unsigned long flags;
564
565 if(!access_ok(VERIFY_READ, args, sizeof(aff_args))) {
566 return(NULL);
567 }
568 if(__copy_from_user(&aff_args, args, sizeof(aff_args))) {
569 return(NULL);
570 }
571
572 sem = (struct kfmlp_semaphore*) get_lock_from_od(aff_args.obs.lock_od);
573
574 if(sem->litmus_lock.type != KFMLP_SEM) {
575 TRACE_CUR("Lock type not supported. Type = %d\n", sem->litmus_lock.type);
576 return(NULL);
577 }
578
579 if((aff_args.nr_simult_users <= 0) ||
580 (sem->num_resources%aff_args.nr_simult_users != 0)) {
581 TRACE_CUR("Lock %d does not support #replicas (%d) for #simult_users "
582 "(%d) per replica. #replicas should be evenly divisible "
583 "by #simult_users.\n",
584 sem->litmus_lock.ident,
585 sem->num_resources,
586 aff_args.nr_simult_users);
587 return(NULL);
588 }
589
590 if(aff_args.nr_simult_users > NV_MAX_SIMULT_USERS) {
591 TRACE_CUR("System does not support #simult_users > %d. %d requested.\n",
592 NV_MAX_SIMULT_USERS, aff_args.nr_simult_users);
593// return(NULL);
594 }
595
596 kfmlp_aff = kmalloc(sizeof(*kfmlp_aff), GFP_KERNEL);
597 if(!kfmlp_aff) {
598 return(NULL);
599 }
600
601 kfmlp_aff->q_info = kmalloc(sizeof(struct kfmlp_queue_info)*sem->num_resources, GFP_KERNEL);
602 if(!kfmlp_aff->q_info) {
603 kfree(kfmlp_aff);
604 return(NULL);
605 }
606
607 kfmlp_aff->nr_cur_users_on_rsrc = kmalloc(sizeof(int)*(sem->num_resources / aff_args.nr_simult_users), GFP_KERNEL);
608 if(!kfmlp_aff->nr_cur_users_on_rsrc) {
609 kfree(kfmlp_aff->q_info);
610 kfree(kfmlp_aff);
611 return(NULL);
612 }
613
614 affinity_observer_new(&kfmlp_aff->obs, ops, &aff_args.obs);
615
616 kfmlp_aff->ops = kfmlp_ops;
617 kfmlp_aff->offset = aff_args.replica_to_gpu_offset;
618 kfmlp_aff->nr_simult = aff_args.nr_simult_users;
619 kfmlp_aff->nr_rsrc = sem->num_resources / kfmlp_aff->nr_simult;
620
621 memset(kfmlp_aff->nr_cur_users_on_rsrc, 0, sizeof(int)*(sem->num_resources / kfmlp_aff->nr_rsrc));
622
623 for(i = 0; i < sem->num_resources; ++i) {
624 kfmlp_aff->q_info[i].q = &sem->queues[i];
625 kfmlp_aff->q_info[i].estimated_len = 0;
626
627 // multiple q_info's will point to the same resource (aka GPU) if
628 // aff_args.nr_simult_users > 1
629 kfmlp_aff->q_info[i].nr_cur_users = &kfmlp_aff->nr_cur_users_on_rsrc[__replica_to_gpu(kfmlp_aff,i)];
630 }
631
632 // attach observer to the lock
633 spin_lock_irqsave(&sem->lock, flags);
634 sem->aff_obs = kfmlp_aff;
635 spin_unlock_irqrestore(&sem->lock, flags);
636
637 return &kfmlp_aff->obs;
638}
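/* Configuration sketch, with assumed arguments: a KFMLP lock created with
 * num_resources == 6 and an observer with nr_simult_users == 2 yields
 * nr_rsrc == 3 GPUs, each backed by two replicas; q_info[0] and q_info[3]
 * then share nr_cur_users_on_rsrc[0], q_info[1] and q_info[4] share
 * index 1, and so on. A request with nr_simult_users == 4 would be
 * rejected above because 6 % 4 != 0. */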
639
640
641
642
643static int gpu_replica_to_resource(struct kfmlp_affinity* aff,
644 struct kfmlp_queue* fq) {
645 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
646 return(replica_to_gpu(aff, kfmlp_get_idx(sem, fq)));
647}
648
649
650// Smart KFMLP Affinity
651
652//static inline struct kfmlp_queue_info* kfmlp_aff_find_shortest(struct kfmlp_affinity* aff)
653//{
654// struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
655// struct kfmlp_queue_info *shortest = &aff->q_info[0];
656// int i;
657//
658// for(i = 1; i < sem->num_resources; ++i) {
659// if(aff->q_info[i].estimated_len < shortest->estimated_len) {
660// shortest = &aff->q_info[i];
661// }
662// }
663//
664// return(shortest);
665//}
666
667struct kfmlp_queue* gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
668{
669 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
670 lt_t min_len;
671 int min_nr_users;
672 struct kfmlp_queue_info *shortest;
673 struct kfmlp_queue *to_enqueue;
674 int i;
675 int affinity_gpu;
676
677	// simply pick the shortest queue if we have no affinity, or if we have
678	// affinity with the shortest queue.
679 if(unlikely(tsk_rt(t)->last_gpu < 0)) {
680 affinity_gpu = aff->offset; // first gpu
681 TRACE_CUR("no affinity\n");
682 }
683 else {
684 affinity_gpu = tsk_rt(t)->last_gpu;
685 }
686
687 // all things being equal, let's start with the queue with which we have
688 // affinity. this helps us maintain affinity even when we don't have
689	// an estimate for local-affinity execution time (i.e., 2nd time on GPU)
690 shortest = &aff->q_info[gpu_to_base_replica(aff, affinity_gpu)];
691
692// if(shortest == aff->shortest_queue) {
693// TRACE_CUR("special case: have affinity with shortest queue\n");
694// goto out;
695// }
696
697 min_len = shortest->estimated_len + get_gpu_estimate(t, MIG_LOCAL);
698 min_nr_users = *(shortest->nr_cur_users);
699
700 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
701 get_gpu_estimate(t, MIG_LOCAL),
702 kfmlp_get_idx(sem, shortest->q),
703 min_len);
704
705 for(i = 0; i < sem->num_resources; ++i) {
706 if(&aff->q_info[i] != shortest) {
707
708 lt_t est_len =
709 aff->q_info[i].estimated_len +
710 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i)));
711
712			// this queue's estimate is shorter, or the estimates are equal and
713			// its resource currently has fewer holders.
714			//
715			// tie-break on the smallest number of simultaneous users. this only
716			// kicks in when more than one queue is empty.
717 if((est_len < min_len) ||
718 ((est_len == min_len) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
719 shortest = &aff->q_info[i];
720 min_len = est_len;
721 min_nr_users = *(aff->q_info[i].nr_cur_users);
722 }
723
724 TRACE_CUR("cs is %llu on queue %d: est len = %llu\n",
725 get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, replica_to_gpu(aff, i))),
726 kfmlp_get_idx(sem, aff->q_info[i].q),
727 est_len);
728 }
729 }
730
731 to_enqueue = shortest->q;
732 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
733 kfmlp_get_idx(sem, to_enqueue),
734 kfmlp_get_idx(sem, sem->shortest_queue));
735
736 return to_enqueue;
737}
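/* Cost-model sketch, with assumed estimates: suppose the task last ran on
 * GPU 0, its local (MIG_LOCAL) critical-section estimate is 1ms, and the
 * migration to the other GPU is estimated at 3ms. If queue 0 has
 * estimated_len == 5ms and queue 1 has estimated_len == 2ms, the comparison
 * above picks queue 1 (2ms + 3ms == 5ms vs. 5ms + 1ms == 6ms); only when
 * the totals tie does the nr_cur_users tie-break decide. */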
738
739struct task_struct* gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
740{
741 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
742
743	// For now, just steal the highest-priority waiter.
744 // TODO: Implement affinity-aware stealing.
745
746 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
747}
748
749
750void gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
751{
752 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
753 int replica = kfmlp_get_idx(sem, fq);
754 int gpu = replica_to_gpu(aff, replica);
755 struct kfmlp_queue_info *info = &aff->q_info[replica];
756 lt_t est_time;
757 lt_t est_len_before;
758
759 if(current == t) {
760 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
761 }
762
763 est_len_before = info->estimated_len;
764 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
765 info->estimated_len += est_time;
766
767 TRACE_CUR("fq %d: q_len (%llu) + est_cs (%llu) = %llu\n",
768 kfmlp_get_idx(sem, info->q),
769 est_len_before, est_time,
770 info->estimated_len);
771
772// if(aff->shortest_queue == info) {
773// // we may no longer be the shortest
774// aff->shortest_queue = kfmlp_aff_find_shortest(aff);
775//
776// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
777// kfmlp_get_idx(sem, aff->shortest_queue->q),
778// aff->shortest_queue->q->count,
779// aff->shortest_queue->estimated_len);
780// }
781}
782
783void gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
784{
785 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
786 int replica = kfmlp_get_idx(sem, fq);
787 int gpu = replica_to_gpu(aff, replica);
788 struct kfmlp_queue_info *info = &aff->q_info[replica];
789 lt_t est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
790
791 if(est_time > info->estimated_len) {
792 WARN_ON(1);
793 info->estimated_len = 0;
794 }
795 else {
796 info->estimated_len -= est_time;
797 }
798
799 TRACE_CUR("fq %d est len is now %llu\n",
800 kfmlp_get_idx(sem, info->q),
801 info->estimated_len);
802
803 // check to see if we're the shortest queue now.
804// if((aff->shortest_queue != info) &&
805// (aff->shortest_queue->estimated_len > info->estimated_len)) {
806//
807// aff->shortest_queue = info;
808//
809// TRACE_CUR("shortest queue is fq %d (with %d in queue) has est len %llu\n",
810// kfmlp_get_idx(sem, info->q),
811// info->q->count,
812// info->estimated_len);
813// }
814}
815
816void gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
817{
818 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
819 int replica = kfmlp_get_idx(sem, fq);
820 int gpu = replica_to_gpu(aff, replica);
821
822 tsk_rt(t)->gpu_migration = gpu_migration_distance(tsk_rt(t)->last_gpu, gpu); // record the type of migration
823
824 TRACE_CUR("%s/%d acquired gpu %d. migration type = %d\n",
825 t->comm, t->pid, gpu, tsk_rt(t)->gpu_migration);
826
827	// count the number of resource holders
828 ++(*(aff->q_info[replica].nr_cur_users));
829
830 reg_nv_device(gpu, 1, t); // register
831
832 tsk_rt(t)->suspend_gpu_tracker_on_block = 0;
833 reset_gpu_tracker(t);
834 start_gpu_tracker(t);
835}
836
837void gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
838{
839 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
840 int replica = kfmlp_get_idx(sem, fq);
841 int gpu = replica_to_gpu(aff, replica);
842 lt_t est_time;
843
844 stop_gpu_tracker(t); // stop the tracker before we do anything else.
845
846 est_time = get_gpu_estimate(t, gpu_migration_distance(tsk_rt(t)->last_gpu, gpu));
847
848 tsk_rt(t)->last_gpu = gpu;
849
850	// count the number of resource holders
851 --(*(aff->q_info[replica].nr_cur_users));
852
853 reg_nv_device(gpu, 0, t); // unregister
854
855 // update estimates
856 update_gpu_estimate(t, get_gpu_time(t));
857
858 TRACE_CUR("%s/%d freed gpu %d. actual time was %llu. estimated was %llu. diff is %d\n",
859 t->comm, t->pid, gpu,
860 get_gpu_time(t),
861 est_time,
862 (long long)get_gpu_time(t) - (long long)est_time);
863}
864
865struct kfmlp_affinity_ops gpu_kfmlp_affinity =
866{
867 .advise_enqueue = gpu_kfmlp_advise_enqueue,
868 .advise_steal = gpu_kfmlp_advise_steal,
869 .notify_enqueue = gpu_kfmlp_notify_enqueue,
870 .notify_dequeue = gpu_kfmlp_notify_dequeue,
871 .notify_acquired = gpu_kfmlp_notify_acquired,
872 .notify_freed = gpu_kfmlp_notify_freed,
873 .replica_to_resource = gpu_replica_to_resource,
874};
875
876struct affinity_observer* kfmlp_gpu_aff_obs_new(struct affinity_observer_ops* ops,
877 void* __user args)
878{
879 return kfmlp_aff_obs_new(ops, &gpu_kfmlp_affinity, args);
880}
881
882
883
884
885
886
887
888
889// Simple KFMLP Affinity (standard KFMLP with auto-gpu registration)
890
891struct kfmlp_queue* simple_gpu_kfmlp_advise_enqueue(struct kfmlp_affinity* aff, struct task_struct* t)
892{
893 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
894 int min_count;
895 int min_nr_users;
896 struct kfmlp_queue_info *shortest;
897 struct kfmlp_queue *to_enqueue;
898 int i;
899
900// TRACE_CUR("Simple GPU KFMLP advise_enqueue invoked\n");
901
902 shortest = &aff->q_info[0];
903 min_count = shortest->q->count;
904 min_nr_users = *(shortest->nr_cur_users);
905
906 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
907 kfmlp_get_idx(sem, shortest->q),
908 shortest->q->count,
909 min_nr_users);
910
911 for(i = 1; i < sem->num_resources; ++i) {
912 int len = aff->q_info[i].q->count;
913
914		// this queue is shorter, or the lengths are equal and its resource
915		// currently has fewer holders.
916		//
917		// tie-break on the smallest number of simultaneous users. this only
918		// kicks in when more than one queue is empty.
919 if((len < min_count) ||
920 ((len == min_count) && (*(aff->q_info[i].nr_cur_users) < min_nr_users))) {
921 shortest = &aff->q_info[i];
922 min_count = shortest->q->count;
923 min_nr_users = *(aff->q_info[i].nr_cur_users);
924 }
925
926 TRACE_CUR("queue %d: waiters = %d, total holders = %d\n",
927 kfmlp_get_idx(sem, aff->q_info[i].q),
928 aff->q_info[i].q->count,
929 *(aff->q_info[i].nr_cur_users));
930 }
931
932 to_enqueue = shortest->q;
933 TRACE_CUR("enqueue on fq %d (non-aff wanted fq %d)\n",
934 kfmlp_get_idx(sem, to_enqueue),
935 kfmlp_get_idx(sem, sem->shortest_queue));
936
937 return to_enqueue;
938}
939
940struct task_struct* simple_gpu_kfmlp_advise_steal(struct kfmlp_affinity* aff, wait_queue_t** to_steal, struct kfmlp_queue** to_steal_from)
941{
942 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
943// TRACE_CUR("Simple GPU KFMLP advise_steal invoked\n");
944 return kfmlp_select_hp_steal(sem, to_steal, to_steal_from);
945}
946
947void simple_gpu_kfmlp_notify_enqueue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
948{
949// TRACE_CUR("Simple GPU KFMLP notify_enqueue invoked\n");
950}
951
952void simple_gpu_kfmlp_notify_dequeue(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
953{
954// TRACE_CUR("Simple GPU KFMLP notify_dequeue invoked\n");
955}
956
957void simple_gpu_kfmlp_notify_acquired(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
958{
959 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
960 int replica = kfmlp_get_idx(sem, fq);
961 int gpu = replica_to_gpu(aff, replica);
962
963// TRACE_CUR("Simple GPU KFMLP notify_acquired invoked\n");
964
965	// count the number of resource holders
966 ++(*(aff->q_info[replica].nr_cur_users));
967
968 reg_nv_device(gpu, 1, t); // register
969}
970
971void simple_gpu_kfmlp_notify_freed(struct kfmlp_affinity* aff, struct kfmlp_queue* fq, struct task_struct* t)
972{
973 struct kfmlp_semaphore *sem = kfmlp_from_lock(aff->obs.lock);
974 int replica = kfmlp_get_idx(sem, fq);
975 int gpu = replica_to_gpu(aff, replica);
976
977// TRACE_CUR("Simple GPU KFMLP notify_freed invoked\n");
978	// count the number of resource holders
979 --(*(aff->q_info[replica].nr_cur_users));
980
981 reg_nv_device(gpu, 0, t); // unregister
982}
983
984struct kfmlp_affinity_ops simple_gpu_kfmlp_affinity =
985{
986 .advise_enqueue = simple_gpu_kfmlp_advise_enqueue,
987 .advise_steal = simple_gpu_kfmlp_advise_steal,
988 .notify_enqueue = simple_gpu_kfmlp_notify_enqueue,
989 .notify_dequeue = simple_gpu_kfmlp_notify_dequeue,
990 .notify_acquired = simple_gpu_kfmlp_notify_acquired,
991 .notify_freed = simple_gpu_kfmlp_notify_freed,
992 .replica_to_resource = gpu_replica_to_resource,
993};
994
995struct affinity_observer* kfmlp_simple_gpu_aff_obs_new(struct affinity_observer_ops* ops,
996 void* __user args)
997{
998 return kfmlp_aff_obs_new(ops, &simple_gpu_kfmlp_affinity, args);
999}
1000
1001#endif
1002
diff --git a/litmus/litmus.c b/litmus/litmus.c
index 301390148d02..d1f836c8af6e 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -21,6 +21,10 @@
21#include <litmus/affinity.h> 21#include <litmus/affinity.h>
22#endif 22#endif
23 23
24#ifdef CONFIG_LITMUS_NVIDIA
25#include <litmus/nvidia_info.h>
26#endif
27
24/* Number of RT tasks that exist in the system */ 28/* Number of RT tasks that exist in the system */
25atomic_t rt_task_count = ATOMIC_INIT(0); 29atomic_t rt_task_count = ATOMIC_INIT(0);
26static DEFINE_RAW_SPINLOCK(task_transition_lock); 30static DEFINE_RAW_SPINLOCK(task_transition_lock);
@@ -51,6 +55,28 @@ void bheap_node_free(struct bheap_node* hn)
51struct release_heap* release_heap_alloc(int gfp_flags); 55struct release_heap* release_heap_alloc(int gfp_flags);
52void release_heap_free(struct release_heap* rh); 56void release_heap_free(struct release_heap* rh);
53 57
58#ifdef CONFIG_LITMUS_NVIDIA
59/*
60 * sys_register_nv_device
61 * @nv_device_id: the NVIDIA device id that the task wants to register
62 * @reg_action: set to '1' to register the specified device; zero to unregister.
63 * Syscall to register the task's designated NVIDIA device in the NV_DEVICE_REG array.
64 * Returns EFAULT if nv_device_id is out of range,
65 * 0 on success.
66 */
67asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
68{
69 /* register the device to caller (aka 'current') */
70 return(reg_nv_device(nv_device_id, reg_action, current));
71}
72#else
73asmlinkage long sys_register_nv_device(int nv_device_id, int reg_action)
74{
75 return(-EINVAL);
76}
77#endif
78
79
54/* 80/*
55 * sys_set_task_rt_param 81 * sys_set_task_rt_param
56 * @pid: Pid of the task which scheduling parameters must be changed 82 * @pid: Pid of the task which scheduling parameters must be changed
@@ -269,6 +295,7 @@ asmlinkage long sys_query_job_no(unsigned int __user *job)
269 return retval; 295 return retval;
270} 296}
271 297
298
272/* sys_null_call() is only used for determining raw system call 299/* sys_null_call() is only used for determining raw system call
273 * overheads (kernel entry, kernel exit). It has no useful side effects. 300 * overheads (kernel entry, kernel exit). It has no useful side effects.
274 * If ts is non-NULL, then the current Feather-Trace time is recorded. 301 * If ts is non-NULL, then the current Feather-Trace time is recorded.
@@ -286,12 +313,42 @@ asmlinkage long sys_null_call(cycles_t __user *ts)
286 return ret; 313 return ret;
287} 314}
288 315
316
317#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
318void init_gpu_affinity_state(struct task_struct* p)
319{
320 // under-damped
321 //p->rt_param.gpu_fb_param_a = _frac(14008, 10000);
322 //p->rt_param.gpu_fb_param_b = _frac(16024, 10000);
323
324	// empirical
325 p->rt_param.gpu_fb_param_a[0] = _frac(7550, 10000);
326 p->rt_param.gpu_fb_param_b[0] = _frac(45800, 10000);
327
328 p->rt_param.gpu_fb_param_a[1] = _frac(8600, 10000);
329 p->rt_param.gpu_fb_param_b[1] = _frac(40000, 10000);
330
331 p->rt_param.gpu_fb_param_a[2] = _frac(6890, 10000);
332 p->rt_param.gpu_fb_param_b[2] = _frac(40000, 10000);
333
334 p->rt_param.gpu_fb_param_a[3] = _frac(7580, 10000);
335 p->rt_param.gpu_fb_param_b[3] = _frac(34590, 10000);
336
337 p->rt_param.gpu_migration = MIG_NONE;
338 p->rt_param.last_gpu = -1;
339}
340#endif
341
289/* p is a real-time task. Re-init its state as a best-effort task. */ 342/* p is a real-time task. Re-init its state as a best-effort task. */
290static void reinit_litmus_state(struct task_struct* p, int restore) 343static void reinit_litmus_state(struct task_struct* p, int restore)
291{ 344{
292 struct rt_task user_config = {}; 345 struct rt_task user_config = {};
293 void* ctrl_page = NULL; 346 void* ctrl_page = NULL;
294 347
348#ifdef CONFIG_LITMUS_NESTED_LOCKING
349 binheap_order_t prio_order = NULL;
350#endif
351
295 if (restore) { 352 if (restore) {
296 /* Safe user-space provided configuration data. 353 /* Safe user-space provided configuration data.
297 * and allocated page. */ 354 * and allocated page. */
@@ -299,11 +356,38 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
299 ctrl_page = p->rt_param.ctrl_page; 356 ctrl_page = p->rt_param.ctrl_page;
300 } 357 }
301 358
359#ifdef CONFIG_LITMUS_NESTED_LOCKING
360 prio_order = p->rt_param.hp_blocked_tasks.compare;
361#endif
362
302 /* We probably should not be inheriting any task's priority 363 /* We probably should not be inheriting any task's priority
303 * at this point in time. 364 * at this point in time.
304 */ 365 */
305 WARN_ON(p->rt_param.inh_task); 366 WARN_ON(p->rt_param.inh_task);
306 367
368#ifdef CONFIG_LITMUS_NESTED_LOCKING
369 WARN_ON(p->rt_param.blocked_lock);
370 WARN_ON(!binheap_empty(&p->rt_param.hp_blocked_tasks));
371#endif
372
373#ifdef CONFIG_LITMUS_SOFTIRQD
374 /* We probably should not have any tasklets executing for
375 * us at this time.
376 */
377 WARN_ON(p->rt_param.cur_klitirqd);
378 WARN_ON(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD);
379
380 if(p->rt_param.cur_klitirqd)
381 flush_pending(p->rt_param.cur_klitirqd, p);
382
383 if(atomic_read(&p->rt_param.klitirqd_sem_stat) == HELD)
384 up_and_set_stat(p, NOT_HELD, &p->rt_param.klitirqd_sem);
385#endif
386
387#ifdef CONFIG_LITMUS_NVIDIA
388 WARN_ON(p->rt_param.held_gpus != 0);
389#endif
390
307 /* Cleanup everything else. */ 391 /* Cleanup everything else. */
308 memset(&p->rt_param, 0, sizeof(p->rt_param)); 392 memset(&p->rt_param, 0, sizeof(p->rt_param));
309 393
@@ -312,6 +396,15 @@ static void reinit_litmus_state(struct task_struct* p, int restore)
312 p->rt_param.task_params = user_config; 396 p->rt_param.task_params = user_config;
313 p->rt_param.ctrl_page = ctrl_page; 397 p->rt_param.ctrl_page = ctrl_page;
314 } 398 }
399
400#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
401 init_gpu_affinity_state(p);
402#endif
403
404#ifdef CONFIG_LITMUS_NESTED_LOCKING
405 INIT_BINHEAP_HANDLE(&p->rt_param.hp_blocked_tasks, prio_order);
406 raw_spin_lock_init(&p->rt_param.hp_blocked_tasks_lock);
407#endif
315} 408}
316 409
317long litmus_admit_task(struct task_struct* tsk) 410long litmus_admit_task(struct task_struct* tsk)
@@ -358,6 +451,26 @@ long litmus_admit_task(struct task_struct* tsk)
358 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); 451 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
359 } 452 }
360 453
454
455#ifdef CONFIG_LITMUS_NVIDIA
456 atomic_set(&tsk_rt(tsk)->nv_int_count, 0);
457#endif
458#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
459 init_gpu_affinity_state(tsk);
460#endif
461#ifdef CONFIG_LITMUS_NESTED_LOCKING
462 tsk_rt(tsk)->blocked_lock = NULL;
463 raw_spin_lock_init(&tsk_rt(tsk)->hp_blocked_tasks_lock);
464 //INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks, prio_order); // done by scheduler
465#endif
466#ifdef CONFIG_LITMUS_SOFTIRQD
467 /* proxy thread off by default */
468	tsk_rt(tsk)->is_proxy_thread = 0;
469	tsk_rt(tsk)->cur_klitirqd = NULL;
470 mutex_init(&tsk_rt(tsk)->klitirqd_sem);
471 atomic_set(&tsk_rt(tsk)->klitirqd_sem_stat, NOT_HELD);
472#endif
473
361 retval = litmus->admit_task(tsk); 474 retval = litmus->admit_task(tsk);
362 475
363 if (!retval) { 476 if (!retval) {
@@ -403,7 +516,7 @@ static void synch_on_plugin_switch(void* info)
403 */ 516 */
404int switch_sched_plugin(struct sched_plugin* plugin) 517int switch_sched_plugin(struct sched_plugin* plugin)
405{ 518{
406 unsigned long flags; 519 //unsigned long flags;
407 int ret = 0; 520 int ret = 0;
408 521
409 BUG_ON(!plugin); 522 BUG_ON(!plugin);
@@ -417,8 +530,15 @@ int switch_sched_plugin(struct sched_plugin* plugin)
417 while (atomic_read(&cannot_use_plugin) < num_online_cpus()) 530 while (atomic_read(&cannot_use_plugin) < num_online_cpus())
418 cpu_relax(); 531 cpu_relax();
419 532
533#ifdef CONFIG_LITMUS_SOFTIRQD
534 if(!klitirqd_is_dead())
535 {
536 kill_klitirqd();
537 }
538#endif
539
420 /* stop task transitions */ 540 /* stop task transitions */
421 raw_spin_lock_irqsave(&task_transition_lock, flags); 541 //raw_spin_lock_irqsave(&task_transition_lock, flags);
422 542
423 /* don't switch if there are active real-time tasks */ 543 /* don't switch if there are active real-time tasks */
424 if (atomic_read(&rt_task_count) == 0) { 544 if (atomic_read(&rt_task_count) == 0) {
@@ -436,7 +556,7 @@ int switch_sched_plugin(struct sched_plugin* plugin)
436 } else 556 } else
437 ret = -EBUSY; 557 ret = -EBUSY;
438out: 558out:
439 raw_spin_unlock_irqrestore(&task_transition_lock, flags); 559 //raw_spin_unlock_irqrestore(&task_transition_lock, flags);
440 atomic_set(&cannot_use_plugin, 0); 560 atomic_set(&cannot_use_plugin, 0);
441 return ret; 561 return ret;
442} 562}
diff --git a/litmus/litmus_pai_softirq.c b/litmus/litmus_pai_softirq.c
new file mode 100644
index 000000000000..300571a81bbd
--- /dev/null
+++ b/litmus/litmus_pai_softirq.c
@@ -0,0 +1,64 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19
20
21int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
22{
23 int ret = 0; /* assume failure */
24 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
25 {
26 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
27 BUG();
28 }
29
30 ret = litmus->enqueue_pai_tasklet(t);
31
32 return(ret);
33}
34
35EXPORT_SYMBOL(__litmus_tasklet_schedule);
36
37
38
39// failure causes default Linux handling.
40int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
41{
42 int ret = 0; /* assume failure */
43 return(ret);
44}
45EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
46
47
48// failure causes default Linux handling.
49int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
50{
51 int ret = 0; /* assume failure */
52 return(ret);
53}
54EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
55
56
57// failure causes default Linux handling.
58int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
59{
60 int ret = 0; /* assume failure */
61 return(ret);
62}
63EXPORT_SYMBOL(__litmus_schedule_work);
64
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
index 4bf725a36c9c..9ab7e015a3c1 100644
--- a/litmus/litmus_proc.c
+++ b/litmus/litmus_proc.c
@@ -20,11 +20,18 @@ static struct proc_dir_entry *litmus_dir = NULL,
20#ifdef CONFIG_RELEASE_MASTER 20#ifdef CONFIG_RELEASE_MASTER
21 *release_master_file = NULL, 21 *release_master_file = NULL,
22#endif 22#endif
23#ifdef CONFIG_LITMUS_SOFTIRQD
24 *klitirqd_file = NULL,
25#endif
23 *plugs_file = NULL; 26 *plugs_file = NULL;
24 27
25/* in litmus/sync.c */ 28/* in litmus/sync.c */
26int count_tasks_waiting_for_release(void); 29int count_tasks_waiting_for_release(void);
27 30
31extern int proc_read_klitirqd_stats(char *page, char **start,
32 off_t off, int count,
33 int *eof, void *data);
34
28static int proc_read_stats(char *page, char **start, 35static int proc_read_stats(char *page, char **start,
29 off_t off, int count, 36 off_t off, int count,
30 int *eof, void *data) 37 int *eof, void *data)
@@ -161,6 +168,12 @@ int __init init_litmus_proc(void)
161 release_master_file->write_proc = proc_write_release_master; 168 release_master_file->write_proc = proc_write_release_master;
162#endif 169#endif
163 170
171#ifdef CONFIG_LITMUS_SOFTIRQD
172 klitirqd_file =
173 create_proc_read_entry("klitirqd_stats", 0444, litmus_dir,
174 proc_read_klitirqd_stats, NULL);
175#endif
176
164 stat_file = create_proc_read_entry("stats", 0444, litmus_dir, 177 stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
165 proc_read_stats, NULL); 178 proc_read_stats, NULL);
166 179
@@ -187,6 +200,10 @@ void exit_litmus_proc(void)
187 remove_proc_entry("stats", litmus_dir); 200 remove_proc_entry("stats", litmus_dir);
188 if (curr_file) 201 if (curr_file)
189 remove_proc_entry("active_plugin", litmus_dir); 202 remove_proc_entry("active_plugin", litmus_dir);
203#ifdef CONFIG_LITMUS_SOFTIRQD
204 if (klitirqd_file)
205 remove_proc_entry("klitirqd_stats", litmus_dir);
206#endif
190#ifdef CONFIG_RELEASE_MASTER 207#ifdef CONFIG_RELEASE_MASTER
191 if (release_master_file) 208 if (release_master_file)
192 remove_proc_entry("release_master", litmus_dir); 209 remove_proc_entry("release_master", litmus_dir);
diff --git a/litmus/litmus_softirq.c b/litmus/litmus_softirq.c
new file mode 100644
index 000000000000..9f7d9da5facb
--- /dev/null
+++ b/litmus/litmus_softirq.c
@@ -0,0 +1,1582 @@
1#include <linux/interrupt.h>
2#include <linux/percpu.h>
3#include <linux/cpu.h>
4#include <linux/kthread.h>
5#include <linux/ftrace.h>
6#include <linux/smp.h>
7#include <linux/slab.h>
8#include <linux/mutex.h>
9
10#include <linux/sched.h>
11#include <linux/cpuset.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_trace.h>
15#include <litmus/jobs.h>
16#include <litmus/sched_plugin.h>
17#include <litmus/litmus_softirq.h>
18
19/* TODO: Remove unneeded mb() and other barriers. */
20
21
22/* counts number of daemons ready to handle litmus irqs. */
23static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);
24
25enum pending_flags
26{
27 LIT_TASKLET_LOW = 0x1,
28 LIT_TASKLET_HI = LIT_TASKLET_LOW<<1,
29 LIT_WORK = LIT_TASKLET_HI<<1
30};
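/* Flag layout (follows directly from the definitions above):
 * LIT_TASKLET_LOW == 0x1, LIT_TASKLET_HI == 0x2, LIT_WORK == 0x4. A pending
 * value of 0x3 therefore means both tasklet queues are non-empty while the
 * work list is empty; __reeval_prio() below tests the bits in exactly that
 * priority order (HI, then LOW, then WORK). */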
31
32/* only support tasklet processing for now. */
33struct tasklet_head
34{
35 struct tasklet_struct *head;
36 struct tasklet_struct **tail;
37};
38
39struct klitirqd_info
40{
41 struct task_struct* klitirqd;
42 struct task_struct* current_owner;
43 int terminating;
44
45
46 raw_spinlock_t lock;
47
48 u32 pending;
49 atomic_t num_hi_pending;
50 atomic_t num_low_pending;
51 atomic_t num_work_pending;
52
53 /* in order of priority */
54 struct tasklet_head pending_tasklets_hi;
55 struct tasklet_head pending_tasklets;
56 struct list_head worklist;
57};
58
59/* one list for each klitirqd */
60static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];
61
62
63
64
65
66int proc_read_klitirqd_stats(char *page, char **start,
67 off_t off, int count,
68 int *eof, void *data)
69{
70 int len = snprintf(page, PAGE_SIZE,
71 "num ready klitirqds: %d\n\n",
72 atomic_read(&num_ready_klitirqds));
73
74 if(klitirqd_is_ready())
75 {
76 int i;
77 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
78 {
79 len +=
80 snprintf(page + len - 1, PAGE_SIZE, /* -1 to strip off \0 */
81 "klitirqd_th%d: %s/%d\n"
82 "\tcurrent_owner: %s/%d\n"
83 "\tpending: %x\n"
84 "\tnum hi: %d\n"
85 "\tnum low: %d\n"
86 "\tnum work: %d\n\n",
87 i,
88 klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
89 (klitirqds[i].current_owner != NULL) ?
90 klitirqds[i].current_owner->comm : "(null)",
91 (klitirqds[i].current_owner != NULL) ?
92 klitirqds[i].current_owner->pid : 0,
93 klitirqds[i].pending,
94 atomic_read(&klitirqds[i].num_hi_pending),
95 atomic_read(&klitirqds[i].num_low_pending),
96 atomic_read(&klitirqds[i].num_work_pending));
97 }
98 }
99
100 return(len);
101}
102
103
104
105
106
107#if 0
108static atomic_t dump_id = ATOMIC_INIT(0);
109
110static void __dump_state(struct klitirqd_info* which, const char* caller)
111{
112 struct tasklet_struct* list;
113
114 int id = atomic_inc_return(&dump_id);
115
116 //if(in_interrupt())
117 {
118 if(which->current_owner)
119 {
120 TRACE("(id: %d caller: %s)\n"
121 "klitirqd: %s/%d\n"
122 "current owner: %s/%d\n"
123 "pending: %x\n",
124 id, caller,
125 which->klitirqd->comm, which->klitirqd->pid,
126 which->current_owner->comm, which->current_owner->pid,
127 which->pending);
128 }
129 else
130 {
131 TRACE("(id: %d caller: %s)\n"
132 "klitirqd: %s/%d\n"
133 "current owner: %p\n"
134 "pending: %x\n",
135 id, caller,
136 which->klitirqd->comm, which->klitirqd->pid,
137 NULL,
138 which->pending);
139 }
140
141 list = which->pending_tasklets.head;
142 while(list)
143 {
144 struct tasklet_struct *t = list;
145 list = list->next; /* advance */
146 if(t->owner)
147 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
148 else
149 TRACE("(id: %d caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
150 }
151 }
152}
153
154static void dump_state(struct klitirqd_info* which, const char* caller)
155{
156 unsigned long flags;
157
158 raw_spin_lock_irqsave(&which->lock, flags);
159 __dump_state(which, caller);
160 raw_spin_unlock_irqrestore(&which->lock, flags);
161}
162#endif
163
164
165/* forward declarations */
166static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
167 struct klitirqd_info *which,
168 int wakeup);
169static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
170 struct klitirqd_info *which,
171 int wakeup);
172static void ___litmus_schedule_work(struct work_struct *w,
173 struct klitirqd_info *which,
174 int wakeup);
175
176
177
178inline unsigned int klitirqd_id(struct task_struct* tsk)
179{
180 int i;
181 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
182 {
183 if(klitirqds[i].klitirqd == tsk)
184 {
185 return i;
186 }
187 }
188
189 BUG();
190
191 return 0;
192}
193
194
195inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
196{
197 return (which->pending & LIT_TASKLET_HI);
198}
199
200inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
201{
202 return (which->pending & LIT_TASKLET_LOW);
203}
204
205inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
206{
207 return (which->pending & LIT_WORK);
208}
209
210inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
211{
212 return(which->pending);
213}
214
215
216inline static u32 litirq_pending(struct klitirqd_info* which)
217{
218 unsigned long flags;
219 u32 pending;
220
221 raw_spin_lock_irqsave(&which->lock, flags);
222 pending = litirq_pending_irqoff(which);
223 raw_spin_unlock_irqrestore(&which->lock, flags);
224
225 return pending;
226};
227
228inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
229{
230 unsigned long flags;
231 u32 pending;
232
233 raw_spin_lock_irqsave(&which->lock, flags);
234 pending = litirq_pending_irqoff(which);
235 if(pending)
236 {
237 if(which->current_owner != owner)
238 {
239 pending = 0; // owner switch!
240 }
241 }
242 raw_spin_unlock_irqrestore(&which->lock, flags);
243
244 return pending;
245}
246
247
248inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
249 struct mutex** sem,
250 struct task_struct** t)
251{
252 unsigned long flags;
253 u32 pending;
254
255 /* init values */
256 *sem = NULL;
257 *t = NULL;
258
259 raw_spin_lock_irqsave(&which->lock, flags);
260
261 pending = litirq_pending_irqoff(which);
262 if(pending)
263 {
264 if(which->current_owner != NULL)
265 {
266 *t = which->current_owner;
267 *sem = &tsk_rt(which->current_owner)->klitirqd_sem;
268 }
269 else
270 {
271 BUG();
272 }
273 }
274 raw_spin_unlock_irqrestore(&which->lock, flags);
275
276 if(likely(*sem))
277 {
278 return pending;
279 }
280 else
281 {
282 return 0;
283 }
284}
285
286/* returns true if the next piece of work to do is from a different owner.
287 */
288static int tasklet_ownership_change(
289 struct klitirqd_info* which,
290 enum pending_flags taskletQ)
291{
292 /* this function doesn't have to look at work objects since they have
293 priority below tasklets. */
294
295 unsigned long flags;
296 int ret = 0;
297
298 raw_spin_lock_irqsave(&which->lock, flags);
299
300 switch(taskletQ)
301 {
302 case LIT_TASKLET_HI:
303 if(litirq_pending_hi_irqoff(which))
304 {
305 ret = (which->pending_tasklets_hi.head->owner !=
306 which->current_owner);
307 }
308 break;
309 case LIT_TASKLET_LOW:
310 if(litirq_pending_low_irqoff(which))
311 {
312 ret = (which->pending_tasklets.head->owner !=
313 which->current_owner);
314 }
315 break;
316 default:
317 break;
318 }
319
320 raw_spin_unlock_irqrestore(&which->lock, flags);
321
322 TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);
323
324 return ret;
325}
326
327
328static void __reeval_prio(struct klitirqd_info* which)
329{
330 struct task_struct* next_owner = NULL;
331 struct task_struct* klitirqd = which->klitirqd;
332
333 /* Check in prio-order */
334 u32 pending = litirq_pending_irqoff(which);
335
336 //__dump_state(which, "__reeval_prio: before");
337
338 if(pending)
339 {
340 if(pending & LIT_TASKLET_HI)
341 {
342 next_owner = which->pending_tasklets_hi.head->owner;
343 }
344 else if(pending & LIT_TASKLET_LOW)
345 {
346 next_owner = which->pending_tasklets.head->owner;
347 }
348 else if(pending & LIT_WORK)
349 {
350 struct work_struct* work =
351 list_first_entry(&which->worklist, struct work_struct, entry);
352 next_owner = work->owner;
353 }
354 }
355
356 if(next_owner != which->current_owner)
357 {
358 struct task_struct* old_owner = which->current_owner;
359
360 /* bind the next owner. */
361 which->current_owner = next_owner;
362 mb();
363
364 if(next_owner != NULL)
365 {
366 if(!in_interrupt())
367 {
368 TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
369 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
370 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
371 next_owner->comm, next_owner->pid);
372 }
373 else
374 {
375 TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
376 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
377 ((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
378 next_owner->comm, next_owner->pid);
379 }
380
381 litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner);
382 }
383 else
384 {
385 if(likely(!in_interrupt()))
386 {
387 TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
388 __FUNCTION__, klitirqd->comm, klitirqd->pid);
389 }
390 else
391 {
392 // is this a bug?
393 TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
394 __FUNCTION__, klitirqd->comm, klitirqd->pid);
395 }
396
397 BUG_ON(pending != 0);
398 litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL);
399 }
400 }
401
402 //__dump_state(which, "__reeval_prio: after");
403}
404
405static void reeval_prio(struct klitirqd_info* which)
406{
407 unsigned long flags;
408
409 raw_spin_lock_irqsave(&which->lock, flags);
410 __reeval_prio(which);
411 raw_spin_unlock_irqrestore(&which->lock, flags);
412}
413
414
415static void wakeup_litirqd_locked(struct klitirqd_info* which)
416{
417 /* Interrupts are disabled: no need to stop preemption */
418 if (which && which->klitirqd)
419 {
420 __reeval_prio(which); /* configure the proper priority */
421
422 if(which->klitirqd->state != TASK_RUNNING)
423 {
424 TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
425 which->klitirqd->comm, which->klitirqd->pid);
426
427 wake_up_process(which->klitirqd);
428 }
429 }
430}
431
432
433static void do_lit_tasklet(struct klitirqd_info* which,
434 struct tasklet_head* pending_tasklets)
435{
436 unsigned long flags;
437 struct tasklet_struct *list;
438 atomic_t* count;
439
440 raw_spin_lock_irqsave(&which->lock, flags);
441
442 //__dump_state(which, "do_lit_tasklet: before steal");
443
444 /* copy out the tasklets for our private use. */
445 list = pending_tasklets->head;
446 pending_tasklets->head = NULL;
447 pending_tasklets->tail = &pending_tasklets->head;
448
449 /* remove pending flag */
450 which->pending &= (pending_tasklets == &which->pending_tasklets) ?
451 ~LIT_TASKLET_LOW :
452 ~LIT_TASKLET_HI;
453
454 count = (pending_tasklets == &which->pending_tasklets) ?
455 &which->num_low_pending:
456 &which->num_hi_pending;
457
458 //__dump_state(which, "do_lit_tasklet: after steal");
459
460 raw_spin_unlock_irqrestore(&which->lock, flags);
461
462
463 while(list)
464 {
465 struct tasklet_struct *t = list;
466
467 /* advance, lest we forget */
468 list = list->next;
469
470 /* execute tasklet if it has my priority and is free */
471 if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
472 if (!atomic_read(&t->count)) {
473
474 sched_trace_tasklet_begin(t->owner);
475
476 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
477 {
478 BUG();
479 }
480 TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
481 t->func(t->data);
482 tasklet_unlock(t);
483
484 atomic_dec(count);
485
486 sched_trace_tasklet_end(t->owner, 0ul);
487
488 continue; /* process more tasklets */
489 }
490 tasklet_unlock(t);
491 }
492
493 TRACE_CUR("%s: Could not invoke tasklet. Requeuing.\n", __FUNCTION__);
494
495 /* couldn't process tasklet. put it back at the end of the queue. */
496 if(pending_tasklets == &which->pending_tasklets)
497 ___litmus_tasklet_schedule(t, which, 0);
498 else
499 ___litmus_tasklet_hi_schedule(t, which, 0);
500 }
501}
502
503
504// returns 1 if priorities need to be changed to continue processing
505// pending tasklets.
506static int do_litirq(struct klitirqd_info* which)
507{
508 u32 pending;
509 int resched = 0;
510
511 if(in_interrupt())
512 {
513 TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
514 return(0);
515 }
516
517 if(which->klitirqd != current)
518 {
519 TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
520 __FUNCTION__, current->comm, current->pid,
521 which->klitirqd->comm, which->klitirqd->pid);
522 return(0);
523 }
524
525 if(!is_realtime(current))
526 {
527 TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
528 __FUNCTION__, current->policy);
529 return(0);
530 }
531
532
533 /* We only handle tasklets & work objects, no need for RCU triggers? */
534
535 pending = litirq_pending(which);
536 if(pending)
537 {
538 /* extract the work to do and do it! */
539 if(pending & LIT_TASKLET_HI)
540 {
541 TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
542 do_lit_tasklet(which, &which->pending_tasklets_hi);
543 resched = tasklet_ownership_change(which, LIT_TASKLET_HI);
544
545 if(resched)
546 {
547 TRACE_CUR("%s: HI tasklets of another owner remain. "
548 "Skipping any LOW tasklets.\n", __FUNCTION__);
549 }
550 }
551
552 if(!resched && (pending & LIT_TASKLET_LOW))
553 {
554 TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
555 do_lit_tasklet(which, &which->pending_tasklets);
556 resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);
557
558 if(resched)
559 {
560 TRACE_CUR("%s: LOW tasklets of another owner remain. "
561 "Skipping any work objects.\n", __FUNCTION__);
562 }
563 }
564 }
565
566 return(resched);
567}
568
569
570static void do_work(struct klitirqd_info* which)
571{
572 unsigned long flags;
573 work_func_t f;
574 struct work_struct* work;
575
576 // only execute one work-queue item to yield to tasklets.
577 // ...is this a good idea, or should we just batch them?
578 raw_spin_lock_irqsave(&which->lock, flags);
579
580 if(!litirq_pending_work_irqoff(which))
581 {
582 raw_spin_unlock_irqrestore(&which->lock, flags);
583 goto no_work;
584 }
585
586 work = list_first_entry(&which->worklist, struct work_struct, entry);
587 list_del_init(&work->entry);
588
589 if(list_empty(&which->worklist))
590 {
591 which->pending &= ~LIT_WORK;
592 }
593
594 raw_spin_unlock_irqrestore(&which->lock, flags);
595
596
597
598 /* safe to read current_owner outside of lock since only this thread
599 may write to the pointer. */
600 if(work->owner == which->current_owner)
601 {
602 TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
603 // do the work!
604 work_clear_pending(work);
605 f = work->func;
606 f(work); /* can't touch 'work' after this point,
607 the user may have freed it. */
608
609 atomic_dec(&which->num_work_pending);
610 }
611 else
612 {
613 TRACE_CUR("%s: Could not invoke work object. Requeuing.\n",
614 __FUNCTION__);
615 ___litmus_schedule_work(work, which, 0);
616 }
617
618no_work:
619 return;
620}
621
622
623static int set_litmus_daemon_sched(void)
624{
625 /* set up a daemon job that will never complete.
626 it should only ever run on behalf of another
627 real-time task.
628
629 TODO: Transition to a new job whenever a
630 new tasklet is handled */
631
632 int ret = 0;
633
634 struct rt_task tp = {
635 .exec_cost = 0,
636 .period = 1000000000, /* dummy 1 second period */
637 .phase = 0,
638 .cpu = task_cpu(current),
639 .budget_policy = NO_ENFORCEMENT,
640 .cls = RT_CLASS_BEST_EFFORT
641 };
642
643 struct sched_param param = { .sched_priority = 0};
644
645
646 /* set task params, mark as proxy thread, and init other data */
647 tsk_rt(current)->task_params = tp;
648 tsk_rt(current)->is_proxy_thread = 1;
649 tsk_rt(current)->cur_klitirqd = NULL;
650 mutex_init(&tsk_rt(current)->klitirqd_sem);
651 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);
652
653 /* inform the OS we're SCHED_LITMUS --
654 sched_setscheduler_nocheck() calls litmus_admit_task(). */
655 sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);
656
657 return ret;
658}
659
660static void enter_execution_phase(struct klitirqd_info* which,
661 struct mutex* sem,
662 struct task_struct* t)
663{
664 TRACE_CUR("%s: Trying to enter execution phase. "
665 "Acquiring semaphore of %s/%d\n", __FUNCTION__,
666 t->comm, t->pid);
667 down_and_set_stat(current, HELD, sem);
668 TRACE_CUR("%s: Execution phase entered! "
669 "Acquired semaphore of %s/%d\n", __FUNCTION__,
670 t->comm, t->pid);
671}
672
673static void exit_execution_phase(struct klitirqd_info* which,
674 struct mutex* sem,
675 struct task_struct* t)
676{
677 TRACE_CUR("%s: Exiting execution phase. "
678 "Releasing semaphore of %s/%d\n", __FUNCTION__,
679 t->comm, t->pid);
680 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
681 {
682 up_and_set_stat(current, NOT_HELD, sem);
683 TRACE_CUR("%s: Execution phase exited! "
684 "Released semaphore of %s/%d\n", __FUNCTION__,
685 t->comm, t->pid);
686 }
687 else
688 {
689 TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
690 }
691}
692
693/* main loop for klitsoftirqd */
694static int run_klitirqd(void* unused)
695{
696 struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
697 struct mutex* sem;
698 struct task_struct* owner;
699
700 int rt_status = set_litmus_daemon_sched();
701
702 if(rt_status != 0)
703 {
704 TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
705 goto rt_failed;
706 }
707
708 atomic_inc(&num_ready_klitirqds);
709
710 set_current_state(TASK_INTERRUPTIBLE);
711
712 while (!kthread_should_stop())
713 {
714 preempt_disable();
715 if (!litirq_pending(which))
716 {
717 /* sleep for work */
718 TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
719 __FUNCTION__);
720 preempt_enable_no_resched();
721 schedule();
722
723 if(kthread_should_stop()) /* bail out */
724 {
725 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
726 continue;
727 }
728
729 preempt_disable();
730 }
731
732 __set_current_state(TASK_RUNNING);
733
734 while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
735 {
736 int needs_resched = 0;
737
738 preempt_enable_no_resched();
739
740 BUG_ON(sem == NULL);
741
742 // wait to enter execution phase; wait for 'current_owner' to block.
743 enter_execution_phase(which, sem, owner);
744
745 if(kthread_should_stop())
746 {
747 TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
748 break;
749 }
750
751 preempt_disable();
752
753 /* Double check that there's still pending work and the owner hasn't
754 * changed. Pending items may have been flushed while we were sleeping.
755 */
756 if(litirq_pending_with_owner(which, owner))
757 {
758 TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
759 __FUNCTION__);
760
761 needs_resched = do_litirq(which);
762
763 preempt_enable_no_resched();
764
765 // work objects are preemptible.
766 if(!needs_resched)
767 {
768 do_work(which);
769 }
770
771 // exit execution phase.
772 exit_execution_phase(which, sem, owner);
773
774 TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
775 reeval_prio(which); /* check if we need to change priority here */
776 }
777 else
778 {
779 TRACE_CUR("%s: Pending work was flushed! Prev owner was %s/%d\n",
780 __FUNCTION__,
781 owner->comm, owner->pid);
782 preempt_enable_no_resched();
783
784 // exit execution phase.
785 exit_execution_phase(which, sem, owner);
786 }
787
788 cond_resched();
789 preempt_disable();
790 }
791 preempt_enable();
792 set_current_state(TASK_INTERRUPTIBLE);
793 }
794 __set_current_state(TASK_RUNNING);
795
796 atomic_dec(&num_ready_klitirqds);
797
798rt_failed:
799 litmus_exit_task(current);
800
801 return rt_status;
802}
803
804
805struct klitirqd_launch_data
806{
807 int* cpu_affinity;
808 struct work_struct work;
809};
810
811/* executed by a kworker from workqueues */
812static void launch_klitirqd(struct work_struct *work)
813{
814 int i;
815
816 struct klitirqd_launch_data* launch_data =
817 container_of(work, struct klitirqd_launch_data, work);
818
819 TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
820
821 /* create the daemon threads */
822 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
823 {
824 if(launch_data->cpu_affinity)
825 {
826 klitirqds[i].klitirqd =
827 kthread_create(
828 run_klitirqd,
829 /* treat the affinity as a pointer, we'll cast it back later */
830 (void*)(long long)launch_data->cpu_affinity[i],
831 "klitirqd_th%d/%d",
832 i,
833 launch_data->cpu_affinity[i]);
834
835                  /* litmus will put it in the right cluster. */
836 kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
837 }
838 else
839 {
840 klitirqds[i].klitirqd =
841 kthread_create(
842 run_klitirqd,
843 /* treat the affinity as a pointer, we'll cast it back later */
844 (void*)(long long)(-1),
845 "klitirqd_th%d",
846 i);
847 }
848 }
849
850 TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
851
852 /* unleash the daemons */
853 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
854 {
855 wake_up_process(klitirqds[i].klitirqd);
856 }
857
858 if(launch_data->cpu_affinity)
859 kfree(launch_data->cpu_affinity);
860 kfree(launch_data);
861}
862
863
864void spawn_klitirqd(int* affinity)
865{
866 int i;
867 struct klitirqd_launch_data* delayed_launch;
868
869 if(atomic_read(&num_ready_klitirqds) != 0)
870 {
871                TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n", __FUNCTION__);
872 return;
873 }
874
875 /* init the tasklet & work queues */
876 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
877 {
878 klitirqds[i].terminating = 0;
879 klitirqds[i].pending = 0;
880
881 klitirqds[i].num_hi_pending.counter = 0;
882 klitirqds[i].num_low_pending.counter = 0;
883 klitirqds[i].num_work_pending.counter = 0;
884
885 klitirqds[i].pending_tasklets_hi.head = NULL;
886 klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;
887
888 klitirqds[i].pending_tasklets.head = NULL;
889 klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;
890
891 INIT_LIST_HEAD(&klitirqds[i].worklist);
892
893 raw_spin_lock_init(&klitirqds[i].lock);
894 }
895
896        /* make sure the initializations are flushed to memory before other
897           threads access them. */
898 mb();
899
900 /* tell a work queue to launch the threads. we can't make scheduling
901 calls since we're in an atomic state. */
902 TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
903 delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
904 if(affinity)
905 {
906 delayed_launch->cpu_affinity =
907 kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);
908
909 memcpy(delayed_launch->cpu_affinity, affinity,
910 sizeof(int)*NR_LITMUS_SOFTIRQD);
911 }
912 else
913 {
914 delayed_launch->cpu_affinity = NULL;
915 }
916 INIT_WORK(&delayed_launch->work, launch_klitirqd);
917 schedule_work(&delayed_launch->work);
918}
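
/*
 * For orientation, a minimal usage sketch (not part of this patch; the
 * plugin hook name and the CPU assignment policy are assumptions).  A
 * plugin would spawn the daemons once, after checking that none are
 * running, and let the deferred workqueue callback above create the
 * actual threads.
 */
static int example_plugin_activation(void)   /* hypothetical hook */
{
	int cpu_map[NR_LITMUS_SOFTIRQD];
	int i;

	/* assumed policy: spread the daemons across the online CPUs */
	for (i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
		cpu_map[i] = i % num_online_cpus();

	if (klitirqd_is_dead())       /* no daemons currently running */
		spawn_klitirqd(cpu_map);  /* affinity array is copied before use */

	return 0;
}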
919
920
921void kill_klitirqd(void)
922{
923 if(!klitirqd_is_dead())
924 {
925 int i;
926
927 TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);
928
929 for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
930 {
931 if(klitirqds[i].terminating != 1)
932 {
933 klitirqds[i].terminating = 1;
934 mb(); /* just to be sure? */
935 flush_pending(klitirqds[i].klitirqd, NULL);
936
937 /* signal termination */
938 kthread_stop(klitirqds[i].klitirqd);
939 }
940 }
941 }
942}
943
944
945int klitirqd_is_ready(void)
946{
947 return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
948}
949
950int klitirqd_is_dead(void)
951{
952 return(atomic_read(&num_ready_klitirqds) == 0);
953}
954
955
956struct task_struct* get_klitirqd(unsigned int k_id)
957{
958 return(klitirqds[k_id].klitirqd);
959}
960
961
962void flush_pending(struct task_struct* klitirqd_thread,
963 struct task_struct* owner)
964{
965 unsigned int k_id = klitirqd_id(klitirqd_thread);
966 struct klitirqd_info *which = &klitirqds[k_id];
967
968 unsigned long flags;
969 struct tasklet_struct *list;
970
971 u32 work_flushed = 0;
972
973 raw_spin_lock_irqsave(&which->lock, flags);
974
975 //__dump_state(which, "flush_pending: before");
976
977 // flush hi tasklets.
978 if(litirq_pending_hi_irqoff(which))
979 {
980 which->pending &= ~LIT_TASKLET_HI;
981
982 list = which->pending_tasklets_hi.head;
983 which->pending_tasklets_hi.head = NULL;
984 which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;
985
986 TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);
987
988 while(list)
989 {
990 struct tasklet_struct *t = list;
991 list = list->next;
992
993 if(likely((t->owner == owner) || (owner == NULL)))
994 {
995 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
996 {
997 BUG();
998 }
999
1000 work_flushed |= LIT_TASKLET_HI;
1001
1002 t->owner = NULL;
1003
1004                // re-set the SCHED bit and hand the tasklet back to Linux;
1005                // the bit was cleared just above, so this should always succeed.
1005 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1006 {
1007 atomic_dec(&which->num_hi_pending);
1008 ___tasklet_hi_schedule(t);
1009 }
1010 else
1011 {
1012 TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
1013 BUG();
1014 }
1015 }
1016 else
1017 {
1018 TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
1019 // put back on queue.
1020 ___litmus_tasklet_hi_schedule(t, which, 0);
1021 }
1022 }
1023 }
1024
1025 // flush low tasklets.
1026 if(litirq_pending_low_irqoff(which))
1027 {
1028 which->pending &= ~LIT_TASKLET_LOW;
1029
1030 list = which->pending_tasklets.head;
1031 which->pending_tasklets.head = NULL;
1032 which->pending_tasklets.tail = &which->pending_tasklets.head;
1033
1034 TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);
1035
1036 while(list)
1037 {
1038 struct tasklet_struct *t = list;
1039 list = list->next;
1040
1041 if(likely((t->owner == owner) || (owner == NULL)))
1042 {
1043 if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
1044 {
1045 BUG();
1046 }
1047
1048 work_flushed |= LIT_TASKLET_LOW;
1049
1050 t->owner = NULL;
1051 sched_trace_tasklet_end(owner, 1ul);
1052
1053 if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
1054 {
1055 atomic_dec(&which->num_low_pending);
1056 ___tasklet_schedule(t);
1057 }
1058 else
1059 {
1060 TRACE("%s: dropped tasklet??\n", __FUNCTION__);
1061 BUG();
1062 }
1063 }
1064 else
1065 {
1066 TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
1067 // put back on queue
1068 ___litmus_tasklet_schedule(t, which, 0);
1069 }
1070 }
1071 }
1072
1073 // flush work objects
1074 if(litirq_pending_work_irqoff(which))
1075 {
1076 which->pending &= ~LIT_WORK;
1077
1078 TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);
1079
1080 while(!list_empty(&which->worklist))
1081 {
1082 struct work_struct* work =
1083 list_first_entry(&which->worklist, struct work_struct, entry);
1084 list_del_init(&work->entry);
1085
1086 if(likely((work->owner == owner) || (owner == NULL)))
1087 {
1088 work_flushed |= LIT_WORK;
1089 atomic_dec(&which->num_work_pending);
1090
1091 work->owner = NULL;
1092 sched_trace_work_end(owner, current, 1ul);
1093 __schedule_work(work);
1094 }
1095 else
1096 {
1097 TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
1098 // put back on queue
1099 ___litmus_schedule_work(work, which, 0);
1100 }
1101 }
1102 }
1103
1104 //__dump_state(which, "flush_pending: after (before reeval prio)");
1105
1106
1107 mb(); /* commit changes to pending flags */
1108
1109 /* reset the scheduling priority */
1110 if(work_flushed)
1111 {
1112 __reeval_prio(which);
1113
1114 /* Try to offload flushed tasklets to Linux's ksoftirqd. */
1115 if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
1116 {
1117 wakeup_softirqd();
1118 }
1119 }
1120 else
1121 {
1122 TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
1123 }
1124
1125 raw_spin_unlock_irqrestore(&which->lock, flags);
1126}
1127
1128
1129
1130
1131static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
1132 struct klitirqd_info *which,
1133 int wakeup)
1134{
1135 unsigned long flags;
1136 u32 old_pending;
1137
1138 t->next = NULL;
1139
1140 raw_spin_lock_irqsave(&which->lock, flags);
1141
1142 //__dump_state(which, "___litmus_tasklet_schedule: before queuing");
1143
1144 *(which->pending_tasklets.tail) = t;
1145 which->pending_tasklets.tail = &t->next;
1146
1147 old_pending = which->pending;
1148 which->pending |= LIT_TASKLET_LOW;
1149
1150 atomic_inc(&which->num_low_pending);
1151
1152 mb();
1153
1154 if(!old_pending && wakeup)
1155 {
1156 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1157 }
1158
1159 //__dump_state(which, "___litmus_tasklet_schedule: after queuing");
1160
1161 raw_spin_unlock_irqrestore(&which->lock, flags);
1162}
1163
1164int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
1165{
1166 int ret = 0; /* assume failure */
1167 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1168 {
1169 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1170 BUG();
1171 }
1172
1173 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1174 {
1175 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1176 BUG();
1177 }
1178
1179 if(likely(!klitirqds[k_id].terminating))
1180 {
1181 /* Can't accept tasklets while we're processing a workqueue
1182 because they're handled by the same thread. This case is
1183 very RARE.
1184
1185 TODO: Use a separate thread for work objects!!!!!!
1186 */
1187 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1188 {
1189 ret = 1;
1190 ___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
1191 }
1192 else
1193 {
1194 TRACE("%s: rejected tasklet because of pending work.\n",
1195 __FUNCTION__);
1196 }
1197 }
1198 return(ret);
1199}
1200
1201EXPORT_SYMBOL(__litmus_tasklet_schedule);
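
/*
 * Sketch of the intended call pattern (not code from this patch; the
 * device-to-daemon mapping is an assumption).  The interrupt-redirection
 * changes elsewhere in this series are expected to stamp a tasklet with
 * the real-time task on whose behalf the bottom half runs, hand it to a
 * specific daemon, and fall back to the stock softirq path on rejection.
 */
static void example_defer_tasklet(struct tasklet_struct *t,
                                  struct task_struct *rt_owner,
                                  unsigned int device)
{
	unsigned int k_id = device % NR_LITMUS_SOFTIRQD;  /* assumed mapping */

	t->owner = rt_owner;  /* klitirqd inherits this task's priority */

	if (!__litmus_tasklet_schedule(t, k_id))
		tasklet_schedule(t);  /* rejected: use the normal softirq path */
}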
1202
1203
1204static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
1205 struct klitirqd_info *which,
1206 int wakeup)
1207{
1208 unsigned long flags;
1209 u32 old_pending;
1210
1211 t->next = NULL;
1212
1213 raw_spin_lock_irqsave(&which->lock, flags);
1214
1215 *(which->pending_tasklets_hi.tail) = t;
1216 which->pending_tasklets_hi.tail = &t->next;
1217
1218 old_pending = which->pending;
1219 which->pending |= LIT_TASKLET_HI;
1220
1221 atomic_inc(&which->num_hi_pending);
1222
1223 mb();
1224
1225 if(!old_pending && wakeup)
1226 {
1227 wakeup_litirqd_locked(which); /* wake up the klitirqd */
1228 }
1229
1230 raw_spin_unlock_irqrestore(&which->lock, flags);
1231}
1232
1233int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
1234{
1235 int ret = 0; /* assume failure */
1236 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1237 {
1238 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1239 BUG();
1240 }
1241
1242 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1243 {
1244 TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
1245 BUG();
1246 }
1247
1248 if(unlikely(!klitirqd_is_ready()))
1249 {
1250        TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
1251 BUG();
1252 }
1253
1254 if(likely(!klitirqds[k_id].terminating))
1255 {
1256 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1257 {
1258 ret = 1;
1259 ___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
1260 }
1261 else
1262 {
1263 TRACE("%s: rejected tasklet because of pending work.\n",
1264 __FUNCTION__);
1265 }
1266 }
1267 return(ret);
1268}
1269
1270EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);
1271
1272
1273int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
1274{
1275 int ret = 0; /* assume failure */
1276 u32 old_pending;
1277
1278 BUG_ON(!irqs_disabled());
1279
1280 if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
1281 {
1282 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
1283 BUG();
1284 }
1285
1286 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1287 {
1288 TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1289 BUG();
1290 }
1291
1292 if(unlikely(!klitirqd_is_ready()))
1293 {
1294        TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
1295 BUG();
1296 }
1297
1298 if(likely(!klitirqds[k_id].terminating))
1299 {
1300 raw_spin_lock(&klitirqds[k_id].lock);
1301
1302 if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
1303 {
1304 ret = 1; // success!
1305
1306 t->next = klitirqds[k_id].pending_tasklets_hi.head;
1307 klitirqds[k_id].pending_tasklets_hi.head = t;
1308
1309 old_pending = klitirqds[k_id].pending;
1310 klitirqds[k_id].pending |= LIT_TASKLET_HI;
1311
1312 atomic_inc(&klitirqds[k_id].num_hi_pending);
1313
1314 mb();
1315
1316 if(!old_pending)
1317 wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
1318 }
1319 else
1320 {
1321 TRACE("%s: rejected tasklet because of pending work.\n",
1322 __FUNCTION__);
1323 }
1324
1325 raw_spin_unlock(&klitirqds[k_id].lock);
1326 }
1327 return(ret);
1328}
1329
1330EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);
1331
1332
1333
1334static void ___litmus_schedule_work(struct work_struct *w,
1335 struct klitirqd_info *which,
1336 int wakeup)
1337{
1338 unsigned long flags;
1339 u32 old_pending;
1340
1341 raw_spin_lock_irqsave(&which->lock, flags);
1342
1343 work_pending(w);
1344 list_add_tail(&w->entry, &which->worklist);
1345
1346 old_pending = which->pending;
1347 which->pending |= LIT_WORK;
1348
1349 atomic_inc(&which->num_work_pending);
1350
1351 mb();
1352
1353 if(!old_pending && wakeup)
1354 {
1355 wakeup_litirqd_locked(which); /* wakeup the klitirqd */
1356 }
1357
1358 raw_spin_unlock_irqrestore(&which->lock, flags);
1359}
1360
1361int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
1362{
1363 int ret = 1; /* assume success */
1364 if(unlikely(w->owner == NULL) || !is_realtime(w->owner))
1365 {
1366 TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
1367 BUG();
1368 }
1369
1370 if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
1371 {
1372        TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
1373 BUG();
1374 }
1375
1376 if(unlikely(!klitirqd_is_ready()))
1377 {
1378        TRACE("%s: klitirqd_th%u is not ready!\n", __FUNCTION__, k_id);
1379 BUG();
1380 }
1381
1382 if(likely(!klitirqds[k_id].terminating))
1383 ___litmus_schedule_work(w, &klitirqds[k_id], 1);
1384 else
1385 ret = 0;
1386 return(ret);
1387}
1388EXPORT_SYMBOL(__litmus_schedule_work);
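
/*
 * Work objects follow the same pattern (again a sketch under the same
 * assumptions as the tasklet example above): set the owner first, then
 * submit, and fall back to an ordinary kworker if the daemon rejects it.
 */
static void example_defer_work(struct work_struct *w,
                               struct task_struct *rt_owner,
                               unsigned int k_id)
{
	w->owner = rt_owner;

	if (!__litmus_schedule_work(w, k_id))
		schedule_work(w);  /* daemon terminating: use a regular kworker */
}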
1389
1390
1391static int set_klitirqd_sem_status(unsigned long stat)
1392{
1393 TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
1394 atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
1395 stat);
1396 atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
1397 //mb();
1398
1399 return(0);
1400}
1401
1402static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
1403{
1404 if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
1405 {
1406 return(set_klitirqd_sem_status(stat));
1407 }
1408 return(-1);
1409}
1410
1411
1412void __down_and_reset_and_set_stat(struct task_struct* t,
1413 enum klitirqd_sem_status to_reset,
1414 enum klitirqd_sem_status to_set,
1415 struct mutex* sem)
1416{
1417#if 0
1418 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1419 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1420
1421 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1422 __FUNCTION__, task->comm, task->pid);
1423#endif
1424
1425 mutex_lock_sfx(sem,
1426 set_klitirqd_sem_status_if_not_held, to_reset,
1427 set_klitirqd_sem_status, to_set);
1428#if 0
1429 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1430 __FUNCTION__, task->comm, task->pid);
1431#endif
1432}
1433
1434void down_and_set_stat(struct task_struct* t,
1435 enum klitirqd_sem_status to_set,
1436 struct mutex* sem)
1437{
1438#if 0
1439 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1440 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1441
1442 TRACE_CUR("%s: entered. Locking semaphore of %s/%d\n",
1443 __FUNCTION__, task->comm, task->pid);
1444#endif
1445
1446 mutex_lock_sfx(sem,
1447 NULL, 0,
1448 set_klitirqd_sem_status, to_set);
1449
1450#if 0
1451 TRACE_CUR("%s: exiting. Have semaphore of %s/%d\n",
1452 __FUNCTION__, task->comm, task->pid);
1453#endif
1454}
1455
1456
1457void up_and_set_stat(struct task_struct* t,
1458 enum klitirqd_sem_status to_set,
1459 struct mutex* sem)
1460{
1461#if 0
1462 struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
1463 struct task_struct* task = container_of(param, struct task_struct, rt_param);
1464
1465 TRACE_CUR("%s: entered. Unlocking semaphore of %s/%d\n",
1466 __FUNCTION__,
1467 task->comm, task->pid);
1468#endif
1469
1470 mutex_unlock_sfx(sem, NULL, 0,
1471 set_klitirqd_sem_status, to_set);
1472
1473#if 0
1474 TRACE_CUR("%s: exiting. Unlocked semaphore of %s/%d\n",
1475 __FUNCTION__,
1476 task->comm, task->pid);
1477#endif
1478}
1479
1480
1481
1482void release_klitirqd_lock(struct task_struct* t)
1483{
1484 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
1485 {
1486 struct mutex* sem;
1487 struct task_struct* owner = t;
1488
1489 if(t->state == TASK_RUNNING)
1490 {
1491 TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
1492 return;
1493 }
1494
1495 if(likely(!tsk_rt(t)->is_proxy_thread))
1496 {
1497 sem = &tsk_rt(t)->klitirqd_sem;
1498 }
1499 else
1500 {
1501 unsigned int k_id = klitirqd_id(t);
1502 owner = klitirqds[k_id].current_owner;
1503
1504 BUG_ON(t != klitirqds[k_id].klitirqd);
1505
1506 if(likely(owner))
1507 {
1508 sem = &tsk_rt(owner)->klitirqd_sem;
1509 }
1510 else
1511 {
1512 BUG();
1513
1514 // We had the rug pulled out from under us. Abort attempt
1515 // to reacquire the lock since our client no longer needs us.
1516 TRACE_CUR("HUH?! How did this happen?\n");
1517 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1518 return;
1519 }
1520 }
1521
1522 //TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
1523 up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
1524 //TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
1525 }
1526 /*
1527 else if(is_realtime(t))
1528 {
1529 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1530 }
1531 */
1532}
1533
1534int reacquire_klitirqd_lock(struct task_struct* t)
1535{
1536 int ret = 0;
1537
1538 if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
1539 {
1540 struct mutex* sem;
1541 struct task_struct* owner = t;
1542
1543 if(likely(!tsk_rt(t)->is_proxy_thread))
1544 {
1545 sem = &tsk_rt(t)->klitirqd_sem;
1546 }
1547 else
1548 {
1549 unsigned int k_id = klitirqd_id(t);
1550 //struct task_struct* owner = klitirqds[k_id].current_owner;
1551 owner = klitirqds[k_id].current_owner;
1552
1553 BUG_ON(t != klitirqds[k_id].klitirqd);
1554
1555 if(likely(owner))
1556 {
1557 sem = &tsk_rt(owner)->klitirqd_sem;
1558 }
1559 else
1560 {
1561 // We had the rug pulled out from under us. Abort attempt
1562 // to reacquire the lock since our client no longer needs us.
1563 TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
1564 atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
1565 return(0);
1566 }
1567 }
1568
1569 //TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
1570 __down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
1571 //TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
1572 }
1573 /*
1574 else if(is_realtime(t))
1575 {
1576 TRACE_CUR("%s: Nothing to do. Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
1577 }
1578 */
1579
1580 return(ret);
1581}
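
/*
 * Taken together, these two functions implement a small hand-off protocol
 * on klitirqd_sem_stat: HELD -> NEED_TO_REACQUIRE when the owner blocks,
 * then NEED_TO_REACQUIRE -> REACQUIRING -> HELD when it resumes.  Where
 * exactly the scheduler invokes them is not visible in this hunk; the
 * sketch below only shows the expected pairing (hook names are assumptions).
 */
static void example_on_block(struct task_struct *prev)
{
	/* prev suspends: let its klitirqd execute on its behalf. */
	release_klitirqd_lock(prev);      /* HELD -> NEED_TO_REACQUIRE */
}

static void example_on_resume(struct task_struct *next)
{
	/* next resumes: take the semaphore back before it runs again. */
	reacquire_klitirqd_lock(next);    /* ... -> REACQUIRING -> HELD */
}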
1582
diff --git a/litmus/locking.c b/litmus/locking.c
index 0c1aa6aa40b7..718a5a3281d7 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -4,6 +4,15 @@
4 4
5#include <litmus/sched_plugin.h> 5#include <litmus/sched_plugin.h>
6#include <litmus/trace.h> 6#include <litmus/trace.h>
7#include <litmus/litmus.h>
8
9#ifdef CONFIG_LITMUS_DGL_SUPPORT
10#include <linux/uaccess.h>
11#endif
12
13#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
14#include <litmus/gpu_affinity.h>
15#endif
7 16
8static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); 17static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
9static int open_generic_lock(struct od_table_entry* entry, void* __user arg); 18static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
@@ -17,6 +26,9 @@ struct fdso_ops generic_lock_ops = {
17 .destroy = destroy_generic_lock 26 .destroy = destroy_generic_lock
18}; 27};
19 28
29static atomic_t lock_id_gen = ATOMIC_INIT(0);
30
31
20static inline bool is_lock(struct od_table_entry* entry) 32static inline bool is_lock(struct od_table_entry* entry)
21{ 33{
22 return entry->class == &generic_lock_ops; 34 return entry->class == &generic_lock_ops;
@@ -34,8 +46,21 @@ static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user ar
34 int err; 46 int err;
35 47
36 err = litmus->allocate_lock(&lock, type, arg); 48 err = litmus->allocate_lock(&lock, type, arg);
37 if (err == 0) 49 if (err == 0) {
50#ifdef CONFIG_LITMUS_NESTED_LOCKING
51 lock->nest.lock = lock;
52 lock->nest.hp_waiter_eff_prio = NULL;
53
54 INIT_BINHEAP_NODE(&lock->nest.hp_binheap_node);
55 if(!lock->nest.hp_waiter_ptr) {
56 TRACE_CUR("BEWARE: hp_waiter_ptr should probably not be NULL in "
57 "most uses. (exception: IKGLP donors)\n");
58 }
59#endif
60 lock->type = type;
61 lock->ident = atomic_inc_return(&lock_id_gen);
38 *obj_ref = lock; 62 *obj_ref = lock;
63 }
39 return err; 64 return err;
40} 65}
41 66
@@ -74,7 +99,8 @@ asmlinkage long sys_litmus_lock(int lock_od)
74 entry = get_entry_for_od(lock_od); 99 entry = get_entry_for_od(lock_od);
75 if (entry && is_lock(entry)) { 100 if (entry && is_lock(entry)) {
76 l = get_lock(entry); 101 l = get_lock(entry);
77 TRACE_CUR("attempts to lock 0x%p\n", l); 102 //TRACE_CUR("attempts to lock 0x%p\n", l);
103 TRACE_CUR("attempts to lock %d\n", l->ident);
78 err = l->ops->lock(l); 104 err = l->ops->lock(l);
79 } 105 }
80 106
@@ -96,7 +122,8 @@ asmlinkage long sys_litmus_unlock(int lock_od)
96 entry = get_entry_for_od(lock_od); 122 entry = get_entry_for_od(lock_od);
97 if (entry && is_lock(entry)) { 123 if (entry && is_lock(entry)) {
98 l = get_lock(entry); 124 l = get_lock(entry);
99 TRACE_CUR("attempts to unlock 0x%p\n", l); 125 //TRACE_CUR("attempts to unlock 0x%p\n", l);
126 TRACE_CUR("attempts to unlock %d\n", l->ident);
100 err = l->ops->unlock(l); 127 err = l->ops->unlock(l);
101 } 128 }
102 129
@@ -121,8 +148,366 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
121 return(t); 148 return(t);
122} 149}
123 150
151#ifdef CONFIG_LITMUS_NESTED_LOCKING
152
153void print_hp_waiters(struct binheap_node* n, int depth)
154{
155 struct litmus_lock *l;
156 struct nested_info *nest;
157 char padding[81] = " ";
158 struct task_struct *hp = NULL;
159 struct task_struct *hp_eff = NULL;
160 struct task_struct *node_prio = NULL;
161
162
163 if(n == NULL) {
164 TRACE("+-> %p\n", NULL);
165 return;
166 }
167
168 nest = binheap_entry(n, struct nested_info, hp_binheap_node);
169 l = nest->lock;
170
171 if(depth*2 <= 80)
172 padding[depth*2] = '\0';
173
174 if(nest->hp_waiter_ptr && *(nest->hp_waiter_ptr)) {
175 hp = *(nest->hp_waiter_ptr);
176
177 if(tsk_rt(hp)->inh_task) {
178 hp_eff = tsk_rt(hp)->inh_task;
179 }
180 }
181
182 node_prio = nest->hp_waiter_eff_prio;
183
184 TRACE("%s+-> %s/%d [waiter = %s/%d] [waiter's inh = %s/%d] (lock = %d)\n",
185 padding,
186 (node_prio) ? node_prio->comm : "nil",
187 (node_prio) ? node_prio->pid : -1,
188 (hp) ? hp->comm : "nil",
189 (hp) ? hp->pid : -1,
190 (hp_eff) ? hp_eff->comm : "nil",
191 (hp_eff) ? hp_eff->pid : -1,
192 l->ident);
193
194 if(n->left) print_hp_waiters(n->left, depth+1);
195 if(n->right) print_hp_waiters(n->right, depth+1);
196}
197#endif
198
199
200#ifdef CONFIG_LITMUS_DGL_SUPPORT
201
202void select_next_lock(dgl_wait_state_t* dgl_wait /*, struct litmus_lock* prev_lock*/)
203{
204 /*
205 We pick the next lock in reverse order. This causes inheritance propagation
206 from locks received earlier to flow in the same direction as regular nested
207 locking. This might make fine-grain DGL easier in the future.
208 */
209
210 BUG_ON(tsk_rt(dgl_wait->task)->blocked_lock);
211
212 //WARN_ON(dgl_wait->locks[dgl_wait->last_primary] != prev_lock);
213
214 // note reverse order
215 for(dgl_wait->last_primary = dgl_wait->last_primary - 1;
216 dgl_wait->last_primary >= 0;
217 --(dgl_wait->last_primary)){
218 if(!dgl_wait->locks[dgl_wait->last_primary]->ops->is_owner(
219 dgl_wait->locks[dgl_wait->last_primary], dgl_wait->task)) {
220
221 tsk_rt(dgl_wait->task)->blocked_lock =
222 dgl_wait->locks[dgl_wait->last_primary];
223 mb();
224
225 TRACE_CUR("New blocked lock is %d\n",
226 dgl_wait->locks[dgl_wait->last_primary]->ident);
227
228 break;
229 }
230 }
231}
232
233int dgl_wake_up(wait_queue_t *wq_node, unsigned mode, int sync, void *key)
234{
235 // should never be called.
236 BUG();
237 return 1;
238}
239
240void __waitqueue_dgl_remove_first(wait_queue_head_t *wq,
241 dgl_wait_state_t** dgl_wait,
242 struct task_struct **task)
243{
244 wait_queue_t *q;
245
246 *dgl_wait = NULL;
247 *task = NULL;
248
249 if (waitqueue_active(wq)) {
250 q = list_entry(wq->task_list.next,
251 wait_queue_t, task_list);
252
253 if(q->func == dgl_wake_up) {
254 *dgl_wait = (dgl_wait_state_t*) q->private;
255 }
256 else {
257 *task = (struct task_struct*) q->private;
258 }
259
260 __remove_wait_queue(wq, q);
261 }
262}
263
264void init_dgl_waitqueue_entry(wait_queue_t *wq_node, dgl_wait_state_t* dgl_wait)
265{
266 init_waitqueue_entry(wq_node, dgl_wait->task);
267 wq_node->private = dgl_wait;
268 wq_node->func = dgl_wake_up;
269}
270
271
272static long do_litmus_dgl_lock(dgl_wait_state_t *dgl_wait)
273{
274 int i;
275 unsigned long irqflags; //, dummyflags;
276 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(dgl_wait->task);
277
278 BUG_ON(dgl_wait->task != current);
279
280 raw_spin_lock_irqsave(dgl_lock, irqflags);
281
282
283 dgl_wait->nr_remaining = dgl_wait->size;
284
285 TRACE_CUR("Locking DGL with size %d\n", dgl_wait->size);
286
287 // try to acquire each lock. enqueue (non-blocking) if it is unavailable.
288 for(i = 0; i < dgl_wait->size; ++i) {
289 struct litmus_lock *l = dgl_wait->locks[i];
290
291 // dgl_lock() must set task state to TASK_UNINTERRUPTIBLE if task blocks.
292
293 if(l->ops->dgl_lock(l, dgl_wait, &dgl_wait->wq_nodes[i])) {
294 --(dgl_wait->nr_remaining);
295            TRACE_CUR("Acquired lock %d immediately.\n", l->ident);
296 }
297 }
298
299 if(dgl_wait->nr_remaining == 0) {
300        // acquired entire group immediately
301        TRACE_CUR("Acquired all locks in DGL immediately!\n");
302 }
303 else {
304
305 TRACE_CUR("As many as %d locks in DGL are pending. Suspending.\n",
306 dgl_wait->nr_remaining);
307
308#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
309        // KLUDGE: don't count this suspension as time spent in the
310        // GPU critical section
311 if(tsk_rt(dgl_wait->task)->held_gpus) {
312 tsk_rt(dgl_wait->task)->suspend_gpu_tracker_on_block = 1;
313 }
314#endif
315
316 // note reverse order. see comments in select_next_lock for reason.
317 for(i = dgl_wait->size - 1; i >= 0; --i) {
318 struct litmus_lock *l = dgl_wait->locks[i];
319 if(!l->ops->is_owner(l, dgl_wait->task)) { // double-check to be thread safe
320
321 TRACE_CUR("Activating priority inheritance on lock %d\n",
322 l->ident);
323
324 TS_DGL_LOCK_SUSPEND;
325
326 l->ops->enable_priority(l, dgl_wait);
327 dgl_wait->last_primary = i;
328
329 TRACE_CUR("Suspending for lock %d\n", l->ident);
330
331 raw_spin_unlock_irqrestore(dgl_lock, irqflags); // free dgl_lock before suspending
332
333 schedule(); // suspend!!!
334
335 TS_DGL_LOCK_RESUME;
336
337 TRACE_CUR("Woken up from DGL suspension.\n");
338
339 goto all_acquired; // we should hold all locks when we wake up.
340 }
341 }
342
343 TRACE_CUR("Didn't have to suspend after all, but calling schedule() anyway.\n");
344 //BUG();
345 }
346
347 raw_spin_unlock_irqrestore(dgl_lock, irqflags);
348
349all_acquired:
350
351    // SANITY CHECK FOR TESTING
352// for(i = 0; i < dgl_wait->size; ++i) {
353// struct litmus_lock *l = dgl_wait->locks[i];
354// BUG_ON(!l->ops->is_owner(l, dgl_wait->task));
355// }
356
357 TRACE_CUR("Acquired entire DGL\n");
358
359 return 0;
360}
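
/*
 * Worked example of the ordering above (illustrative only, not taken from
 * the patch).  DGL request {A, B, C}, with B and C owned by other tasks:
 *
 *   forward pass : A acquired; enqueued (non-blocking) on B and C
 *   reverse pass : C is the first unowned lock from the end
 *                    -> enable_priority(C), last_primary = 2, suspend
 *   C granted    : select_next_lock() walks last_primary downward and
 *                    finds B still unowned -> blocked_lock = B
 *   B granted    : nr_remaining reaches 0; the task resumes holding
 *                    A, B, and C
 */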
361
362static int supports_dgl(struct litmus_lock *l)
363{
364 struct litmus_lock_ops* ops = l->ops;
365
366 return (ops->dgl_lock &&
367 ops->is_owner &&
368 ops->enable_priority);
369}
370
371asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
372{
373 struct task_struct *t = current;
374 long err = -EINVAL;
375 int dgl_ods[MAX_DGL_SIZE];
376 int i;
377
378 dgl_wait_state_t dgl_wait_state; // lives on the stack until all resources in DGL are held.
379
380 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
381 goto out;
382
383 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
384 goto out;
385
386 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
387 goto out;
388
389 if (!is_realtime(t)) {
390 err = -EPERM;
391 goto out;
392 }
393
394 for(i = 0; i < dgl_size; ++i) {
395 struct od_table_entry *entry = get_entry_for_od(dgl_ods[i]);
396 if(entry && is_lock(entry)) {
397 dgl_wait_state.locks[i] = get_lock(entry);
398 if(!supports_dgl(dgl_wait_state.locks[i])) {
399 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
400 dgl_wait_state.locks[i]->ident);
401 goto out;
402 }
403 }
404 else {
405 TRACE_CUR("Invalid lock identifier\n");
406 goto out;
407 }
408 }
409
410 dgl_wait_state.task = t;
411 dgl_wait_state.size = dgl_size;
412
413 TS_DGL_LOCK_START;
414 err = do_litmus_dgl_lock(&dgl_wait_state);
415
416    /* Note: task may have been suspended or preempted in between!  Take
417 * this into account when computing overheads. */
418 TS_DGL_LOCK_END;
419
420out:
421 return err;
422}
423
424static long do_litmus_dgl_unlock(struct litmus_lock* dgl_locks[], int dgl_size)
425{
426 int i;
427 long err = 0;
428
429    TRACE_CUR("Unlocking a DGL of size %d\n", dgl_size);
430
431 for(i = dgl_size - 1; i >= 0; --i) { // unlock in reverse order
432
433 struct litmus_lock *l = dgl_locks[i];
434 long tmp_err;
435
436 TRACE_CUR("Unlocking lock %d of DGL.\n", l->ident);
437
438 tmp_err = l->ops->unlock(l);
439
440 if(tmp_err) {
441            TRACE_CUR("There was an error unlocking %d: %ld.\n", l->ident, tmp_err);
442 err = tmp_err;
443 }
444 }
445
446    TRACE_CUR("DGL unlocked. err = %ld\n", err);
447
448 return err;
449}
450
451asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
452{
453 long err = -EINVAL;
454 int dgl_ods[MAX_DGL_SIZE];
455 struct od_table_entry* entry;
456 int i;
457
458 struct litmus_lock* dgl_locks[MAX_DGL_SIZE];
459
460 if(dgl_size > MAX_DGL_SIZE || dgl_size < 1)
461 goto out;
462
463 if(!access_ok(VERIFY_READ, usr_dgl_ods, dgl_size*(sizeof(int))))
464 goto out;
465
466 if(__copy_from_user(&dgl_ods, usr_dgl_ods, dgl_size*(sizeof(int))))
467 goto out;
468
469 for(i = 0; i < dgl_size; ++i) {
470 entry = get_entry_for_od(dgl_ods[i]);
471 if(entry && is_lock(entry)) {
472 dgl_locks[i] = get_lock(entry);
473 if(!supports_dgl(dgl_locks[i])) {
474 TRACE_CUR("Lock %d does not support all required DGL operations.\n",
475 dgl_locks[i]->ident);
476 goto out;
477 }
478 }
479 else {
480 TRACE_CUR("Invalid lock identifier\n");
481 goto out;
482 }
483 }
484
485 TS_DGL_UNLOCK_START;
486 err = do_litmus_dgl_unlock(dgl_locks, dgl_size);
487
488    /* Note: task may have been suspended or preempted in between!  Take
489 * this into account when computing overheads. */
490 TS_DGL_UNLOCK_END;
491
492out:
493 return err;
494}
495
496#else // CONFIG_LITMUS_DGL_SUPPORT
497
498asmlinkage long sys_litmus_dgl_lock(void* __user usr_dgl_ods, int dgl_size)
499{
500 return -ENOSYS;
501}
502
503asmlinkage long sys_litmus_dgl_unlock(void* __user usr_dgl_ods, int dgl_size)
504{
505 return -ENOSYS;
506}
507
508#endif
124 509
125#else 510#else // CONFIG_LITMUS_LOCKING
126 511
127struct fdso_ops generic_lock_ops = {}; 512struct fdso_ops generic_lock_ops = {};
128 513
diff --git a/litmus/nvidia_info.c b/litmus/nvidia_info.c
new file mode 100644
index 000000000000..4b86a50d3bd1
--- /dev/null
+++ b/litmus/nvidia_info.c
@@ -0,0 +1,597 @@
1#include <linux/module.h>
2#include <linux/semaphore.h>
3#include <linux/pci.h>
4
5#include <litmus/sched_trace.h>
6#include <litmus/nvidia_info.h>
7#include <litmus/litmus.h>
8
9#include <litmus/sched_plugin.h>
10
11#include <litmus/binheap.h>
12
13typedef unsigned char NvV8; /* "void": enumerated or multiple fields */
14typedef unsigned short NvV16; /* "void": enumerated or multiple fields */
15typedef unsigned char NvU8; /* 0 to 255 */
16typedef unsigned short NvU16; /* 0 to 65535 */
17typedef signed char NvS8; /* -128 to 127 */
18typedef signed short NvS16; /* -32768 to 32767 */
19typedef float NvF32; /* IEEE Single Precision (S1E8M23) */
20typedef double NvF64; /* IEEE Double Precision (S1E11M52) */
21typedef unsigned int NvV32; /* "void": enumerated or multiple fields */
22typedef unsigned int NvU32; /* 0 to 4294967295 */
23typedef unsigned long long NvU64; /* 0 to 18446744073709551615 */
24typedef union
25{
26 volatile NvV8 Reg008[1];
27 volatile NvV16 Reg016[1];
28 volatile NvV32 Reg032[1];
29} litmus_nv_hwreg_t, * litmus_nv_phwreg_t;
30
31typedef struct
32{
33 NvU64 address;
34 NvU64 size;
35 NvU32 offset;
36 NvU32 *map;
37 litmus_nv_phwreg_t map_u;
38} litmus_nv_aperture_t;
39
40typedef struct
41{
42 void *priv; /* private data */
43 void *os_state; /* os-specific device state */
44
45 int rmInitialized;
46 int flags;
47
48 /* PCI config info */
49 NvU32 domain;
50 NvU16 bus;
51 NvU16 slot;
52 NvU16 vendor_id;
53 NvU16 device_id;
54 NvU16 subsystem_id;
55 NvU32 gpu_id;
56 void *handle;
57
58 NvU32 pci_cfg_space[16];
59
60 /* physical characteristics */
61 litmus_nv_aperture_t bars[3];
62 litmus_nv_aperture_t *regs;
63 litmus_nv_aperture_t *fb, ud;
64 litmus_nv_aperture_t agp;
65
66 NvU32 interrupt_line;
67
68 NvU32 agp_config;
69 NvU32 agp_status;
70
71 NvU32 primary_vga;
72
73 NvU32 sim_env;
74
75 NvU32 rc_timer_enabled;
76
77 /* list of events allocated for this device */
78 void *event_list;
79
80 void *kern_mappings;
81
82} litmus_nv_state_t;
83
84typedef struct work_struct litmus_nv_task_t;
85
86typedef struct litmus_nv_work_s {
87 litmus_nv_task_t task;
88 void *data;
89} litmus_nv_work_t;
90
91typedef struct litmus_nv_linux_state_s {
92 litmus_nv_state_t nv_state;
93 atomic_t usage_count;
94
95 struct pci_dev *dev;
96 void *agp_bridge;
97 void *alloc_queue;
98
99 void *timer_sp;
100 void *isr_sp;
101 void *pci_cfgchk_sp;
102 void *isr_bh_sp;
103
104#ifdef CONFIG_CUDA_4_0
105 char registry_keys[512];
106#endif
107
108    /* keep track of any pending bottom halves */
109 struct tasklet_struct tasklet;
110 litmus_nv_work_t work;
111
112 /* get a timer callback every second */
113 struct timer_list rc_timer;
114
115 /* lock for linux-specific data, not used by core rm */
116 struct semaphore ldata_lock;
117
118 /* lock for linux-specific alloc queue */
119 struct semaphore at_lock;
120
121#if 0
122#if defined(NV_USER_MAP)
123 /* list of user mappings */
124 struct nv_usermap_s *usermap_list;
125
126 /* lock for VMware-specific mapping list */
127 struct semaphore mt_lock;
128#endif /* defined(NV_USER_MAP) */
129#if defined(NV_PM_SUPPORT_OLD_STYLE_APM)
130 void *apm_nv_dev;
131#endif
132#endif
133
134 NvU32 device_num;
135 struct litmus_nv_linux_state_s *next;
136} litmus_nv_linux_state_t;
137
138void dump_nvidia_info(const struct tasklet_struct *t)
139{
140 litmus_nv_state_t* nvstate = NULL;
141 litmus_nv_linux_state_t* linuxstate = NULL;
142 struct pci_dev* pci = NULL;
143
144 nvstate = (litmus_nv_state_t*)(t->data);
145
146 if(nvstate)
147 {
148 TRACE("NV State:\n"
149 "\ttasklet ptr = %p\n"
150 "\tstate ptr = %p\n"
151 "\tprivate data ptr = %p\n"
152 "\tos state ptr = %p\n"
153 "\tdomain = %u\n"
154 "\tbus = %u\n"
155 "\tslot = %u\n"
156              "\tvendor_id = %u\n"
157 "\tdevice_id = %u\n"
158 "\tsubsystem_id = %u\n"
159 "\tgpu_id = %u\n"
160 "\tinterrupt_line = %u\n",
161 t,
162 nvstate,
163 nvstate->priv,
164 nvstate->os_state,
165 nvstate->domain,
166 nvstate->bus,
167 nvstate->slot,
168 nvstate->vendor_id,
169 nvstate->device_id,
170 nvstate->subsystem_id,
171 nvstate->gpu_id,
172 nvstate->interrupt_line);
173
174 linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
175 }
176 else
177 {
178 TRACE("INVALID NVSTATE????\n");
179 }
180
181 if(linuxstate)
182 {
183 int ls_offset = (void*)(&(linuxstate->device_num)) - (void*)(linuxstate);
184 int ns_offset_raw = (void*)(&(linuxstate->device_num)) - (void*)(&(linuxstate->nv_state));
185 int ns_offset_desired = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
186
187
188 TRACE("LINUX NV State:\n"
189 "\tlinux nv state ptr: %p\n"
190 "\taddress of tasklet: %p\n"
191 "\taddress of work: %p\n"
192 "\tusage_count: %d\n"
193 "\tdevice_num: %u\n"
194 "\ttasklet addr == this tasklet: %d\n"
195 "\tpci: %p\n",
196 linuxstate,
197 &(linuxstate->tasklet),
198 &(linuxstate->work),
199 atomic_read(&(linuxstate->usage_count)),
200 linuxstate->device_num,
201 (t == &(linuxstate->tasklet)),
202 linuxstate->dev);
203
204 pci = linuxstate->dev;
205
206 TRACE("Offsets:\n"
207 "\tOffset from LinuxState: %d, %x\n"
208 "\tOffset from NVState: %d, %x\n"
209 "\tOffset from parameter: %d, %x\n"
210 "\tdevice_num: %u\n",
211 ls_offset, ls_offset,
212 ns_offset_raw, ns_offset_raw,
213 ns_offset_desired, ns_offset_desired,
214 *((u32*)((void*)nvstate + ns_offset_desired)));
215 }
216 else
217 {
218 TRACE("INVALID LINUXNVSTATE?????\n");
219 }
220
221#if 0
222 if(pci)
223 {
224 TRACE("PCI DEV Info:\n"
225 "pci device ptr: %p\n"
226 "\tdevfn = %d\n"
227 "\tvendor = %d\n"
228 "\tdevice = %d\n"
229 "\tsubsystem_vendor = %d\n"
230 "\tsubsystem_device = %d\n"
231 "\tslot # = %d\n",
232 pci,
233 pci->devfn,
234 pci->vendor,
235 pci->device,
236 pci->subsystem_vendor,
237 pci->subsystem_device,
238 pci->slot->number);
239 }
240 else
241 {
242 TRACE("INVALID PCIDEV PTR?????\n");
243 }
244#endif
245}
246
247static struct module* nvidia_mod = NULL;
248int init_nvidia_info(void)
249{
250 mutex_lock(&module_mutex);
251 nvidia_mod = find_module("nvidia");
252 mutex_unlock(&module_mutex);
253 if(nvidia_mod != NULL)
254 {
255 TRACE("%s : Found NVIDIA module. Core Code: %p to %p\n", __FUNCTION__,
256 (void*)(nvidia_mod->module_core),
257 (void*)(nvidia_mod->module_core) + nvidia_mod->core_size);
258 init_nv_device_reg();
259 return(0);
260 }
261 else
262 {
263 TRACE("%s : Could not find NVIDIA module! Loaded?\n", __FUNCTION__);
264 return(-1);
265 }
266}
267
268void shutdown_nvidia_info(void)
269{
270 nvidia_mod = NULL;
271 mb();
272}
273
274/* works with pointers to static data inside the module too. */
275int is_nvidia_func(void* func_addr)
276{
277 int ret = 0;
278 if(nvidia_mod)
279 {
280 ret = within_module_core((long unsigned int)func_addr, nvidia_mod);
281 /*
282 if(ret)
283 {
284 TRACE("%s : %p is in NVIDIA module: %d\n",
285 __FUNCTION__, func_addr, ret);
286 }*/
287 }
288
289 return(ret);
290}
291
292u32 get_tasklet_nv_device_num(const struct tasklet_struct *t)
293{
294 // life is too short to use hard-coded offsets. update this later.
295 litmus_nv_state_t* nvstate = (litmus_nv_state_t*)(t->data);
296 litmus_nv_linux_state_t* linuxstate = container_of(nvstate, litmus_nv_linux_state_t, nv_state);
297
298 BUG_ON(linuxstate->device_num >= NV_DEVICE_NUM);
299
300 return(linuxstate->device_num);
301
302 //int DEVICE_NUM_OFFSET = (void*)(&(linuxstate->device_num)) - (void*)(nvstate);
303
304#if 0
305    // offset determined through observed behavior of the NV driver.
306 //const int DEVICE_NUM_OFFSET = 0x480; // CUDA 4.0 RC1
307 //const int DEVICE_NUM_OFFSET = 0x510; // CUDA 4.0 RC2
308
309 void* state = (void*)(t->data);
310 void* device_num_ptr = state + DEVICE_NUM_OFFSET;
311
312 //dump_nvidia_info(t);
313 return(*((u32*)device_num_ptr));
314#endif
315}
316
317u32 get_work_nv_device_num(const struct work_struct *t)
318{
319    // offset determined through observed behavior of the NV driver.
320 const int DEVICE_NUM_OFFSET = sizeof(struct work_struct);
321 void* state = (void*)(t);
322 void** device_num_ptr = state + DEVICE_NUM_OFFSET;
323 return(*((u32*)(*device_num_ptr)));
324}
325
326
327typedef struct {
328 raw_spinlock_t lock;
329 int nr_owners;
330 struct task_struct* max_prio_owner;
331 struct task_struct* owners[NV_MAX_SIMULT_USERS];
332}nv_device_registry_t;
333
334static nv_device_registry_t NV_DEVICE_REG[NV_DEVICE_NUM];
335
336int init_nv_device_reg(void)
337{
338 int i;
339
340 memset(NV_DEVICE_REG, 0, sizeof(NV_DEVICE_REG));
341
342 for(i = 0; i < NV_DEVICE_NUM; ++i)
343 {
344 raw_spin_lock_init(&NV_DEVICE_REG[i].lock);
345 }
346
347 return(1);
348}
349
350/* use to get the nv_device_id for a given owner.
351   (returns -1 if the associated device id cannot be found) */
352/*
353int get_nv_device_id(struct task_struct* owner)
354{
355 int i;
356 if(!owner)
357 {
358 return(-1);
359 }
360 for(i = 0; i < NV_DEVICE_NUM; ++i)
361 {
362 if(NV_DEVICE_REG[i].device_owner == owner)
363 return(i);
364 }
365 return(-1);
366}
367*/
368
369static struct task_struct* find_hp_owner(nv_device_registry_t *reg, struct task_struct *skip) {
370 int i;
371 struct task_struct *found = NULL;
372 for(i = 0; i < reg->nr_owners; ++i) {
373 if(reg->owners[i] && reg->owners[i] != skip && litmus->compare(reg->owners[i], found)) {
374 found = reg->owners[i];
375 }
376 }
377 return found;
378}
379
380#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
381void pai_check_priority_increase(struct task_struct *t, int reg_device_id)
382{
383 unsigned long flags;
384 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
385
386 if(reg->max_prio_owner != t) {
387
388 raw_spin_lock_irqsave(&reg->lock, flags);
389
390 if(reg->max_prio_owner != t) {
391 if(litmus->compare(t, reg->max_prio_owner)) {
392 litmus->change_prio_pai_tasklet(reg->max_prio_owner, t);
393 reg->max_prio_owner = t;
394 }
395 }
396
397 raw_spin_unlock_irqrestore(&reg->lock, flags);
398 }
399}
400
401
402void pai_check_priority_decrease(struct task_struct *t, int reg_device_id)
403{
404 unsigned long flags;
405 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
406
407 if(reg->max_prio_owner == t) {
408
409 raw_spin_lock_irqsave(&reg->lock, flags);
410
411 if(reg->max_prio_owner == t) {
412 reg->max_prio_owner = find_hp_owner(reg, NULL);
413 if(reg->max_prio_owner != t) {
414 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
415 }
416 }
417
418 raw_spin_unlock_irqrestore(&reg->lock, flags);
419 }
420}
421#endif
422
423static int __reg_nv_device(int reg_device_id, struct task_struct *t)
424{
425 int ret = 0;
426 int i;
427 struct task_struct *old_max = NULL;
428 unsigned long flags;
429 nv_device_registry_t *reg = &NV_DEVICE_REG[reg_device_id];
430
431 if(test_bit(reg_device_id, &tsk_rt(t)->held_gpus)) {
432        // TODO: check if task is already registered.
433 return ret; // assume already registered.
434 }
435
436
437 raw_spin_lock_irqsave(&reg->lock, flags);
438
439 if(reg->nr_owners < NV_MAX_SIMULT_USERS) {
440 TRACE_TASK(t, "registers GPU %d\n", reg_device_id);
441 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
442 if(reg->owners[i] == NULL) {
443 reg->owners[i] = t;
444
445 //if(edf_higher_prio(t, reg->max_prio_owner)) {
446 if(litmus->compare(t, reg->max_prio_owner)) {
447 old_max = reg->max_prio_owner;
448 reg->max_prio_owner = t;
449
450#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
451 litmus->change_prio_pai_tasklet(old_max, t);
452#endif
453 }
454
455#ifdef CONFIG_LITMUS_SOFTIRQD
456 down_and_set_stat(t, HELD, &tsk_rt(t)->klitirqd_sem);
457#endif
458 ++(reg->nr_owners);
459
460 break;
461 }
462 }
463 }
464 else
465 {
466        TRACE_CUR("%s: device %d already has the maximum number of users!\n", __FUNCTION__, reg_device_id);
467 //ret = -EBUSY;
468 }
469
470 raw_spin_unlock_irqrestore(&reg->lock, flags);
471
472 __set_bit(reg_device_id, &tsk_rt(t)->held_gpus);
473
474 return(ret);
475}
476
477static int __clear_reg_nv_device(int de_reg_device_id, struct task_struct *t)
478{
479 int ret = 0;
480 int i;
481 unsigned long flags;
482 nv_device_registry_t *reg = &NV_DEVICE_REG[de_reg_device_id];
483
484#ifdef CONFIG_LITMUS_SOFTIRQD
485 struct task_struct* klitirqd_th = get_klitirqd(de_reg_device_id);
486#endif
487
488 if(!test_bit(de_reg_device_id, &tsk_rt(t)->held_gpus)) {
489 return ret;
490 }
491
492 raw_spin_lock_irqsave(&reg->lock, flags);
493
494 TRACE_TASK(t, "unregisters GPU %d\n", de_reg_device_id);
495
496 for(i = 0; i < NV_MAX_SIMULT_USERS; ++i) {
497 if(reg->owners[i] == t) {
498#ifdef CONFIG_LITMUS_SOFTIRQD
499 flush_pending(klitirqd_th, t);
500#endif
501 if(reg->max_prio_owner == t) {
502 reg->max_prio_owner = find_hp_owner(reg, t);
503#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
504 litmus->change_prio_pai_tasklet(t, reg->max_prio_owner);
505#endif
506 }
507
508#ifdef CONFIG_LITMUS_SOFTIRQD
509 up_and_set_stat(t, NOT_HELD, &tsk_rt(t)->klitirqd_sem);
510#endif
511
512 reg->owners[i] = NULL;
513 --(reg->nr_owners);
514
515 break;
516 }
517 }
518
519 raw_spin_unlock_irqrestore(&reg->lock, flags);
520
521 __clear_bit(de_reg_device_id, &tsk_rt(t)->held_gpus);
522
523 return(ret);
524}
525
526
527int reg_nv_device(int reg_device_id, int reg_action, struct task_struct *t)
528{
529 int ret;
530
531 if((reg_device_id < NV_DEVICE_NUM) && (reg_device_id >= 0))
532 {
533 if(reg_action)
534 ret = __reg_nv_device(reg_device_id, t);
535 else
536 ret = __clear_reg_nv_device(reg_device_id, t);
537 }
538 else
539 {
540 ret = -ENODEV;
541 }
542
543 return(ret);
544}
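
/*
 * The new sys_register_nv_device entry in the syscall table presumably
 * funnels into reg_nv_device(); the wrapper itself is defined elsewhere in
 * this patch, so the following is only a plausible sketch of its shape
 * (the function name here is hypothetical).
 */
asmlinkage long example_register_nv_device(int nv_device_id, int reg_action)
{
	/* reg_action != 0 registers the calling task, 0 unregisters it */
	return reg_nv_device(nv_device_id, reg_action, current);
}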
545
546/* use to get the max-priority owner of nv_device_id. */
547struct task_struct* get_nv_max_device_owner(u32 target_device_id)
548{
549 struct task_struct *owner = NULL;
550 BUG_ON(target_device_id >= NV_DEVICE_NUM);
551 owner = NV_DEVICE_REG[target_device_id].max_prio_owner;
552 return(owner);
553}
554
555void lock_nv_registry(u32 target_device_id, unsigned long* flags)
556{
557 BUG_ON(target_device_id >= NV_DEVICE_NUM);
558
559 if(in_interrupt())
560 TRACE("Locking registry for %d.\n", target_device_id);
561 else
562 TRACE_CUR("Locking registry for %d.\n", target_device_id);
563
564 raw_spin_lock_irqsave(&NV_DEVICE_REG[target_device_id].lock, *flags);
565}
566
567void unlock_nv_registry(u32 target_device_id, unsigned long* flags)
568{
569 BUG_ON(target_device_id >= NV_DEVICE_NUM);
570
571 if(in_interrupt())
572 TRACE("Unlocking registry for %d.\n", target_device_id);
573 else
574 TRACE_CUR("Unlocking registry for %d.\n", target_device_id);
575
576 raw_spin_unlock_irqrestore(&NV_DEVICE_REG[target_device_id].lock, *flags);
577}
578
579
580//void increment_nv_int_count(u32 device)
581//{
582// unsigned long flags;
583// struct task_struct* owner;
584//
585// lock_nv_registry(device, &flags);
586//
587// owner = NV_DEVICE_REG[device].device_owner;
588// if(owner)
589// {
590// atomic_inc(&tsk_rt(owner)->nv_int_count);
591// }
592//
593// unlock_nv_registry(device, &flags);
594//}
595//EXPORT_SYMBOL(increment_nv_int_count);
596
597
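
The registry above is driven through reg_nv_device(): a non-zero reg_action registers the calling task for a device, zero unregisters it. A minimal sketch of an in-kernel caller, assuming only the reg_nv_device() signature defined in this file; the helper and header use below are hypothetical and not part of the patch:

#include <litmus/nvidia_info.h>	/* assumed to declare reg_nv_device() */

/* Hypothetical helper: register GPU 'gpu' for 'current', do some work,
 * then drop the registration again. */
static int example_use_gpu(int gpu)
{
	int err = reg_nv_device(gpu, 1, current);	/* reg_action != 0 => register */
	if (err)
		return err;	/* -ENODEV for an out-of-range device id */

	/* ... issue GPU-related work here ... */

	return reg_nv_device(gpu, 0, current);		/* reg_action == 0 => unregister */
}
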
diff --git a/litmus/preempt.c b/litmus/preempt.c
index 5704d0bf4c0b..28368d5bc046 100644
--- a/litmus/preempt.c
+++ b/litmus/preempt.c
@@ -30,6 +30,7 @@ void sched_state_will_schedule(struct task_struct* tsk)
30 /* Litmus tasks should never be subject to a remote 30 /* Litmus tasks should never be subject to a remote
31 * set_tsk_need_resched(). */ 31 * set_tsk_need_resched(). */
32 BUG_ON(is_realtime(tsk)); 32 BUG_ON(is_realtime(tsk));
33
33#ifdef CONFIG_PREEMPT_STATE_TRACE 34#ifdef CONFIG_PREEMPT_STATE_TRACE
34 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", 35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
35 __builtin_return_address(0)); 36 __builtin_return_address(0));
@@ -45,13 +46,17 @@ void sched_state_ipi(void)
45 /* Cause scheduler to be invoked. 46 /* Cause scheduler to be invoked.
46 * This will cause a transition to WILL_SCHEDULE. */ 47 * This will cause a transition to WILL_SCHEDULE. */
47 set_tsk_need_resched(current); 48 set_tsk_need_resched(current);
49 /*
48 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", 50 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
49 current->comm, current->pid); 51 current->comm, current->pid);
52 */
50 } else { 53 } else {
51 /* ignore */ 54 /* ignore */
55 /*
52 TRACE_STATE("ignoring IPI in state %x (%s)\n", 56 TRACE_STATE("ignoring IPI in state %x (%s)\n",
53 get_sched_state(), 57 get_sched_state(),
54 sched_state_name(get_sched_state())); 58 sched_state_name(get_sched_state()));
59 */
55 } 60 }
56} 61}
57 62
diff --git a/litmus/rsm_lock.c b/litmus/rsm_lock.c
new file mode 100644
index 000000000000..75ed87c5ed48
--- /dev/null
+++ b/litmus/rsm_lock.c
@@ -0,0 +1,796 @@
1#include <linux/slab.h>
2#include <linux/uaccess.h>
3
4#include <litmus/trace.h>
5#include <litmus/sched_plugin.h>
6#include <litmus/rsm_lock.h>
7
8//#include <litmus/edf_common.h>
9
10#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
11#include <litmus/gpu_affinity.h>
12#endif
13
14
15/* caller is responsible for locking */
16static struct task_struct* rsm_mutex_find_hp_waiter(struct rsm_mutex *mutex,
17 struct task_struct* skip)
18{
19 wait_queue_t *q;
20 struct list_head *pos;
21 struct task_struct *queued = NULL, *found = NULL;
22
23#ifdef CONFIG_LITMUS_DGL_SUPPORT
24 dgl_wait_state_t *dgl_wait = NULL;
25#endif
26
27 list_for_each(pos, &mutex->wait.task_list) {
28 q = list_entry(pos, wait_queue_t, task_list);
29
30#ifdef CONFIG_LITMUS_DGL_SUPPORT
31 if(q->func == dgl_wake_up) {
32 dgl_wait = (dgl_wait_state_t*) q->private;
33 if(tsk_rt(dgl_wait->task)->blocked_lock == &mutex->litmus_lock) {
34 queued = dgl_wait->task;
35 }
36 else {
37 queued = NULL; // skip it.
38 }
39 }
40 else {
41 queued = (struct task_struct*) q->private;
42 }
43#else
44 queued = (struct task_struct*) q->private;
45#endif
46
47 /* Compare task prios, find high prio task. */
48 //if (queued && queued != skip && edf_higher_prio(queued, found)) {
49 if (queued && queued != skip && litmus->compare(queued, found)) {
50 found = queued;
51 }
52 }
53 return found;
54}
55
56
57#ifdef CONFIG_LITMUS_DGL_SUPPORT
58
59int rsm_mutex_is_owner(struct litmus_lock *l, struct task_struct *t)
60{
61 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
62 return(mutex->owner == t);
63}
64
65// return 1 if resource was immediately acquired.
66// Assumes mutex->lock is held.
67// Must set task state to TASK_UNINTERRUPTIBLE if task blocks.
68int rsm_mutex_dgl_lock(struct litmus_lock *l, dgl_wait_state_t* dgl_wait,
69 wait_queue_t* wq_node)
70{
71 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
72 struct task_struct *t = dgl_wait->task;
73
74 int acquired_immediatly = 0;
75
76 BUG_ON(t != current);
77
78 if (mutex->owner) {
79 TRACE_TASK(t, "Enqueuing on lock %d.\n", l->ident);
80
81 init_dgl_waitqueue_entry(wq_node, dgl_wait);
82
83 set_task_state(t, TASK_UNINTERRUPTIBLE);
84 __add_wait_queue_tail_exclusive(&mutex->wait, wq_node);
85 } else {
86 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
87
88 /* it's ours now */
89 mutex->owner = t;
90
91 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
92 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
93 struct nested_info, hp_binheap_node);
94 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
95
96 acquired_immediatly = 1;
97 }
98
99 return acquired_immediatly;
100}
101
102void rsm_mutex_enable_priority(struct litmus_lock *l,
103 dgl_wait_state_t* dgl_wait)
104{
105 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
106 struct task_struct *t = dgl_wait->task;
107 struct task_struct *owner = mutex->owner;
108 unsigned long flags = 0; // these are unused under DGL coarse-grain locking
109
110 BUG_ON(owner == t);
111
112 tsk_rt(t)->blocked_lock = l;
113 mb();
114
115 //if (edf_higher_prio(t, mutex->hp_waiter)) {
116 if (litmus->compare(t, mutex->hp_waiter)) {
117
118 struct task_struct *old_max_eff_prio;
119 struct task_struct *new_max_eff_prio;
120 struct task_struct *new_prio = NULL;
121
122 if(mutex->hp_waiter)
123 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
124 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
125 else
126 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
127
128 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
129
130 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
131 mutex->hp_waiter = t;
132 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
133 binheap_decrease(&l->nest.hp_binheap_node,
134 &tsk_rt(owner)->hp_blocked_tasks);
135 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
136
137 if(new_max_eff_prio != old_max_eff_prio) {
138 TRACE_TASK(t, "is new hp_waiter.\n");
139
140 if ((effective_priority(owner) == old_max_eff_prio) ||
141 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
142 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
143 new_prio = new_max_eff_prio;
144 }
145 }
146 else {
147 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
148 }
149
150 if(new_prio) {
151 litmus->nested_increase_prio(owner, new_prio,
152 &mutex->lock, flags); // unlocks lock.
153 }
154 else {
155 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
156 unlock_fine_irqrestore(&mutex->lock, flags);
157 }
158 }
159 else {
160 TRACE_TASK(t, "no change in hp_waiter.\n");
161 unlock_fine_irqrestore(&mutex->lock, flags);
162 }
163}
164
165static void select_next_lock_if_primary(struct litmus_lock *l,
166 dgl_wait_state_t *dgl_wait)
167{
168 if(tsk_rt(dgl_wait->task)->blocked_lock == l) {
169 TRACE_CUR("Lock %d in DGL was primary for %s/%d.\n",
170 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
171 tsk_rt(dgl_wait->task)->blocked_lock = NULL;
172 mb();
173 select_next_lock(dgl_wait /*, l*/); // pick the next lock to be blocked on
174 }
175 else {
176 TRACE_CUR("Got lock early! Lock %d in DGL was NOT primary for %s/%d.\n",
177 l->ident, dgl_wait->task->comm, dgl_wait->task->pid);
178 }
179}
180#endif
181
182
183
184
185int rsm_mutex_lock(struct litmus_lock* l)
186{
187 struct task_struct *t = current;
188 struct task_struct *owner;
189 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
190 wait_queue_t wait;
191 unsigned long flags;
192
193#ifdef CONFIG_LITMUS_DGL_SUPPORT
194 raw_spinlock_t *dgl_lock;
195#endif
196
197 if (!is_realtime(t))
198 return -EPERM;
199
200#ifdef CONFIG_LITMUS_DGL_SUPPORT
201 dgl_lock = litmus->get_dgl_spinlock(t);
202#endif
203
204 lock_global_irqsave(dgl_lock, flags);
205 lock_fine_irqsave(&mutex->lock, flags);
206
207 if (mutex->owner) {
208 TRACE_TASK(t, "Blocking on lock %d.\n", l->ident);
209
210#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
211		// KLUDGE: don't count this suspension as time in the GPU
212		// critical section
213 if(tsk_rt(t)->held_gpus) {
214 tsk_rt(t)->suspend_gpu_tracker_on_block = 1;
215 }
216#endif
217
218 /* resource is not free => must suspend and wait */
219
220 owner = mutex->owner;
221
222 init_waitqueue_entry(&wait, t);
223
224 tsk_rt(t)->blocked_lock = l; /* record where we are blocked */
225 mb(); // needed?
226
227 /* FIXME: interruptible would be nice some day */
228 set_task_state(t, TASK_UNINTERRUPTIBLE);
229
230 __add_wait_queue_tail_exclusive(&mutex->wait, &wait);
231
232 /* check if we need to activate priority inheritance */
233 //if (edf_higher_prio(t, mutex->hp_waiter)) {
234 if (litmus->compare(t, mutex->hp_waiter)) {
235
236 struct task_struct *old_max_eff_prio;
237 struct task_struct *new_max_eff_prio;
238 struct task_struct *new_prio = NULL;
239
240 if(mutex->hp_waiter)
241 TRACE_TASK(t, "has higher prio than hp_waiter (%s/%d).\n",
242 mutex->hp_waiter->comm, mutex->hp_waiter->pid);
243 else
244 TRACE_TASK(t, "has higher prio than hp_waiter (NIL).\n");
245
246 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
247
248 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
249 mutex->hp_waiter = t;
250 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
251 binheap_decrease(&l->nest.hp_binheap_node,
252 &tsk_rt(owner)->hp_blocked_tasks);
253 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
254
255 if(new_max_eff_prio != old_max_eff_prio) {
256 TRACE_TASK(t, "is new hp_waiter.\n");
257
258 if ((effective_priority(owner) == old_max_eff_prio) ||
259 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))){
260 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))){
261 new_prio = new_max_eff_prio;
262 }
263 }
264 else {
265 TRACE_TASK(t, "no change in max_eff_prio of heap.\n");
266 }
267
268 if(new_prio) {
269 litmus->nested_increase_prio(owner, new_prio, &mutex->lock,
270 flags); // unlocks lock.
271 }
272 else {
273 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
274 unlock_fine_irqrestore(&mutex->lock, flags);
275 }
276 }
277 else {
278 TRACE_TASK(t, "no change in hp_waiter.\n");
279
280 unlock_fine_irqrestore(&mutex->lock, flags);
281 }
282
283 unlock_global_irqrestore(dgl_lock, flags);
284
285 TS_LOCK_SUSPEND;
286
287 /* We depend on the FIFO order. Thus, we don't need to recheck
288 * when we wake up; we are guaranteed to have the lock since
289 * there is only one wake up per release.
290 */
291
292 schedule();
293
294 TS_LOCK_RESUME;
295
296 /* Since we hold the lock, no other task will change
297 * ->owner. We can thus check it without acquiring the spin
298 * lock. */
299 BUG_ON(mutex->owner != t);
300
301 TRACE_TASK(t, "Acquired lock %d.\n", l->ident);
302
303 } else {
304 TRACE_TASK(t, "Acquired lock %d with no blocking.\n", l->ident);
305
306 /* it's ours now */
307 mutex->owner = t;
308
309 raw_spin_lock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
310 binheap_add(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks,
311 struct nested_info, hp_binheap_node);
312 raw_spin_unlock(&tsk_rt(mutex->owner)->hp_blocked_tasks_lock);
313
314
315 unlock_fine_irqrestore(&mutex->lock, flags);
316 unlock_global_irqrestore(dgl_lock, flags);
317 }
318
319 return 0;
320}
321
322
323
324int rsm_mutex_unlock(struct litmus_lock* l)
325{
326 struct task_struct *t = current, *next = NULL;
327 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
328 unsigned long flags;
329
330 struct task_struct *old_max_eff_prio;
331
332 int wake_up_task = 1;
333
334#ifdef CONFIG_LITMUS_DGL_SUPPORT
335 dgl_wait_state_t *dgl_wait = NULL;
336 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
337#endif
338
339 int err = 0;
340
341 if (mutex->owner != t) {
342 err = -EINVAL;
343 return err;
344 }
345
346 lock_global_irqsave(dgl_lock, flags);
347 lock_fine_irqsave(&mutex->lock, flags);
348
349 raw_spin_lock(&tsk_rt(t)->hp_blocked_tasks_lock);
350
351 TRACE_TASK(t, "Freeing lock %d\n", l->ident);
352
353 old_max_eff_prio = top_priority(&tsk_rt(t)->hp_blocked_tasks);
354 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(t)->hp_blocked_tasks);
355
356 if(tsk_rt(t)->inh_task){
357 struct task_struct *new_max_eff_prio =
358 top_priority(&tsk_rt(t)->hp_blocked_tasks);
359
360 if((new_max_eff_prio == NULL) ||
361 /* there was a change in eff prio */
362 ( (new_max_eff_prio != old_max_eff_prio) &&
363 /* and owner had the old eff prio */
364 (effective_priority(t) == old_max_eff_prio)) )
365 {
366 // old_max_eff_prio > new_max_eff_prio
367
368 //if(__edf_higher_prio(new_max_eff_prio, BASE, t, EFFECTIVE)) {
369 if(litmus->__compare(new_max_eff_prio, BASE, t, EFFECTIVE)) {
370 TRACE_TASK(t, "new_max_eff_prio > task's eff_prio-- new_max_eff_prio: %s/%d task: %s/%d [%s/%d]\n",
371 new_max_eff_prio->comm, new_max_eff_prio->pid,
372 t->comm, t->pid, tsk_rt(t)->inh_task->comm,
373 tsk_rt(t)->inh_task->pid);
374 WARN_ON(1);
375 }
376
377 litmus->decrease_prio(t, new_max_eff_prio);
378 }
379 }
380
381 if(binheap_empty(&tsk_rt(t)->hp_blocked_tasks) &&
382 tsk_rt(t)->inh_task != NULL)
383 {
384 WARN_ON(tsk_rt(t)->inh_task != NULL);
385 TRACE_TASK(t, "No more locks are held, but eff_prio = %s/%d\n",
386 tsk_rt(t)->inh_task->comm, tsk_rt(t)->inh_task->pid);
387 }
388
389 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock);
390
391
392 /* check if there are jobs waiting for this resource */
393#ifdef CONFIG_LITMUS_DGL_SUPPORT
394 __waitqueue_dgl_remove_first(&mutex->wait, &dgl_wait, &next);
395 if(dgl_wait) {
396 next = dgl_wait->task;
397 //select_next_lock_if_primary(l, dgl_wait);
398 }
399#else
400 next = __waitqueue_remove_first(&mutex->wait);
401#endif
402 if (next) {
403		/* next becomes the resource holder */
404 mutex->owner = next;
405 TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid);
406
407 /* determine new hp_waiter if necessary */
408 if (next == mutex->hp_waiter) {
409
410 TRACE_TASK(next, "was highest-prio waiter\n");
411 /* next has the highest priority --- it doesn't need to
412 * inherit. However, we need to make sure that the
413 * next-highest priority in the queue is reflected in
414 * hp_waiter. */
415 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, next);
416 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
417 effective_priority(mutex->hp_waiter) :
418 NULL;
419
420 if (mutex->hp_waiter)
421 TRACE_TASK(mutex->hp_waiter, "is new highest-prio waiter\n");
422 else
423 TRACE("no further waiters\n");
424
425 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
426
427 binheap_add(&l->nest.hp_binheap_node,
428 &tsk_rt(next)->hp_blocked_tasks,
429 struct nested_info, hp_binheap_node);
430
431#ifdef CONFIG_LITMUS_DGL_SUPPORT
432 if(dgl_wait) {
433 select_next_lock_if_primary(l, dgl_wait);
434 //wake_up_task = atomic_dec_and_test(&dgl_wait->nr_remaining);
435 --(dgl_wait->nr_remaining);
436 wake_up_task = (dgl_wait->nr_remaining == 0);
437 }
438#endif
439 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
440 }
441 else {
442 /* Well, if 'next' is not the highest-priority waiter,
443 * then it (probably) ought to inherit the highest-priority
444 * waiter's priority. */
445 TRACE_TASK(next, "is not hp_waiter of lock %d.\n", l->ident);
446
447 raw_spin_lock(&tsk_rt(next)->hp_blocked_tasks_lock);
448
449 binheap_add(&l->nest.hp_binheap_node,
450 &tsk_rt(next)->hp_blocked_tasks,
451 struct nested_info, hp_binheap_node);
452
453#ifdef CONFIG_LITMUS_DGL_SUPPORT
454 if(dgl_wait) {
455 select_next_lock_if_primary(l, dgl_wait);
456 --(dgl_wait->nr_remaining);
457 wake_up_task = (dgl_wait->nr_remaining == 0);
458 }
459#endif
460
461 /* It is possible that 'next' *should* be the hp_waiter, but isn't
462 * because that update hasn't yet executed (update operation is
463 * probably blocked on mutex->lock). So only inherit if the top of
464 * 'next's top heap node is indeed the effective prio. of hp_waiter.
465 * (We use l->hp_waiter_eff_prio instead of effective_priority(hp_waiter)
466 * since the effective priority of hp_waiter can change (and the
467 * update has not made it to this lock).)
468 */
469#ifdef CONFIG_LITMUS_DGL_SUPPORT
470 if((l->nest.hp_waiter_eff_prio != NULL) &&
471 (top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
472 l->nest.hp_waiter_eff_prio))
473 {
474 if(dgl_wait && tsk_rt(next)->blocked_lock) {
475 BUG_ON(wake_up_task);
476 //if(__edf_higher_prio(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
477 if(litmus->__compare(l->nest.hp_waiter_eff_prio, BASE, next, EFFECTIVE)) {
478 litmus->nested_increase_prio(next,
479 l->nest.hp_waiter_eff_prio, &mutex->lock, flags); // unlocks lock && hp_blocked_tasks_lock.
480 goto out; // all spinlocks are released. bail out now.
481 }
482 }
483 else {
484 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
485 }
486 }
487
488 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
489#else
490 if(likely(top_priority(&tsk_rt(next)->hp_blocked_tasks) ==
491 l->nest.hp_waiter_eff_prio))
492 {
493 litmus->increase_prio(next, l->nest.hp_waiter_eff_prio);
494 }
495 raw_spin_unlock(&tsk_rt(next)->hp_blocked_tasks_lock);
496#endif
497 }
498
499 if(wake_up_task) {
500 TRACE_TASK(next, "waking up since it is no longer blocked.\n");
501
502 tsk_rt(next)->blocked_lock = NULL;
503 mb();
504
505#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
506 // re-enable tracking
507 if(tsk_rt(next)->held_gpus) {
508 tsk_rt(next)->suspend_gpu_tracker_on_block = 0;
509 }
510#endif
511
512 wake_up_process(next);
513 }
514 else {
515 TRACE_TASK(next, "is still blocked.\n");
516 }
517 }
518 else {
519 /* becomes available */
520 mutex->owner = NULL;
521 }
522
523 unlock_fine_irqrestore(&mutex->lock, flags);
524
525#ifdef CONFIG_LITMUS_DGL_SUPPORT
526out:
527#endif
528 unlock_global_irqrestore(dgl_lock, flags);
529
530 return err;
531}
532
533
534void rsm_mutex_propagate_increase_inheritance(struct litmus_lock* l,
535 struct task_struct* t,
536 raw_spinlock_t* to_unlock,
537 unsigned long irqflags)
538{
539 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
540
541 // relay-style locking
542 lock_fine(&mutex->lock);
543 unlock_fine(to_unlock);
544
545 if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
546 struct task_struct *owner = mutex->owner;
547
548 struct task_struct *old_max_eff_prio;
549 struct task_struct *new_max_eff_prio;
550
551 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
552
553 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
554
555 //if((t != mutex->hp_waiter) && edf_higher_prio(t, mutex->hp_waiter)) {
556 if((t != mutex->hp_waiter) && litmus->compare(t, mutex->hp_waiter)) {
557 TRACE_TASK(t, "is new highest-prio waiter by propagation.\n");
558 mutex->hp_waiter = t;
559 }
560 if(t == mutex->hp_waiter) {
561 // reflect the decreased priority in the heap node.
562 l->nest.hp_waiter_eff_prio = effective_priority(mutex->hp_waiter);
563
564 BUG_ON(!binheap_is_in_heap(&l->nest.hp_binheap_node));
565 BUG_ON(!binheap_is_in_this_heap(&l->nest.hp_binheap_node,
566 &tsk_rt(owner)->hp_blocked_tasks));
567
568 binheap_decrease(&l->nest.hp_binheap_node,
569 &tsk_rt(owner)->hp_blocked_tasks);
570 }
571
572 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
573
574
575 if(new_max_eff_prio != old_max_eff_prio) {
576 // new_max_eff_prio > old_max_eff_prio holds.
577 if ((effective_priority(owner) == old_max_eff_prio) ||
578 //(__edf_higher_prio(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
579 (litmus->__compare(new_max_eff_prio, BASE, owner, EFFECTIVE))) {
580 TRACE_CUR("Propagating inheritance to holder of lock %d.\n",
581 l->ident);
582
583 // beware: recursion
584 litmus->nested_increase_prio(owner, new_max_eff_prio,
585 &mutex->lock, irqflags); // unlocks mutex->lock
586 }
587 else {
588 TRACE_CUR("Lower priority than holder %s/%d. No propagation.\n",
589 owner->comm, owner->pid);
590 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
591 unlock_fine_irqrestore(&mutex->lock, irqflags);
592 }
593 }
594 else {
595			TRACE_TASK(mutex->owner, "No change in maximum effective priority.\n");
596 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
597 unlock_fine_irqrestore(&mutex->lock, irqflags);
598 }
599 }
600 else {
601 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
602
603 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
604 if(still_blocked) {
605 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
606 still_blocked->ident);
607 if(still_blocked->ops->propagate_increase_inheritance) {
608 /* due to relay-style nesting of spinlocks (acq. A, acq. B, free A, free B)
609 we know that task 't' has not released any locks behind us in this
610 chain. Propagation just needs to catch up with task 't'. */
611 still_blocked->ops->propagate_increase_inheritance(still_blocked,
612 t,
613 &mutex->lock,
614 irqflags);
615 }
616 else {
617 TRACE_TASK(t,
618 "Inheritor is blocked on lock (%p) that does not "
619 "support nesting!\n",
620 still_blocked);
621 unlock_fine_irqrestore(&mutex->lock, irqflags);
622 }
623 }
624 else {
625 unlock_fine_irqrestore(&mutex->lock, irqflags);
626 }
627 }
628}
629
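
The propagate_*_inheritance callbacks in this file use relay-style (hand-over-hand) fine-grained locking: the next lock in the blocking chain is taken before the previous one is dropped, so no task in the chain can change its blocking relationship behind the walker. A minimal, IRQ-agnostic sketch of the pattern with purely hypothetical types (not part of the patch):

#include <linux/spinlock.h>

struct chain_node {
	raw_spinlock_t lock;
	struct chain_node *next;	/* next lock in the blocking chain */
};

/* Walk a blocking chain hand-over-hand, applying visit() to each node. */
static void relay_walk(struct chain_node *start,
		       void (*visit)(struct chain_node *))
{
	struct chain_node *cur = start;

	raw_spin_lock(&cur->lock);
	while (cur) {
		struct chain_node *next = cur->next;

		visit(cur);
		if (next)
			raw_spin_lock(&next->lock);	/* acquire B ... */
		raw_spin_unlock(&cur->lock);		/* ... then release A */
		cur = next;
	}
}
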
630
631void rsm_mutex_propagate_decrease_inheritance(struct litmus_lock* l,
632 struct task_struct* t,
633 raw_spinlock_t* to_unlock,
634 unsigned long irqflags)
635{
636 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
637
638 // relay-style locking
639 lock_fine(&mutex->lock);
640 unlock_fine(to_unlock);
641
642 if(tsk_rt(t)->blocked_lock == l) { // prevent race on tsk_rt(t)->blocked
643 if(t == mutex->hp_waiter) {
644 struct task_struct *owner = mutex->owner;
645
646 struct task_struct *old_max_eff_prio;
647 struct task_struct *new_max_eff_prio;
648
649 raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);
650
651 old_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
652
653 binheap_delete(&l->nest.hp_binheap_node, &tsk_rt(owner)->hp_blocked_tasks);
654 mutex->hp_waiter = rsm_mutex_find_hp_waiter(mutex, NULL);
655 l->nest.hp_waiter_eff_prio = (mutex->hp_waiter) ?
656 effective_priority(mutex->hp_waiter) : NULL;
657 binheap_add(&l->nest.hp_binheap_node,
658 &tsk_rt(owner)->hp_blocked_tasks,
659 struct nested_info, hp_binheap_node);
660
661 new_max_eff_prio = top_priority(&tsk_rt(owner)->hp_blocked_tasks);
662
663 if((old_max_eff_prio != new_max_eff_prio) &&
664 (effective_priority(owner) == old_max_eff_prio))
665 {
666 // Need to set new effective_priority for owner
667
668 struct task_struct *decreased_prio;
669
670 TRACE_CUR("Propagating decreased inheritance to holder of lock %d.\n",
671 l->ident);
672
673 //if(__edf_higher_prio(new_max_eff_prio, BASE, owner, BASE)) {
674 if(litmus->__compare(new_max_eff_prio, BASE, owner, BASE)) {
675 TRACE_CUR("%s/%d has greater base priority than base priority of owner (%s/%d) of lock %d.\n",
676 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
677 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
678 owner->comm,
679 owner->pid,
680 l->ident);
681
682 decreased_prio = new_max_eff_prio;
683 }
684 else {
685 TRACE_CUR("%s/%d has lesser base priority than base priority of owner (%s/%d) of lock %d.\n",
686 (new_max_eff_prio) ? new_max_eff_prio->comm : "nil",
687 (new_max_eff_prio) ? new_max_eff_prio->pid : -1,
688 owner->comm,
689 owner->pid,
690 l->ident);
691
692 decreased_prio = NULL;
693 }
694
695 // beware: recursion
696 litmus->nested_decrease_prio(owner, decreased_prio, &mutex->lock, irqflags); // will unlock mutex->lock
697 }
698 else {
699 raw_spin_unlock(&tsk_rt(owner)->hp_blocked_tasks_lock);
700 unlock_fine_irqrestore(&mutex->lock, irqflags);
701 }
702 }
703 else {
704 TRACE_TASK(t, "is not hp_waiter. No propagation.\n");
705 unlock_fine_irqrestore(&mutex->lock, irqflags);
706 }
707 }
708 else {
709 struct litmus_lock *still_blocked = tsk_rt(t)->blocked_lock;
710
711 TRACE_TASK(t, "is not blocked on lock %d.\n", l->ident);
712 if(still_blocked) {
713 TRACE_TASK(t, "is still blocked on a lock though (lock %d).\n",
714 still_blocked->ident);
715 if(still_blocked->ops->propagate_decrease_inheritance) {
716 /* due to linked nesting of spinlocks (acq. A, acq. B, free A, free B)
717 we know that task 't' has not released any locks behind us in this
718 chain. propagation just needs to catch up with task 't' */
719 still_blocked->ops->propagate_decrease_inheritance(still_blocked,
720 t,
721 &mutex->lock,
722 irqflags);
723 }
724 else {
725 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
726 still_blocked);
727 unlock_fine_irqrestore(&mutex->lock, irqflags);
728 }
729 }
730 else {
731 unlock_fine_irqrestore(&mutex->lock, irqflags);
732 }
733 }
734}
735
736
737int rsm_mutex_close(struct litmus_lock* l)
738{
739 struct task_struct *t = current;
740 struct rsm_mutex *mutex = rsm_mutex_from_lock(l);
741 unsigned long flags;
742
743 int owner;
744
745#ifdef CONFIG_LITMUS_DGL_SUPPORT
746 raw_spinlock_t *dgl_lock = litmus->get_dgl_spinlock(t);
747#endif
748
749 lock_global_irqsave(dgl_lock, flags);
750 lock_fine_irqsave(&mutex->lock, flags);
751
752 owner = (mutex->owner == t);
753
754 unlock_fine_irqrestore(&mutex->lock, flags);
755 unlock_global_irqrestore(dgl_lock, flags);
756
757 if (owner)
758 rsm_mutex_unlock(l);
759
760 return 0;
761}
762
763void rsm_mutex_free(struct litmus_lock* lock)
764{
765 kfree(rsm_mutex_from_lock(lock));
766}
767
768struct litmus_lock* rsm_mutex_new(struct litmus_lock_ops* ops)
769{
770 struct rsm_mutex* mutex;
771
772 mutex = kmalloc(sizeof(*mutex), GFP_KERNEL);
773 if (!mutex)
774 return NULL;
775
776 mutex->litmus_lock.ops = ops;
777 mutex->owner = NULL;
778 mutex->hp_waiter = NULL;
779 init_waitqueue_head(&mutex->wait);
780
781
782#ifdef CONFIG_DEBUG_SPINLOCK
783 {
784 __raw_spin_lock_init(&mutex->lock,
785 ((struct litmus_lock*)mutex)->cheat_lockdep,
786 &((struct litmus_lock*)mutex)->key);
787 }
788#else
789 raw_spin_lock_init(&mutex->lock);
790#endif
791
792 ((struct litmus_lock*)mutex)->nest.hp_waiter_ptr = &mutex->hp_waiter;
793
794 return &mutex->litmus_lock;
795}
796
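
How a plugin might instantiate one of these mutexes: the two propagate_* member names are the ones dereferenced in this file, while the remaining litmus_lock_ops members (.lock, .unlock, .close, .deallocate) are assumed from the corresponding functions here and may be named differently in the actual header. A sketch, not part of the patch:

#include <litmus/rsm_lock.h>

static struct litmus_lock_ops example_rsm_ops = {
	.lock				= rsm_mutex_lock,
	.unlock				= rsm_mutex_unlock,
	.close				= rsm_mutex_close,
	.deallocate			= rsm_mutex_free,
	.propagate_increase_inheritance	= rsm_mutex_propagate_increase_inheritance,
	.propagate_decrease_inheritance	= rsm_mutex_propagate_decrease_inheritance,
};

/* Hypothetical allocation path inside a plugin's allocate_lock() handler. */
static struct litmus_lock *example_new_rsm_mutex(void)
{
	return rsm_mutex_new(&example_rsm_ops);	/* NULL on allocation failure */
}
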
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
index 480c62bc895b..be14dbec6ed2 100644
--- a/litmus/sched_cedf.c
+++ b/litmus/sched_cedf.c
@@ -29,7 +29,7 @@
29#include <linux/percpu.h> 29#include <linux/percpu.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32 32#include <linux/uaccess.h>
33#include <linux/module.h> 33#include <linux/module.h>
34 34
35#include <litmus/litmus.h> 35#include <litmus/litmus.h>
@@ -42,6 +42,16 @@
42#include <litmus/clustered.h> 42#include <litmus/clustered.h>
43 43
44#include <litmus/bheap.h> 44#include <litmus/bheap.h>
45#include <litmus/binheap.h>
46
47#ifdef CONFIG_LITMUS_LOCKING
48#include <litmus/kfmlp_lock.h>
49#endif
50
51#ifdef CONFIG_LITMUS_NESTED_LOCKING
52#include <litmus/rsm_lock.h>
53#include <litmus/ikglp_lock.h>
54#endif
45 55
46#ifdef CONFIG_SCHED_CPU_AFFINITY 56#ifdef CONFIG_SCHED_CPU_AFFINITY
47#include <litmus/affinity.h> 57#include <litmus/affinity.h>
@@ -49,7 +59,27 @@
49 59
50/* to configure the cluster size */ 60/* to configure the cluster size */
51#include <litmus/litmus_proc.h> 61#include <litmus/litmus_proc.h>
52#include <linux/uaccess.h> 62
63#ifdef CONFIG_SCHED_CPU_AFFINITY
64#include <litmus/affinity.h>
65#endif
66
67#ifdef CONFIG_LITMUS_SOFTIRQD
68#include <litmus/litmus_softirq.h>
69#endif
70
71#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
72#include <linux/interrupt.h>
73#include <litmus/trace.h>
74#endif
75
76#ifdef CONFIG_LITMUS_NVIDIA
77#include <litmus/nvidia_info.h>
78#endif
79
80#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
81#include <litmus/gpu_affinity.h>
82#endif
53 83
54/* Reference configuration variable. Determines which cache level is used to 84/* Reference configuration variable. Determines which cache level is used to
55 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that 85 * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
@@ -70,7 +100,7 @@ typedef struct {
70 struct task_struct* linked; /* only RT tasks */ 100 struct task_struct* linked; /* only RT tasks */
71 struct task_struct* scheduled; /* only RT tasks */ 101 struct task_struct* scheduled; /* only RT tasks */
72 atomic_t will_schedule; /* prevent unneeded IPIs */ 102 atomic_t will_schedule; /* prevent unneeded IPIs */
73 struct bheap_node* hn; 103 struct binheap_node hn;
74} cpu_entry_t; 104} cpu_entry_t;
75 105
76/* one cpu_entry_t per CPU */ 106/* one cpu_entry_t per CPU */
@@ -83,6 +113,14 @@ DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
83#define test_will_schedule(cpu) \ 113#define test_will_schedule(cpu) \
84 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule)) 114 (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
85 115
116#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
117struct tasklet_head
118{
119 struct tasklet_struct *head;
120 struct tasklet_struct **tail;
121};
122#endif
123
86/* 124/*
87 * In C-EDF there is a cedf domain _per_ cluster 125 * In C-EDF there is a cedf domain _per_ cluster
88 * The number of clusters is dynamically determined accordingly to the 126 * The number of clusters is dynamically determined accordingly to the
@@ -96,10 +134,17 @@ typedef struct clusterdomain {
96 /* map of this cluster cpus */ 134 /* map of this cluster cpus */
97 cpumask_var_t cpu_map; 135 cpumask_var_t cpu_map;
98 /* the cpus queue themselves according to priority in here */ 136 /* the cpus queue themselves according to priority in here */
99 struct bheap_node *heap_node; 137 struct binheap_handle cpu_heap;
100 struct bheap cpu_heap;
101 /* lock for this cluster */ 138 /* lock for this cluster */
102#define cluster_lock domain.ready_lock 139#define cluster_lock domain.ready_lock
140
141#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
142 struct tasklet_head pending_tasklets;
143#endif
144
145#ifdef CONFIG_LITMUS_DGL_SUPPORT
146 raw_spinlock_t dgl_lock;
147#endif
103} cedf_domain_t; 148} cedf_domain_t;
104 149
105/* a cedf_domain per cluster; allocation is done at init/activation time */ 150/* a cedf_domain per cluster; allocation is done at init/activation time */
@@ -108,6 +153,22 @@ cedf_domain_t *cedf;
108#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) 153#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
109#define task_cpu_cluster(task) remote_cluster(get_partition(task)) 154#define task_cpu_cluster(task) remote_cluster(get_partition(task))
110 155
156/* total number of cluster */
157static int num_clusters;
158/* we do not support cluster of different sizes */
159static unsigned int cluster_size;
160
161static int clusters_allocated = 0;
162
163#ifdef CONFIG_LITMUS_DGL_SUPPORT
164static raw_spinlock_t* cedf_get_dgl_spinlock(struct task_struct *t)
165{
166 cedf_domain_t *cluster = task_cpu_cluster(t);
167 return(&cluster->dgl_lock);
168}
169#endif
170
171
111/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling 172/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
112 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose 173 * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
113 * information during the initialization of the plugin (e.g., topology) 174 * information during the initialization of the plugin (e.g., topology)
@@ -115,11 +176,11 @@ cedf_domain_t *cedf;
115 */ 176 */
116#define VERBOSE_INIT 177#define VERBOSE_INIT
117 178
118static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 179static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
119{ 180{
120 cpu_entry_t *a, *b; 181 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
121 a = _a->value; 182 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
122 b = _b->value; 183
123 /* Note that a and b are inverted: we want the lowest-priority CPU at 184 /* Note that a and b are inverted: we want the lowest-priority CPU at
124 * the top of the heap. 185 * the top of the heap.
125 */ 186 */
@@ -133,20 +194,17 @@ static void update_cpu_position(cpu_entry_t *entry)
133{ 194{
134 cedf_domain_t *cluster = entry->cluster; 195 cedf_domain_t *cluster = entry->cluster;
135 196
136 if (likely(bheap_node_in_heap(entry->hn))) 197 if (likely(binheap_is_in_heap(&entry->hn))) {
137 bheap_delete(cpu_lower_prio, 198 binheap_delete(&entry->hn, &cluster->cpu_heap);
138 &cluster->cpu_heap, 199 }
139 entry->hn);
140 200
141 bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); 201 binheap_add(&entry->hn, &cluster->cpu_heap, cpu_entry_t, hn);
142} 202}
143 203
144/* caller must hold cedf lock */ 204/* caller must hold cedf lock */
145static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) 205static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
146{ 206{
147 struct bheap_node* hn; 207 return binheap_top_entry(&cluster->cpu_heap, cpu_entry_t, hn);
148 hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
149 return hn->value;
150} 208}
151 209
152 210
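
The hunk above replaces the old bheap-based CPU queue with the new intrusive binheap: a struct binheap_node is embedded in each element, a comparison callback defines the order, and binheap_add()/binheap_delete()/binheap_top_entry() operate on the embedded nodes directly. A minimal sketch of that pattern with a hypothetical element type; the binheap macro signatures are inferred from their usage in this patch:

#include <litmus/binheap.h>

struct demo_item {
	int prio;			/* smaller value = higher priority */
	struct binheap_node node;	/* intrusive heap linkage */
};

static int demo_order(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_item *ia = binheap_entry(a, struct demo_item, node);
	struct demo_item *ib = binheap_entry(b, struct demo_item, node);
	return ia->prio < ib->prio;	/* non-zero if 'a' belongs above 'b' */
}

static void demo_binheap_usage(void)
{
	struct binheap_handle heap;
	struct demo_item x = { .prio = 2 }, y = { .prio = 1 };
	struct demo_item *top;

	/* note: any per-node init required by binheap.h is omitted here */
	INIT_BINHEAP_HANDLE(&heap, demo_order);
	binheap_add(&x.node, &heap, struct demo_item, node);
	binheap_add(&y.node, &heap, struct demo_item, node);

	top = binheap_top_entry(&heap, struct demo_item, node);	/* y */
	binheap_delete(&top->node, &heap);
}
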
@@ -208,7 +266,7 @@ static noinline void link_task_to_cpu(struct task_struct* linked,
208} 266}
209 267
210/* unlink - Make sure a task is not linked any longer to an entry 268/* unlink - Make sure a task is not linked any longer to an entry
211 * where it was linked before. Must hold cedf_lock. 269 * where it was linked before. Must hold cluster_lock.
212 */ 270 */
213static noinline void unlink(struct task_struct* t) 271static noinline void unlink(struct task_struct* t)
214{ 272{
@@ -244,7 +302,7 @@ static void preempt(cpu_entry_t *entry)
244} 302}
245 303
246/* requeue - Put an unlinked task into gsn-edf domain. 304/* requeue - Put an unlinked task into gsn-edf domain.
247 * Caller must hold cedf_lock. 305 * Caller must hold cluster_lock.
248 */ 306 */
249static noinline void requeue(struct task_struct* task) 307static noinline void requeue(struct task_struct* task)
250{ 308{
@@ -339,13 +397,17 @@ static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
339 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 397 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
340} 398}
341 399
342/* caller holds cedf_lock */ 400/* caller holds cluster_lock */
343static noinline void job_completion(struct task_struct *t, int forced) 401static noinline void job_completion(struct task_struct *t, int forced)
344{ 402{
345 BUG_ON(!t); 403 BUG_ON(!t);
346 404
347 sched_trace_task_completion(t, forced); 405 sched_trace_task_completion(t, forced);
348 406
407#ifdef CONFIG_LITMUS_NVIDIA
408 atomic_set(&tsk_rt(t)->nv_int_count, 0);
409#endif
410
349 TRACE_TASK(t, "job_completion().\n"); 411 TRACE_TASK(t, "job_completion().\n");
350 412
351 /* set flags */ 413 /* set flags */
@@ -389,6 +451,314 @@ static void cedf_tick(struct task_struct* t)
389 } 451 }
390} 452}
391 453
454
455
456
457
458
459
460
461
462
463
464
465
466#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
467
468
469static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
470{
471 if (!atomic_read(&tasklet->count)) {
472 if(tasklet->owner) {
473 sched_trace_tasklet_begin(tasklet->owner);
474 }
475
476 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
477 {
478 BUG();
479 }
480 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
481 __FUNCTION__,
482 (tasklet->owner) ? tasklet->owner->pid : -1,
483 (tasklet->owner) ? 0 : 1);
484 tasklet->func(tasklet->data);
485 tasklet_unlock(tasklet);
486
487 if(tasklet->owner) {
488 sched_trace_tasklet_end(tasklet->owner, flushed);
489 }
490 }
491 else {
492 BUG();
493 }
494}
495
496
497static void do_lit_tasklets(cedf_domain_t* cluster, struct task_struct* sched_task)
498{
499 int work_to_do = 1;
500 struct tasklet_struct *tasklet = NULL;
501 unsigned long flags;
502
503 while(work_to_do) {
504
505 TS_NV_SCHED_BOTISR_START;
506
507 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
508
509 if(cluster->pending_tasklets.head != NULL) {
510 // remove tasklet at head.
511 struct tasklet_struct *prev = NULL;
512 tasklet = cluster->pending_tasklets.head;
513
514 // find a tasklet with prio to execute; skip ones where
515 // sched_task has a higher priority.
516			// We use the '!edf' test instead of swapping function arguments since
517 // both sched_task and owner could be NULL. In this case, we want to
518 // still execute the tasklet.
519 while(tasklet && !edf_higher_prio(tasklet->owner, sched_task)) {
520 prev = tasklet;
521 tasklet = tasklet->next;
522 }
523
524			if(tasklet) { // found something to execute
525 // remove the tasklet from the queue
526 if(prev) {
527 prev->next = tasklet->next;
528 if(prev->next == NULL) {
529 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
530						cluster->pending_tasklets.tail = &(prev->next);
531 }
532 }
533 else {
534 cluster->pending_tasklets.head = tasklet->next;
535 if(tasklet->next == NULL) {
536 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
537 cluster->pending_tasklets.tail = &(cluster->pending_tasklets.head);
538 }
539 }
540 }
541 else {
542 TRACE("%s: No tasklets with eligible priority.\n", __FUNCTION__);
543 }
544 }
545 else {
546 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
547 }
548
549 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
550
551 if(tasklet) {
552 __do_lit_tasklet(tasklet, 0ul);
553 tasklet = NULL;
554 }
555 else {
556 work_to_do = 0;
557 }
558
559 TS_NV_SCHED_BOTISR_END;
560 }
561}
562
563static void __add_pai_tasklet(struct tasklet_struct* tasklet, cedf_domain_t* cluster)
564{
565 struct tasklet_struct* step;
566
567 tasklet->next = NULL; // make sure there are no old values floating around
568
569 step = cluster->pending_tasklets.head;
570 if(step == NULL) {
571 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
572 // insert at tail.
573 *(cluster->pending_tasklets.tail) = tasklet;
574 cluster->pending_tasklets.tail = &(tasklet->next);
575 }
576 else if((*(cluster->pending_tasklets.tail) != NULL) &&
577 edf_higher_prio((*(cluster->pending_tasklets.tail))->owner, tasklet->owner)) {
578 // insert at tail.
579 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
580
581 *(cluster->pending_tasklets.tail) = tasklet;
582 cluster->pending_tasklets.tail = &(tasklet->next);
583 }
584 else {
585
586 // insert the tasklet somewhere in the middle.
587
588 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
589
590 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
591 step = step->next;
592 }
593
594 // insert tasklet right before step->next.
595
596 TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__,
597 tasklet->owner->pid,
598 (step->owner) ?
599 step->owner->pid :
600 -1,
601 (step->next) ?
602 ((step->next->owner) ?
603 step->next->owner->pid :
604 -1) :
605 -1);
606
607 tasklet->next = step->next;
608 step->next = tasklet;
609
610 // patch up the head if needed.
611 if(cluster->pending_tasklets.head == step)
612 {
613 TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
614 cluster->pending_tasklets.head = tasklet;
615 }
616 }
617}
618
619static void cedf_run_tasklets(struct task_struct* sched_task)
620{
621 cedf_domain_t* cluster;
622
623 preempt_disable();
624
625 cluster = (is_realtime(sched_task)) ?
626 task_cpu_cluster(sched_task) :
627 remote_cluster(smp_processor_id());
628
629 if(cluster && cluster->pending_tasklets.head != NULL) {
630 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
631 do_lit_tasklets(cluster, sched_task);
632 }
633
634 preempt_enable_no_resched();
635}
636
637
638
639static int cedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
640{
641#if 0
642 cedf_domain_t *cluster = NULL;
643 cpu_entry_t *targetCPU = NULL;
644 int thisCPU;
645 int runLocal = 0;
646 int runNow = 0;
647 unsigned long flags;
648
649 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
650 {
651 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
652 return 0;
653 }
654
655 cluster = task_cpu_cluster(tasklet->owner);
656
657 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
658
659 thisCPU = smp_processor_id();
660
661#ifdef CONFIG_SCHED_CPU_AFFINITY
662 {
663 cpu_entry_t* affinity = NULL;
664
665 // use this CPU if it is in our cluster and isn't running any RT work.
666 if(cpu_isset(thisCPU, *cluster->cpu_map) && (__get_cpu_var(cedf_cpu_entries).linked == NULL)) {
667 affinity = &(__get_cpu_var(cedf_cpu_entries));
668 }
669 else {
670 // this CPU is busy or shouldn't run tasklet in this cluster.
671			// look for available nearby CPUs.
672 // NOTE: Affinity towards owner and not this CPU. Is this right?
673 affinity =
674 cedf_get_nearest_available_cpu(cluster,
675 &per_cpu(cedf_cpu_entries, task_cpu(tasklet->owner)));
676 }
677
678 targetCPU = affinity;
679 }
680#endif
681
682 if (targetCPU == NULL) {
683 targetCPU = lowest_prio_cpu(cluster);
684 }
685
686 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
687 if (thisCPU == targetCPU->cpu) {
688 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
689 runLocal = 1;
690 runNow = 1;
691 }
692 else {
693 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
694 runLocal = 0;
695 runNow = 1;
696 }
697 }
698 else {
699 runLocal = 0;
700 runNow = 0;
701 }
702
703 if(!runLocal) {
704 // enqueue the tasklet
705 __add_pai_tasklet(tasklet, cluster);
706 }
707
708 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
709
710
711 if (runLocal /*&& runNow */) { // runNow == 1 is implied
712 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
713 __do_lit_tasklet(tasklet, 0ul);
714 }
715 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
716 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
717 preempt(targetCPU); // need to be protected by cluster_lock?
718 }
719 else {
720 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
721 }
722#else
723 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
724 __do_lit_tasklet(tasklet, 0ul);
725#endif
726 return(1); // success
727}
728
729static void cedf_change_prio_pai_tasklet(struct task_struct *old_prio,
730 struct task_struct *new_prio)
731{
732 struct tasklet_struct* step;
733 unsigned long flags;
734 cedf_domain_t *cluster;
735 struct task_struct *probe;
736
737 // identify the cluster by the assignment of these tasks. one should
738 // be non-NULL.
739 probe = (old_prio) ? old_prio : new_prio;
740
741 if(probe) {
742 cluster = task_cpu_cluster(probe);
743
744 if(cluster->pending_tasklets.head != NULL) {
745 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
746 for(step = cluster->pending_tasklets.head; step != NULL; step = step->next) {
747 if(step->owner == old_prio) {
748 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
749 step->owner = new_prio;
750 }
751 }
752 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
753 }
754 }
755 else {
756		TRACE("%s: Both priorities were NULL\n", __FUNCTION__);
757 }
758}
759
760#endif // PAI
761
392/* Getting schedule() right is a bit tricky. schedule() may not make any 762/* Getting schedule() right is a bit tricky. schedule() may not make any
393 * assumptions on the state of the current task since it may be called for a 763 * assumptions on the state of the current task since it may be called for a
394 * number of reasons. The reasons include a scheduler_tick() determined that it 764 * number of reasons. The reasons include a scheduler_tick() determined that it
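
The PAI code above keeps pending tasklets in a singly-linked list with an indirect tail pointer: pending_tasklets.tail always points at the next field of the last element, or at pending_tasklets.head when the list is empty, so an append never needs to special-case the empty list. A minimal sketch of that invariant with a hypothetical node type (not part of the patch):

struct demo_node {
	struct demo_node *next;
	int data;
};

struct demo_queue {
	struct demo_node *head;
	struct demo_node **tail;	/* &head when empty, &last->next otherwise */
};

static void demo_queue_init(struct demo_queue *q)
{
	q->head = NULL;
	q->tail = &q->head;
}

static void demo_queue_append(struct demo_queue *q, struct demo_node *n)
{
	n->next = NULL;
	*q->tail = n;		/* link after the current last element (or at head) */
	q->tail = &n->next;	/* the new element is now the last one */
}
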
@@ -465,6 +835,19 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
465 if (blocks) 835 if (blocks)
466 unlink(entry->scheduled); 836 unlink(entry->scheduled);
467 837
838#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
839 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
840 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
841 // don't track preemptions or locking protocol suspensions.
842 TRACE_TASK(entry->scheduled, "stopping GPU tracker.\n");
843 stop_gpu_tracker(entry->scheduled);
844 }
845 else if(blocks && !tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
846 TRACE_TASK(entry->scheduled, "GPU tracker remains on during suspension.\n");
847 }
848 }
849#endif
850
468 /* Request a sys_exit_np() call if we would like to preempt but cannot. 851 /* Request a sys_exit_np() call if we would like to preempt but cannot.
469 * We need to make sure to update the link structure anyway in case 852 * We need to make sure to update the link structure anyway in case
470 * that we are still linked. Multiple calls to request_exit_np() don't 853 * that we are still linked. Multiple calls to request_exit_np() don't
@@ -514,7 +897,7 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
514 raw_spin_unlock(&cluster->cluster_lock); 897 raw_spin_unlock(&cluster->cluster_lock);
515 898
516#ifdef WANT_ALL_SCHED_EVENTS 899#ifdef WANT_ALL_SCHED_EVENTS
517 TRACE("cedf_lock released, next=0x%p\n", next); 900 TRACE("cluster_lock released, next=0x%p\n", next);
518 901
519 if (next) 902 if (next)
520 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); 903 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
@@ -522,7 +905,6 @@ static struct task_struct* cedf_schedule(struct task_struct * prev)
522 TRACE("becomes idle at %llu.\n", litmus_clock()); 905 TRACE("becomes idle at %llu.\n", litmus_clock());
523#endif 906#endif
524 907
525
526 return next; 908 return next;
527} 909}
528 910
@@ -548,7 +930,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
548 cpu_entry_t* entry; 930 cpu_entry_t* entry;
549 cedf_domain_t* cluster; 931 cedf_domain_t* cluster;
550 932
551 TRACE("gsn edf: task new %d\n", t->pid); 933 TRACE("c-edf: task new %d\n", t->pid);
552 934
553 /* the cluster doesn't change even if t is running */ 935 /* the cluster doesn't change even if t is running */
554 cluster = task_cpu_cluster(t); 936 cluster = task_cpu_cluster(t);
@@ -586,7 +968,7 @@ static void cedf_task_new(struct task_struct * t, int on_rq, int running)
586static void cedf_task_wake_up(struct task_struct *task) 968static void cedf_task_wake_up(struct task_struct *task)
587{ 969{
588 unsigned long flags; 970 unsigned long flags;
589 lt_t now; 971 //lt_t now;
590 cedf_domain_t *cluster; 972 cedf_domain_t *cluster;
591 973
592 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 974 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
@@ -594,6 +976,8 @@ static void cedf_task_wake_up(struct task_struct *task)
594 cluster = task_cpu_cluster(task); 976 cluster = task_cpu_cluster(task);
595 977
596 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 978 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
979
980#if 0 // sporadic task model
597 /* We need to take suspensions because of semaphores into 981 /* We need to take suspensions because of semaphores into
598 * account! If a job resumes after being suspended due to acquiring 982 * account! If a job resumes after being suspended due to acquiring
599 * a semaphore, it should never be treated as a new job release. 983 * a semaphore, it should never be treated as a new job release.
@@ -615,7 +999,13 @@ static void cedf_task_wake_up(struct task_struct *task)
615 } 999 }
616 } 1000 }
617 } 1001 }
618 cedf_job_arrival(task); 1002#else
1003 set_rt_flags(task, RT_F_RUNNING); // periodic model
1004#endif
1005
1006 if(tsk_rt(task)->linked_on == NO_CPU)
1007 cedf_job_arrival(task);
1008
619 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); 1009 raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags);
620} 1010}
621 1011
@@ -642,6 +1032,10 @@ static void cedf_task_exit(struct task_struct * t)
642 unsigned long flags; 1032 unsigned long flags;
643 cedf_domain_t *cluster = task_cpu_cluster(t); 1033 cedf_domain_t *cluster = task_cpu_cluster(t);
644 1034
1035#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1036 cedf_change_prio_pai_tasklet(t, NULL);
1037#endif
1038
645 /* unlink if necessary */ 1039 /* unlink if necessary */
646 raw_spin_lock_irqsave(&cluster->cluster_lock, flags); 1040 raw_spin_lock_irqsave(&cluster->cluster_lock, flags);
647 unlink(t); 1041 unlink(t);
@@ -659,13 +1053,536 @@ static void cedf_task_exit(struct task_struct * t)
659 1053
660static long cedf_admit_task(struct task_struct* tsk) 1054static long cedf_admit_task(struct task_struct* tsk)
661{ 1055{
1056#ifdef CONFIG_LITMUS_NESTED_LOCKING
1057 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1058 edf_max_heap_base_priority_order);
1059#endif
1060
662 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL; 1061 return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
663} 1062}
664 1063
665/* total number of cluster */ 1064
666static int num_clusters; 1065
667/* we do not support cluster of different sizes */ 1066#ifdef CONFIG_LITMUS_LOCKING
668static unsigned int cluster_size; 1067
1068#include <litmus/fdso.h>
1069
1070
1071
1072/* called with IRQs off */
1073static void __increase_priority_inheritance(struct task_struct* t,
1074 struct task_struct* prio_inh)
1075{
1076 int linked_on;
1077 int check_preempt = 0;
1078
1079 cedf_domain_t* cluster = task_cpu_cluster(t);
1080
1081#ifdef CONFIG_LITMUS_NESTED_LOCKING
1082 /* this sanity check allows for weaker locking in protocols */
1083 /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
1084 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1085#endif
1086 TRACE_TASK(t, "inherits priority from %s/%d\n",
1087 prio_inh->comm, prio_inh->pid);
1088 tsk_rt(t)->inh_task = prio_inh;
1089
1090 linked_on = tsk_rt(t)->linked_on;
1091
1092 /* If it is scheduled, then we need to reorder the CPU heap. */
1093 if (linked_on != NO_CPU) {
1094 TRACE_TASK(t, "%s: linked on %d\n",
1095 __FUNCTION__, linked_on);
1096 /* Holder is scheduled; need to re-order CPUs.
1097 * We can't use heap_decrease() here since
1098 * the cpu_heap is ordered in reverse direction, so
1099 * it is actually an increase. */
1100 binheap_delete(&per_cpu(cedf_cpu_entries, linked_on).hn,
1101 &cluster->cpu_heap);
1102 binheap_add(&per_cpu(cedf_cpu_entries, linked_on).hn,
1103 &cluster->cpu_heap, cpu_entry_t, hn);
1104
1105 } else {
1106 /* holder may be queued: first stop queue changes */
1107 raw_spin_lock(&cluster->domain.release_lock);
1108 if (is_queued(t)) {
1109 TRACE_TASK(t, "%s: is queued\n",
1110 __FUNCTION__);
1111 /* We need to update the position of holder in some
1112			 * heap. Note that this could be a release heap if
1113 * budget enforcement is used and this job overran. */
1114 check_preempt =
1115 !bheap_decrease(edf_ready_order, tsk_rt(t)->heap_node);
1116 } else {
1117 /* Nothing to do: if it is not queued and not linked
1118 * then it is either sleeping or currently being moved
1119 * by other code (e.g., a timer interrupt handler) that
1120 * will use the correct priority when enqueuing the
1121 * task. */
1122 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1123 __FUNCTION__);
1124 }
1125 raw_spin_unlock(&cluster->domain.release_lock);
1126
1127 /* If holder was enqueued in a release heap, then the following
1128 * preemption check is pointless, but we can't easily detect
1129 * that case. If you want to fix this, then consider that
1130 * simply adding a state flag requires O(n) time to update when
1131 * releasing n tasks, which conflicts with the goal to have
1132 * O(log n) merges. */
1133 if (check_preempt) {
1134 /* heap_decrease() hit the top level of the heap: make
1135 * sure preemption checks get the right task, not the
1136 * potentially stale cache. */
1137 bheap_uncache_min(edf_ready_order,
1138 &cluster->domain.ready_queue);
1139 check_for_preemptions(cluster);
1140 }
1141 }
1142#ifdef CONFIG_LITMUS_NESTED_LOCKING
1143 }
1144 else {
1145 TRACE_TASK(t, "Spurious invalid priority increase. "
1146 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1147		           "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1148 t->comm, t->pid,
1149 effective_priority(t)->comm, effective_priority(t)->pid,
1150 (prio_inh) ? prio_inh->comm : "nil",
1151 (prio_inh) ? prio_inh->pid : -1);
1152 WARN_ON(!prio_inh);
1153 }
1154#endif
1155}
1156
1157/* called with IRQs off */
1158static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1159{
1160 cedf_domain_t* cluster = task_cpu_cluster(t);
1161
1162 raw_spin_lock(&cluster->cluster_lock);
1163
1164 __increase_priority_inheritance(t, prio_inh);
1165
1166#ifdef CONFIG_LITMUS_SOFTIRQD
1167 if(tsk_rt(t)->cur_klitirqd != NULL)
1168 {
1169 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
1170 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1171
1172 __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1173 }
1174#endif
1175
1176 raw_spin_unlock(&cluster->cluster_lock);
1177
1178#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1179 if(tsk_rt(t)->held_gpus) {
1180 int i;
1181 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1182 i < NV_DEVICE_NUM;
1183 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1184 pai_check_priority_increase(t, i);
1185 }
1186 }
1187#endif
1188}
1189
1190/* called with IRQs off */
1191static void __decrease_priority_inheritance(struct task_struct* t,
1192 struct task_struct* prio_inh)
1193{
1194#ifdef CONFIG_LITMUS_NESTED_LOCKING
1195 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
1196#endif
1197 /* A job only stops inheriting a priority when it releases a
1198 * resource. Thus we can make the following assumption.*/
1199 if(prio_inh)
1200 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1201 prio_inh->comm, prio_inh->pid);
1202 else
1203 TRACE_TASK(t, "base priority restored.\n");
1204
1205 tsk_rt(t)->inh_task = prio_inh;
1206
1207 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1208 TRACE_TASK(t, "is scheduled.\n");
1209
1210 /* Check if rescheduling is necessary. We can't use heap_decrease()
1211 * since the priority was effectively lowered. */
1212 unlink(t);
1213 cedf_job_arrival(t);
1214 }
1215 else {
1216 cedf_domain_t* cluster = task_cpu_cluster(t);
1217 /* task is queued */
1218 raw_spin_lock(&cluster->domain.release_lock);
1219 if (is_queued(t)) {
1220 TRACE_TASK(t, "is queued.\n");
1221
1222 /* decrease in priority, so we have to re-add to binomial heap */
1223 unlink(t);
1224 cedf_job_arrival(t);
1225 }
1226 else {
1227 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
1228 }
1229 raw_spin_unlock(&cluster->domain.release_lock);
1230 }
1231#ifdef CONFIG_LITMUS_NESTED_LOCKING
1232 }
1233 else {
1234 TRACE_TASK(t, "Spurious invalid priority decrease. "
1235 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1236		           "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1237 t->comm, t->pid,
1238 effective_priority(t)->comm, effective_priority(t)->pid,
1239 (prio_inh) ? prio_inh->comm : "nil",
1240 (prio_inh) ? prio_inh->pid : -1);
1241 }
1242#endif
1243}
1244
1245static void decrease_priority_inheritance(struct task_struct* t,
1246 struct task_struct* prio_inh)
1247{
1248 cedf_domain_t* cluster = task_cpu_cluster(t);
1249
1250 raw_spin_lock(&cluster->cluster_lock);
1251 __decrease_priority_inheritance(t, prio_inh);
1252
1253#ifdef CONFIG_LITMUS_SOFTIRQD
1254 if(tsk_rt(t)->cur_klitirqd != NULL)
1255 {
1256 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1257 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1258
1259 __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1260 }
1261#endif
1262
1263 raw_spin_unlock(&cluster->cluster_lock);
1264
1265#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1266 if(tsk_rt(t)->held_gpus) {
1267 int i;
1268 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1269 i < NV_DEVICE_NUM;
1270 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1271 pai_check_priority_decrease(t, i);
1272 }
1273 }
1274#endif
1275}
1276
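
Both inheritance paths above walk the GPUs a task currently holds by scanning the tsk_rt(t)->held_gpus bitmask with find_first_bit()/find_next_bit(). A minimal sketch of that iteration over a single-word bitmask; note that the bitops helpers take the bitmap length in bits, and the callback-based wrapper below is hypothetical:

#include <linux/bitops.h>

static void for_each_held_gpu(unsigned long held_gpus, void (*cb)(unsigned int gpu))
{
	unsigned int nbits = BITS_PER_LONG;	/* held_gpus is one word here */
	unsigned int gpu;

	for (gpu = find_first_bit(&held_gpus, nbits);
	     gpu < nbits;
	     gpu = find_next_bit(&held_gpus, nbits, gpu + 1))
		cb(gpu);
}
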
1277
1278
1279
1280
1281#ifdef CONFIG_LITMUS_SOFTIRQD
1282/* called with IRQs off */
1283static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1284 struct task_struct* old_owner,
1285 struct task_struct* new_owner)
1286{
1287 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1288
1289 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1290
1291 raw_spin_lock(&cluster->cluster_lock);
1292
1293 if(old_owner != new_owner)
1294 {
1295 if(old_owner)
1296 {
1297 // unreachable?
1298 tsk_rt(old_owner)->cur_klitirqd = NULL;
1299 }
1300
1301 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
1302 new_owner->comm, new_owner->pid);
1303
1304 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
1305 }
1306
1307 __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
1308
1309 __increase_priority_inheritance(klitirqd,
1310 (tsk_rt(new_owner)->inh_task == NULL) ?
1311 new_owner :
1312 tsk_rt(new_owner)->inh_task);
1313
1314 raw_spin_unlock(&cluster->cluster_lock);
1315}
1316
1317
1318/* called with IRQs off */
1319static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1320 struct task_struct* old_owner,
1321 struct task_struct* new_owner)
1322{
1323 cedf_domain_t* cluster = task_cpu_cluster(klitirqd);
1324
1325 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1326
1327 raw_spin_lock(&cluster->cluster_lock);
1328
1329 TRACE_TASK(klitirqd, "priority restored\n");
1330
1331 __decrease_priority_inheritance(klitirqd, new_owner);
1332
1333 tsk_rt(old_owner)->cur_klitirqd = NULL;
1334
1335 raw_spin_unlock(&cluster->cluster_lock);
1336}
1337#endif // CONFIG_LITMUS_SOFTIRQD
1338
1339
1340
1341
1342
1343
1344
1345#ifdef CONFIG_LITMUS_NESTED_LOCKING
1346
1347/* called with IRQs off */
1348/* preconditions:
1349 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1350 (2) The lock 'to_unlock' is held.
1351 */
1352static void nested_increase_priority_inheritance(struct task_struct* t,
1353 struct task_struct* prio_inh,
1354 raw_spinlock_t *to_unlock,
1355 unsigned long irqflags)
1356{
1357 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1358
 1359	if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1360 increase_priority_inheritance(t, prio_inh); // increase our prio.
1361 }
1362
1363 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1364
1365
1366 if(blocked_lock) {
1367 if(blocked_lock->ops->propagate_increase_inheritance) {
1368 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1369 blocked_lock->ident);
1370
1371 // beware: recursion
1372 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1373 t, to_unlock,
1374 irqflags);
1375 }
1376 else {
1377 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1378 blocked_lock->ident);
1379 unlock_fine_irqrestore(to_unlock, irqflags);
1380 }
1381 }
1382 else {
1383 TRACE_TASK(t, "is not blocked. No propagation.\n");
1384 unlock_fine_irqrestore(to_unlock, irqflags);
1385 }
1386}
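/* Illustrative caller-side sketch (not part of the patch; lock_fine_irqsave()
 * and all parameter names here are assumptions) of the hand-over-hand
 * protocol stated in the preconditions above: the caller acquires the
 * fine-grained lock guarding the wait queue plus the inheritor's
 * hp_blocked_tasks_lock, and nested_increase_priority_inheritance() is then
 * responsible for dropping both, either directly or further down the
 * propagate_increase_inheritance() callback chain. */
static void example_boost_lock_owner(raw_spinlock_t *queue_lock,
				     struct task_struct *owner,
				     struct task_struct *new_hp_waiter)
{
	unsigned long flags;

	lock_fine_irqsave(queue_lock, flags);			/* 'to_unlock' */
	raw_spin_lock(&tsk_rt(owner)->hp_blocked_tasks_lock);

	/* returns with hp_blocked_tasks_lock and queue_lock released */
	nested_increase_priority_inheritance(owner, new_hp_waiter,
					     queue_lock, flags);
}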
1387
1388/* called with IRQs off */
1389/* preconditions:
1390 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1391 (2) The lock 'to_unlock' is held.
1392 */
1393static void nested_decrease_priority_inheritance(struct task_struct* t,
1394 struct task_struct* prio_inh,
1395 raw_spinlock_t *to_unlock,
1396 unsigned long irqflags)
1397{
1398 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1399 decrease_priority_inheritance(t, prio_inh);
1400
1401 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1402
1403 if(blocked_lock) {
1404 if(blocked_lock->ops->propagate_decrease_inheritance) {
1405 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1406 blocked_lock->ident);
1407
1408 // beware: recursion
1409 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1410 to_unlock,
1411 irqflags);
1412 }
1413 else {
1414 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1415 blocked_lock);
1416 unlock_fine_irqrestore(to_unlock, irqflags);
1417 }
1418 }
1419 else {
1420 TRACE_TASK(t, "is not blocked. No propagation.\n");
1421 unlock_fine_irqrestore(to_unlock, irqflags);
1422 }
1423}
1424
1425
1426/* ******************** RSM MUTEX ********************** */
1427
1428static struct litmus_lock_ops cedf_rsm_mutex_lock_ops = {
1429 .lock = rsm_mutex_lock,
1430 .unlock = rsm_mutex_unlock,
1431 .close = rsm_mutex_close,
1432 .deallocate = rsm_mutex_free,
1433
1434 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1435 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1436
1437#ifdef CONFIG_LITMUS_DGL_SUPPORT
1438 .dgl_lock = rsm_mutex_dgl_lock,
1439 .is_owner = rsm_mutex_is_owner,
1440 .enable_priority = rsm_mutex_enable_priority,
1441#endif
1442};
1443
1444static struct litmus_lock* cedf_new_rsm_mutex(void)
1445{
1446 return rsm_mutex_new(&cedf_rsm_mutex_lock_ops);
1447}
1448
1449/* ******************** IKGLP ********************** */
1450
1451static struct litmus_lock_ops cedf_ikglp_lock_ops = {
1452 .lock = ikglp_lock,
1453 .unlock = ikglp_unlock,
1454 .close = ikglp_close,
1455 .deallocate = ikglp_free,
1456
1457 // ikglp can only be an outer-most lock.
1458 .propagate_increase_inheritance = NULL,
1459 .propagate_decrease_inheritance = NULL,
1460};
1461
1462static struct litmus_lock* cedf_new_ikglp(void* __user arg)
1463{
1464 // assumes clusters of uniform size.
1465 return ikglp_new(cluster_size/num_clusters, &cedf_ikglp_lock_ops, arg);
1466}
1467
1468#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1469
1470
1471
1472
1473/* ******************** KFMLP support ********************** */
1474
1475static struct litmus_lock_ops cedf_kfmlp_lock_ops = {
1476 .lock = kfmlp_lock,
1477 .unlock = kfmlp_unlock,
1478 .close = kfmlp_close,
1479 .deallocate = kfmlp_free,
1480
1481 // kfmlp can only be an outer-most lock.
1482 .propagate_increase_inheritance = NULL,
1483 .propagate_decrease_inheritance = NULL,
1484};
1485
1486
1487static struct litmus_lock* cedf_new_kfmlp(void* __user arg)
1488{
1489 return kfmlp_new(&cedf_kfmlp_lock_ops, arg);
1490}
1491
1492
1493/* **** lock constructor **** */
1494
1495static long cedf_allocate_lock(struct litmus_lock **lock, int type,
1496 void* __user args)
1497{
1498 int err;
1499
1500 switch (type) {
1501#ifdef CONFIG_LITMUS_NESTED_LOCKING
1502 case RSM_MUTEX:
1503 *lock = cedf_new_rsm_mutex();
1504 break;
1505
1506 case IKGLP_SEM:
1507 *lock = cedf_new_ikglp(args);
1508 break;
1509#endif
1510 case KFMLP_SEM:
1511 *lock = cedf_new_kfmlp(args);
1512 break;
1513
1514 default:
1515 err = -ENXIO;
1516 goto UNSUPPORTED_LOCK;
1517 };
1518
1519 if (*lock)
1520 err = 0;
1521 else
1522 err = -ENOMEM;
1523
1524UNSUPPORTED_LOCK:
1525 return err;
1526}
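/* For orientation, a sketch under stated assumptions (nothing below is
 * defined by this patch): user space typically reaches cedf_allocate_lock()
 * through liblitmus' file-descriptor-attached objects. The calls and
 * constants used here (od_open(), litmus_lock(), litmus_unlock(),
 * od_close(), KFMLP_SEM) are assumed to exist with roughly these signatures
 * in the matching user-space library:
 *
 *	int fd = open("my_locks", O_RDONLY | O_CREAT, S_IRUSR);
 *	int od = od_open(fd, KFMLP_SEM, 0);	// lands in the switch above
 *	litmus_lock(od);
 *	// ... critical section, e.g. a GPU request ...
 *	litmus_unlock(od);
 *	od_close(od);
 */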
1527
1528#endif // CONFIG_LITMUS_LOCKING
1529
1530
1531#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1532static struct affinity_observer_ops cedf_kfmlp_affinity_ops = {
1533 .close = kfmlp_aff_obs_close,
1534 .deallocate = kfmlp_aff_obs_free,
1535};
1536
1537#ifdef CONFIG_LITMUS_NESTED_LOCKING
1538static struct affinity_observer_ops cedf_ikglp_affinity_ops = {
1539 .close = ikglp_aff_obs_close,
1540 .deallocate = ikglp_aff_obs_free,
1541};
1542#endif
1543
1544static long cedf_allocate_affinity_observer(struct affinity_observer **aff_obs,
1545 int type,
1546 void* __user args)
1547{
1548 int err;
1549
1550 switch (type) {
1551
1552 case KFMLP_SIMPLE_GPU_AFF_OBS:
1553 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1554 break;
1555
1556 case KFMLP_GPU_AFF_OBS:
1557 *aff_obs = kfmlp_gpu_aff_obs_new(&cedf_kfmlp_affinity_ops, args);
1558 break;
1559
1560#ifdef CONFIG_LITMUS_NESTED_LOCKING
1561 case IKGLP_SIMPLE_GPU_AFF_OBS:
1562 *aff_obs = ikglp_simple_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1563 break;
1564
1565 case IKGLP_GPU_AFF_OBS:
1566 *aff_obs = ikglp_gpu_aff_obs_new(&cedf_ikglp_affinity_ops, args);
1567 break;
1568#endif
1569 default:
1570 err = -ENXIO;
1571 goto UNSUPPORTED_AFF_OBS;
1572 };
1573
1574 if (*aff_obs)
1575 err = 0;
1576 else
1577 err = -ENOMEM;
1578
1579UNSUPPORTED_AFF_OBS:
1580 return err;
1581}
1582#endif
1583
1584
1585
669 1586
670#ifdef VERBOSE_INIT 1587#ifdef VERBOSE_INIT
671static void print_cluster_topology(cpumask_var_t mask, int cpu) 1588static void print_cluster_topology(cpumask_var_t mask, int cpu)
@@ -680,16 +1597,17 @@ static void print_cluster_topology(cpumask_var_t mask, int cpu)
680} 1597}
681#endif 1598#endif
682 1599
683static int clusters_allocated = 0;
684
685static void cleanup_cedf(void) 1600static void cleanup_cedf(void)
686{ 1601{
687 int i; 1602 int i;
688 1603
1604#ifdef CONFIG_LITMUS_NVIDIA
1605 shutdown_nvidia_info();
1606#endif
1607
689 if (clusters_allocated) { 1608 if (clusters_allocated) {
690 for (i = 0; i < num_clusters; i++) { 1609 for (i = 0; i < num_clusters; i++) {
691 kfree(cedf[i].cpus); 1610 kfree(cedf[i].cpus);
692 kfree(cedf[i].heap_node);
693 free_cpumask_var(cedf[i].cpu_map); 1611 free_cpumask_var(cedf[i].cpu_map);
694 } 1612 }
695 1613
@@ -749,12 +1667,16 @@ static long cedf_activate_plugin(void)
749 1667
750 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t), 1668 cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
751 GFP_ATOMIC); 1669 GFP_ATOMIC);
752 cedf[i].heap_node = kmalloc( 1670 INIT_BINHEAP_HANDLE(&(cedf[i].cpu_heap), cpu_lower_prio);
753 cluster_size * sizeof(struct bheap_node),
754 GFP_ATOMIC);
755 bheap_init(&(cedf[i].cpu_heap));
756 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); 1671 edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
757 1672
1673
1674#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1675 cedf[i].pending_tasklets.head = NULL;
1676 cedf[i].pending_tasklets.tail = &(cedf[i].pending_tasklets.head);
1677#endif
1678
1679
758 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) 1680 if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
759 return -ENOMEM; 1681 return -ENOMEM;
760#ifdef CONFIG_RELEASE_MASTER 1682#ifdef CONFIG_RELEASE_MASTER
@@ -765,6 +1687,10 @@ static long cedf_activate_plugin(void)
765 /* cycle through cluster and add cpus to them */ 1687 /* cycle through cluster and add cpus to them */
766 for (i = 0; i < num_clusters; i++) { 1688 for (i = 0; i < num_clusters; i++) {
767 1689
1690#ifdef CONFIG_LITMUS_DGL_SUPPORT
1691 raw_spin_lock_init(&cedf[i].dgl_lock);
1692#endif
1693
768 for_each_online_cpu(cpu) { 1694 for_each_online_cpu(cpu) {
769 /* check if the cpu is already in a cluster */ 1695 /* check if the cpu is already in a cluster */
770 for (j = 0; j < num_clusters; j++) 1696 for (j = 0; j < num_clusters; j++)
@@ -795,8 +1721,8 @@ static long cedf_activate_plugin(void)
795 atomic_set(&entry->will_schedule, 0); 1721 atomic_set(&entry->will_schedule, 0);
796 entry->cpu = ccpu; 1722 entry->cpu = ccpu;
797 entry->cluster = &cedf[i]; 1723 entry->cluster = &cedf[i];
798 entry->hn = &(cedf[i].heap_node[cpu_count]); 1724
799 bheap_node_init(&entry->hn, entry); 1725 INIT_BINHEAP_NODE(&entry->hn);
800 1726
801 cpu_count++; 1727 cpu_count++;
802 1728
@@ -813,6 +1739,40 @@ static long cedf_activate_plugin(void)
813 } 1739 }
814 } 1740 }
815 1741
1742#ifdef CONFIG_LITMUS_SOFTIRQD
1743 {
1744 /* distribute the daemons evenly across the clusters. */
1745 int* affinity = kmalloc(NR_LITMUS_SOFTIRQD * sizeof(int), GFP_ATOMIC);
1746 int num_daemons_per_cluster = NR_LITMUS_SOFTIRQD / num_clusters;
1747 int left_over = NR_LITMUS_SOFTIRQD % num_clusters;
1748
1749 int daemon = 0;
1750 for(i = 0; i < num_clusters; ++i)
1751 {
1752 int num_on_this_cluster = num_daemons_per_cluster;
1753 if(left_over)
1754 {
1755 ++num_on_this_cluster;
1756 --left_over;
1757 }
1758
1759 for(j = 0; j < num_on_this_cluster; ++j)
1760 {
1761 // first CPU of this cluster
1762 affinity[daemon++] = i*cluster_size;
1763 }
1764 }
1765
1766 spawn_klitirqd(affinity);
1767
1768 kfree(affinity);
1769 }
1770#endif
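/* Worked example of the distribution above (illustrative only): with
 * NR_LITMUS_SOFTIRQD = 9, num_clusters = 4 and cluster_size = 2, each
 * cluster receives 9/4 = 2 daemons and the 9%4 = 1 left-over daemon goes to
 * cluster 0, so affinity[] = {0,0,0, 2,2, 4,4, 6,6}; every daemon is pinned
 * to the first CPU of its cluster. */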
1771
1772#ifdef CONFIG_LITMUS_NVIDIA
1773 init_nvidia_info();
1774#endif
1775
816 free_cpumask_var(mask); 1776 free_cpumask_var(mask);
817 clusters_allocated = 1; 1777 clusters_allocated = 1;
818 return 0; 1778 return 0;
@@ -831,6 +1791,32 @@ static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
831 .task_block = cedf_task_block, 1791 .task_block = cedf_task_block,
832 .admit_task = cedf_admit_task, 1792 .admit_task = cedf_admit_task,
833 .activate_plugin = cedf_activate_plugin, 1793 .activate_plugin = cedf_activate_plugin,
1794 .compare = edf_higher_prio,
1795#ifdef CONFIG_LITMUS_LOCKING
1796 .allocate_lock = cedf_allocate_lock,
1797 .increase_prio = increase_priority_inheritance,
1798 .decrease_prio = decrease_priority_inheritance,
1799#endif
1800#ifdef CONFIG_LITMUS_NESTED_LOCKING
1801 .nested_increase_prio = nested_increase_priority_inheritance,
1802 .nested_decrease_prio = nested_decrease_priority_inheritance,
1803 .__compare = __edf_higher_prio,
1804#endif
1805#ifdef CONFIG_LITMUS_DGL_SUPPORT
1806 .get_dgl_spinlock = cedf_get_dgl_spinlock,
1807#endif
1808#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1809 .allocate_aff_obs = cedf_allocate_affinity_observer,
1810#endif
1811#ifdef CONFIG_LITMUS_SOFTIRQD
1812 .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
1813 .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
1814#endif
1815#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1816 .enqueue_pai_tasklet = cedf_enqueue_pai_tasklet,
1817 .change_prio_pai_tasklet = cedf_change_prio_pai_tasklet,
1818 .run_tasklets = cedf_run_tasklets,
1819#endif
834}; 1820};
835 1821
836static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; 1822static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL;
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index 6ed504f4750e..8c48757fa86c 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -12,23 +12,49 @@
12#include <linux/percpu.h> 12#include <linux/percpu.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/uaccess.h>
16#include <linux/module.h>
15 17
16#include <litmus/litmus.h> 18#include <litmus/litmus.h>
17#include <litmus/jobs.h> 19#include <litmus/jobs.h>
18#include <litmus/sched_plugin.h> 20#include <litmus/sched_plugin.h>
19#include <litmus/edf_common.h> 21#include <litmus/edf_common.h>
20#include <litmus/sched_trace.h> 22#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22 23
23#include <litmus/preempt.h> 24#include <litmus/preempt.h>
24 25
25#include <litmus/bheap.h> 26#include <litmus/bheap.h>
27#include <litmus/binheap.h>
28
29#ifdef CONFIG_LITMUS_LOCKING
30#include <litmus/kfmlp_lock.h>
31#endif
32
33#ifdef CONFIG_LITMUS_NESTED_LOCKING
34#include <litmus/rsm_lock.h>
35#include <litmus/ikglp_lock.h>
36#endif
26 37
27#ifdef CONFIG_SCHED_CPU_AFFINITY 38#ifdef CONFIG_SCHED_CPU_AFFINITY
28#include <litmus/affinity.h> 39#include <litmus/affinity.h>
29#endif 40#endif
30 41
31#include <linux/module.h> 42#ifdef CONFIG_LITMUS_SOFTIRQD
43#include <litmus/litmus_softirq.h>
44#endif
45
46#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
47#include <linux/interrupt.h>
48#include <litmus/trace.h>
49#endif
50
51#ifdef CONFIG_LITMUS_NVIDIA
52#include <litmus/nvidia_info.h>
53#endif
54
55#if defined(CONFIG_LITMUS_AFFINITY_LOCKING) && defined(CONFIG_LITMUS_NVIDIA)
56#include <litmus/gpu_affinity.h>
57#endif
32 58
33/* Overview of GSN-EDF operations. 59/* Overview of GSN-EDF operations.
34 * 60 *
@@ -103,52 +129,70 @@ typedef struct {
103 int cpu; 129 int cpu;
104 struct task_struct* linked; /* only RT tasks */ 130 struct task_struct* linked; /* only RT tasks */
105 struct task_struct* scheduled; /* only RT tasks */ 131 struct task_struct* scheduled; /* only RT tasks */
106 struct bheap_node* hn; 132 struct binheap_node hn;
107} cpu_entry_t; 133} cpu_entry_t;
108DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries); 134DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
109 135
110cpu_entry_t* gsnedf_cpus[NR_CPUS]; 136cpu_entry_t* gsnedf_cpus[NR_CPUS];
111 137
112/* the cpus queue themselves according to priority in here */ 138/* the cpus queue themselves according to priority in here */
113static struct bheap_node gsnedf_heap_node[NR_CPUS]; 139static struct binheap_handle gsnedf_cpu_heap;
114static struct bheap gsnedf_cpu_heap;
115 140
116static rt_domain_t gsnedf; 141static rt_domain_t gsnedf;
117#define gsnedf_lock (gsnedf.ready_lock) 142#define gsnedf_lock (gsnedf.ready_lock)
118 143
144#ifdef CONFIG_LITMUS_DGL_SUPPORT
145static raw_spinlock_t dgl_lock;
146
147static raw_spinlock_t* gsnedf_get_dgl_spinlock(struct task_struct *t)
148{
149 return(&dgl_lock);
150}
151#endif
152
153#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
154struct tasklet_head
155{
156 struct tasklet_struct *head;
157 struct tasklet_struct **tail;
158};
159
160struct tasklet_head gsnedf_pending_tasklets;
161#endif
162
119 163
120/* Uncomment this if you want to see all scheduling decisions in the 164/* Uncomment this if you want to see all scheduling decisions in the
121 * TRACE() log. 165 * TRACE() log.
122#define WANT_ALL_SCHED_EVENTS 166#define WANT_ALL_SCHED_EVENTS
123 */ 167 */
124 168
125static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) 169static int cpu_lower_prio(struct binheap_node *_a, struct binheap_node *_b)
126{ 170{
127 cpu_entry_t *a, *b; 171 cpu_entry_t *a = binheap_entry(_a, cpu_entry_t, hn);
128 a = _a->value; 172 cpu_entry_t *b = binheap_entry(_b, cpu_entry_t, hn);
129 b = _b->value; 173
130 /* Note that a and b are inverted: we want the lowest-priority CPU at 174 /* Note that a and b are inverted: we want the lowest-priority CPU at
131 * the top of the heap. 175 * the top of the heap.
132 */ 176 */
133 return edf_higher_prio(b->linked, a->linked); 177 return edf_higher_prio(b->linked, a->linked);
134} 178}
135 179
180
136/* update_cpu_position - Move the cpu entry to the correct place to maintain 181/* update_cpu_position - Move the cpu entry to the correct place to maintain
137 * order in the cpu queue. Caller must hold gsnedf lock. 182 * order in the cpu queue. Caller must hold gsnedf lock.
138 */ 183 */
139static void update_cpu_position(cpu_entry_t *entry) 184static void update_cpu_position(cpu_entry_t *entry)
140{ 185{
141 if (likely(bheap_node_in_heap(entry->hn))) 186 if (likely(binheap_is_in_heap(&entry->hn))) {
142 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 187 binheap_delete(&entry->hn, &gsnedf_cpu_heap);
143 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn); 188 }
189 binheap_add(&entry->hn, &gsnedf_cpu_heap, cpu_entry_t, hn);
144} 190}
145 191
146/* caller must hold gsnedf lock */ 192/* caller must hold gsnedf lock */
147static cpu_entry_t* lowest_prio_cpu(void) 193static cpu_entry_t* lowest_prio_cpu(void)
148{ 194{
149 struct bheap_node* hn; 195 return binheap_top_entry(&gsnedf_cpu_heap, cpu_entry_t, hn);
150 hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
151 return hn->value;
152} 196}
153 197
154 198
@@ -337,6 +381,10 @@ static noinline void job_completion(struct task_struct *t, int forced)
337 381
338 sched_trace_task_completion(t, forced); 382 sched_trace_task_completion(t, forced);
339 383
384#ifdef CONFIG_LITMUS_NVIDIA
385 atomic_set(&tsk_rt(t)->nv_int_count, 0);
386#endif
387
340 TRACE_TASK(t, "job_completion().\n"); 388 TRACE_TASK(t, "job_completion().\n");
341 389
342 /* set flags */ 390 /* set flags */
@@ -379,6 +427,318 @@ static void gsnedf_tick(struct task_struct* t)
379 } 427 }
380} 428}
381 429
430
431
432
433#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
434
435
436static void __do_lit_tasklet(struct tasklet_struct* tasklet, unsigned long flushed)
437{
438 if (!atomic_read(&tasklet->count)) {
439 if(tasklet->owner) {
440 sched_trace_tasklet_begin(tasklet->owner);
441 }
442
443 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &tasklet->state))
444 {
445 BUG();
446 }
447 TRACE("%s: Invoking tasklet with owner pid = %d (flushed = %d).\n",
448 __FUNCTION__,
449 (tasklet->owner) ? tasklet->owner->pid : -1,
450 (tasklet->owner) ? 0 : 1);
451 tasklet->func(tasklet->data);
452 tasklet_unlock(tasklet);
453
454 if(tasklet->owner) {
455 sched_trace_tasklet_end(tasklet->owner, flushed);
456 }
457 }
458 else {
459 BUG();
460 }
461}
462
463static void do_lit_tasklets(struct task_struct* sched_task)
464{
465 int work_to_do = 1;
466 struct tasklet_struct *tasklet = NULL;
467 unsigned long flags;
468
469 while(work_to_do) {
470
471 TS_NV_SCHED_BOTISR_START;
472
473 // execute one tasklet that has higher priority
474 raw_spin_lock_irqsave(&gsnedf_lock, flags);
475
476 if(gsnedf_pending_tasklets.head != NULL) {
477 struct tasklet_struct *prev = NULL;
478 tasklet = gsnedf_pending_tasklets.head;
479
480 while(tasklet && edf_higher_prio(sched_task, tasklet->owner)) {
481 prev = tasklet;
482 tasklet = tasklet->next;
483 }
484
 485				// remove the tasklet from the queue (it is NULL if nothing outranks sched_task)
 486				if(prev && tasklet) {
487 prev->next = tasklet->next;
488 if(prev->next == NULL) {
489 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
 490						gsnedf_pending_tasklets.tail = &(prev->next);
491 }
492 }
 493				else if(tasklet) {
494 gsnedf_pending_tasklets.head = tasklet->next;
495 if(tasklet->next == NULL) {
496 TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
497 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
498 }
499 }
500 }
501 else {
502 TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
503 }
504
505 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
506
507 if(tasklet) {
508 __do_lit_tasklet(tasklet, 0ul);
509 tasklet = NULL;
510 }
511 else {
512 work_to_do = 0;
513 }
514
515 TS_NV_SCHED_BOTISR_END;
516 }
517}
518
519//static void do_lit_tasklets(struct task_struct* sched_task)
520//{
521// int work_to_do = 1;
522// struct tasklet_struct *tasklet = NULL;
523// //struct tasklet_struct *step;
524// unsigned long flags;
525//
526// while(work_to_do) {
527//
528// TS_NV_SCHED_BOTISR_START;
529//
530// // remove tasklet at head of list if it has higher priority.
531// raw_spin_lock_irqsave(&gsnedf_lock, flags);
532//
533// if(gsnedf_pending_tasklets.head != NULL) {
534// // remove tasklet at head.
535// tasklet = gsnedf_pending_tasklets.head;
536//
537// if(edf_higher_prio(tasklet->owner, sched_task)) {
538//
539// if(NULL == tasklet->next) {
540// // tasklet is at the head, list only has one element
541// TRACE("%s: Tasklet for %d is the last element in tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
542// gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
543// }
544//
545// // remove the tasklet from the queue
546// gsnedf_pending_tasklets.head = tasklet->next;
547//
548// TRACE("%s: Removed tasklet for %d from tasklet queue.\n", __FUNCTION__, tasklet->owner->pid);
549// }
550// else {
551// TRACE("%s: Pending tasklet (%d) does not have priority to run on this CPU (%d).\n", __FUNCTION__, tasklet->owner->pid, smp_processor_id());
552// tasklet = NULL;
553// }
554// }
555// else {
556// TRACE("%s: Tasklet queue is empty.\n", __FUNCTION__);
557// }
558//
559// raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
560//
561// TS_NV_SCHED_BOTISR_END;
562//
563// if(tasklet) {
564// __do_lit_tasklet(tasklet, 0ul);
565// tasklet = NULL;
566// }
567// else {
568// work_to_do = 0;
569// }
570// }
571//
572// //TRACE("%s: exited.\n", __FUNCTION__);
573//}
574
575static void __add_pai_tasklet(struct tasklet_struct* tasklet)
576{
577 struct tasklet_struct* step;
578
579 tasklet->next = NULL; // make sure there are no old values floating around
580
581 step = gsnedf_pending_tasklets.head;
582 if(step == NULL) {
583 TRACE("%s: tasklet queue empty. inserting tasklet for %d at head.\n", __FUNCTION__, tasklet->owner->pid);
584 // insert at tail.
585 *(gsnedf_pending_tasklets.tail) = tasklet;
586 gsnedf_pending_tasklets.tail = &(tasklet->next);
587 }
588 else if((*(gsnedf_pending_tasklets.tail) != NULL) &&
589 edf_higher_prio((*(gsnedf_pending_tasklets.tail))->owner, tasklet->owner)) {
590 // insert at tail.
591 TRACE("%s: tasklet belongs at end. inserting tasklet for %d at tail.\n", __FUNCTION__, tasklet->owner->pid);
592
593 *(gsnedf_pending_tasklets.tail) = tasklet;
594 gsnedf_pending_tasklets.tail = &(tasklet->next);
595 }
596 else {
597 // insert the tasklet somewhere in the middle.
598
599 TRACE("%s: tasklet belongs somewhere in the middle.\n", __FUNCTION__);
600
601 while(step->next && edf_higher_prio(step->next->owner, tasklet->owner)) {
602 step = step->next;
603 }
604
 605			// insert tasklet right before step->next, unless it outranks the current head.
 606			TRACE("%s: inserting tasklet for %d between %d and %d.\n", __FUNCTION__, tasklet->owner->pid, step->owner->pid, (step->next) ? step->next->owner->pid : -1);
 607
 608			if((gsnedf_pending_tasklets.head == step) && edf_higher_prio(tasklet->owner, step->owner)) {
 609				// the new tasklet outranks the head, so it becomes the new head.
 610				TRACE("%s: %d is the new tasklet queue head.\n", __FUNCTION__, tasklet->owner->pid);
 611				tasklet->next = gsnedf_pending_tasklets.head;
 612				gsnedf_pending_tasklets.head = tasklet;
 613			}
 614			else {
 615				tasklet->next = step->next;	// step outranks the new tasklet here
 616				step->next = tasklet;
 617			}
618 }
619}
620
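/* Minimal sketch (illustrative only) of the tail-pointer invariant used by
 * struct tasklet_head above: 'tail' always points at the final 'next' field
 * of the list, or at 'head' while the list is empty, so appends and the
 * insertions in __add_pai_tasklet() never need to walk the whole queue. */
struct demo_node {
	struct demo_node *next;
};

struct demo_queue {
	struct demo_node *head;
	struct demo_node **tail;
};

static void demo_queue_init(struct demo_queue *q)
{
	q->head = NULL;
	q->tail = &q->head;	/* empty: tail refers to the head pointer itself */
}

static void demo_queue_append(struct demo_queue *q, struct demo_node *n)
{
	n->next = NULL;
	*(q->tail) = n;		/* link after the current last element (or at head) */
	q->tail = &n->next;	/* tail now tracks the new last 'next' field */
}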
621static void gsnedf_run_tasklets(struct task_struct* sched_task)
622{
623 preempt_disable();
624
625 if(gsnedf_pending_tasklets.head != NULL) {
626 TRACE("%s: There are tasklets to process.\n", __FUNCTION__);
627 do_lit_tasklets(sched_task);
628 }
629
630 preempt_enable_no_resched();
631}
632
633static int gsnedf_enqueue_pai_tasklet(struct tasklet_struct* tasklet)
634{
635 cpu_entry_t *targetCPU = NULL;
636 int thisCPU;
637 int runLocal = 0;
638 int runNow = 0;
639 unsigned long flags;
640
641 if(unlikely((tasklet->owner == NULL) || !is_realtime(tasklet->owner)))
642 {
643 TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
644 return 0;
645 }
646
647
648 raw_spin_lock_irqsave(&gsnedf_lock, flags);
649
650 thisCPU = smp_processor_id();
651
652#ifdef CONFIG_SCHED_CPU_AFFINITY
653 {
654 cpu_entry_t* affinity = NULL;
655
 656		// use this CPU if it isn't running any RT work (and isn't the release master).
657 if(
658#ifdef CONFIG_RELEASE_MASTER
659 (thisCPU != gsnedf.release_master) &&
660#endif
661 (__get_cpu_var(gsnedf_cpu_entries).linked == NULL)) {
662 affinity = &(__get_cpu_var(gsnedf_cpu_entries));
663 }
664 else {
 665			// this CPU is busy or should not run the tasklet (e.g., it is the release master).
 666			// look for an available nearby CPU.
667 // NOTE: Affinity towards owner and not this CPU. Is this right?
668 affinity =
669 gsnedf_get_nearest_available_cpu(
670 &per_cpu(gsnedf_cpu_entries, task_cpu(tasklet->owner)));
671 }
672
673 targetCPU = affinity;
674 }
675#endif
676
677 if (targetCPU == NULL) {
678 targetCPU = lowest_prio_cpu();
679 }
680
681 if (edf_higher_prio(tasklet->owner, targetCPU->linked)) {
682 if (thisCPU == targetCPU->cpu) {
683 TRACE("%s: Run tasklet locally (and now).\n", __FUNCTION__);
684 runLocal = 1;
685 runNow = 1;
686 }
687 else {
688 TRACE("%s: Run tasklet remotely (and now).\n", __FUNCTION__);
689 runLocal = 0;
690 runNow = 1;
691 }
692 }
693 else {
694 runLocal = 0;
695 runNow = 0;
696 }
697
698 if(!runLocal) {
699 // enqueue the tasklet
700 __add_pai_tasklet(tasklet);
701 }
702
703 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
704
705
706 if (runLocal /*&& runNow */) { // runNow == 1 is implied
707 TRACE("%s: Running tasklet on CPU where it was received.\n", __FUNCTION__);
708 __do_lit_tasklet(tasklet, 0ul);
709 }
710 else if (runNow /*&& !runLocal */) { // runLocal == 0 is implied
711 TRACE("%s: Triggering CPU %d to run tasklet.\n", __FUNCTION__, targetCPU->cpu);
 712		preempt(targetCPU);  // need to be protected by gsnedf_lock?
713 }
714 else {
715 TRACE("%s: Scheduling of tasklet was deferred.\n", __FUNCTION__);
716 }
717
718 return(1); // success
719}
720
721static void gsnedf_change_prio_pai_tasklet(struct task_struct *old_prio,
722 struct task_struct *new_prio)
723{
724 struct tasklet_struct* step;
725 unsigned long flags;
726
727 if(gsnedf_pending_tasklets.head != NULL) {
728 raw_spin_lock_irqsave(&gsnedf_lock, flags);
729 for(step = gsnedf_pending_tasklets.head; step != NULL; step = step->next) {
730 if(step->owner == old_prio) {
731 TRACE("%s: Found tasklet to change: %d\n", __FUNCTION__, step->owner->pid);
732 step->owner = new_prio;
733 }
734 }
735 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
736 }
737}
738
739#endif // end PAI
740
741
382/* Getting schedule() right is a bit tricky. schedule() may not make any 742/* Getting schedule() right is a bit tricky. schedule() may not make any
383 * assumptions on the state of the current task since it may be called for a 743 * assumptions on the state of the current task since it may be called for a
384 * number of reasons. The reasons include a scheduler_tick() determined that it 744 * number of reasons. The reasons include a scheduler_tick() determined that it
@@ -437,21 +797,32 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
437 TRACE_TASK(prev, "invoked gsnedf_schedule.\n"); 797 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
438#endif 798#endif
439 799
800 /*
440 if (exists) 801 if (exists)
441 TRACE_TASK(prev, 802 TRACE_TASK(prev,
442 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " 803 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
443 "state:%d sig:%d\n", 804 "state:%d sig:%d\n",
444 blocks, out_of_time, np, sleep, preempt, 805 blocks, out_of_time, np, sleep, preempt,
445 prev->state, signal_pending(prev)); 806 prev->state, signal_pending(prev));
807 */
808
446 if (entry->linked && preempt) 809 if (entry->linked && preempt)
447 TRACE_TASK(prev, "will be preempted by %s/%d\n", 810 TRACE_TASK(prev, "will be preempted by %s/%d\n",
448 entry->linked->comm, entry->linked->pid); 811 entry->linked->comm, entry->linked->pid);
449 812
450
451 /* If a task blocks we have no choice but to reschedule. 813 /* If a task blocks we have no choice but to reschedule.
452 */ 814 */
453 if (blocks) 815 if (blocks) {
454 unlink(entry->scheduled); 816 unlink(entry->scheduled);
817 }
818
819#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
820 if(exists && is_realtime(entry->scheduled) && tsk_rt(entry->scheduled)->held_gpus) {
821 if(!blocks || tsk_rt(entry->scheduled)->suspend_gpu_tracker_on_block) {
822 stop_gpu_tracker(entry->scheduled);
823 }
824 }
825#endif
455 826
456 /* Request a sys_exit_np() call if we would like to preempt but cannot. 827 /* Request a sys_exit_np() call if we would like to preempt but cannot.
457 * We need to make sure to update the link structure anyway in case 828 * We need to make sure to update the link structure anyway in case
@@ -492,12 +863,15 @@ static struct task_struct* gsnedf_schedule(struct task_struct * prev)
492 entry->scheduled->rt_param.scheduled_on = NO_CPU; 863 entry->scheduled->rt_param.scheduled_on = NO_CPU;
493 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); 864 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
494 } 865 }
495 } else 866 }
867 else
868 {
496 /* Only override Linux scheduler if we have a real-time task 869 /* Only override Linux scheduler if we have a real-time task
497 * scheduled that needs to continue. 870 * scheduled that needs to continue.
498 */ 871 */
499 if (exists) 872 if (exists)
500 next = prev; 873 next = prev;
874 }
501 875
502 sched_state_task_picked(); 876 sched_state_task_picked();
503 877
@@ -524,6 +898,7 @@ static void gsnedf_finish_switch(struct task_struct *prev)
524 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries); 898 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
525 899
526 entry->scheduled = is_realtime(current) ? current : NULL; 900 entry->scheduled = is_realtime(current) ? current : NULL;
901
527#ifdef WANT_ALL_SCHED_EVENTS 902#ifdef WANT_ALL_SCHED_EVENTS
528 TRACE_TASK(prev, "switched away from\n"); 903 TRACE_TASK(prev, "switched away from\n");
529#endif 904#endif
@@ -572,11 +947,14 @@ static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
572static void gsnedf_task_wake_up(struct task_struct *task) 947static void gsnedf_task_wake_up(struct task_struct *task)
573{ 948{
574 unsigned long flags; 949 unsigned long flags;
575 lt_t now; 950 //lt_t now;
576 951
577 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); 952 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
578 953
579 raw_spin_lock_irqsave(&gsnedf_lock, flags); 954 raw_spin_lock_irqsave(&gsnedf_lock, flags);
955
956
957#if 0 // sporadic task model
580 /* We need to take suspensions because of semaphores into 958 /* We need to take suspensions because of semaphores into
581 * account! If a job resumes after being suspended due to acquiring 959 * account! If a job resumes after being suspended due to acquiring
582 * a semaphore, it should never be treated as a new job release. 960 * a semaphore, it should never be treated as a new job release.
@@ -598,19 +976,26 @@ static void gsnedf_task_wake_up(struct task_struct *task)
598 } 976 }
599 } 977 }
600 } 978 }
979#else // periodic task model
980 set_rt_flags(task, RT_F_RUNNING);
981#endif
982
601 gsnedf_job_arrival(task); 983 gsnedf_job_arrival(task);
602 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 984 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
603} 985}
604 986
605static void gsnedf_task_block(struct task_struct *t) 987static void gsnedf_task_block(struct task_struct *t)
606{ 988{
989 // TODO: is this called on preemption??
607 unsigned long flags; 990 unsigned long flags;
608 991
609 TRACE_TASK(t, "block at %llu\n", litmus_clock()); 992 TRACE_TASK(t, "block at %llu\n", litmus_clock());
610 993
611 /* unlink if necessary */ 994 /* unlink if necessary */
612 raw_spin_lock_irqsave(&gsnedf_lock, flags); 995 raw_spin_lock_irqsave(&gsnedf_lock, flags);
996
613 unlink(t); 997 unlink(t);
998
614 raw_spin_unlock_irqrestore(&gsnedf_lock, flags); 999 raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
615 1000
616 BUG_ON(!is_realtime(t)); 1001 BUG_ON(!is_realtime(t));
@@ -621,6 +1006,10 @@ static void gsnedf_task_exit(struct task_struct * t)
621{ 1006{
622 unsigned long flags; 1007 unsigned long flags;
623 1008
1009#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1010 gsnedf_change_prio_pai_tasklet(t, NULL);
1011#endif
1012
624 /* unlink if necessary */ 1013 /* unlink if necessary */
625 raw_spin_lock_irqsave(&gsnedf_lock, flags); 1014 raw_spin_lock_irqsave(&gsnedf_lock, flags);
626 unlink(t); 1015 unlink(t);
@@ -637,101 +1026,423 @@ static void gsnedf_task_exit(struct task_struct * t)
637 1026
638static long gsnedf_admit_task(struct task_struct* tsk) 1027static long gsnedf_admit_task(struct task_struct* tsk)
639{ 1028{
1029#ifdef CONFIG_LITMUS_NESTED_LOCKING
1030 INIT_BINHEAP_HANDLE(&tsk_rt(tsk)->hp_blocked_tasks,
1031 edf_max_heap_base_priority_order);
1032#endif
1033
640 return 0; 1034 return 0;
641} 1035}
642 1036
1037
1038
1039
1040
1041
643#ifdef CONFIG_LITMUS_LOCKING 1042#ifdef CONFIG_LITMUS_LOCKING
644 1043
645#include <litmus/fdso.h> 1044#include <litmus/fdso.h>
646 1045
647/* called with IRQs off */ 1046/* called with IRQs off */
648static void set_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh) 1047static void __increase_priority_inheritance(struct task_struct* t,
1048 struct task_struct* prio_inh)
649{ 1049{
650 int linked_on; 1050 int linked_on;
651 int check_preempt = 0; 1051 int check_preempt = 0;
652 1052
1053#ifdef CONFIG_LITMUS_NESTED_LOCKING
1054 /* this sanity check allows for weaker locking in protocols */
1055 /* TODO (klitirqd): Skip this check if 't' is a proxy thread (???) */
1056 if(__edf_higher_prio(prio_inh, BASE, t, EFFECTIVE)) {
1057#endif
1058 TRACE_TASK(t, "inherits priority from %s/%d\n",
1059 prio_inh->comm, prio_inh->pid);
1060 tsk_rt(t)->inh_task = prio_inh;
1061
1062 linked_on = tsk_rt(t)->linked_on;
1063
1064 /* If it is scheduled, then we need to reorder the CPU heap. */
1065 if (linked_on != NO_CPU) {
1066 TRACE_TASK(t, "%s: linked on %d\n",
1067 __FUNCTION__, linked_on);
1068 /* Holder is scheduled; need to re-order CPUs.
1069 * We can't use heap_decrease() here since
1070 * the cpu_heap is ordered in reverse direction, so
1071 * it is actually an increase. */
1072 binheap_delete(&gsnedf_cpus[linked_on]->hn, &gsnedf_cpu_heap);
1073 binheap_add(&gsnedf_cpus[linked_on]->hn,
1074 &gsnedf_cpu_heap, cpu_entry_t, hn);
1075 } else {
1076 /* holder may be queued: first stop queue changes */
1077 raw_spin_lock(&gsnedf.release_lock);
1078 if (is_queued(t)) {
1079 TRACE_TASK(t, "%s: is queued\n",
1080 __FUNCTION__);
1081 /* We need to update the position of holder in some
1082 * heap. Note that this could be a release heap if we
1083 * budget enforcement is used and this job overran. */
1084 check_preempt =
1085 !bheap_decrease(edf_ready_order,
1086 tsk_rt(t)->heap_node);
1087 } else {
1088 /* Nothing to do: if it is not queued and not linked
1089 * then it is either sleeping or currently being moved
1090 * by other code (e.g., a timer interrupt handler) that
1091 * will use the correct priority when enqueuing the
1092 * task. */
1093 TRACE_TASK(t, "%s: is NOT queued => Done.\n",
1094 __FUNCTION__);
1095 }
1096 raw_spin_unlock(&gsnedf.release_lock);
1097
1098 /* If holder was enqueued in a release heap, then the following
1099 * preemption check is pointless, but we can't easily detect
1100 * that case. If you want to fix this, then consider that
1101 * simply adding a state flag requires O(n) time to update when
1102 * releasing n tasks, which conflicts with the goal to have
1103 * O(log n) merges. */
1104 if (check_preempt) {
1105 /* heap_decrease() hit the top level of the heap: make
1106 * sure preemption checks get the right task, not the
1107 * potentially stale cache. */
1108 bheap_uncache_min(edf_ready_order,
1109 &gsnedf.ready_queue);
1110 check_for_preemptions();
1111 }
1112 }
1113#ifdef CONFIG_LITMUS_NESTED_LOCKING
1114 }
1115 else {
1116 TRACE_TASK(t, "Spurious invalid priority increase. "
1117 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
1118 "Occurance is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1119 t->comm, t->pid,
1120 effective_priority(t)->comm, effective_priority(t)->pid,
1121 (prio_inh) ? prio_inh->comm : "nil",
1122 (prio_inh) ? prio_inh->pid : -1);
1123 WARN_ON(!prio_inh);
1124 }
1125#endif
1126}
1127
1128/* called with IRQs off */
1129static void increase_priority_inheritance(struct task_struct* t, struct task_struct* prio_inh)
1130{
653 raw_spin_lock(&gsnedf_lock); 1131 raw_spin_lock(&gsnedf_lock);
654 1132
655 TRACE_TASK(t, "inherits priority from %s/%d\n", prio_inh->comm, prio_inh->pid); 1133 __increase_priority_inheritance(t, prio_inh);
656 tsk_rt(t)->inh_task = prio_inh; 1134
657 1135#ifdef CONFIG_LITMUS_SOFTIRQD
658 linked_on = tsk_rt(t)->linked_on; 1136 if(tsk_rt(t)->cur_klitirqd != NULL)
659 1137 {
660 /* If it is scheduled, then we need to reorder the CPU heap. */ 1138 TRACE_TASK(t, "%s/%d inherits a new priority!\n",
661 if (linked_on != NO_CPU) { 1139 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
662 TRACE_TASK(t, "%s: linked on %d\n", 1140
663 __FUNCTION__, linked_on); 1141 __increase_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
664 /* Holder is scheduled; need to re-order CPUs. 1142 }
665 * We can't use heap_decrease() here since 1143#endif
666 * the cpu_heap is ordered in reverse direction, so 1144
667 * it is actually an increase. */ 1145 raw_spin_unlock(&gsnedf_lock);
668 bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, 1146
669 gsnedf_cpus[linked_on]->hn); 1147#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
670 bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, 1148 if(tsk_rt(t)->held_gpus) {
671 gsnedf_cpus[linked_on]->hn); 1149 int i;
672 } else { 1150 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
673 /* holder may be queued: first stop queue changes */ 1151 i < NV_DEVICE_NUM;
674 raw_spin_lock(&gsnedf.release_lock); 1152 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
675 if (is_queued(t)) { 1153 pai_check_priority_increase(t, i);
676 TRACE_TASK(t, "%s: is queued\n", 1154 }
677 __FUNCTION__); 1155 }
678 /* We need to update the position of holder in some 1156#endif
679 * heap. Note that this could be a release heap if we 1157}
680 * budget enforcement is used and this job overran. */ 1158
681 check_preempt = 1159
682 !bheap_decrease(edf_ready_order, 1160/* called with IRQs off */
683 tsk_rt(t)->heap_node); 1161static void __decrease_priority_inheritance(struct task_struct* t,
684 } else { 1162 struct task_struct* prio_inh)
685 /* Nothing to do: if it is not queued and not linked 1163{
686 * then it is either sleeping or currently being moved 1164#ifdef CONFIG_LITMUS_NESTED_LOCKING
687 * by other code (e.g., a timer interrupt handler) that 1165 if(__edf_higher_prio(t, EFFECTIVE, prio_inh, BASE)) {
688 * will use the correct priority when enqueuing the 1166#endif
689 * task. */ 1167 /* A job only stops inheriting a priority when it releases a
690 TRACE_TASK(t, "%s: is NOT queued => Done.\n", 1168 * resource. Thus we can make the following assumption.*/
691 __FUNCTION__); 1169 if(prio_inh)
1170 TRACE_TASK(t, "EFFECTIVE priority decreased to %s/%d\n",
1171 prio_inh->comm, prio_inh->pid);
1172 else
1173 TRACE_TASK(t, "base priority restored.\n");
1174
1175 tsk_rt(t)->inh_task = prio_inh;
1176
1177 if(tsk_rt(t)->scheduled_on != NO_CPU) {
1178 TRACE_TASK(t, "is scheduled.\n");
1179
1180 /* Check if rescheduling is necessary. We can't use heap_decrease()
1181 * since the priority was effectively lowered. */
1182 unlink(t);
1183 gsnedf_job_arrival(t);
692 } 1184 }
693 raw_spin_unlock(&gsnedf.release_lock); 1185 else {
694 1186 /* task is queued */
695 /* If holder was enqueued in a release heap, then the following 1187 raw_spin_lock(&gsnedf.release_lock);
696 * preemption check is pointless, but we can't easily detect 1188 if (is_queued(t)) {
697 * that case. If you want to fix this, then consider that 1189 TRACE_TASK(t, "is queued.\n");
698 * simply adding a state flag requires O(n) time to update when 1190
699 * releasing n tasks, which conflicts with the goal to have 1191 /* decrease in priority, so we have to re-add to binomial heap */
700 * O(log n) merges. */ 1192 unlink(t);
701 if (check_preempt) { 1193 gsnedf_job_arrival(t);
702 /* heap_decrease() hit the top level of the heap: make 1194 }
703 * sure preemption checks get the right task, not the 1195 else {
704 * potentially stale cache. */ 1196 TRACE_TASK(t, "is not in scheduler. Probably on wait queue somewhere.\n");
705 bheap_uncache_min(edf_ready_order, 1197 }
706 &gsnedf.ready_queue); 1198 raw_spin_unlock(&gsnedf.release_lock);
707 check_for_preemptions();
708 } 1199 }
1200#ifdef CONFIG_LITMUS_NESTED_LOCKING
1201 }
1202 else {
1203 TRACE_TASK(t, "Spurious invalid priority decrease. "
1204 "Inheritance request: %s/%d [eff_prio = %s/%d] to inherit from %s/%d\n"
 1205			   "Occurrence is likely okay: probably due to (hopefully safe) concurrent priority updates.\n",
1206 t->comm, t->pid,
1207 effective_priority(t)->comm, effective_priority(t)->pid,
1208 (prio_inh) ? prio_inh->comm : "nil",
1209 (prio_inh) ? prio_inh->pid : -1);
709 } 1210 }
1211#endif
1212}
1213
1214static void decrease_priority_inheritance(struct task_struct* t,
1215 struct task_struct* prio_inh)
1216{
1217 raw_spin_lock(&gsnedf_lock);
1218 __decrease_priority_inheritance(t, prio_inh);
1219
1220#ifdef CONFIG_LITMUS_SOFTIRQD
1221 if(tsk_rt(t)->cur_klitirqd != NULL)
1222 {
1223 TRACE_TASK(t, "%s/%d decreases in priority!\n",
1224 tsk_rt(t)->cur_klitirqd->comm, tsk_rt(t)->cur_klitirqd->pid);
1225
1226 __decrease_priority_inheritance(tsk_rt(t)->cur_klitirqd, prio_inh);
1227 }
1228#endif
710 1229
711 raw_spin_unlock(&gsnedf_lock); 1230 raw_spin_unlock(&gsnedf_lock);
1231
1232#if defined(CONFIG_LITMUS_PAI_SOFTIRQD) && defined(CONFIG_LITMUS_NVIDIA)
1233 if(tsk_rt(t)->held_gpus) {
1234 int i;
1235 for(i = find_first_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus));
1236 i < NV_DEVICE_NUM;
1237 i = find_next_bit(&tsk_rt(t)->held_gpus, sizeof(tsk_rt(t)->held_gpus), i+1)) {
1238 pai_check_priority_decrease(t, i);
1239 }
1240 }
1241#endif
712} 1242}
713 1243
1244
1245#ifdef CONFIG_LITMUS_SOFTIRQD
714/* called with IRQs off */ 1246/* called with IRQs off */
715static void clear_priority_inheritance(struct task_struct* t) 1247static void increase_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1248 struct task_struct* old_owner,
1249 struct task_struct* new_owner)
716{ 1250{
1251 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1252
717 raw_spin_lock(&gsnedf_lock); 1253 raw_spin_lock(&gsnedf_lock);
718 1254
719 /* A job only stops inheriting a priority when it releases a 1255 if(old_owner != new_owner)
720 * resource. Thus we can make the following assumption.*/ 1256 {
721 BUG_ON(tsk_rt(t)->scheduled_on == NO_CPU); 1257 if(old_owner)
1258 {
1259 // unreachable?
1260 tsk_rt(old_owner)->cur_klitirqd = NULL;
1261 }
722 1262
723 TRACE_TASK(t, "priority restored\n"); 1263 TRACE_TASK(klitirqd, "giving ownership to %s/%d.\n",
724 tsk_rt(t)->inh_task = NULL; 1264 new_owner->comm, new_owner->pid);
725 1265
726 /* Check if rescheduling is necessary. We can't use heap_decrease() 1266 tsk_rt(new_owner)->cur_klitirqd = klitirqd;
727 * since the priority was effectively lowered. */ 1267 }
728 unlink(t); 1268
729 gsnedf_job_arrival(t); 1269 __decrease_priority_inheritance(klitirqd, NULL); // kludge to clear out cur prio.
1270
1271 __increase_priority_inheritance(klitirqd,
1272 (tsk_rt(new_owner)->inh_task == NULL) ?
1273 new_owner :
1274 tsk_rt(new_owner)->inh_task);
730 1275
731 raw_spin_unlock(&gsnedf_lock); 1276 raw_spin_unlock(&gsnedf_lock);
732} 1277}
733 1278
734 1279
1280/* called with IRQs off */
1281static void decrease_priority_inheritance_klitirqd(struct task_struct* klitirqd,
1282 struct task_struct* old_owner,
1283 struct task_struct* new_owner)
1284{
1285 BUG_ON(!(tsk_rt(klitirqd)->is_proxy_thread));
1286
1287 raw_spin_lock(&gsnedf_lock);
1288
1289 TRACE_TASK(klitirqd, "priority restored\n");
1290
1291 __decrease_priority_inheritance(klitirqd, new_owner);
1292
1293 tsk_rt(old_owner)->cur_klitirqd = NULL;
1294
1295 raw_spin_unlock(&gsnedf_lock);
1296}
1297#endif
1298
1299
1300
1301
1302#ifdef CONFIG_LITMUS_NESTED_LOCKING
1303
1304/* called with IRQs off */
1305/* preconditions:
1306 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1307 (2) The lock 'to_unlock' is held.
1308 */
1309static void nested_increase_priority_inheritance(struct task_struct* t,
1310 struct task_struct* prio_inh,
1311 raw_spinlock_t *to_unlock,
1312 unsigned long irqflags)
1313{
1314 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1315
 1316	if(tsk_rt(t)->inh_task != prio_inh) { // shield redundant calls.
1317 increase_priority_inheritance(t, prio_inh); // increase our prio.
1318 }
1319
1320 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1321
1322
1323 if(blocked_lock) {
1324 if(blocked_lock->ops->propagate_increase_inheritance) {
1325 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1326 blocked_lock->ident);
1327
1328 // beware: recursion
1329 blocked_lock->ops->propagate_increase_inheritance(blocked_lock,
1330 t, to_unlock,
1331 irqflags);
1332 }
1333 else {
1334 TRACE_TASK(t, "Inheritor is blocked on lock (%d) that does not support nesting!\n",
1335 blocked_lock->ident);
1336 unlock_fine_irqrestore(to_unlock, irqflags);
1337 }
1338 }
1339 else {
1340 TRACE_TASK(t, "is not blocked. No propagation.\n");
1341 unlock_fine_irqrestore(to_unlock, irqflags);
1342 }
1343}
1344
1345/* called with IRQs off */
1346/* preconditions:
1347 (1) The 'hp_blocked_tasks_lock' of task 't' is held.
1348 (2) The lock 'to_unlock' is held.
1349 */
1350static void nested_decrease_priority_inheritance(struct task_struct* t,
1351 struct task_struct* prio_inh,
1352 raw_spinlock_t *to_unlock,
1353 unsigned long irqflags)
1354{
1355 struct litmus_lock *blocked_lock = tsk_rt(t)->blocked_lock;
1356 decrease_priority_inheritance(t, prio_inh);
1357
1358 raw_spin_unlock(&tsk_rt(t)->hp_blocked_tasks_lock); // unlock the t's heap.
1359
1360 if(blocked_lock) {
1361 if(blocked_lock->ops->propagate_decrease_inheritance) {
1362 TRACE_TASK(t, "Inheritor is blocked (...perhaps). Checking lock %d.\n",
1363 blocked_lock->ident);
1364
1365 // beware: recursion
1366 blocked_lock->ops->propagate_decrease_inheritance(blocked_lock, t,
1367 to_unlock,
1368 irqflags);
1369 }
1370 else {
1371 TRACE_TASK(t, "Inheritor is blocked on lock (%p) that does not support nesting!\n",
1372 blocked_lock);
1373 unlock_fine_irqrestore(to_unlock, irqflags);
1374 }
1375 }
1376 else {
1377 TRACE_TASK(t, "is not blocked. No propagation.\n");
1378 unlock_fine_irqrestore(to_unlock, irqflags);
1379 }
1380}
1381
1382
1383/* ******************** RSM MUTEX ********************** */
1384
1385static struct litmus_lock_ops gsnedf_rsm_mutex_lock_ops = {
1386 .lock = rsm_mutex_lock,
1387 .unlock = rsm_mutex_unlock,
1388 .close = rsm_mutex_close,
1389 .deallocate = rsm_mutex_free,
1390
1391 .propagate_increase_inheritance = rsm_mutex_propagate_increase_inheritance,
1392 .propagate_decrease_inheritance = rsm_mutex_propagate_decrease_inheritance,
1393
1394#ifdef CONFIG_LITMUS_DGL_SUPPORT
1395 .dgl_lock = rsm_mutex_dgl_lock,
1396 .is_owner = rsm_mutex_is_owner,
1397 .enable_priority = rsm_mutex_enable_priority,
1398#endif
1399};
1400
1401static struct litmus_lock* gsnedf_new_rsm_mutex(void)
1402{
1403 return rsm_mutex_new(&gsnedf_rsm_mutex_lock_ops);
1404}
1405
1406/* ******************** IKGLP ********************** */
1407
1408static struct litmus_lock_ops gsnedf_ikglp_lock_ops = {
1409 .lock = ikglp_lock,
1410 .unlock = ikglp_unlock,
1411 .close = ikglp_close,
1412 .deallocate = ikglp_free,
1413
1414 // ikglp can only be an outer-most lock.
1415 .propagate_increase_inheritance = NULL,
1416 .propagate_decrease_inheritance = NULL,
1417};
1418
1419static struct litmus_lock* gsnedf_new_ikglp(void* __user arg)
1420{
1421 return ikglp_new(num_online_cpus(), &gsnedf_ikglp_lock_ops, arg);
1422}
1423
1424#endif /* CONFIG_LITMUS_NESTED_LOCKING */
1425
1426
1427/* ******************** KFMLP support ********************** */
1428
1429static struct litmus_lock_ops gsnedf_kfmlp_lock_ops = {
1430 .lock = kfmlp_lock,
1431 .unlock = kfmlp_unlock,
1432 .close = kfmlp_close,
1433 .deallocate = kfmlp_free,
1434
1435 // kfmlp can only be an outer-most lock.
1436 .propagate_increase_inheritance = NULL,
1437 .propagate_decrease_inheritance = NULL,
1438};
1439
1440
1441static struct litmus_lock* gsnedf_new_kfmlp(void* __user arg)
1442{
1443 return kfmlp_new(&gsnedf_kfmlp_lock_ops, arg);
1444}
1445
735/* ******************** FMLP support ********************** */ 1446/* ******************** FMLP support ********************** */
736 1447
737/* struct for semaphore with priority inheritance */ 1448/* struct for semaphore with priority inheritance */
@@ -797,7 +1508,7 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
797 if (edf_higher_prio(t, sem->hp_waiter)) { 1508 if (edf_higher_prio(t, sem->hp_waiter)) {
798 sem->hp_waiter = t; 1509 sem->hp_waiter = t;
799 if (edf_higher_prio(t, sem->owner)) 1510 if (edf_higher_prio(t, sem->owner))
800 set_priority_inheritance(sem->owner, sem->hp_waiter); 1511 increase_priority_inheritance(sem->owner, sem->hp_waiter);
801 } 1512 }
802 1513
803 TS_LOCK_SUSPEND; 1514 TS_LOCK_SUSPEND;
@@ -865,7 +1576,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
865 /* Well, if next is not the highest-priority waiter, 1576 /* Well, if next is not the highest-priority waiter,
866 * then it ought to inherit the highest-priority 1577 * then it ought to inherit the highest-priority
867 * waiter's priority. */ 1578 * waiter's priority. */
868 set_priority_inheritance(next, sem->hp_waiter); 1579 increase_priority_inheritance(next, sem->hp_waiter);
869 } 1580 }
870 1581
871 /* wake up next */ 1582 /* wake up next */
@@ -876,7 +1587,7 @@ int gsnedf_fmlp_unlock(struct litmus_lock* l)
876 1587
877 /* we lose the benefit of priority inheritance (if any) */ 1588 /* we lose the benefit of priority inheritance (if any) */
878 if (tsk_rt(t)->inh_task) 1589 if (tsk_rt(t)->inh_task)
879 clear_priority_inheritance(t); 1590 decrease_priority_inheritance(t, NULL);
880 1591
881out: 1592out:
882 spin_unlock_irqrestore(&sem->wait.lock, flags); 1593 spin_unlock_irqrestore(&sem->wait.lock, flags);
@@ -914,6 +1625,11 @@ static struct litmus_lock_ops gsnedf_fmlp_lock_ops = {
914 .lock = gsnedf_fmlp_lock, 1625 .lock = gsnedf_fmlp_lock,
915 .unlock = gsnedf_fmlp_unlock, 1626 .unlock = gsnedf_fmlp_unlock,
916 .deallocate = gsnedf_fmlp_free, 1627 .deallocate = gsnedf_fmlp_free,
1628
1629#ifdef CONFIG_LITMUS_NESTED_LOCKING
1630 .propagate_increase_inheritance = NULL,
1631 .propagate_decrease_inheritance = NULL
1632#endif
917}; 1633};
918 1634
919static struct litmus_lock* gsnedf_new_fmlp(void) 1635static struct litmus_lock* gsnedf_new_fmlp(void)
@@ -932,47 +1648,121 @@ static struct litmus_lock* gsnedf_new_fmlp(void)
932 return &sem->litmus_lock; 1648 return &sem->litmus_lock;
933} 1649}
934 1650
935/* **** lock constructor **** */
936
937 1651
938static long gsnedf_allocate_lock(struct litmus_lock **lock, int type, 1652static long gsnedf_allocate_lock(struct litmus_lock **lock, int type,
939 void* __user unused) 1653 void* __user args)
940{ 1654{
941 int err = -ENXIO; 1655 int err;
942 1656
943 /* GSN-EDF currently only supports the FMLP for global resources. */
944 switch (type) { 1657 switch (type) {
945 1658
946 case FMLP_SEM: 1659 case FMLP_SEM:
947 /* Flexible Multiprocessor Locking Protocol */ 1660 /* Flexible Multiprocessor Locking Protocol */
948 *lock = gsnedf_new_fmlp(); 1661 *lock = gsnedf_new_fmlp();
949 if (*lock) 1662 break;
950 err = 0; 1663#ifdef CONFIG_LITMUS_NESTED_LOCKING
951 else 1664 case RSM_MUTEX:
952 err = -ENOMEM; 1665 *lock = gsnedf_new_rsm_mutex();
953 break; 1666 break;
954 1667
1668 case IKGLP_SEM:
1669 *lock = gsnedf_new_ikglp(args);
1670 break;
1671#endif
1672 case KFMLP_SEM:
1673 *lock = gsnedf_new_kfmlp(args);
1674 break;
1675 default:
1676 err = -ENXIO;
1677 goto UNSUPPORTED_LOCK;
955 }; 1678 };
956 1679
1680 if (*lock)
1681 err = 0;
1682 else
1683 err = -ENOMEM;
1684
1685UNSUPPORTED_LOCK:
957 return err; 1686 return err;
958} 1687}
959 1688
1689#endif // CONFIG_LITMUS_LOCKING
1690
1691
1692
1693
1694
1695#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1696static struct affinity_observer_ops gsnedf_kfmlp_affinity_ops = {
1697 .close = kfmlp_aff_obs_close,
1698 .deallocate = kfmlp_aff_obs_free,
1699};
1700
1701#ifdef CONFIG_LITMUS_NESTED_LOCKING
1702static struct affinity_observer_ops gsnedf_ikglp_affinity_ops = {
1703 .close = ikglp_aff_obs_close,
1704 .deallocate = ikglp_aff_obs_free,
1705};
960#endif 1706#endif
961 1707
1708static long gsnedf_allocate_affinity_observer(
1709 struct affinity_observer **aff_obs,
1710 int type,
1711 void* __user args)
1712{
1713 int err;
1714
1715 switch (type) {
1716
1717 case KFMLP_SIMPLE_GPU_AFF_OBS:
1718 *aff_obs = kfmlp_simple_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1719 break;
1720
1721 case KFMLP_GPU_AFF_OBS:
1722 *aff_obs = kfmlp_gpu_aff_obs_new(&gsnedf_kfmlp_affinity_ops, args);
1723 break;
1724
1725#ifdef CONFIG_LITMUS_NESTED_LOCKING
1726 case IKGLP_SIMPLE_GPU_AFF_OBS:
1727 *aff_obs = ikglp_simple_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1728 break;
1729
1730 case IKGLP_GPU_AFF_OBS:
1731 *aff_obs = ikglp_gpu_aff_obs_new(&gsnedf_ikglp_affinity_ops, args);
1732 break;
1733#endif
1734 default:
1735 err = -ENXIO;
1736 goto UNSUPPORTED_AFF_OBS;
1737 };
1738
1739 if (*aff_obs)
1740 err = 0;
1741 else
1742 err = -ENOMEM;
1743
1744UNSUPPORTED_AFF_OBS:
1745 return err;
1746}
1747#endif
1748
1749
1750
1751
962 1752
963static long gsnedf_activate_plugin(void) 1753static long gsnedf_activate_plugin(void)
964{ 1754{
965 int cpu; 1755 int cpu;
966 cpu_entry_t *entry; 1756 cpu_entry_t *entry;
967 1757
968 bheap_init(&gsnedf_cpu_heap); 1758 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
969#ifdef CONFIG_RELEASE_MASTER 1759#ifdef CONFIG_RELEASE_MASTER
970 gsnedf.release_master = atomic_read(&release_master_cpu); 1760 gsnedf.release_master = atomic_read(&release_master_cpu);
971#endif 1761#endif
972 1762
973 for_each_online_cpu(cpu) { 1763 for_each_online_cpu(cpu) {
974 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1764 entry = &per_cpu(gsnedf_cpu_entries, cpu);
975 bheap_node_init(&entry->hn, entry); 1765 INIT_BINHEAP_NODE(&entry->hn);
976 entry->linked = NULL; 1766 entry->linked = NULL;
977 entry->scheduled = NULL; 1767 entry->scheduled = NULL;
978#ifdef CONFIG_RELEASE_MASTER 1768#ifdef CONFIG_RELEASE_MASTER
@@ -986,6 +1776,20 @@ static long gsnedf_activate_plugin(void)
986 } 1776 }
987#endif 1777#endif
988 } 1778 }
1779
1780#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1781 gsnedf_pending_tasklets.head = NULL;
1782 gsnedf_pending_tasklets.tail = &(gsnedf_pending_tasklets.head);
1783#endif
1784
1785#ifdef CONFIG_LITMUS_SOFTIRQD
1786 spawn_klitirqd(NULL);
1787#endif
1788
1789#ifdef CONFIG_LITMUS_NVIDIA
1790 init_nvidia_info();
1791#endif
1792
989 return 0; 1793 return 0;
990} 1794}
991 1795
@@ -1002,8 +1806,31 @@ static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
1002 .task_block = gsnedf_task_block, 1806 .task_block = gsnedf_task_block,
1003 .admit_task = gsnedf_admit_task, 1807 .admit_task = gsnedf_admit_task,
1004 .activate_plugin = gsnedf_activate_plugin, 1808 .activate_plugin = gsnedf_activate_plugin,
1809 .compare = edf_higher_prio,
1005#ifdef CONFIG_LITMUS_LOCKING 1810#ifdef CONFIG_LITMUS_LOCKING
1006 .allocate_lock = gsnedf_allocate_lock, 1811 .allocate_lock = gsnedf_allocate_lock,
1812 .increase_prio = increase_priority_inheritance,
1813 .decrease_prio = decrease_priority_inheritance,
1814#endif
1815#ifdef CONFIG_LITMUS_NESTED_LOCKING
1816 .nested_increase_prio = nested_increase_priority_inheritance,
1817 .nested_decrease_prio = nested_decrease_priority_inheritance,
1818 .__compare = __edf_higher_prio,
1819#endif
1820#ifdef CONFIG_LITMUS_DGL_SUPPORT
1821 .get_dgl_spinlock = gsnedf_get_dgl_spinlock,
1822#endif
1823#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
1824 .allocate_aff_obs = gsnedf_allocate_affinity_observer,
1825#endif
1826#ifdef CONFIG_LITMUS_SOFTIRQD
1827 .increase_prio_klitirqd = increase_priority_inheritance_klitirqd,
1828 .decrease_prio_klitirqd = decrease_priority_inheritance_klitirqd,
1829#endif
1830#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
1831 .enqueue_pai_tasklet = gsnedf_enqueue_pai_tasklet,
1832 .change_prio_pai_tasklet = gsnedf_change_prio_pai_tasklet,
1833 .run_tasklets = gsnedf_run_tasklets,
1007#endif 1834#endif
1008}; 1835};
1009 1836
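The plugin exposes all of its policy through this callback table; members guarded by #ifdef only exist when the corresponding feature is compiled in, and callers never invoke GSN-EDF functions directly but go through the currently registered plugin. A stripped-down sketch of the idea in user-space C (only the .compare hook is modeled, and the demo_* names are invented for the example):

#include <stdio.h>

/* trimmed-down stand-in for struct sched_plugin: just the comparison hook */
struct demo_plugin {
	const char *name;
	int (*compare)(int a_deadline, int b_deadline);	/* nonzero if 'a' has higher priority */
};

static int demo_edf_compare(int a_deadline, int b_deadline)
{
	return a_deadline < b_deadline;	/* earlier deadline = higher priority */
}

static struct demo_plugin demo_gsn_edf = {
	.name    = "DEMO-GSN-EDF",
	.compare = demo_edf_compare,
};

/* callers never hardcode the policy; they call through the active plugin's table */
static struct demo_plugin *active = &demo_gsn_edf;

int main(void)
{
	printf("%s: compare(10, 20) = %d\n", active->name, active->compare(10, 20));
	return 0;
}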
@@ -1013,15 +1840,20 @@ static int __init init_gsn_edf(void)
1013 int cpu; 1840 int cpu;
1014 cpu_entry_t *entry; 1841 cpu_entry_t *entry;
1015 1842
1016 bheap_init(&gsnedf_cpu_heap); 1843 INIT_BINHEAP_HANDLE(&gsnedf_cpu_heap, cpu_lower_prio);
1017 /* initialize CPU state */ 1844 /* initialize CPU state */
1018 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1845 for (cpu = 0; cpu < NR_CPUS; ++cpu) {
1019 entry = &per_cpu(gsnedf_cpu_entries, cpu); 1846 entry = &per_cpu(gsnedf_cpu_entries, cpu);
1020 gsnedf_cpus[cpu] = entry; 1847 gsnedf_cpus[cpu] = entry;
1021 entry->cpu = cpu; 1848 entry->cpu = cpu;
1022 entry->hn = &gsnedf_heap_node[cpu]; 1849
1023 bheap_node_init(&entry->hn, entry); 1850 INIT_BINHEAP_NODE(&entry->hn);
1024 } 1851 }
1852
1853#ifdef CONFIG_LITMUS_DGL_SUPPORT
1854 raw_spin_lock_init(&dgl_lock);
1855#endif
1856
1025 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs); 1857 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
1026 return register_sched_plugin(&gsn_edf_plugin); 1858 return register_sched_plugin(&gsn_edf_plugin);
1027} 1859}
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 5a15ce938984..9a6fe487718e 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -103,7 +103,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev)
103 } 103 }
104#ifdef __ARCH_WANT_UNLOCKED_CTXSW 104#ifdef __ARCH_WANT_UNLOCKED_CTXSW
105 if (next->oncpu) 105 if (next->oncpu)
106 {
106 TRACE_TASK(next, "waiting for !oncpu"); 107 TRACE_TASK(next, "waiting for !oncpu");
108 }
107 while (next->oncpu) { 109 while (next->oncpu) {
108 cpu_relax(); 110 cpu_relax();
109 mb(); 111 mb();
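This sched_litmus.c hunk only adds braces around the TRACE_TASK() call. The likely motivation: when tracing is compiled out, TRACE_TASK() can expand to nothing, leaving "if (next->oncpu) ;", which is legal C but easy to misread and which gcc flags with -Wempty-body. A standalone illustration of that pitfall (DEMO_TRACE and ENABLE_TRACE are invented for the example):

#include <stdio.h>

/* TRACE-style macro that compiles away when tracing is disabled */
#ifdef ENABLE_TRACE
#define DEMO_TRACE(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
#else
#define DEMO_TRACE(fmt, ...)	/* expands to nothing */
#endif

int main(void)
{
	int busy = 1;

	/* Without braces this becomes 'if (busy) ;' when tracing is off:
	 * still correct, but ambiguous to readers and warned about by -Wempty-body. */
	if (busy)
	{
		DEMO_TRACE("waiting for !busy\n");
	}

	while (busy) {
		busy = 0;	/* pretend the condition cleared */
	}
	return 0;
}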
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 00a1900d6457..245e41c25a5d 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -13,6 +13,10 @@
13#include <litmus/preempt.h> 13#include <litmus/preempt.h>
14#include <litmus/jobs.h> 14#include <litmus/jobs.h>
15 15
16#ifdef CONFIG_LITMUS_NVIDIA
17#include <litmus/nvidia_info.h>
18#endif
19
16/* 20/*
17 * Generic function to trigger preemption on either local or remote cpu 21 * Generic function to trigger preemption on either local or remote cpu
18 * from scheduler plugins. The key feature is that this function is 22 * from scheduler plugins. The key feature is that this function is
@@ -102,6 +106,9 @@ static long litmus_dummy_complete_job(void)
102 106
103static long litmus_dummy_activate_plugin(void) 107static long litmus_dummy_activate_plugin(void)
104{ 108{
109#ifdef CONFIG_LITMUS_NVIDIA
110 shutdown_nvidia_info();
111#endif
105 return 0; 112 return 0;
106} 113}
107 114
@@ -110,14 +117,93 @@ static long litmus_dummy_deactivate_plugin(void)
110 return 0; 117 return 0;
111} 118}
112 119
113#ifdef CONFIG_LITMUS_LOCKING 120static int litmus_dummy_compare(struct task_struct* a, struct task_struct* b)
121{
122 TRACE_CUR("WARNING: Dummy compare function called!\n");
123 return 0;
124}
114 125
126#ifdef CONFIG_LITMUS_LOCKING
115static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, 127static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
116 void* __user config) 128 void* __user config)
117{ 129{
118 return -ENXIO; 130 return -ENXIO;
119} 131}
120 132
133static void litmus_dummy_increase_prio(struct task_struct* t, struct task_struct* prio_inh)
134{
135}
136
137static void litmus_dummy_decrease_prio(struct task_struct* t, struct task_struct* prio_inh)
138{
139}
140#endif
141
142#ifdef CONFIG_LITMUS_SOFTIRQD
143static void litmus_dummy_increase_prio_klitirqd(struct task_struct* klitirqd,
144 struct task_struct* old_owner,
145 struct task_struct* new_owner)
146{
147}
148
149static void litmus_dummy_decrease_prio_klitirqd(struct task_struct* klitirqd,
150 struct task_struct* old_owner)
151{
152}
153#endif
154
155#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
156static int litmus_dummy_enqueue_pai_tasklet(struct tasklet_struct* t)
157{
158 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
159 return(0); // failure.
160}
161
162static void litmus_dummy_change_prio_pai_tasklet(struct task_struct *old_prio,
163 struct task_struct *new_prio)
164{
165 TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
166}
167
168static void litmus_dummy_run_tasklets(struct task_struct* t)
169{
170 //TRACE("%s: PAI Tasklet unsupported in this plugin!!!!!!\n", __FUNCTION__);
171}
172#endif
173
174#ifdef CONFIG_LITMUS_NESTED_LOCKING
175static void litmus_dummy_nested_increase_prio(struct task_struct* t, struct task_struct* prio_inh,
176 raw_spinlock_t *to_unlock, unsigned long irqflags)
177{
178}
179
180static void litmus_dummy_nested_decrease_prio(struct task_struct* t, struct task_struct* prio_inh,
181 raw_spinlock_t *to_unlock, unsigned long irqflags)
182{
183}
184
185static int litmus_dummy___compare(struct task_struct* a, comparison_mode_t a_mod,
186 struct task_struct* b, comparison_mode_t b_mode)
187{
188 TRACE_CUR("WARNING: Dummy compare function called!\n");
189 return 0;
190}
191#endif
192
193#ifdef CONFIG_LITMUS_DGL_SUPPORT
194static raw_spinlock_t* litmus_dummy_get_dgl_spinlock(struct task_struct *t)
195{
196 return NULL;
197}
198#endif
199
200#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
201static long litmus_dummy_allocate_aff_obs(struct affinity_observer **aff_obs,
202 int type,
203 void* __user config)
204{
205 return -ENXIO;
206}
121#endif 207#endif
122 208
123 209
@@ -136,9 +222,33 @@ struct sched_plugin linux_sched_plugin = {
136 .finish_switch = litmus_dummy_finish_switch, 222 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin, 223 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin, 224 .deactivate_plugin = litmus_dummy_deactivate_plugin,
225 .compare = litmus_dummy_compare,
139#ifdef CONFIG_LITMUS_LOCKING 226#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock, 227 .allocate_lock = litmus_dummy_allocate_lock,
228 .increase_prio = litmus_dummy_increase_prio,
229 .decrease_prio = litmus_dummy_decrease_prio,
230#endif
231#ifdef CONFIG_LITMUS_NESTED_LOCKING
232 .nested_increase_prio = litmus_dummy_nested_increase_prio,
233 .nested_decrease_prio = litmus_dummy_nested_decrease_prio,
234 .__compare = litmus_dummy___compare,
235#endif
236#ifdef CONFIG_LITMUS_SOFTIRQD
237 .increase_prio_klitirqd = litmus_dummy_increase_prio_klitirqd,
238 .decrease_prio_klitirqd = litmus_dummy_decrease_prio_klitirqd,
239#endif
240#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
241 .enqueue_pai_tasklet = litmus_dummy_enqueue_pai_tasklet,
242 .change_prio_pai_tasklet = litmus_dummy_change_prio_pai_tasklet,
243 .run_tasklets = litmus_dummy_run_tasklets,
244#endif
245#ifdef CONFIG_LITMUS_DGL_SUPPORT
246 .get_dgl_spinlock = litmus_dummy_get_dgl_spinlock,
141#endif 247#endif
248#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
249 .allocate_aff_obs = litmus_dummy_allocate_aff_obs,
250#endif
251
142 .admit_task = litmus_dummy_admit_task 252 .admit_task = litmus_dummy_admit_task
143}; 253};
144 254
@@ -174,8 +284,31 @@ int register_sched_plugin(struct sched_plugin* plugin)
174 CHECK(complete_job); 284 CHECK(complete_job);
175 CHECK(activate_plugin); 285 CHECK(activate_plugin);
176 CHECK(deactivate_plugin); 286 CHECK(deactivate_plugin);
287 CHECK(compare);
177#ifdef CONFIG_LITMUS_LOCKING 288#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock); 289 CHECK(allocate_lock);
290 CHECK(increase_prio);
291 CHECK(decrease_prio);
292#endif
293#ifdef CONFIG_LITMUS_NESTED_LOCKING
294 CHECK(nested_increase_prio);
295 CHECK(nested_decrease_prio);
296 CHECK(__compare);
297#endif
298#ifdef CONFIG_LITMUS_SOFTIRQD
299 CHECK(increase_prio_klitirqd);
300 CHECK(decrease_prio_klitirqd);
301#endif
302#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
303 CHECK(enqueue_pai_tasklet);
304 CHECK(change_prio_pai_tasklet);
305 CHECK(run_tasklets);
306#endif
307#ifdef CONFIG_LITMUS_DGL_SUPPORT
308 CHECK(get_dgl_spinlock);
309#endif
310#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
311 CHECK(allocate_aff_obs);
179#endif 312#endif
180 CHECK(admit_task); 313 CHECK(admit_task);
181 314
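Every new hook added to struct sched_plugin gets a matching CHECK() line here. The CHECK macro itself is defined earlier in sched_plugin.c (outside this hunk); it presumably substitutes the corresponding litmus_dummy_* stub whenever a plugin leaves a hook NULL, so optional features degrade to no-ops rather than NULL dereferences. A self-contained sketch of that fill-in pattern, using invented demo_* names:

#include <stdio.h>

struct demo_plugin {
	void (*increase_prio)(int task);
	void (*decrease_prio)(int task);
};

static void demo_dummy_increase_prio(int task) { (void)task; }
static void demo_dummy_decrease_prio(int task) { (void)task; }

/* mirrors the CHECK(func) idiom: fall back to the dummy if the hook is unset */
#define DEMO_CHECK(p, func)				\
	do {						\
		if (!(p)->func)				\
			(p)->func = demo_dummy_##func;	\
	} while (0)

static void demo_register(struct demo_plugin *p)
{
	DEMO_CHECK(p, increase_prio);
	DEMO_CHECK(p, decrease_prio);
}

int main(void)
{
	struct demo_plugin partial = { 0 };	/* plugin that implements neither hook */

	demo_register(&partial);
	partial.increase_prio(1);		/* safe: the dummies were filled in */
	partial.decrease_prio(1);
	printf("unimplemented hooks fell back to dummies\n");
	return 0;
}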
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index 5ef8d09ab41f..f7f575346b54 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -7,6 +7,7 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/hardirq.h>
10 11
11#include <litmus/ftdev.h> 12#include <litmus/ftdev.h>
12#include <litmus/litmus.h> 13#include <litmus/litmus.h>
@@ -16,13 +17,13 @@
16#include <litmus/ftdev.h> 17#include <litmus/ftdev.h>
17 18
18 19
19#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 20#define NUM_EVENTS (1 << (CONFIG_SCHED_TASK_TRACE_SHIFT+11))
20 21
21#define now() litmus_clock() 22#define now() litmus_clock()
22 23
23struct local_buffer { 24struct local_buffer {
24 struct st_event_record record[NO_EVENTS]; 25 struct st_event_record record[NUM_EVENTS];
25 char flag[NO_EVENTS]; 26 char flag[NUM_EVENTS];
26 struct ft_buffer ftbuf; 27 struct ft_buffer ftbuf;
27}; 28};
28 29
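Besides the rename from NO_EVENTS to NUM_EVENTS, the extra "+11" grows the per-CPU event buffer by a factor of 2^11 = 2048, consistent with the "trace EVERYTHING" changes below. Back-of-the-envelope sizing only, with assumed numbers (neither the CONFIG_SCHED_TASK_TRACE_SHIFT default nor sizeof(struct st_event_record) is taken from this patch):

#include <stdio.h>

int main(void)
{
	/* Assumed values for illustration only; the real config default and
	 * record size may differ. */
	const unsigned shift = 8;
	const unsigned long rec_size = 24;	/* bytes per st_event_record (assumed) */

	unsigned long old_events = 1UL << shift;
	unsigned long new_events = 1UL << (shift + 11);

	printf("events per CPU: %lu -> %lu (x%lu)\n",
	       old_events, new_events, new_events / old_events);
	printf("approx. per-CPU buffer: %lu KiB -> %lu KiB\n",
	       old_events * (rec_size + 1) / 1024,
	       new_events * (rec_size + 1) / 1024);
	return 0;
}

Under those assumptions the per-CPU buffer grows from a few KiB to on the order of 10 MiB, which is why the printk in init_sched_task_trace() reports the resulting sizeof(struct local_buffer).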
@@ -41,7 +42,7 @@ static int __init init_sched_task_trace(void)
41 int i, ok = 0, err; 42 int i, ok = 0, err;
42 printk("Allocated %u sched_trace_xxx() events per CPU " 43 printk("Allocated %u sched_trace_xxx() events per CPU "
43 "(buffer size: %d bytes)\n", 44 "(buffer size: %d bytes)\n",
44 NO_EVENTS, (int) sizeof(struct local_buffer)); 45 NUM_EVENTS, (int) sizeof(struct local_buffer));
45 46
46 err = ftdev_init(&st_dev, THIS_MODULE, 47 err = ftdev_init(&st_dev, THIS_MODULE,
47 num_online_cpus(), "sched_trace"); 48 num_online_cpus(), "sched_trace");
@@ -50,7 +51,7 @@ static int __init init_sched_task_trace(void)
50 51
51 for (i = 0; i < st_dev.minor_cnt; i++) { 52 for (i = 0; i < st_dev.minor_cnt; i++) {
52 buf = &per_cpu(st_event_buffer, i); 53 buf = &per_cpu(st_event_buffer, i);
53 ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS, 54 ok += init_ft_buffer(&buf->ftbuf, NUM_EVENTS,
54 sizeof(struct st_event_record), 55 sizeof(struct st_event_record),
55 buf->flag, 56 buf->flag,
56 buf->record); 57 buf->record);
@@ -154,7 +155,8 @@ feather_callback void do_sched_trace_task_switch_to(unsigned long id,
154{ 155{
155 struct task_struct *t = (struct task_struct*) _task; 156 struct task_struct *t = (struct task_struct*) _task;
156 struct st_event_record* rec; 157 struct st_event_record* rec;
157 if (is_realtime(t)) { 158 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
159 {
158 rec = get_record(ST_SWITCH_TO, t); 160 rec = get_record(ST_SWITCH_TO, t);
159 if (rec) { 161 if (rec) {
160 rec->data.switch_to.when = now(); 162 rec->data.switch_to.when = now();
@@ -169,7 +171,8 @@ feather_callback void do_sched_trace_task_switch_away(unsigned long id,
169{ 171{
170 struct task_struct *t = (struct task_struct*) _task; 172 struct task_struct *t = (struct task_struct*) _task;
171 struct st_event_record* rec; 173 struct st_event_record* rec;
172 if (is_realtime(t)) { 174 //if (is_realtime(t)) /* comment out to trace EVERYTHING */
175 {
173 rec = get_record(ST_SWITCH_AWAY, t); 176 rec = get_record(ST_SWITCH_AWAY, t);
174 if (rec) { 177 if (rec) {
175 rec->data.switch_away.when = now(); 178 rec->data.switch_away.when = now();
@@ -188,6 +191,9 @@ feather_callback void do_sched_trace_task_completion(unsigned long id,
188 if (rec) { 191 if (rec) {
189 rec->data.completion.when = now(); 192 rec->data.completion.when = now();
190 rec->data.completion.forced = forced; 193 rec->data.completion.forced = forced;
194#ifdef CONFIG_LITMUS_NVIDIA
195 rec->data.completion.nv_int_count = (u16)atomic_read(&tsk_rt(t)->nv_int_count);
196#endif
191 put_record(rec); 197 put_record(rec);
192 } 198 }
193} 199}
@@ -239,3 +245,265 @@ feather_callback void do_sched_trace_action(unsigned long id,
239 put_record(rec); 245 put_record(rec);
240 } 246 }
241} 247}
248
249
250
251
252feather_callback void do_sched_trace_prediction_err(unsigned long id,
253 unsigned long _task,
254 unsigned long _distance,
255 unsigned long _rel_err)
256{
257 struct task_struct *t = (struct task_struct*) _task;
258 struct st_event_record *rec = get_record(ST_PREDICTION_ERR, t);
259
260 if (rec) {
261 gpu_migration_dist_t* distance = (gpu_migration_dist_t*) _distance;
262 fp_t* rel_err = (fp_t*) _rel_err;
263
264 rec->data.prediction_err.distance = *distance;
265 rec->data.prediction_err.rel_err = rel_err->val;
266 put_record(rec);
267 }
268}
269
270
271feather_callback void do_sched_trace_migration(unsigned long id,
272 unsigned long _task,
273 unsigned long _mig_info)
274{
275 struct task_struct *t = (struct task_struct*) _task;
276 struct st_event_record *rec = get_record(ST_MIGRATION, t);
277
278 if (rec) {
279 struct migration_info* mig_info = (struct migration_info*) _mig_info;
280
281 rec->hdr.extra = mig_info->distance;
282 rec->data.migration.observed = mig_info->observed;
283 rec->data.migration.estimated = mig_info->estimated;
284
285 put_record(rec);
286 }
287}
288
289
290
291
292
293
294
295
296
297feather_callback void do_sched_trace_tasklet_release(unsigned long id,
298 unsigned long _owner)
299{
300 struct task_struct *t = (struct task_struct*) _owner;
301 struct st_event_record *rec = get_record(ST_TASKLET_RELEASE, t);
302
303 if (rec) {
304 rec->data.tasklet_release.when = now();
305 put_record(rec);
306 }
307}
308
309
310feather_callback void do_sched_trace_tasklet_begin(unsigned long id,
311 unsigned long _owner)
312{
313 struct task_struct *t = (struct task_struct*) _owner;
314 struct st_event_record *rec = get_record(ST_TASKLET_BEGIN, t);
315
316 if (rec) {
317 rec->data.tasklet_begin.when = now();
318
319 if(!in_interrupt())
320 rec->data.tasklet_begin.exe_pid = current->pid;
321 else
322 rec->data.tasklet_begin.exe_pid = 0;
323
324 put_record(rec);
325 }
326}
327EXPORT_SYMBOL(do_sched_trace_tasklet_begin);
328
329
330feather_callback void do_sched_trace_tasklet_end(unsigned long id,
331 unsigned long _owner,
332 unsigned long _flushed)
333{
334 struct task_struct *t = (struct task_struct*) _owner;
335 struct st_event_record *rec = get_record(ST_TASKLET_END, t);
336
337 if (rec) {
338 rec->data.tasklet_end.when = now();
339 rec->data.tasklet_end.flushed = _flushed;
340
341 if(!in_interrupt())
342 rec->data.tasklet_end.exe_pid = current->pid;
343 else
344 rec->data.tasklet_end.exe_pid = 0;
345
346 put_record(rec);
347 }
348}
349EXPORT_SYMBOL(do_sched_trace_tasklet_end);
350
351
352feather_callback void do_sched_trace_work_release(unsigned long id,
353 unsigned long _owner)
354{
355 struct task_struct *t = (struct task_struct*) _owner;
356 struct st_event_record *rec = get_record(ST_WORK_RELEASE, t);
357
358 if (rec) {
359 rec->data.work_release.when = now();
360 put_record(rec);
361 }
362}
363
364
365feather_callback void do_sched_trace_work_begin(unsigned long id,
366 unsigned long _owner,
367 unsigned long _exe)
368{
369 struct task_struct *t = (struct task_struct*) _owner;
370 struct st_event_record *rec = get_record(ST_WORK_BEGIN, t);
371
372 if (rec) {
373 struct task_struct *exe = (struct task_struct*) _exe;
374 rec->data.work_begin.exe_pid = exe->pid;
375 rec->data.work_begin.when = now();
376 put_record(rec);
377 }
378}
379EXPORT_SYMBOL(do_sched_trace_work_begin);
380
381
382feather_callback void do_sched_trace_work_end(unsigned long id,
383 unsigned long _owner,
384 unsigned long _exe,
385 unsigned long _flushed)
386{
387 struct task_struct *t = (struct task_struct*) _owner;
388 struct st_event_record *rec = get_record(ST_WORK_END, t);
389
390 if (rec) {
391 struct task_struct *exe = (struct task_struct*) _exe;
392 rec->data.work_end.exe_pid = exe->pid;
393 rec->data.work_end.flushed = _flushed;
394 rec->data.work_end.when = now();
395 put_record(rec);
396 }
397}
398EXPORT_SYMBOL(do_sched_trace_work_end);
399
400
401feather_callback void do_sched_trace_eff_prio_change(unsigned long id,
402 unsigned long _task,
403 unsigned long _inh)
404{
405 struct task_struct *t = (struct task_struct*) _task;
406 struct st_event_record *rec = get_record(ST_EFF_PRIO_CHANGE, t);
407
408 if (rec) {
409 struct task_struct *inh = (struct task_struct*) _inh;
410 rec->data.effective_priority_change.when = now();
411 rec->data.effective_priority_change.inh_pid = (inh != NULL) ?
412 inh->pid :
413 0xffff;
414
415 put_record(rec);
416 }
417}
418
419/* pray for no nesting of nv interrupts on same CPU... */
420struct tracing_interrupt_map
421{
422 int active;
423 int count;
424 unsigned long data[128]; // assume nesting less than 128...
425 unsigned long serial[128];
426};
427DEFINE_PER_CPU(struct tracing_interrupt_map, active_interrupt_tracing);
428
429
430DEFINE_PER_CPU(u32, intCounter);
431
432feather_callback void do_sched_trace_nv_interrupt_begin(unsigned long id,
433 unsigned long _device)
434{
435 struct st_event_record *rec;
436 u32 serialNum;
437
438 {
439 u32* serial;
440 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
441		if(int_map->active == 0xcafebabe)
442 {
443 int_map->count++;
444 }
445 else
446 {
447 int_map->active = 0xcafebabe;
448 int_map->count = 1;
449 }
450 //int_map->data[int_map->count-1] = _device;
451
452 serial = &per_cpu(intCounter, smp_processor_id());
453 *serial += num_online_cpus();
454 serialNum = *serial;
455 int_map->serial[int_map->count-1] = serialNum;
456 }
457
458 rec = get_record(ST_NV_INTERRUPT_BEGIN, NULL);
459 if(rec) {
460 u32 device = _device;
461 rec->data.nv_interrupt_begin.when = now();
462 rec->data.nv_interrupt_begin.device = device;
463 rec->data.nv_interrupt_begin.serialNumber = serialNum;
464 put_record(rec);
465 }
466}
467EXPORT_SYMBOL(do_sched_trace_nv_interrupt_begin);
468
469/*
470int is_interrupt_tracing_active(void)
471{
472 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
473 if(int_map->active == 0xcafebabe)
474 return 1;
475 return 0;
476}
477*/
478
479feather_callback void do_sched_trace_nv_interrupt_end(unsigned long id, unsigned long _device)
480{
481 struct tracing_interrupt_map* int_map = &per_cpu(active_interrupt_tracing, smp_processor_id());
482 if(int_map->active == 0xcafebabe)
483 {
484 struct st_event_record *rec = get_record(ST_NV_INTERRUPT_END, NULL);
485
486 int_map->count--;
487 if(int_map->count == 0)
488 int_map->active = 0;
489
490 if(rec) {
491 u32 device = _device;
492 rec->data.nv_interrupt_end.when = now();
493 //rec->data.nv_interrupt_end.device = int_map->data[int_map->count];
494 rec->data.nv_interrupt_end.device = device;
495 rec->data.nv_interrupt_end.serialNumber = int_map->serial[int_map->count];
496 put_record(rec);
497 }
498 }
499}
500EXPORT_SYMBOL(do_sched_trace_nv_interrupt_end);
501
502
503
504
505
506
507
508
509
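The NV interrupt begin/end records above are correlated by a serial number drawn from a per-CPU counter that advances in steps of num_online_cpus(). The intent appears to be that serials issued on different CPUs never collide, which holds only if each per-CPU counter starts from a distinct offset; the sketch below makes that seeding explicit as an assumption (it is not visible in the patch, where the DEFINE_PER_CPU counter is zero-initialized):

#include <stdio.h>

/* Sketch of the per-CPU serial scheme in do_sched_trace_nv_interrupt_begin():
 * each CPU advances its own counter by the number of online CPUs, so serials
 * drawn by different CPUs stay disjoint *provided* each counter is seeded with
 * a distinct offset (e.g. the CPU id) -- an assumption of this sketch. */
#define NCPUS 4

int main(void)
{
	unsigned counter[NCPUS];
	int cpu, i;

	for (cpu = 0; cpu < NCPUS; cpu++)
		counter[cpu] = cpu;		/* assumed distinct seeds */

	for (cpu = 0; cpu < NCPUS; cpu++) {
		printf("cpu %d serials:", cpu);
		for (i = 0; i < 4; i++) {
			counter[cpu] += NCPUS;	/* same stride as '*serial += num_online_cpus()' */
			printf(" %u", counter[cpu]);
		}
		printf("\n");
	}
	return 0;
}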
diff --git a/litmus/sched_trace_external.c b/litmus/sched_trace_external.c
new file mode 100644
index 000000000000..cf8e1d78aa77
--- /dev/null
+++ b/litmus/sched_trace_external.c
@@ -0,0 +1,64 @@
1#include <linux/module.h>
2
3#include <litmus/trace.h>
4#include <litmus/sched_trace.h>
5#include <litmus/litmus.h>
6
7void __sched_trace_tasklet_begin_external(struct task_struct* t)
8{
9 sched_trace_tasklet_begin(t);
10}
11EXPORT_SYMBOL(__sched_trace_tasklet_begin_external);
12
13void __sched_trace_tasklet_end_external(struct task_struct* t, unsigned long flushed)
14{
15 sched_trace_tasklet_end(t, flushed);
16}
17EXPORT_SYMBOL(__sched_trace_tasklet_end_external);
18
19
20
21void __sched_trace_work_begin_external(struct task_struct* t, struct task_struct* e)
22{
23 sched_trace_work_begin(t, e);
24}
25EXPORT_SYMBOL(__sched_trace_work_begin_external);
26
27void __sched_trace_work_end_external(struct task_struct* t, struct task_struct* e, unsigned long f)
28{
29 sched_trace_work_end(t, e, f);
30}
31EXPORT_SYMBOL(__sched_trace_work_end_external);
32
33
34
35void __sched_trace_nv_interrupt_begin_external(u32 device)
36{
37 //unsigned long _device = device;
38 sched_trace_nv_interrupt_begin((unsigned long)device);
39}
40EXPORT_SYMBOL(__sched_trace_nv_interrupt_begin_external);
41
42void __sched_trace_nv_interrupt_end_external(u32 device)
43{
44 //unsigned long _device = device;
45 sched_trace_nv_interrupt_end((unsigned long)device);
46}
47EXPORT_SYMBOL(__sched_trace_nv_interrupt_end_external);
48
49
50#ifdef CONFIG_LITMUS_NVIDIA
51
52#define EXX_TS(evt) \
53void __##evt(void) { evt; } \
54EXPORT_SYMBOL(__##evt);
55
56EXX_TS(TS_NV_TOPISR_START)
57EXX_TS(TS_NV_TOPISR_END)
58EXX_TS(TS_NV_BOTISR_START)
59EXX_TS(TS_NV_BOTISR_END)
60EXX_TS(TS_NV_RELEASE_BOTISR_START)
61EXX_TS(TS_NV_RELEASE_BOTISR_END)
62
63#endif
64
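sched_trace_external.c exists so that code built outside this tree (for example, interrupt-handling glue that cannot include the internal sched_trace headers) can still emit these trace events and timestamps through exported symbols. A minimal sketch of a separate module calling the wrappers defined above; the prototypes are restated locally for the sketch (in-tree users would presumably pull them from the sched_trace_external header), and the device id is illustrative:

#include <linux/module.h>
#include <linux/types.h>

/* exported by litmus/sched_trace_external.c above */
extern void __sched_trace_nv_interrupt_begin_external(u32 device);
extern void __sched_trace_nv_interrupt_end_external(u32 device);
extern void __TS_NV_TOPISR_START(void);
extern void __TS_NV_TOPISR_END(void);

static int __init demo_trace_init(void)
{
	u32 device = 0;			/* illustrative device id */

	__TS_NV_TOPISR_START();		/* timestamp: top-half entry */
	__sched_trace_nv_interrupt_begin_external(device);

	/* ... interrupt work would happen here ... */

	__sched_trace_nv_interrupt_end_external(device);
	__TS_NV_TOPISR_END();		/* timestamp: top-half exit */
	return 0;
}

static void __exit demo_trace_exit(void) { }

module_init(demo_trace_init);
module_exit(demo_trace_exit);
MODULE_LICENSE("GPL");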