author    Bjoern Brandenburg <bbb@mpi-sws.org>    2013-06-25 01:27:07 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>    2013-08-07 03:46:49 -0400
commit    543810eb67bea9c3046ecb58388493bca39fe796 (patch)
tree      cf65010367e53dfbd3e39a9eb6e89dacf92348f3 /litmus
parent    1412c8b72e192a14b8dd620f58a75f55a5490783 (diff)
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:

- library functionality (heaps, rt_domain, prioritization, etc.)
- budget enforcement logic
- job management
- system call backends
- virtual devices (control page, etc.)
- scheduler plugin API (and dummy plugin)

This code compiles, but is not yet integrated with the rest of Linux.
Diffstat (limited to 'litmus')
-rw-r--r--  litmus/Kconfig         165
-rw-r--r--  litmus/Makefile         19
-rw-r--r--  litmus/affinity.c       41
-rw-r--r--  litmus/bheap.c         316
-rw-r--r--  litmus/binheap.c       387
-rw-r--r--  litmus/budget.c        113
-rw-r--r--  litmus/clustered.c     111
-rw-r--r--  litmus/ctrldev.c       160
-rw-r--r--  litmus/edf_common.c    200
-rw-r--r--  litmus/fdso.c          307
-rw-r--r--  litmus/fp_common.c     119
-rw-r--r--  litmus/jobs.c           55
-rw-r--r--  litmus/litmus.c        576
-rw-r--r--  litmus/litmus_proc.c   407
-rw-r--r--  litmus/locking.c       188
-rw-r--r--  litmus/preempt.c       137
-rw-r--r--  litmus/rt_domain.c     348
-rw-r--r--  litmus/sched_plugin.c  224
-rw-r--r--  litmus/srp.c           305
-rw-r--r--  litmus/sync.c          166
20 files changed, 4344 insertions, 0 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 5408ef6b159b..32c18c6eb58d 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -1,5 +1,156 @@
1menu "LITMUS^RT" 1menu "LITMUS^RT"
2 2
3menu "Scheduling"
4
5config RELEASE_MASTER
6 bool "Release-master Support"
7 depends on ARCH_HAS_SEND_PULL_TIMERS && SMP
8 default n
9 help
10 Allow one processor to act as a dedicated interrupt processor
11 that services all timer interrupts, but that does not schedule
12 real-time tasks. See RTSS'09 paper for details
13 (http://www.cs.unc.edu/~anderson/papers.html).
14
15
16config BUG_ON_MIGRATION_DEADLOCK
17 bool "Panic on suspected migration deadlock"
18 default y
19 help
20 This is a debugging option. The LITMUS^RT migration support code for
21 global scheduling contains a simple heuristic to detect when the
22 system deadlocks due to circular stack dependencies.
23
24 For example, such a deadlock exists if CPU 0 waits for task A's stack
25 to become available while using task B's stack, and CPU 1 waits for
26 task B's stack to become available while using task A's stack. Such
27 a situation can arise in (buggy) global scheduling plugins.
28
29 With this option enabled, such a scenario will result in a BUG().
30 You can turn off this option when debugging on real hardware (e.g.,
31 to rescue traces, etc. that would be hard to get after a panic).
32
33 Only turn this off if you really know what you are doing. If this
34 BUG() triggers, the scheduler is broken and turning off this option
35 won't fix it.
36
37
38endmenu
39
40menu "Real-Time Synchronization"
41
42config NP_SECTION
43 bool "Non-preemptive section support"
44 default y
45 help
46 Allow tasks to become non-preemptable.
47 Note that plugins still need to explicitly support non-preemptivity.
48 Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support.
49
50 This is required to support locking protocols such as the FMLP.
51 If disabled, all tasks will be considered preemptable at all times.
52
53config LITMUS_LOCKING
54 bool "Support for real-time locking protocols"
55 depends on NP_SECTION
56 default y
57 help
58 Enable LITMUS^RT's multiprocessor real-time locking protocols with
59 predictable maximum blocking times.
60
61 Say Yes if you want to include locking protocols such as the FMLP and
62 Baker's SRP.
63
64endmenu
65
66menu "Performance Enhancements"
67
68config SCHED_CPU_AFFINITY
69 bool "Local Migration Affinity"
70 depends on X86 && SYSFS
71 default y
72 help
73 Rescheduled tasks prefer CPUs close to their previously used CPU.
74 This may improve cache performance through possible preservation of
75 cache affinity, at the expense of (slightly) more involved scheduling
76 logic.
77
78 Warning: May make bugs harder to find since tasks may migrate less often.
79
80 NOTES:
81 * Feature is not utilized by PFair/PD^2.
82
83 Say Yes if unsure.
84
85config ALLOW_EARLY_RELEASE
86 bool "Allow Early Releasing"
87 default y
88 help
89 Allow tasks to release jobs early (while still maintaining job
90 precedence constraints). Only supported by EDF schedulers. Early
91 releasing must be explicitly requested by real-time tasks via
92 the task_params passed to sys_set_rt_task_param().
93
94 Early releasing can improve job response times while maintaining
95 real-time correctness. However, it can easily peg your CPUs
96 since tasks never suspend to wait for their next job. As such, early
97 releasing is really only useful in the context of implementing
98 bandwidth servers, interrupt handling threads, or short-lived
99 computations.
100
101 Beware that early releasing may affect real-time analysis
102 if using locking protocols or I/O.
103
104 Say Yes if unsure.
105
106choice
107 prompt "EDF Tie-Break Behavior"
108 default EDF_TIE_BREAK_LATENESS_NORM
109 help
110 Allows the configuration of tie-breaking behavior when the deadlines
111 of two EDF-scheduled tasks are equal.
112
113 config EDF_TIE_BREAK_LATENESS
114 bool "Lateness-based Tie Break"
115 help
116 Break ties between two jobs, A and B, based upon the lateness of their
117 prior jobs. The job with the greatest lateness has priority. Note that
118 lateness has a negative value if the prior job finished before its
119 deadline.
120
121 config EDF_TIE_BREAK_LATENESS_NORM
122 bool "Normalized Lateness-based Tie Break"
123 help
124 Break ties between two jobs, A and B, based upon the lateness, normalized
125 by relative deadline, of their prior jobs. The job with the greatest
126 normalized lateness has priority. Note that lateness has a negative value
127 if the prior job finished before its deadline.
128
129 Normalized lateness tie-breaks are likely preferable to non-normalized
130 tie-breaks if the execution times and/or relative deadlines of tasks in a
131 task set vary greatly.
132
133 config EDF_TIE_BREAK_HASH
134 bool "Hash-based Tie Breaks"
135 help
136 Break ties between two jobs, A and B, with equal deadlines by using a
137 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
138 A has a ~50% chance of winning a given tie-break.
139
140 config EDF_PID_TIE_BREAK
141 bool "PID-based Tie Breaks"
142 help
143 Break ties based upon OS-assigned thread IDs. Use this option if it is
144 required by the algorithm's real-time analysis or if per-task
145 response-time jitter must be minimized.
146
147 NOTES:
148 * This tie-breaking method was the default in LITMUS^RT 2012.2 and earlier.
149
150endchoice
151
152endmenu
153
3menu "Tracing" 154menu "Tracing"
4 155
5config FEATHER_TRACE 156config FEATHER_TRACE
@@ -154,6 +305,20 @@ config SCHED_DEBUG_TRACE_CALLER
154 305
155 If unsure, say No. 306 If unsure, say No.
156 307
308config PREEMPT_STATE_TRACE
309 bool "Trace preemption state machine transitions"
310 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
311 default n
312 help
313 With this option enabled, each CPU will log when it transitions
314 states in the preemption state machine. This state machine is
315 used to determine how to react to IPIs (avoid races with in-flight IPIs).
316
317 Warning: this creates a lot of information in the debug trace. Only
318 recommended when you are debugging preemption-related races.
319
320 If unsure, say No.
321
157endmenu 322endmenu
158 323
159endmenu 324endmenu
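The normalized-lateness tie-break above divides each job's lateness by its task's relative deadline before comparing. A rough user-space illustration of that comparison (not part of this patch; the in-kernel version in litmus/edf_common.c uses the fixed-point helpers from litmus/fpmath.h instead of floating point, and demo_job is invented here):

/* Hedged sketch: normalized-lateness tie-break, floating point for clarity. */
#include <stdio.h>

struct demo_job {
        long long lateness;           /* completion time - deadline (may be < 0) */
        long long relative_deadline;  /* relative deadline of the owning task */
};

/* returns nonzero if a wins the tie-break over b */
static int norm_lateness_tie_break(const struct demo_job *a,
                                   const struct demo_job *b)
{
        double na = (double) a->lateness / (double) a->relative_deadline;
        double nb = (double) b->lateness / (double) b->relative_deadline;
        return na > nb;
}

int main(void)
{
        struct demo_job a = { .lateness = 2000, .relative_deadline = 10000 };
        struct demo_job b = { .lateness = 3000, .relative_deadline = 100000 };

        /* a: 0.20 normalized lateness, b: 0.03 -> a wins despite smaller raw lateness */
        printf("a wins tie-break: %d\n", norm_lateness_tie_break(&a, &b));
        return 0;
}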
diff --git a/litmus/Makefile b/litmus/Makefile
index 6318f1c6fac8..fcb4d7533327 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -2,6 +2,25 @@
2# Makefile for LITMUS^RT 2# Makefile for LITMUS^RT
3# 3#
4 4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 binheap.o \
20 ctrldev.o
21
22obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
23
5obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 24obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
6obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 25obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
7obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o 26obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
diff --git a/litmus/affinity.c b/litmus/affinity.c
new file mode 100644
index 000000000000..a5b437b80433
--- /dev/null
+++ b/litmus/affinity.c
@@ -0,0 +1,41 @@
1#include <linux/cpu.h>
2
3#include <litmus/affinity.h>
4
5struct neighborhood neigh_info[NR_CPUS];
6
7/* called by _init_litmus() */
8void init_topology(void) {
9 int cpu;
10 int i;
11 int chk;
12 int depth = num_cache_leaves;
13
14 if (depth > NUM_CACHE_LEVELS)
15 depth = NUM_CACHE_LEVELS;
16
17 for_each_online_cpu(cpu) {
18 for (i = 0; i < depth; ++i) {
19 chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i);
20 if (chk) {
21 /* failed */
22 neigh_info[cpu].size[i] = 0;
23 } else {
24 /* size = num bits in mask */
25 neigh_info[cpu].size[i] =
26 cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]);
27 }
28 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
29 cpu, neigh_info[cpu].size[i], i,
30 *cpumask_bits(neigh_info[cpu].neighbors[i]));
31 }
32
33 /* set data for non-existent levels */
34 for (; i < NUM_CACHE_LEVELS; ++i) {
35 neigh_info[cpu].size[i] = 0;
36
37 printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n",
38 cpu, neigh_info[cpu].size[i], i, 0lu);
39 }
40 }
41}
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..2707e0122b6d
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,316 @@
1#include <linux/bug.h>
2#include <linux/kernel.h>
3#include <litmus/bheap.h>
4
5void bheap_init(struct bheap* heap)
6{
7 heap->head = NULL;
8 heap->min = NULL;
9}
10
11void bheap_node_init(struct bheap_node** _h, void* value)
12{
13 struct bheap_node* h = *_h;
14 h->parent = NULL;
15 h->next = NULL;
16 h->child = NULL;
17 h->degree = NOT_IN_HEAP;
18 h->value = value;
19 h->ref = _h;
20}
21
22
23/* make child a subtree of root */
24static void __bheap_link(struct bheap_node* root,
25 struct bheap_node* child)
26{
27 child->parent = root;
28 child->next = root->child;
29 root->child = child;
30 root->degree++;
31}
32
33/* merge root lists */
34static struct bheap_node* __bheap_merge(struct bheap_node* a,
35 struct bheap_node* b)
36{
37 struct bheap_node* head = NULL;
38 struct bheap_node** pos = &head;
39
40 while (a && b) {
41 if (a->degree < b->degree) {
42 *pos = a;
43 a = a->next;
44 } else {
45 *pos = b;
46 b = b->next;
47 }
48 pos = &(*pos)->next;
49 }
50 if (a)
51 *pos = a;
52 else
53 *pos = b;
54 return head;
55}
56
57/* reverse a linked list of nodes. also clears parent pointer */
58static struct bheap_node* __bheap_reverse(struct bheap_node* h)
59{
60 struct bheap_node* tail = NULL;
61 struct bheap_node* next;
62
63 if (!h)
64 return h;
65
66 h->parent = NULL;
67 while (h->next) {
68 next = h->next;
69 h->next = tail;
70 tail = h;
71 h = next;
72 h->parent = NULL;
73 }
74 h->next = tail;
75 return h;
76}
77
78static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
79 struct bheap_node** prev, struct bheap_node** node)
80{
81 struct bheap_node *_prev, *cur;
82 *prev = NULL;
83
84 if (!heap->head) {
85 *node = NULL;
86 return;
87 }
88
89 *node = heap->head;
90 _prev = heap->head;
91 cur = heap->head->next;
92 while (cur) {
93 if (higher_prio(cur, *node)) {
94 *node = cur;
95 *prev = _prev;
96 }
97 _prev = cur;
98 cur = cur->next;
99 }
100}
101
102static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
103 struct bheap_node* h2)
104{
105 struct bheap_node* h1;
106 struct bheap_node *prev, *x, *next;
107 if (!h2)
108 return;
109 h1 = heap->head;
110 if (!h1) {
111 heap->head = h2;
112 return;
113 }
114 h1 = __bheap_merge(h1, h2);
115 prev = NULL;
116 x = h1;
117 next = x->next;
118 while (next) {
119 if (x->degree != next->degree ||
120 (next->next && next->next->degree == x->degree)) {
121 /* nothing to do, advance */
122 prev = x;
123 x = next;
124 } else if (higher_prio(x, next)) {
125 /* x becomes the root of next */
126 x->next = next->next;
127 __bheap_link(x, next);
128 } else {
129 /* next becomes the root of x */
130 if (prev)
131 prev->next = next;
132 else
133 h1 = next;
134 __bheap_link(next, x);
135 x = next;
136 }
137 next = x->next;
138 }
139 heap->head = h1;
140}
141
142static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
143 struct bheap* heap)
144{
145 struct bheap_node *prev, *node;
146 __bheap_min(higher_prio, heap, &prev, &node);
147 if (!node)
148 return NULL;
149 if (prev)
150 prev->next = node->next;
151 else
152 heap->head = node->next;
153 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
154 return node;
155}
156
157/* insert (and reinitialize) a node into the heap */
158void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
159 struct bheap_node* node)
160{
161 struct bheap_node *min;
162 node->child = NULL;
163 node->parent = NULL;
164 node->next = NULL;
165 node->degree = 0;
166 if (heap->min && higher_prio(node, heap->min)) {
167 /* swap min cache */
168 min = heap->min;
169 min->child = NULL;
170 min->parent = NULL;
171 min->next = NULL;
172 min->degree = 0;
173 __bheap_union(higher_prio, heap, min);
174 heap->min = node;
175 } else
176 __bheap_union(higher_prio, heap, node);
177}
178
179void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
180{
181 struct bheap_node* min;
182 if (heap->min) {
183 min = heap->min;
184 heap->min = NULL;
185 bheap_insert(higher_prio, heap, min);
186 }
187}
188
189/* merge addition into target */
190void bheap_union(bheap_prio_t higher_prio,
191 struct bheap* target, struct bheap* addition)
192{
193 /* first insert any cached minima, if necessary */
194 bheap_uncache_min(higher_prio, target);
195 bheap_uncache_min(higher_prio, addition);
196 __bheap_union(higher_prio, target, addition->head);
197 /* this is a destructive merge */
198 addition->head = NULL;
199}
200
201struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
202 struct bheap* heap)
203{
204 if (!heap->min)
205 heap->min = __bheap_extract_min(higher_prio, heap);
206 return heap->min;
207}
208
209struct bheap_node* bheap_take(bheap_prio_t higher_prio,
210 struct bheap* heap)
211{
212 struct bheap_node *node;
213 if (!heap->min)
214 heap->min = __bheap_extract_min(higher_prio, heap);
215 node = heap->min;
216 heap->min = NULL;
217 if (node)
218 node->degree = NOT_IN_HEAP;
219 return node;
220}
221
222int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
223{
224 struct bheap_node *parent;
225 struct bheap_node** tmp_ref;
226 void* tmp;
227
228 /* bubble up */
229 parent = node->parent;
230 while (parent && higher_prio(node, parent)) {
231 /* swap parent and node */
232 tmp = parent->value;
233 parent->value = node->value;
234 node->value = tmp;
235 /* swap references */
236 *(parent->ref) = node;
237 *(node->ref) = parent;
238 tmp_ref = parent->ref;
239 parent->ref = node->ref;
240 node->ref = tmp_ref;
241 /* step up */
242 node = parent;
243 parent = node->parent;
244 }
245
246 return parent != NULL;
247}
248
249void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
250 struct bheap_node* node)
251{
252 struct bheap_node *parent, *prev, *pos;
253 struct bheap_node** tmp_ref;
254 void* tmp;
255
256 if (heap->min != node) {
257 /* bubble up */
258 parent = node->parent;
259 while (parent) {
260 /* swap parent and node */
261 tmp = parent->value;
262 parent->value = node->value;
263 node->value = tmp;
264 /* swap references */
265 *(parent->ref) = node;
266 *(node->ref) = parent;
267 tmp_ref = parent->ref;
268 parent->ref = node->ref;
269 node->ref = tmp_ref;
270 /* step up */
271 node = parent;
272 parent = node->parent;
273 }
274 /* now delete:
275 * first find prev */
276 prev = NULL;
277 pos = heap->head;
278 while (pos != node) {
279 BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */
280 prev = pos;
281 pos = pos->next;
282 }
283 /* we have prev, now remove node */
284 if (prev)
285 prev->next = node->next;
286 else
287 heap->head = node->next;
288 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
289 } else
290 heap->min = NULL;
291 node->degree = NOT_IN_HEAP;
292}
293
294/* allocate a heap node for value and insert into the heap */
295int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
296 void* value, int gfp_flags)
297{
298 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
299 if (likely(hn)) {
300 bheap_node_init(&hn, value);
301 bheap_insert(higher_prio, heap, hn);
302 }
303 return hn != NULL;
304}
305
306void* bheap_take_del(bheap_prio_t higher_prio,
307 struct bheap* heap)
308{
309 struct bheap_node* hn = bheap_take(higher_prio, heap);
310 void* ret = NULL;
311 if (hn) {
312 ret = hn->value;
313 bheap_node_free(hn);
314 }
315 return ret;
316}
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..d3ab34b92096
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,387 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. Actual nodes swap instead of
40 * just data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble the node to be deleted up to the root,
360 * and then delete the root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368 /* temporarily set data to BINHEAP_POISON to allow the node to bubble up to the top. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose key has decreased in value.
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
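The handle's compare callback follows the same convention as bheap_prio_t above: it returns nonzero if its first argument has higher priority. A hedged sketch of such a callback (struct demo_item and its deadline field are invented for illustration; the heap-handle setup lives in litmus/binheap.h, which is not part of this file):

/* Hedged sketch of a binheap comparison callback.  A real plugin would
 * compare its own per-task state reached via ->data. */
struct demo_item {
        unsigned long long deadline;
};

static int demo_earlier_deadline(struct binheap_node *a,
                                 struct binheap_node *b)
{
        struct demo_item *ia = (struct demo_item *) a->data;
        struct demo_item *ib = (struct demo_item *) b->data;

        /* earlier deadline = higher priority */
        return ia->deadline < ib->deadline;
}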
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 000000000000..f7712be29adb
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,113 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4
5#include <litmus/litmus.h>
6#include <litmus/preempt.h>
7
8#include <litmus/budget.h>
9
10struct enforcement_timer {
11 /* The enforcement timer is used to accurately police
12 * slice budgets. */
13 struct hrtimer timer;
14 int armed;
15};
16
17DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
18
19static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
20{
21 struct enforcement_timer* et = container_of(timer,
22 struct enforcement_timer,
23 timer);
24 unsigned long flags;
25
26 local_irq_save(flags);
27 TRACE("enforcement timer fired.\n");
28 et->armed = 0;
29 /* activate scheduler */
30 litmus_reschedule_local();
31 local_irq_restore(flags);
32
33 return HRTIMER_NORESTART;
34}
35
36/* assumes called with IRQs off */
37static void cancel_enforcement_timer(struct enforcement_timer* et)
38{
39 int ret;
40
41 TRACE("cancelling enforcement timer.\n");
42
43 /* Since interrupts are disabled and et->armed is only
44 * modified locally, we do not need any locks.
45 */
46
47 if (et->armed) {
48 ret = hrtimer_try_to_cancel(&et->timer);
49 /* Should never be inactive. */
50 BUG_ON(ret == 0);
51 /* Should never be running concurrently. */
52 BUG_ON(ret == -1);
53
54 et->armed = 0;
55 }
56}
57
58/* assumes called with IRQs off */
59static void arm_enforcement_timer(struct enforcement_timer* et,
60 struct task_struct* t)
61{
62 lt_t when_to_fire;
63 TRACE_TASK(t, "arming enforcement timer.\n");
64
65 /* Calling this when there is no budget left for the task
66 * makes no sense, unless the task is non-preemptive. */
67 BUG_ON(budget_exhausted(t) && (!is_np(t)));
68
69 /* __hrtimer_start_range_ns() cancels the timer
70 * anyway, so we don't have to check whether it is still armed */
71
72 if (likely(!is_np(t))) {
73 when_to_fire = litmus_clock() + budget_remaining(t);
74 __hrtimer_start_range_ns(&et->timer,
75 ns_to_ktime(when_to_fire),
76 0 /* delta */,
77 HRTIMER_MODE_ABS_PINNED,
78 0 /* no wakeup */);
79 et->armed = 1;
80 }
81}
82
83
84/* expects to be called with IRQs off */
85void update_enforcement_timer(struct task_struct* t)
86{
87 struct enforcement_timer* et = &__get_cpu_var(budget_timer);
88
89 if (t && budget_precisely_enforced(t)) {
90 /* Make sure we call into the scheduler when this budget
91 * expires. */
92 arm_enforcement_timer(et, t);
93 } else if (et->armed) {
94 /* Make sure we don't cause unnecessary interrupts. */
95 cancel_enforcement_timer(et);
96 }
97}
98
99
100static int __init init_budget_enforcement(void)
101{
102 int cpu;
103 struct enforcement_timer* et;
104
105 for (cpu = 0; cpu < NR_CPUS; cpu++) {
106 et = &per_cpu(budget_timer, cpu);
107 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
108 et->timer.function = on_enforcement_timeout;
109 }
110 return 0;
111}
112
113module_init(init_budget_enforcement);
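The arming arithmetic in arm_enforcement_timer() boils down to "fire when the remaining budget would be used up". A small user-space illustration (numbers are made up; it assumes budget_remaining(t) is the task's execution cost minus the time consumed so far, which is not spelled out in this file):

/* Hedged illustration of when_to_fire = litmus_clock() + budget_remaining(t). */
#include <stdio.h>

int main(void)
{
        unsigned long long now = 100000000;        /* litmus_clock(), 100 ms in ns */
        unsigned long long exec_cost = 5000000;    /* 5 ms budget */
        unsigned long long exec_time = 3000000;    /* 3 ms consumed so far */

        unsigned long long budget_remaining = exec_cost - exec_time;
        unsigned long long when_to_fire = now + budget_remaining;

        printf("timer fires at %llu ns (in %llu ns)\n",
               when_to_fire, budget_remaining);
        return 0;
}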
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 000000000000..979fac6bebb7
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,111 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4
5#include <litmus/clustered.h>
6
7#if !defined(CONFIG_X86) || !defined(CONFIG_SYSFS)
8/* fake get_shared_cpu_map() on non-x86 architectures */
9
10int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
11{
12 if (index != 1)
13 return 1;
14 else {
15 /* Fake L1: CPU is all by itself. */
16 cpumask_clear(mask);
17 cpumask_set_cpu(cpu, mask);
18 return 0;
19 }
20}
21
22#endif
23
24int get_cluster_size(enum cache_level level)
25{
26 cpumask_var_t mask;
27 int ok;
28 int num_cpus;
29
30 if (level == GLOBAL_CLUSTER)
31 return num_online_cpus();
32 else {
33 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
34 return -ENOMEM;
35 /* assumes CPU 0 is representative of all CPUs */
36 ok = get_shared_cpu_map(mask, 0, level);
37 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
38 if (ok == 0)
39 num_cpus = cpumask_weight(mask);
40 free_cpumask_var(mask);
41
42 if (ok == 0)
43 return num_cpus;
44 else
45 return -EINVAL;
46 }
47}
48
49int assign_cpus_to_clusters(enum cache_level level,
50 struct scheduling_cluster* clusters[],
51 unsigned int num_clusters,
52 struct cluster_cpu* cpus[],
53 unsigned int num_cpus)
54{
55 cpumask_var_t mask;
56 unsigned int i, free_cluster = 0, low_cpu;
57 int err = 0;
58
59 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
60 return -ENOMEM;
61
62 /* clear cluster pointers */
63 for (i = 0; i < num_cpus; i++) {
64 cpus[i]->id = i;
65 cpus[i]->cluster = NULL;
66 }
67
68 /* initialize clusters */
69 for (i = 0; i < num_clusters; i++) {
70 clusters[i]->id = i;
71 INIT_LIST_HEAD(&clusters[i]->cpus);
72 }
73
74 /* Assign each CPU. Two assumptions are made:
75 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
76 * 2) All cpus that belong to some cluster are online.
77 */
78 for_each_online_cpu(i) {
79 /* get lowest-id CPU in cluster */
80 if (level != GLOBAL_CLUSTER) {
81 err = get_shared_cpu_map(mask, cpus[i]->id, level);
82 if (err != 0) {
83 /* ugh... wrong cache level? Either caller screwed up
84 * or the CPU topology is weird. */
85 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
86 level, err);
87 err = -EINVAL;
88 goto out;
89 }
90 low_cpu = cpumask_first(mask);
91 } else
92 low_cpu = 0;
93 if (low_cpu == i) {
94 /* caller must provide an appropriate number of clusters */
95 BUG_ON(free_cluster >= num_clusters);
96
97 /* create new cluster */
98 cpus[i]->cluster = clusters[free_cluster++];
99 } else {
100 /* low_cpu points to the right cluster
101 * Assumption: low_cpu is actually online and was processed earlier. */
102 cpus[i]->cluster = cpus[low_cpu]->cluster;
103 }
104 /* enqueue in cpus list */
105 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
106 printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id);
107 }
108out:
109 free_cpumask_var(mask);
110 return err;
111}
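A hedged sketch of how a plugin might use get_cluster_size() to size its per-cluster state (demo_count_clusters() is invented for illustration and assumes the number of online CPUs is a multiple of the cluster size and that <litmus/clustered.h> is included as above):

/* Hedged sketch: derive the number of clusters at a given cache level. */
static int demo_count_clusters(enum cache_level level)
{
        int cluster_size = get_cluster_size(level);

        if (cluster_size <= 0)
                return -EINVAL;

        /* e.g., 8 online CPUs where pairs of CPUs share an L2 cache
         * give cluster_size == 2 at the L2 level, i.e., 4 clusters */
        return num_online_cpus() / cluster_size;
}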
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..3e2ac2b3fbe8
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,160 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8
9/* only one page for now, but we might want to add a RO version at some point */
10
11#define CTRL_NAME "litmus/ctrl"
12
13/* allocate t->rt_param.ctrl_page*/
14static int alloc_ctrl_page(struct task_struct *t)
15{
16 int err = 0;
17
18 /* only allocate if the task doesn't have one yet */
19 if (!tsk_rt(t)->ctrl_page) {
20 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
21 if (!tsk_rt(t)->ctrl_page)
22 err = -ENOMEM;
23 /* will get de-allocated in task teardown */
24 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
25 tsk_rt(t)->ctrl_page);
26 }
27 return err;
28}
29
30static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
31{
32 int err;
33
34 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
35
36 TRACE_CUR(CTRL_NAME
37 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
38 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
39 vma->vm_page_prot);
40
41 /* Map it into the vma. */
42 err = vm_insert_page(vma, vma->vm_start, ctrl);
43
44 if (err)
45 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
46
47 return err;
48}
49
50static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
51{
52 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
53 vma->vm_flags, vma->vm_page_prot);
54
55 TRACE_CUR(CTRL_NAME
56 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
57 (void*) vma->vm_start, (void*) vma->vm_end, vma,
58 vma->vm_private_data);
59}
60
61static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
62 struct vm_fault* vmf)
63{
64 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
65 vma->vm_flags, vmf->pgoff);
66
67 /* This function should never be called, since all pages should have
68 * been mapped by mmap() already. */
69 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
70
71 return VM_FAULT_SIGBUS;
72}
73
74static struct vm_operations_struct litmus_ctrl_vm_ops = {
75 .close = litmus_ctrl_vm_close,
76 .fault = litmus_ctrl_vm_fault,
77};
78
79static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
80{
81 int err = 0;
82
83 /* first make sure mapper knows what he's doing */
84
85 /* you can only get one page */
86 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
87 return -EINVAL;
88
89 /* you can only map the "first" page */
90 if (vma->vm_pgoff != 0)
91 return -EINVAL;
92
93 /* you can't share it with anyone */
94 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
95 return -EINVAL;
96
97 vma->vm_ops = &litmus_ctrl_vm_ops;
98 /* This mapping should not be kept across forks,
99 * cannot be expanded, and is not a "normal" page. */
100 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO;
101
102 /* We don't want the first write access to trigger a "minor" page fault
103 * to mark the page as dirty. This is transient, private memory, we
104 * don't care if it was touched or not. __S011 means RW access, but not
105 * execute, and avoids copy-on-write behavior.
106 * See protection_map in mmap.c. */
107 vma->vm_page_prot = __S011;
108
109 err = alloc_ctrl_page(current);
110 if (!err)
111 err = map_ctrl_page(current, vma);
112
113 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
114 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
115
116 return err;
117}
118
119static struct file_operations litmus_ctrl_fops = {
120 .owner = THIS_MODULE,
121 .mmap = litmus_ctrl_mmap,
122};
123
124static struct miscdevice litmus_ctrl_dev = {
125 .name = CTRL_NAME,
126 .minor = MISC_DYNAMIC_MINOR,
127 .fops = &litmus_ctrl_fops,
128};
129
130static int __init init_litmus_ctrl_dev(void)
131{
132 int err;
133
134 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
135
136 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
137
138 BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
139 != LITMUS_CP_OFFSET_SCHED);
140 BUILD_BUG_ON(offsetof(struct control_page, irq_count)
141 != LITMUS_CP_OFFSET_IRQ_COUNT);
142 BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
143 != LITMUS_CP_OFFSET_TS_SC_START);
144 BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
145 != LITMUS_CP_OFFSET_IRQ_SC_START);
146
147 printk("Initializing LITMUS^RT control device.\n");
148 err = misc_register(&litmus_ctrl_dev);
149 if (err)
150 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
151 return err;
152}
153
154static void __exit exit_litmus_ctrl_dev(void)
155{
156 misc_deregister(&litmus_ctrl_dev);
157}
158
159module_init(init_litmus_ctrl_dev);
160module_exit(exit_litmus_ctrl_dev);
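From user space, a valid mapping of the control page must be exactly one non-shared page at offset 0, as enforced by litmus_ctrl_mmap() above. A hedged sketch (the device node path /dev/litmus/ctrl is inferred from CTRL_NAME and misc_register(); the exact path depends on how udev names misc devices):

/* Hedged user-space sketch: map the LITMUS^RT control page. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/dev/litmus/ctrl", O_RDWR);
        void *ctrl;

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* exactly one page, offset 0, private mapping -- anything else
         * is rejected with -EINVAL by litmus_ctrl_mmap() */
        ctrl = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ | PROT_WRITE,
                    MAP_PRIVATE, fd, 0);
        if (ctrl == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return 1;
        }

        printf("control page mapped at %p\n", ctrl);
        munmap(ctrl, sysconf(_SC_PAGESIZE));
        close(fd);
        return 0;
}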
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..5aca2934a7b5
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,200 @@
1/*
2 * kernel/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/edf_common.h>
16
17#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
18#include <litmus/fpmath.h>
19#endif
20
21#ifdef CONFIG_EDF_TIE_BREAK_HASH
22#include <linux/hash.h>
23static inline long edf_hash(struct task_struct *t)
24{
25 /* pid is 32 bits, so normally we would shove that into the
26 * upper 32-bits and put the job number in the bottom
27 * and hash the 64-bit number with hash_64(). Sadly,
28 * in testing, hash_64() doesn't distribute keys where the
29 * upper bits are close together (as would be the case with
30 * pids) and job numbers are equal (as would be the case with
31 * synchronous task sets with all relative deadlines equal).
32 *
33 * A 2006 Linux patch proposed the following solution
34 * (but for some reason it wasn't accepted...).
35 *
36 * At least this workaround works for 32-bit systems as well.
37 */
38 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
39}
40#endif
41
42
43/* edf_higher_prio - returns true if first has a higher EDF priority
44 * than second. Deadline ties are broken by the configured tie-break (PID as the final fallback).
45 *
46 * both first and second may be NULL
47 */
48int edf_higher_prio(struct task_struct* first,
49 struct task_struct* second)
50{
51 struct task_struct *first_task = first;
52 struct task_struct *second_task = second;
53
54 /* There is no point in comparing a task to itself. */
55 if (first && first == second) {
56 TRACE_TASK(first,
57 "WARNING: pointless edf priority comparison.\n");
58 return 0;
59 }
60
61
62 /* check for NULL tasks */
63 if (!first || !second)
64 return first && !second;
65
66#ifdef CONFIG_LITMUS_LOCKING
67
68 /* Check for inherited priorities. Change task
69 * used for comparison in such a case.
70 */
71 if (unlikely(first->rt_param.inh_task))
72 first_task = first->rt_param.inh_task;
73 if (unlikely(second->rt_param.inh_task))
74 second_task = second->rt_param.inh_task;
75
76 /* Check for priority boosting. Tie-break by start of boosting.
77 */
78 if (unlikely(is_priority_boosted(first_task))) {
79 /* first_task is boosted, how about second_task? */
80 if (!is_priority_boosted(second_task) ||
81 lt_before(get_boost_start(first_task),
82 get_boost_start(second_task)))
83 return 1;
84 else
85 return 0;
86 } else if (unlikely(is_priority_boosted(second_task)))
87 /* second_task is boosted, first is not*/
88 return 0;
89
90#endif
91
92 if (earlier_deadline(first_task, second_task)) {
93 return 1;
94 }
95 else if (get_deadline(first_task) == get_deadline(second_task)) {
96 /* Need to tie break. Each tie-break method sets pid_break to 1
97 * if it cannot resolve the tie, so that we fall back to the
98 * PID-based tie-break below. */
99 int pid_break;
100
101
102#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
103 /* Tie break by lateness. Jobs with greater lateness get
104 * priority. This should spread tardiness across all tasks,
105 * especially in task sets where all tasks have the same
106 * period and relative deadlines.
107 */
108 if (get_lateness(first_task) > get_lateness(second_task)) {
109 return 1;
110 }
111 pid_break = (get_lateness(first_task) == get_lateness(second_task));
112
113
114#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
115 /* Tie break by lateness, normalized by relative deadline. Jobs with
116 * greater normalized lateness get priority.
117 *
118 * Note: Considered using the algebraically equivalent
119 * lateness(first)*relative_deadline(second) >
120 lateness(second)*relative_deadline(first)
121 * to avoid fixed-point math, but values are prone to overflow if inputs
122 * are on the order of several seconds, even in 64-bit.
123 */
124 fp_t fnorm = _frac(get_lateness(first_task),
125 get_rt_relative_deadline(first_task));
126 fp_t snorm = _frac(get_lateness(second_task),
127 get_rt_relative_deadline(second_task));
128 if (_gt(fnorm, snorm)) {
129 return 1;
130 }
131 pid_break = _eq(fnorm, snorm);
132
133
134#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
135 /* Tie break by comparing hashes of the (pid, job#) tuple. There should be
136 * a 50% chance that first_task has a higher priority than second_task.
137 */
138 long fhash = edf_hash(first_task);
139 long shash = edf_hash(second_task);
140 if (fhash < shash) {
141 return 1;
142 }
143 pid_break = (fhash == shash);
144#else
145
146
147 /* CONFIG_EDF_PID_TIE_BREAK */
148 pid_break = 1; // fall through to tie-break by pid;
149#endif
150
151 /* Tie break by pid */
152 if(pid_break) {
153 if (first_task->pid < second_task->pid) {
154 return 1;
155 }
156 else if (first_task->pid == second_task->pid) {
157 /* If the PIDs are the same then the task with the
158 * inherited priority wins.
159 */
160 if (!second->rt_param.inh_task) {
161 return 1;
162 }
163 }
164 }
165 }
166 return 0; /* fall-through. prio(second_task) > prio(first_task) */
167}
168
169int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
170{
171 return edf_higher_prio(bheap2task(a), bheap2task(b));
172}
173
174void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
175 release_jobs_t release)
176{
177 rt_domain_init(rt, edf_ready_order, resched, release);
178}
179
180/* need_to_preempt - check whether the task t needs to be preempted
181 * call only with irqs disabled and with ready_lock acquired
182 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
183 */
184int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
185{
186 /* we need the read lock for edf_ready_queue */
187 /* no need to preempt if there is nothing pending */
188 if (!__jobs_pending(rt))
189 return 0;
190 /* we need to reschedule if t doesn't exist */
191 if (!t)
192 return 1;
193
194 /* NOTE: We cannot check for non-preemptibility since we
195 * don't know what address space we're currently in.
196 */
197
198 /* make sure to get non-rt stuff out of the way */
199 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
200}
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..c4b450be4509
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,307 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - object descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
30};
31
32static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
33{
34 BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
35
36 if (fdso_ops[type]->create)
37 return fdso_ops[type]->create(obj_ref, type, config);
38 else
39 return -EINVAL;
40}
41
42static void fdso_destroy(obj_type_t type, void* obj)
43{
44 fdso_ops[type]->destroy(type, obj);
45}
46
47static int fdso_open(struct od_table_entry* entry, void* __user config)
48{
49 if (fdso_ops[entry->obj->type]->open)
50 return fdso_ops[entry->obj->type]->open(entry, config);
51 else
52 return 0;
53}
54
55static int fdso_close(struct od_table_entry* entry)
56{
57 if (fdso_ops[entry->obj->type]->close)
58 return fdso_ops[entry->obj->type]->close(entry);
59 else
60 return 0;
61}
62
63/* inode must be locked already */
64static int alloc_inode_obj(struct inode_obj_id** obj_ref,
65 struct inode* inode,
66 obj_type_t type,
67 unsigned int id,
68 void* __user config)
69{
70 struct inode_obj_id* obj;
71 void* raw_obj;
72 int err;
73
74 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
75 if (!obj) {
76 return -ENOMEM;
77 }
78
79 err = fdso_create(&raw_obj, type, config);
80 if (err != 0) {
81 kfree(obj);
82 return err;
83 }
84
85 INIT_LIST_HEAD(&obj->list);
86 atomic_set(&obj->count, 1);
87 obj->type = type;
88 obj->id = id;
89 obj->obj = raw_obj;
90 obj->inode = inode;
91
92 list_add(&obj->list, &inode->i_obj_list);
93 atomic_inc(&inode->i_count);
94
95 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
96
97 *obj_ref = obj;
98 return 0;
99}
100
101/* inode must be locked already */
102static struct inode_obj_id* get_inode_obj(struct inode* inode,
103 obj_type_t type,
104 unsigned int id)
105{
106 struct list_head* pos;
107 struct inode_obj_id* obj = NULL;
108
109 list_for_each(pos, &inode->i_obj_list) {
110 obj = list_entry(pos, struct inode_obj_id, list);
111 if (obj->id == id && obj->type == type) {
112 atomic_inc(&obj->count);
113 return obj;
114 }
115 }
116 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
117 return NULL;
118}
119
120
121static void put_inode_obj(struct inode_obj_id* obj)
122{
123 struct inode* inode;
124 int let_go = 0;
125
126 inode = obj->inode;
127 if (atomic_dec_and_test(&obj->count)) {
128
129 mutex_lock(&inode->i_obj_mutex);
130 /* no new references can be obtained */
131 if (!atomic_read(&obj->count)) {
132 list_del(&obj->list);
133 fdso_destroy(obj->type, obj->obj);
134 kfree(obj);
135 let_go = 1;
136 }
137 mutex_unlock(&inode->i_obj_mutex);
138 if (let_go)
139 iput(inode);
140 }
141}
142
143static struct od_table_entry* get_od_entry(struct task_struct* t)
144{
145 struct od_table_entry* table;
146 int i;
147
148
149 table = t->od_table;
150 if (!table) {
151 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
152 GFP_KERNEL);
153 t->od_table = table;
154 }
155
156 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
157 if (!table[i].used) {
158 table[i].used = 1;
159 return table + i;
160 }
161 return NULL;
162}
163
164static int put_od_entry(struct od_table_entry* od)
165{
166 put_inode_obj(od->obj);
167 od->used = 0;
168 return 0;
169}
170
171static long close_od_entry(struct od_table_entry *od)
172{
173 long ret;
174
175 /* Give the class a chance to reject the close. */
176 ret = fdso_close(od);
177 if (ret == 0)
178 ret = put_od_entry(od);
179
180 return ret;
181}
182
183void exit_od_table(struct task_struct* t)
184{
185 int i;
186
187 if (t->od_table) {
188 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
189 if (t->od_table[i].used)
190 close_od_entry(t->od_table + i);
191 kfree(t->od_table);
192 t->od_table = NULL;
193 }
194}
195
196static int do_sys_od_open(struct file* file, obj_type_t type, int id,
197 void* __user config)
198{
199 int idx = 0, err = 0;
200 struct inode* inode;
201 struct inode_obj_id* obj = NULL;
202 struct od_table_entry* entry;
203
204 inode = file->f_dentry->d_inode;
205
206 entry = get_od_entry(current);
207 if (!entry)
208 return -ENOMEM;
209
210 mutex_lock(&inode->i_obj_mutex);
211 obj = get_inode_obj(inode, type, id);
212 if (!obj)
213 err = alloc_inode_obj(&obj, inode, type, id, config);
214 if (err != 0) {
215 obj = NULL;
216 idx = err;
217 entry->used = 0;
218 } else {
219 entry->obj = obj;
220 entry->class = fdso_ops[type];
221 idx = entry - current->od_table;
222 }
223
224 mutex_unlock(&inode->i_obj_mutex);
225
226 /* open only if creation succeeded */
227 if (!err)
228 err = fdso_open(entry, config);
229 if (err < 0) {
230 /* The class rejected the open call.
231 * We need to clean up and tell user space.
232 */
233 if (obj)
234 put_od_entry(entry);
235 idx = err;
236 }
237
238 return idx;
239}
240
241
242struct od_table_entry* get_entry_for_od(int od)
243{
244 struct task_struct *t = current;
245
246 if (!t->od_table)
247 return NULL;
248 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
249 return NULL;
250 if (!t->od_table[od].used)
251 return NULL;
252 return t->od_table + od;
253}
254
255
256asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
257{
258 int ret = 0;
259 struct file* file;
260
261 /*
262 1) get file from fd, get inode from file
263 2) lock inode
264 3) try to lookup object
265 4) if not present create and enqueue object, inc inode refcnt
266 5) increment refcnt of object
267 6) alloc od_table_entry, setup ptrs
268 7) unlock inode
269 8) return offset in od_table as OD
270 */
271
272 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
273 ret = -EINVAL;
274 goto out;
275 }
276
277 file = fget(fd);
278 if (!file) {
279 ret = -EBADF;
280 goto out;
281 }
282
283 ret = do_sys_od_open(file, type, obj_id, config);
284
285 fput(file);
286
287out:
288 return ret;
289}
290
291
292asmlinkage long sys_od_close(int od)
293{
294 int ret = -EINVAL;
295 struct task_struct *t = current;
296
297 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
298 return ret;
299
300 if (!t->od_table || !t->od_table[od].used)
301 return ret;
302
303
304 ret = close_od_entry(t->od_table + od);
305
306 return ret;
307}
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..964a4729deff
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,119 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fp_common.h>
16
17/* fp_higher_prio - returns true if first has a higher static priority
18 * than second. Ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fp_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (unlikely(first && first == second)) {
30 TRACE_TASK(first,
31 "WARNING: pointless FP priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40 if (!is_realtime(second_task))
41 return 1;
42
43#ifdef CONFIG_LITMUS_LOCKING
44
45 /* Check for inherited priorities. Change task
46 * used for comparison in such a case.
47 */
48 if (unlikely(first->rt_param.inh_task))
49 first_task = first->rt_param.inh_task;
50 if (unlikely(second->rt_param.inh_task))
51 second_task = second->rt_param.inh_task;
52
53 /* Check for priority boosting. Tie-break by start of boosting.
54 */
55 if (unlikely(is_priority_boosted(first_task))) {
56 /* first_task is boosted, how about second_task? */
57 if (is_priority_boosted(second_task))
58 /* break by priority point */
59 return lt_before(get_boost_start(first_task),
60 get_boost_start(second_task));
61 else
62 /* priority boosting wins. */
63 return 1;
64 } else if (unlikely(is_priority_boosted(second_task)))
65 /* second_task is boosted, first is not*/
66 return 0;
67
68#endif
69
70 /* Comparisons to itself are not expected; priority inheritance
71 * should also not cause this to happen. */
72 BUG_ON(first_task == second_task);
73
74 if (get_priority(first_task) < get_priority(second_task))
75 return 1;
76 else if (get_priority(first_task) == get_priority(second_task))
77 /* Break by PID. */
78 return first_task->pid < second_task->pid;
79 else
80 return 0;
81}
82
83int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
84{
85 return fp_higher_prio(bheap2task(a), bheap2task(b));
86}
87
88void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
89 release_jobs_t release)
90{
91 rt_domain_init(rt, fp_ready_order, resched, release);
92}
93
94/* need_to_preempt - check whether the task t needs to be preempted
95 */
96int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
97{
98 struct task_struct *pending;
99
100 pending = fp_prio_peek(q);
101
102 if (!pending)
103 return 0;
104 if (!t)
105 return 1;
106
107 /* make sure to get non-rt stuff out of the way */
108 return !is_realtime(t) || fp_higher_prio(pending, t);
109}
110
111void fp_prio_queue_init(struct fp_prio_queue* q)
112{
113 int i;
114
115 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
116 q->bitmask[i] = 0;
117 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
118 bheap_init(&q->queue[i]);
119}
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..89a0810415fa
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,55 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/litmus.h>
7#include <litmus/jobs.h>
8
9static inline void setup_release(struct task_struct *t, lt_t release)
10{
11 /* prepare next release */
12 t->rt_param.job_params.release = release;
13 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
14 t->rt_param.job_params.exec_time = 0;
15
16 /* update job sequence number */
17 t->rt_param.job_params.job_no++;
18}
19
20void prepare_for_next_period(struct task_struct *t)
21{
22 BUG_ON(!t);
23
24 /* Record lateness before we set up the next job's
25 * release and deadline. Lateness may be negative.
26 */
27 t->rt_param.job_params.lateness =
28 (long long)litmus_clock() -
29 (long long)t->rt_param.job_params.deadline;
30
31 setup_release(t, get_release(t) + get_rt_period(t));
32 tsk_rt(t)->dont_requeue = 0;
33}
34
35void release_at(struct task_struct *t, lt_t start)
36{
37 BUG_ON(!t);
38 setup_release(t, start);
39 tsk_rt(t)->completed = 0;
40}
41
42
43/*
44 * Deactivate current task until the beginning of the next period.
45 */
46long complete_job(void)
47{
48 /* Mark that we do not execute anymore */
49 tsk_rt(current)->completed = 1;
50 /* Call schedule(); this will return when a new job arrives.
51 * It also takes care of preparing for the next release.
52 */
53 schedule();
54 return 0;
55}
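The bookkeeping in setup_release() and prepare_for_next_period() amounts to simple arithmetic on release times, deadlines, and lateness. A user-space illustration with made-up numbers:

/* Hedged illustration of the job bookkeeping above; all values are ns. */
#include <stdio.h>

int main(void)
{
        long long period = 10000000;          /* 10 ms */
        long long rel_deadline = 10000000;    /* implicit deadline */
        long long release = 50000000;         /* current job released at 50 ms */
        long long deadline = release + rel_deadline;
        long long now = 61000000;             /* completion at 61 ms */

        /* lateness as recorded by prepare_for_next_period() */
        long long lateness = now - deadline;  /* = 1 ms, i.e., the job was tardy */

        /* next job: release shifted by one period, deadline recomputed */
        long long next_release = release + period;
        long long next_deadline = next_release + rel_deadline;

        printf("lateness=%lld next_release=%lld next_deadline=%lld\n",
               lateness, next_release, next_deadline);
        return 0;
}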
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..7417a8fbda74
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,576 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 * the LITMUS initialization code,
4 * and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/reboot.h>
13#include <linux/stop_machine.h>
14
15#include <litmus/litmus.h>
16#include <litmus/bheap.h>
17#include <litmus/trace.h>
18#include <litmus/rt_domain.h>
19#include <litmus/litmus_proc.h>
20#include <litmus/sched_trace.h>
21
22#ifdef CONFIG_SCHED_CPU_AFFINITY
23#include <litmus/affinity.h>
24#endif
25
26/* Number of RT tasks that exist in the system */
27atomic_t rt_task_count = ATOMIC_INIT(0);
28
29#ifdef CONFIG_RELEASE_MASTER
30/* current master CPU for handling timer IRQs */
31atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
32#endif
33
34static struct kmem_cache * bheap_node_cache;
35extern struct kmem_cache * release_heap_cache;
36
37struct bheap_node* bheap_node_alloc(int gfp_flags)
38{
39 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
40}
41
42void bheap_node_free(struct bheap_node* hn)
43{
44 kmem_cache_free(bheap_node_cache, hn);
45}
46
47struct release_heap* release_heap_alloc(int gfp_flags);
48void release_heap_free(struct release_heap* rh);
49
50/*
51 * sys_set_rt_task_param
52 * @pid: PID of the task whose scheduling parameters are to be changed
53 * @param: New real-time extension parameters such as the execution cost and
54 * period
55 * Syscall for manipulating a task's rt extension params
56 * Returns EFAULT if param is NULL.
57 * ESRCH if pid does not correspond
58 * to a valid task.
59 * EINVAL if either period or execution cost is <=0
60 * EBUSY if pid already refers to a real-time task
61 * 0 on success
62 *
63 * Only non-real-time tasks may be configured with this system call
64 * to avoid races with the scheduler. In practice, this means that a
65 * task's parameters must be set _before_ calling sys_prepare_rt_task()
66 *
67 * find_task_by_vpid() assumes that we are in the same namespace as the
68 * target.
69 */
70asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
71{
72 struct rt_task tp;
73 struct task_struct *target;
74 int retval = -EINVAL;
75
76 printk("Setting up rt task parameters for process %d.\n", pid);
77
78 if (pid < 0 || param == 0) {
79 goto out;
80 }
81 if (copy_from_user(&tp, param, sizeof(tp))) {
82 retval = -EFAULT;
83 goto out;
84 }
85
86 /* Task search and manipulation must be protected */
87 read_lock_irq(&tasklist_lock);
88 if (!(target = find_task_by_vpid(pid))) {
89 retval = -ESRCH;
90 goto out_unlock;
91 }
92
93 if (is_realtime(target)) {
94 /* The task is already a real-time task.
95		 * We cannot allow parameter changes at this point.
96 */
97 retval = -EBUSY;
98 goto out_unlock;
99 }
100
101 /* set relative deadline to be implicit if left unspecified */
102 if (tp.relative_deadline == 0)
103 tp.relative_deadline = tp.period;
104
105 if (tp.exec_cost <= 0)
106 goto out_unlock;
107 if (tp.period <= 0)
108 goto out_unlock;
109 if (!cpu_online(tp.cpu))
110 goto out_unlock;
111 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
112 {
113 printk(KERN_INFO "litmus: real-time task %d rejected "
114 "because task density > 1.0\n", pid);
115 goto out_unlock;
116 }
117 if (tp.cls != RT_CLASS_HARD &&
118 tp.cls != RT_CLASS_SOFT &&
119 tp.cls != RT_CLASS_BEST_EFFORT)
120 {
121 printk(KERN_INFO "litmus: real-time task %d rejected "
122 "because its class is invalid\n", pid);
123 goto out_unlock;
124 }
125 if (tp.budget_policy != NO_ENFORCEMENT &&
126 tp.budget_policy != QUANTUM_ENFORCEMENT &&
127 tp.budget_policy != PRECISE_ENFORCEMENT)
128 {
129 printk(KERN_INFO "litmus: real-time task %d rejected "
130 "because unsupported budget enforcement policy "
131 "specified (%d)\n",
132 pid, tp.budget_policy);
133 goto out_unlock;
134 }
135
136 target->rt_param.task_params = tp;
137
138 retval = 0;
139 out_unlock:
140 read_unlock_irq(&tasklist_lock);
141 out:
142 return retval;
143}
144
145/*
146 * Getter of task's RT params
147 * returns EINVAL if param is NULL or pid is negative
148 * returns ESRCH if pid does not correspond to a valid task
149 * returns EFAULT if copying of parameters has failed.
150 *
151 * find_task_by_vpid() assumes that we are in the same namespace as the
152 * target.
153 */
154asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
155{
156 int retval = -EINVAL;
157 struct task_struct *source;
158 struct rt_task lp;
159 if (param == 0 || pid < 0)
160 goto out;
161 read_lock(&tasklist_lock);
162 if (!(source = find_task_by_vpid(pid))) {
163 retval = -ESRCH;
164 goto out_unlock;
165 }
166 lp = source->rt_param.task_params;
167 read_unlock(&tasklist_lock);
168 /* Do copying outside the lock */
169 retval =
170 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
171 return retval;
172 out_unlock:
173 read_unlock(&tasklist_lock);
174 out:
175 return retval;
176
177}
178
179/*
180 * This is the crucial function for the periodic task implementation.
181 * It checks whether a task is periodic and whether this kind of sleep
182 * is permitted, and then calls the plugin-specific sleep, which puts the
183 * task into a wait queue.
184 * returns 0 on successful wakeup
185 * returns EPERM if current conditions do not permit such sleep
186 * returns EINVAL if current task is not able to go to sleep
187 */
188asmlinkage long sys_complete_job(void)
189{
190 int retval = -EPERM;
191 if (!is_realtime(current)) {
192 retval = -EINVAL;
193 goto out;
194 }
195 /* Task with negative or zero period cannot sleep */
196 if (get_rt_period(current) <= 0) {
197 retval = -EINVAL;
198 goto out;
199 }
200 /* The plugin has to put the task into an
201 * appropriate queue and call schedule
202 */
203 retval = litmus->complete_job();
204 out:
205 return retval;
206}
207
208/* This is an "improved" version of sys_complete_job that
209 * addresses the problem of unintentionally missing a job after
210 * an overrun.
211 *
212 * returns 0 on successful wakeup
213 * returns EPERM if current conditions do not permit such sleep
214 * returns EINVAL if current task is not able to go to sleep
215 */
216asmlinkage long sys_wait_for_job_release(unsigned int job)
217{
218 int retval = -EPERM;
219 if (!is_realtime(current)) {
220 retval = -EINVAL;
221 goto out;
222 }
223
224 /* Task with negative or zero period cannot sleep */
225 if (get_rt_period(current) <= 0) {
226 retval = -EINVAL;
227 goto out;
228 }
229
230 retval = 0;
231
232 /* first wait until we have "reached" the desired job
233 *
234 * This implementation has at least two problems:
235 *
236 * 1) It doesn't gracefully handle the wrap around of
237 * job_no. Since LITMUS is a prototype, this is not much
238 * of a problem right now.
239 *
240 * 2) It is theoretically racy if a job release occurs
241 * between checking job_no and calling sleep_next_period().
242 * A proper solution would require adding another callback
243 * in the plugin structure and testing the condition with
244 * interrupts disabled.
245 *
246 * FIXME: At least problem 2 should be taken care of eventually.
247 */
248 while (!retval && job > current->rt_param.job_params.job_no)
249 /* If the last job overran then job <= job_no and we
250 * don't send the task to sleep.
251 */
252 retval = litmus->complete_job();
253 out:
254 return retval;
255}
256
257/* This is a helper syscall to query the current job sequence number.
258 *
259 * returns 0 on successful query
260 * returns EPERM if task is not a real-time task.
261 * returns EFAULT if &job is not a valid pointer.
262 */
263asmlinkage long sys_query_job_no(unsigned int __user *job)
264{
265 int retval = -EPERM;
266 if (is_realtime(current))
267 retval = put_user(current->rt_param.job_params.job_no, job);
268
269 return retval;
270}
271
272/* sys_null_call() is only used for determining raw system call
273 * overheads (kernel entry, kernel exit). It has no useful side effects.
274 * If ts is non-NULL, then the current Feather-Trace time is recorded.
275 */
276asmlinkage long sys_null_call(cycles_t __user *ts)
277{
278 long ret = 0;
279 cycles_t now;
280
281 if (ts) {
282 now = get_cycles();
283 ret = put_user(now, ts);
284 }
285
286 return ret;
287}
288
289/* p is a real-time task. Re-init its state as a best-effort task. */
290static void reinit_litmus_state(struct task_struct* p, int restore)
291{
292 struct rt_task user_config = {};
293 void* ctrl_page = NULL;
294
295 if (restore) {
296		/* Save user-space provided configuration data
297		 * and the allocated control page. */
298 user_config = p->rt_param.task_params;
299 ctrl_page = p->rt_param.ctrl_page;
300 }
301
302 /* We probably should not be inheriting any task's priority
303 * at this point in time.
304 */
305 WARN_ON(p->rt_param.inh_task);
306
307 /* Cleanup everything else. */
308 memset(&p->rt_param, 0, sizeof(p->rt_param));
309
310 /* Restore preserved fields. */
311 if (restore) {
312 p->rt_param.task_params = user_config;
313 p->rt_param.ctrl_page = ctrl_page;
314 }
315}
316
317long litmus_admit_task(struct task_struct* tsk)
318{
319 long retval = 0;
320
321 BUG_ON(is_realtime(tsk));
322
323 tsk_rt(tsk)->heap_node = NULL;
324 tsk_rt(tsk)->rel_heap = NULL;
325
326 if (get_rt_relative_deadline(tsk) == 0 ||
327 get_exec_cost(tsk) >
328 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
329 TRACE_TASK(tsk,
330 "litmus admit: invalid task parameters "
331 "(e = %lu, p = %lu, d = %lu)\n",
332 get_exec_cost(tsk), get_rt_period(tsk),
333 get_rt_relative_deadline(tsk));
334 retval = -EINVAL;
335 goto out;
336 }
337
338 if (!cpu_online(get_partition(tsk))) {
339 TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
340 get_partition(tsk));
341 retval = -EINVAL;
342 goto out;
343 }
344
345 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
346
347 /* allocate heap node for this task */
348 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
349 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
350
351 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
352 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
353
354 retval = -ENOMEM;
355 goto out;
356 } else {
357 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
358 }
359
360 preempt_disable();
361
362 retval = litmus->admit_task(tsk);
363
364 if (!retval) {
365 sched_trace_task_name(tsk);
366 sched_trace_task_param(tsk);
367 atomic_inc(&rt_task_count);
368 }
369
370 preempt_enable();
371
372out:
373 if (retval) {
374 bheap_node_free(tsk_rt(tsk)->heap_node);
375 release_heap_free(tsk_rt(tsk)->rel_heap);
376 }
377 return retval;
378}
379
380void litmus_exit_task(struct task_struct* tsk)
381{
382 if (is_realtime(tsk)) {
383 sched_trace_task_completion(tsk, 1);
384
385 litmus->task_exit(tsk);
386
387 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
388 bheap_node_free(tsk_rt(tsk)->heap_node);
389 release_heap_free(tsk_rt(tsk)->rel_heap);
390
391 atomic_dec(&rt_task_count);
392 reinit_litmus_state(tsk, 1);
393 }
394}
395
396static int do_plugin_switch(void *_plugin)
397{
398 int ret;
399 struct sched_plugin* plugin = _plugin;
400
401 /* don't switch if there are active real-time tasks */
402 if (atomic_read(&rt_task_count) == 0) {
403 ret = litmus->deactivate_plugin();
404 if (0 != ret)
405 goto out;
406 ret = plugin->activate_plugin();
407 if (0 != ret) {
408 printk(KERN_INFO "Can't activate %s (%d).\n",
409 plugin->plugin_name, ret);
410 plugin = &linux_sched_plugin;
411 }
412 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
413 litmus = plugin;
414 } else
415 ret = -EBUSY;
416out:
417 return ret;
418}
419
420/* Switching a plugin in use is tricky.
421 * We must watch out that no real-time tasks exist
422 * (and that none are created in parallel) and that the plugin is not
423 * currently in use on any processor (in theory).
424 */
425int switch_sched_plugin(struct sched_plugin* plugin)
426{
427 BUG_ON(!plugin);
428
429 if (atomic_read(&rt_task_count) == 0)
430 return stop_machine(do_plugin_switch, plugin, NULL);
431 else
432 return -EBUSY;
433}
434
435/* Called upon fork.
436 * p is the newly forked task.
437 */
438void litmus_fork(struct task_struct* p)
439{
440 if (is_realtime(p)) {
441 /* clean out any litmus related state, don't preserve anything */
442 reinit_litmus_state(p, 0);
443 /* Don't let the child be a real-time task. */
444 p->sched_reset_on_fork = 1;
445 } else
446 /* non-rt tasks might have ctrl_page set */
447 tsk_rt(p)->ctrl_page = NULL;
448
449 /* od tables are never inherited across a fork */
450 p->od_table = NULL;
451}
452
453/* Called upon execve().
454 * current is doing the exec.
455 * Don't let address space specific stuff leak.
456 */
457void litmus_exec(void)
458{
459 struct task_struct* p = current;
460
461 if (is_realtime(p)) {
462 WARN_ON(p->rt_param.inh_task);
463 if (tsk_rt(p)->ctrl_page) {
464 free_page((unsigned long) tsk_rt(p)->ctrl_page);
465 tsk_rt(p)->ctrl_page = NULL;
466 }
467 }
468}
469
470void exit_litmus(struct task_struct *dead_tsk)
471{
472 /* We also allow non-RT tasks to
473 * allocate control pages to allow
474 * measurements with non-RT tasks.
475 * So check if we need to free the page
476 * in any case.
477 */
478 if (tsk_rt(dead_tsk)->ctrl_page) {
479 TRACE_TASK(dead_tsk,
480 "freeing ctrl_page %p\n",
481 tsk_rt(dead_tsk)->ctrl_page);
482 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
483 }
484
485 /* main cleanup only for RT tasks */
486 if (is_realtime(dead_tsk))
487 litmus_exit_task(dead_tsk);
488}
489
490
491#ifdef CONFIG_MAGIC_SYSRQ
492int sys_kill(int pid, int sig);
493
494static void sysrq_handle_kill_rt_tasks(int key)
495{
496 struct task_struct *t;
497 read_lock(&tasklist_lock);
498 for_each_process(t) {
499 if (is_realtime(t)) {
500 sys_kill(t->pid, SIGKILL);
501 }
502 }
503 read_unlock(&tasklist_lock);
504}
505
506static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
507 .handler = sysrq_handle_kill_rt_tasks,
508 .help_msg = "quit-rt-tasks(X)",
509 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
510};
511#endif
512
513extern struct sched_plugin linux_sched_plugin;
514
515static int litmus_shutdown_nb(struct notifier_block *unused1,
516 unsigned long unused2, void *unused3)
517{
518 /* Attempt to switch back to regular Linux scheduling.
519 * Forces the active plugin to clean up.
520 */
521 if (litmus != &linux_sched_plugin) {
522 int ret = switch_sched_plugin(&linux_sched_plugin);
523 if (ret) {
524 printk("Auto-shutdown of active Litmus plugin failed.\n");
525 }
526 }
527 return NOTIFY_DONE;
528}
529
530static struct notifier_block shutdown_notifier = {
531 .notifier_call = litmus_shutdown_nb,
532};
533
534static int __init _init_litmus(void)
535{
536	/* Common initializers;
537	 * the mode-change lock is used to enforce a single mode-change
538	 * operation.
539 */
540 printk("Starting LITMUS^RT kernel\n");
541
542 register_sched_plugin(&linux_sched_plugin);
543
544 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
545 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
546
547#ifdef CONFIG_MAGIC_SYSRQ
548 /* offer some debugging help */
549 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
550 printk("Registered kill rt tasks magic sysrq.\n");
551 else
552 printk("Could not register kill rt tasks magic sysrq.\n");
553#endif
554
555 init_litmus_proc();
556
557#ifdef CONFIG_SCHED_CPU_AFFINITY
558 init_topology();
559#endif
560
561 register_reboot_notifier(&shutdown_notifier);
562
563 return 0;
564}
565
566static void _exit_litmus(void)
567{
568 unregister_reboot_notifier(&shutdown_notifier);
569
570 exit_litmus_proc();
571 kmem_cache_destroy(bheap_node_cache);
572 kmem_cache_destroy(release_heap_cache);
573}
574
575module_init(_init_litmus);
576module_exit(_exit_litmus);
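
The parameter checks in sys_set_rt_task_param() above define the admission interface: parameters must be set while the task is still best-effort, the density (exec_cost relative to min(period, relative_deadline)) must not exceed 1, and the class and budget policy must be valid. The sketch below shows how a user-space task might fill in struct rt_task accordingly; set_rt_task_param() and the header location are assumptions, and time values are given in nanoseconds (the unit assumed for lt_t).

#include <string.h>
#include <unistd.h>
#include <litmus/rt_param.h>	/* assumed location of struct rt_task */

int set_rt_task_param(pid_t pid, struct rt_task *param);	/* assumed wrapper */

static int make_self_realtime(void)
{
	struct rt_task tp;

	memset(&tp, 0, sizeof(tp));
	tp.exec_cost         = 10000000ULL;	/* 10 ms worst-case execution time */
	tp.period            = 100000000ULL;	/* 100 ms period */
	tp.relative_deadline = 0;		/* 0 => implicit deadline (= period) */
	tp.cpu               = 0;		/* partition; must be online */
	tp.cls               = RT_CLASS_SOFT;
	tp.budget_policy     = NO_ENFORCEMENT;

	/* must be called while the task is still a best-effort task */
	return set_rt_task_param(getpid(), &tp);
}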
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 000000000000..1ebf1277f5d3
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,407 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/uaccess.h>
7#include <linux/seq_file.h>
8
9#include <litmus/litmus.h>
10#include <litmus/litmus_proc.h>
11
12#include <litmus/clustered.h>
13
14/* in litmus/litmus.c */
15extern atomic_t rt_task_count;
16
17static struct proc_dir_entry *litmus_dir = NULL,
18 *curr_file = NULL,
19 *stat_file = NULL,
20 *plugs_dir = NULL,
21#ifdef CONFIG_RELEASE_MASTER
22 *release_master_file = NULL,
23#endif
24 *plugs_file = NULL;
25
26/* in litmus/sync.c */
27int count_tasks_waiting_for_release(void);
28
29static int litmus_stats_proc_show(struct seq_file *m, void *v)
30{
31 seq_printf(m,
32 "real-time tasks = %d\n"
33 "ready for release = %d\n",
34 atomic_read(&rt_task_count),
35 count_tasks_waiting_for_release());
36 return 0;
37}
38
39static int litmus_stats_proc_open(struct inode *inode, struct file *file)
40{
41 return single_open(file, litmus_stats_proc_show, PDE_DATA(inode));
42}
43
44static const struct file_operations litmus_stats_proc_fops = {
45 .open = litmus_stats_proc_open,
46 .read = seq_read,
47 .llseek = seq_lseek,
48 .release = single_release,
49};
50
51
52static int litmus_loaded_proc_show(struct seq_file *m, void *v)
53{
54 print_sched_plugins(m);
55 return 0;
56}
57
58static int litmus_loaded_proc_open(struct inode *inode, struct file *file)
59{
60 return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode));
61}
62
63static const struct file_operations litmus_loaded_proc_fops = {
64 .open = litmus_loaded_proc_open,
65 .read = seq_read,
66 .llseek = seq_lseek,
67 .release = single_release,
68};
69
70
71
72
73/* in litmus/litmus.c */
74int switch_sched_plugin(struct sched_plugin*);
75
76static ssize_t litmus_active_proc_write(struct file *file,
77 const char __user *buffer, size_t count,
78 loff_t *ppos)
79{
80 char name[65];
81 struct sched_plugin* found;
82 ssize_t ret = -EINVAL;
83 int err;
84
85
86 ret = copy_and_chomp(name, sizeof(name), buffer, count);
87 if (ret < 0)
88 return ret;
89
90 found = find_sched_plugin(name);
91
92 if (found) {
93 err = switch_sched_plugin(found);
94 if (err) {
95 printk(KERN_INFO "Could not switch plugin: %d\n", err);
96 ret = err;
97 }
98 } else {
99 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
100 ret = -ESRCH;
101 }
102
103 return ret;
104}
105
106static int litmus_active_proc_show(struct seq_file *m, void *v)
107{
108 seq_printf(m, "%s\n", litmus->plugin_name);
109 return 0;
110}
111
112static int litmus_active_proc_open(struct inode *inode, struct file *file)
113{
114 return single_open(file, litmus_active_proc_show, PDE_DATA(inode));
115}
116
117static const struct file_operations litmus_active_proc_fops = {
118 .open = litmus_active_proc_open,
119 .read = seq_read,
120 .llseek = seq_lseek,
121 .release = single_release,
122 .write = litmus_active_proc_write,
123};
124
125
126#ifdef CONFIG_RELEASE_MASTER
127static ssize_t litmus_release_master_proc_write(
128 struct file *file,
129 const char __user *buffer, size_t count,
130 loff_t *ppos)
131{
132 int cpu, err, online = 0;
133 char msg[64];
134 ssize_t len;
135
136 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
137
138 if (len < 0)
139 return len;
140
141 if (strcmp(msg, "NO_CPU") == 0)
142 atomic_set(&release_master_cpu, NO_CPU);
143 else {
144 err = sscanf(msg, "%d", &cpu);
145 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
146 atomic_set(&release_master_cpu, cpu);
147 } else {
148 TRACE("invalid release master: '%s' "
149 "(err:%d cpu:%d online:%d)\n",
150 msg, err, cpu, online);
151 len = -EINVAL;
152 }
153 }
154 return len;
155}
156
157static int litmus_release_master_proc_show(struct seq_file *m, void *v)
158{
159 int master;
160 master = atomic_read(&release_master_cpu);
161 if (master == NO_CPU)
162 seq_printf(m, "NO_CPU\n");
163 else
164 seq_printf(m, "%d\n", master);
165 return 0;
166}
167
168static int litmus_release_master_proc_open(struct inode *inode, struct file *file)
169{
170 return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode));
171}
172
173static const struct file_operations litmus_release_master_proc_fops = {
174 .open = litmus_release_master_proc_open,
175 .read = seq_read,
176 .llseek = seq_lseek,
177 .release = single_release,
178 .write = litmus_release_master_proc_write,
179};
180#endif
181
182int __init init_litmus_proc(void)
183{
184 litmus_dir = proc_mkdir("litmus", NULL);
185 if (!litmus_dir) {
186 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
187 return -ENOMEM;
188 }
189
190 curr_file = proc_create("active_plugin", 0644, litmus_dir,
191 &litmus_active_proc_fops);
192
193 if (!curr_file) {
194 printk(KERN_ERR "Could not allocate active_plugin "
195 "procfs entry.\n");
196 return -ENOMEM;
197 }
198
199#ifdef CONFIG_RELEASE_MASTER
200 release_master_file = proc_create("release_master", 0644, litmus_dir,
201 &litmus_release_master_proc_fops);
202 if (!release_master_file) {
203 printk(KERN_ERR "Could not allocate release_master "
204 "procfs entry.\n");
205 return -ENOMEM;
206 }
207#endif
208
209 stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops);
210
211 plugs_dir = proc_mkdir("plugins", litmus_dir);
212 if (!plugs_dir){
213 printk(KERN_ERR "Could not allocate plugins directory "
214 "procfs entry.\n");
215 return -ENOMEM;
216 }
217
218 plugs_file = proc_create("loaded", 0444, plugs_dir,
219 &litmus_loaded_proc_fops);
220
221 return 0;
222}
223
224void exit_litmus_proc(void)
225{
226 if (plugs_file)
227 remove_proc_entry("loaded", plugs_dir);
228 if (plugs_dir)
229 remove_proc_entry("plugins", litmus_dir);
230 if (stat_file)
231 remove_proc_entry("stats", litmus_dir);
232 if (curr_file)
233 remove_proc_entry("active_plugin", litmus_dir);
234#ifdef CONFIG_RELEASE_MASTER
235 if (release_master_file)
236 remove_proc_entry("release_master", litmus_dir);
237#endif
238 if (litmus_dir)
239 remove_proc_entry("litmus", NULL);
240}
241
242long make_plugin_proc_dir(struct sched_plugin* plugin,
243 struct proc_dir_entry** pde_in)
244{
245 struct proc_dir_entry *pde_new = NULL;
246 long rv;
247
248 if (!plugin || !plugin->plugin_name){
249 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
250 __func__);
251 rv = -EINVAL;
252 goto out_no_pde;
253 }
254
255 if (!plugs_dir){
256 printk(KERN_ERR "Could not make plugin sub-directory, because "
257 "/proc/litmus/plugins does not exist.\n");
258 rv = -ENOENT;
259 goto out_no_pde;
260 }
261
262 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
263 if (!pde_new){
264 printk(KERN_ERR "Could not make plugin sub-directory: "
265			"out of memory?\n");
266 rv = -ENOMEM;
267 goto out_no_pde;
268 }
269
270 rv = 0;
271 *pde_in = pde_new;
272 goto out_ok;
273
274out_no_pde:
275 *pde_in = NULL;
276out_ok:
277 return rv;
278}
279
280void remove_plugin_proc_dir(struct sched_plugin* plugin)
281{
282 if (!plugin || !plugin->plugin_name){
283 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
284 __func__);
285 return;
286 }
287 remove_proc_entry(plugin->plugin_name, plugs_dir);
288}
289
290
291
292/* misc. I/O helper functions */
293
294int copy_and_chomp(char *kbuf, unsigned long ksize,
295 __user const char* ubuf, unsigned long ulength)
296{
297 /* caller must provide buffer space */
298 BUG_ON(!ksize);
299
300 ksize--; /* leave space for null byte */
301
302 if (ksize > ulength)
303 ksize = ulength;
304
305 if(copy_from_user(kbuf, ubuf, ksize))
306 return -EFAULT;
307
308 kbuf[ksize] = '\0';
309
310 /* chomp kbuf */
311 if (ksize > 0 && kbuf[ksize - 1] == '\n')
312 kbuf[ksize - 1] = '\0';
313
314 return ksize;
315}
316
317/* helper functions for clustered plugins */
318static const char* cache_level_names[] = {
319 "ALL",
320 "L1",
321 "L2",
322 "L3",
323};
324
325int parse_cache_level(const char *cache_name, enum cache_level *level)
326{
327 int err = -EINVAL;
328 int i;
329 /* do a quick and dirty comparison to find the cluster size */
330 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
331 if (!strcmp(cache_name, cache_level_names[i])) {
332 *level = (enum cache_level) i;
333 err = 0;
334 break;
335 }
336 return err;
337}
338
339const char* cache_level_name(enum cache_level level)
340{
341 int idx = level;
342
343 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
344 return cache_level_names[idx];
345 else
346 return "INVALID";
347}
348
349
350
351
352/* proc file interface to configure the cluster size */
353
354static ssize_t litmus_cluster_proc_write(struct file *file,
355 const char __user *buffer, size_t count,
356 loff_t *ppos)
357{
358 enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file));
359 ssize_t len;
360 char cache_name[8];
361
362 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
363
364 if (len > 0 && parse_cache_level(cache_name, level)) {
365 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
366 len = -EINVAL;
367 }
368
369 return len;
370}
371
372static int litmus_cluster_proc_show(struct seq_file *m, void *v)
373{
374 enum cache_level *level = (enum cache_level *) m->private;
375
376 seq_printf(m, "%s\n", cache_level_name(*level));
377 return 0;
378}
379
380static int litmus_cluster_proc_open(struct inode *inode, struct file *file)
381{
382 return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode));
383}
384
385static const struct file_operations litmus_cluster_proc_fops = {
386 .open = litmus_cluster_proc_open,
387 .read = seq_read,
388 .llseek = seq_lseek,
389 .release = single_release,
390 .write = litmus_cluster_proc_write,
391};
392
393struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
394 enum cache_level* level)
395{
396 struct proc_dir_entry* cluster_file;
397
398
399 cluster_file = proc_create_data("cluster", 0644, parent,
400 &litmus_cluster_proc_fops,
401 (void *) level);
402 if (!cluster_file) {
403 printk(KERN_ERR
404			"Could not create cluster procfs entry.\n");
405 }
406 return cluster_file;
407}
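
The files created above form the user-visible control interface: /proc/litmus/active_plugin selects the scheduler, /proc/litmus/stats reports task counts, and /proc/litmus/plugins/loaded lists registered plugins. A small sketch of switching plugins from user space, matching what litmus_active_proc_write() parses, follows; it is illustrative and not part of the patch.

#include <stdio.h>

static int select_plugin(const char *name)
{
	FILE *f = fopen("/proc/litmus/active_plugin", "w");

	if (!f)
		return -1;
	/* the write handler chomps a trailing newline and rejects the
	 * switch if real-time tasks currently exist */
	fprintf(f, "%s\n", name);
	return fclose(f);
}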
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 000000000000..43d9aece2e74
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,188 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
3#include <litmus/fdso.h>
4
5#ifdef CONFIG_LITMUS_LOCKING
6
7#include <linux/sched.h>
8#include <litmus/litmus.h>
9#include <litmus/sched_plugin.h>
10#include <litmus/trace.h>
11#include <litmus/wait.h>
12
13static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
14static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
15static int close_generic_lock(struct od_table_entry* entry);
16static void destroy_generic_lock(obj_type_t type, void* sem);
17
18struct fdso_ops generic_lock_ops = {
19 .create = create_generic_lock,
20 .open = open_generic_lock,
21 .close = close_generic_lock,
22 .destroy = destroy_generic_lock
23};
24
25static inline bool is_lock(struct od_table_entry* entry)
26{
27 return entry->class == &generic_lock_ops;
28}
29
30static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
31{
32 BUG_ON(!is_lock(entry));
33 return (struct litmus_lock*) entry->obj->obj;
34}
35
36static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
37{
38 struct litmus_lock* lock;
39 int err;
40
41 err = litmus->allocate_lock(&lock, type, arg);
42 if (err == 0)
43 *obj_ref = lock;
44 return err;
45}
46
47static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
48{
49 struct litmus_lock* lock = get_lock(entry);
50 if (lock->ops->open)
51 return lock->ops->open(lock, arg);
52 else
53 return 0; /* default: any task can open it */
54}
55
56static int close_generic_lock(struct od_table_entry* entry)
57{
58 struct litmus_lock* lock = get_lock(entry);
59 if (lock->ops->close)
60 return lock->ops->close(lock);
61 else
62 return 0; /* default: closing succeeds */
63}
64
65static void destroy_generic_lock(obj_type_t type, void* obj)
66{
67 struct litmus_lock* lock = (struct litmus_lock*) obj;
68 lock->ops->deallocate(lock);
69}
70
71asmlinkage long sys_litmus_lock(int lock_od)
72{
73 long err = -EINVAL;
74 struct od_table_entry* entry;
75 struct litmus_lock* l;
76
77 TS_SYSCALL_IN_START;
78
79 TS_SYSCALL_IN_END;
80
81 TS_LOCK_START;
82
83 entry = get_entry_for_od(lock_od);
84 if (entry && is_lock(entry)) {
85 l = get_lock(entry);
86 TRACE_CUR("attempts to lock 0x%p\n", l);
87 err = l->ops->lock(l);
88 }
89
90	/* Note: task may have been suspended or preempted in between! Take
91 * this into account when computing overheads. */
92 TS_LOCK_END;
93
94 TS_SYSCALL_OUT_START;
95
96 return err;
97}
98
99asmlinkage long sys_litmus_unlock(int lock_od)
100{
101 long err = -EINVAL;
102 struct od_table_entry* entry;
103 struct litmus_lock* l;
104
105 TS_SYSCALL_IN_START;
106
107 TS_SYSCALL_IN_END;
108
109 TS_UNLOCK_START;
110
111 entry = get_entry_for_od(lock_od);
112 if (entry && is_lock(entry)) {
113 l = get_lock(entry);
114 TRACE_CUR("attempts to unlock 0x%p\n", l);
115 err = l->ops->unlock(l);
116 }
117
118	/* Note: task may have been preempted in between! Take this into
119 * account when computing overheads. */
120 TS_UNLOCK_END;
121
122 TS_SYSCALL_OUT_START;
123
124 return err;
125}
126
127struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
128{
129 wait_queue_t* q;
130 struct task_struct* t = NULL;
131
132 if (waitqueue_active(wq)) {
133 q = list_entry(wq->task_list.next,
134 wait_queue_t, task_list);
135 t = (struct task_struct*) q->private;
136 __remove_wait_queue(wq, q);
137 }
138 return(t);
139}
140
141unsigned int __add_wait_queue_prio_exclusive(
142 wait_queue_head_t* head,
143 prio_wait_queue_t *new)
144{
145 struct list_head *pos;
146 unsigned int passed = 0;
147
148 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
149
150 /* find a spot where the new entry is less than the next */
151 list_for_each(pos, &head->task_list) {
152 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
153 wq.task_list);
154
155 if (unlikely(lt_before(new->priority, queued->priority) ||
156 (new->priority == queued->priority &&
157 new->tie_breaker < queued->tie_breaker))) {
158 /* pos is not less than new, thus insert here */
159 __list_add(&new->wq.task_list, pos->prev, pos);
160 goto out;
161 }
162 passed++;
163 }
164
165	/* if we get to this point, either the list is empty or every
166	 * queued element is less than new.
167 * Let's add new to the end. */
168 list_add_tail(&new->wq.task_list, &head->task_list);
169out:
170 return passed;
171}
172
173
174#else
175
176struct fdso_ops generic_lock_ops = {};
177
178asmlinkage long sys_litmus_lock(int sem_od)
179{
180 return -ENOSYS;
181}
182
183asmlinkage long sys_litmus_unlock(int sem_od)
184{
185 return -ENOSYS;
186}
187
188#endif
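
sys_litmus_lock() and sys_litmus_unlock() above dispatch through the lock's ops table, so the same two syscalls serve every protocol a plugin implements. A hypothetical user-space critical section looks like the sketch below; litmus_lock()/litmus_unlock() are assumed wrappers, and lock_od is an object descriptor obtained through the FDSO interface, which is not part of this excerpt.

int litmus_lock(int lock_od);	/* assumed wrapper around sys_litmus_lock() */
int litmus_unlock(int lock_od);	/* assumed wrapper around sys_litmus_unlock() */

static void critical_section(int lock_od, void (*work)(void))
{
	if (litmus_lock(lock_od) == 0) {	/* may block until the lock is granted */
		work();				/* access the shared resource */
		litmus_unlock(lock_od);		/* hand the lock to the next waiter */
	}
}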
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 000000000000..6be2f26728b8
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,137 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/preempt.h>
5#include <litmus/trace.h>
6
7/* The rescheduling state of each processor.
8 */
9DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
10
11void sched_state_will_schedule(struct task_struct* tsk)
12{
13 /* Litmus hack: we only care about processor-local invocations of
14 * set_tsk_need_resched(). We can't reliably set the flag remotely
15 * since it might race with other updates to the scheduling state. We
16 * can't rely on the runqueue lock protecting updates to the sched
17 * state since processors do not acquire the runqueue locks for all
18 * updates to the sched state (to avoid acquiring two runqueue locks at
19 * the same time). Further, if tsk is residing on a remote processor,
20 * then that processor doesn't actually know yet that it is going to
21 * reschedule; it still must receive an IPI (unless a local invocation
22 * races).
23 */
24 if (likely(task_cpu(tsk) == smp_processor_id())) {
25 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
26 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
27 set_sched_state(PICKED_WRONG_TASK);
28 else
29 set_sched_state(WILL_SCHEDULE);
30 } else
31 /* Litmus tasks should never be subject to a remote
32 * set_tsk_need_resched(). */
33 BUG_ON(is_realtime(tsk));
34#ifdef CONFIG_PREEMPT_STATE_TRACE
35 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
36 __builtin_return_address(0));
37#endif
38}
39
40/* Called by the IPI handler after another CPU called smp_send_resched(). */
41void sched_state_ipi(void)
42{
43 /* If the IPI was slow, we might be in any state right now. The IPI is
44 * only meaningful if we are in SHOULD_SCHEDULE. */
45 if (is_in_sched_state(SHOULD_SCHEDULE)) {
46 /* Cause scheduler to be invoked.
47 * This will cause a transition to WILL_SCHEDULE. */
48 set_tsk_need_resched(current);
49 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
50 current->comm, current->pid);
51 TS_SEND_RESCHED_END;
52 } else {
53 /* ignore */
54 TRACE_STATE("ignoring IPI in state %x (%s)\n",
55 get_sched_state(),
56 sched_state_name(get_sched_state()));
57 }
58}
59
60/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
61 * hold the lock that is used to serialize scheduling decisions. */
62void litmus_reschedule(int cpu)
63{
64 int picked_transition_ok = 0;
65 int scheduled_transition_ok = 0;
66
67 /* The (remote) CPU could be in any state. */
68
69 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
70 * is not aware of the need to reschedule at this point. */
71
72 /* is a context switch in progress? */
73 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
74 picked_transition_ok = sched_state_transition_on(
75 cpu, TASK_PICKED, PICKED_WRONG_TASK);
76
77 if (!picked_transition_ok &&
78 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
79 /* We either raced with the end of the context switch, or the
80 * CPU was in TASK_SCHEDULED anyway. */
81 scheduled_transition_ok = sched_state_transition_on(
82 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
83 }
84
85 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
86 * scheduler to be invoked. */
87 if (scheduled_transition_ok) {
88 if (smp_processor_id() == cpu)
89 set_tsk_need_resched(current);
90 else {
91 TS_SEND_RESCHED_START(cpu);
92 smp_send_reschedule(cpu);
93 }
94 }
95
96 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
97 __FUNCTION__,
98 picked_transition_ok,
99 scheduled_transition_ok);
100}
101
102void litmus_reschedule_local(void)
103{
104 if (is_in_sched_state(TASK_PICKED))
105 set_sched_state(PICKED_WRONG_TASK);
106 else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) {
107 set_sched_state(WILL_SCHEDULE);
108 set_tsk_need_resched(current);
109 }
110}
111
112#ifdef CONFIG_DEBUG_KERNEL
113
114void sched_state_plugin_check(void)
115{
116 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
117		TRACE("!!!! plugin did not call sched_state_task_picked()! "
118 "Calling sched_state_task_picked() is mandatory---fix this.\n");
119 set_sched_state(TASK_PICKED);
120 }
121}
122
123#define NAME_CHECK(x) case x: return #x
124const char* sched_state_name(int s)
125{
126 switch (s) {
127 NAME_CHECK(TASK_SCHEDULED);
128 NAME_CHECK(SHOULD_SCHEDULE);
129 NAME_CHECK(WILL_SCHEDULE);
130 NAME_CHECK(TASK_PICKED);
131 NAME_CHECK(PICKED_WRONG_TASK);
132 default:
133 return "UNKNOWN";
134 };
135}
136
137#endif
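
litmus_reschedule() above must be called while holding whatever lock serializes the plugin's scheduling decisions, since it transitions the target CPU's scheduling state and possibly sends an IPI. A rough kernel-side sketch of that usage pattern follows; my_plugin_lock and should_preempt() are placeholders for plugin-specific state, not interfaces defined by this patch.

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <litmus/preempt.h>

static DEFINE_RAW_SPINLOCK(my_plugin_lock);

static void my_plugin_check_preempt(int cpu, struct task_struct *newly_released)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&my_plugin_lock, flags);
	if (should_preempt(cpu, newly_released))	/* placeholder priority check */
		litmus_reschedule(cpu);			/* caller holds the serializing lock */
	raw_spin_unlock_irqrestore(&my_plugin_lock, flags);
}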
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..bfadf1284291
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,348 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17
18#include <litmus/rt_domain.h>
19
20#include <litmus/trace.h>
21
22#include <litmus/bheap.h>
23
24/* Uncomment when debugging timer races... */
25#if 0
26#define VTRACE_TASK TRACE_TASK
27#define VTRACE TRACE
28#else
29#define VTRACE_TASK(t, fmt, args...) /* shut up */
30#define VTRACE(fmt, args...) /* be quiet already */
31#endif
32
33static int dummy_resched(rt_domain_t *rt)
34{
35 return 0;
36}
37
38static int dummy_order(struct bheap_node* a, struct bheap_node* b)
39{
40 return 0;
41}
42
43/* default implementation: use default lock */
44static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
45{
46 merge_ready(rt, tasks);
47}
48
49static unsigned int time2slot(lt_t time)
50{
51 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
52}
53
54static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
55{
56 unsigned long flags;
57 struct release_heap* rh;
58 rh = container_of(timer, struct release_heap, timer);
59
60 TS_RELEASE_LATENCY(rh->release_time);
61
62 VTRACE("on_release_timer(0x%p) starts.\n", timer);
63
64 TS_RELEASE_START;
65
66
67 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
68 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
69 /* remove from release queue */
70 list_del(&rh->list);
71 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
72 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
73
74 /* call release callback */
75 rh->dom->release_jobs(rh->dom, &rh->heap);
76 /* WARNING: rh can be referenced from other CPUs from now on. */
77
78 TS_RELEASE_END;
79
80 VTRACE("on_release_timer(0x%p) ends.\n", timer);
81
82 return HRTIMER_NORESTART;
83}
84
85/* allocated in litmus.c */
86struct kmem_cache * release_heap_cache;
87
88struct release_heap* release_heap_alloc(int gfp_flags)
89{
90 struct release_heap* rh;
91 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
92 if (rh) {
93 /* initialize timer */
94 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
95 rh->timer.function = on_release_timer;
96 }
97 return rh;
98}
99
100void release_heap_free(struct release_heap* rh)
101{
102 /* make sure timer is no longer in use */
103 hrtimer_cancel(&rh->timer);
104 kmem_cache_free(release_heap_cache, rh);
105}
106
107/* Caller must hold release lock.
108 * Will return heap for given time. If no such heap exists prior to
109 * the invocation it will be created.
110 */
111static struct release_heap* get_release_heap(rt_domain_t *rt,
112 struct task_struct* t,
113 int use_task_heap)
114{
115 struct list_head* pos;
116 struct release_heap* heap = NULL;
117 struct release_heap* rh;
118 lt_t release_time = get_release(t);
119 unsigned int slot = time2slot(release_time);
120
121 /* initialize pos for the case that the list is empty */
122 pos = rt->release_queue.slot[slot].next;
123 list_for_each(pos, &rt->release_queue.slot[slot]) {
124 rh = list_entry(pos, struct release_heap, list);
125 if (release_time == rh->release_time) {
126 /* perfect match -- this happens on hyperperiod
127 * boundaries
128 */
129 heap = rh;
130 break;
131 } else if (lt_before(release_time, rh->release_time)) {
132 /* we need to insert a new node since rh is
133 * already in the future
134 */
135 break;
136 }
137 }
138 if (!heap && use_task_heap) {
139 /* use pre-allocated release heap */
140 rh = tsk_rt(t)->rel_heap;
141
142 rh->dom = rt;
143 rh->release_time = release_time;
144
145 /* add to release queue */
146 list_add(&rh->list, pos->prev);
147 heap = rh;
148 }
149 return heap;
150}
151
152static void reinit_release_heap(struct task_struct* t)
153{
154 struct release_heap* rh;
155
156 /* use pre-allocated release heap */
157 rh = tsk_rt(t)->rel_heap;
158
159 /* Make sure it is safe to use. The timer callback could still
160 * be executing on another CPU; hrtimer_cancel() will wait
161 * until the timer callback has completed. However, under no
162 * circumstances should the timer be active (= yet to be
163 * triggered).
164 *
165 * WARNING: If the CPU still holds the release_lock at this point,
166 * deadlock may occur!
167 */
168 BUG_ON(hrtimer_cancel(&rh->timer));
169
170 /* initialize */
171 bheap_init(&rh->heap);
172#ifdef CONFIG_RELEASE_MASTER
173 atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
174#endif
175}
176/* arm_release_timer() - start local release timer or trigger
177 * remote timer (pull timer)
178 *
179 * Called by add_release() with:
180 * - tobe_lock taken
181 * - IRQ disabled
182 */
183#ifdef CONFIG_RELEASE_MASTER
184#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
185static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
186#else
187static void arm_release_timer(rt_domain_t *_rt)
188#endif
189{
190 rt_domain_t *rt = _rt;
191 struct list_head list;
192 struct list_head *pos, *safe;
193 struct task_struct* t;
194 struct release_heap* rh;
195
196 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
197 list_replace_init(&rt->tobe_released, &list);
198
199 list_for_each_safe(pos, safe, &list) {
200		/* pick task off work list */
201 t = list_entry(pos, struct task_struct, rt_param.list);
202 sched_trace_task_release(t);
203 list_del(pos);
204
205 /* put into release heap while holding release_lock */
206 raw_spin_lock(&rt->release_lock);
207 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
208
209 rh = get_release_heap(rt, t, 0);
210 if (!rh) {
211 /* need to use our own, but drop lock first */
212 raw_spin_unlock(&rt->release_lock);
213 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
214 &rt->release_lock);
215
216 reinit_release_heap(t);
217 VTRACE_TASK(t, "release_heap ready\n");
218
219 raw_spin_lock(&rt->release_lock);
220 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
221 &rt->release_lock);
222
223 rh = get_release_heap(rt, t, 1);
224 }
225 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
226 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
227
228 raw_spin_unlock(&rt->release_lock);
229 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
230
231 /* To avoid arming the timer multiple times, we only let the
232 * owner do the arming (which is the "first" task to reference
233 * this release_heap anyway).
234 */
235 if (rh == tsk_rt(t)->rel_heap) {
236 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
237 /* we cannot arm the timer using hrtimer_start()
238 * as it may deadlock on rq->lock
239 *
240 * PINNED mode is ok on both local and remote CPU
241 */
242#ifdef CONFIG_RELEASE_MASTER
243 if (rt->release_master == NO_CPU &&
244 target_cpu == NO_CPU)
245#endif
246 __hrtimer_start_range_ns(&rh->timer,
247 ns_to_ktime(rh->release_time),
248 0, HRTIMER_MODE_ABS_PINNED, 0);
249#ifdef CONFIG_RELEASE_MASTER
250 else
251 hrtimer_start_on(
252 /* target_cpu overrides release master */
253 (target_cpu != NO_CPU ?
254 target_cpu : rt->release_master),
255 &rh->info, &rh->timer,
256 ns_to_ktime(rh->release_time),
257 HRTIMER_MODE_ABS_PINNED);
258#endif
259 } else
260 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
261 }
262}
263
264void rt_domain_init(rt_domain_t *rt,
265 bheap_prio_t order,
266 check_resched_needed_t check,
267 release_jobs_t release
268 )
269{
270 int i;
271
272 BUG_ON(!rt);
273 if (!check)
274 check = dummy_resched;
275 if (!release)
276 release = default_release_jobs;
277 if (!order)
278 order = dummy_order;
279
280#ifdef CONFIG_RELEASE_MASTER
281 rt->release_master = NO_CPU;
282#endif
283
284 bheap_init(&rt->ready_queue);
285 INIT_LIST_HEAD(&rt->tobe_released);
286 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
287 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
288
289 raw_spin_lock_init(&rt->ready_lock);
290 raw_spin_lock_init(&rt->release_lock);
291 raw_spin_lock_init(&rt->tobe_lock);
292
293 rt->check_resched = check;
294 rt->release_jobs = release;
295 rt->order = order;
296}
297
298/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
299 * @new: the newly released task
300 */
301void __add_ready(rt_domain_t* rt, struct task_struct *new)
302{
303 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
304 "to ready queue at %llu\n",
305 new->comm, new->pid,
306 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
307 get_release(new), litmus_clock());
308
309 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
310
311 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
312 rt->check_resched(rt);
313}
314
315/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
316 * @tasks - the newly released tasks
317 */
318void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
319{
320 bheap_union(rt->order, &rt->ready_queue, tasks);
321 rt->check_resched(rt);
322}
323
324
325#ifdef CONFIG_RELEASE_MASTER
326void __add_release_on(rt_domain_t* rt, struct task_struct *task,
327 int target_cpu)
328{
329 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
330 get_release(task), target_cpu);
331 list_add(&tsk_rt(task)->list, &rt->tobe_released);
332 task->rt_param.domain = rt;
333
334 arm_release_timer_on(rt, target_cpu);
335}
336#endif
337
338/* add_release - add a real-time task to the rt release queue.
339 * @task: the sleeping task
340 */
341void __add_release(rt_domain_t* rt, struct task_struct *task)
342{
343 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
344 list_add(&tsk_rt(task)->list, &rt->tobe_released);
345 task->rt_param.domain = rt;
346
347 arm_release_timer(rt);
348}
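
rt_domain_init() above wires a ready queue and a release queue together with three plugin-supplied callbacks, each of which may be NULL to select the dummy/default behavior. A minimal sketch of how a plugin would set up its domain follows; edf_ready_order() stands in for whatever bheap ordering function the plugin uses and is an assumption here.

#include <litmus/rt_domain.h>

static rt_domain_t my_domain;

static int my_check_resched(rt_domain_t *rt)
{
	/* placeholder: a real plugin would compare the new queue head with
	 * the currently scheduled job and call litmus_reschedule() */
	return 0;
}

static void my_plugin_setup_domain(void)
{
	rt_domain_init(&my_domain,
		       edf_ready_order,		/* assumed bheap_prio_t ordering function */
		       my_check_resched,	/* invoked after the ready queue changes */
		       NULL);			/* NULL => default_release_jobs() */
}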
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..c4747e0ef2ab
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,224 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10#include <linux/seq_file.h>
11
12#include <litmus/litmus.h>
13#include <litmus/sched_plugin.h>
14#include <litmus/preempt.h>
15#include <litmus/jobs.h>
16
17/*
18 * Generic function to trigger preemption on either local or remote cpu
19 * from scheduler plugins. The key feature is that this function is
20 * non-preemptive section aware and does not invoke the scheduler / send
21 * IPIs if the to-be-preempted task is actually non-preemptive.
22 */
23void preempt_if_preemptable(struct task_struct* t, int cpu)
24{
25	/* t is the real-time task executing on CPU cpu. If t is NULL, then
26	 * cpu is currently scheduling background work.
27 */
28
29 int reschedule = 0;
30
31 if (!t)
32 /* move non-real-time task out of the way */
33 reschedule = 1;
34 else {
35 if (smp_processor_id() == cpu) {
36 /* local CPU case */
37 /* check if we need to poke userspace */
38 if (is_user_np(t))
39 /* Yes, poke it. This doesn't have to be atomic since
40 * the task is definitely not executing. */
41 request_exit_np(t);
42 else if (!is_kernel_np(t))
43 /* only if we are allowed to preempt the
44 * currently-executing task */
45 reschedule = 1;
46 } else {
47 /* Remote CPU case. Only notify if it's not a kernel
48 * NP section and if we didn't set the userspace
49 * flag. */
50 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
51 }
52 }
53 if (likely(reschedule))
54 litmus_reschedule(cpu);
55}
56
57
58/*************************************************************
59 * Dummy plugin functions *
60 *************************************************************/
61
62static void litmus_dummy_finish_switch(struct task_struct * prev)
63{
64}
65
66static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
67{
68 sched_state_task_picked();
69 return NULL;
70}
71
72static void litmus_dummy_tick(struct task_struct* tsk)
73{
74}
75
76static long litmus_dummy_admit_task(struct task_struct* tsk)
77{
78 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
79 tsk->comm, tsk->pid);
80 return -EINVAL;
81}
82
83static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
84{
85}
86
87static void litmus_dummy_task_wake_up(struct task_struct *task)
88{
89}
90
91static void litmus_dummy_task_block(struct task_struct *task)
92{
93}
94
95static void litmus_dummy_task_exit(struct task_struct *task)
96{
97}
98
99static long litmus_dummy_complete_job(void)
100{
101 return -ENOSYS;
102}
103
104static long litmus_dummy_activate_plugin(void)
105{
106 return 0;
107}
108
109static long litmus_dummy_deactivate_plugin(void)
110{
111 return 0;
112}
113
114#ifdef CONFIG_LITMUS_LOCKING
115
116static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
117 void* __user config)
118{
119 return -ENXIO;
120}
121
122#endif
123
124
125/* The default scheduler plugin. It doesn't do anything and lets Linux do its
126 * job.
127 */
128struct sched_plugin linux_sched_plugin = {
129 .plugin_name = "Linux",
130 .tick = litmus_dummy_tick,
131 .task_new = litmus_dummy_task_new,
132 .task_exit = litmus_dummy_task_exit,
133 .task_wake_up = litmus_dummy_task_wake_up,
134 .task_block = litmus_dummy_task_block,
135 .complete_job = litmus_dummy_complete_job,
136 .schedule = litmus_dummy_schedule,
137 .finish_switch = litmus_dummy_finish_switch,
138 .activate_plugin = litmus_dummy_activate_plugin,
139 .deactivate_plugin = litmus_dummy_deactivate_plugin,
140#ifdef CONFIG_LITMUS_LOCKING
141 .allocate_lock = litmus_dummy_allocate_lock,
142#endif
143 .admit_task = litmus_dummy_admit_task
144};
145
146/*
147 * The reference to the current plugin that is used to schedule tasks within
148 * the system. It stores references to the actual function implementations.
149 * Should be initialized by calling "init_***_plugin()".
150 */
151struct sched_plugin *litmus = &linux_sched_plugin;
152
153/* the list of registered scheduling plugins */
154static LIST_HEAD(sched_plugins);
155static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
156
157#define CHECK(func) {\
158 if (!plugin->func) \
159 plugin->func = litmus_dummy_ ## func;}
160
161/* FIXME: get reference to module */
162int register_sched_plugin(struct sched_plugin* plugin)
163{
164 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
165 plugin->plugin_name);
166
167 /* make sure we don't trip over null pointers later */
168 CHECK(finish_switch);
169 CHECK(schedule);
170 CHECK(tick);
171 CHECK(task_wake_up);
172 CHECK(task_exit);
173 CHECK(task_block);
174 CHECK(task_new);
175 CHECK(complete_job);
176 CHECK(activate_plugin);
177 CHECK(deactivate_plugin);
178#ifdef CONFIG_LITMUS_LOCKING
179 CHECK(allocate_lock);
180#endif
181 CHECK(admit_task);
182
183 if (!plugin->release_at)
184 plugin->release_at = release_at;
185
186 raw_spin_lock(&sched_plugins_lock);
187 list_add(&plugin->list, &sched_plugins);
188 raw_spin_unlock(&sched_plugins_lock);
189
190 return 0;
191}
192
193
194/* FIXME: reference counting, etc. */
195struct sched_plugin* find_sched_plugin(const char* name)
196{
197 struct list_head *pos;
198 struct sched_plugin *plugin;
199
200 raw_spin_lock(&sched_plugins_lock);
201 list_for_each(pos, &sched_plugins) {
202 plugin = list_entry(pos, struct sched_plugin, list);
203 if (!strcmp(plugin->plugin_name, name))
204 goto out_unlock;
205 }
206 plugin = NULL;
207
208out_unlock:
209 raw_spin_unlock(&sched_plugins_lock);
210 return plugin;
211}
212
213void print_sched_plugins(struct seq_file *m)
214{
215 struct list_head *pos;
216 struct sched_plugin *plugin;
217
218 raw_spin_lock(&sched_plugins_lock);
219 list_for_each(pos, &sched_plugins) {
220 plugin = list_entry(pos, struct sched_plugin, list);
221 seq_printf(m, "%s\n", plugin->plugin_name);
222 }
223 raw_spin_unlock(&sched_plugins_lock);
224}
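
register_sched_plugin() above fills every missing callback with a litmus_dummy_* fall-back, so a new plugin only has to provide what it actually overrides. The following is a minimal illustrative plugin skeleton built on that behavior; it is a sketch, not a plugin shipped by this patch.

#include <linux/module.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

static struct task_struct* demo_schedule(struct task_struct *prev)
{
	sched_state_task_picked();	/* mandatory; see litmus/preempt.c */
	return NULL;			/* let Linux schedule background work */
}

static long demo_admit_task(struct task_struct *tsk)
{
	return 0;			/* accept every task */
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",
	.schedule	= demo_schedule,
	.admit_task	= demo_admit_task,
	/* all other callbacks are filled in with litmus_dummy_* by CHECK() */
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);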
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..c88dbf2f580f
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,305 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/wait.h>
8
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/fdso.h>
12#include <litmus/trace.h>
13
14
15#ifdef CONFIG_LITMUS_LOCKING
16
17#include <litmus/srp.h>
18
19srp_prioritization_t get_srp_prio;
20
21struct srp {
22 struct list_head ceiling;
23 wait_queue_head_t ceiling_blocked;
24};
25#define system_ceiling(srp) list2prio(srp->ceiling.next)
26#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
27
28#define UNDEF_SEM -2
29
30atomic_t srp_objects_in_use = ATOMIC_INIT(0);
31
32DEFINE_PER_CPU(struct srp, srp);
33
34/* Initialize SRP semaphores at boot time. */
35static int __init srp_init(void)
36{
37 int i;
38
39 printk("Initializing SRP per-CPU ceilings...");
40 for (i = 0; i < NR_CPUS; i++) {
41 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
42 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
43 }
44 printk(" done!\n");
45
46 return 0;
47}
48module_init(srp_init);
49
50/* SRP task priority comparison function. Smaller numeric values have higher
51 * priority, tie-break is PID. Special case: priority == 0 <=> no priority
52 */
53static int srp_higher_prio(struct srp_priority* first,
54 struct srp_priority* second)
55{
56 if (!first->priority)
57 return 0;
58 else
59 return !second->priority ||
60 first->priority < second->priority || (
61 first->priority == second->priority &&
62 first->pid < second->pid);
63}
64
65
66static int srp_exceeds_ceiling(struct task_struct* first,
67 struct srp* srp)
68{
69 struct srp_priority prio;
70
71 if (list_empty(&srp->ceiling))
72 return 1;
73 else {
74 prio.pid = first->pid;
75 prio.priority = get_srp_prio(first);
76 return srp_higher_prio(&prio, system_ceiling(srp)) ||
77 ceiling2sem(system_ceiling(srp))->owner == first;
78 }
79}
80
81static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
82{
83 struct list_head *pos;
84 if (in_list(&prio->list)) {
85 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
86 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
87 return;
88 }
89 list_for_each(pos, &srp->ceiling)
90 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
91 __list_add(&prio->list, pos->prev, pos);
92 return;
93 }
94
95 list_add_tail(&prio->list, &srp->ceiling);
96}
97
98
99static int lock_srp_semaphore(struct litmus_lock* l)
100{
101 struct task_struct* t = current;
102 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
103
104 if (!is_realtime(t))
105 return -EPERM;
106
107 /* prevent acquisition of local locks in global critical sections */
108 if (tsk_rt(t)->num_locks_held)
109 return -EBUSY;
110
111 preempt_disable();
112
113 /* Update ceiling. */
114 srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
115
116 /* SRP invariant: all resources available */
117 BUG_ON(sem->owner != NULL);
118
119 sem->owner = t;
120 TRACE_CUR("acquired srp 0x%p\n", sem);
121
122 tsk_rt(t)->num_local_locks_held++;
123
124 preempt_enable();
125
126 return 0;
127}
128
129static int unlock_srp_semaphore(struct litmus_lock* l)
130{
131 struct task_struct* t = current;
132 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
133 int err = 0;
134
135 preempt_disable();
136
137 if (sem->owner != t) {
138 err = -EINVAL;
139 } else {
140 /* Determine new system priority ceiling for this CPU. */
141 BUG_ON(!in_list(&sem->ceiling.list));
142
143 list_del(&sem->ceiling.list);
144 sem->owner = NULL;
145
146 /* Wake tasks on this CPU, if they exceed current ceiling. */
147 TRACE_CUR("released srp 0x%p\n", sem);
148 wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
149
150 tsk_rt(t)->num_local_locks_held--;
151 }
152
153 preempt_enable();
154 return err;
155}
156
157static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
158{
159 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
160 int err = 0;
161 struct task_struct* t = current;
162 struct srp_priority t_prio;
163
164 if (!is_realtime(t))
165 return -EPERM;
166
167 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
168
169 preempt_disable();
170
171 if (sem->owner != NULL)
172 err = -EBUSY;
173
174 if (err == 0) {
175 if (sem->cpu == UNDEF_SEM)
176 sem->cpu = get_partition(t);
177 else if (sem->cpu != get_partition(t))
178 err = -EPERM;
179 }
180
181 if (err == 0) {
182 t_prio.priority = get_srp_prio(t);
183 t_prio.pid = t->pid;
184 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
185 sem->ceiling.priority = t_prio.priority;
186 sem->ceiling.pid = t_prio.pid;
187 }
188 }
189
190 preempt_enable();
191
192 return err;
193}
194
195static int close_srp_semaphore(struct litmus_lock* l)
196{
197 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
198 int err = 0;
199
200 preempt_disable();
201
202 if (sem->owner == current)
203 unlock_srp_semaphore(l);
204
205 preempt_enable();
206
207 return err;
208}
209
210static void deallocate_srp_semaphore(struct litmus_lock* l)
211{
212 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
213 atomic_dec(&srp_objects_in_use);
214 kfree(sem);
215}
216
217static struct litmus_lock_ops srp_lock_ops = {
218 .open = open_srp_semaphore,
219 .close = close_srp_semaphore,
220 .lock = lock_srp_semaphore,
221 .unlock = unlock_srp_semaphore,
222 .deallocate = deallocate_srp_semaphore,
223};
224
225struct srp_semaphore* allocate_srp_semaphore(void)
226{
227 struct srp_semaphore* sem;
228
229 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
230 if (!sem)
231 return NULL;
232
233 INIT_LIST_HEAD(&sem->ceiling.list);
234 sem->ceiling.priority = 0;
235 sem->cpu = UNDEF_SEM;
236 sem->owner = NULL;
237
238 sem->litmus_lock.ops = &srp_lock_ops;
239
240 atomic_inc(&srp_objects_in_use);
241 return sem;
242}
243
244static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
245 void *key)
246{
247 int cpu = smp_processor_id();
248 struct task_struct *tsk = wait->private;
249 if (cpu != get_partition(tsk))
250		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
251 get_partition(tsk));
252 else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
253 return default_wake_function(wait, mode, sync, key);
254 return 0;
255}
256
257static void do_ceiling_block(struct task_struct *tsk)
258{
259 wait_queue_t wait = {
260 .private = tsk,
261 .func = srp_wake_up,
262 .task_list = {NULL, NULL}
263 };
264
265 tsk->state = TASK_UNINTERRUPTIBLE;
266 add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
267 tsk->rt_param.srp_non_recurse = 1;
268 preempt_enable_no_resched();
269 schedule();
270 preempt_disable();
271 tsk->rt_param.srp_non_recurse = 0;
272 remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
273}
274
275/* Wait for current task priority to exceed system-wide priority ceiling.
276 * FIXME: the hotpath should be inline.
277 */
278void srp_ceiling_block(void)
279{
280 struct task_struct *tsk = current;
281
282	/* Only applies to real-time tasks; the unlikely() hint assumes RT callers. */
283 if (unlikely(!is_realtime(tsk)))
284 return;
285
286 /* Avoid recursive ceiling blocking. */
287 if (unlikely(tsk->rt_param.srp_non_recurse))
288 return;
289
290 /* Bail out early if there aren't any SRP resources around. */
291 if (likely(!atomic_read(&srp_objects_in_use)))
292 return;
293
294 preempt_disable();
295 if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
296 TRACE_CUR("is priority ceiling blocked.\n");
297 while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
298 do_ceiling_block(tsk);
299 TRACE_CUR("finally exceeds system ceiling.\n");
300 } else
301 TRACE_CUR("is not priority ceiling blocked\n");
302 preempt_enable();
303}
304
305#endif
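
To summarize the blocking rule enforced by srp_ceiling_block() and srp_wake_up(): a job may proceed on its partition only if no SRP ceiling is active there, its priority strictly exceeds the highest active ceiling, or it itself owns the ceiling-defining semaphore; everyone else waits on the per-CPU ceiling_blocked queue until an unlock pops the ceiling and re-runs the test. The model below is a rough user-space approximation; the exact comparison and tie-breaking live in srp_exceeds_ceiling(), defined earlier in srp.c and not visible in this hunk, all identifiers are invented, and a smaller value again means a higher priority.

/*
 * Rough model of the test that gates srp_ceiling_block().
 */
#include <stdbool.h>
#include <stdio.h>

struct model_ceiling { unsigned long priority; int owner_pid; };

static bool model_exceeds_ceiling(unsigned long task_prio, int task_pid,
				  const struct model_ceiling *top /* NULL if none */)
{
	if (!top)
		return true;                    /* no SRP semaphore locked on this CPU  */
	if (task_prio < top->priority)
		return true;                    /* strictly higher priority may proceed */
	return task_pid == top->owner_pid;      /* the ceiling's owner is never blocked */
}

int main(void)
{
	struct model_ceiling top = { .priority = 50, .owner_pid = 7 };

	printf("%d\n", model_exceeds_ceiling(20, 3, &top));  /* 1: beats ceiling 50  */
	printf("%d\n", model_exceeds_ceiling(80, 3, &top));  /* 0: ceiling-blocked   */
	printf("%d\n", model_exceeds_ceiling(80, 7, &top));  /* 1: owns the ceiling  */
	printf("%d\n", model_exceeds_ceiling(80, 3, NULL));  /* 1: no active ceiling */
	return 0;
}
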
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..61a95463e4d2
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,166 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18
19struct ts_release_wait {
20 struct list_head list;
21 struct completion completion;
22 lt_t ts_release_time;
23};
24
25#define DECLARE_TS_RELEASE_WAIT(symb) \
26 struct ts_release_wait symb = \
27 { \
28 LIST_HEAD_INIT(symb.list), \
29 COMPLETION_INITIALIZER_ONSTACK(symb.completion), \
30 0 \
31 }
32
33static LIST_HEAD(task_release_list);
34static DEFINE_MUTEX(task_release_lock);
35
36static long do_wait_for_ts_release(void)
37{
38 DECLARE_TS_RELEASE_WAIT(wait);
39
40 long ret = -ERESTARTSYS;
41
42 if (mutex_lock_interruptible(&task_release_lock))
43 goto out;
44
45 list_add(&wait.list, &task_release_list);
46
47 mutex_unlock(&task_release_lock);
48
49	/* We are enqueued; now we wait for someone to wake us up. */
50 ret = wait_for_completion_interruptible(&wait.completion);
51
52 if (!ret) {
53 /* Setting this flag before releasing ensures that this CPU
54 * will be the next CPU to requeue the task on a ready or
55 * release queue. Cleared by prepare_for_next_period()
56 */
57 tsk_rt(current)->dont_requeue = 1;
58
59		/* Completion succeeded; set up the release time. complete_job()
60 * will indirectly cause the period to be added to the next
61 * release time, so subtract it here. */
62 litmus->release_at(current, wait.ts_release_time
63 + current->rt_param.task_params.phase
64 - current->rt_param.task_params.period);
65
66 /* Advance to next job --- when complete_job() returns, the
67 * first job has been released. Since we patched up the release
68 * time, this occurs when all tasks synchronously release their
69		 * first job. */
70 ret = complete_job();
71 } else {
72		/* We were interrupted; we must clean up the list. */
73 mutex_lock(&task_release_lock);
74 if (!wait.completion.done)
75 list_del(&wait.list);
76 mutex_unlock(&task_release_lock);
77 }
78
79out:
80 return ret;
81}
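
The release-time patch-up above is easiest to verify as plain arithmetic: if the common release time is R and the task has phase phi and period T, then programming release_at(R + phi - T) and letting complete_job() advance the release by one period yields a first job release at exactly R + phi. A minimal stand-alone illustration, with lt_t values taken to be nanoseconds and complete_job() modeled only by its add-one-period effect:

/* Worked example of the release-time patch-up (all values in ns). */
#include <stdio.h>

int main(void)
{
	unsigned long long R      = 5000000000ULL;  /* common release time from do_release_ts() */
	unsigned long long phase  =    2000000ULL;  /* task phase:   2 ms */
	unsigned long long period =   10000000ULL;  /* task period: 10 ms */

	unsigned long long programmed    = R + phase - period;  /* arg to litmus->release_at() */
	unsigned long long first_release = programmed + period; /* effect of complete_job()    */

	printf("first job released at %llu ns, i.e. R + phase = %llu ns\n",
	       first_release, R + phase);
	return 0;
}
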
82
83int count_tasks_waiting_for_release(void)
84{
85 int task_count = 0;
86 struct list_head *pos;
87
88 mutex_lock(&task_release_lock);
89
90 list_for_each(pos, &task_release_list) {
91 task_count++;
92 }
93
94 mutex_unlock(&task_release_lock);
95
96
97 return task_count;
98}
99
100static long do_release_ts(lt_t start)
101{
102 long task_count = 0;
103
104 struct list_head *pos, *safe;
105 struct ts_release_wait *wait;
106
107 if (mutex_lock_interruptible(&task_release_lock)) {
108 task_count = -ERESTARTSYS;
109 goto out;
110 }
111
112 TRACE("<<<<<< synchronous task system release >>>>>>\n");
113 sched_trace_sys_release(&start);
114
115 task_count = 0;
116 list_for_each_safe(pos, safe, &task_release_list) {
117 wait = (struct ts_release_wait*)
118 list_entry(pos, struct ts_release_wait, list);
119
120 task_count++;
121 wait->ts_release_time = start;
122 complete(&wait->completion);
123 }
124
125 /* clear stale list */
126 INIT_LIST_HEAD(&task_release_list);
127
128 mutex_unlock(&task_release_lock);
129
130out:
131 return task_count;
132}
133
134
135asmlinkage long sys_wait_for_ts_release(void)
136{
137 long ret = -EPERM;
138 struct task_struct *t = current;
139
140 if (is_realtime(t))
141 ret = do_wait_for_ts_release();
142
143 return ret;
144}
145
146#define ONE_MS 1000000
147
148asmlinkage long sys_release_ts(lt_t __user *__delay)
149{
150 long ret;
151 lt_t delay;
152 lt_t start_time;
153
154 /* FIXME: check capabilities... */
155
156 ret = copy_from_user(&delay, __delay, sizeof(delay));
157 if (ret == 0) {
158		/* round the current time down to an integral millisecond */
159 start_time = litmus_clock();
160 do_div(start_time, ONE_MS);
161 start_time *= ONE_MS;
162 ret = do_release_ts(start_time + delay);
163 }
164
165 return ret;
166}
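
Taken together, the two system calls form a barrier-style start: every real-time task parks itself in sys_wait_for_ts_release(), and a controller later calls sys_release_ts() with a delay in nanoseconds, which is added to the current time after it has been truncated to a millisecond boundary. The user-space sketch below is hypothetical: the syscall numbers are placeholders rather than the real architecture-specific ones, and actual programs would normally go through liblitmus wrappers instead of raw syscall().

/*
 * Hypothetical user-space view of the synchronous-release protocol.
 * __NR_wait_for_ts_release and __NR_release_ts are placeholders only.
 */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

typedef uint64_t lt_t;                  /* nanoseconds, as in the kernel */

#define __NR_wait_for_ts_release  9999  /* placeholder */
#define __NR_release_ts          10000  /* placeholder */

/* Called by every admitted real-time task before its first job. */
static long wait_for_release(void)
{
	return syscall(__NR_wait_for_ts_release);
}

/* Called once by a controller: release all waiters 'delay_ns' after the
 * (millisecond-truncated) current time; returns the number released. */
static long release_all(lt_t delay_ns)
{
	return syscall(__NR_release_ts, &delay_ns);
}

int main(int argc, char **argv)
{
	if (argc > 1) {                 /* controller role */
		long released = release_all(1000000000ULL);     /* ~1 s from now */
		printf("released %ld waiting tasks\n", released);
	} else {                        /* real-time task role */
		long ret = wait_for_release();
		printf("woken up for first job (ret=%ld)\n", ret);
	}
	return 0;
}
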