path: root/litmus
author    Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 07:18:48 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>  2017-05-26 17:12:28 -0400
commit    3baa55c19ffb567aa48568fa69dd17ad6f70d31d (patch)
tree      7e79fd398705929f2db40ba239895cc60762f61f /litmus
parent    cbe61859a233702ed8e6723b3b133d1f2ae1ae2c (diff)
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:

 - library functionality (heaps, rt_domain, prioritization, etc.)
 - budget enforcement logic
 - job management
 - system call backends
 - virtual devices (control page, etc.)
 - scheduler plugin API (and dummy plugin)

This code compiles, but is not yet integrated with the rest of Linux.

Squashed changes:

 - LITMUS^RT Core: add get_current_budget() system call. Allow userspace to figure out the used-up and remaining budget of a task.
 - Adds deadline field to control page and updates it when setting up jobs for release.
 - Adds control page deadline offset.
 - ftdev: respect O_NONBLOCK flag in ftdev_read(). Don't block if userspace wants to go on doing something else.
 - Export job release time and job sequence number in ctrl page.
 - Add alternate complete_job() default implementation. Let jobs sleep like regular Linux tasks by suspending and waking them with a one-shot timer. Plugins can opt into using this implementation instead of the classic complete_job() implementation (or custom implementations).
 - Fix RCU locking in sys_get_rt_task_param(). sys_get_rt_task_param() is rarely used and apparently attracted some bitrot.
 - Free before setting NULL to prevent memory leak.
 - Add hrtimer_start_on() support. This patch replaces the previous implementation of hrtimer_start_on() by now using smp_call_function_single_async() to arm hrtimers on remote CPUs.
 - Expose LITMUS^RT system calls via control page ioctl(). Rationale: make LITMUS^RT ops available in a way that does not create merge conflicts each time we rebase LITMUS^RT on top of a new kernel version. This also helps with portability to different architectures, as we no longer need to patch each architecture's syscall table.
 - Pick non-zero syscall ID start range. To avoid interfering with Linux's magic reserved IOCTL numbers.
 - Don't preempt before time check in sleep_until_next_release(). Avoid preempting jobs that are about to go to sleep soon anyway.
 - LITMUS^RT proc: fix wrong memset().
 - TRACE(): add TRACE_WARN_ON() helper. Useful to replace BUG_ON() and WARN_ON() with a non-fatal TRACE()-based equivalent.
 - Add void* plugin_state pointer to task_struct.
 - LITMUS^RT: split task admission into two functions.
 - Plugin interface: add fork_task() callback.
 - LITMUS^RT: Enable plugins to permit RT tasks to fork.
 - one-shot complete_job(): set completed flag. This could race with a SIGSTOP or some other forced suspension, but we'll let plugins handle this, should they actually care.
 - FP: add list-based ready queue.
 - LITMUS^RT core: add should_wait_for_stack() callback. Allow plugins to give up when waiting for a stack to become available.
 - LITMUS^RT core: add next_became_invalid() callback.
 - LITMUS^RT core: add post-migration validation callback.
 - LITMUS^RT core: be more careful when pull-migrating tasks. Close more race windows and give plugins a chance to validate tasks after they have been migrated.
 - Add KConfig options for timer latency warnings.
 - Add reservation creation API to plugin interface & syscalls.
 - LITMUS^RT syscall: expose sys_reservation_create() via ioctl().
 - Add reservation configuration types to rt_param.h.
 - Add basic generic reservation-based scheduling infrastructure.
 - Switch to aligned quanta by default. For first-time users, aligned quanta is likely what's expected.
 - LITMUS^RT core: keep track of time of last suspension. This information is needed to insert ST_COMPLETION records for sporadic tasks.
 - add fields for clock_nanosleep() support. Need to communicate the intended wake-up time to the plugin wake-up handler.
 - LITMUS^RT core: add generic handler for sporadic job arrivals. In particular, check if a job arrival is triggered from a clock_nanosleep() call.
 - add litmus->task_change_params() callback to plugin interface. Will be used by adaptive C-EDF.
 - Call litmus->task_change_params() from sys_set_rt_task_param().
 - Move trace point definition to litmus/litmus.c. If !CONFIG_SCHED_TASK_TRACE, but CONFIG_SCHED_LITMUS_TRACEPOINT, then we still need to define the tracepoint structures. This patch should be integrated with the earlier sched_task_trace.c patches during one of the next major rebasing efforts.
 - LITMUS^RT scheduling class: mark enqueued task as present.
 - Remove unistd_*.h.
 - rebase fix: update to new hrtimer API. The new API is actually nicer and cleaner.
 - rebase fix: call lockdep_unpin_lock(&rq->lock, cookie). The LITMUS^RT scheduling class should also do the LOCKDEP dance.
 - LITMUS^RT core: break out non-preemptive flag defs. Not every file including litmus.h needs to know this.
 - LITMUS^RT core: don't include debug_trace.h in litmus.h. Including debug_trace.h introduces the TRACE() macro, which causes symbol clashes in some (rather obscure) drivers.
 - LITMUS^RT core: add litmus_preemption_in_progress flags. Used to communicate that a preemption is in progress. Set by the scheduler; read by the plugins.
 - LITMUS^RT core: revise is_current_running() macro.
Diffstat (limited to 'litmus')
-rw-r--r--  litmus/Kconfig  213
-rw-r--r--  litmus/Makefile  20
-rw-r--r--  litmus/bheap.c  316
-rw-r--r--  litmus/binheap.c  387
-rw-r--r--  litmus/budget.c  167
-rw-r--r--  litmus/clustered.c  119
-rw-r--r--  litmus/ctrldev.c  264
-rw-r--r--  litmus/edf_common.c  201
-rw-r--r--  litmus/fdso.c  308
-rw-r--r--  litmus/fp_common.c  130
-rw-r--r--  litmus/ftdev.c  11
-rw-r--r--  litmus/jobs.c  163
-rw-r--r--  litmus/litmus.c  773
-rw-r--r--  litmus/litmus_proc.c  574
-rw-r--r--  litmus/locking.c  189
-rw-r--r--  litmus/preempt.c  143
-rw-r--r--  litmus/reservations/Makefile  3
-rw-r--r--  litmus/reservations/alloc.c  143
-rw-r--r--  litmus/reservations/budget-notifier.c  26
-rw-r--r--  litmus/reservations/core.c  393
-rw-r--r--  litmus/reservations/polling.c  256
-rw-r--r--  litmus/reservations/table-driven.c  269
-rw-r--r--  litmus/rt_domain.c  351
-rw-r--r--  litmus/sched_plugin.c  289
-rw-r--r--  litmus/sched_task_trace.c  5
-rw-r--r--  litmus/srp.c  310
-rw-r--r--  litmus/sync.c  153
-rw-r--r--  litmus/trace.c  17
-rw-r--r--  litmus/uncachedev.c  102
29 files changed, 6289 insertions, 6 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig
index 5408ef6b159b..cf011532f6a3 100644
--- a/litmus/Kconfig
+++ b/litmus/Kconfig
@@ -1,5 +1,184 @@
1menu "LITMUS^RT" 1menu "LITMUS^RT"
2 2
3menu "Scheduling"
4
5config RELEASE_MASTER
6 bool "Release-master Support"
7 depends on SMP
8 default n
9 help
10 Allow one processor to act as a dedicated interrupt processor
11 that services all timer interrupts, but that does not schedule
12 real-time tasks. See RTSS'09 paper for details
13 (http://www.cs.unc.edu/~anderson/papers.html).
14
15config PREFER_LOCAL_LINKING
16 bool "Link newly arrived tasks locally if possible"
17 depends on SMP
18 default y
19 help
20 In linking-based schedulers such as GSN-EDF, if an idle CPU processes
21 a job arrival (i.e., when a job resumed or was released), it can
22 either link the task to itself and schedule it immediately (to avoid
23 unnecessary scheduling latency) or it can try to link it to the CPU
24 where it executed previously (to maximize cache affinity, at the
25 expense of increased latency due to the need to send an IPI).
26
27 In lightly loaded systems, this option can significantly reduce
28 scheduling latencies. In heavily loaded systems (where CPUs are
29 rarely idle), it will likely make little difference.
30
31 If unsure, say yes.
32
33config LITMUS_QUANTUM_LENGTH_US
34 int "quantum length (in us)"
35 default 1000
36 range 500 10000
37 help
38 Determine the desired quantum length, in microseconds, which
39 is used to determine the granularity of scheduling in
40 quantum-driven plugins (primarily PFAIR). This parameter does not
41 affect event-driven plugins (such as the EDF-based plugins and P-FP).
42 Default: 1000us = 1ms.
43
44config BUG_ON_MIGRATION_DEADLOCK
45 bool "Panic on suspected migration deadlock"
46 default y
47 help
48 This is a debugging option. The LITMUS^RT migration support code for
49 global scheduling contains a simple heuristic to detect when the
50 system deadlocks due to circular stack dependencies.
51
52 For example, such a deadlock exists if CPU 0 waits for task A's stack
53 to become available while using task B's stack, and CPU 1 waits for
54 task B's stack to become available while using task A's stack. Such
55 a situation can arise in (buggy) global scheduling plugins.
56
57 With this option enabled, such a scenario will result in a BUG().
58 You can turn off this option when debugging on real hardware (e.g.,
59 to rescue traces, etc. that would be hard to get after a panic).
60
61 Only turn this off if you really know what you are doing. If this
62 BUG() triggers, the scheduler is broken and turning off this option
63 won't fix it.
64
65
66endmenu
67
68menu "Real-Time Synchronization"
69
70config NP_SECTION
71 bool "Non-preemptive section support"
72 default y
73 help
74 Allow tasks to become non-preemptable.
75 Note that plugins still need to explicitly support non-preemptivity.
76 Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support.
77
78 This is required to support locking protocols such as the FMLP.
79 If disabled, all tasks will be considered preemptable at all times.
80
81config LITMUS_LOCKING
82 bool "Support for real-time locking protocols"
83 depends on NP_SECTION
84 default y
85 help
86 Enable LITMUS^RT's multiprocessor real-time locking protocols with
87 predictable maximum blocking times.
88
89 Say Yes if you want to include locking protocols such as the FMLP and
90 Baker's SRP.
91
92endmenu
93
94menu "Performance Enhancements"
95
96config SCHED_CPU_AFFINITY
97 bool "Local Migration Affinity"
98 depends on X86 && SYSFS
99 default y
100 help
101 Rescheduled tasks prefer CPUs near to their previously used CPU.
102 This may improve cache performance through possible preservation of
103 cache affinity, at the expense of (slightly) more involved scheduling
104 logic.
105
106 Warning: May make bugs harder to find since tasks may migrate less often.
107
108 NOTES:
109 * Feature is not utilized by PFair/PD^2.
110
111 Say Yes if unsure.
112
113config ALLOW_EARLY_RELEASE
114 bool "Allow Early Releasing"
115 default y
116 help
117 Allow tasks to release jobs early (while still maintaining job
118 precedence constraints). Only supported by EDF schedulers. Early
119 releasing must be explicitly requested by real-time tasks via
120 the task_params passed to sys_set_rt_task_param().
121
122 Early releasing can improve job response times while maintaining
123 real-time correctness. However, it can easily peg your CPUs
124 since tasks never suspend to wait for their next job. As such, early
125 releasing is really only useful in the context of implementing
126 bandwidth servers, interrupt handling threads, or short-lived
127 computations.
128
129 Beware that early releasing may affect real-time analysis
130 if using locking protocols or I/O.
131
132 Say Yes if unsure.
133
134choice
135 prompt "EDF Tie-Break Behavior"
136 default EDF_TIE_BREAK_LATENESS_NORM
137 help
138 Allows the configuration of tie-breaking behavior when the deadlines
139 of two EDF-scheduled tasks are equal.
140
141 config EDF_TIE_BREAK_LATENESS
142 bool "Lateness-based Tie Break"
143 help
144 Break ties between two jobs, A and B, based upon the lateness of their
145 prior jobs. The job with the greatest lateness has priority. Note that
146 lateness has a negative value if the prior job finished before its
147 deadline.
148
149 config EDF_TIE_BREAK_LATENESS_NORM
150 bool "Normalized Lateness-based Tie Break"
151 help
152 Break ties between two jobs, A and B, based upon the lateness, normalized
153 by relative deadline, of their prior jobs. The job with the greatest
154 normalized lateness has priority. Note that lateness has a negative value
155 if the prior job finished before its deadline.
156
157 Normalized lateness tie-breaks are likely desirable over non-normalized
158 tie-breaks if the execution times and/or relative deadlines of tasks in a
159 task set vary greatly.
160
161 config EDF_TIE_BREAK_HASH
162 bool "Hash-based Tie Breaks"
163 help
164 Break ties between two jobs, A and B, with equal deadlines by using a
165 uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job
166 A has a ~50% chance of winning a given tie-break.
167
168 config EDF_PID_TIE_BREAK
169 bool "PID-based Tie Breaks"
170 help
171 Break ties based upon OS-assigned thread IDs. Use this option if
172 required by the algorithm's real-time analysis or if per-task
173 response-time jitter must be minimized.
174
175 NOTES:
176 * This tie-breaking method was the default in LITMUS^RT 2012.2 and before.
177
178endchoice
179
180endmenu
181
3menu "Tracing" 182menu "Tracing"
4 183
5config FEATHER_TRACE 184config FEATHER_TRACE
@@ -154,6 +333,40 @@ config SCHED_DEBUG_TRACE_CALLER
154 333
155 334 If unsure, say No.
156 335
336config PREEMPT_STATE_TRACE
337 bool "Trace preemption state machine transitions"
338 depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL
339 default n
340 help
341 With this option enabled, each CPU will log when it transitions
342 states in the preemption state machine. This state machine is
343 used to determine how to react to IPIs (avoid races with in-flight IPIs).
344
345 Warning: this creates a lot of information in the debug trace. Only
346 recommended when you are debugging preemption-related races.
347
348 If unsure, say No.
349
350config REPORT_TIMER_LATENCY
351 bool "Warn when hrtimers incur large latency"
352 default n
353 help
354 With this option enabled, the hrtimer code will printk()
355 a warning when a timer fires much later than its intended
356 time. This can be useful when debugging latency issues.
357
358 If unsure, say No.
359
360config REPORT_TIMER_LATENCY_THRESHOLD
361 int "Maximum acceptable timer latency (in nanoseconds)"
362 depends on REPORT_TIMER_LATENCY
363 range 10000 100000000
364 default 1000000
365 help
366 If a timer fires more than the given threshold after its intended
367 expiration time, a warning message is printed to the kernel log.
368 By default, the threshold is one millisecond (= one million nanoseconds).
369
157 370 endmenu
158 371
159 372 endmenu
diff --git a/litmus/Makefile b/litmus/Makefile
index 6318f1c6fac8..56499562defb 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -2,7 +2,27 @@
2 2 # Makefile for LITMUS^RT
3 3 #
4 4
5obj-y = sched_plugin.o litmus.o \
6 preempt.o \
7 litmus_proc.o \
8 budget.o \
9 clustered.o \
10 jobs.o \
11 sync.o \
12 rt_domain.o \
13 edf_common.o \
14 fp_common.o \
15 fdso.o \
16 locking.o \
17 srp.o \
18 bheap.o \
19 binheap.o \
20 ctrldev.o \
21 uncachedev.o
22
5 23 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
6 24 obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
7 25 obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
8 26 obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
27
28obj-y += reservations/
diff --git a/litmus/bheap.c b/litmus/bheap.c
new file mode 100644
index 000000000000..2707e0122b6d
--- /dev/null
+++ b/litmus/bheap.c
@@ -0,0 +1,316 @@
1#include <linux/bug.h>
2#include <linux/kernel.h>
3#include <litmus/bheap.h>
4
5void bheap_init(struct bheap* heap)
6{
7 heap->head = NULL;
8 heap->min = NULL;
9}
10
11void bheap_node_init(struct bheap_node** _h, void* value)
12{
13 struct bheap_node* h = *_h;
14 h->parent = NULL;
15 h->next = NULL;
16 h->child = NULL;
17 h->degree = NOT_IN_HEAP;
18 h->value = value;
19 h->ref = _h;
20}
21
22
23/* make child a subtree of root */
24static void __bheap_link(struct bheap_node* root,
25 struct bheap_node* child)
26{
27 child->parent = root;
28 child->next = root->child;
29 root->child = child;
30 root->degree++;
31}
32
33/* merge root lists */
34static struct bheap_node* __bheap_merge(struct bheap_node* a,
35 struct bheap_node* b)
36{
37 struct bheap_node* head = NULL;
38 struct bheap_node** pos = &head;
39
40 while (a && b) {
41 if (a->degree < b->degree) {
42 *pos = a;
43 a = a->next;
44 } else {
45 *pos = b;
46 b = b->next;
47 }
48 pos = &(*pos)->next;
49 }
50 if (a)
51 *pos = a;
52 else
53 *pos = b;
54 return head;
55}
56
57/* reverse a linked list of nodes. also clears parent pointer */
58static struct bheap_node* __bheap_reverse(struct bheap_node* h)
59{
60 struct bheap_node* tail = NULL;
61 struct bheap_node* next;
62
63 if (!h)
64 return h;
65
66 h->parent = NULL;
67 while (h->next) {
68 next = h->next;
69 h->next = tail;
70 tail = h;
71 h = next;
72 h->parent = NULL;
73 }
74 h->next = tail;
75 return h;
76}
77
78static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
79 struct bheap_node** prev, struct bheap_node** node)
80{
81 struct bheap_node *_prev, *cur;
82 *prev = NULL;
83
84 if (!heap->head) {
85 *node = NULL;
86 return;
87 }
88
89 *node = heap->head;
90 _prev = heap->head;
91 cur = heap->head->next;
92 while (cur) {
93 if (higher_prio(cur, *node)) {
94 *node = cur;
95 *prev = _prev;
96 }
97 _prev = cur;
98 cur = cur->next;
99 }
100}
101
102static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
103 struct bheap_node* h2)
104{
105 struct bheap_node* h1;
106 struct bheap_node *prev, *x, *next;
107 if (!h2)
108 return;
109 h1 = heap->head;
110 if (!h1) {
111 heap->head = h2;
112 return;
113 }
114 h1 = __bheap_merge(h1, h2);
115 prev = NULL;
116 x = h1;
117 next = x->next;
118 while (next) {
119 if (x->degree != next->degree ||
120 (next->next && next->next->degree == x->degree)) {
121 /* nothing to do, advance */
122 prev = x;
123 x = next;
124 } else if (higher_prio(x, next)) {
125 /* x becomes the root of next */
126 x->next = next->next;
127 __bheap_link(x, next);
128 } else {
129 /* next becomes the root of x */
130 if (prev)
131 prev->next = next;
132 else
133 h1 = next;
134 __bheap_link(next, x);
135 x = next;
136 }
137 next = x->next;
138 }
139 heap->head = h1;
140}
141
142static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
143 struct bheap* heap)
144{
145 struct bheap_node *prev, *node;
146 __bheap_min(higher_prio, heap, &prev, &node);
147 if (!node)
148 return NULL;
149 if (prev)
150 prev->next = node->next;
151 else
152 heap->head = node->next;
153 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
154 return node;
155}
156
157/* insert (and reinitialize) a node into the heap */
158void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
159 struct bheap_node* node)
160{
161 struct bheap_node *min;
162 node->child = NULL;
163 node->parent = NULL;
164 node->next = NULL;
165 node->degree = 0;
166 if (heap->min && higher_prio(node, heap->min)) {
167 /* swap min cache */
168 min = heap->min;
169 min->child = NULL;
170 min->parent = NULL;
171 min->next = NULL;
172 min->degree = 0;
173 __bheap_union(higher_prio, heap, min);
174 heap->min = node;
175 } else
176 __bheap_union(higher_prio, heap, node);
177}
178
179void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
180{
181 struct bheap_node* min;
182 if (heap->min) {
183 min = heap->min;
184 heap->min = NULL;
185 bheap_insert(higher_prio, heap, min);
186 }
187}
188
189/* merge addition into target */
190void bheap_union(bheap_prio_t higher_prio,
191 struct bheap* target, struct bheap* addition)
192{
193 /* first insert any cached minima, if necessary */
194 bheap_uncache_min(higher_prio, target);
195 bheap_uncache_min(higher_prio, addition);
196 __bheap_union(higher_prio, target, addition->head);
197 /* this is a destructive merge */
198 addition->head = NULL;
199}
200
201struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
202 struct bheap* heap)
203{
204 if (!heap->min)
205 heap->min = __bheap_extract_min(higher_prio, heap);
206 return heap->min;
207}
208
209struct bheap_node* bheap_take(bheap_prio_t higher_prio,
210 struct bheap* heap)
211{
212 struct bheap_node *node;
213 if (!heap->min)
214 heap->min = __bheap_extract_min(higher_prio, heap);
215 node = heap->min;
216 heap->min = NULL;
217 if (node)
218 node->degree = NOT_IN_HEAP;
219 return node;
220}
221
222int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
223{
224 struct bheap_node *parent;
225 struct bheap_node** tmp_ref;
226 void* tmp;
227
228 /* bubble up */
229 parent = node->parent;
230 while (parent && higher_prio(node, parent)) {
231 /* swap parent and node */
232 tmp = parent->value;
233 parent->value = node->value;
234 node->value = tmp;
235 /* swap references */
236 *(parent->ref) = node;
237 *(node->ref) = parent;
238 tmp_ref = parent->ref;
239 parent->ref = node->ref;
240 node->ref = tmp_ref;
241 /* step up */
242 node = parent;
243 parent = node->parent;
244 }
245
246 return parent != NULL;
247}
248
249void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
250 struct bheap_node* node)
251{
252 struct bheap_node *parent, *prev, *pos;
253 struct bheap_node** tmp_ref;
254 void* tmp;
255
256 if (heap->min != node) {
257 /* bubble up */
258 parent = node->parent;
259 while (parent) {
260 /* swap parent and node */
261 tmp = parent->value;
262 parent->value = node->value;
263 node->value = tmp;
264 /* swap references */
265 *(parent->ref) = node;
266 *(node->ref) = parent;
267 tmp_ref = parent->ref;
268 parent->ref = node->ref;
269 node->ref = tmp_ref;
270 /* step up */
271 node = parent;
272 parent = node->parent;
273 }
274 /* now delete:
275 * first find prev */
276 prev = NULL;
277 pos = heap->head;
278 while (pos != node) {
279 BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */
280 prev = pos;
281 pos = pos->next;
282 }
283 /* we have prev, now remove node */
284 if (prev)
285 prev->next = node->next;
286 else
287 heap->head = node->next;
288 __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
289 } else
290 heap->min = NULL;
291 node->degree = NOT_IN_HEAP;
292}
293
294/* allocate a heap node for value and insert into the heap */
295int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
296 void* value, int gfp_flags)
297{
298 struct bheap_node* hn = bheap_node_alloc(gfp_flags);
299 if (likely(hn)) {
300 bheap_node_init(&hn, value);
301 bheap_insert(higher_prio, heap, hn);
302 }
303 return hn != NULL;
304}
305
306void* bheap_take_del(bheap_prio_t higher_prio,
307 struct bheap* heap)
308{
309 struct bheap_node* hn = bheap_take(higher_prio, heap);
310 void* ret = NULL;
311 if (hn) {
312 ret = hn->value;
313 bheap_node_free(hn);
314 }
315 return ret;
316}
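
For orientation, here is a minimal in-kernel usage sketch for the binomial heap API added above (not part of the patch). It assumes the declarations in litmus/bheap.h match what bheap.c relies on (struct bheap, struct bheap_node with a void* 'value' field, and the bheap_prio_t comparison callback); demo_item, demo_higher_prio(), and demo_bheap_usage() are hypothetical names used only for illustration.

#include <litmus/bheap.h>

struct demo_item {
	int key;
	struct bheap_node *hn;	/* back pointer, kept up to date via node->ref */
};

/* bheap_prio_t callback: nonzero if 'a' has higher priority than 'b';
 * here a smaller key wins, i.e., a min-heap on 'key'. */
static int demo_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
	struct demo_item *ia = a->value;
	struct demo_item *ib = b->value;
	return ia->key < ib->key;
}

static void demo_bheap_usage(struct demo_item *items, struct bheap_node *nodes,
			     int n)
{
	struct bheap heap;
	struct bheap_node *min;
	int i;

	bheap_init(&heap);

	for (i = 0; i < n; i++) {
		/* bheap_node_init() records &items[i].hn as the node's 'ref',
		 * so bheap_decrease()/bheap_delete() can later fix it up. */
		items[i].hn = &nodes[i];
		bheap_node_init(&items[i].hn, &items[i]);
		bheap_insert(demo_higher_prio, &heap, items[i].hn);
	}

	/* Drain in priority order: bheap_take() returns the node with the
	 * smallest key, reusing the cached minimum when available. */
	while ((min = bheap_take(demo_higher_prio, &heap))) {
		struct demo_item *it = min->value;
		(void) it;	/* dispatch/process 'it' here */
	}
}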
diff --git a/litmus/binheap.c b/litmus/binheap.c
new file mode 100644
index 000000000000..d3ab34b92096
--- /dev/null
+++ b/litmus/binheap.c
@@ -0,0 +1,387 @@
1#include <litmus/binheap.h>
2
3/* Returns true if the root ancestor of node is the root of the given heap. */
4int binheap_is_in_this_heap(struct binheap_node *node,
5 struct binheap* heap)
6{
7 if(!binheap_is_in_heap(node)) {
8 return 0;
9 }
10
11 while(node->parent != NULL) {
12 node = node->parent;
13 }
14
15 return (node == heap->root);
16}
17
18
19/* Update the node reference pointers. Same logic as Litmus binomial heap. */
20static void __update_ref(struct binheap_node *parent,
21 struct binheap_node *child)
22{
23 *(parent->ref_ptr) = child;
24 *(child->ref_ptr) = parent;
25
26 swap(parent->ref_ptr, child->ref_ptr);
27}
28
29
30/* Swaps data between two nodes. */
31static void __binheap_swap(struct binheap_node *parent,
32 struct binheap_node *child)
33{
34 swap(parent->data, child->data);
35 __update_ref(parent, child);
36}
37
38
39/* Swaps memory and data between two nodes. Actual nodes swap instead of
40 * just data. Needed when we delete nodes from the heap.
41 */
42static void __binheap_swap_safe(struct binheap *handle,
43 struct binheap_node *a,
44 struct binheap_node *b)
45{
46 swap(a->data, b->data);
47 __update_ref(a, b);
48
49 if((a->parent != NULL) && (a->parent == b->parent)) {
50 /* special case: shared parent */
51 swap(a->parent->left, a->parent->right);
52 }
53 else {
54 /* Update pointers to swap parents. */
55
56 if(a->parent) {
57 if(a == a->parent->left) {
58 a->parent->left = b;
59 }
60 else {
61 a->parent->right = b;
62 }
63 }
64
65 if(b->parent) {
66 if(b == b->parent->left) {
67 b->parent->left = a;
68 }
69 else {
70 b->parent->right = a;
71 }
72 }
73
74 swap(a->parent, b->parent);
75 }
76
77 /* swap children */
78
79 if(a->left) {
80 a->left->parent = b;
81
82 if(a->right) {
83 a->right->parent = b;
84 }
85 }
86
87 if(b->left) {
88 b->left->parent = a;
89
90 if(b->right) {
91 b->right->parent = a;
92 }
93 }
94
95 swap(a->left, b->left);
96 swap(a->right, b->right);
97
98
99 /* update next/last/root pointers */
100
101 if(a == handle->next) {
102 handle->next = b;
103 }
104 else if(b == handle->next) {
105 handle->next = a;
106 }
107
108 if(a == handle->last) {
109 handle->last = b;
110 }
111 else if(b == handle->last) {
112 handle->last = a;
113 }
114
115 if(a == handle->root) {
116 handle->root = b;
117 }
118 else if(b == handle->root) {
119 handle->root = a;
120 }
121}
122
123
124/**
125 * Update the pointer to the last node in the complete binary tree.
126 * Called internally after the root node has been deleted.
127 */
128static void __binheap_update_last(struct binheap *handle)
129{
130 struct binheap_node *temp = handle->last;
131
132 /* find a "bend" in the tree. */
133 while(temp->parent && (temp == temp->parent->left)) {
134 temp = temp->parent;
135 }
136
137 /* step over to sibling if we're not at root */
138 if(temp->parent != NULL) {
139 temp = temp->parent->left;
140 }
141
142 /* now travel right as far as possible. */
143 while(temp->right != NULL) {
144 temp = temp->right;
145 }
146
147 /* take one step to the left if we're not at the bottom-most level. */
148 if(temp->left != NULL) {
149 temp = temp->left;
150 }
151
152 handle->last = temp;
153}
154
155
156/**
157 * Update the pointer to the node that will take the next inserted node.
158 * Called internally after a node has been inserted.
159 */
160static void __binheap_update_next(struct binheap *handle)
161{
162 struct binheap_node *temp = handle->next;
163
164 /* find a "bend" in the tree. */
165 while(temp->parent && (temp == temp->parent->right)) {
166 temp = temp->parent;
167 }
168
169 /* step over to sibling if we're not at root */
170 if(temp->parent != NULL) {
171 temp = temp->parent->right;
172 }
173
174 /* now travel left as far as possible. */
175 while(temp->left != NULL) {
176 temp = temp->left;
177 }
178
179 handle->next = temp;
180}
181
182
183
184/* bubble node up towards root */
185static void __binheap_bubble_up(struct binheap *handle,
186 struct binheap_node *node)
187{
188 /* let BINHEAP_POISON data bubble to the top */
189
190 while((node->parent != NULL) &&
191 ((node->data == BINHEAP_POISON) ||
192 handle->compare(node, node->parent))) {
193 __binheap_swap(node->parent, node);
194 node = node->parent;
195 }
196}
197
198
199/* bubble node down, swapping with min-child */
200static void __binheap_bubble_down(struct binheap *handle)
201{
202 struct binheap_node *node = handle->root;
203
204 while(node->left != NULL) {
205 if(node->right && handle->compare(node->right, node->left)) {
206 if(handle->compare(node->right, node)) {
207 __binheap_swap(node, node->right);
208 node = node->right;
209 }
210 else {
211 break;
212 }
213 }
214 else {
215 if(handle->compare(node->left, node)) {
216 __binheap_swap(node, node->left);
217 node = node->left;
218 }
219 else {
220 break;
221 }
222 }
223 }
224}
225
226
227void __binheap_add(struct binheap_node *new_node,
228 struct binheap *handle,
229 void *data)
230{
231 new_node->data = data;
232 new_node->ref = new_node;
233 new_node->ref_ptr = &(new_node->ref);
234
235 if(!binheap_empty(handle)) {
236 /* insert left side first */
237 if(handle->next->left == NULL) {
238 handle->next->left = new_node;
239 new_node->parent = handle->next;
240 new_node->left = NULL;
241 new_node->right = NULL;
242
243 handle->last = new_node;
244
245 __binheap_bubble_up(handle, new_node);
246 }
247 else {
248 /* left occupied. insert right. */
249 handle->next->right = new_node;
250 new_node->parent = handle->next;
251 new_node->left = NULL;
252 new_node->right = NULL;
253
254 handle->last = new_node;
255
256 __binheap_update_next(handle);
257 __binheap_bubble_up(handle, new_node);
258 }
259 }
260 else {
261 /* first node in heap */
262
263 new_node->parent = NULL;
264 new_node->left = NULL;
265 new_node->right = NULL;
266
267 handle->root = new_node;
268 handle->next = new_node;
269 handle->last = new_node;
270 }
271}
272
273
274/**
275 * Removes the root node from the heap. The node is removed after coalescing
276 * the binheap_node with its original data pointer at the root of the tree.
277 *
278 * The 'last' node in the tree is then swapped up to the root and bubbled
279 * down.
280 */
281void __binheap_delete_root(struct binheap *handle,
282 struct binheap_node *container)
283{
284 struct binheap_node *root = handle->root;
285
286 if(root != container) {
287 /* coalesce */
288 __binheap_swap_safe(handle, root, container);
289 root = container;
290 }
291
292 if(handle->last != root) {
293 /* swap 'last' node up to root and bubble it down. */
294
295 struct binheap_node *to_move = handle->last;
296
297 if(to_move->parent != root) {
298 handle->next = to_move->parent;
299
300 if(handle->next->right == to_move) {
301 /* disconnect from parent */
302 to_move->parent->right = NULL;
303 handle->last = handle->next->left;
304 }
305 else {
306 /* find new 'last' before we disconnect */
307 __binheap_update_last(handle);
308
309 /* disconnect from parent */
310 to_move->parent->left = NULL;
311 }
312 }
313 else {
314 /* 'last' is direct child of root */
315
316 handle->next = to_move;
317
318 if(to_move == to_move->parent->right) {
319 to_move->parent->right = NULL;
320 handle->last = to_move->parent->left;
321 }
322 else {
323 to_move->parent->left = NULL;
324 handle->last = to_move;
325 }
326 }
327 to_move->parent = NULL;
328
329 /* reconnect as root. We can't just swap data ptrs since root node
330 * may be freed after this function returns.
331 */
332 to_move->left = root->left;
333 to_move->right = root->right;
334 if(to_move->left != NULL) {
335 to_move->left->parent = to_move;
336 }
337 if(to_move->right != NULL) {
338 to_move->right->parent = to_move;
339 }
340
341 handle->root = to_move;
342
343 /* bubble down */
344 __binheap_bubble_down(handle);
345 }
346 else {
347 /* removing last node in tree */
348 handle->root = NULL;
349 handle->next = NULL;
350 handle->last = NULL;
351 }
352
353 /* mark as removed */
354 container->parent = BINHEAP_POISON;
355}
356
357
358/**
359 * Delete an arbitrary node. Bubble node to delete up to the root,
360 * and then delete to root.
361 */
362void __binheap_delete(struct binheap_node *node_to_delete,
363 struct binheap *handle)
364{
365 struct binheap_node *target = node_to_delete->ref;
366 void *temp_data = target->data;
367
368 /* temporarily set data to BINHEAP_POISON so the node bubbles up to the root. */
369 target->data = BINHEAP_POISON;
370
371 __binheap_bubble_up(handle, target);
372 __binheap_delete_root(handle, node_to_delete);
373
374 node_to_delete->data = temp_data; /* restore node data pointer */
375}
376
377
378/**
379 * Bubble up a node whose key has decreased in value (decrease-key).
380 */
381void __binheap_decrease(struct binheap_node *orig_node,
382 struct binheap *handle)
383{
384 struct binheap_node *target = orig_node->ref;
385
386 __binheap_bubble_up(handle, target);
387}
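
A minimal sketch of driving the low-level binheap routines above directly (not part of the patch). It assumes struct binheap exposes the root/next/last/compare fields used in binheap.c and that struct binheap_node carries a void* 'data' pointer; real LITMUS^RT code would normally go through the convenience macros in litmus/binheap.h instead. demo_elem, demo_less(), and demo_binheap_usage() are illustrative names.

#include <litmus/binheap.h>

struct demo_elem {
	int key;
	struct binheap_node node;
};

/* compare callback: nonzero if 'a' should sit closer to the root than 'b'
 * (a min-heap on 'key') */
static int demo_less(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_elem *ea = a->data;
	struct demo_elem *eb = b->data;
	return ea->key < eb->key;
}

static void demo_binheap_usage(struct demo_elem *elems, int n)
{
	struct binheap handle = {
		.root = NULL, .next = NULL, .last = NULL, .compare = demo_less,
	};
	int i;

	for (i = 0; i < n; i++)
		__binheap_add(&elems[i].node, &handle, &elems[i]);

	/* The root's data pointer identifies the smallest element;
	 * __binheap_delete_root() detaches its original node (the
	 * "container") and re-heapifies by bubbling the last node down. */
	while (handle.root) {
		struct demo_elem *min = handle.root->data;
		__binheap_delete_root(&handle, &min->node);
		/* 'min' is now out of the heap and safe to reuse or free */
	}
}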
diff --git a/litmus/budget.c b/litmus/budget.c
new file mode 100644
index 000000000000..d7b250ebcc26
--- /dev/null
+++ b/litmus/budget.c
@@ -0,0 +1,167 @@
1#include <linux/sched.h>
2#include <linux/percpu.h>
3#include <linux/hrtimer.h>
4#include <linux/uaccess.h>
5#include <linux/module.h>
6
7#include <litmus/litmus.h>
8#include <litmus/preempt.h>
9#include <litmus/sched_plugin.h>
10#include <litmus/np.h>
11
12#include <litmus/budget.h>
13
14struct enforcement_timer {
15 /* The enforcement timer is used to accurately police
16 * slice budgets. */
17 struct hrtimer timer;
18 int armed;
19};
20
21DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
22
23static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
24{
25 struct enforcement_timer* et = container_of(timer,
26 struct enforcement_timer,
27 timer);
28 unsigned long flags;
29
30 local_irq_save(flags);
31 TRACE("enforcement timer fired.\n");
32 et->armed = 0;
33 /* activate scheduler */
34 litmus_reschedule_local();
35 local_irq_restore(flags);
36
37 return HRTIMER_NORESTART;
38}
39
40/* assumes called with IRQs off */
41static void cancel_enforcement_timer(struct enforcement_timer* et)
42{
43 int ret;
44
45 TRACE("cancelling enforcement timer.\n");
46
47 /* Since interrupts are disabled and et->armed is only
48 * modified locally, we do not need any locks.
49 */
50
51 if (et->armed) {
52 ret = hrtimer_try_to_cancel(&et->timer);
53 /* Should never be inactive. */
54 BUG_ON(ret == 0);
55 /* Should never be running concurrently. */
56 BUG_ON(ret == -1);
57
58 et->armed = 0;
59 }
60}
61
62/* assumes called with IRQs off */
63static void arm_enforcement_timer(struct enforcement_timer* et,
64 struct task_struct* t)
65{
66 lt_t when_to_fire;
67 TRACE_TASK(t, "arming enforcement timer.\n");
68
69 WARN_ONCE(!hrtimer_is_hres_active(&et->timer),
70 KERN_ERR "WARNING: no high resolution timers available!?\n");
71
72 /* Calling this when there is no budget left for the task
73 * makes no sense, unless the task is non-preemptive. */
74 BUG_ON(budget_exhausted(t) && (!is_np(t)));
75
76 /* hrtimer_start_range_ns() cancels the timer
77 * anyway, so we don't have to check whether it is still armed */
78
79 if (likely(!is_np(t))) {
80 when_to_fire = litmus_clock() + budget_remaining(t);
81 hrtimer_start(&et->timer, ns_to_ktime(when_to_fire),
82 HRTIMER_MODE_ABS_PINNED);
83 et->armed = 1;
84 }
85}
86
87
88/* expects to be called with IRQs off */
89void update_enforcement_timer(struct task_struct* t)
90{
91 struct enforcement_timer* et = this_cpu_ptr(&budget_timer);
92
93 if (t && budget_precisely_enforced(t)) {
94 /* Make sure we call into the scheduler when this budget
95 * expires. */
96 arm_enforcement_timer(et, t);
97 } else if (et->armed) {
98 /* Make sure we don't cause unnecessary interrupts. */
99 cancel_enforcement_timer(et);
100 }
101}
102
103
104static int __init init_budget_enforcement(void)
105{
106 int cpu;
107 struct enforcement_timer* et;
108
109 for (cpu = 0; cpu < NR_CPUS; cpu++) {
110 et = &per_cpu(budget_timer, cpu);
111 hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
112 et->timer.function = on_enforcement_timeout;
113 }
114 return 0;
115}
116
117void litmus_current_budget(lt_t *used_so_far, lt_t *remaining)
118{
119 struct task_struct *t = current;
120 unsigned long flags;
121 s64 delta;
122
123 local_irq_save(flags);
124
125 delta = sched_clock_cpu(smp_processor_id()) - t->se.exec_start;
126 if (delta < 0)
127 delta = 0;
128
129 TRACE_CUR("current_budget: sc:%llu start:%llu lt_t:%llu delta:%lld exec-time:%llu rem:%llu\n",
130 sched_clock_cpu(smp_processor_id()), t->se.exec_start,
131 litmus_clock(), delta,
132 tsk_rt(t)->job_params.exec_time,
133 budget_remaining(t));
134
135 if (used_so_far)
136 *used_so_far = tsk_rt(t)->job_params.exec_time + delta;
137
138 if (remaining) {
139 *remaining = budget_remaining(t);
140 if (*remaining > delta)
141 *remaining -= delta;
142 else
143 *remaining = 0;
144 }
145
146 local_irq_restore(flags);
147}
148
149asmlinkage long sys_get_current_budget(
150 lt_t __user * _expended,
151 lt_t __user *_remaining)
152{
153 lt_t expended = 0, remaining = 0;
154
155 if (is_realtime(current))
156 litmus->current_budget(&expended, &remaining);
157
158 if (_expended && put_user(expended, _expended))
159 return -EFAULT;
160
161 if (_remaining && put_user(remaining, _remaining))
162 return -EFAULT;
163
164 return 0;
165}
166
167module_init(init_budget_enforcement);
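
The enforcement timer above is driven from the scheduling path. Below is a sketch of how a plugin's schedule() callback might use update_enforcement_timer() for the task it is about to dispatch; the callback shape and the demo_* names are illustrative, not part of this patch, and the prototype is assumed to be available via litmus/budget.h. The call is made with interrupts disabled, as budget.c expects.

#include <linux/sched.h>
#include <litmus/budget.h>

/* Placeholder for the plugin's real dispatch decision. */
static struct task_struct *demo_pick_next(void)
{
	return NULL;	/* a real plugin would consult its ready queue */
}

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	struct task_struct *next;
	unsigned long flags;

	(void) prev;	/* unused in this sketch */

	local_irq_save(flags);

	next = demo_pick_next();

	/* If 'next' requested precise budget enforcement, this arms the
	 * per-CPU hrtimer to fire when its remaining budget is exhausted;
	 * otherwise any stale enforcement timer is cancelled. */
	update_enforcement_timer(next);

	local_irq_restore(flags);
	return next;
}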
diff --git a/litmus/clustered.c b/litmus/clustered.c
new file mode 100644
index 000000000000..de2aca2a271c
--- /dev/null
+++ b/litmus/clustered.c
@@ -0,0 +1,119 @@
1#include <linux/gfp.h>
2#include <linux/cpumask.h>
3#include <linux/list.h>
4#include <linux/cacheinfo.h>
5
6#include <litmus/debug_trace.h>
7#include <litmus/clustered.h>
8
9int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index)
10{
11 struct cpu_cacheinfo* info = get_cpu_cacheinfo(cpu);
12 struct cacheinfo *ci;
13
14 if (!info || index >= info->num_leaves) {
15 TRACE("no shared-cache CPUs: info=%d index=%u\n",
16 info != NULL, index);
17 return 1;
18 }
19
20 if (!info->info_list) {
21 TRACE("no shared-cache CPUs: no info_list (cpu\n");
22 }
23 ci = info->info_list + index;
24
25 cpumask_copy(mask, &ci->shared_cpu_map);
26
27 TRACE("get_shared: P%u@L%u -> %d siblings\n ", cpu, index, cpumask_weight(mask));
28
29 return 0;
30}
31
32int get_cluster_size(enum cache_level level)
33{
34 cpumask_var_t mask;
35 int ok;
36 int num_cpus;
37
38 if (level == GLOBAL_CLUSTER)
39 return num_online_cpus();
40 else {
41 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
42 return -ENOMEM;
43 /* assumes CPU 0 is representative of all CPUs */
44 ok = get_shared_cpu_map(mask, 0, level);
45 /* ok == 0 means we got the map; otherwise it's an invalid cache level */
46 if (ok == 0)
47 num_cpus = cpumask_weight(mask);
48 free_cpumask_var(mask);
49
50 if (ok == 0)
51 return num_cpus;
52 else
53 return -EINVAL;
54 }
55}
56
57int assign_cpus_to_clusters(enum cache_level level,
58 struct scheduling_cluster* clusters[],
59 unsigned int num_clusters,
60 struct cluster_cpu* cpus[],
61 unsigned int num_cpus)
62{
63 cpumask_var_t mask;
64 unsigned int i, free_cluster = 0, low_cpu;
65 int err = 0;
66
67 if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
68 return -ENOMEM;
69
70 /* clear cluster pointers */
71 for (i = 0; i < num_cpus; i++) {
72 cpus[i]->id = i;
73 cpus[i]->cluster = NULL;
74 }
75
76 /* initialize clusters */
77 for (i = 0; i < num_clusters; i++) {
78 clusters[i]->id = i;
79 INIT_LIST_HEAD(&clusters[i]->cpus);
80 }
81
82 /* Assign each CPU. Two assumptions are made:
83 * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask).
84 * 2) All cpus that belong to some cluster are online.
85 */
86 for_each_online_cpu(i) {
87 /* get lowest-id CPU in cluster */
88 if (level != GLOBAL_CLUSTER) {
89 err = get_shared_cpu_map(mask, cpus[i]->id, level);
90 if (err != 0) {
91 /* ugh... wrong cache level? Either caller screwed up
92 * or the CPU topology is weird. */
93 printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n",
94 level, err);
95 err = -EINVAL;
96 goto out;
97 }
98 low_cpu = cpumask_first(mask);
99 } else
100 low_cpu = 0;
101 if (low_cpu == i) {
102 /* caller must provide an appropriate number of clusters */
103 BUG_ON(free_cluster >= num_clusters);
104
105 /* create new cluster */
106 cpus[i]->cluster = clusters[free_cluster++];
107 } else {
108 /* low_cpu points to the right cluster
109 * Assumption: low_cpu is actually online and was processed earlier. */
110 cpus[i]->cluster = cpus[low_cpu]->cluster;
111 }
112 /* enqueue in cpus list */
113 list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus);
114 printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id);
115 }
116out:
117 free_cpumask_var(mask);
118 return err;
119}
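
A sketch of how a clustered plugin might use the two helpers above during activation: size the clusters for a given cache level and let assign_cpus_to_clusters() group the online CPUs by shared cache. The allocation strategy, the omitted error handling, and the demo_setup_clusters() name are illustrative only; struct scheduling_cluster and struct cluster_cpu are assumed to be defined in litmus/clustered.h as used above.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <litmus/clustered.h>

static int demo_setup_clusters(enum cache_level level)
{
	struct scheduling_cluster **clusters;
	struct cluster_cpu **cpus;
	int cluster_size, num_cpus, num_clusters, i;

	cluster_size = get_cluster_size(level);
	if (cluster_size <= 0)
		return -EINVAL;

	num_cpus = num_online_cpus();
	num_clusters = DIV_ROUND_UP(num_cpus, cluster_size);

	/* NULL checks and cleanup omitted for brevity; a real plugin keeps
	 * these arrays in its plugin state. */
	clusters = kcalloc(num_clusters, sizeof(*clusters), GFP_KERNEL);
	cpus = kcalloc(num_cpus, sizeof(*cpus), GFP_KERNEL);
	for (i = 0; i < num_clusters; i++)
		clusters[i] = kzalloc(sizeof(*clusters[i]), GFP_KERNEL);
	for (i = 0; i < num_cpus; i++)
		cpus[i] = kzalloc(sizeof(*cpus[i]), GFP_KERNEL);

	/* Group online CPUs that share a cache at 'level' into the same
	 * cluster; with GLOBAL_CLUSTER everything lands in clusters[0]. */
	return assign_cpus_to_clusters(level, clusters, num_clusters,
				       cpus, num_cpus);
}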
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
new file mode 100644
index 000000000000..3fc769c573a5
--- /dev/null
+++ b/litmus/ctrldev.c
@@ -0,0 +1,264 @@
1#include <linux/sched.h>
2#include <linux/mm.h>
3#include <linux/fs.h>
4#include <linux/miscdevice.h>
5#include <linux/module.h>
6#include <linux/uaccess.h>
7
8
9#include <litmus/litmus.h>
10#include <litmus/debug_trace.h>
11
12/* only one page for now, but we might want to add a RO version at some point */
13
14#define CTRL_NAME "litmus/ctrl"
15
16/* allocate t->rt_param.ctrl_page*/
17static int alloc_ctrl_page(struct task_struct *t)
18{
19 int err = 0;
20
21 /* only allocate if the task doesn't have one yet */
22 if (!tsk_rt(t)->ctrl_page) {
23 tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
24 if (!tsk_rt(t)->ctrl_page)
25 err = -ENOMEM;
26 /* will get de-allocated in task teardown */
27 TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
28 tsk_rt(t)->ctrl_page);
29 }
30 return err;
31}
32
33static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
34{
35 int err;
36
37 struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
38
39 TRACE_CUR(CTRL_NAME
40 ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n",
41 tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start,
42 vma->vm_page_prot);
43
44 /* Map it into the vma. */
45 err = vm_insert_page(vma, vma->vm_start, ctrl);
46
47 if (err)
48 TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err);
49
50 return err;
51}
52
53static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
54{
55 TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
56 vma->vm_flags, vma->vm_page_prot);
57
58 TRACE_CUR(CTRL_NAME
59 ": %p:%p vma:%p vma->vm_private_data:%p closed.\n",
60 (void*) vma->vm_start, (void*) vma->vm_end, vma,
61 vma->vm_private_data);
62}
63
64static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
65 struct vm_fault* vmf)
66{
67 TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__,
68 vma->vm_flags, vmf->pgoff);
69
70 /* This function should never be called, since all pages should have
71 * been mapped by mmap() already. */
72 WARN_ONCE(1, "Page faults should be impossible in the control page\n");
73
74 return VM_FAULT_SIGBUS;
75}
76
77static struct vm_operations_struct litmus_ctrl_vm_ops = {
78 .close = litmus_ctrl_vm_close,
79 .fault = litmus_ctrl_vm_fault,
80};
81
82static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
83{
84 int err = 0;
85
86 /* first make sure mapper knows what he's doing */
87
88 /* you can only get one page */
89 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
90 return -EINVAL;
91
92 /* you can only map the "first" page */
93 if (vma->vm_pgoff != 0)
94 return -EINVAL;
95
96 /* you can't share it with anyone */
97 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
98 return -EINVAL;
99
100 vma->vm_ops = &litmus_ctrl_vm_ops;
101 /* This mapping should not be kept across forks,
102 * cannot be expanded, and is not a "normal" page. */
103 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ | VM_WRITE;
104
105 /* We don't want the first write access to trigger a "minor" page fault
106 * to mark the page as dirty. This is transient, private memory, we
107 * don't care if it was touched or not. PAGE_SHARED means RW access, but
108 * not execute, and avoids copy-on-write behavior.
109 * See protection_map in mmap.c. */
110 vma->vm_page_prot = PAGE_SHARED;
111
112 err = alloc_ctrl_page(current);
113 if (!err)
114 err = map_ctrl_page(current, vma);
115
116 TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
117 __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
118
119 return err;
120}
121
122/* LITMUS^RT system calls */
123
124asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param);
125asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param);
126asmlinkage long sys_reservation_create(int type, void __user *config);
127asmlinkage long sys_get_current_budget(lt_t __user * _expended, lt_t __user *_remaining);
128asmlinkage long sys_null_call(cycles_t __user *ts);
129asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config);
130asmlinkage long sys_od_close(int od);
131asmlinkage long sys_complete_job(void);
132asmlinkage long sys_litmus_lock(int lock_od);
133asmlinkage long sys_litmus_unlock(int lock_od);
134asmlinkage long sys_wait_for_job_release(unsigned int job);
135asmlinkage long sys_wait_for_ts_release(void);
136asmlinkage long sys_release_ts(lt_t __user *__delay);
137
138static long litmus_ctrl_ioctl(struct file *filp,
139 unsigned int cmd, unsigned long arg)
140{
141 long err = -ENOIOCTLCMD;
142
143 /* LITMUS^RT syscall emulation: we expose LITMUS^RT-specific operations
144 * via ioctl() to avoid merge conflicts with the syscall tables when
145 * rebasing LITMUS^RT. While this is not the most elegant way to expose
146 * syscall-like functionality, it helps with reducing the effort
147 * required to maintain LITMUS^RT out of tree.
148 */
149
150 union litmus_syscall_args syscall_args;
151
152 switch (cmd) {
153 case LRT_set_rt_task_param:
154 case LRT_get_rt_task_param:
155 case LRT_reservation_create:
156 case LRT_get_current_budget:
157 case LRT_od_open:
158 /* multiple arguments => need to get args via pointer */
159 /* get syscall parameters */
160 if (copy_from_user(&syscall_args, (void*) arg,
161 sizeof(syscall_args))) {
162 return -EFAULT;
163 }
164
165 switch (cmd) {
166 case LRT_set_rt_task_param:
167 return sys_set_rt_task_param(
168 syscall_args.get_set_task_param.pid,
169 syscall_args.get_set_task_param.param);
170 case LRT_get_rt_task_param:
171 return sys_get_rt_task_param(
172 syscall_args.get_set_task_param.pid,
173 syscall_args.get_set_task_param.param);
174 case LRT_reservation_create:
175 return sys_reservation_create(
176 syscall_args.reservation_create.type,
177 syscall_args.reservation_create.config);
178 case LRT_get_current_budget:
179 return sys_get_current_budget(
180 syscall_args.get_current_budget.expended,
181 syscall_args.get_current_budget.remaining);
182 case LRT_od_open:
183 return sys_od_open(
184 syscall_args.od_open.fd,
185 syscall_args.od_open.obj_type,
186 syscall_args.od_open.obj_id,
187 syscall_args.od_open.config);
188 }
189
190
191 case LRT_null_call:
192 return sys_null_call((cycles_t __user *) arg);
193
194 case LRT_od_close:
195 return sys_od_close(arg);
196
197 case LRT_complete_job:
198 return sys_complete_job();
199
200 case LRT_litmus_lock:
201 return sys_litmus_lock(arg);
202
203 case LRT_litmus_unlock:
204 return sys_litmus_unlock(arg);
205
206 case LRT_wait_for_job_release:
207 return sys_wait_for_job_release(arg);
208
209 case LRT_wait_for_ts_release:
210 return sys_wait_for_ts_release();
211
212 case LRT_release_ts:
213 return sys_release_ts((lt_t __user *) arg);
214
215 default:
216 printk(KERN_DEBUG "ctrldev: strange ioctl (%u, %lu)\n", cmd, arg);
217 };
218
219 return err;
220}
221
222static struct file_operations litmus_ctrl_fops = {
223 .owner = THIS_MODULE,
224 .mmap = litmus_ctrl_mmap,
225 .unlocked_ioctl = litmus_ctrl_ioctl,
226};
227
228static struct miscdevice litmus_ctrl_dev = {
229 .name = CTRL_NAME,
230 .minor = MISC_DYNAMIC_MINOR,
231 .fops = &litmus_ctrl_fops,
232};
233
234static int __init init_litmus_ctrl_dev(void)
235{
236 int err;
237
238 BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
239
240 BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));
241
242 BUILD_BUG_ON(offsetof(struct control_page, sched.raw)
243 != LITMUS_CP_OFFSET_SCHED);
244 BUILD_BUG_ON(offsetof(struct control_page, irq_count)
245 != LITMUS_CP_OFFSET_IRQ_COUNT);
246 BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start)
247 != LITMUS_CP_OFFSET_TS_SC_START);
248 BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start)
249 != LITMUS_CP_OFFSET_IRQ_SC_START);
250
251 printk("Initializing LITMUS^RT control device.\n");
252 err = misc_register(&litmus_ctrl_dev);
253 if (err)
254 printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
255 return err;
256}
257
258static void __exit exit_litmus_ctrl_dev(void)
259{
260 misc_deregister(&litmus_ctrl_dev);
261}
262
263module_init(init_litmus_ctrl_dev);
264module_exit(exit_litmus_ctrl_dev);
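
To illustrate the ioctl()-based syscall emulation from the other side, here is a hedged userspace sketch (not part of the patch): it maps the control page and issues LRT_get_current_budget through the control device. The device path /dev/litmus/ctrl, the header <litmus.h> providing lt_t, the LRT_* command codes, and union litmus_syscall_args are assumptions based on the kernel code above; liblitmus normally wraps all of this.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <litmus.h>	/* assumed: lt_t, LRT_* commands, union litmus_syscall_args */

int main(void)
{
	lt_t expended = 0, remaining = 0;
	union litmus_syscall_args args;
	void *ctrl_page;
	int fd;

	fd = open("/dev/litmus/ctrl", O_RDWR);
	if (fd < 0)
		return 1;

	/* One page, offset 0, private mapping: exactly what
	 * litmus_ctrl_mmap() above accepts. */
	ctrl_page = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ | PROT_WRITE,
			 MAP_PRIVATE, fd, 0);
	if (ctrl_page == MAP_FAILED)
		return 1;

	/* Multi-argument operations pass their parameters via the
	 * litmus_syscall_args union, as the ioctl handler expects. */
	args.get_current_budget.expended = &expended;
	args.get_current_budget.remaining = &remaining;
	if (ioctl(fd, LRT_get_current_budget, &args) == 0)
		printf("used: %llu ns, remaining: %llu ns\n",
		       (unsigned long long) expended,
		       (unsigned long long) remaining);

	close(fd);
	return 0;
}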
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 000000000000..1cd5ec711d28
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,201 @@
1/*
2 * kernel/edf_common.c
3 *
4 * Common functions for EDF based scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14#include <litmus/debug_trace.h>
15
16#include <litmus/edf_common.h>
17
18#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM
19#include <litmus/fpmath.h>
20#endif
21
22#ifdef CONFIG_EDF_TIE_BREAK_HASH
23#include <linux/hash.h>
24static inline long edf_hash(struct task_struct *t)
25{
26 /* pid is 32 bits, so normally we would shove that into the
27 * upper 32-bits and put the job number in the bottom
28 * and hash the 64-bit number with hash_64(). Sadly,
29 * in testing, hash_64() doesn't distribute keys where the
30 * upper bits are close together (as would be the case with
31 * pids) and job numbers are equal (as would be the case with
32 * synchronous task sets with all relative deadlines equal).
33 *
34 * A 2006 Linux patch proposed the following solution
35 * (but for some reason it wasn't accepted...).
36 *
37 * At least this workaround works for 32-bit systems as well.
38 */
39 return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32);
40}
41#endif
42
43
44/* edf_higher_prio - returns true if first has a higher EDF priority
45 * than second. Deadline ties are broken by PID.
46 *
47 * both first and second may be NULL
48 */
49int edf_higher_prio(struct task_struct* first,
50 struct task_struct* second)
51{
52 struct task_struct *first_task = first;
53 struct task_struct *second_task = second;
54
55 /* There is no point in comparing a task to itself. */
56 if (first && first == second) {
57 TRACE_TASK(first,
58 "WARNING: pointless edf priority comparison.\n");
59 return 0;
60 }
61
62
63 /* check for NULL tasks */
64 if (!first || !second)
65 return first && !second;
66
67#ifdef CONFIG_LITMUS_LOCKING
68
69 /* Check for inherited priorities. Change task
70 * used for comparison in such a case.
71 */
72 if (unlikely(first->rt_param.inh_task))
73 first_task = first->rt_param.inh_task;
74 if (unlikely(second->rt_param.inh_task))
75 second_task = second->rt_param.inh_task;
76
77 /* Check for priority boosting. Tie-break by start of boosting.
78 */
79 if (unlikely(is_priority_boosted(first_task))) {
80 /* first_task is boosted, how about second_task? */
81 if (!is_priority_boosted(second_task) ||
82 lt_before(get_boost_start(first_task),
83 get_boost_start(second_task)))
84 return 1;
85 else
86 return 0;
87 } else if (unlikely(is_priority_boosted(second_task)))
88 /* second_task is boosted, first is not*/
89 return 0;
90
91#endif
92
93 if (earlier_deadline(first_task, second_task)) {
94 return 1;
95 }
96 else if (get_deadline(first_task) == get_deadline(second_task)) {
97 /* Need to tie break. All methods must set pid_break to 0/1 if
98 * first_task does not have priority over second_task.
99 */
100 int pid_break;
101
102
103#if defined(CONFIG_EDF_TIE_BREAK_LATENESS)
104 /* Tie break by lateness. Jobs with greater lateness get
105 * priority. This should spread tardiness across all tasks,
106 * especially in task sets where all tasks have the same
107 * period and relative deadlines.
108 */
109 if (get_lateness(first_task) > get_lateness(second_task)) {
110 return 1;
111 }
112 pid_break = (get_lateness(first_task) == get_lateness(second_task));
113
114
115#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM)
116 /* Tie break by lateness, normalized by relative deadline. Jobs with
117 * greater normalized lateness get priority.
118 *
119 * Note: Considered using the algebraically equivalent
120 * lateness(first)*relative_deadline(second) >
121 lateness(second)*relative_deadline(first)
122 * to avoid fixed-point math, but values are prone to overflow if inputs
123 * are on the order of several seconds, even in 64-bit.
124 */
125 fp_t fnorm = _frac(get_lateness(first_task),
126 get_rt_relative_deadline(first_task));
127 fp_t snorm = _frac(get_lateness(second_task),
128 get_rt_relative_deadline(second_task));
129 if (_gt(fnorm, snorm)) {
130 return 1;
131 }
132 pid_break = _eq(fnorm, snorm);
133
134
135#elif defined(CONFIG_EDF_TIE_BREAK_HASH)
136 /* Tie break by comparing hashes of the (pid, job#) tuple. There should be
137 * a 50% chance that first_task has a higher priority than second_task.
138 */
139 long fhash = edf_hash(first_task);
140 long shash = edf_hash(second_task);
141 if (fhash < shash) {
142 return 1;
143 }
144 pid_break = (fhash == shash);
145#else
146
147
148 /* CONFIG_EDF_PID_TIE_BREAK */
149 pid_break = 1; // fall through to tie-break by pid;
150#endif
151
152 /* Tie break by pid */
153 if(pid_break) {
154 if (first_task->pid < second_task->pid) {
155 return 1;
156 }
157 else if (first_task->pid == second_task->pid) {
158 /* If the PIDs are the same then the task with the
159 * inherited priority wins.
160 */
161 if (!second->rt_param.inh_task) {
162 return 1;
163 }
164 }
165 }
166 }
167 return 0; /* fall-through. prio(second_task) > prio(first_task) */
168}
169
170int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
171{
172 return edf_higher_prio(bheap2task(a), bheap2task(b));
173}
174
175void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
176 release_jobs_t release)
177{
178 rt_domain_init(rt, edf_ready_order, resched, release);
179}
180
181/* need_to_preempt - check whether the task t needs to be preempted
182 * call only with irqs disabled and with ready_lock acquired
183 * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
184 */
185int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
186{
187 /* we need the read lock for edf_ready_queue */
188 /* no need to preempt if there is nothing pending */
189 if (!__jobs_pending(rt))
190 return 0;
191 /* we need to reschedule if t doesn't exist */
192 if (!t)
193 return 1;
194
195 /* NOTE: We cannot check for non-preemptibility since we
196 * don't know what address space we're currently in.
197 */
198
199 /* make sure to get non-rt stuff out of the way */
200 return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
201}
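
As a usage sketch (not part of the patch), an EDF-based plugin might combine the helpers above as follows when a job is released or resumes. The surrounding plugin state (demo_domain, demo_scheduled) and __add_ready() from litmus/rt_domain.h are assumptions for illustration; litmus_reschedule_local() is the same helper used in budget.c above.

#include <linux/sched.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>
#include <litmus/preempt.h>

static rt_domain_t demo_domain;			/* set up elsewhere via edf_domain_init() */
static struct task_struct *demo_scheduled;	/* task currently scheduled on this CPU */

/* Called with interrupts disabled and demo_domain's ready lock held,
 * as required by edf_preemption_needed(). */
static void demo_job_arrival(struct task_struct *t)
{
	/* Enqueue the newly released/resumed job in EDF order. */
	__add_ready(&demo_domain, t);

	/* Request a local reschedule only if the head of the ready queue
	 * now has higher EDF priority than what is currently running
	 * (non-preemptive sections are deliberately not considered here). */
	if (edf_preemption_needed(&demo_domain, demo_scheduled))
		litmus_reschedule_local();
}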
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 000000000000..0ff54e41839c
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,308 @@
1/* fdso.c - file descriptor attached shared objects
2 *
3 * (c) 2007 B. Brandenburg, LITMUS^RT project
4 *
5 * Notes:
6 * - object descriptor (OD) tables are not cloned during a fork.
7 * - objects are created on-demand, and freed after the last reference
8 * is dropped.
9 * - for now, object types are hard coded.
10 * - As long as we have live objects, we keep a reference to the inode.
11 */
12
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/mutex.h>
16#include <linux/file.h>
17#include <asm/uaccess.h>
18
19#include <litmus/fdso.h>
20
21extern struct fdso_ops generic_lock_ops;
22
23static const struct fdso_ops* fdso_ops[] = {
24 &generic_lock_ops, /* FMLP_SEM */
25 &generic_lock_ops, /* SRP_SEM */
26 &generic_lock_ops, /* MPCP_SEM */
27 &generic_lock_ops, /* MPCP_VS_SEM */
28 &generic_lock_ops, /* DPCP_SEM */
29 &generic_lock_ops, /* PCP_SEM */
30 &generic_lock_ops, /* DFLP_SEM */
31};
32
33static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
34{
35 BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1);
36
37 if (fdso_ops[type]->create)
38 return fdso_ops[type]->create(obj_ref, type, config);
39 else
40 return -EINVAL;
41}
42
43static void fdso_destroy(obj_type_t type, void* obj)
44{
45 fdso_ops[type]->destroy(type, obj);
46}
47
48static int fdso_open(struct od_table_entry* entry, void* __user config)
49{
50 if (fdso_ops[entry->obj->type]->open)
51 return fdso_ops[entry->obj->type]->open(entry, config);
52 else
53 return 0;
54}
55
56static int fdso_close(struct od_table_entry* entry)
57{
58 if (fdso_ops[entry->obj->type]->close)
59 return fdso_ops[entry->obj->type]->close(entry);
60 else
61 return 0;
62}
63
64/* inode must be locked already */
65static int alloc_inode_obj(struct inode_obj_id** obj_ref,
66 struct inode* inode,
67 obj_type_t type,
68 unsigned int id,
69 void* __user config)
70{
71 struct inode_obj_id* obj;
72 void* raw_obj;
73 int err;
74
75 obj = kmalloc(sizeof(*obj), GFP_KERNEL);
76 if (!obj) {
77 return -ENOMEM;
78 }
79
80 err = fdso_create(&raw_obj, type, config);
81 if (err != 0) {
82 kfree(obj);
83 return err;
84 }
85
86 INIT_LIST_HEAD(&obj->list);
87 atomic_set(&obj->count, 1);
88 obj->type = type;
89 obj->id = id;
90 obj->obj = raw_obj;
91 obj->inode = inode;
92
93 list_add(&obj->list, &inode->i_obj_list);
94 atomic_inc(&inode->i_count);
95
96 printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
97
98 *obj_ref = obj;
99 return 0;
100}
101
102/* inode must be locked already */
103static struct inode_obj_id* get_inode_obj(struct inode* inode,
104 obj_type_t type,
105 unsigned int id)
106{
107 struct list_head* pos;
108 struct inode_obj_id* obj = NULL;
109
110 list_for_each(pos, &inode->i_obj_list) {
111 obj = list_entry(pos, struct inode_obj_id, list);
112 if (obj->id == id && obj->type == type) {
113 atomic_inc(&obj->count);
114 return obj;
115 }
116 }
117 printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
118 return NULL;
119}
120
121
122static void put_inode_obj(struct inode_obj_id* obj)
123{
124 struct inode* inode;
125 int let_go = 0;
126
127 inode = obj->inode;
128 if (atomic_dec_and_test(&obj->count)) {
129
130 mutex_lock(&inode->i_obj_mutex);
131 /* no new references can be obtained */
132 if (!atomic_read(&obj->count)) {
133 list_del(&obj->list);
134 fdso_destroy(obj->type, obj->obj);
135 kfree(obj);
136 let_go = 1;
137 }
138 mutex_unlock(&inode->i_obj_mutex);
139 if (let_go)
140 iput(inode);
141 }
142}
143
144static struct od_table_entry* get_od_entry(struct task_struct* t)
145{
146 struct od_table_entry* table;
147 int i;
148
149
150 table = t->od_table;
151 if (!table) {
152 table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
153 GFP_KERNEL);
154 t->od_table = table;
155 }
156
157 for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
158 if (!table[i].used) {
159 table[i].used = 1;
160 return table + i;
161 }
162 return NULL;
163}
164
165static int put_od_entry(struct od_table_entry* od)
166{
167 put_inode_obj(od->obj);
168 od->used = 0;
169 return 0;
170}
171
172static long close_od_entry(struct od_table_entry *od)
173{
174 long ret;
175
176 /* Give the class a chance to reject the close. */
177 ret = fdso_close(od);
178 if (ret == 0)
179 ret = put_od_entry(od);
180
181 return ret;
182}
183
184void exit_od_table(struct task_struct* t)
185{
186 int i;
187
188 if (t->od_table) {
189 for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
190 if (t->od_table[i].used)
191 close_od_entry(t->od_table + i);
192 kfree(t->od_table);
193 t->od_table = NULL;
194 }
195}
196
197static int do_sys_od_open(struct file* file, obj_type_t type, int id,
198 void* __user config)
199{
200 int idx = 0, err = 0;
201 struct inode* inode;
202 struct inode_obj_id* obj = NULL;
203 struct od_table_entry* entry;
204
205 inode = file_inode(file);
206
207 entry = get_od_entry(current);
208 if (!entry)
209 return -ENOMEM;
210
211 mutex_lock(&inode->i_obj_mutex);
212 obj = get_inode_obj(inode, type, id);
213 if (!obj)
214 err = alloc_inode_obj(&obj, inode, type, id, config);
215 if (err != 0) {
216 obj = NULL;
217 idx = err;
218 entry->used = 0;
219 } else {
220 entry->obj = obj;
221 entry->class = fdso_ops[type];
222 idx = entry - current->od_table;
223 }
224
225 mutex_unlock(&inode->i_obj_mutex);
226
227 /* open only if creation succeeded */
228 if (!err)
229 err = fdso_open(entry, config);
230 if (err < 0) {
231 /* The class rejected the open call.
232 * We need to clean up and tell user space.
233 */
234 if (obj)
235 put_od_entry(entry);
236 idx = err;
237 }
238
239 return idx;
240}
241
242
243struct od_table_entry* get_entry_for_od(int od)
244{
245 struct task_struct *t = current;
246
247 if (!t->od_table)
248 return NULL;
249 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
250 return NULL;
251 if (!t->od_table[od].used)
252 return NULL;
253 return t->od_table + od;
254}
255
256
257asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
258{
259 int ret = 0;
260 struct file* file;
261
262 /*
263 1) get file from fd, get inode from file
264 2) lock inode
265 3) try to lookup object
266 4) if not present create and enqueue object, inc inode refcnt
267 5) increment refcnt of object
268 6) alloc od_table_entry, setup ptrs
269 7) unlock inode
270 8) return offset in od_table as OD
271 */
272
273 if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
274 ret = -EINVAL;
275 goto out;
276 }
277
278 file = fget(fd);
279 if (!file) {
280 ret = -EBADF;
281 goto out;
282 }
283
284 ret = do_sys_od_open(file, type, obj_id, config);
285
286 fput(file);
287
288out:
289 return ret;
290}
291
292
293asmlinkage long sys_od_close(int od)
294{
295 int ret = -EINVAL;
296 struct task_struct *t = current;
297
298 if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
299 return ret;
300
301 if (!t->od_table || !t->od_table[od].used)
302 return ret;
303
304
305 ret = close_od_entry(t->od_table + od);
306
307 return ret;
308}
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..242542a510d3
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,130 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14#include <litmus/debug_trace.h>
15
16#include <litmus/fp_common.h>
17
18/* fp_higher_prio - returns true if first has a higher static priority
19 * than second. Ties are broken by PID.
20 *
21 * both first and second may be NULL
22 */
23int fp_higher_prio(struct task_struct* first,
24 struct task_struct* second)
25{
26 struct task_struct *first_task = first;
27 struct task_struct *second_task = second;
28
29 /* There is no point in comparing a task to itself. */
30 if (unlikely(first && first == second)) {
31 TRACE_TASK(first,
32 "WARNING: pointless FP priority comparison.\n");
33 return 0;
34 }
35
36
37 /* check for NULL tasks */
38 if (!first || !second)
39 return first && !second;
40
41 if (!is_realtime(second_task))
42 return 1;
43
44#ifdef CONFIG_LITMUS_LOCKING
45
46 /* Check for inherited priorities. Change task
47 * used for comparison in such a case.
48 */
49 if (unlikely(first->rt_param.inh_task))
50 first_task = first->rt_param.inh_task;
51 if (unlikely(second->rt_param.inh_task))
52 second_task = second->rt_param.inh_task;
53
54 /* Check for priority boosting. Tie-break by start of boosting.
55 */
56 if (unlikely(is_priority_boosted(first_task))) {
57 /* first_task is boosted, how about second_task? */
58 if (is_priority_boosted(second_task))
59 /* break by priority point */
60 return lt_before(get_boost_start(first_task),
61 get_boost_start(second_task));
62 else
63 /* priority boosting wins. */
64 return 1;
65 } else if (unlikely(is_priority_boosted(second_task)))
66 /* second_task is boosted, first is not */
67 return 0;
68
69#endif
70
71 /* Comparisons to itself are not expected; priority inheritance
72 * should also not cause this to happen. */
73 BUG_ON(first_task == second_task);
74
75 if (get_priority(first_task) < get_priority(second_task))
76 return 1;
77 else if (get_priority(first_task) == get_priority(second_task))
78 /* Break by PID. */
79 return first_task->pid < second_task->pid;
80 else
81 return 0;
82}
83
84int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
85{
86 return fp_higher_prio(bheap2task(a), bheap2task(b));
87}
88
89void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
90 release_jobs_t release)
91{
92 rt_domain_init(rt, fp_ready_order, resched, release);
93}
94
95/* fp_preemption_needed - check whether the task t needs to be preempted
96 */
97int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
98{
99 struct task_struct *pending;
100
101 pending = fp_prio_peek(q);
102
103 if (!pending)
104 return 0;
105 if (!t)
106 return 1;
107
108 /* make sure to get non-rt stuff out of the way */
109 return !is_realtime(t) || fp_higher_prio(pending, t);
110}
111
112void fp_prio_queue_init(struct fp_prio_queue* q)
113{
114 int i;
115
116 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
117 q->bitmask[i] = 0;
118 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
119 bheap_init(&q->queue[i]);
120}
121
122void fp_ready_list_init(struct fp_ready_list* q)
123{
124 int i;
125
126 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
127 q->bitmask[i] = 0;
128 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
129 INIT_LIST_HEAD(q->queue + i);
130}
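
fp_prio_queue_init() and fp_ready_list_init() above reserve one bitmask bit per priority level; finding the highest-priority pending job then reduces to a find-first-set scan over the bitmask words (the actual peek helpers are declared elsewhere in the fp_common headers). A stand-alone sketch of that idea, with illustrative constants and a GCC/Clang builtin in place of the kernel's bit helpers:

#include <stdio.h>

#define MAX_PRIO      512
#define BITS_PER_WORD (8 * sizeof(unsigned long))
#define PRIO_WORDS    ((MAX_PRIO + BITS_PER_WORD - 1) / BITS_PER_WORD)

static unsigned long bitmask[PRIO_WORDS];

/* mark priority level 'prio' as having at least one pending job */
static void mark_nonempty(unsigned int prio)
{
	bitmask[prio / BITS_PER_WORD] |= 1UL << (prio % BITS_PER_WORD);
}

/* return the highest-priority (numerically smallest) non-empty level,
 * or -1 if everything is empty; find-first-set per bitmask word */
static int highest_pending(void)
{
	unsigned int w;

	for (w = 0; w < PRIO_WORDS; w++)
		if (bitmask[w])
			return w * BITS_PER_WORD + __builtin_ctzl(bitmask[w]);
	return -1;
}

int main(void)
{
	mark_nonempty(130);
	mark_nonempty(7);
	printf("highest pending priority: %d\n", highest_pending()); /* 7 */
	return 0;
}
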
diff --git a/litmus/ftdev.c b/litmus/ftdev.c
index 9d539251f4cb..095301a23450 100644
--- a/litmus/ftdev.c
+++ b/litmus/ftdev.c
@@ -216,6 +216,17 @@ static ssize_t ftdev_read(struct file *filp,
216 * here with copied data because that data would get
217 * lost if the task is interrupted (e.g., killed).
218 */
219
220 /* Before sleeping, check whether a non-blocking
221 * read was requested.
222 */
223 if (filp->f_flags & O_NONBLOCK)
224 {
225 /* bug out, userspace doesn't want us to sleep */
226 err = -EWOULDBLOCK;
227 break;
228 }
229
230 mutex_unlock(&ftdm->lock);
231 set_current_state(TASK_INTERRUPTIBLE);
232
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 000000000000..1c700f9acd24
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,163 @@
1/* litmus/jobs.c - common job control code
2 */
3
4#include <linux/sched.h>
5
6#include <litmus/preempt.h>
7#include <litmus/litmus.h>
8#include <litmus/sched_plugin.h>
9#include <litmus/sched_trace.h>
10#include <litmus/jobs.h>
11
12static inline void setup_release(struct task_struct *t, lt_t release)
13{
14 /* prepare next release */
15 t->rt_param.job_params.release = release;
16 t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t);
17 t->rt_param.job_params.exec_time = 0;
18
19 /* update job sequence number */
20 t->rt_param.job_params.job_no++;
21
22 /* expose to user space */
23 if (has_control_page(t)) {
24 struct control_page* cp = get_control_page(t);
25 cp->deadline = t->rt_param.job_params.deadline;
26 cp->release = get_release(t);
27 cp->job_index = t->rt_param.job_params.job_no;
28 }
29}
30
31void prepare_for_next_period(struct task_struct *t)
32{
33 BUG_ON(!t);
34
35 /* Record lateness before we set up the next job's
36 * release and deadline. Lateness may be negative.
37 */
38 t->rt_param.job_params.lateness =
39 (long long)litmus_clock() -
40 (long long)t->rt_param.job_params.deadline;
41
42 if (tsk_rt(t)->sporadic_release) {
43 TRACE_TASK(t, "sporadic release at %llu\n",
44 tsk_rt(t)->sporadic_release_time);
45 /* sporadic release */
46 setup_release(t, tsk_rt(t)->sporadic_release_time);
47 tsk_rt(t)->sporadic_release = 0;
48 } else {
49 /* periodic release => add period */
50 setup_release(t, get_release(t) + get_rt_period(t));
51 }
52}
53
54void release_at(struct task_struct *t, lt_t start)
55{
56 BUG_ON(!t);
57 setup_release(t, start);
58 tsk_rt(t)->completed = 0;
59}
60
61void inferred_sporadic_job_release_at(struct task_struct *t, lt_t when)
62{
63 /* new sporadic release */
64 sched_trace_last_suspension_as_completion(t);
65 /* check if this task is resuming from a clock_nanosleep() call */
66 if (tsk_rt(t)->doing_abs_nanosleep &&
67 lt_after_eq(tsk_rt(t)->nanosleep_wakeup,
68 get_release(t) + get_rt_period(t))) {
69 /* clock_nanosleep() is supposed to wake up the task
70 * at a time that is a valid release time. Use that time
71 * rather than guessing the intended release time from the
72 * current time. */
73 TRACE_TASK(t, "nanosleep: backdating release "
74 "to %llu instead of %llu\n",
75 tsk_rt(t)->nanosleep_wakeup, when);
76 when = tsk_rt(t)->nanosleep_wakeup;
77 }
78 release_at(t, when);
79 sched_trace_task_release(t);
80}
81
82long default_wait_for_release_at(lt_t release_time)
83{
84 struct task_struct *t = current;
85 unsigned long flags;
86
87 local_irq_save(flags);
88 tsk_rt(t)->sporadic_release_time = release_time;
89 smp_wmb();
90 tsk_rt(t)->sporadic_release = 1;
91 local_irq_restore(flags);
92
93 return litmus->complete_job();
94}
95
96
97/*
98 * Deactivate current task until the beginning of the next period.
99 */
100long complete_job(void)
101{
102 preempt_disable();
103 TRACE_CUR("job completion indicated at %llu\n", litmus_clock());
104 /* Mark that we do not execute anymore */
105 tsk_rt(current)->completed = 1;
106 /* call schedule, this will return when a new job arrives
107 * it also takes care of preparing for the next release
108 */
109 litmus_reschedule_local();
110 preempt_enable();
111 return 0;
112}
113
114static long sleep_until_next_release(void);
115
116/* alternative job completion implementation that suspends the task */
117long complete_job_oneshot(void)
118{
119 struct task_struct *t = current;
120
121 preempt_disable();
122
123 TRACE_CUR("job completes at %llu (deadline: %llu)\n", litmus_clock(),
124 get_deadline(t));
125
126 sched_trace_task_completion(t, 0);
127 prepare_for_next_period(t);
128 sched_trace_task_release(t);
129
130 return sleep_until_next_release();
131}
132
133/* assumes caller has disabled preemptions;
134 * re-enables preemptions before returning */
135static long sleep_until_next_release(void)
136{
137 struct task_struct *t = current;
138 ktime_t next_release;
139 long err;
140
141 next_release = ns_to_ktime(get_release(t));
142
143 TRACE_CUR("next_release=%llu\n", get_release(t));
144
145 if (lt_after(get_release(t), litmus_clock())) {
146 set_current_state(TASK_INTERRUPTIBLE);
147 tsk_rt(t)->completed = 1;
148 preempt_enable_no_resched();
149 err = schedule_hrtimeout(&next_release, HRTIMER_MODE_ABS);
150 /* If we get woken by a signal, we return early.
151 * This is intentional; we want to be able to kill tasks
152 * that are waiting for the next job release.
153 */
154 tsk_rt(t)->completed = 0;
155 } else {
156 err = 0;
157 TRACE_CUR("TARDY: release=%llu now=%llu\n", get_release(t), litmus_clock());
158 preempt_enable();
159 }
160
161 TRACE_CUR("return to next job at %llu\n", litmus_clock());
162 return err;
163}
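
Putting the job-control code above together, a periodic LITMUS^RT task is expected to loop: do one job's worth of work, then ask the kernel to complete the job and suspend it until the next release. The wrapper below is a stub for illustration only; the real call is sys_complete_job(), exposed via the control-page ioctl() interface.

#include <stdio.h>

/* stub for illustration: a real wrapper would issue sys_complete_job() */
static long complete_job_syscall(void)
{
	return 0;
}

static void do_work(unsigned int job_no)
{
	printf("job %u running\n", job_no);
}

int main(void)
{
	unsigned int job;

	for (job = 0; job < 5; job++) {
		do_work(job);
		/* kernel side: mark the job complete, set up the next
		 * release/deadline, suspend until the release time */
		if (complete_job_syscall() != 0)
			break;
	}
	return 0;
}
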
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 000000000000..bd192180fef7
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,773 @@
1/*
2 * litmus.c -- Implementation of the LITMUS syscalls,
3 * the LITMUS initialization code,
4 * and the procfs interface.
5 */
6#include <asm/uaccess.h>
7#include <linux/uaccess.h>
8#include <linux/sysrq.h>
9#include <linux/sched.h>
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/reboot.h>
13#include <linux/stop_machine.h>
14#include <linux/sched/rt.h>
15#include <linux/rwsem.h>
16#include <linux/interrupt.h>
17
18#include <litmus/debug_trace.h>
19#include <litmus/litmus.h>
20#include <litmus/bheap.h>
21#include <litmus/trace.h>
22#include <litmus/rt_domain.h>
23#include <litmus/litmus_proc.h>
24#include <litmus/sched_trace.h>
25
26#ifdef CONFIG_SCHED_CPU_AFFINITY
27#include <litmus/affinity.h>
28#endif
29
30#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
31#define CREATE_TRACE_POINTS
32#include <trace/events/litmus.h>
33#endif
34
35/* Number of RT tasks that exist in the system */
36atomic_t rt_task_count = ATOMIC_INIT(0);
37
38#ifdef CONFIG_RELEASE_MASTER
39/* current master CPU for handling timer IRQs */
40atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
41#endif
42
43static struct kmem_cache * bheap_node_cache;
44extern struct kmem_cache * release_heap_cache;
45
46struct bheap_node* bheap_node_alloc(int gfp_flags)
47{
48 return kmem_cache_alloc(bheap_node_cache, gfp_flags);
49}
50
51void bheap_node_free(struct bheap_node* hn)
52{
53 kmem_cache_free(bheap_node_cache, hn);
54}
55
56struct release_heap* release_heap_alloc(int gfp_flags);
57void release_heap_free(struct release_heap* rh);
58
59/**
60 * Get the quantum alignment as a cmdline option.
61 * Default is aligned quanta.
62 */
63static bool aligned_quanta = 1;
64module_param(aligned_quanta, bool, 0644);
65
66u64 cpu_stagger_offset(int cpu)
67{
68 u64 offset = 0;
69
70 if (!aligned_quanta) {
71 offset = LITMUS_QUANTUM_LENGTH_NS;
72 do_div(offset, num_possible_cpus());
73 offset *= cpu;
74 }
75 return offset;
76}
77
78/*
79 * sys_set_task_rt_param
80 * @pid: Pid of the task whose scheduling parameters must be changed
81 * @param: New real-time extension parameters such as the execution cost and
82 * period
83 * Syscall for manipulating a task's rt extension params
84 * Returns EFAULT if param is NULL.
85 * ESRCH if pid does not correspond
86 * to a valid task.
87 * EINVAL if either period or execution cost is <=0
88 * EPERM if pid is a real-time task
89 * 0 if success
90 *
91 * Only non-real-time tasks may be configured with this system call
92 * to avoid races with the scheduler. In practice, this means that a
93 * task's parameters must be set _before_ calling sys_prepare_rt_task()
94 *
95 * find_task_by_vpid() assumes that we are in the same namespace of the
96 * target.
97 */
98asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
99{
100 struct rt_task tp;
101 struct task_struct *target;
102 int retval = -EINVAL;
103
104 printk("Setting up rt task parameters for process %d.\n", pid);
105
106 if (pid < 0 || param == 0) {
107 goto out;
108 }
109 if (copy_from_user(&tp, param, sizeof(tp))) {
110 retval = -EFAULT;
111 goto out;
112 }
113
114 /* Task search and manipulation must be protected */
115 read_lock_irq(&tasklist_lock);
116 rcu_read_lock();
117 if (!(target = find_task_by_vpid(pid))) {
118 retval = -ESRCH;
119 rcu_read_unlock();
120 goto out_unlock;
121 }
122 rcu_read_unlock();
123
124 /* set relative deadline to be implicit if left unspecified */
125 if (tp.relative_deadline == 0)
126 tp.relative_deadline = tp.period;
127
128 if (tp.exec_cost <= 0)
129 goto out_unlock;
130 if (tp.period <= 0)
131 goto out_unlock;
132 if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
133 {
134 printk(KERN_INFO "litmus: real-time task %d rejected "
135 "because task density > 1.0\n", pid);
136 goto out_unlock;
137 }
138 if (tp.cls != RT_CLASS_HARD &&
139 tp.cls != RT_CLASS_SOFT &&
140 tp.cls != RT_CLASS_BEST_EFFORT)
141 {
142 printk(KERN_INFO "litmus: real-time task %d rejected "
143 "because its class is invalid\n", pid);
144 goto out_unlock;
145 }
146 if (tp.budget_policy != NO_ENFORCEMENT &&
147 tp.budget_policy != QUANTUM_ENFORCEMENT &&
148 tp.budget_policy != PRECISE_ENFORCEMENT)
149 {
150 printk(KERN_INFO "litmus: real-time task %d rejected "
151 "because unsupported budget enforcement policy "
152 "specified (%d)\n",
153 pid, tp.budget_policy);
154 goto out_unlock;
155 }
156
157 if (is_realtime(target)) {
158 /* The task is already a real-time task.
159 * Let plugin decide whether it wants to support
160 * parameter changes at runtime.
161 */
162 retval = litmus->task_change_params(target, &tp);
163 } else {
164 target->rt_param.task_params = tp;
165 retval = 0;
166 }
167 out_unlock:
168 read_unlock_irq(&tasklist_lock);
169 out:
170 return retval;
171}
172
173/*
174 * Getter of task's RT params
175 * returns EINVAL if param is NULL or pid is negative
176 * returns ESRCH if pid does not correspond to a valid task
177 * returns EFAULT if copying of parameters has failed.
178 *
179 * find_task_by_vpid() assumes that we are in the same namespace of the
180 * target.
181 */
182asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
183{
184 int retval = -EINVAL;
185 struct task_struct *source;
186 struct rt_task lp;
187
188 if (param == 0 || pid < 0)
189 goto out;
190
191 read_lock_irq(&tasklist_lock);
192 rcu_read_lock();
193 source = find_task_by_vpid(pid);
194 rcu_read_unlock();
195 if (!source) {
196 retval = -ESRCH;
197 read_unlock_irq(&tasklist_lock);
198 goto out;
199 }
200 lp = source->rt_param.task_params;
201 read_unlock_irq(&tasklist_lock);
202 /* Do copying outside the lock */
203 retval =
204 copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
205 out:
206 return retval;
207
208}
209
210/*
211 * This is the crucial function for the periodic task implementation.
212 * It checks if a task is periodic, checks whether such a sleep
213 * is permitted, and calls the plugin-specific sleep, which puts the
214 * task into a wait array.
215 * returns 0 on successful wakeup
216 * returns EPERM if current conditions do not permit such sleep
217 * returns EINVAL if current task is not able to go to sleep
218 */
219asmlinkage long sys_complete_job(void)
220{
221 int retval = -EPERM;
222 if (!is_realtime(current)) {
223 retval = -EINVAL;
224 goto out;
225 }
226 /* Task with negative or zero period cannot sleep */
227 if (get_rt_period(current) <= 0) {
228 retval = -EINVAL;
229 goto out;
230 }
231 /* The plugin has to put the task into an
232 * appropriate queue and call schedule
233 */
234 retval = litmus->complete_job();
235 out:
236 return retval;
237}
238
239/* This is an "improved" version of sys_complete_job that
240 * addresses the problem of unintentionally missing a job after
241 * an overrun.
242 *
243 * returns 0 on successful wakeup
244 * returns EPERM if current conditions do not permit such sleep
245 * returns EINVAL if current task is not able to go to sleep
246 */
247asmlinkage long sys_wait_for_job_release(unsigned int job)
248{
249 int retval = -EPERM;
250 if (!is_realtime(current)) {
251 retval = -EINVAL;
252 goto out;
253 }
254
255 /* Task with negative or zero period cannot sleep */
256 if (get_rt_period(current) <= 0) {
257 retval = -EINVAL;
258 goto out;
259 }
260
261 retval = 0;
262
263 /* first wait until we have "reached" the desired job
264 *
265 * This implementation has at least two problems:
266 *
267 * 1) It doesn't gracefully handle the wrap around of
268 * job_no. Since LITMUS is a prototype, this is not much
269 * of a problem right now.
270 *
271 * 2) It is theoretically racy if a job release occurs
272 * between checking job_no and calling sleep_next_period().
273 * A proper solution would require adding another callback
274 * in the plugin structure and testing the condition with
275 * interrupts disabled.
276 *
277 * FIXME: At least problem 2 should be taken care of eventually.
278 */
279 while (!retval && job > current->rt_param.job_params.job_no)
280 /* If the last job overran then job <= job_no and we
281 * don't send the task to sleep.
282 */
283 retval = litmus->complete_job();
284 out:
285 return retval;
286}
287
288/* This is a helper syscall to query the current job sequence number.
289 *
290 * returns 0 on successful query
291 * returns EPERM if task is not a real-time task.
292 * returns EFAULT if &job is not a valid pointer.
293 */
294asmlinkage long sys_query_job_no(unsigned int __user *job)
295{
296 int retval = -EPERM;
297 if (is_realtime(current))
298 retval = put_user(current->rt_param.job_params.job_no, job);
299
300 return retval;
301}
302
303/* sys_null_call() is only used for determining raw system call
304 * overheads (kernel entry, kernel exit). It has no useful side effects.
305 * If ts is non-NULL, then the current Feather-Trace time is recorded.
306 */
307asmlinkage long sys_null_call(cycles_t __user *ts)
308{
309 long ret = 0;
310 cycles_t now;
311
312 if (ts) {
313 now = get_cycles();
314 ret = put_user(now, ts);
315 }
316
317 return ret;
318}
319
320asmlinkage long sys_reservation_create(int type, void __user *config)
321{
322 return litmus->reservation_create(type, config);
323}
324
325asmlinkage long sys_reservation_destroy(unsigned int reservation_id, int cpu)
326{
327 return litmus->reservation_destroy(reservation_id, cpu);
328}
329
330/* p is a real-time task. Re-init its state as a best-effort task. */
331static void reinit_litmus_state(struct task_struct* p, int restore)
332{
333 struct rt_task user_config = {};
334 void* ctrl_page = NULL;
335
336 if (restore) {
337 /* Save user-space provided configuration data
338 * and the allocated control page. */
339 user_config = p->rt_param.task_params;
340 ctrl_page = p->rt_param.ctrl_page;
341 }
342
343 /* We probably should not be inheriting any task's priority
344 * at this point in time.
345 */
346 WARN_ON(p->rt_param.inh_task);
347
348 /* Cleanup everything else. */
349 memset(&p->rt_param, 0, sizeof(p->rt_param));
350
351 /* Restore preserved fields. */
352 if (restore) {
353 p->rt_param.task_params = user_config;
354 p->rt_param.ctrl_page = ctrl_page;
355 }
356}
357
358static long __litmus_admit_task(struct task_struct* tsk)
359{
360 long err;
361
362 INIT_LIST_HEAD(&tsk_rt(tsk)->list);
363
364 /* allocate heap node for this task */
365 tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
366 tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
367
368 if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
369 printk(KERN_WARNING "litmus: no more heap node memory!?\n");
370
371 return -ENOMEM;
372 } else {
373 bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
374 }
375
376 preempt_disable();
377
378 err = litmus->admit_task(tsk);
379
380 if (!err) {
381 sched_trace_task_name(tsk);
382 sched_trace_task_param(tsk);
383 atomic_inc(&rt_task_count);
384 }
385
386 preempt_enable();
387
388 return err;
389}
390
391long litmus_admit_task(struct task_struct* tsk)
392{
393 long retval = 0;
394
395 BUG_ON(is_realtime(tsk));
396
397 tsk_rt(tsk)->heap_node = NULL;
398 tsk_rt(tsk)->rel_heap = NULL;
399
400 if (get_rt_relative_deadline(tsk) == 0 ||
401 get_exec_cost(tsk) >
402 min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
403 TRACE_TASK(tsk,
404 "litmus admit: invalid task parameters "
405 "(e = %lu, p = %lu, d = %lu)\n",
406 get_exec_cost(tsk), get_rt_period(tsk),
407 get_rt_relative_deadline(tsk));
408 retval = -EINVAL;
409 goto out;
410 }
411
412 retval = __litmus_admit_task(tsk);
413
414out:
415 if (retval) {
416 if (tsk_rt(tsk)->heap_node)
417 bheap_node_free(tsk_rt(tsk)->heap_node);
418 if (tsk_rt(tsk)->rel_heap)
419 release_heap_free(tsk_rt(tsk)->rel_heap);
420 }
421 return retval;
422}
423
424void litmus_clear_state(struct task_struct* tsk)
425{
426 BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
427 bheap_node_free(tsk_rt(tsk)->heap_node);
428 release_heap_free(tsk_rt(tsk)->rel_heap);
429
430 atomic_dec(&rt_task_count);
431 reinit_litmus_state(tsk, 1);
432}
433
434/* called from sched_setscheduler() */
435void litmus_exit_task(struct task_struct* tsk)
436{
437 if (is_realtime(tsk)) {
438 sched_trace_task_completion(tsk, 1);
439
440 litmus->task_exit(tsk);
441 }
442}
443
444static DECLARE_RWSEM(plugin_switch_mutex);
445
446void litmus_plugin_switch_disable(void)
447{
448 down_read(&plugin_switch_mutex);
449}
450
451void litmus_plugin_switch_enable(void)
452{
453 up_read(&plugin_switch_mutex);
454}
455
456static int __do_plugin_switch(struct sched_plugin* plugin)
457{
458 int ret;
459
460
461 /* don't switch if there are active real-time tasks */
462 if (atomic_read(&rt_task_count) == 0) {
463 TRACE("deactivating plugin %s\n", litmus->plugin_name);
464 ret = litmus->deactivate_plugin();
465 if (0 != ret)
466 goto out;
467
468 TRACE("activating plugin %s\n", plugin->plugin_name);
469 ret = plugin->activate_plugin();
470 if (0 != ret) {
471 printk(KERN_INFO "Can't activate %s (%d).\n",
472 plugin->plugin_name, ret);
473 plugin = &linux_sched_plugin;
474 }
475
476 printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
477 litmus = plugin;
478 } else
479 ret = -EBUSY;
480out:
481 TRACE("do_plugin_switch() => %d\n", ret);
482 return ret;
483}
484
485static atomic_t ready_to_switch;
486
487static int do_plugin_switch(void *_plugin)
488{
489 unsigned long flags;
490 int ret = 0;
491
492 local_save_flags(flags);
493 local_irq_disable();
494 hard_irq_disable();
495
496 if (atomic_dec_and_test(&ready_to_switch))
497 {
498 ret = __do_plugin_switch((struct sched_plugin*) _plugin);
499 atomic_set(&ready_to_switch, INT_MAX);
500 }
501
502 do {
503 cpu_relax();
504 } while (atomic_read(&ready_to_switch) != INT_MAX);
505
506 local_irq_restore(flags);
507 return ret;
508}
509
510/* Switching a plugin in use is tricky.
511 * We must watch out that no real-time tasks exist
512 * (and that none is created in parallel) and that the plugin is not
513 * currently in use on any processor (in theory).
514 */
515int switch_sched_plugin(struct sched_plugin* plugin)
516{
517 int err;
518 struct domain_proc_info* domain_info;
519
520 BUG_ON(!plugin);
521
522 if (atomic_read(&rt_task_count) == 0) {
523 down_write(&plugin_switch_mutex);
524
525 deactivate_domain_proc();
526
527 get_online_cpus();
528 atomic_set(&ready_to_switch, num_online_cpus());
529 err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin);
530 put_online_cpus();
531
532 if (!litmus->get_domain_proc_info(&domain_info))
533 activate_domain_proc(domain_info);
534
535 up_write(&plugin_switch_mutex);
536 return err;
537 } else
538 return -EBUSY;
539}
540
541/* Called upon fork.
542 * p is the newly forked task.
543 */
544void litmus_fork(struct task_struct* p)
545{
546 /* non-rt tasks might have ctrl_page set */
547 tsk_rt(p)->ctrl_page = NULL;
548
549 if (is_realtime(p)) {
550 reinit_litmus_state(p, 1);
551 if (litmus->fork_task(p)) {
552 if (__litmus_admit_task(p))
553 /* something went wrong, give up */
554 p->sched_reset_on_fork = 1;
555 } else {
556 /* clean out any litmus related state */
557 reinit_litmus_state(p, 0);
558
559 TRACE_TASK(p, "fork: real-time status denied\n");
560 /* Don't let the child be a real-time task. */
561 p->sched_reset_on_fork = 1;
562 }
563 }
564
565 /* od tables are never inherited across a fork */
566 p->od_table = NULL;
567}
568
569/* Called upon execve().
570 * current is doing the exec.
571 * Don't let address space specific stuff leak.
572 */
573void litmus_exec(void)
574{
575 struct task_struct* p = current;
576
577 if (is_realtime(p)) {
578 WARN_ON(p->rt_param.inh_task);
579 if (tsk_rt(p)->ctrl_page) {
580 free_page((unsigned long) tsk_rt(p)->ctrl_page);
581 tsk_rt(p)->ctrl_page = NULL;
582 }
583 }
584}
585
586/* Called when dead_tsk is being deallocated
587 */
588void exit_litmus(struct task_struct *dead_tsk)
589{
590 /* We also allow non-RT tasks to
591 * allocate control pages to allow
592 * measurements with non-RT tasks.
593 * So check if we need to free the page
594 * in any case.
595 */
596 if (tsk_rt(dead_tsk)->ctrl_page) {
597 TRACE_TASK(dead_tsk,
598 "freeing ctrl_page %p\n",
599 tsk_rt(dead_tsk)->ctrl_page);
600 free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
601 }
602
603 /* Tasks should not be real-time tasks any longer at this point. */
604 BUG_ON(is_realtime(dead_tsk));
605}
606
607void litmus_do_exit(struct task_struct *exiting_tsk)
608{
609 /* This task called do_exit(), but is still a real-time task. To avoid
610 * complications later, we force it to be a non-real-time task now. */
611
612 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
613
614 TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n");
615 sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, &param);
616}
617
618void litmus_dealloc(struct task_struct *tsk)
619{
620 /* tsk is no longer a real-time task */
621 TRACE_TASK(tsk, "Deallocating real-time task data\n");
622 litmus->task_cleanup(tsk);
623 litmus_clear_state(tsk);
624}
625
626/* move current non-RT task to a specific CPU */
627int litmus_be_migrate_to(int cpu)
628{
629 struct cpumask single_cpu_aff;
630
631 cpumask_clear(&single_cpu_aff);
632 cpumask_set_cpu(cpu, &single_cpu_aff);
633 return sched_setaffinity(current->pid, &single_cpu_aff);
634}
635
636#ifdef CONFIG_MAGIC_SYSRQ
637int sys_kill(int pid, int sig);
638
639static void sysrq_handle_kill_rt_tasks(int key)
640{
641 struct task_struct *t;
642 read_lock(&tasklist_lock);
643 for_each_process(t) {
644 if (is_realtime(t)) {
645 sys_kill(t->pid, SIGKILL);
646 }
647 }
648 read_unlock(&tasklist_lock);
649}
650
651static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
652 .handler = sysrq_handle_kill_rt_tasks,
653 .help_msg = "quit-rt-tasks(X)",
654 .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
655};
656#endif
657
658extern struct sched_plugin linux_sched_plugin;
659
660static int litmus_shutdown_nb(struct notifier_block *unused1,
661 unsigned long unused2, void *unused3)
662{
663 /* Attempt to switch back to regular Linux scheduling.
664 * Forces the active plugin to clean up.
665 */
666 if (litmus != &linux_sched_plugin) {
667 int ret = switch_sched_plugin(&linux_sched_plugin);
668 if (ret) {
669 printk("Auto-shutdown of active Litmus plugin failed.\n");
670 }
671 }
672 return NOTIFY_DONE;
673}
674
675static struct notifier_block shutdown_notifier = {
676 .notifier_call = litmus_shutdown_nb,
677};
678
679/**
680 * Triggering hrtimers on specific cpus as required by arm_release_timer(_on)
681 */
682#ifdef CONFIG_SMP
683
684/**
685 * hrtimer_pull - smp_call_function_single_async callback on remote cpu
686 */
687void hrtimer_pull(void *csd_info)
688{
689 struct hrtimer_start_on_info *info = csd_info;
690 TRACE("pulled timer 0x%x\n", info->timer);
691 hrtimer_start_range_ns(info->timer, info->time, 0, info->mode);
692}
693
694/**
695 * hrtimer_start_on - trigger timer arming on remote cpu
696 * @cpu: remote cpu
697 * @info: save timer information for enqueuing on remote cpu
698 * @timer: timer to be pulled
699 * @time: expire time
700 * @mode: timer mode
701 */
702void hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
703 struct hrtimer *timer, ktime_t time,
704 const enum hrtimer_mode mode)
705{
706 info->timer = timer;
707 info->time = time;
708 info->mode = mode;
709
710 /* initialize call_single_data struct */
711 info->csd.func = &hrtimer_pull;
712 info->csd.info = info;
713 info->csd.flags = 0;
714
715 /* initiate pull */
716 preempt_disable();
717 if (cpu == smp_processor_id()) {
718 /* start timer locally; we may get called
719 * with rq->lock held, do not wake up anything
720 */
721 TRACE("hrtimer_start_on: starting on local CPU\n");
722 hrtimer_start(info->timer, info->time, info->mode);
723 } else {
724 /* call hrtimer_pull() on remote cpu
725 * to start remote timer asynchronously
726 */
727 TRACE("hrtimer_start_on: pulling to remote CPU\n");
728 smp_call_function_single_async(cpu, &info->csd);
729 }
730 preempt_enable();
731}
732
733#endif /* CONFIG_SMP */
734
735static int __init _init_litmus(void)
736{
737 /* Common initializers,
738 * mode change lock is used to enforce single mode change
739 * operation.
740 */
741 printk("Starting LITMUS^RT kernel\n");
742
743 register_sched_plugin(&linux_sched_plugin);
744
745 bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
746 release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
747
748#ifdef CONFIG_MAGIC_SYSRQ
749 /* offer some debugging help */
750 if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
751 printk("Registered kill rt tasks magic sysrq.\n");
752 else
753 printk("Could not register kill rt tasks magic sysrq.\n");
754#endif
755
756 init_litmus_proc();
757
758 register_reboot_notifier(&shutdown_notifier);
759
760 return 0;
761}
762
763static void _exit_litmus(void)
764{
765 unregister_reboot_notifier(&shutdown_notifier);
766
767 exit_litmus_proc();
768 kmem_cache_destroy(bheap_node_cache);
769 kmem_cache_destroy(release_heap_cache);
770}
771
772module_init(_init_litmus);
773module_exit(_exit_litmus);
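
For reference, cpu_stagger_offset() above yields offset 0 for every CPU in the default aligned-quanta mode, and spreads quantum boundaries evenly across CPUs otherwise. A small worked example, assuming an illustrative 1 ms quantum and 4 CPUs:

#include <stdio.h>

#define QUANTUM_NS 1000000ULL /* illustrative 1 ms quantum */

static unsigned long long stagger_offset(int cpu, int nr_cpus, int aligned)
{
	if (aligned)
		return 0;                    /* default: aligned quanta */
	return (QUANTUM_NS / nr_cpus) * cpu; /* staggered quanta */
}

int main(void)
{
	int cpu, nr_cpus = 4;

	for (cpu = 0; cpu < nr_cpus; cpu++)
		printf("cpu %d: offset %llu ns\n",
		       cpu, stagger_offset(cpu, nr_cpus, 0));
	/* prints 0, 250000, 500000, 750000 */
	return 0;
}
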
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c
new file mode 100644
index 000000000000..de5e3f37fe88
--- /dev/null
+++ b/litmus/litmus_proc.c
@@ -0,0 +1,574 @@
1/*
2 * litmus_proc.c -- Implementation of the /proc/litmus directory tree.
3 */
4
5#include <linux/sched.h>
6#include <linux/slab.h>
7#include <linux/uaccess.h>
8#include <linux/seq_file.h>
9
10#include <litmus/debug_trace.h>
11#include <litmus/litmus.h>
12#include <litmus/litmus_proc.h>
13
14#include <litmus/clustered.h>
15
16/* in litmus/litmus.c */
17extern atomic_t rt_task_count;
18
19static struct proc_dir_entry *litmus_dir = NULL,
20 *curr_file = NULL,
21 *stat_file = NULL,
22 *plugs_dir = NULL,
23#ifdef CONFIG_RELEASE_MASTER
24 *release_master_file = NULL,
25#endif
26 *plugs_file = NULL,
27 *domains_dir = NULL,
28 *cpus_dir = NULL;
29
30
31/* in litmus/sync.c */
32int count_tasks_waiting_for_release(void);
33
34static int litmus_stats_proc_show(struct seq_file *m, void *v)
35{
36 seq_printf(m,
37 "real-time tasks = %d\n"
38 "ready for release = %d\n",
39 atomic_read(&rt_task_count),
40 count_tasks_waiting_for_release());
41 return 0;
42}
43
44static int litmus_stats_proc_open(struct inode *inode, struct file *file)
45{
46 return single_open(file, litmus_stats_proc_show, PDE_DATA(inode));
47}
48
49static const struct file_operations litmus_stats_proc_fops = {
50 .open = litmus_stats_proc_open,
51 .read = seq_read,
52 .llseek = seq_lseek,
53 .release = single_release,
54};
55
56
57static int litmus_loaded_proc_show(struct seq_file *m, void *v)
58{
59 print_sched_plugins(m);
60 return 0;
61}
62
63static int litmus_loaded_proc_open(struct inode *inode, struct file *file)
64{
65 return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode));
66}
67
68static const struct file_operations litmus_loaded_proc_fops = {
69 .open = litmus_loaded_proc_open,
70 .read = seq_read,
71 .llseek = seq_lseek,
72 .release = single_release,
73};
74
75
76
77
78/* in litmus/litmus.c */
79int switch_sched_plugin(struct sched_plugin*);
80
81static ssize_t litmus_active_proc_write(struct file *file,
82 const char __user *buffer, size_t count,
83 loff_t *ppos)
84{
85 char name[65];
86 struct sched_plugin* found;
87 ssize_t ret = -EINVAL;
88 int err;
89
90
91 ret = copy_and_chomp(name, sizeof(name), buffer, count);
92 if (ret < 0)
93 return ret;
94
95 found = find_sched_plugin(name);
96
97 if (found) {
98 err = switch_sched_plugin(found);
99 if (err) {
100 printk(KERN_INFO "Could not switch plugin: %d\n", err);
101 ret = err;
102 }
103 } else {
104 printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
105 ret = -ESRCH;
106 }
107
108 return ret;
109}
110
111static int litmus_active_proc_show(struct seq_file *m, void *v)
112{
113 seq_printf(m, "%s\n", litmus->plugin_name);
114 return 0;
115}
116
117static int litmus_active_proc_open(struct inode *inode, struct file *file)
118{
119 return single_open(file, litmus_active_proc_show, PDE_DATA(inode));
120}
121
122static const struct file_operations litmus_active_proc_fops = {
123 .open = litmus_active_proc_open,
124 .read = seq_read,
125 .llseek = seq_lseek,
126 .release = single_release,
127 .write = litmus_active_proc_write,
128};
129
130
131#ifdef CONFIG_RELEASE_MASTER
132static ssize_t litmus_release_master_proc_write(
133 struct file *file,
134 const char __user *buffer, size_t count,
135 loff_t *ppos)
136{
137 int cpu, err, online = 0;
138 char msg[64];
139 ssize_t len;
140
141 len = copy_and_chomp(msg, sizeof(msg), buffer, count);
142
143 if (len < 0)
144 return len;
145
146 if (strcmp(msg, "NO_CPU") == 0)
147 atomic_set(&release_master_cpu, NO_CPU);
148 else {
149 err = sscanf(msg, "%d", &cpu);
150 if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
151 atomic_set(&release_master_cpu, cpu);
152 } else {
153 TRACE("invalid release master: '%s' "
154 "(err:%d cpu:%d online:%d)\n",
155 msg, err, cpu, online);
156 len = -EINVAL;
157 }
158 }
159 return len;
160}
161
162static int litmus_release_master_proc_show(struct seq_file *m, void *v)
163{
164 int master;
165 master = atomic_read(&release_master_cpu);
166 if (master == NO_CPU)
167 seq_printf(m, "NO_CPU\n");
168 else
169 seq_printf(m, "%d\n", master);
170 return 0;
171}
172
173static int litmus_release_master_proc_open(struct inode *inode, struct file *file)
174{
175 return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode));
176}
177
178static const struct file_operations litmus_release_master_proc_fops = {
179 .open = litmus_release_master_proc_open,
180 .read = seq_read,
181 .llseek = seq_lseek,
182 .release = single_release,
183 .write = litmus_release_master_proc_write,
184};
185#endif
186
187int __init init_litmus_proc(void)
188{
189 litmus_dir = proc_mkdir("litmus", NULL);
190 if (!litmus_dir) {
191 printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
192 return -ENOMEM;
193 }
194
195 curr_file = proc_create("active_plugin", 0644, litmus_dir,
196 &litmus_active_proc_fops);
197
198 if (!curr_file) {
199 printk(KERN_ERR "Could not allocate active_plugin "
200 "procfs entry.\n");
201 return -ENOMEM;
202 }
203
204#ifdef CONFIG_RELEASE_MASTER
205 release_master_file = proc_create("release_master", 0644, litmus_dir,
206 &litmus_release_master_proc_fops);
207 if (!release_master_file) {
208 printk(KERN_ERR "Could not allocate release_master "
209 "procfs entry.\n");
210 return -ENOMEM;
211 }
212#endif
213
214 stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops);
215
216 plugs_dir = proc_mkdir("plugins", litmus_dir);
217 if (!plugs_dir){
218 printk(KERN_ERR "Could not allocate plugins directory "
219 "procfs entry.\n");
220 return -ENOMEM;
221 }
222
223 plugs_file = proc_create("loaded", 0444, plugs_dir,
224 &litmus_loaded_proc_fops);
225
226 domains_dir = proc_mkdir("domains", litmus_dir);
227 if (!domains_dir) {
228 printk(KERN_ERR "Could not allocate domains directory "
229 "procfs entry.\n");
230 return -ENOMEM;
231 }
232
233 cpus_dir = proc_mkdir("cpus", litmus_dir);
234 if (!cpus_dir) {
235 printk(KERN_ERR "Could not allocate cpus directory "
236 "procfs entry.\n");
237 return -ENOMEM;
238 }
239
240 return 0;
241}
242
243void exit_litmus_proc(void)
244{
245 if (cpus_dir || domains_dir) {
246 deactivate_domain_proc();
247 if (cpus_dir)
248 remove_proc_entry("cpus", litmus_dir);
249 if (domains_dir)
250 remove_proc_entry("domains", litmus_dir);
251 }
252 if (plugs_file)
253 remove_proc_entry("loaded", plugs_dir);
254 if (plugs_dir)
255 remove_proc_entry("plugins", litmus_dir);
256 if (stat_file)
257 remove_proc_entry("stats", litmus_dir);
258 if (curr_file)
259 remove_proc_entry("active_plugin", litmus_dir);
260#ifdef CONFIG_RELEASE_MASTER
261 if (release_master_file)
262 remove_proc_entry("release_master", litmus_dir);
263#endif
264 if (litmus_dir)
265 remove_proc_entry("litmus", NULL);
266}
267
268long make_plugin_proc_dir(struct sched_plugin* plugin,
269 struct proc_dir_entry** pde_in)
270{
271 struct proc_dir_entry *pde_new = NULL;
272 long rv;
273
274 if (!plugin || !plugin->plugin_name){
275 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
276 __func__);
277 rv = -EINVAL;
278 goto out_no_pde;
279 }
280
281 if (!plugs_dir){
282 printk(KERN_ERR "Could not make plugin sub-directory, because "
283 "/proc/litmus/plugins does not exist.\n");
284 rv = -ENOENT;
285 goto out_no_pde;
286 }
287
288 pde_new = proc_mkdir(plugin->plugin_name, plugs_dir);
289 if (!pde_new){
290 printk(KERN_ERR "Could not make plugin sub-directory: "
291 "out of memory?.\n");
292 rv = -ENOMEM;
293 goto out_no_pde;
294 }
295
296 rv = 0;
297 *pde_in = pde_new;
298 goto out_ok;
299
300out_no_pde:
301 *pde_in = NULL;
302out_ok:
303 return rv;
304}
305
306void remove_plugin_proc_dir(struct sched_plugin* plugin)
307{
308 if (!plugin || !plugin->plugin_name){
309 printk(KERN_ERR "Invalid plugin struct passed to %s.\n",
310 __func__);
311 return;
312 }
313 remove_proc_entry(plugin->plugin_name, plugs_dir);
314}
315
316
317
318/* misc. I/O helper functions */
319
320int copy_and_chomp(char *kbuf, unsigned long ksize,
321 __user const char* ubuf, unsigned long ulength)
322{
323 /* caller must provide buffer space */
324 BUG_ON(!ksize);
325
326 ksize--; /* leave space for null byte */
327
328 if (ksize > ulength)
329 ksize = ulength;
330
331 if(copy_from_user(kbuf, ubuf, ksize))
332 return -EFAULT;
333
334 kbuf[ksize] = '\0';
335
336 /* chomp kbuf */
337 if (ksize > 0 && kbuf[ksize - 1] == '\n')
338 kbuf[ksize - 1] = '\0';
339
340 return ksize;
341}
342
343/* helper functions for clustered plugins */
344static const char* cache_level_names[] = {
345 "ALL",
346 "L1",
347 "L2",
348 "L3",
349};
350
351int parse_cache_level(const char *cache_name, enum cache_level *level)
352{
353 int err = -EINVAL;
354 int i;
355 /* do a quick and dirty comparison to find the cluster size */
356 for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++)
357 if (!strcmp(cache_name, cache_level_names[i])) {
358 *level = (enum cache_level) i;
359 err = 0;
360 break;
361 }
362 return err;
363}
364
365const char* cache_level_name(enum cache_level level)
366{
367 int idx = level;
368
369 if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER)
370 return cache_level_names[idx];
371 else
372 return "INVALID";
373}
374
375
376
377
378/* proc file interface to configure the cluster size */
379
380static ssize_t litmus_cluster_proc_write(struct file *file,
381 const char __user *buffer, size_t count,
382 loff_t *ppos)
383{
384 enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file));
385 ssize_t len;
386 char cache_name[8];
387
388 len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count);
389
390 if (len > 0 && parse_cache_level(cache_name, level)) {
391 printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
392 len = -EINVAL;
393 }
394
395 return len;
396}
397
398static int litmus_cluster_proc_show(struct seq_file *m, void *v)
399{
400 enum cache_level *level = (enum cache_level *) m->private;
401
402 seq_printf(m, "%s\n", cache_level_name(*level));
403 return 0;
404}
405
406static int litmus_cluster_proc_open(struct inode *inode, struct file *file)
407{
408 return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode));
409}
410
411static const struct file_operations litmus_cluster_proc_fops = {
412 .open = litmus_cluster_proc_open,
413 .read = seq_read,
414 .llseek = seq_lseek,
415 .release = single_release,
416 .write = litmus_cluster_proc_write,
417};
418
419struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent,
420 enum cache_level* level)
421{
422 struct proc_dir_entry* cluster_file;
423
424
425 cluster_file = proc_create_data("cluster", 0644, parent,
426 &litmus_cluster_proc_fops,
427 (void *) level);
428 if (!cluster_file) {
429 printk(KERN_ERR
430 "Could not cluster procfs entry.\n");
431 }
432 return cluster_file;
433}
434
435static struct domain_proc_info* active_mapping = NULL;
436
437static int litmus_mapping_proc_show(struct seq_file *m, void *v)
438{
439 struct cd_mapping *mapping = (struct cd_mapping*) m->private;
440
441 if(!mapping)
442 return 0;
443
444 seq_printf(m, "%*pb\n", cpumask_pr_args(mapping->mask));
445 return 0;
446}
447
448static int litmus_mapping_proc_open(struct inode *inode, struct file *file)
449{
450 return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode));
451}
452
453static const struct file_operations litmus_domain_proc_fops = {
454 .open = litmus_mapping_proc_open,
455 .read = seq_read,
456 .llseek = seq_lseek,
457 .release = single_release,
458};
459
460long activate_domain_proc(struct domain_proc_info* map)
461{
462 int i;
463 char name[8];
464
465 if (!map)
466 return -EINVAL;
467 if (cpus_dir == NULL || domains_dir == NULL)
468 return -EINVAL;
469
470 if (active_mapping)
471 deactivate_domain_proc();
472
473 active_mapping = map;
474
475 for (i = 0; i < map->num_cpus; ++i) {
476 struct cd_mapping* m = &map->cpu_to_domains[i];
477 snprintf(name, sizeof(name), "%d", m->id);
478 m->proc_file = proc_create_data(name, 0444, cpus_dir,
479 &litmus_domain_proc_fops, (void*)m);
480 }
481
482 for (i = 0; i < map->num_domains; ++i) {
483 struct cd_mapping* m = &map->domain_to_cpus[i];
484 snprintf(name, sizeof(name), "%d", m->id);
485 m->proc_file = proc_create_data(name, 0444, domains_dir,
486 &litmus_domain_proc_fops, (void*)m);
487 }
488
489 return 0;
490}
491
492long deactivate_domain_proc()
493{
494 int i;
495 char name[65];
496
497 struct domain_proc_info* map = active_mapping;
498
499 if (!map)
500 return -EINVAL;
501
502 for (i = 0; i < map->num_cpus; ++i) {
503 struct cd_mapping* m = &map->cpu_to_domains[i];
504 snprintf(name, sizeof(name), "%d", m->id);
505 remove_proc_entry(name, cpus_dir);
506 m->proc_file = NULL;
507 }
508 for (i = 0; i < map->num_domains; ++i) {
509 struct cd_mapping* m = &map->domain_to_cpus[i];
510 snprintf(name, sizeof(name), "%d", m->id);
511 remove_proc_entry(name, domains_dir);
512 m->proc_file = NULL;
513 }
514
515 active_mapping = NULL;
516
517 return 0;
518}
519
520long init_domain_proc_info(struct domain_proc_info* m,
521 int num_cpus, int num_domains)
522{
523 int i;
524 int num_alloced_cpu_masks = 0;
525 int num_alloced_domain_masks = 0;
526
527 m->cpu_to_domains =
528 kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus,
529 GFP_ATOMIC);
530 if(!m->cpu_to_domains)
531 goto failure;
532
533 m->domain_to_cpus =
534 kmalloc(sizeof(*(m->domain_to_cpus))*num_domains,
535 GFP_ATOMIC);
536 if(!m->domain_to_cpus)
537 goto failure;
538
539 for(i = 0; i < num_cpus; ++i) {
540 if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC))
541 goto failure;
542 ++num_alloced_cpu_masks;
543 }
544 for(i = 0; i < num_domains; ++i) {
545 if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC))
546 goto failure;
547 ++num_alloced_domain_masks;
548 }
549
550 return 0;
551
552failure:
553 for(i = 0; i < num_alloced_cpu_masks; ++i)
554 free_cpumask_var(m->cpu_to_domains[i].mask);
555 for(i = 0; i < num_alloced_domain_masks; ++i)
556 free_cpumask_var(m->domain_to_cpus[i].mask);
557 if(m->cpu_to_domains)
558 kfree(m->cpu_to_domains);
559 if(m->domain_to_cpus)
560 kfree(m->domain_to_cpus);
561 return -ENOMEM;
562}
563
564void destroy_domain_proc_info(struct domain_proc_info* m)
565{
566 int i;
567 for(i = 0; i < m->num_cpus; ++i)
568 free_cpumask_var(m->cpu_to_domains[i].mask);
569 for(i = 0; i < m->num_domains; ++i)
570 free_cpumask_var(m->domain_to_cpus[i].mask);
571 kfree(m->cpu_to_domains);
572 kfree(m->domain_to_cpus);
573 memset(m, 0, sizeof(*m));
574}
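
The active_plugin handler above accepts a plugin name written to /proc/litmus/active_plugin and chomps the trailing newline. A minimal user-space sketch of driving it, assuming the default plugin is registered under the name "Linux" (check /proc/litmus/plugins/loaded for the names actually available):

#include <stdio.h>

int main(void)
{
	char active[64] = "";
	FILE *f;

	/* select a plugin by name; the kernel strips the trailing newline */
	f = fopen("/proc/litmus/active_plugin", "w");
	if (!f) {
		perror("active_plugin");
		return 1;
	}
	fputs("Linux\n", f); /* assumed plugin name, see plugins/loaded */
	fclose(f);

	/* read back which plugin is now active */
	f = fopen("/proc/litmus/active_plugin", "r");
	if (f && fgets(active, sizeof(active), f))
		printf("active plugin: %s", active);
	if (f)
		fclose(f);
	return 0;
}
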
diff --git a/litmus/locking.c b/litmus/locking.c
new file mode 100644
index 000000000000..a1d0515c5613
--- /dev/null
+++ b/litmus/locking.c
@@ -0,0 +1,189 @@
1#include <linux/sched.h>
2#include <litmus/litmus.h>
3#include <litmus/fdso.h>
4#include <litmus/debug_trace.h>
5
6#ifdef CONFIG_LITMUS_LOCKING
7
8#include <linux/sched.h>
9#include <litmus/litmus.h>
10#include <litmus/sched_plugin.h>
11#include <litmus/trace.h>
12#include <litmus/wait.h>
13
14static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
15static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
16static int close_generic_lock(struct od_table_entry* entry);
17static void destroy_generic_lock(obj_type_t type, void* sem);
18
19struct fdso_ops generic_lock_ops = {
20 .create = create_generic_lock,
21 .open = open_generic_lock,
22 .close = close_generic_lock,
23 .destroy = destroy_generic_lock
24};
25
26static inline bool is_lock(struct od_table_entry* entry)
27{
28 return entry->class == &generic_lock_ops;
29}
30
31static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
32{
33 BUG_ON(!is_lock(entry));
34 return (struct litmus_lock*) entry->obj->obj;
35}
36
37static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
38{
39 struct litmus_lock* lock;
40 int err;
41
42 err = litmus->allocate_lock(&lock, type, arg);
43 if (err == 0)
44 *obj_ref = lock;
45 return err;
46}
47
48static int open_generic_lock(struct od_table_entry* entry, void* __user arg)
49{
50 struct litmus_lock* lock = get_lock(entry);
51 if (lock->ops->open)
52 return lock->ops->open(lock, arg);
53 else
54 return 0; /* default: any task can open it */
55}
56
57static int close_generic_lock(struct od_table_entry* entry)
58{
59 struct litmus_lock* lock = get_lock(entry);
60 if (lock->ops->close)
61 return lock->ops->close(lock);
62 else
63 return 0; /* default: closing succeeds */
64}
65
66static void destroy_generic_lock(obj_type_t type, void* obj)
67{
68 struct litmus_lock* lock = (struct litmus_lock*) obj;
69 lock->ops->deallocate(lock);
70}
71
72asmlinkage long sys_litmus_lock(int lock_od)
73{
74 long err = -EINVAL;
75 struct od_table_entry* entry;
76 struct litmus_lock* l;
77
78 TS_SYSCALL_IN_START;
79
80 TS_SYSCALL_IN_END;
81
82 TS_LOCK_START;
83
84 entry = get_entry_for_od(lock_od);
85 if (entry && is_lock(entry)) {
86 l = get_lock(entry);
87 TRACE_CUR("attempts to lock 0x%p\n", l);
88 err = l->ops->lock(l);
89 }
90
91 /* Note: task may have been suspended or preempted in between! Take
92 * this into account when computing overheads. */
93 TS_LOCK_END;
94
95 TS_SYSCALL_OUT_START;
96
97 return err;
98}
99
100asmlinkage long sys_litmus_unlock(int lock_od)
101{
102 long err = -EINVAL;
103 struct od_table_entry* entry;
104 struct litmus_lock* l;
105
106 TS_SYSCALL_IN_START;
107
108 TS_SYSCALL_IN_END;
109
110 TS_UNLOCK_START;
111
112 entry = get_entry_for_od(lock_od);
113 if (entry && is_lock(entry)) {
114 l = get_lock(entry);
115 TRACE_CUR("attempts to unlock 0x%p\n", l);
116 err = l->ops->unlock(l);
117 }
118
119 /* Note: task may have been preempted in between! Take this into
120 * account when computing overheads. */
121 TS_UNLOCK_END;
122
123 TS_SYSCALL_OUT_START;
124
125 return err;
126}
127
128struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
129{
130 wait_queue_t* q;
131 struct task_struct* t = NULL;
132
133 if (waitqueue_active(wq)) {
134 q = list_entry(wq->task_list.next,
135 wait_queue_t, task_list);
136 t = (struct task_struct*) q->private;
137 __remove_wait_queue(wq, q);
138 }
139 return(t);
140}
141
142unsigned int __add_wait_queue_prio_exclusive(
143 wait_queue_head_t* head,
144 prio_wait_queue_t *new)
145{
146 struct list_head *pos;
147 unsigned int passed = 0;
148
149 new->wq.flags |= WQ_FLAG_EXCLUSIVE;
150
151 /* find a spot where the new entry is less than the next */
152 list_for_each(pos, &head->task_list) {
153 prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
154 wq.task_list);
155
156 if (unlikely(lt_before(new->priority, queued->priority) ||
157 (new->priority == queued->priority &&
158 new->tie_breaker < queued->tie_breaker))) {
159 /* pos is not less than new, thus insert here */
160 __list_add(&new->wq.task_list, pos->prev, pos);
161 goto out;
162 }
163 passed++;
164 }
165
166 /* if we get to this point either the list is empty or every
167 * queued element is less than new.
168 * Let's add new to the end. */
169 list_add_tail(&new->wq.task_list, &head->task_list);
170out:
171 return passed;
172}
173
174
175#else
176
177struct fdso_ops generic_lock_ops = {};
178
179asmlinkage long sys_litmus_lock(int sem_od)
180{
181 return -ENOSYS;
182}
183
184asmlinkage long sys_litmus_unlock(int sem_od)
185{
186 return -ENOSYS;
187}
188
189#endif
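
__add_wait_queue_prio_exclusive() above keeps lock waiters ordered by (priority, tie_breaker) and reports how many already queued waiters the new entry was placed behind. A stand-alone user-space model of that ordered insert, with illustrative types:

#include <stdio.h>

struct waiter {
	unsigned long long prio; /* smaller value = higher priority */
	unsigned int tie;        /* tie-breaker, smaller wins */
};

/* Insert w into the sorted array q[0..n-1] and return how many queued
 * waiters the new one was placed behind (the kernel helper's 'passed'). */
static unsigned int insert_prio(struct waiter *q, unsigned int n, struct waiter w)
{
	unsigned int i, j;

	/* find the first queued entry that the new waiter beats */
	for (i = 0; i < n; i++)
		if (w.prio < q[i].prio ||
		    (w.prio == q[i].prio && w.tie < q[i].tie))
			break;

	for (j = n; j > i; j--) /* shift the tail right by one */
		q[j] = q[j - 1];
	q[i] = w;

	return i; /* number of higher-or-equal priority waiters passed */
}

int main(void)
{
	struct waiter q[8];
	unsigned int n = 0;

	insert_prio(q, n, (struct waiter){ .prio = 10, .tie = 0 }); n++;
	insert_prio(q, n, (struct waiter){ .prio = 30, .tie = 0 }); n++;
	printf("passed: %u\n",
	       insert_prio(q, n, (struct waiter){ .prio = 20, .tie = 0 })); /* 1 */
	return 0;
}
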
diff --git a/litmus/preempt.c b/litmus/preempt.c
new file mode 100644
index 000000000000..9e2356db86df
--- /dev/null
+++ b/litmus/preempt.c
@@ -0,0 +1,143 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/preempt.h>
5#include <litmus/trace.h>
6
7DEFINE_PER_CPU(bool, litmus_preemption_in_progress);
8
9/* The rescheduling state of each processor.
10 */
11DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state);
12
13void sched_state_will_schedule(struct task_struct* tsk)
14{
15 /* Litmus hack: we only care about processor-local invocations of
16 * set_tsk_need_resched(). We can't reliably set the flag remotely
17 * since it might race with other updates to the scheduling state. We
18 * can't rely on the runqueue lock protecting updates to the sched
19 * state since processors do not acquire the runqueue locks for all
20 * updates to the sched state (to avoid acquiring two runqueue locks at
21 * the same time). Further, if tsk is residing on a remote processor,
22 * then that processor doesn't actually know yet that it is going to
23 * reschedule; it still must receive an IPI (unless a local invocation
24 * races).
25 */
26 if (likely(task_cpu(tsk) == smp_processor_id())) {
27 VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE);
28 if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK))
29 set_sched_state(PICKED_WRONG_TASK);
30 else
31 set_sched_state(WILL_SCHEDULE);
32 } else
33 /* Litmus tasks should never be subject to a remote
34 * set_tsk_need_resched(). */
35 BUG_ON(is_realtime(tsk));
36#ifdef CONFIG_PREEMPT_STATE_TRACE
37 TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n",
38 __builtin_return_address(0));
39#endif
40}
41
42/* Called by the IPI handler after another CPU called smp_send_resched(). */
43void sched_state_ipi(void)
44{
45 /* If the IPI was slow, we might be in any state right now. The IPI is
46 * only meaningful if we are in SHOULD_SCHEDULE. */
47 if (is_in_sched_state(SHOULD_SCHEDULE)) {
48 /* Cause scheduler to be invoked.
49 * This will cause a transition to WILL_SCHEDULE. */
50 set_tsk_need_resched(current);
51 TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n",
52 current->comm, current->pid);
53 TS_SEND_RESCHED_END;
54 } else {
55 /* ignore */
56 TRACE_STATE("ignoring IPI in state %x (%s)\n",
57 get_sched_state(),
58 sched_state_name(get_sched_state()));
59 }
60}
61
62/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must
63 * hold the lock that is used to serialize scheduling decisions. */
64void litmus_reschedule(int cpu)
65{
66 int picked_transition_ok = 0;
67 int scheduled_transition_ok = 0;
68
69 /* The (remote) CPU could be in any state. */
70
71 /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU
72 * is not aware of the need to reschedule at this point. */
73
74 /* is a context switch in progress? */
75 if (cpu_is_in_sched_state(cpu, TASK_PICKED))
76 picked_transition_ok = sched_state_transition_on(
77 cpu, TASK_PICKED, PICKED_WRONG_TASK);
78
79 if (!picked_transition_ok &&
80 cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) {
81 /* We either raced with the end of the context switch, or the
82 * CPU was in TASK_SCHEDULED anyway. */
83 scheduled_transition_ok = sched_state_transition_on(
84 cpu, TASK_SCHEDULED, SHOULD_SCHEDULE);
85 }
86
87 /* If the CPU was in state TASK_SCHEDULED, then we need to cause the
88 * scheduler to be invoked. */
89 if (scheduled_transition_ok) {
90 if (smp_processor_id() == cpu) {
91 set_tsk_need_resched(current);
92 preempt_set_need_resched();
93 } else {
94 TS_SEND_RESCHED_START(cpu);
95 smp_send_reschedule(cpu);
96 }
97 }
98
99 TRACE_STATE("%s picked-ok:%d sched-ok:%d\n",
100 __FUNCTION__,
101 picked_transition_ok,
102 scheduled_transition_ok);
103}
104
105void litmus_reschedule_local(void)
106{
107 if (is_in_sched_state(TASK_PICKED))
108 set_sched_state(PICKED_WRONG_TASK);
109 else if (is_in_sched_state(TASK_SCHEDULED
110 | SHOULD_SCHEDULE
111 | PICKED_WRONG_TASK)) {
112 set_sched_state(WILL_SCHEDULE);
113 set_tsk_need_resched(current);
114 preempt_set_need_resched();
115 }
116}
117
118#ifdef CONFIG_DEBUG_KERNEL
119
120void sched_state_plugin_check(void)
121{
122 if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) {
123 TRACE("!!!! plugin did not call sched_state_task_picked()! "
124 "Calling sched_state_task_picked() is mandatory---fix this.\n");
125 set_sched_state(TASK_PICKED);
126 }
127}
128
129#define NAME_CHECK(x) case x: return #x
130const char* sched_state_name(int s)
131{
132 switch (s) {
133 NAME_CHECK(TASK_SCHEDULED);
134 NAME_CHECK(SHOULD_SCHEDULE);
135 NAME_CHECK(WILL_SCHEDULE);
136 NAME_CHECK(TASK_PICKED);
137 NAME_CHECK(PICKED_WRONG_TASK);
138 default:
139 return "UNKNOWN";
140 };
141}
142
143#endif
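
The state machine above is driven from plugin code almost exclusively through litmus_reschedule() and litmus_reschedule_local(). The sketch below (not part of the patch) shows the intended calling pattern for a remote preemption; the plugin lock and the helper name are made-up placeholders, since the only requirement stated above is that the caller holds the lock that serializes its scheduling decisions.

#include <linux/spinlock.h>
#include <litmus/preempt.h>

/* Hypothetical plugin-private lock serializing scheduling decisions. */
static DEFINE_RAW_SPINLOCK(demo_plugin_lock);

/* Hypothetical helper: ask target_cpu to re-run the plugin's schedule(). */
static void demo_preempt_cpu(int target_cpu)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_plugin_lock, flags);
	/* ... update the plugin's idea of what target_cpu should run ... */
	litmus_reschedule(target_cpu);	/* TASK_PICKED -> PICKED_WRONG_TASK, or
					 * TASK_SCHEDULED -> SHOULD_SCHEDULE,
					 * followed by an IPI if remote */
	raw_spin_unlock_irqrestore(&demo_plugin_lock, flags);
}
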
diff --git a/litmus/reservations/Makefile b/litmus/reservations/Makefile
new file mode 100644
index 000000000000..517fc2ff8a76
--- /dev/null
+++ b/litmus/reservations/Makefile
@@ -0,0 +1,3 @@
1obj-y += core.o budget-notifier.o alloc.o
2obj-y += polling.o
3obj-y += table-driven.o
diff --git a/litmus/reservations/alloc.c b/litmus/reservations/alloc.c
new file mode 100644
index 000000000000..1f93f223f504
--- /dev/null
+++ b/litmus/reservations/alloc.c
@@ -0,0 +1,143 @@
1#include <linux/slab.h>
2#include <asm/uaccess.h>
3
4#include <litmus/rt_param.h>
5
6#include <litmus/reservations/alloc.h>
7#include <litmus/reservations/polling.h>
8#include <litmus/reservations/table-driven.h>
9
10
11long alloc_polling_reservation(
12 int res_type,
13 struct reservation_config *config,
14 struct reservation **_res)
15{
16 struct polling_reservation *pres;
17 int use_edf = config->priority == LITMUS_NO_PRIORITY;
18 int periodic = res_type == PERIODIC_POLLING;
19
20 if (config->polling_params.budget >
21 config->polling_params.period) {
22 printk(KERN_ERR "invalid polling reservation (%u): "
23 "budget > period\n", config->id);
24 return -EINVAL;
25 }
26 if (config->polling_params.budget >
27 config->polling_params.relative_deadline
28 && config->polling_params.relative_deadline) {
29 printk(KERN_ERR "invalid polling reservation (%u): "
30 "budget > deadline\n", config->id);
31 return -EINVAL;
32 }
33 if (config->polling_params.offset >=
34 config->polling_params.period) {
35 printk(KERN_ERR "invalid polling reservation (%u): "
36 "offset >= period\n", config->id);
37 return -EINVAL;
38 }
39
40 /* XXX: would be nice to use a core-local allocation. */
41 pres = kzalloc(sizeof(*pres), GFP_KERNEL);
42 if (!pres)
43 return -ENOMEM;
44
45 polling_reservation_init(pres, use_edf, periodic,
46 config->polling_params.budget,
47 config->polling_params.period,
48 config->polling_params.relative_deadline,
49 config->polling_params.offset);
50 pres->res.id = config->id;
51 if (!use_edf)
52 pres->res.priority = config->priority;
53
54 *_res = &pres->res;
55 return 0;
56}
57
58
59#define MAX_INTERVALS 1024
60
61long alloc_table_driven_reservation(
62 struct reservation_config *config,
63 struct reservation **_res)
64{
65 struct table_driven_reservation *td_res = NULL;
66 struct lt_interval *slots = NULL;
67 size_t slots_size;
68 unsigned int i, num_slots;
69 long err = -EINVAL;
70 void *mem;
71
72 if (!config->table_driven_params.num_intervals) {
73 printk(KERN_ERR "invalid table-driven reservation (%u): "
74 "no intervals\n", config->id);
75 return -EINVAL;
76 }
77
78 if (config->table_driven_params.num_intervals > MAX_INTERVALS) {
79 printk(KERN_ERR "invalid table-driven reservation (%u): "
80 "too many intervals (max: %d)\n", config->id, MAX_INTERVALS);
81 return -EINVAL;
82 }
83
84 num_slots = config->table_driven_params.num_intervals;
85 slots_size = sizeof(slots[0]) * num_slots;
86
87 mem = kzalloc(sizeof(*td_res) + slots_size, GFP_KERNEL);
88 if (!mem) {
89 return -ENOMEM;
90 } else {
91 slots = mem + sizeof(*td_res);
92 td_res = mem;
93 err = copy_from_user(slots,
94 config->table_driven_params.intervals, slots_size) ? -EFAULT : 0;
95 }
96
97 if (!err) {
98 /* sanity checks */
99 for (i = 0; !err && i < num_slots; i++)
100 if (slots[i].end <= slots[i].start) {
101 printk(KERN_ERR
102 "invalid table-driven reservation (%u): "
103 "invalid interval %u => [%llu, %llu]\n",
104 config->id, i,
105 slots[i].start, slots[i].end);
106 err = -EINVAL;
107 }
108
109 for (i = 0; !err && i + 1 < num_slots; i++)
110 if (slots[i + 1].start <= slots[i].end) {
111 printk(KERN_ERR
112 "invalid table-driven reservation (%u): "
113 "overlapping intervals %u, %u\n",
114 config->id, i, i + 1);
115 err = -EINVAL;
116 }
117
118 if (slots[num_slots - 1].end >
119 config->table_driven_params.major_cycle_length) {
120 printk(KERN_ERR
121 "invalid table-driven reservation (%u): last "
122 "interval ends past major cycle %llu > %llu\n",
123 config->id,
124 slots[num_slots - 1].end,
125 config->table_driven_params.major_cycle_length);
126 err = -EINVAL;
127 }
128 }
129
130 if (err) {
131 kfree(td_res);
132 } else {
133 table_driven_reservation_init(td_res,
134 config->table_driven_params.major_cycle_length,
135 slots, num_slots);
136 td_res->res.id = config->id;
137 td_res->res.priority = config->priority;
138 *_res = &td_res->res;
139 }
140
141 return err;
142}
143
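
For reference, a reservation_config as validated by alloc_polling_reservation() might look as follows. This is an illustrative sketch only: the numeric values are invented and example_alloc_polling() is not a function in this patch.

#include <linux/time.h>
#include <litmus/rt_param.h>
#include <litmus/reservations/alloc.h>

/* Hypothetical caller: a periodic polling reservation with a 10ms budget
 * every 100ms, EDF-prioritized (LITMUS_NO_PRIORITY => use_edf above). */
static long example_alloc_polling(struct reservation **out)
{
	struct reservation_config cfg = {
		.id       = 1,
		.priority = LITMUS_NO_PRIORITY,
		.polling_params = {
			.budget            = 10 * NSEC_PER_MSEC,
			.period            = 100 * NSEC_PER_MSEC,
			.relative_deadline = 0,	/* 0 => deadline = period */
			.offset            = 0,
		},
	};

	return alloc_polling_reservation(PERIODIC_POLLING, &cfg, out);
}
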
diff --git a/litmus/reservations/budget-notifier.c b/litmus/reservations/budget-notifier.c
new file mode 100644
index 000000000000..0b0f42687882
--- /dev/null
+++ b/litmus/reservations/budget-notifier.c
@@ -0,0 +1,26 @@
1#include <litmus/reservations/budget-notifier.h>
2
3void budget_notifier_list_init(struct budget_notifier_list* bnl)
4{
5 INIT_LIST_HEAD(&bnl->list);
6 raw_spin_lock_init(&bnl->lock);
7}
8
9void budget_notifiers_fire(struct budget_notifier_list *bnl, bool replenished)
10{
11 struct budget_notifier *bn, *next;
12
13 unsigned long flags;
14
15 raw_spin_lock_irqsave(&bnl->lock, flags);
16
17 list_for_each_entry_safe(bn, next, &bnl->list, list) {
18 if (replenished)
19 bn->budget_replenished(bn);
20 else
21 bn->budget_exhausted(bn);
22 }
23
24 raw_spin_unlock_irqrestore(&bnl->lock, flags);
25}
26
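
A client hooks into budget_notifiers_fire() by placing a budget_notifier with its two callbacks on the list. The patch does not show a registration helper here, so the manual list insertion below is an assumption based only on the fields used above (a dedicated helper may exist in the header); the callback bodies are placeholders.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <litmus/reservations/budget-notifier.h>

static void demo_exhausted(struct budget_notifier *bn)		/* hypothetical */
{
	/* react to budget depletion, e.g., throttle the associated client */
}

static void demo_replenished(struct budget_notifier *bn)	/* hypothetical */
{
	/* react to budget replenishment */
}

static void demo_attach_notifier(struct budget_notifier_list *bnl,
				 struct budget_notifier *bn)
{
	unsigned long flags;

	bn->budget_exhausted   = demo_exhausted;
	bn->budget_replenished = demo_replenished;

	raw_spin_lock_irqsave(&bnl->lock, flags);
	list_add(&bn->list, &bnl->list);
	raw_spin_unlock_irqrestore(&bnl->lock, flags);
}
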
diff --git a/litmus/reservations/core.c b/litmus/reservations/core.c
new file mode 100644
index 000000000000..5137eda0f643
--- /dev/null
+++ b/litmus/reservations/core.c
@@ -0,0 +1,393 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/debug_trace.h>
5#include <litmus/reservations/reservation.h>
6
7void reservation_init(struct reservation *res)
8{
9 memset(res, 0, sizeof(*res));
10 res->state = RESERVATION_INACTIVE;
11 INIT_LIST_HEAD(&res->clients);
12 INIT_LIST_HEAD(&res->replenish_list);
13 budget_notifier_list_init(&res->budget_notifiers);
14}
15
16struct task_struct* default_dispatch_client(
17 struct reservation *res,
18 lt_t *for_at_most)
19{
20 struct reservation_client *client, *next;
21 struct task_struct* tsk;
22
23 BUG_ON(res->state != RESERVATION_ACTIVE);
24 *for_at_most = 0;
25
26 list_for_each_entry_safe(client, next, &res->clients, list) {
27 tsk = client->dispatch(client);
28 if (likely(tsk)) {
29 /* Primitive form of round-robin scheduling:
30 * make sure we alternate between multiple clients
31 * with at least the granularity of the replenishment
32 * period. Reservations that need more fine-grained
33 * or more predictable alternation between threads
34 * within a reservation should provide a custom
35 * dispatch function. */
36 list_del(&client->list);
37 /* move to back of list */
38 list_add_tail(&client->list, &res->clients);
39 return tsk;
40 }
41 }
42 return NULL;
43}
44
45void common_drain_budget(
46 struct reservation *res,
47 lt_t how_much)
48{
49 if (how_much >= res->cur_budget)
50 res->cur_budget = 0;
51 else
52 res->cur_budget -= how_much;
53
54 res->budget_consumed += how_much;
55 res->budget_consumed_total += how_much;
56
57 switch (res->state) {
58 case RESERVATION_DEPLETED:
59 case RESERVATION_INACTIVE:
60 BUG();
61 break;
62
63 case RESERVATION_ACTIVE_IDLE:
64 case RESERVATION_ACTIVE:
65 if (!res->cur_budget) {
66 res->env->change_state(res->env, res,
67 RESERVATION_DEPLETED);
68 } /* else: stay in current state */
69 break;
70 }
71}
72
73static struct task_struct * task_client_dispatch(struct reservation_client *client)
74{
75 struct task_client *tc = container_of(client, struct task_client, client);
76 return tc->task;
77}
78
79void task_client_init(struct task_client *tc, struct task_struct *tsk,
80 struct reservation *res)
81{
82 memset(&tc->client, 0, sizeof(tc->client));
83 tc->client.dispatch = task_client_dispatch;
84 tc->client.reservation = res;
85 tc->task = tsk;
86}
87
88static void sup_scheduler_update_at(
89 struct sup_reservation_environment* sup_env,
90 lt_t when)
91{
92 if (sup_env->next_scheduler_update > when)
93 sup_env->next_scheduler_update = when;
94}
95
96static void sup_scheduler_update_after(
97 struct sup_reservation_environment* sup_env,
98 lt_t timeout)
99{
100 sup_scheduler_update_at(sup_env, sup_env->env.current_time + timeout);
101}
102
103static int _sup_queue_depleted(
104 struct sup_reservation_environment* sup_env,
105 struct reservation *res)
106{
107 struct list_head *pos;
108 struct reservation *queued;
109 int passed_earlier = 0;
110
111 BUG_ON(in_list(&res->replenish_list));
112
113 list_for_each(pos, &sup_env->depleted_reservations) {
114 queued = list_entry(pos, struct reservation, replenish_list);
115 if (queued->next_replenishment > res->next_replenishment) {
116 list_add(&res->replenish_list, pos->prev);
117 return passed_earlier;
118 } else
119 passed_earlier = 1;
120 }
121
122 list_add_tail(&res->replenish_list, &sup_env->depleted_reservations);
123
124 return passed_earlier;
125}
126
127static void sup_queue_depleted(
128 struct sup_reservation_environment* sup_env,
129 struct reservation *res)
130{
131 int passed_earlier = _sup_queue_depleted(sup_env, res);
132
133 /* check for updated replenishment time */
134 if (!passed_earlier)
135 sup_scheduler_update_at(sup_env, res->next_replenishment);
136}
137
138static int _sup_queue_active(
139 struct sup_reservation_environment* sup_env,
140 struct reservation *res)
141{
142 struct list_head *pos;
143 struct reservation *queued;
144 int passed_active = 0;
145
146 if (likely(res->priority != RESERVATION_BACKGROUND_PRIORITY)) {
147 /* enqueue in order of priority */
148 list_for_each(pos, &sup_env->active_reservations) {
149 queued = list_entry(pos, struct reservation, list);
150 if (queued->priority > res->priority) {
151 list_add(&res->list, pos->prev);
152 return passed_active;
153 } else if (queued->state == RESERVATION_ACTIVE)
154 passed_active = 1;
155 }
156 } else {
157 /* don't preempt unless the list happens to be empty */
158 passed_active = !list_empty(&sup_env->active_reservations);
159 }
160 /* Either a background reservation, or we fell off the end of the list.
161 * In both cases, just add the reservation to the end of the list of
162 * active reservations. */
163 list_add_tail(&res->list, &sup_env->active_reservations);
164 return passed_active;
165}
166
167static void sup_queue_active(
168 struct sup_reservation_environment* sup_env,
169 struct reservation *res)
170{
171 int passed_active = _sup_queue_active(sup_env, res);
172
173 /* check for possible preemption */
174 if (res->state == RESERVATION_ACTIVE && !passed_active)
175 sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
176 else if (res == list_first_entry(&sup_env->active_reservations,
177 struct reservation, list)) {
178 /* First reservation is draining budget => make sure
179 * the scheduler is called to notice when the reservation
180 * budget has been drained completely. */
181 sup_scheduler_update_after(sup_env, res->cur_budget);
182 }
183}
184
185static void sup_queue_reservation(
186 struct sup_reservation_environment* sup_env,
187 struct reservation *res)
188{
189 switch (res->state) {
190 case RESERVATION_INACTIVE:
191 list_add(&res->list, &sup_env->inactive_reservations);
192 break;
193
194 case RESERVATION_DEPLETED:
195 sup_queue_depleted(sup_env, res);
196 break;
197
198 case RESERVATION_ACTIVE_IDLE:
199 case RESERVATION_ACTIVE:
200 sup_queue_active(sup_env, res);
201 break;
202 }
203}
204
205void sup_add_new_reservation(
206 struct sup_reservation_environment* sup_env,
207 struct reservation* new_res)
208{
209 new_res->env = &sup_env->env;
210 list_add(&new_res->all_list, &sup_env->all_reservations);
211 sup_queue_reservation(sup_env, new_res);
212}
213
214struct reservation* sup_find_by_id(struct sup_reservation_environment* sup_env,
215 unsigned int id)
216{
217 struct reservation *res;
218
219 list_for_each_entry(res, &sup_env->all_reservations, all_list) {
220 if (res->id == id)
221 return res;
222 }
223
224 return NULL;
225}
226
227static void sup_charge_budget(
228 struct sup_reservation_environment* sup_env,
229 lt_t delta)
230{
231 struct reservation *res;
232
233 /* charge the highest-priority ACTIVE or ACTIVE_IDLE reservation */
234
235 res = list_first_entry_or_null(
236 &sup_env->active_reservations, struct reservation, list);
237
238 if (res) {
239 TRACE("R%d: charging at %llu for %llu execution, budget before: %llu\n",
240 res->id, res->env->current_time, delta, res->cur_budget);
241 res->ops->drain_budget(res, delta);
242 TRACE("R%d: budget now: %llu, priority: %llu\n",
243 res->id, res->cur_budget, res->priority);
244 }
245
246 /* check when the next budget expires */
247
248 res = list_first_entry_or_null(
249 &sup_env->active_reservations, struct reservation, list);
250
251 if (res) {
252 /* make sure scheduler is invoked when this reservation expires
253 * its remaining budget */
254 TRACE("requesting scheduler update for reservation %u "
255 "in %llu nanoseconds\n",
256 res->id, res->cur_budget);
257 sup_scheduler_update_after(sup_env, res->cur_budget);
258 }
259}
260
261static void sup_replenish_budgets(struct sup_reservation_environment* sup_env)
262{
263 struct list_head *pos, *next;
264 struct reservation *res;
265
266 list_for_each_safe(pos, next, &sup_env->depleted_reservations) {
267 res = list_entry(pos, struct reservation, replenish_list);
268 if (res->next_replenishment <= sup_env->env.current_time) {
269 TRACE("R%d: replenishing budget at %llu, "
270 "priority: %llu\n",
271 res->id, res->env->current_time, res->priority);
272 res->ops->replenish(res);
273 } else {
274 /* list is ordered by increasing replenishment times */
275 break;
276 }
277 }
278
279 /* request a scheduler update at the next replenishment instant */
280 res = list_first_entry_or_null(&sup_env->depleted_reservations,
281 struct reservation, replenish_list);
282 if (res)
283 sup_scheduler_update_at(sup_env, res->next_replenishment);
284}
285
286void sup_update_time(
287 struct sup_reservation_environment* sup_env,
288 lt_t now)
289{
290 lt_t delta;
291
292 /* If the time didn't advance, there is nothing to do.
293 * This check makes it safe to call sup_update_time() potentially
294 * multiple times (e.g., via different code paths). */
295 if (!list_empty(&sup_env->active_reservations))
296 TRACE("(sup_update_time) now: %llu, current_time: %llu\n", now,
297 sup_env->env.current_time);
298 if (unlikely(now <= sup_env->env.current_time))
299 return;
300
301 delta = now - sup_env->env.current_time;
302 sup_env->env.current_time = now;
303
304 /* check if future updates are required */
305 if (sup_env->next_scheduler_update <= sup_env->env.current_time)
306 sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
307
308 /* deplete budgets by passage of time */
309 sup_charge_budget(sup_env, delta);
310
311 /* check if any budgets were replenished */
312 sup_replenish_budgets(sup_env);
313}
314
315struct task_struct* sup_dispatch(struct sup_reservation_environment* sup_env)
316{
317 struct reservation *res, *next;
318 struct task_struct *tsk = NULL;
319 lt_t time_slice;
320
321 list_for_each_entry_safe(res, next, &sup_env->active_reservations, list) {
322 if (res->state == RESERVATION_ACTIVE) {
323 tsk = res->ops->dispatch_client(res, &time_slice);
324 if (likely(tsk)) {
325 if (time_slice)
326 sup_scheduler_update_after(sup_env, time_slice);
327 sup_scheduler_update_after(sup_env, res->cur_budget);
328 return tsk;
329 }
330 }
331 }
332
333 return NULL;
334}
335
336static void sup_res_change_state(
337 struct reservation_environment* env,
338 struct reservation *res,
339 reservation_state_t new_state)
340{
341 struct sup_reservation_environment* sup_env;
342
343 sup_env = container_of(env, struct sup_reservation_environment, env);
344
345 TRACE("reservation R%d state %d->%d at %llu\n",
346 res->id, res->state, new_state, env->current_time);
347
348 if (new_state == RESERVATION_DEPLETED
349 && (res->state == RESERVATION_ACTIVE ||
350 res->state == RESERVATION_ACTIVE_IDLE)) {
351 budget_notifiers_fire(&res->budget_notifiers, false);
352 } else if (res->state == RESERVATION_DEPLETED
353 && new_state == RESERVATION_ACTIVE) {
354 budget_notifiers_fire(&res->budget_notifiers, true);
355 }
356
357 /* dequeue prior to re-queuing */
358 if (res->state == RESERVATION_DEPLETED)
359 list_del(&res->replenish_list);
360 else
361 list_del(&res->list);
362
363 /* check if we need to reschedule because we lost an active reservation */
364 if (res->state == RESERVATION_ACTIVE && !sup_env->will_schedule)
365 sup_env->next_scheduler_update = SUP_RESCHEDULE_NOW;
366 res->state = new_state;
367 sup_queue_reservation(sup_env, res);
368}
369
370static void sup_request_replenishment(
371 struct reservation_environment* env,
372 struct reservation *res)
373{
374 struct sup_reservation_environment* sup_env;
375
376 sup_env = container_of(env, struct sup_reservation_environment, env);
377 sup_queue_depleted(sup_env, res);
378}
379
380void sup_init(struct sup_reservation_environment* sup_env)
381{
382 memset(sup_env, 0, sizeof(*sup_env));
383
384 INIT_LIST_HEAD(&sup_env->all_reservations);
385 INIT_LIST_HEAD(&sup_env->active_reservations);
386 INIT_LIST_HEAD(&sup_env->depleted_reservations);
387 INIT_LIST_HEAD(&sup_env->inactive_reservations);
388
389 sup_env->env.change_state = sup_res_change_state;
390 sup_env->env.request_replenishment = sup_request_replenishment;
391
392 sup_env->next_scheduler_update = SUP_NO_SCHEDULER_UPDATE;
393}
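
Taken together, a plugin drives this uniprocessor reservation environment with sup_init(), sup_add_new_reservation(), and then, on each scheduling decision, sup_update_time() followed by sup_dispatch(). The sketch below shows only that shape; the per-CPU wrapper struct and its lock are hypothetical and do not appear in this patch.

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <litmus/litmus.h>
#include <litmus/reservations/reservation.h>

/* Hypothetical per-CPU plugin state; sup_init() is assumed to have been
 * called on sup_env and reservations added via sup_add_new_reservation(). */
struct demo_cpu_state {
	raw_spinlock_t lock;
	struct sup_reservation_environment sup_env;
};

static struct task_struct *demo_schedule_step(struct demo_cpu_state *state)
{
	struct task_struct *next;
	unsigned long flags;

	raw_spin_lock_irqsave(&state->lock, flags);

	/* advance logical time: charges the top active reservation and
	 * replenishes any depleted reservations that are due */
	sup_update_time(&state->sup_env, litmus_clock());

	/* pick the client of the highest-priority active reservation */
	next = sup_dispatch(&state->sup_env);

	raw_spin_unlock_irqrestore(&state->lock, flags);
	return next;
}
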
diff --git a/litmus/reservations/polling.c b/litmus/reservations/polling.c
new file mode 100644
index 000000000000..63e0bed566e8
--- /dev/null
+++ b/litmus/reservations/polling.c
@@ -0,0 +1,256 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/reservations/reservation.h>
5#include <litmus/reservations/polling.h>
6
7
8static void periodic_polling_client_arrives(
9 struct reservation* res,
10 struct reservation_client *client
11)
12{
13 struct polling_reservation *pres =
14 container_of(res, struct polling_reservation, res);
15 lt_t instances, tmp;
16
17 list_add_tail(&client->list, &res->clients);
18
19 switch (res->state) {
20 case RESERVATION_INACTIVE:
21 /* Figure out next replenishment time. */
22 tmp = res->env->current_time - res->env->time_zero;
23 instances = div64_u64(tmp, pres->period);
24 res->next_replenishment =
25 (instances + 1) * pres->period + pres->offset;
26
27 TRACE("pol-res: activate tmp=%llu instances=%llu period=%llu nextrp=%llu cur=%llu\n",
28 tmp, instances, pres->period, res->next_replenishment,
29 res->env->current_time);
30
31 res->env->change_state(res->env, res,
32 RESERVATION_DEPLETED);
33 break;
34
35 case RESERVATION_ACTIVE:
36 case RESERVATION_DEPLETED:
37 /* do nothing */
38 break;
39
40 case RESERVATION_ACTIVE_IDLE:
41 res->env->change_state(res->env, res,
42 RESERVATION_ACTIVE);
43 break;
44 }
45}
46
47
48static void periodic_polling_client_departs(
49 struct reservation *res,
50 struct reservation_client *client,
51 int did_signal_job_completion
52)
53{
54 list_del(&client->list);
55
56 switch (res->state) {
57 case RESERVATION_INACTIVE:
58 case RESERVATION_ACTIVE_IDLE:
59 BUG(); /* INACTIVE or IDLE <=> no client */
60 break;
61
62 case RESERVATION_ACTIVE:
63 if (list_empty(&res->clients)) {
64 res->env->change_state(res->env, res,
65 RESERVATION_ACTIVE_IDLE);
66 } /* else: nothing to do, more clients ready */
67 break;
68
69 case RESERVATION_DEPLETED:
70 /* do nothing */
71 break;
72 }
73}
74
75static void periodic_polling_on_replenishment(
76 struct reservation *res
77)
78{
79 struct polling_reservation *pres =
80 container_of(res, struct polling_reservation, res);
81
82 /* replenish budget */
83 res->cur_budget = pres->max_budget;
84 res->next_replenishment += pres->period;
85 res->budget_consumed = 0;
86
87 switch (res->state) {
88 case RESERVATION_DEPLETED:
89 case RESERVATION_INACTIVE:
90 case RESERVATION_ACTIVE_IDLE:
91 if (list_empty(&res->clients))
92 /* no clients => poll again later */
93 res->env->change_state(res->env, res,
94 RESERVATION_INACTIVE);
95 else
96 /* we have clients & budget => ACTIVE */
97 res->env->change_state(res->env, res,
98 RESERVATION_ACTIVE);
99 break;
100
101 case RESERVATION_ACTIVE:
102 /* Replenished while active => tardy? In any case,
103 * go ahead and stay active. */
104 break;
105 }
106}
107
108static void periodic_polling_on_replenishment_edf(
109 struct reservation *res
110)
111{
112 struct polling_reservation *pres =
113 container_of(res, struct polling_reservation, res);
114
115 /* update current priority */
116 res->priority = res->next_replenishment + pres->deadline;
117
118 /* do common updates */
119 periodic_polling_on_replenishment(res);
120}
121
122static struct reservation_ops periodic_polling_ops_fp = {
123 .dispatch_client = default_dispatch_client,
124 .client_arrives = periodic_polling_client_arrives,
125 .client_departs = periodic_polling_client_departs,
126 .replenish = periodic_polling_on_replenishment,
127 .drain_budget = common_drain_budget,
128};
129
130static struct reservation_ops periodic_polling_ops_edf = {
131 .dispatch_client = default_dispatch_client,
132 .client_arrives = periodic_polling_client_arrives,
133 .client_departs = periodic_polling_client_departs,
134 .replenish = periodic_polling_on_replenishment_edf,
135 .drain_budget = common_drain_budget,
136};
137
138
139
140
141static void sporadic_polling_client_arrives_fp(
142 struct reservation* res,
143 struct reservation_client *client
144)
145{
146 struct polling_reservation *pres =
147 container_of(res, struct polling_reservation, res);
148
149 list_add_tail(&client->list, &res->clients);
150
151 switch (res->state) {
152 case RESERVATION_INACTIVE:
153 /* Replenish now. */
154 res->cur_budget = pres->max_budget;
155 res->next_replenishment =
156 res->env->current_time + pres->period;
157
158 res->env->change_state(res->env, res,
159 RESERVATION_ACTIVE);
160 break;
161
162 case RESERVATION_ACTIVE:
163 case RESERVATION_DEPLETED:
164 /* do nothing */
165 break;
166
167 case RESERVATION_ACTIVE_IDLE:
168 res->env->change_state(res->env, res,
169 RESERVATION_ACTIVE);
170 break;
171 }
172}
173
174static void sporadic_polling_client_arrives_edf(
175 struct reservation* res,
176 struct reservation_client *client
177)
178{
179 struct polling_reservation *pres =
180 container_of(res, struct polling_reservation, res);
181
182 list_add_tail(&client->list, &res->clients);
183
184 switch (res->state) {
185 case RESERVATION_INACTIVE:
186 /* Replenish now. */
187 res->cur_budget = pres->max_budget;
188 res->next_replenishment =
189 res->env->current_time + pres->period;
190 res->priority =
191 res->env->current_time + pres->deadline;
192
193 res->env->change_state(res->env, res,
194 RESERVATION_ACTIVE);
195 break;
196
197 case RESERVATION_ACTIVE:
198 case RESERVATION_DEPLETED:
199 /* do nothing */
200 break;
201
202 case RESERVATION_ACTIVE_IDLE:
203 res->env->change_state(res->env, res,
204 RESERVATION_ACTIVE);
205 break;
206 }
207}
208
209static struct reservation_ops sporadic_polling_ops_fp = {
210 .dispatch_client = default_dispatch_client,
211 .client_arrives = sporadic_polling_client_arrives_fp,
212 .client_departs = periodic_polling_client_departs,
213 .replenish = periodic_polling_on_replenishment,
214 .drain_budget = common_drain_budget,
215};
216
217static struct reservation_ops sporadic_polling_ops_edf = {
218 .dispatch_client = default_dispatch_client,
219 .client_arrives = sporadic_polling_client_arrives_edf,
220 .client_departs = periodic_polling_client_departs,
221 .replenish = periodic_polling_on_replenishment_edf,
222 .drain_budget = common_drain_budget,
223};
224
225void polling_reservation_init(
226 struct polling_reservation *pres,
227 int use_edf_prio,
228 int use_periodic_polling,
229 lt_t budget, lt_t period, lt_t deadline, lt_t offset
230)
231{
232 if (!deadline)
233 deadline = period;
234 BUG_ON(budget > period);
235 BUG_ON(budget > deadline);
236 BUG_ON(offset >= period);
237
238 reservation_init(&pres->res);
239 pres->max_budget = budget;
240 pres->period = period;
241 pres->deadline = deadline;
242 pres->offset = offset;
243 if (use_periodic_polling) {
244 pres->res.kind = PERIODIC_POLLING;
245 if (use_edf_prio)
246 pres->res.ops = &periodic_polling_ops_edf;
247 else
248 pres->res.ops = &periodic_polling_ops_fp;
249 } else {
250 pres->res.kind = SPORADIC_POLLING;
251 if (use_edf_prio)
252 pres->res.ops = &sporadic_polling_ops_edf;
253 else
254 pres->res.ops = &sporadic_polling_ops_fp;
255 }
256}
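
A polling reservation set up by hand looks like the sketch below; alloc_polling_reservation() above is the normal entry point, and all values as well as the static instance are illustrative.

#include <linux/time.h>
#include <litmus/reservations/polling.h>

static struct polling_reservation demo_pres;	/* hypothetical instance */

static void demo_polling_setup(void)
{
	polling_reservation_init(&demo_pres,
				 1,			/* use_edf_prio */
				 1,			/* use_periodic_polling */
				 10 * NSEC_PER_MSEC,	/* budget */
				 100 * NSEC_PER_MSEC,	/* period */
				 0,			/* deadline: 0 => period */
				 0);			/* offset */
	demo_pres.res.id = 42;
}
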
diff --git a/litmus/reservations/table-driven.c b/litmus/reservations/table-driven.c
new file mode 100644
index 000000000000..e4debcb5d4d2
--- /dev/null
+++ b/litmus/reservations/table-driven.c
@@ -0,0 +1,269 @@
1#include <linux/sched.h>
2
3#include <litmus/litmus.h>
4#include <litmus/reservations/reservation.h>
5#include <litmus/reservations/table-driven.h>
6
7static lt_t td_cur_major_cycle_start(struct table_driven_reservation *tdres)
8{
9 lt_t x, tmp;
10
11 tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
12 x = div64_u64(tmp, tdres->major_cycle);
13 x *= tdres->major_cycle;
14 return x;
15}
16
17
18static lt_t td_next_major_cycle_start(struct table_driven_reservation *tdres)
19{
20 lt_t x, tmp;
21
22 tmp = tdres->res.env->current_time - tdres->res.env->time_zero;
23 x = div64_u64(tmp, tdres->major_cycle) + 1;
24 x *= tdres->major_cycle;
25 return x;
26}
27
28static void td_client_arrives(
29 struct reservation* res,
30 struct reservation_client *client
31)
32{
33 struct table_driven_reservation *tdres =
34 container_of(res, struct table_driven_reservation, res);
35
36 list_add_tail(&client->list, &res->clients);
37
38 switch (res->state) {
39 case RESERVATION_INACTIVE:
40 /* Figure out first replenishment time. */
41 tdres->major_cycle_start = td_next_major_cycle_start(tdres);
42 res->next_replenishment = tdres->major_cycle_start;
43 res->next_replenishment += tdres->intervals[0].start;
44 tdres->next_interval = 0;
45
46 res->env->change_state(res->env, res,
47 RESERVATION_DEPLETED);
48 break;
49
50 case RESERVATION_ACTIVE:
51 case RESERVATION_DEPLETED:
52 /* do nothing */
53 break;
54
55 case RESERVATION_ACTIVE_IDLE:
56 res->env->change_state(res->env, res,
57 RESERVATION_ACTIVE);
58 break;
59 }
60}
61
62static void td_client_departs(
63 struct reservation *res,
64 struct reservation_client *client,
65 int did_signal_job_completion
66)
67{
68 list_del(&client->list);
69
70 switch (res->state) {
71 case RESERVATION_INACTIVE:
72 case RESERVATION_ACTIVE_IDLE:
73 BUG(); /* INACTIVE or IDLE <=> no client */
74 break;
75
76 case RESERVATION_ACTIVE:
77 if (list_empty(&res->clients)) {
78 res->env->change_state(res->env, res,
79 RESERVATION_ACTIVE_IDLE);
80 } /* else: nothing to do, more clients ready */
81 break;
82
83 case RESERVATION_DEPLETED:
84 /* do nothing */
85 break;
86 }
87}
88
89static lt_t td_time_remaining_until_end(struct table_driven_reservation *tdres)
90{
91 lt_t now = tdres->res.env->current_time;
92 lt_t end = tdres->cur_interval.end;
93 TRACE("td_remaining(%u): start=%llu now=%llu end=%llu state=%d\n",
94 tdres->res.id,
95 tdres->cur_interval.start,
96 now, end,
97 tdres->res.state);
98 if (now >= end)
99 return 0;
100 else
101 return end - now;
102}
103
104static void td_replenish(
105 struct reservation *res)
106{
107 struct table_driven_reservation *tdres =
108 container_of(res, struct table_driven_reservation, res);
109
110 TRACE("td_replenish(%u): expected_replenishment=%llu\n", res->id,
111 res->next_replenishment);
112
113 /* figure out current interval */
114 tdres->cur_interval.start = tdres->major_cycle_start +
115 tdres->intervals[tdres->next_interval].start;
116 tdres->cur_interval.end = tdres->major_cycle_start +
117 tdres->intervals[tdres->next_interval].end;
118 TRACE("major_cycle_start=%llu => [%llu, %llu]\n",
119 tdres->major_cycle_start,
120 tdres->cur_interval.start,
121 tdres->cur_interval.end);
122
123 /* reset budget */
124 res->cur_budget = td_time_remaining_until_end(tdres);
125 res->budget_consumed = 0;
126 TRACE("td_replenish(%u): %s budget=%llu\n", res->id,
127 res->cur_budget ? "" : "WARNING", res->cur_budget);
128
129 /* prepare next slot */
130 tdres->next_interval = (tdres->next_interval + 1) % tdres->num_intervals;
131 if (!tdres->next_interval)
132 /* wrap to next major cycle */
133 tdres->major_cycle_start += tdres->major_cycle;
134
135 /* determine next time this reservation becomes eligible to execute */
136 res->next_replenishment = tdres->major_cycle_start;
137 res->next_replenishment += tdres->intervals[tdres->next_interval].start;
138 TRACE("td_replenish(%u): next_replenishment=%llu\n", res->id,
139 res->next_replenishment);
140
141
142 switch (res->state) {
143 case RESERVATION_DEPLETED:
144 case RESERVATION_ACTIVE:
145 case RESERVATION_ACTIVE_IDLE:
146 if (list_empty(&res->clients))
147 res->env->change_state(res->env, res,
148 RESERVATION_ACTIVE_IDLE);
149 else
150 /* we have clients & budget => ACTIVE */
151 res->env->change_state(res->env, res,
152 RESERVATION_ACTIVE);
153 break;
154
155 case RESERVATION_INACTIVE:
156 BUG();
157 break;
158 }
159}
160
161static void td_drain_budget(
162 struct reservation *res,
163 lt_t how_much)
164{
165 struct table_driven_reservation *tdres =
166 container_of(res, struct table_driven_reservation, res);
167
168 res->budget_consumed += how_much;
169 res->budget_consumed_total += how_much;
170
171 /* Table-driven scheduling: instead of tracking the budget, we compute
172 * how much time is left in this allocation interval. */
173
174 /* sanity check: we should never try to drain from future slots */
175 BUG_ON(tdres->cur_interval.start > res->env->current_time);
176
177 switch (res->state) {
178 case RESERVATION_DEPLETED:
179 case RESERVATION_INACTIVE:
180 BUG();
181 break;
182
183 case RESERVATION_ACTIVE_IDLE:
184 case RESERVATION_ACTIVE:
185 res->cur_budget = td_time_remaining_until_end(tdres);
186 TRACE("td_drain_budget(%u): drained to budget=%llu\n",
187 res->id, res->cur_budget);
188 if (!res->cur_budget) {
189 res->env->change_state(res->env, res,
190 RESERVATION_DEPLETED);
191 } else {
192 /* sanity check budget calculation */
193 BUG_ON(res->env->current_time >= tdres->cur_interval.end);
194 BUG_ON(res->env->current_time < tdres->cur_interval.start);
195 }
196
197 break;
198 }
199}
200
201static struct task_struct* td_dispatch_client(
202 struct reservation *res,
203 lt_t *for_at_most)
204{
205 struct task_struct *t;
206 struct table_driven_reservation *tdres =
207 container_of(res, struct table_driven_reservation, res);
208
209 /* usual logic for selecting a client */
210 t = default_dispatch_client(res, for_at_most);
211
212 TRACE_TASK(t, "td_dispatch_client(%u): selected, budget=%llu\n",
213 res->id, res->cur_budget);
214
215 /* check how much budget we have left in this time slot */
216 res->cur_budget = td_time_remaining_until_end(tdres);
217
218 TRACE_TASK(t, "td_dispatch_client(%u): updated to budget=%llu next=%d\n",
219 res->id, res->cur_budget, tdres->next_interval);
220
221 if (unlikely(!res->cur_budget)) {
222 /* Unlikely case: if we ran out of budget, the user configured
223 * a broken scheduling table (overlapping table slots).
224 * Not much we can do about this, but we can't dispatch a job
225 * now without causing overload. So let's register this reservation
226 * as depleted and wait for the next allocation. */
227 TRACE("td_dispatch_client(%u): budget unexpectedly depleted "
228 "(check scheduling table for unintended overlap)\n",
229 res->id);
230 res->env->change_state(res->env, res,
231 RESERVATION_DEPLETED);
232 return NULL;
233 } else
234 return t;
235}
236
237static struct reservation_ops td_ops = {
238 .dispatch_client = td_dispatch_client,
239 .client_arrives = td_client_arrives,
240 .client_departs = td_client_departs,
241 .replenish = td_replenish,
242 .drain_budget = td_drain_budget,
243};
244
245void table_driven_reservation_init(
246 struct table_driven_reservation *tdres,
247 lt_t major_cycle,
248 struct lt_interval *intervals,
249 unsigned int num_intervals)
250{
251 unsigned int i;
252
253 /* sanity checking */
254 BUG_ON(!num_intervals);
255 for (i = 0; i < num_intervals; i++)
256 BUG_ON(intervals[i].end <= intervals[i].start);
257 for (i = 0; i + 1 < num_intervals; i++)
258 BUG_ON(intervals[i + 1].start <= intervals[i].end);
259 BUG_ON(intervals[num_intervals - 1].end > major_cycle);
260
261 reservation_init(&tdres->res);
262 tdres->res.kind = TABLE_DRIVEN;
263 tdres->major_cycle = major_cycle;
264 tdres->intervals = intervals;
265 tdres->cur_interval.start = 0;
266 tdres->cur_interval.end = 0;
267 tdres->num_intervals = num_intervals;
268 tdres->res.ops = &td_ops;
269}
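
A hand-built scheduling table for table_driven_reservation_init() must satisfy the BUG_ON() checks above: non-empty, strictly increasing, non-overlapping intervals that end within the major cycle. The static table and all values below are illustrative only.

#include <linux/kernel.h>
#include <linux/time.h>
#include <litmus/reservations/table-driven.h>

/* two slots of 10ms each inside a 100ms major cycle (made-up values) */
static struct lt_interval demo_slots[] = {
	{ .start =  0 * NSEC_PER_MSEC, .end = 10 * NSEC_PER_MSEC },
	{ .start = 50 * NSEC_PER_MSEC, .end = 60 * NSEC_PER_MSEC },
};

static struct table_driven_reservation demo_td_res;	/* hypothetical */

static void demo_table_driven_setup(void)
{
	table_driven_reservation_init(&demo_td_res,
				      100 * NSEC_PER_MSEC,	/* major cycle */
				      demo_slots,
				      ARRAY_SIZE(demo_slots));
	demo_td_res.res.id = 7;
}
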
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 000000000000..733a483e3084
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,351 @@
1/*
2 * litmus/rt_domain.c
3 *
4 * LITMUS real-time infrastructure. This file contains the
5 * functions that manipulate RT domains. RT domains are an abstraction
6 * of a ready queue and a release queue.
7 */
8
9#include <linux/percpu.h>
10#include <linux/sched.h>
11#include <linux/list.h>
12#include <linux/slab.h>
13
14#include <litmus/litmus.h>
15#include <litmus/sched_plugin.h>
16#include <litmus/sched_trace.h>
17#include <litmus/debug_trace.h>
18
19#include <litmus/rt_domain.h>
20
21#include <litmus/trace.h>
22
23#include <litmus/bheap.h>
24
25/* Uncomment when debugging timer races... */
26#if 0
27#define VTRACE_TASK TRACE_TASK
28#define VTRACE TRACE
29#else
30#define VTRACE_TASK(t, fmt, args...) /* shut up */
31#define VTRACE(fmt, args...) /* be quiet already */
32#endif
33
34static int dummy_resched(rt_domain_t *rt)
35{
36 return 0;
37}
38
39static int dummy_order(struct bheap_node* a, struct bheap_node* b)
40{
41 return 0;
42}
43
44/* default implementation: use default lock */
45static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
46{
47 merge_ready(rt, tasks);
48}
49
50static unsigned int time2slot(lt_t time)
51{
52 return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
53}
54
55static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
56{
57 unsigned long flags;
58 struct release_heap* rh;
59 rh = container_of(timer, struct release_heap, timer);
60
61 TS_RELEASE_LATENCY(rh->release_time);
62
63 VTRACE("on_release_timer(0x%p) starts.\n", timer);
64
65 TS_RELEASE_START;
66
67
68 raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
69 VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
70 /* remove from release queue */
71 list_del(&rh->list);
72 raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
73 VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
74
75 /* call release callback */
76 rh->dom->release_jobs(rh->dom, &rh->heap);
77 /* WARNING: rh can be referenced from other CPUs from now on. */
78
79 TS_RELEASE_END;
80
81 VTRACE("on_release_timer(0x%p) ends.\n", timer);
82
83 return HRTIMER_NORESTART;
84}
85
86/* allocated in litmus.c */
87struct kmem_cache * release_heap_cache;
88
89struct release_heap* release_heap_alloc(int gfp_flags)
90{
91 struct release_heap* rh;
92 rh= kmem_cache_alloc(release_heap_cache, gfp_flags);
93 if (rh) {
94 /* initialize timer */
95 hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
96 rh->timer.function = on_release_timer;
97 }
98 return rh;
99}
100
101void release_heap_free(struct release_heap* rh)
102{
103 /* make sure timer is no longer in use */
104 hrtimer_cancel(&rh->timer);
105 kmem_cache_free(release_heap_cache, rh);
106}
107
108/* Caller must hold release lock.
109 * Will return heap for given time. If no such heap exists prior to
110 * the invocation it will be created.
111 */
112static struct release_heap* get_release_heap(rt_domain_t *rt,
113 struct task_struct* t,
114 int use_task_heap)
115{
116 struct list_head* pos;
117 struct release_heap* heap = NULL;
118 struct release_heap* rh;
119 lt_t release_time = get_release(t);
120 unsigned int slot = time2slot(release_time);
121
122 /* initialize pos for the case that the list is empty */
123 pos = rt->release_queue.slot[slot].next;
124 list_for_each(pos, &rt->release_queue.slot[slot]) {
125 rh = list_entry(pos, struct release_heap, list);
126 if (release_time == rh->release_time) {
127 /* perfect match -- this happens on hyperperiod
128 * boundaries
129 */
130 heap = rh;
131 break;
132 } else if (lt_before(release_time, rh->release_time)) {
133 /* we need to insert a new node since rh is
134 * already in the future
135 */
136 break;
137 }
138 }
139 if (!heap && use_task_heap) {
140 /* use pre-allocated release heap */
141 rh = tsk_rt(t)->rel_heap;
142
143 rh->dom = rt;
144 rh->release_time = release_time;
145
146 /* add to release queue */
147 list_add(&rh->list, pos->prev);
148 heap = rh;
149 }
150 return heap;
151}
152
153static void reinit_release_heap(struct task_struct* t)
154{
155 struct release_heap* rh;
156
157 /* use pre-allocated release heap */
158 rh = tsk_rt(t)->rel_heap;
159
160 /* Make sure it is safe to use. The timer callback could still
161 * be executing on another CPU; hrtimer_cancel() will wait
162 * until the timer callback has completed. However, under no
163 * circumstances should the timer be active (= yet to be
164 * triggered).
165 *
166 * WARNING: If the CPU still holds the release_lock at this point,
167 * deadlock may occur!
168 */
169 BUG_ON(hrtimer_cancel(&rh->timer));
170
171 /* initialize */
172 bheap_init(&rh->heap);
173}
174/* arm_release_timer() - start local release timer or trigger
175 * remote timer (pull timer)
176 *
177 * Called by add_release() with:
178 * - tobe_lock taken
179 * - IRQ disabled
180 */
181#ifdef CONFIG_RELEASE_MASTER
182#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
183static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
184#else
185static void arm_release_timer(rt_domain_t *_rt)
186#endif
187{
188 rt_domain_t *rt = _rt;
189 struct list_head list;
190 struct list_head *pos, *safe;
191 struct task_struct* t;
192 struct release_heap* rh;
193
194 VTRACE("arm_release_timer() at %llu\n", litmus_clock());
195 list_replace_init(&rt->tobe_released, &list);
196
197 list_for_each_safe(pos, safe, &list) {
198 /* pick task off the work list */
199 t = list_entry(pos, struct task_struct, rt_param.list);
200 sched_trace_task_release(t);
201 list_del(pos);
202
203 /* put into release heap while holding release_lock */
204 raw_spin_lock(&rt->release_lock);
205 VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
206
207 rh = get_release_heap(rt, t, 0);
208 if (!rh) {
209 /* need to use our own, but drop lock first */
210 raw_spin_unlock(&rt->release_lock);
211 VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
212 &rt->release_lock);
213
214 reinit_release_heap(t);
215 VTRACE_TASK(t, "release_heap ready\n");
216
217 raw_spin_lock(&rt->release_lock);
218 VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
219 &rt->release_lock);
220
221 rh = get_release_heap(rt, t, 1);
222 }
223 bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
224 VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
225
226 raw_spin_unlock(&rt->release_lock);
227 VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
228
229 /* To avoid arming the timer multiple times, we only let the
230 * owner do the arming (which is the "first" task to reference
231 * this release_heap anyway).
232 */
233 if (rh == tsk_rt(t)->rel_heap) {
234 VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
235
236 if (!hrtimer_is_hres_active(&rh->timer)) {
237 TRACE_TASK(t, "WARNING: no hires timer!!!\n");
238 }
239
240 /* we cannot arm the timer using hrtimer_start()
241 * as it may deadlock on rq->lock
242 *
243 * PINNED mode is ok on both local and remote CPU
244 */
245#ifdef CONFIG_RELEASE_MASTER
246 if (rt->release_master == NO_CPU &&
247 target_cpu == NO_CPU)
248#endif
249 hrtimer_start(&rh->timer,
250 ns_to_ktime(rh->release_time),
251 HRTIMER_MODE_ABS_PINNED);
252#ifdef CONFIG_RELEASE_MASTER
253 else
254 hrtimer_start_on(
255 /* target_cpu overrides release master */
256 (target_cpu != NO_CPU ?
257 target_cpu : rt->release_master),
258 &rh->info, &rh->timer,
259 ns_to_ktime(rh->release_time),
260 HRTIMER_MODE_ABS_PINNED);
261#endif
262 } else
263 VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
264 }
265}
266
267void rt_domain_init(rt_domain_t *rt,
268 bheap_prio_t order,
269 check_resched_needed_t check,
270 release_jobs_t release
271 )
272{
273 int i;
274
275 BUG_ON(!rt);
276 if (!check)
277 check = dummy_resched;
278 if (!release)
279 release = default_release_jobs;
280 if (!order)
281 order = dummy_order;
282
283#ifdef CONFIG_RELEASE_MASTER
284 rt->release_master = NO_CPU;
285#endif
286
287 bheap_init(&rt->ready_queue);
288 INIT_LIST_HEAD(&rt->tobe_released);
289 for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
290 INIT_LIST_HEAD(&rt->release_queue.slot[i]);
291
292 raw_spin_lock_init(&rt->ready_lock);
293 raw_spin_lock_init(&rt->release_lock);
294 raw_spin_lock_init(&rt->tobe_lock);
295
296 rt->check_resched = check;
297 rt->release_jobs = release;
298 rt->order = order;
299}
300
301/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
302 * @new: the newly released task
303 */
304void __add_ready(rt_domain_t* rt, struct task_struct *new)
305{
306 TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu "
307 "to ready queue at %llu\n",
308 new->comm, new->pid,
309 get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new),
310 get_release(new), litmus_clock());
311
312 BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
313
314 bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
315 rt->check_resched(rt);
316}
317
318/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
319 * @tasks - the newly released tasks
320 */
321void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
322{
323 bheap_union(rt->order, &rt->ready_queue, tasks);
324 rt->check_resched(rt);
325}
326
327
328#ifdef CONFIG_RELEASE_MASTER
329void __add_release_on(rt_domain_t* rt, struct task_struct *task,
330 int target_cpu)
331{
332 TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
333 get_release(task), target_cpu);
334 list_add(&tsk_rt(task)->list, &rt->tobe_released);
335 task->rt_param.domain = rt;
336
337 arm_release_timer_on(rt, target_cpu);
338}
339#endif
340
341/* add_release - add a real-time task to the rt release queue.
342 * @task: the sleeping task
343 */
344void __add_release(rt_domain_t* rt, struct task_struct *task)
345{
346 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
347 list_add(&tsk_rt(task)->list, &rt->tobe_released);
348 task->rt_param.domain = rt;
349
350 arm_release_timer(rt);
351}
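
A plugin typically owns one rt_domain_t per cluster or partition and initializes it once. The sketch below passes NULL for the order and release callbacks so that the dummy_order()/default_release_jobs() fallbacks above are used; the check_resched callback is a placeholder, since a real plugin would compare the queue head against the currently scheduled task and trigger litmus_reschedule().

#include <litmus/rt_domain.h>

static rt_domain_t demo_domain;			/* hypothetical */

static int demo_check_resched(rt_domain_t *rt)	/* hypothetical */
{
	/* a real plugin would decide here whether a preemption is needed */
	return 0;
}

static void demo_domain_setup(void)
{
	/* NULL order/release => dummy_order() and default_release_jobs() */
	rt_domain_init(&demo_domain, NULL, demo_check_resched, NULL);
}
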
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 000000000000..54afe73b4771
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,289 @@
1/* sched_plugin.c -- core infrastructure for the scheduler plugin system
2 *
3 * This file includes the initialization of the plugin system, the no-op Linux
4 * scheduler plugin, some dummy functions, and some helper functions.
5 */
6
7#include <linux/list.h>
8#include <linux/spinlock.h>
9#include <linux/sched.h>
10#include <linux/seq_file.h>
11
12#include <litmus/litmus.h>
13#include <litmus/sched_plugin.h>
14#include <litmus/preempt.h>
15#include <litmus/jobs.h>
16#include <litmus/budget.h>
17#include <litmus/np.h>
18
19/*
20 * Generic function to trigger preemption on either local or remote cpu
21 * from scheduler plugins. The key feature is that this function is
22 * non-preemptive section aware and does not invoke the scheduler / send
23 * IPIs if the to-be-preempted task is actually non-preemptive.
24 */
25void preempt_if_preemptable(struct task_struct* t, int cpu)
26{
27 /* t is the real-time task executing on CPU cpu. If t is NULL, then
28 * cpu is currently scheduling background work.
29 */
30
31 int reschedule = 0;
32
33 if (!t)
34 /* move non-real-time task out of the way */
35 reschedule = 1;
36 else {
37 if (smp_processor_id() == cpu) {
38 /* local CPU case */
39 /* check if we need to poke userspace */
40 if (is_user_np(t))
41 /* Yes, poke it. This doesn't have to be atomic since
42 * the task is definitely not executing. */
43 request_exit_np(t);
44 else if (!is_kernel_np(t))
45 /* only if we are allowed to preempt the
46 * currently-executing task */
47 reschedule = 1;
48 } else {
49 /* Remote CPU case. Only notify if it's not a kernel
50 * NP section and if we didn't set the userspace
51 * flag. */
52 reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t));
53 }
54 }
55 if (likely(reschedule))
56 litmus_reschedule(cpu);
57}
58
59
60/*************************************************************
61 * Dummy plugin functions *
62 *************************************************************/
63
64static void litmus_dummy_finish_switch(struct task_struct * prev)
65{
66}
67
68static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
69{
70 sched_state_task_picked();
71 return NULL;
72}
73
74static bool litmus_dummy_should_wait_for_stack(struct task_struct *next)
75{
76 return true; /* by default, wait indefinitely */
77}
78
79static void litmus_dummy_next_became_invalid(struct task_struct *next)
80{
81}
82
83static bool litmus_dummy_post_migration_validate(struct task_struct *next)
84{
85 return true; /* by default, anything is ok */
86}
87
88static long litmus_dummy_admit_task(struct task_struct* tsk)
89{
90 printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
91 tsk->comm, tsk->pid);
92 return -EINVAL;
93}
94
95static bool litmus_dummy_fork_task(struct task_struct* tsk)
96{
97 /* Default behavior: return false to demote to non-real-time task */
98 return false;
99}
100
101static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
102{
103}
104
105static void litmus_dummy_task_wake_up(struct task_struct *task)
106{
107}
108
109static void litmus_dummy_task_block(struct task_struct *task)
110{
111}
112
113static void litmus_dummy_task_exit(struct task_struct *task)
114{
115}
116
117static void litmus_dummy_task_cleanup(struct task_struct *task)
118{
119}
120
121static long litmus_dummy_complete_job(void)
122{
123 return -ENOSYS;
124}
125
126static long litmus_dummy_activate_plugin(void)
127{
128 return 0;
129}
130
131static long litmus_dummy_deactivate_plugin(void)
132{
133 return 0;
134}
135
136static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d)
137{
138 *d = NULL;
139 return 0;
140}
141
142static void litmus_dummy_synchronous_release_at(lt_t time_zero)
143{
144 /* ignore */
145}
146
147static long litmus_dummy_task_change_params(
148 struct task_struct *task,
149 struct rt_task *new_params)
150{
151 /* by default, do not allow changes to task parameters */
152 return -EBUSY;
153}
154
155#ifdef CONFIG_LITMUS_LOCKING
156
157static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type,
158 void* __user config)
159{
160 return -ENXIO;
161}
162
163#endif
164
165static long litmus_dummy_reservation_create(
166 int reservation_type,
167 void* __user config)
168{
169 return -ENOSYS;
170}
171
172static long litmus_dummy_reservation_destroy(unsigned int reservation_id, int cpu)
173{
174 return -ENOSYS;
175}
176
177/* The default scheduler plugin. It doesn't do anything and lets Linux do its
178 * job.
179 */
180struct sched_plugin linux_sched_plugin = {
181 .plugin_name = "Linux",
182 .task_new = litmus_dummy_task_new,
183 .task_exit = litmus_dummy_task_exit,
184 .task_wake_up = litmus_dummy_task_wake_up,
185 .task_block = litmus_dummy_task_block,
186 .complete_job = litmus_dummy_complete_job,
187 .schedule = litmus_dummy_schedule,
188 .finish_switch = litmus_dummy_finish_switch,
189 .activate_plugin = litmus_dummy_activate_plugin,
190 .deactivate_plugin = litmus_dummy_deactivate_plugin,
191 .get_domain_proc_info = litmus_dummy_get_domain_proc_info,
192 .synchronous_release_at = litmus_dummy_synchronous_release_at,
193#ifdef CONFIG_LITMUS_LOCKING
194 .allocate_lock = litmus_dummy_allocate_lock,
195#endif
196 .admit_task = litmus_dummy_admit_task
197};
198
199/*
200 * The reference to current plugin that is used to schedule tasks within
201 * the system. It stores references to actual function implementations
202 * Should be initialized by calling "init_***_plugin()"
203 */
204struct sched_plugin *litmus = &linux_sched_plugin;
205
206/* the list of registered scheduling plugins */
207static LIST_HEAD(sched_plugins);
208static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
209
210#define CHECK(func) {\
211 if (!plugin->func) \
212 plugin->func = litmus_dummy_ ## func;}
213
214/* FIXME: get reference to module */
215int register_sched_plugin(struct sched_plugin* plugin)
216{
217 printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
218 plugin->plugin_name);
219
220 /* make sure we don't trip over null pointers later */
221 CHECK(finish_switch);
222 CHECK(schedule);
223 CHECK(should_wait_for_stack);
224 CHECK(post_migration_validate);
225 CHECK(next_became_invalid);
226 CHECK(task_wake_up);
227 CHECK(task_exit);
228 CHECK(task_cleanup);
229 CHECK(task_block);
230 CHECK(task_new);
231 CHECK(task_change_params);
232 CHECK(complete_job);
233 CHECK(activate_plugin);
234 CHECK(deactivate_plugin);
235 CHECK(get_domain_proc_info);
236#ifdef CONFIG_LITMUS_LOCKING
237 CHECK(allocate_lock);
238#endif
239 CHECK(admit_task);
240 CHECK(fork_task);
241 CHECK(synchronous_release_at);
242 CHECK(reservation_destroy);
243 CHECK(reservation_create);
244
245 if (!plugin->wait_for_release_at)
246 plugin->wait_for_release_at = default_wait_for_release_at;
247
248 if (!plugin->current_budget)
249 plugin->current_budget = litmus_current_budget;
250
251 raw_spin_lock(&sched_plugins_lock);
252 list_add(&plugin->list, &sched_plugins);
253 raw_spin_unlock(&sched_plugins_lock);
254
255 return 0;
256}
257
258
259/* FIXME: reference counting, etc. */
260struct sched_plugin* find_sched_plugin(const char* name)
261{
262 struct list_head *pos;
263 struct sched_plugin *plugin;
264
265 raw_spin_lock(&sched_plugins_lock);
266 list_for_each(pos, &sched_plugins) {
267 plugin = list_entry(pos, struct sched_plugin, list);
268 if (!strcmp(plugin->plugin_name, name))
269 goto out_unlock;
270 }
271 plugin = NULL;
272
273out_unlock:
274 raw_spin_unlock(&sched_plugins_lock);
275 return plugin;
276}
277
278void print_sched_plugins(struct seq_file *m)
279{
280 struct list_head *pos;
281 struct sched_plugin *plugin;
282
283 raw_spin_lock(&sched_plugins_lock);
284 list_for_each(pos, &sched_plugins) {
285 plugin = list_entry(pos, struct sched_plugin, list);
286 seq_printf(m, "%s\n", plugin->plugin_name);
287 }
288 raw_spin_unlock(&sched_plugins_lock);
289}
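
Because register_sched_plugin() backfills every missing callback with the litmus_dummy_* defaults, a skeleton plugin can be registered with almost nothing filled in. The sketch below is illustrative; "DEMO" and the init function are made-up names, not part of this patch.

#include <linux/module.h>
#include <litmus/sched_plugin.h>

static struct sched_plugin demo_plugin = {
	.plugin_name = "DEMO",
	/* all other callbacks are filled in with litmus_dummy_* defaults */
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);
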
diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
index f89d08405944..a6088f16bb08 100644
--- a/litmus/sched_task_trace.c
+++ b/litmus/sched_task_trace.c
@@ -15,11 +15,6 @@
15#include <litmus/feather_trace.h> 15#include <litmus/feather_trace.h>
16#include <litmus/ftdev.h> 16#include <litmus/ftdev.h>
17 17
18#ifdef CONFIG_SCHED_LITMUS_TRACEPOINT
19#define CREATE_TRACE_POINTS
20#include <trace/events/litmus.h>
21#endif
22
23#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT) 18#define NO_EVENTS (1 << CONFIG_SCHED_TASK_TRACE_SHIFT)
24 19
25#define now() litmus_clock() 20#define now() litmus_clock()
diff --git a/litmus/srp.c b/litmus/srp.c
new file mode 100644
index 000000000000..7e3c057c0752
--- /dev/null
+++ b/litmus/srp.c
@@ -0,0 +1,310 @@
1/* ************************************************************************** */
2/* STACK RESOURCE POLICY */
3/* ************************************************************************** */
4
5#include <linux/module.h>
6#include <asm/atomic.h>
7#include <linux/sched.h>
8#include <linux/wait.h>
9
10#include <litmus/litmus.h>
11#include <litmus/sched_plugin.h>
12#include <litmus/debug_trace.h>
13#include <litmus/fdso.h>
14#include <litmus/trace.h>
15
16
17#ifdef CONFIG_LITMUS_LOCKING
18
19#include <litmus/srp.h>
20
21srp_prioritization_t get_srp_prio;
22
23struct srp {
24 struct list_head ceiling;
25 wait_queue_head_t ceiling_blocked;
26};
27#define system_ceiling(srp) list2prio(srp->ceiling.next)
28#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
29
30#define UNDEF_SEM -2
31
32DEFINE_PER_CPU(struct srp, srp);
33
34DEFINE_PER_CPU(int, srp_objects_in_use);
35
36/* Initialize SRP semaphores at boot time. */
37static int __init srp_init(void)
38{
39 int i;
40
41 printk("Initializing SRP per-CPU ceilings...");
42 for (i = 0; i < NR_CPUS; i++) {
43 init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
44 INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
45 per_cpu(srp_objects_in_use, i) = 0;
46 }
47 printk(" done!\n");
48
49 return 0;
50}
51module_init(srp_init);
52
53/* SRP task priority comparison function. Smaller numeric values have higher
54 * priority, tie-break is PID. Special case: priority == 0 <=> no priority
55 */
56static int srp_higher_prio(struct srp_priority* first,
57 struct srp_priority* second)
58{
59 if (!first->priority)
60 return 0;
61 else
62 return !second->priority ||
63 first->priority < second->priority || (
64 first->priority == second->priority &&
65 first->pid < second->pid);
66}
67
68
69static int srp_exceeds_ceiling(struct task_struct* first,
70 struct srp* srp)
71{
72 struct srp_priority prio;
73
74 if (list_empty(&srp->ceiling))
75 return 1;
76 else {
77 prio.pid = first->pid;
78 prio.priority = get_srp_prio(first);
79 return srp_higher_prio(&prio, system_ceiling(srp)) ||
80 ceiling2sem(system_ceiling(srp))->owner == first;
81 }
82}
83
84static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
85{
86 struct list_head *pos;
87 if (in_list(&prio->list)) {
88 printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
89 "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
90 return;
91 }
92 list_for_each(pos, &srp->ceiling)
93 if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
94 __list_add(&prio->list, pos->prev, pos);
95 return;
96 }
97
98 list_add_tail(&prio->list, &srp->ceiling);
99}
100
101
102static int lock_srp_semaphore(struct litmus_lock* l)
103{
104 struct task_struct* t = current;
105 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
106
107 if (!is_realtime(t))
108 return -EPERM;
109
110 /* prevent acquisition of local locks in global critical sections */
111 if (tsk_rt(t)->num_locks_held)
112 return -EBUSY;
113
114 preempt_disable();
115
116 /* Update ceiling. */
117 srp_add_prio(this_cpu_ptr(&srp), &sem->ceiling);
118
119 /* SRP invariant: all resources available */
120 BUG_ON(sem->owner != NULL);
121
122 sem->owner = t;
123 TRACE_CUR("acquired srp 0x%p\n", sem);
124
125 tsk_rt(t)->num_local_locks_held++;
126
127 preempt_enable();
128
129 return 0;
130}
131
132static int unlock_srp_semaphore(struct litmus_lock* l)
133{
134 struct task_struct* t = current;
135 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
136 int err = 0;
137
138 preempt_disable();
139
140 if (sem->owner != t) {
141 err = -EINVAL;
142 } else {
143 /* The current owner should be executing on the correct CPU.
144 *
145 * If the owner transitioned out of RT mode or is exiting, then
146 * it might have already been migrated away by the best-effort
147 * scheduler and we just have to deal with it. */
148 if (unlikely(!is_realtime(t) && sem->cpu != smp_processor_id())) {
149 TRACE_TASK(t, "SRP unlock cpu=%d, sem->cpu=%d\n",
150 smp_processor_id(), sem->cpu);
151 preempt_enable();
152 err = litmus_be_migrate_to(sem->cpu);
153 preempt_disable();
154 TRACE_TASK(t, "post-migrate: cpu=%d, sem->cpu=%d err=%d\n",
155 smp_processor_id(), sem->cpu, err);
156 }
157 BUG_ON(sem->cpu != smp_processor_id());
158 err = 0;
159
160 /* Determine new system priority ceiling for this CPU. */
161 BUG_ON(!in_list(&sem->ceiling.list));
162
163 list_del(&sem->ceiling.list);
164 sem->owner = NULL;
165
166 /* Wake tasks on this CPU, if they exceed current ceiling. */
167 TRACE_CUR("released srp 0x%p\n", sem);
168 wake_up_all(&this_cpu_ptr(&srp)->ceiling_blocked);
169
170 tsk_rt(t)->num_local_locks_held--;
171 }
172
173 preempt_enable();
174 return err;
175}
176
177static int open_srp_semaphore(struct litmus_lock* l, void* __user arg)
178{
179 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
180 int err = 0;
181 struct task_struct* t = current;
182 struct srp_priority t_prio;
183
184 if (!is_realtime(t))
185 return -EPERM;
186
187 TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
188
189 preempt_disable();
190
191 if (sem->owner != NULL)
192 err = -EBUSY;
193
194 if (err == 0) {
195 if (sem->cpu == UNDEF_SEM)
196 sem->cpu = get_partition(t);
197 else if (sem->cpu != get_partition(t))
198 err = -EPERM;
199 }
200
201 if (err == 0) {
202 t_prio.priority = get_srp_prio(t);
203 t_prio.pid = t->pid;
204 if (srp_higher_prio(&t_prio, &sem->ceiling)) {
205 sem->ceiling.priority = t_prio.priority;
206 sem->ceiling.pid = t_prio.pid;
207 }
208 }
209
210 preempt_enable();
211
212 return err;
213}
214
215static int close_srp_semaphore(struct litmus_lock* l)
216{
217 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
218 int err = 0;
219
220 preempt_disable();
221
222 if (sem->owner == current)
223 unlock_srp_semaphore(l);
224
225 preempt_enable();
226
227 return err;
228}
229
230static void deallocate_srp_semaphore(struct litmus_lock* l)
231{
232 struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock);
233 raw_cpu_dec(srp_objects_in_use);
234 kfree(sem);
235}
236
237static struct litmus_lock_ops srp_lock_ops = {
238 .open = open_srp_semaphore,
239 .close = close_srp_semaphore,
240 .lock = lock_srp_semaphore,
241 .unlock = unlock_srp_semaphore,
242 .deallocate = deallocate_srp_semaphore,
243};
244
245struct srp_semaphore* allocate_srp_semaphore(void)
246{
247 struct srp_semaphore* sem;
248
249 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
250 if (!sem)
251 return NULL;
252
253 INIT_LIST_HEAD(&sem->ceiling.list);
254 sem->ceiling.priority = 0;
255 sem->cpu = UNDEF_SEM;
256 sem->owner = NULL;
257
258 sem->litmus_lock.ops = &srp_lock_ops;
259
260 raw_cpu_inc(srp_objects_in_use);
261 return sem;
262}
263
264static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
265 void *key)
266{
267 int cpu = smp_processor_id();
268 struct task_struct *tsk = wait->private;
269 if (cpu != get_partition(tsk))
270 TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
271 get_partition(tsk));
272 else if (srp_exceeds_ceiling(tsk, this_cpu_ptr(&srp)))
273 return default_wake_function(wait, mode, sync, key);
274 return 0;
275}
276
277static void do_ceiling_block(struct task_struct *tsk)
278{
279 wait_queue_t wait = {
280 .private = tsk,
281 .func = srp_wake_up,
282 .task_list = {NULL, NULL}
283 };
284
285 tsk->state = TASK_UNINTERRUPTIBLE;
286 add_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
287 tsk->rt_param.srp_non_recurse = 1;
288 preempt_enable_no_resched();
289 schedule();
290 preempt_disable();
291 tsk->rt_param.srp_non_recurse = 0;
292 remove_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait);
293}
294
295/* Wait for current task priority to exceed system-wide priority ceiling.
296 */
297void __srp_ceiling_block(struct task_struct *cur)
298{
299 preempt_disable();
300 if (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) {
301 TRACE_CUR("is priority ceiling blocked.\n");
302 while (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp)))
303 do_ceiling_block(cur);
304 TRACE_CUR("finally exceeds system ceiling.\n");
305 } else
306 TRACE_CUR("is not priority ceiling blocked\n");
307 preempt_enable();
308}
309
310#endif
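
The SRP code above implements the classic Stack Resource Policy test: a task may acquire a local resource or resume execution only if its priority exceeds the current per-CPU system ceiling, or if it already owns the semaphore that defines that ceiling. The following condensed sketch restates that rule with simplified, hypothetical names; it is illustrative only and not the in-tree API:

/* Condensed restatement of the SRP ceiling test (illustrative only;
 * the names below are simplified stand-ins for the kernel structures). */
struct prio { unsigned int priority; int pid; };    /* priority == 0 means "none" */

static int higher_prio(const struct prio *a, const struct prio *b)
{
	if (!a->priority)
		return 0;                          /* "no priority" never wins */
	return !b->priority ||
	       a->priority < b->priority ||        /* smaller value = higher priority */
	       (a->priority == b->priority && a->pid < b->pid);
}

/* A task may lock or resume iff it beats its CPU's current ceiling,
 * or it owns the semaphore that currently defines that ceiling. */
static int may_proceed(const struct prio *task, const struct prio *ceiling,
                       int owns_ceiling_sem)
{
	return higher_prio(task, ceiling) || owns_ceiling_sem;
}
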
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 000000000000..941545a05383
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,153 @@
1/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
2 *
3 *
4 */
5
6#include <asm/atomic.h>
7#include <asm/uaccess.h>
8#include <linux/spinlock.h>
9#include <linux/list.h>
10#include <linux/sched.h>
11#include <linux/completion.h>
12
13#include <litmus/litmus.h>
14#include <litmus/sched_plugin.h>
15#include <litmus/jobs.h>
16
17#include <litmus/sched_trace.h>
18#include <litmus/debug_trace.h>
19
20struct ts_release_wait {
21 struct list_head list;
22 struct completion completion;
23 lt_t ts_release_time;
24};
25
26#define DECLARE_TS_RELEASE_WAIT(symb) \
27 struct ts_release_wait symb = \
28 { \
29 LIST_HEAD_INIT(symb.list), \
30 COMPLETION_INITIALIZER_ONSTACK(symb.completion), \
31 0 \
32 }
33
34static LIST_HEAD(task_release_list);
35static DEFINE_MUTEX(task_release_lock);
36
37static long do_wait_for_ts_release(void)
38{
39 DECLARE_TS_RELEASE_WAIT(wait);
40
41 long ret = -ERESTARTSYS;
42
43 if (mutex_lock_interruptible(&task_release_lock))
44 goto out;
45
46 list_add(&wait.list, &task_release_list);
47
48 mutex_unlock(&task_release_lock);
49
50 /* We are enqueued, now we wait for someone to wake us up. */
51 ret = wait_for_completion_interruptible(&wait.completion);
52
53 if (!ret) {
54 /* Completion succeeded, setup release time. */
55 ret = litmus->wait_for_release_at(
56 wait.ts_release_time + get_rt_phase(current));
57 } else {
58 /* We were interrupted, must cleanup list. */
59 mutex_lock(&task_release_lock);
60 if (!wait.completion.done)
61 list_del(&wait.list);
62 mutex_unlock(&task_release_lock);
63 }
64
65out:
66 return ret;
67}
68
69int count_tasks_waiting_for_release(void)
70{
71 int task_count = 0;
72 struct list_head *pos;
73
74 mutex_lock(&task_release_lock);
75
76 list_for_each(pos, &task_release_list) {
77 task_count++;
78 }
79
80 mutex_unlock(&task_release_lock);
81
82
83 return task_count;
84}
85
86static long do_release_ts(lt_t start)
87{
88 long task_count = 0;
89
90 struct list_head *pos, *safe;
91 struct ts_release_wait *wait;
92
93 if (mutex_lock_interruptible(&task_release_lock)) {
94 task_count = -ERESTARTSYS;
95 goto out;
96 }
97
98 TRACE("<<<<<< synchronous task system release >>>>>>\n");
99 sched_trace_sys_release(&start);
100 litmus->synchronous_release_at(start);
101
102 task_count = 0;
103 list_for_each_safe(pos, safe, &task_release_list) {
104 wait = (struct ts_release_wait*)
105 list_entry(pos, struct ts_release_wait, list);
106
107 task_count++;
108 wait->ts_release_time = start;
109 complete(&wait->completion);
110 }
111
112 /* clear stale list */
113 INIT_LIST_HEAD(&task_release_list);
114
115 mutex_unlock(&task_release_lock);
116
117out:
118 return task_count;
119}
120
121
122asmlinkage long sys_wait_for_ts_release(void)
123{
124 long ret = -EPERM;
125 struct task_struct *t = current;
126
127 if (is_realtime(t))
128 ret = do_wait_for_ts_release();
129
130 return ret;
131}
132
133#define ONE_MS 1000000
134
135asmlinkage long sys_release_ts(lt_t __user *__delay)
136{
137 long ret;
138 lt_t delay;
139 lt_t start_time;
140
141 /* FIXME: check capabilities... */
142
143 ret = copy_from_user(&delay, __delay, sizeof(delay));
144 if (ret == 0) {
145 /* round up to next larger integral millisecond */
146 start_time = litmus_clock();
147 do_div(start_time, ONE_MS);
148 start_time = (start_time + 1) * ONE_MS;
149 ret = do_release_ts(start_time + delay);
150 }
151
152 return ret;
153}
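
Together, sys_wait_for_ts_release() and sys_release_ts() implement a synchronous task-system release: each real-time task blocks on a per-waiter completion, and a launcher picks a common, millisecond-aligned start time at which every waiter is released, offset by its own phase. A hedged userspace sketch of the handshake, assuming the liblitmus wrappers wait_for_ts_release() and release_ts() (which are not part of this patch):

/* Userspace pairing (assumes the liblitmus wrappers named below). */
#include <litmus.h>

void rt_task_body(void)
{
	/* ...task has already been admitted as a LITMUS^RT real-time task... */
	wait_for_ts_release();   /* blocks in sys_wait_for_ts_release() */
	/* first job is released at the common start time plus this task's phase */
}

void launcher(void)
{
	lt_t delay_ns = 1000000000LL;   /* release the task system 1 s from now */
	release_ts(&delay_ns);          /* invokes sys_release_ts() */
}
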
diff --git a/litmus/trace.c b/litmus/trace.c
index 7d698aac257d..0724e31a49ff 100644
--- a/litmus/trace.c
+++ b/litmus/trace.c
@@ -254,6 +254,19 @@ feather_callback void save_cpu_timestamp_irq(unsigned long event,
254 0, RECORD_LOCAL_TIMESTAMP);
255}
256
257feather_callback void save_cpu_task_latency(unsigned long event,
258 unsigned long when_ptr)
259{
260 lt_t now = litmus_clock();
261 lt_t *when = (lt_t*) when_ptr;
262 lt_t delta = now - *when;
263
264 write_cpu_timestamp(event, TSK_RT,
265 0,
266 0, LOCAL_IRQ_COUNT, 0,
267 delta, DO_NOT_RECORD_TIMESTAMP);
268}
269
270/* Record to remote trace buffer */
271feather_callback void msg_sent_to(unsigned long event, unsigned long to)
272{
@@ -422,11 +435,13 @@ static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
435
436static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
437{
438 struct ft_buffer* tmp = ftdev->minor[idx].buf;
439 smp_rmb();
440 ftdev->minor[idx].buf = NULL;
441 /* Make sure all cores have actually seen buf == NULL before
442 * yanking out the mappings from underneath them. */
443 smp_wmb();
429 free_ft_buffer(ftdev->minor[idx].buf); 444 free_ft_buffer(tmp);
445}
446
447static ssize_t write_timestamp_from_user(struct ft_buffer* buf, size_t len,
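
The free_timestamp_buffer() hunk fixes a leak: previously the code cleared ftdev->minor[idx].buf and then passed the already-NULL slot to free_ft_buffer(), so the buffer was never released. The new code snapshots the pointer, publishes NULL (ordered by the existing smp_wmb()), and frees the snapshot. A minimal sketch of the same pattern, with hypothetical names (not kernel code):

#include <stdlib.h>

/* Shape of the fix: snapshot, publish NULL, order the publication,
 * then free the snapshot. */
struct buffer { char data[64]; };

#define publish_barrier() __sync_synchronize()   /* stand-in for smp_wmb() */

static void teardown(struct buffer **slot)
{
	struct buffer *tmp = *slot;   /* save the pointer before clearing the slot */
	*slot = NULL;                 /* concurrent readers now see "no buffer" */
	publish_barrier();            /* make the NULL visible before the free */
	free(tmp);                    /* freeing *slot here would free NULL and leak */
}
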
diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c
new file mode 100644
index 000000000000..06a6a7c17983
--- /dev/null
+++ b/litmus/uncachedev.c
@@ -0,0 +1,102 @@
1#include <linux/sched.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/fs.h>
5#include <linux/errno.h>
6#include <linux/highmem.h>
7#include <asm/page.h>
8#include <linux/miscdevice.h>
9#include <linux/module.h>
10
11#include <litmus/litmus.h>
12
13/* device for allocating pages not cached by the CPU */
14
15#define UNCACHE_NAME "litmus/uncache"
16
17void litmus_uncache_vm_open(struct vm_area_struct *vma)
18{
19}
20
21void litmus_uncache_vm_close(struct vm_area_struct *vma)
22{
23}
24
25int litmus_uncache_vm_fault(struct vm_area_struct* vma,
26 struct vm_fault* vmf)
27{
28 /* modeled after SG DMA video4linux, but without DMA. */
29 /* (see drivers/media/video/videobuf-dma-sg.c) */
30 struct page *page;
31
32 page = alloc_page(GFP_USER);
33 if (!page)
34 return VM_FAULT_OOM;
35
36 clear_user_highpage(page, (unsigned long)vmf->virtual_address);
37 vmf->page = page;
38
39 return 0;
40}
41
42static struct vm_operations_struct litmus_uncache_vm_ops = {
43 .open = litmus_uncache_vm_open,
44 .close = litmus_uncache_vm_close,
45 .fault = litmus_uncache_vm_fault,
46};
47
48static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma)
49{
50 /* first make sure mapper knows what he's doing */
51
52 /* you can only map the "first" page */
53 if (vma->vm_pgoff != 0)
54 return -EINVAL;
55
56 /* you can't share it with anyone */
57 if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
58 return -EINVAL;
59
60 /* cannot be expanded, and is not a "normal" page. */
61 vma->vm_flags |= VM_DONTEXPAND;
62
63 /* noncached pages are not explicitly locked in memory (for now). */
64 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
65
66 vma->vm_ops = &litmus_uncache_vm_ops;
67
68 return 0;
69}
70
71static struct file_operations litmus_uncache_fops = {
72 .owner = THIS_MODULE,
73 .mmap = litmus_uncache_mmap,
74};
75
76static struct miscdevice litmus_uncache_dev = {
77 .name = UNCACHE_NAME,
78 .minor = MISC_DYNAMIC_MINOR,
79 .fops = &litmus_uncache_fops,
80 /* pages are not locked, so there is no reason why
81 anyone should not be allowed to allocate uncached pages */
82 .mode = (S_IRUGO | S_IWUGO),
83};
84
85static int __init init_litmus_uncache_dev(void)
86{
87 int err;
88
89 printk("Initializing LITMUS^RT uncache device.\n");
90 err = misc_register(&litmus_uncache_dev);
91 if (err)
92 printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err);
93 return err;
94}
95
96static void __exit exit_litmus_uncache_dev(void)
97{
98 misc_deregister(&litmus_uncache_dev);
99}
100
101module_init(init_litmus_uncache_dev);
102module_exit(exit_litmus_uncache_dev);
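
For context, litmus/uncache registers a miscdevice whose mmap handler hands out private, non-cached pages: every fault allocates a fresh page, and the VMA's protection bits are set with pgprot_noncached(). A hedged userspace sketch follows; the /dev/litmus/uncache path assumes the default devtmpfs naming for a miscdevice called "litmus/uncache".

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	int fd = open("/dev/litmus/uncache", O_RDWR);
	if (fd < 0) {
		perror("open /dev/litmus/uncache");
		return 1;
	}
	/* The driver only accepts offset 0 and rejects shared mappings. */
	char *p = mmap(NULL, pagesz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p[0] = 42;   /* first touch faults in an uncached page via litmus_uncache_vm_fault() */
	munmap(p, pagesz);
	close(fd);
	return 0;
}
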