author | Bjoern Brandenburg <bbb@mpi-sws.org> | 2013-06-25 01:27:07 -0400 |
---|---|---|
committer | Bjoern Brandenburg <bbb@mpi-sws.org> | 2013-08-07 03:46:49 -0400 |
commit | 543810eb67bea9c3046ecb58388493bca39fe796 (patch) | |
tree | cf65010367e53dfbd3e39a9eb6e89dacf92348f3 /litmus | |
parent | 1412c8b72e192a14b8dd620f58a75f55a5490783 (diff) |
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:
- library functionality (heaps, rt_domain, prioritization, etc.)
- budget enforcement logic
- job management
- system call backends
- virtual devices (control page, etc.)
- scheduler plugin API (and dummy plugin)
This code compiles, but is not yet integrated with the rest of Linux.
Diffstat (limited to 'litmus')
-rw-r--r-- | litmus/Kconfig | 165 | ||||
-rw-r--r-- | litmus/Makefile | 19 | ||||
-rw-r--r-- | litmus/affinity.c | 41 | ||||
-rw-r--r-- | litmus/bheap.c | 316 | ||||
-rw-r--r-- | litmus/binheap.c | 387 | ||||
-rw-r--r-- | litmus/budget.c | 113 | ||||
-rw-r--r-- | litmus/clustered.c | 111 | ||||
-rw-r--r-- | litmus/ctrldev.c | 160 | ||||
-rw-r--r-- | litmus/edf_common.c | 200 | ||||
-rw-r--r-- | litmus/fdso.c | 307 | ||||
-rw-r--r-- | litmus/fp_common.c | 119 | ||||
-rw-r--r-- | litmus/jobs.c | 55 | ||||
-rw-r--r-- | litmus/litmus.c | 576 | ||||
-rw-r--r-- | litmus/litmus_proc.c | 407 | ||||
-rw-r--r-- | litmus/locking.c | 188 | ||||
-rw-r--r-- | litmus/preempt.c | 137 | ||||
-rw-r--r-- | litmus/rt_domain.c | 348 | ||||
-rw-r--r-- | litmus/sched_plugin.c | 224 | ||||
-rw-r--r-- | litmus/srp.c | 305 | ||||
-rw-r--r-- | litmus/sync.c | 166 |
20 files changed, 4344 insertions, 0 deletions
diff --git a/litmus/Kconfig b/litmus/Kconfig index 5408ef6b159b..32c18c6eb58d 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig | |||
@@ -1,5 +1,156 @@ | |||
1 | menu "LITMUS^RT" | 1 | menu "LITMUS^RT" |
2 | 2 | ||
3 | menu "Scheduling" | ||
4 | |||
5 | config RELEASE_MASTER | ||
6 | bool "Release-master Support" | ||
7 | depends on ARCH_HAS_SEND_PULL_TIMERS && SMP | ||
8 | default n | ||
9 | help | ||
10 | Allow one processor to act as a dedicated interrupt processor | ||
11 | that services all timer interrupts, but that does not schedule | ||
12 | real-time tasks. See the RTSS'09 paper for details | ||
13 | (http://www.cs.unc.edu/~anderson/papers.html). | ||
14 | |||
15 | |||
16 | config BUG_ON_MIGRATION_DEADLOCK | ||
17 | bool "Panic on suspected migration deadlock" | ||
18 | default y | ||
19 | help | ||
20 | This is a debugging option. The LITMUS^RT migration support code for | ||
21 | global scheduling contains a simple heuristic to detect when the | ||
22 | system deadlocks due to circular stack dependencies. | ||
23 | |||
24 | For example, such a deadlock exists if CPU 0 waits for task A's stack | ||
25 | to become available while using task B's stack, and CPU 1 waits for | ||
26 | task B's stack to become available while using task A's stack. Such | ||
27 | a situation can arise in (buggy) global scheduling plugins. | ||
28 | |||
29 | With this option enabled, such a scenario will result in a BUG(). | ||
30 | You can turn off this option when debugging on real hardware (e.g., | ||
31 | to rescue traces, etc. that would be hard to get after a panic). | ||
32 | |||
33 | Only turn this off if you really know what you are doing. If this | ||
34 | BUG() triggers, the scheduler is broken and turning off this option | ||
35 | won't fix it. | ||
36 | |||
37 | |||
38 | endmenu | ||
39 | |||
40 | menu "Real-Time Synchronization" | ||
41 | |||
42 | config NP_SECTION | ||
43 | bool "Non-preemptive section support" | ||
44 | default y | ||
45 | help | ||
46 | Allow tasks to become non-preemptable. | ||
47 | Note that plugins still need to explicitly support non-preemptivity. | ||
48 | Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support. | ||
49 | |||
50 | This is required to support locking protocols such as the FMLP. | ||
51 | If disabled, all tasks will be considered preemptable at all times. | ||
52 | |||
53 | config LITMUS_LOCKING | ||
54 | bool "Support for real-time locking protocols" | ||
55 | depends on NP_SECTION | ||
56 | default y | ||
57 | help | ||
58 | Enable LITMUS^RT's multiprocessor real-time locking protocols with | ||
59 | predictable maximum blocking times. | ||
60 | |||
61 | Say Yes if you want to include locking protocols such as the FMLP and | ||
62 | Baker's SRP. | ||
63 | |||
64 | endmenu | ||
65 | |||
66 | menu "Performance Enhancements" | ||
67 | |||
68 | config SCHED_CPU_AFFINITY | ||
69 | bool "Local Migration Affinity" | ||
70 | depends on X86 && SYSFS | ||
71 | default y | ||
72 | help | ||
73 | Rescheduled tasks prefer CPUs near their previously used CPU. | ||
74 | This may improve cache performance through possible preservation of | ||
75 | cache affinity, at the expense of (slightly) more involved scheduling | ||
76 | logic. | ||
77 | |||
78 | Warning: May make bugs harder to find since tasks may migrate less often. | ||
79 | |||
80 | NOTES: | ||
81 | * Feature is not utilized by PFair/PD^2. | ||
82 | |||
83 | Say Yes if unsure. | ||
84 | |||
85 | config ALLOW_EARLY_RELEASE | ||
86 | bool "Allow Early Releasing" | ||
87 | default y | ||
88 | help | ||
89 | Allow tasks to release jobs early (while still maintaining job | ||
90 | precedence constraints). Only supported by EDF schedulers. Early | ||
91 | releasing must be explicitly requested by real-time tasks via | ||
92 | the task_params passed to sys_set_task_rt_param(). | ||
93 | |||
94 | Early releasing can improve job response times while maintaining | ||
95 | real-time correctness. However, it can easily peg your CPUs | ||
96 | since tasks never suspend to wait for their next job. As such, early | ||
97 | releasing is really only useful in the context of implementing | ||
98 | bandwidth servers, interrupt handling threads, or short-lived | ||
99 | computations. | ||
100 | |||
101 | Beware that early releasing may affect real-time analysis | ||
102 | if using locking protocols or I/O. | ||
103 | |||
104 | Say Yes if unsure. | ||
105 | |||
106 | choice | ||
107 | prompt "EDF Tie-Break Behavior" | ||
108 | default EDF_TIE_BREAK_LATENESS_NORM | ||
109 | help | ||
110 | Allows the configuration of tie-breaking behavior when the deadlines | ||
111 | of two EDF-scheduled tasks are equal. | ||
112 | |||
113 | config EDF_TIE_BREAK_LATENESS | ||
114 | bool "Lateness-based Tie Break" | ||
115 | help | ||
116 | Break ties between two jobs, A and B, based upon the lateness of their | ||
117 | prior jobs. The job with the greatest lateness has priority. Note that | ||
118 | lateness has a negative value if the prior job finished before its | ||
119 | deadline. | ||
120 | |||
121 | config EDF_TIE_BREAK_LATENESS_NORM | ||
122 | bool "Normalized Lateness-based Tie Break" | ||
123 | help | ||
124 | Break ties between two jobs, A and B, based upon the lateness, normalized | ||
125 | by relative deadline, of their prior jobs. The job with the greatest | ||
126 | normalized lateness has priority. Note that lateness has a negative value | ||
127 | if the prior job finished before its deadline. | ||
128 | |||
129 | Normalized lateness tie-breaks are likely desirable over non-normalized | ||
130 | tie-breaks if the execution times and/or relative deadlines of tasks in a | ||
131 | task set vary greatly. | ||
132 | |||
133 | config EDF_TIE_BREAK_HASH | ||
134 | bool "Hash-based Tie Breaks" | ||
135 | help | ||
136 | Break ties between two jobs, A and B, with equal deadlines by using a | ||
137 | uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job | ||
138 | A has a ~50% chance of winning a given tie-break. | ||
139 | |||
140 | config EDF_PID_TIE_BREAK | ||
141 | bool "PID-based Tie Breaks" | ||
142 | help | ||
143 | Break ties based upon OS-assigned thread IDs. Use this option if | ||
144 | required by the algorithm's real-time analysis or if per-task response-time | ||
145 | jitter must be minimized. | ||
146 | |||
147 | NOTES: | ||
148 | * This tie-breaking method was the default in LITMUS^RT 2012.2 and before. | ||
149 | |||
150 | endchoice | ||
151 | |||
152 | endmenu | ||
153 | |||
3 | menu "Tracing" | 154 | menu "Tracing" |
4 | 155 | ||
5 | config FEATHER_TRACE | 156 | config FEATHER_TRACE |
@@ -154,6 +305,20 @@ config SCHED_DEBUG_TRACE_CALLER | |||
154 | 305 | ||
155 | If unsure, say No. | 306 | If unsure, say No. |
156 | 307 | ||
308 | config PREEMPT_STATE_TRACE | ||
309 | bool "Trace preemption state machine transitions" | ||
310 | depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL | ||
311 | default n | ||
312 | help | ||
313 | With this option enabled, each CPU will log when it transitions | ||
314 | states in the preemption state machine. This state machine is | ||
315 | used to determine how to react to IPIs (avoid races with in-flight IPIs). | ||
316 | |||
317 | Warning: this creates a lot of information in the debug trace. Only | ||
318 | recommended when you are debugging preemption-related races. | ||
319 | |||
320 | If unsure, say No. | ||
321 | |||
157 | endmenu | 322 | endmenu |
158 | 323 | ||
159 | endmenu | 324 | endmenu |
diff --git a/litmus/Makefile b/litmus/Makefile index 6318f1c6fac8..fcb4d7533327 100644 --- a/litmus/Makefile +++ b/litmus/Makefile | |||
@@ -2,6 +2,25 @@ | |||
2 | # Makefile for LITMUS^RT | 2 | # Makefile for LITMUS^RT |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y = sched_plugin.o litmus.o \ | ||
6 | preempt.o \ | ||
7 | litmus_proc.o \ | ||
8 | budget.o \ | ||
9 | clustered.o \ | ||
10 | jobs.o \ | ||
11 | sync.o \ | ||
12 | rt_domain.o \ | ||
13 | edf_common.o \ | ||
14 | fp_common.o \ | ||
15 | fdso.o \ | ||
16 | locking.o \ | ||
17 | srp.o \ | ||
18 | bheap.o \ | ||
19 | binheap.o \ | ||
20 | ctrldev.o | ||
21 | |||
22 | obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o | ||
23 | |||
5 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o | 24 | obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o |
6 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o | 25 | obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o |
7 | obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o | 26 | obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o |
diff --git a/litmus/affinity.c b/litmus/affinity.c new file mode 100644 index 000000000000..a5b437b80433 --- /dev/null +++ b/litmus/affinity.c | |||
@@ -0,0 +1,41 @@ | |||
1 | #include <linux/cpu.h> | ||
2 | |||
3 | #include <litmus/affinity.h> | ||
4 | |||
5 | struct neighborhood neigh_info[NR_CPUS]; | ||
6 | |||
7 | /* called by _init_litmus() */ | ||
8 | void init_topology(void) { | ||
9 | int cpu; | ||
10 | int i; | ||
11 | int chk; | ||
12 | int depth = num_cache_leaves; | ||
13 | |||
14 | if (depth > NUM_CACHE_LEVELS) | ||
15 | depth = NUM_CACHE_LEVELS; | ||
16 | |||
17 | for_each_online_cpu(cpu) { | ||
18 | for (i = 0; i < depth; ++i) { | ||
19 | chk = get_shared_cpu_map((struct cpumask *)&neigh_info[cpu].neighbors[i], cpu, i); | ||
20 | if (chk) { | ||
21 | /* failed */ | ||
22 | neigh_info[cpu].size[i] = 0; | ||
23 | } else { | ||
24 | /* size = num bits in mask */ | ||
25 | neigh_info[cpu].size[i] = | ||
26 | cpumask_weight((struct cpumask *)&neigh_info[cpu].neighbors[i]); | ||
27 | } | ||
28 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", | ||
29 | cpu, neigh_info[cpu].size[i], i, | ||
30 | *cpumask_bits(neigh_info[cpu].neighbors[i])); | ||
31 | } | ||
32 | |||
33 | /* set data for non-existent levels */ | ||
34 | for (; i < NUM_CACHE_LEVELS; ++i) { | ||
35 | neigh_info[cpu].size[i] = 0; | ||
36 | |||
37 | printk("CPU %d has %d neighbors at level %d. (mask = %lx)\n", | ||
38 | cpu, neigh_info[cpu].size[i], i, 0lu); | ||
39 | } | ||
40 | } | ||
41 | } | ||
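The neighborhood map built by init_topology() is meant to be consulted by scheduler plugins through helpers in <litmus/affinity.h> (not shown in this hunk). The following is only a rough sketch of such a consumer; the function name and the cpu_is_suitable() predicate are hypothetical and not part of the patch:

#include <linux/cpumask.h>
#include <litmus/affinity.h>

/* Return the first CPU that shares a cache with 'cpu', preferring the
 * closest level recorded by init_topology(). 'cpu_is_suitable' is a
 * hypothetical predicate supplied by the caller. */
static int find_nearby_cpu(int cpu, int (*cpu_is_suitable)(int candidate))
{
	int level, other;

	for (level = 0; level < NUM_CACHE_LEVELS; level++) {
		if (!neigh_info[cpu].size[level])
			continue;	/* level not present on this system */
		for_each_cpu(other, neigh_info[cpu].neighbors[level]) {
			if (other != cpu && cpu_is_suitable(other))
				return other;
		}
	}
	return -1;	/* no suitable neighbor; caller falls back to any CPU */
}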
diff --git a/litmus/bheap.c b/litmus/bheap.c new file mode 100644 index 000000000000..2707e0122b6d --- /dev/null +++ b/litmus/bheap.c | |||
@@ -0,0 +1,316 @@ | |||
1 | #include <linux/bug.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <litmus/bheap.h> | ||
4 | |||
5 | void bheap_init(struct bheap* heap) | ||
6 | { | ||
7 | heap->head = NULL; | ||
8 | heap->min = NULL; | ||
9 | } | ||
10 | |||
11 | void bheap_node_init(struct bheap_node** _h, void* value) | ||
12 | { | ||
13 | struct bheap_node* h = *_h; | ||
14 | h->parent = NULL; | ||
15 | h->next = NULL; | ||
16 | h->child = NULL; | ||
17 | h->degree = NOT_IN_HEAP; | ||
18 | h->value = value; | ||
19 | h->ref = _h; | ||
20 | } | ||
21 | |||
22 | |||
23 | /* make child a subtree of root */ | ||
24 | static void __bheap_link(struct bheap_node* root, | ||
25 | struct bheap_node* child) | ||
26 | { | ||
27 | child->parent = root; | ||
28 | child->next = root->child; | ||
29 | root->child = child; | ||
30 | root->degree++; | ||
31 | } | ||
32 | |||
33 | /* merge root lists */ | ||
34 | static struct bheap_node* __bheap_merge(struct bheap_node* a, | ||
35 | struct bheap_node* b) | ||
36 | { | ||
37 | struct bheap_node* head = NULL; | ||
38 | struct bheap_node** pos = &head; | ||
39 | |||
40 | while (a && b) { | ||
41 | if (a->degree < b->degree) { | ||
42 | *pos = a; | ||
43 | a = a->next; | ||
44 | } else { | ||
45 | *pos = b; | ||
46 | b = b->next; | ||
47 | } | ||
48 | pos = &(*pos)->next; | ||
49 | } | ||
50 | if (a) | ||
51 | *pos = a; | ||
52 | else | ||
53 | *pos = b; | ||
54 | return head; | ||
55 | } | ||
56 | |||
57 | /* reverse a linked list of nodes. also clears parent pointer */ | ||
58 | static struct bheap_node* __bheap_reverse(struct bheap_node* h) | ||
59 | { | ||
60 | struct bheap_node* tail = NULL; | ||
61 | struct bheap_node* next; | ||
62 | |||
63 | if (!h) | ||
64 | return h; | ||
65 | |||
66 | h->parent = NULL; | ||
67 | while (h->next) { | ||
68 | next = h->next; | ||
69 | h->next = tail; | ||
70 | tail = h; | ||
71 | h = next; | ||
72 | h->parent = NULL; | ||
73 | } | ||
74 | h->next = tail; | ||
75 | return h; | ||
76 | } | ||
77 | |||
78 | static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap, | ||
79 | struct bheap_node** prev, struct bheap_node** node) | ||
80 | { | ||
81 | struct bheap_node *_prev, *cur; | ||
82 | *prev = NULL; | ||
83 | |||
84 | if (!heap->head) { | ||
85 | *node = NULL; | ||
86 | return; | ||
87 | } | ||
88 | |||
89 | *node = heap->head; | ||
90 | _prev = heap->head; | ||
91 | cur = heap->head->next; | ||
92 | while (cur) { | ||
93 | if (higher_prio(cur, *node)) { | ||
94 | *node = cur; | ||
95 | *prev = _prev; | ||
96 | } | ||
97 | _prev = cur; | ||
98 | cur = cur->next; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap, | ||
103 | struct bheap_node* h2) | ||
104 | { | ||
105 | struct bheap_node* h1; | ||
106 | struct bheap_node *prev, *x, *next; | ||
107 | if (!h2) | ||
108 | return; | ||
109 | h1 = heap->head; | ||
110 | if (!h1) { | ||
111 | heap->head = h2; | ||
112 | return; | ||
113 | } | ||
114 | h1 = __bheap_merge(h1, h2); | ||
115 | prev = NULL; | ||
116 | x = h1; | ||
117 | next = x->next; | ||
118 | while (next) { | ||
119 | if (x->degree != next->degree || | ||
120 | (next->next && next->next->degree == x->degree)) { | ||
121 | /* nothing to do, advance */ | ||
122 | prev = x; | ||
123 | x = next; | ||
124 | } else if (higher_prio(x, next)) { | ||
125 | /* x becomes the root of next */ | ||
126 | x->next = next->next; | ||
127 | __bheap_link(x, next); | ||
128 | } else { | ||
129 | /* next becomes the root of x */ | ||
130 | if (prev) | ||
131 | prev->next = next; | ||
132 | else | ||
133 | h1 = next; | ||
134 | __bheap_link(next, x); | ||
135 | x = next; | ||
136 | } | ||
137 | next = x->next; | ||
138 | } | ||
139 | heap->head = h1; | ||
140 | } | ||
141 | |||
142 | static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio, | ||
143 | struct bheap* heap) | ||
144 | { | ||
145 | struct bheap_node *prev, *node; | ||
146 | __bheap_min(higher_prio, heap, &prev, &node); | ||
147 | if (!node) | ||
148 | return NULL; | ||
149 | if (prev) | ||
150 | prev->next = node->next; | ||
151 | else | ||
152 | heap->head = node->next; | ||
153 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
154 | return node; | ||
155 | } | ||
156 | |||
157 | /* insert (and reinitialize) a node into the heap */ | ||
158 | void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, | ||
159 | struct bheap_node* node) | ||
160 | { | ||
161 | struct bheap_node *min; | ||
162 | node->child = NULL; | ||
163 | node->parent = NULL; | ||
164 | node->next = NULL; | ||
165 | node->degree = 0; | ||
166 | if (heap->min && higher_prio(node, heap->min)) { | ||
167 | /* swap min cache */ | ||
168 | min = heap->min; | ||
169 | min->child = NULL; | ||
170 | min->parent = NULL; | ||
171 | min->next = NULL; | ||
172 | min->degree = 0; | ||
173 | __bheap_union(higher_prio, heap, min); | ||
174 | heap->min = node; | ||
175 | } else | ||
176 | __bheap_union(higher_prio, heap, node); | ||
177 | } | ||
178 | |||
179 | void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) | ||
180 | { | ||
181 | struct bheap_node* min; | ||
182 | if (heap->min) { | ||
183 | min = heap->min; | ||
184 | heap->min = NULL; | ||
185 | bheap_insert(higher_prio, heap, min); | ||
186 | } | ||
187 | } | ||
188 | |||
189 | /* merge addition into target */ | ||
190 | void bheap_union(bheap_prio_t higher_prio, | ||
191 | struct bheap* target, struct bheap* addition) | ||
192 | { | ||
193 | /* first insert any cached minima, if necessary */ | ||
194 | bheap_uncache_min(higher_prio, target); | ||
195 | bheap_uncache_min(higher_prio, addition); | ||
196 | __bheap_union(higher_prio, target, addition->head); | ||
197 | /* this is a destructive merge */ | ||
198 | addition->head = NULL; | ||
199 | } | ||
200 | |||
201 | struct bheap_node* bheap_peek(bheap_prio_t higher_prio, | ||
202 | struct bheap* heap) | ||
203 | { | ||
204 | if (!heap->min) | ||
205 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
206 | return heap->min; | ||
207 | } | ||
208 | |||
209 | struct bheap_node* bheap_take(bheap_prio_t higher_prio, | ||
210 | struct bheap* heap) | ||
211 | { | ||
212 | struct bheap_node *node; | ||
213 | if (!heap->min) | ||
214 | heap->min = __bheap_extract_min(higher_prio, heap); | ||
215 | node = heap->min; | ||
216 | heap->min = NULL; | ||
217 | if (node) | ||
218 | node->degree = NOT_IN_HEAP; | ||
219 | return node; | ||
220 | } | ||
221 | |||
222 | int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node) | ||
223 | { | ||
224 | struct bheap_node *parent; | ||
225 | struct bheap_node** tmp_ref; | ||
226 | void* tmp; | ||
227 | |||
228 | /* bubble up */ | ||
229 | parent = node->parent; | ||
230 | while (parent && higher_prio(node, parent)) { | ||
231 | /* swap parent and node */ | ||
232 | tmp = parent->value; | ||
233 | parent->value = node->value; | ||
234 | node->value = tmp; | ||
235 | /* swap references */ | ||
236 | *(parent->ref) = node; | ||
237 | *(node->ref) = parent; | ||
238 | tmp_ref = parent->ref; | ||
239 | parent->ref = node->ref; | ||
240 | node->ref = tmp_ref; | ||
241 | /* step up */ | ||
242 | node = parent; | ||
243 | parent = node->parent; | ||
244 | } | ||
245 | |||
246 | return parent != NULL; | ||
247 | } | ||
248 | |||
249 | void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, | ||
250 | struct bheap_node* node) | ||
251 | { | ||
252 | struct bheap_node *parent, *prev, *pos; | ||
253 | struct bheap_node** tmp_ref; | ||
254 | void* tmp; | ||
255 | |||
256 | if (heap->min != node) { | ||
257 | /* bubble up */ | ||
258 | parent = node->parent; | ||
259 | while (parent) { | ||
260 | /* swap parent and node */ | ||
261 | tmp = parent->value; | ||
262 | parent->value = node->value; | ||
263 | node->value = tmp; | ||
264 | /* swap references */ | ||
265 | *(parent->ref) = node; | ||
266 | *(node->ref) = parent; | ||
267 | tmp_ref = parent->ref; | ||
268 | parent->ref = node->ref; | ||
269 | node->ref = tmp_ref; | ||
270 | /* step up */ | ||
271 | node = parent; | ||
272 | parent = node->parent; | ||
273 | } | ||
274 | /* now delete: | ||
275 | * first find prev */ | ||
276 | prev = NULL; | ||
277 | pos = heap->head; | ||
278 | while (pos != node) { | ||
279 | BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */ | ||
280 | prev = pos; | ||
281 | pos = pos->next; | ||
282 | } | ||
283 | /* we have prev, now remove node */ | ||
284 | if (prev) | ||
285 | prev->next = node->next; | ||
286 | else | ||
287 | heap->head = node->next; | ||
288 | __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); | ||
289 | } else | ||
290 | heap->min = NULL; | ||
291 | node->degree = NOT_IN_HEAP; | ||
292 | } | ||
293 | |||
294 | /* allocate a heap node for value and insert into the heap */ | ||
295 | int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, | ||
296 | void* value, int gfp_flags) | ||
297 | { | ||
298 | struct bheap_node* hn = bheap_node_alloc(gfp_flags); | ||
299 | if (likely(hn)) { | ||
300 | bheap_node_init(&hn, value); | ||
301 | bheap_insert(higher_prio, heap, hn); | ||
302 | } | ||
303 | return hn != NULL; | ||
304 | } | ||
305 | |||
306 | void* bheap_take_del(bheap_prio_t higher_prio, | ||
307 | struct bheap* heap) | ||
308 | { | ||
309 | struct bheap_node* hn = bheap_take(higher_prio, heap); | ||
310 | void* ret = NULL; | ||
311 | if (hn) { | ||
312 | ret = hn->value; | ||
313 | bheap_node_free(hn); | ||
314 | } | ||
315 | return ret; | ||
316 | } | ||
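The binomial heap above is parameterized by a bheap_prio_t callback that decides which of two nodes has higher priority. A minimal usage sketch, assuming only the functions defined in this file and the declarations in <litmus/bheap.h>:

#include <linux/kernel.h>
#include <litmus/bheap.h>

static int int_higher_prio(struct bheap_node *a, struct bheap_node *b)
{
	/* smaller integer == higher priority */
	return *(int *) a->value < *(int *) b->value;
}

static void bheap_demo(void)
{
	struct bheap heap;
	struct bheap_node nodes[3], *n;
	struct bheap_node *refs[3] = { &nodes[0], &nodes[1], &nodes[2] };
	int keys[3] = { 7, 3, 5 };
	int i;

	bheap_init(&heap);
	for (i = 0; i < 3; i++) {
		/* ref pointer must stay valid while the node is in the heap */
		bheap_node_init(&refs[i], &keys[i]);
		bheap_insert(int_higher_prio, &heap, refs[i]);
	}

	/* nodes come out in priority order: 3, 5, 7 */
	while ((n = bheap_take(int_higher_prio, &heap)))
		printk(KERN_INFO "popped %d\n", *(int *) n->value);
}

bheap_add() and bheap_take_del() follow the same pattern but allocate and free the nodes internally.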
diff --git a/litmus/binheap.c b/litmus/binheap.c new file mode 100644 index 000000000000..d3ab34b92096 --- /dev/null +++ b/litmus/binheap.c | |||
@@ -0,0 +1,387 @@ | |||
1 | #include <litmus/binheap.h> | ||
2 | |||
3 | /* Returns true if the root ancestor of node is the root of the given heap. */ | ||
4 | int binheap_is_in_this_heap(struct binheap_node *node, | ||
5 | struct binheap* heap) | ||
6 | { | ||
7 | if(!binheap_is_in_heap(node)) { | ||
8 | return 0; | ||
9 | } | ||
10 | |||
11 | while(node->parent != NULL) { | ||
12 | node = node->parent; | ||
13 | } | ||
14 | |||
15 | return (node == heap->root); | ||
16 | } | ||
17 | |||
18 | |||
19 | /* Update the node reference pointers. Same logic as Litmus binomial heap. */ | ||
20 | static void __update_ref(struct binheap_node *parent, | ||
21 | struct binheap_node *child) | ||
22 | { | ||
23 | *(parent->ref_ptr) = child; | ||
24 | *(child->ref_ptr) = parent; | ||
25 | |||
26 | swap(parent->ref_ptr, child->ref_ptr); | ||
27 | } | ||
28 | |||
29 | |||
30 | /* Swaps data between two nodes. */ | ||
31 | static void __binheap_swap(struct binheap_node *parent, | ||
32 | struct binheap_node *child) | ||
33 | { | ||
34 | swap(parent->data, child->data); | ||
35 | __update_ref(parent, child); | ||
36 | } | ||
37 | |||
38 | |||
39 | /* Swaps memory and data between two nodes. Actual nodes swap instead of | ||
40 | * just data. Needed when we delete nodes from the heap. | ||
41 | */ | ||
42 | static void __binheap_swap_safe(struct binheap *handle, | ||
43 | struct binheap_node *a, | ||
44 | struct binheap_node *b) | ||
45 | { | ||
46 | swap(a->data, b->data); | ||
47 | __update_ref(a, b); | ||
48 | |||
49 | if((a->parent != NULL) && (a->parent == b->parent)) { | ||
50 | /* special case: shared parent */ | ||
51 | swap(a->parent->left, a->parent->right); | ||
52 | } | ||
53 | else { | ||
54 | /* Update pointers to swap parents. */ | ||
55 | |||
56 | if(a->parent) { | ||
57 | if(a == a->parent->left) { | ||
58 | a->parent->left = b; | ||
59 | } | ||
60 | else { | ||
61 | a->parent->right = b; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | if(b->parent) { | ||
66 | if(b == b->parent->left) { | ||
67 | b->parent->left = a; | ||
68 | } | ||
69 | else { | ||
70 | b->parent->right = a; | ||
71 | } | ||
72 | } | ||
73 | |||
74 | swap(a->parent, b->parent); | ||
75 | } | ||
76 | |||
77 | /* swap children */ | ||
78 | |||
79 | if(a->left) { | ||
80 | a->left->parent = b; | ||
81 | |||
82 | if(a->right) { | ||
83 | a->right->parent = b; | ||
84 | } | ||
85 | } | ||
86 | |||
87 | if(b->left) { | ||
88 | b->left->parent = a; | ||
89 | |||
90 | if(b->right) { | ||
91 | b->right->parent = a; | ||
92 | } | ||
93 | } | ||
94 | |||
95 | swap(a->left, b->left); | ||
96 | swap(a->right, b->right); | ||
97 | |||
98 | |||
99 | /* update next/last/root pointers */ | ||
100 | |||
101 | if(a == handle->next) { | ||
102 | handle->next = b; | ||
103 | } | ||
104 | else if(b == handle->next) { | ||
105 | handle->next = a; | ||
106 | } | ||
107 | |||
108 | if(a == handle->last) { | ||
109 | handle->last = b; | ||
110 | } | ||
111 | else if(b == handle->last) { | ||
112 | handle->last = a; | ||
113 | } | ||
114 | |||
115 | if(a == handle->root) { | ||
116 | handle->root = b; | ||
117 | } | ||
118 | else if(b == handle->root) { | ||
119 | handle->root = a; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | |||
124 | /** | ||
125 | * Update the pointer to the last node in the complete binary tree. | ||
126 | * Called internally after the root node has been deleted. | ||
127 | */ | ||
128 | static void __binheap_update_last(struct binheap *handle) | ||
129 | { | ||
130 | struct binheap_node *temp = handle->last; | ||
131 | |||
132 | /* find a "bend" in the tree. */ | ||
133 | while(temp->parent && (temp == temp->parent->left)) { | ||
134 | temp = temp->parent; | ||
135 | } | ||
136 | |||
137 | /* step over to sibling if we're not at root */ | ||
138 | if(temp->parent != NULL) { | ||
139 | temp = temp->parent->left; | ||
140 | } | ||
141 | |||
142 | /* now travel right as far as possible. */ | ||
143 | while(temp->right != NULL) { | ||
144 | temp = temp->right; | ||
145 | } | ||
146 | |||
147 | /* take one step to the left if we're not at the bottom-most level. */ | ||
148 | if(temp->left != NULL) { | ||
149 | temp = temp->left; | ||
150 | } | ||
151 | |||
152 | handle->last = temp; | ||
153 | } | ||
154 | |||
155 | |||
156 | /** | ||
157 | * Update the pointer to the node that will take the next inserted node. | ||
158 | * Called internally after a node has been inserted. | ||
159 | */ | ||
160 | static void __binheap_update_next(struct binheap *handle) | ||
161 | { | ||
162 | struct binheap_node *temp = handle->next; | ||
163 | |||
164 | /* find a "bend" in the tree. */ | ||
165 | while(temp->parent && (temp == temp->parent->right)) { | ||
166 | temp = temp->parent; | ||
167 | } | ||
168 | |||
169 | /* step over to sibling if we're not at root */ | ||
170 | if(temp->parent != NULL) { | ||
171 | temp = temp->parent->right; | ||
172 | } | ||
173 | |||
174 | /* now travel left as far as possible. */ | ||
175 | while(temp->left != NULL) { | ||
176 | temp = temp->left; | ||
177 | } | ||
178 | |||
179 | handle->next = temp; | ||
180 | } | ||
181 | |||
182 | |||
183 | |||
184 | /* bubble node up towards root */ | ||
185 | static void __binheap_bubble_up(struct binheap *handle, | ||
186 | struct binheap_node *node) | ||
187 | { | ||
188 | /* let BINHEAP_POISON data bubble to the top */ | ||
189 | |||
190 | while((node->parent != NULL) && | ||
191 | ((node->data == BINHEAP_POISON) || | ||
192 | handle->compare(node, node->parent))) { | ||
193 | __binheap_swap(node->parent, node); | ||
194 | node = node->parent; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | |||
199 | /* bubble node down, swapping with min-child */ | ||
200 | static void __binheap_bubble_down(struct binheap *handle) | ||
201 | { | ||
202 | struct binheap_node *node = handle->root; | ||
203 | |||
204 | while(node->left != NULL) { | ||
205 | if(node->right && handle->compare(node->right, node->left)) { | ||
206 | if(handle->compare(node->right, node)) { | ||
207 | __binheap_swap(node, node->right); | ||
208 | node = node->right; | ||
209 | } | ||
210 | else { | ||
211 | break; | ||
212 | } | ||
213 | } | ||
214 | else { | ||
215 | if(handle->compare(node->left, node)) { | ||
216 | __binheap_swap(node, node->left); | ||
217 | node = node->left; | ||
218 | } | ||
219 | else { | ||
220 | break; | ||
221 | } | ||
222 | } | ||
223 | } | ||
224 | } | ||
225 | |||
226 | |||
227 | void __binheap_add(struct binheap_node *new_node, | ||
228 | struct binheap *handle, | ||
229 | void *data) | ||
230 | { | ||
231 | new_node->data = data; | ||
232 | new_node->ref = new_node; | ||
233 | new_node->ref_ptr = &(new_node->ref); | ||
234 | |||
235 | if(!binheap_empty(handle)) { | ||
236 | /* insert left side first */ | ||
237 | if(handle->next->left == NULL) { | ||
238 | handle->next->left = new_node; | ||
239 | new_node->parent = handle->next; | ||
240 | new_node->left = NULL; | ||
241 | new_node->right = NULL; | ||
242 | |||
243 | handle->last = new_node; | ||
244 | |||
245 | __binheap_bubble_up(handle, new_node); | ||
246 | } | ||
247 | else { | ||
248 | /* left occupied. insert right. */ | ||
249 | handle->next->right = new_node; | ||
250 | new_node->parent = handle->next; | ||
251 | new_node->left = NULL; | ||
252 | new_node->right = NULL; | ||
253 | |||
254 | handle->last = new_node; | ||
255 | |||
256 | __binheap_update_next(handle); | ||
257 | __binheap_bubble_up(handle, new_node); | ||
258 | } | ||
259 | } | ||
260 | else { | ||
261 | /* first node in heap */ | ||
262 | |||
263 | new_node->parent = NULL; | ||
264 | new_node->left = NULL; | ||
265 | new_node->right = NULL; | ||
266 | |||
267 | handle->root = new_node; | ||
268 | handle->next = new_node; | ||
269 | handle->last = new_node; | ||
270 | } | ||
271 | } | ||
272 | |||
273 | |||
274 | /** | ||
275 | * Removes the root node from the heap. The node is removed after coalescing | ||
276 | * the binheap_node with its original data pointer at the root of the tree. | ||
277 | * | ||
278 | * The 'last' node in the tree is then swapped up to the root and bubbled | ||
279 | * down. | ||
280 | */ | ||
281 | void __binheap_delete_root(struct binheap *handle, | ||
282 | struct binheap_node *container) | ||
283 | { | ||
284 | struct binheap_node *root = handle->root; | ||
285 | |||
286 | if(root != container) { | ||
287 | /* coalesce */ | ||
288 | __binheap_swap_safe(handle, root, container); | ||
289 | root = container; | ||
290 | } | ||
291 | |||
292 | if(handle->last != root) { | ||
293 | /* swap 'last' node up to root and bubble it down. */ | ||
294 | |||
295 | struct binheap_node *to_move = handle->last; | ||
296 | |||
297 | if(to_move->parent != root) { | ||
298 | handle->next = to_move->parent; | ||
299 | |||
300 | if(handle->next->right == to_move) { | ||
301 | /* disconnect from parent */ | ||
302 | to_move->parent->right = NULL; | ||
303 | handle->last = handle->next->left; | ||
304 | } | ||
305 | else { | ||
306 | /* find new 'last' before we disconnect */ | ||
307 | __binheap_update_last(handle); | ||
308 | |||
309 | /* disconnect from parent */ | ||
310 | to_move->parent->left = NULL; | ||
311 | } | ||
312 | } | ||
313 | else { | ||
314 | /* 'last' is direct child of root */ | ||
315 | |||
316 | handle->next = to_move; | ||
317 | |||
318 | if(to_move == to_move->parent->right) { | ||
319 | to_move->parent->right = NULL; | ||
320 | handle->last = to_move->parent->left; | ||
321 | } | ||
322 | else { | ||
323 | to_move->parent->left = NULL; | ||
324 | handle->last = to_move; | ||
325 | } | ||
326 | } | ||
327 | to_move->parent = NULL; | ||
328 | |||
329 | /* reconnect as root. We can't just swap data ptrs since root node | ||
330 | * may be freed after this function returns. | ||
331 | */ | ||
332 | to_move->left = root->left; | ||
333 | to_move->right = root->right; | ||
334 | if(to_move->left != NULL) { | ||
335 | to_move->left->parent = to_move; | ||
336 | } | ||
337 | if(to_move->right != NULL) { | ||
338 | to_move->right->parent = to_move; | ||
339 | } | ||
340 | |||
341 | handle->root = to_move; | ||
342 | |||
343 | /* bubble down */ | ||
344 | __binheap_bubble_down(handle); | ||
345 | } | ||
346 | else { | ||
347 | /* removing last node in tree */ | ||
348 | handle->root = NULL; | ||
349 | handle->next = NULL; | ||
350 | handle->last = NULL; | ||
351 | } | ||
352 | |||
353 | /* mark as removed */ | ||
354 | container->parent = BINHEAP_POISON; | ||
355 | } | ||
356 | |||
357 | |||
358 | /** | ||
359 | * Delete an arbitrary node. Bubble node to delete up to the root, | ||
360 | * and then delete the root. | ||
361 | */ | ||
362 | void __binheap_delete(struct binheap_node *node_to_delete, | ||
363 | struct binheap *handle) | ||
364 | { | ||
365 | struct binheap_node *target = node_to_delete->ref; | ||
366 | void *temp_data = target->data; | ||
367 | |||
368 | /* temporarily poison the data pointer so the node bubbles up to the root. */ | ||
369 | target->data = BINHEAP_POISON; | ||
370 | |||
371 | __binheap_bubble_up(handle, target); | ||
372 | __binheap_delete_root(handle, node_to_delete); | ||
373 | |||
374 | node_to_delete->data = temp_data; /* restore node data pointer */ | ||
375 | } | ||
376 | |||
377 | |||
378 | /** | ||
379 | * Bubble up a node whose key has decreased in value. | ||
380 | */ | ||
381 | void __binheap_decrease(struct binheap_node *orig_node, | ||
382 | struct binheap *handle) | ||
383 | { | ||
384 | struct binheap_node *target = orig_node->ref; | ||
385 | |||
386 | __binheap_bubble_up(handle, target); | ||
387 | } | ||
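In contrast to bheap, binheap keeps the comparison callback in the handle and embeds its nodes in the caller's own structures. A hedged usage sketch follows; the hand-rolled handle initializer is a simplification, as <litmus/binheap.h> presumably provides proper init macros for the handle and nodes:

#include <litmus/binheap.h>

struct demo_item {
	int key;
	struct binheap_node hn;
};

static int demo_order(struct binheap_node *a, struct binheap_node *b)
{
	struct demo_item *x = (struct demo_item *) a->data;
	struct demo_item *y = (struct demo_item *) b->data;
	return x->key < y->key;	/* min-heap on key */
}

static void binheap_demo(void)
{
	struct binheap heap = {
		.root = NULL, .next = NULL, .last = NULL,
		.compare = demo_order,
	};
	struct demo_item a = { .key = 2 }, b = { .key = 1 };

	__binheap_add(&a.hn, &heap, &a);
	__binheap_add(&b.hn, &heap, &b);

	/* b (key 1) bubbled above a, so b's data is now at the root;
	 * removing it leaves a as the only element */
	__binheap_delete_root(&heap, &b.hn);
	__binheap_delete(&a.hn, &heap);
}

Note that __binheap_delete_root() takes the embedded node whose data is currently at the root; in the sketch that is b.hn, because b bubbled above a on insertion.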
diff --git a/litmus/budget.c b/litmus/budget.c new file mode 100644 index 000000000000..f7712be29adb --- /dev/null +++ b/litmus/budget.c | |||
@@ -0,0 +1,113 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/percpu.h> | ||
3 | #include <linux/hrtimer.h> | ||
4 | |||
5 | #include <litmus/litmus.h> | ||
6 | #include <litmus/preempt.h> | ||
7 | |||
8 | #include <litmus/budget.h> | ||
9 | |||
10 | struct enforcement_timer { | ||
11 | /* The enforcement timer is used to accurately police | ||
12 | * slice budgets. */ | ||
13 | struct hrtimer timer; | ||
14 | int armed; | ||
15 | }; | ||
16 | |||
17 | DEFINE_PER_CPU(struct enforcement_timer, budget_timer); | ||
18 | |||
19 | static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) | ||
20 | { | ||
21 | struct enforcement_timer* et = container_of(timer, | ||
22 | struct enforcement_timer, | ||
23 | timer); | ||
24 | unsigned long flags; | ||
25 | |||
26 | local_irq_save(flags); | ||
27 | TRACE("enforcement timer fired.\n"); | ||
28 | et->armed = 0; | ||
29 | /* activate scheduler */ | ||
30 | litmus_reschedule_local(); | ||
31 | local_irq_restore(flags); | ||
32 | |||
33 | return HRTIMER_NORESTART; | ||
34 | } | ||
35 | |||
36 | /* assumes called with IRQs off */ | ||
37 | static void cancel_enforcement_timer(struct enforcement_timer* et) | ||
38 | { | ||
39 | int ret; | ||
40 | |||
41 | TRACE("cancelling enforcement timer.\n"); | ||
42 | |||
43 | /* Since interrupts are disabled and et->armed is only | ||
44 | * modified locally, we do not need any locks. | ||
45 | */ | ||
46 | |||
47 | if (et->armed) { | ||
48 | ret = hrtimer_try_to_cancel(&et->timer); | ||
49 | /* Should never be inactive. */ | ||
50 | BUG_ON(ret == 0); | ||
51 | /* Should never be running concurrently. */ | ||
52 | BUG_ON(ret == -1); | ||
53 | |||
54 | et->armed = 0; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | /* assumes called with IRQs off */ | ||
59 | static void arm_enforcement_timer(struct enforcement_timer* et, | ||
60 | struct task_struct* t) | ||
61 | { | ||
62 | lt_t when_to_fire; | ||
63 | TRACE_TASK(t, "arming enforcement timer.\n"); | ||
64 | |||
65 | /* Calling this when there is no budget left for the task | ||
66 | * makes no sense, unless the task is non-preemptive. */ | ||
67 | BUG_ON(budget_exhausted(t) && (!is_np(t))); | ||
68 | |||
69 | /* __hrtimer_start_range_ns() cancels the timer | ||
70 | * anyway, so we don't have to check whether it is still armed */ | ||
71 | |||
72 | if (likely(!is_np(t))) { | ||
73 | when_to_fire = litmus_clock() + budget_remaining(t); | ||
74 | __hrtimer_start_range_ns(&et->timer, | ||
75 | ns_to_ktime(when_to_fire), | ||
76 | 0 /* delta */, | ||
77 | HRTIMER_MODE_ABS_PINNED, | ||
78 | 0 /* no wakeup */); | ||
79 | et->armed = 1; | ||
80 | } | ||
81 | } | ||
82 | |||
83 | |||
84 | /* expects to be called with IRQs off */ | ||
85 | void update_enforcement_timer(struct task_struct* t) | ||
86 | { | ||
87 | struct enforcement_timer* et = &__get_cpu_var(budget_timer); | ||
88 | |||
89 | if (t && budget_precisely_enforced(t)) { | ||
90 | /* Make sure we call into the scheduler when this budget | ||
91 | * expires. */ | ||
92 | arm_enforcement_timer(et, t); | ||
93 | } else if (et->armed) { | ||
94 | /* Make sure we don't cause unnecessary interrupts. */ | ||
95 | cancel_enforcement_timer(et); | ||
96 | } | ||
97 | } | ||
98 | |||
99 | |||
100 | static int __init init_budget_enforcement(void) | ||
101 | { | ||
102 | int cpu; | ||
103 | struct enforcement_timer* et; | ||
104 | |||
105 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
106 | et = &per_cpu(budget_timer, cpu); | ||
107 | hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
108 | et->timer.function = on_enforcement_timeout; | ||
109 | } | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | module_init(init_budget_enforcement); | ||
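update_enforcement_timer() is the single entry point for plugins: with IRQs off, it arms the per-CPU timer for a task with precise budget enforcement and cancels any stale timer otherwise. A sketch of the intended call site; pick_next_rt_task() is a hypothetical stand-in for a plugin's own selection logic, not a function from this patch:

#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/budget.h>

/* Called from a plugin's schedule() callback with IRQs disabled. */
static struct task_struct* demo_plugin_schedule(struct task_struct *prev)
{
	struct task_struct *next;

	/* pick the next task under the plugin's own locking */
	next = pick_next_rt_task();	/* hypothetical helper */

	/* Re-program the enforcement timer for the task about to run:
	 * armed if 'next' uses precise enforcement, cancelled if not
	 * (or if the CPU goes idle and 'next' is NULL). */
	update_enforcement_timer(next);

	return next;
}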
diff --git a/litmus/clustered.c b/litmus/clustered.c new file mode 100644 index 000000000000..979fac6bebb7 --- /dev/null +++ b/litmus/clustered.c | |||
@@ -0,0 +1,111 @@ | |||
1 | #include <linux/gfp.h> | ||
2 | #include <linux/cpumask.h> | ||
3 | #include <linux/list.h> | ||
4 | |||
5 | #include <litmus/clustered.h> | ||
6 | |||
7 | #if !defined(CONFIG_X86) || !defined(CONFIG_SYSFS) | ||
8 | /* fake get_shared_cpu_map() on non-x86 architectures */ | ||
9 | |||
10 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | ||
11 | { | ||
12 | if (index != 1) | ||
13 | return 1; | ||
14 | else { | ||
15 | /* Fake L1: CPU is all by itself. */ | ||
16 | cpumask_clear(mask); | ||
17 | cpumask_set_cpu(cpu, mask); | ||
18 | return 0; | ||
19 | } | ||
20 | } | ||
21 | |||
22 | #endif | ||
23 | |||
24 | int get_cluster_size(enum cache_level level) | ||
25 | { | ||
26 | cpumask_var_t mask; | ||
27 | int ok; | ||
28 | int num_cpus; | ||
29 | |||
30 | if (level == GLOBAL_CLUSTER) | ||
31 | return num_online_cpus(); | ||
32 | else { | ||
33 | if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
34 | return -ENOMEM; | ||
35 | /* assumes CPU 0 is representative of all CPUs */ | ||
36 | ok = get_shared_cpu_map(mask, 0, level); | ||
37 | /* ok == 0 means we got the map; otherwise it's an invalid cache level */ | ||
38 | if (ok == 0) | ||
39 | num_cpus = cpumask_weight(mask); | ||
40 | free_cpumask_var(mask); | ||
41 | |||
42 | if (ok == 0) | ||
43 | return num_cpus; | ||
44 | else | ||
45 | return -EINVAL; | ||
46 | } | ||
47 | } | ||
48 | |||
49 | int assign_cpus_to_clusters(enum cache_level level, | ||
50 | struct scheduling_cluster* clusters[], | ||
51 | unsigned int num_clusters, | ||
52 | struct cluster_cpu* cpus[], | ||
53 | unsigned int num_cpus) | ||
54 | { | ||
55 | cpumask_var_t mask; | ||
56 | unsigned int i, free_cluster = 0, low_cpu; | ||
57 | int err = 0; | ||
58 | |||
59 | if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
60 | return -ENOMEM; | ||
61 | |||
62 | /* clear cluster pointers */ | ||
63 | for (i = 0; i < num_cpus; i++) { | ||
64 | cpus[i]->id = i; | ||
65 | cpus[i]->cluster = NULL; | ||
66 | } | ||
67 | |||
68 | /* initialize clusters */ | ||
69 | for (i = 0; i < num_clusters; i++) { | ||
70 | clusters[i]->id = i; | ||
71 | INIT_LIST_HEAD(&clusters[i]->cpus); | ||
72 | } | ||
73 | |||
74 | /* Assign each CPU. Two assumptions are made: | ||
75 | * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask). | ||
76 | * 2) All cpus that belong to some cluster are online. | ||
77 | */ | ||
78 | for_each_online_cpu(i) { | ||
79 | /* get lowest-id CPU in cluster */ | ||
80 | if (level != GLOBAL_CLUSTER) { | ||
81 | err = get_shared_cpu_map(mask, cpus[i]->id, level); | ||
82 | if (err != 0) { | ||
83 | /* ugh... wrong cache level? Either caller screwed up | ||
84 | * or the CPU topology is weird. */ | ||
85 | printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n", | ||
86 | level, err); | ||
87 | err = -EINVAL; | ||
88 | goto out; | ||
89 | } | ||
90 | low_cpu = cpumask_first(mask); | ||
91 | } else | ||
92 | low_cpu = 0; | ||
93 | if (low_cpu == i) { | ||
94 | /* caller must provide an appropriate number of clusters */ | ||
95 | BUG_ON(free_cluster >= num_clusters); | ||
96 | |||
97 | /* create new cluster */ | ||
98 | cpus[i]->cluster = clusters[free_cluster++]; | ||
99 | } else { | ||
100 | /* low_cpu points to the right cluster | ||
101 | * Assumption: low_cpu is actually online and was processed earlier. */ | ||
102 | cpus[i]->cluster = cpus[low_cpu]->cluster; | ||
103 | } | ||
104 | /* enqueue in cpus list */ | ||
105 | list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); | ||
106 | printk(KERN_INFO "Assigning CPU%u to cluster %u.\n", i, cpus[i]->cluster->id); | ||
107 | } | ||
108 | out: | ||
109 | free_cpumask_var(mask); | ||
110 | return err; | ||
111 | } | ||
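A clustered plugin would combine the two helpers above roughly as follows. The static demo_* arrays, the eight-CPU bound, and the one-cluster-per-CPU upper bound are simplifications for this sketch only; real plugins allocate per-cluster and per-CPU state dynamically and embed these structs in it:

#include <linux/cpumask.h>
#include <litmus/clustered.h>

#define DEMO_MAX_CPUS 8

static struct scheduling_cluster demo_cluster[DEMO_MAX_CPUS];
static struct cluster_cpu demo_cpu[DEMO_MAX_CPUS];

static int demo_setup_clusters(enum cache_level level)
{
	struct scheduling_cluster *clusters[DEMO_MAX_CPUS];
	struct cluster_cpu *cpus[DEMO_MAX_CPUS];
	unsigned int i, ncpus = num_online_cpus();

	/* sanity-check the requested cache level and the CPU count */
	if (ncpus > DEMO_MAX_CPUS || get_cluster_size(level) <= 0)
		return -EINVAL;

	for (i = 0; i < ncpus; i++) {
		clusters[i] = &demo_cluster[i];	/* worst case: one cluster per CPU */
		cpus[i] = &demo_cpu[i];
	}

	/* group CPUs that share a cache at 'level' into common clusters */
	return assign_cpus_to_clusters(level, clusters, ncpus, cpus, ncpus);
}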
diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c new file mode 100644 index 000000000000..3e2ac2b3fbe8 --- /dev/null +++ b/litmus/ctrldev.c | |||
@@ -0,0 +1,160 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/mm.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/miscdevice.h> | ||
5 | #include <linux/module.h> | ||
6 | |||
7 | #include <litmus/litmus.h> | ||
8 | |||
9 | /* only one page for now, but we might want to add a RO version at some point */ | ||
10 | |||
11 | #define CTRL_NAME "litmus/ctrl" | ||
12 | |||
13 | /* allocate t->rt_param.ctrl_page*/ | ||
14 | static int alloc_ctrl_page(struct task_struct *t) | ||
15 | { | ||
16 | int err = 0; | ||
17 | |||
18 | /* only allocate if the task doesn't have one yet */ | ||
19 | if (!tsk_rt(t)->ctrl_page) { | ||
20 | tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL); | ||
21 | if (!tsk_rt(t)->ctrl_page) | ||
22 | err = -ENOMEM; | ||
23 | /* will get de-allocated in task teardown */ | ||
24 | TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__, | ||
25 | tsk_rt(t)->ctrl_page); | ||
26 | } | ||
27 | return err; | ||
28 | } | ||
29 | |||
30 | static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) | ||
31 | { | ||
32 | int err; | ||
33 | |||
34 | struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); | ||
35 | |||
36 | TRACE_CUR(CTRL_NAME | ||
37 | ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n", | ||
38 | tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start, | ||
39 | vma->vm_page_prot); | ||
40 | |||
41 | /* Map it into the vma. */ | ||
42 | err = vm_insert_page(vma, vma->vm_start, ctrl); | ||
43 | |||
44 | if (err) | ||
45 | TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err); | ||
46 | |||
47 | return err; | ||
48 | } | ||
49 | |||
50 | static void litmus_ctrl_vm_close(struct vm_area_struct* vma) | ||
51 | { | ||
52 | TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__, | ||
53 | vma->vm_flags, vma->vm_page_prot); | ||
54 | |||
55 | TRACE_CUR(CTRL_NAME | ||
56 | ": %p:%p vma:%p vma->vm_private_data:%p closed.\n", | ||
57 | (void*) vma->vm_start, (void*) vma->vm_end, vma, | ||
58 | vma->vm_private_data); | ||
59 | } | ||
60 | |||
61 | static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, | ||
62 | struct vm_fault* vmf) | ||
63 | { | ||
64 | TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__, | ||
65 | vma->vm_flags, vmf->pgoff); | ||
66 | |||
67 | /* This function should never be called, since all pages should have | ||
68 | * been mapped by mmap() already. */ | ||
69 | WARN_ONCE(1, "Page faults should be impossible in the control page\n"); | ||
70 | |||
71 | return VM_FAULT_SIGBUS; | ||
72 | } | ||
73 | |||
74 | static struct vm_operations_struct litmus_ctrl_vm_ops = { | ||
75 | .close = litmus_ctrl_vm_close, | ||
76 | .fault = litmus_ctrl_vm_fault, | ||
77 | }; | ||
78 | |||
79 | static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma) | ||
80 | { | ||
81 | int err = 0; | ||
82 | |||
83 | /* first make sure mapper knows what he's doing */ | ||
84 | |||
85 | /* you can only get one page */ | ||
86 | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | ||
87 | return -EINVAL; | ||
88 | |||
89 | /* you can only map the "first" page */ | ||
90 | if (vma->vm_pgoff != 0) | ||
91 | return -EINVAL; | ||
92 | |||
93 | /* you can't share it with anyone */ | ||
94 | if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) | ||
95 | return -EINVAL; | ||
96 | |||
97 | vma->vm_ops = &litmus_ctrl_vm_ops; | ||
98 | /* This mapping should not be kept across forks, | ||
99 | * cannot be expanded, and is not a "normal" page. */ | ||
100 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_IO; | ||
101 | |||
102 | /* We don't want the first write access to trigger a "minor" page fault | ||
103 | * to mark the page as dirty. This is transient, private memory, we | ||
104 | * don't care if it was touched or not. __S011 means RW access, but not | ||
105 | * execute, and avoids copy-on-write behavior. | ||
106 | * See protection_map in mmap.c. */ | ||
107 | vma->vm_page_prot = __S011; | ||
108 | |||
109 | err = alloc_ctrl_page(current); | ||
110 | if (!err) | ||
111 | err = map_ctrl_page(current, vma); | ||
112 | |||
113 | TRACE_CUR("%s flags=0x%x prot=0x%lx\n", | ||
114 | __FUNCTION__, vma->vm_flags, vma->vm_page_prot); | ||
115 | |||
116 | return err; | ||
117 | } | ||
118 | |||
119 | static struct file_operations litmus_ctrl_fops = { | ||
120 | .owner = THIS_MODULE, | ||
121 | .mmap = litmus_ctrl_mmap, | ||
122 | }; | ||
123 | |||
124 | static struct miscdevice litmus_ctrl_dev = { | ||
125 | .name = CTRL_NAME, | ||
126 | .minor = MISC_DYNAMIC_MINOR, | ||
127 | .fops = &litmus_ctrl_fops, | ||
128 | }; | ||
129 | |||
130 | static int __init init_litmus_ctrl_dev(void) | ||
131 | { | ||
132 | int err; | ||
133 | |||
134 | BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); | ||
135 | |||
136 | BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t)); | ||
137 | |||
138 | BUILD_BUG_ON(offsetof(struct control_page, sched.raw) | ||
139 | != LITMUS_CP_OFFSET_SCHED); | ||
140 | BUILD_BUG_ON(offsetof(struct control_page, irq_count) | ||
141 | != LITMUS_CP_OFFSET_IRQ_COUNT); | ||
142 | BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start) | ||
143 | != LITMUS_CP_OFFSET_TS_SC_START); | ||
144 | BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start) | ||
145 | != LITMUS_CP_OFFSET_IRQ_SC_START); | ||
146 | |||
147 | printk("Initializing LITMUS^RT control device.\n"); | ||
148 | err = misc_register(&litmus_ctrl_dev); | ||
149 | if (err) | ||
150 | printk("Could not allocate %s device (%d).\n", CTRL_NAME, err); | ||
151 | return err; | ||
152 | } | ||
153 | |||
154 | static void __exit exit_litmus_ctrl_dev(void) | ||
155 | { | ||
156 | misc_deregister(&litmus_ctrl_dev); | ||
157 | } | ||
158 | |||
159 | module_init(init_litmus_ctrl_dev); | ||
160 | module_exit(exit_litmus_ctrl_dev); | ||
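From userspace, the control page is obtained with a plain mmap() of the misc device registered above (liblitmus normally wraps this). A self-contained sketch; the /dev/litmus/ctrl path assumes the usual udev naming for a misc device registered as "litmus/ctrl":

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *ctrl;
	int fd = open("/dev/litmus/ctrl", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* exactly one page, offset 0, private mapping -- anything else
	 * is rejected by litmus_ctrl_mmap() */
	ctrl = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE, fd, 0);
	if (ctrl == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* 'ctrl' now points at this task's struct control_page */
	munmap(ctrl, page_size);
	close(fd);
	return 0;
}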
diff --git a/litmus/edf_common.c b/litmus/edf_common.c new file mode 100644 index 000000000000..5aca2934a7b5 --- /dev/null +++ b/litmus/edf_common.c | |||
@@ -0,0 +1,200 @@ | |||
1 | /* | ||
2 | * kernel/edf_common.c | ||
3 | * | ||
4 | * Common functions for EDF based scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/edf_common.h> | ||
16 | |||
17 | #ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM | ||
18 | #include <litmus/fpmath.h> | ||
19 | #endif | ||
20 | |||
21 | #ifdef CONFIG_EDF_TIE_BREAK_HASH | ||
22 | #include <linux/hash.h> | ||
23 | static inline long edf_hash(struct task_struct *t) | ||
24 | { | ||
25 | /* pid is 32 bits, so normally we would shove that into the | ||
26 | * upper 32 bits and put the job number in the bottom | ||
27 | * and hash the 64-bit number with hash_64(). Sadly, | ||
28 | * in testing, hash_64() doesn't distribute keys where the | ||
29 | * upper bits are close together (as would be the case with | ||
30 | * pids) and job numbers are equal (as would be the case with | ||
31 | * synchronous task sets with all relative deadlines equal). | ||
32 | * | ||
33 | * A 2006 Linux patch proposed the following solution | ||
34 | * (but for some reason it wasn't accepted...). | ||
35 | * | ||
36 | * At least this workaround works for 32-bit systems as well. | ||
37 | */ | ||
38 | return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); | ||
39 | } | ||
40 | #endif | ||
41 | |||
42 | |||
43 | /* edf_higher_prio - returns true if first has a higher EDF priority | ||
44 | * than second. Deadline ties are broken by the configured tie-break policy, falling back to PID. | ||
45 | * | ||
46 | * both first and second may be NULL | ||
47 | */ | ||
48 | int edf_higher_prio(struct task_struct* first, | ||
49 | struct task_struct* second) | ||
50 | { | ||
51 | struct task_struct *first_task = first; | ||
52 | struct task_struct *second_task = second; | ||
53 | |||
54 | /* There is no point in comparing a task to itself. */ | ||
55 | if (first && first == second) { | ||
56 | TRACE_TASK(first, | ||
57 | "WARNING: pointless edf priority comparison.\n"); | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | |||
62 | /* check for NULL tasks */ | ||
63 | if (!first || !second) | ||
64 | return first && !second; | ||
65 | |||
66 | #ifdef CONFIG_LITMUS_LOCKING | ||
67 | |||
68 | /* Check for inherited priorities. Change task | ||
69 | * used for comparison in such a case. | ||
70 | */ | ||
71 | if (unlikely(first->rt_param.inh_task)) | ||
72 | first_task = first->rt_param.inh_task; | ||
73 | if (unlikely(second->rt_param.inh_task)) | ||
74 | second_task = second->rt_param.inh_task; | ||
75 | |||
76 | /* Check for priority boosting. Tie-break by start of boosting. | ||
77 | */ | ||
78 | if (unlikely(is_priority_boosted(first_task))) { | ||
79 | /* first_task is boosted, how about second_task? */ | ||
80 | if (!is_priority_boosted(second_task) || | ||
81 | lt_before(get_boost_start(first_task), | ||
82 | get_boost_start(second_task))) | ||
83 | return 1; | ||
84 | else | ||
85 | return 0; | ||
86 | } else if (unlikely(is_priority_boosted(second_task))) | ||
87 | /* second_task is boosted, first is not*/ | ||
88 | return 0; | ||
89 | |||
90 | #endif | ||
91 | |||
92 | if (earlier_deadline(first_task, second_task)) { | ||
93 | return 1; | ||
94 | } | ||
95 | else if (get_deadline(first_task) == get_deadline(second_task)) { | ||
96 | /* Need to tie break. All methods must set pid_break to 0/1 if | ||
97 | * first_task does not have priority over second_task. | ||
98 | */ | ||
99 | int pid_break; | ||
100 | |||
101 | |||
102 | #if defined(CONFIG_EDF_TIE_BREAK_LATENESS) | ||
103 | /* Tie break by lateness. Jobs with greater lateness get | ||
104 | * priority. This should spread tardiness across all tasks, | ||
105 | * especially in task sets where all tasks have the same | ||
106 | * period and relative deadlines. | ||
107 | */ | ||
108 | if (get_lateness(first_task) > get_lateness(second_task)) { | ||
109 | return 1; | ||
110 | } | ||
111 | pid_break = (get_lateness(first_task) == get_lateness(second_task)); | ||
112 | |||
113 | |||
114 | #elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM) | ||
115 | /* Tie break by lateness, normalized by relative deadline. Jobs with | ||
116 | * greater normalized lateness get priority. | ||
117 | * | ||
118 | * Note: Considered using the algebraically equivalent | ||
119 | * lateness(first)*relative_deadline(second) > | ||
120 | lateness(second)*relative_deadline(first) | ||
121 | * to avoid fixed-point math, but values are prone to overflow if inputs | ||
122 | * are on the order of several seconds, even in 64-bit. | ||
123 | */ | ||
124 | fp_t fnorm = _frac(get_lateness(first_task), | ||
125 | get_rt_relative_deadline(first_task)); | ||
126 | fp_t snorm = _frac(get_lateness(second_task), | ||
127 | get_rt_relative_deadline(second_task)); | ||
128 | if (_gt(fnorm, snorm)) { | ||
129 | return 1; | ||
130 | } | ||
131 | pid_break = _eq(fnorm, snorm); | ||
132 | |||
133 | |||
134 | #elif defined(CONFIG_EDF_TIE_BREAK_HASH) | ||
135 | /* Tie break by comparing hashes of the (pid, job#) tuple. There should be | ||
136 | * a 50% chance that first_task has a higher priority than second_task. | ||
137 | */ | ||
138 | long fhash = edf_hash(first_task); | ||
139 | long shash = edf_hash(second_task); | ||
140 | if (fhash < shash) { | ||
141 | return 1; | ||
142 | } | ||
143 | pid_break = (fhash == shash); | ||
144 | #else | ||
145 | |||
146 | |||
147 | /* CONFIG_EDF_PID_TIE_BREAK */ | ||
148 | pid_break = 1; // fall through to tie-break by pid; | ||
149 | #endif | ||
150 | |||
151 | /* Tie break by pid */ | ||
152 | if(pid_break) { | ||
153 | if (first_task->pid < second_task->pid) { | ||
154 | return 1; | ||
155 | } | ||
156 | else if (first_task->pid == second_task->pid) { | ||
157 | /* If the PIDs are the same then the task with the | ||
158 | * inherited priority wins. | ||
159 | */ | ||
160 | if (!second->rt_param.inh_task) { | ||
161 | return 1; | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | } | ||
166 | return 0; /* fall-through. prio(second_task) > prio(first_task) */ | ||
167 | } | ||
168 | |||
169 | int edf_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
170 | { | ||
171 | return edf_higher_prio(bheap2task(a), bheap2task(b)); | ||
172 | } | ||
173 | |||
174 | void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
175 | release_jobs_t release) | ||
176 | { | ||
177 | rt_domain_init(rt, edf_ready_order, resched, release); | ||
178 | } | ||
179 | |||
180 | /* need_to_preempt - check whether the task t needs to be preempted | ||
181 | * call only with irqs disabled and with ready_lock acquired | ||
182 | * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! | ||
183 | */ | ||
184 | int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) | ||
185 | { | ||
186 | /* we need the read lock for edf_ready_queue */ | ||
187 | /* no need to preempt if there is nothing pending */ | ||
188 | if (!__jobs_pending(rt)) | ||
189 | return 0; | ||
190 | /* we need to reschedule if t doesn't exist */ | ||
191 | if (!t) | ||
192 | return 1; | ||
193 | |||
194 | /* NOTE: We cannot check for non-preemptibility since we | ||
195 | * don't know what address space we're currently in. | ||
196 | */ | ||
197 | |||
198 | /* make sure to get non-rt stuff out of the way */ | ||
199 | return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t); | ||
200 | } | ||
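To make the CONFIG_EDF_TIE_BREAK_LATENESS_NORM branch above concrete with made-up numbers: suppose jobs A and B have equal deadlines, A's previous job was 2 ms late with a relative deadline of 10 ms (normalized lateness 0.2), and B's previous job was 3 ms late with a relative deadline of 30 ms (normalized lateness 0.1). Then _gt(_frac(2, 10), _frac(3, 30)) holds and A wins the tie-break even though B's absolute lateness is larger; only if the two normalized values were exactly equal would pid_break be set and the comparison fall through to the PID tie-break.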
diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 000000000000..c4b450be4509 --- /dev/null +++ b/litmus/fdso.c | |||
@@ -0,0 +1,307 @@ | |||
1 | /* fdso.c - file descriptor attached shared objects | ||
2 | * | ||
3 | * (c) 2007 B. Brandenburg, LITMUS^RT project | ||
4 | * | ||
5 | * Notes: | ||
6 | * - object descriptor (OD) tables are not cloned during a fork. | ||
7 | * - objects are created on-demand, and freed after the last reference | ||
8 | * is dropped. | ||
9 | * - for now, object types are hard coded. | ||
10 | * - As long as we have live objects, we keep a reference to the inode. | ||
11 | */ | ||
12 | |||
13 | #include <linux/errno.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mutex.h> | ||
16 | #include <linux/file.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | |||
19 | #include <litmus/fdso.h> | ||
20 | |||
21 | extern struct fdso_ops generic_lock_ops; | ||
22 | |||
23 | static const struct fdso_ops* fdso_ops[] = { | ||
24 | &generic_lock_ops, /* FMLP_SEM */ | ||
25 | &generic_lock_ops, /* SRP_SEM */ | ||
26 | &generic_lock_ops, /* MPCP_SEM */ | ||
27 | &generic_lock_ops, /* MPCP_VS_SEM */ | ||
28 | &generic_lock_ops, /* DPCP_SEM */ | ||
29 | &generic_lock_ops, /* PCP_SEM */ | ||
30 | }; | ||
31 | |||
32 | static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) | ||
33 | { | ||
34 | BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1); | ||
35 | |||
36 | if (fdso_ops[type]->create) | ||
37 | return fdso_ops[type]->create(obj_ref, type, config); | ||
38 | else | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | |||
42 | static void fdso_destroy(obj_type_t type, void* obj) | ||
43 | { | ||
44 | fdso_ops[type]->destroy(type, obj); | ||
45 | } | ||
46 | |||
47 | static int fdso_open(struct od_table_entry* entry, void* __user config) | ||
48 | { | ||
49 | if (fdso_ops[entry->obj->type]->open) | ||
50 | return fdso_ops[entry->obj->type]->open(entry, config); | ||
51 | else | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | static int fdso_close(struct od_table_entry* entry) | ||
56 | { | ||
57 | if (fdso_ops[entry->obj->type]->close) | ||
58 | return fdso_ops[entry->obj->type]->close(entry); | ||
59 | else | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | /* inode must be locked already */ | ||
64 | static int alloc_inode_obj(struct inode_obj_id** obj_ref, | ||
65 | struct inode* inode, | ||
66 | obj_type_t type, | ||
67 | unsigned int id, | ||
68 | void* __user config) | ||
69 | { | ||
70 | struct inode_obj_id* obj; | ||
71 | void* raw_obj; | ||
72 | int err; | ||
73 | |||
74 | obj = kmalloc(sizeof(*obj), GFP_KERNEL); | ||
75 | if (!obj) { | ||
76 | return -ENOMEM; | ||
77 | } | ||
78 | |||
79 | err = fdso_create(&raw_obj, type, config); | ||
80 | if (err != 0) { | ||
81 | kfree(obj); | ||
82 | return err; | ||
83 | } | ||
84 | |||
85 | INIT_LIST_HEAD(&obj->list); | ||
86 | atomic_set(&obj->count, 1); | ||
87 | obj->type = type; | ||
88 | obj->id = id; | ||
89 | obj->obj = raw_obj; | ||
90 | obj->inode = inode; | ||
91 | |||
92 | list_add(&obj->list, &inode->i_obj_list); | ||
93 | atomic_inc(&inode->i_count); | ||
94 | |||
95 | printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id); | ||
96 | |||
97 | *obj_ref = obj; | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | /* inode must be locked already */ | ||
102 | static struct inode_obj_id* get_inode_obj(struct inode* inode, | ||
103 | obj_type_t type, | ||
104 | unsigned int id) | ||
105 | { | ||
106 | struct list_head* pos; | ||
107 | struct inode_obj_id* obj = NULL; | ||
108 | |||
109 | list_for_each(pos, &inode->i_obj_list) { | ||
110 | obj = list_entry(pos, struct inode_obj_id, list); | ||
111 | if (obj->id == id && obj->type == type) { | ||
112 | atomic_inc(&obj->count); | ||
113 | return obj; | ||
114 | } | ||
115 | } | ||
116 | printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); | ||
117 | return NULL; | ||
118 | } | ||
119 | |||
120 | |||
121 | static void put_inode_obj(struct inode_obj_id* obj) | ||
122 | { | ||
123 | struct inode* inode; | ||
124 | int let_go = 0; | ||
125 | |||
126 | inode = obj->inode; | ||
127 | if (atomic_dec_and_test(&obj->count)) { | ||
128 | |||
129 | mutex_lock(&inode->i_obj_mutex); | ||
130 | /* no new references can be obtained */ | ||
131 | if (!atomic_read(&obj->count)) { | ||
132 | list_del(&obj->list); | ||
133 | fdso_destroy(obj->type, obj->obj); | ||
134 | kfree(obj); | ||
135 | let_go = 1; | ||
136 | } | ||
137 | mutex_unlock(&inode->i_obj_mutex); | ||
138 | if (let_go) | ||
139 | iput(inode); | ||
140 | } | ||
141 | } | ||
142 | |||
143 | static struct od_table_entry* get_od_entry(struct task_struct* t) | ||
144 | { | ||
145 | struct od_table_entry* table; | ||
146 | int i; | ||
147 | |||
148 | |||
149 | table = t->od_table; | ||
150 | if (!table) { | ||
151 | table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, | ||
152 | GFP_KERNEL); | ||
153 | t->od_table = table; | ||
154 | } | ||
155 | |||
156 | for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) | ||
157 | if (!table[i].used) { | ||
158 | table[i].used = 1; | ||
159 | return table + i; | ||
160 | } | ||
161 | return NULL; | ||
162 | } | ||
163 | |||
164 | static int put_od_entry(struct od_table_entry* od) | ||
165 | { | ||
166 | put_inode_obj(od->obj); | ||
167 | od->used = 0; | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static long close_od_entry(struct od_table_entry *od) | ||
172 | { | ||
173 | long ret; | ||
174 | |||
175 | /* Give the class a chance to reject the close. */ | ||
176 | ret = fdso_close(od); | ||
177 | if (ret == 0) | ||
178 | ret = put_od_entry(od); | ||
179 | |||
180 | return ret; | ||
181 | } | ||
182 | |||
183 | void exit_od_table(struct task_struct* t) | ||
184 | { | ||
185 | int i; | ||
186 | |||
187 | if (t->od_table) { | ||
188 | for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) | ||
189 | if (t->od_table[i].used) | ||
190 | close_od_entry(t->od_table + i); | ||
191 | kfree(t->od_table); | ||
192 | t->od_table = NULL; | ||
193 | } | ||
194 | } | ||
195 | |||
196 | static int do_sys_od_open(struct file* file, obj_type_t type, int id, | ||
197 | void* __user config) | ||
198 | { | ||
199 | int idx = 0, err = 0; | ||
200 | struct inode* inode; | ||
201 | struct inode_obj_id* obj = NULL; | ||
202 | struct od_table_entry* entry; | ||
203 | |||
204 | inode = file->f_dentry->d_inode; | ||
205 | |||
206 | entry = get_od_entry(current); | ||
207 | if (!entry) | ||
208 | return -ENOMEM; | ||
209 | |||
210 | mutex_lock(&inode->i_obj_mutex); | ||
211 | obj = get_inode_obj(inode, type, id); | ||
212 | if (!obj) | ||
213 | err = alloc_inode_obj(&obj, inode, type, id, config); | ||
214 | if (err != 0) { | ||
215 | obj = NULL; | ||
216 | idx = err; | ||
217 | entry->used = 0; | ||
218 | } else { | ||
219 | entry->obj = obj; | ||
220 | entry->class = fdso_ops[type]; | ||
221 | idx = entry - current->od_table; | ||
222 | } | ||
223 | |||
224 | mutex_unlock(&inode->i_obj_mutex); | ||
225 | |||
226 | /* open only if creation succeeded */ | ||
227 | if (!err) | ||
228 | err = fdso_open(entry, config); | ||
229 | if (err < 0) { | ||
230 | /* The class rejected the open call. | ||
231 | * We need to clean up and tell user space. | ||
232 | */ | ||
233 | if (obj) | ||
234 | put_od_entry(entry); | ||
235 | idx = err; | ||
236 | } | ||
237 | |||
238 | return idx; | ||
239 | } | ||
240 | |||
241 | |||
242 | struct od_table_entry* get_entry_for_od(int od) | ||
243 | { | ||
244 | struct task_struct *t = current; | ||
245 | |||
246 | if (!t->od_table) | ||
247 | return NULL; | ||
248 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
249 | return NULL; | ||
250 | if (!t->od_table[od].used) | ||
251 | return NULL; | ||
252 | return t->od_table + od; | ||
253 | } | ||
254 | |||
255 | |||
256 | asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config) | ||
257 | { | ||
258 | int ret = 0; | ||
259 | struct file* file; | ||
260 | |||
261 | /* | ||
262 | 1) get file from fd, get inode from file | ||
263 | 2) lock inode | ||
264 | 3) try to lookup object | ||
265 | 4) if not present create and enqueue object, inc inode refcnt | ||
266 | 5) increment refcnt of object | ||
267 | 6) alloc od_table_entry, setup ptrs | ||
268 | 7) unlock inode | ||
269 | 8) return offset in od_table as OD | ||
270 | */ | ||
271 | |||
272 | if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) { | ||
273 | ret = -EINVAL; | ||
274 | goto out; | ||
275 | } | ||
276 | |||
277 | file = fget(fd); | ||
278 | if (!file) { | ||
279 | ret = -EBADF; | ||
280 | goto out; | ||
281 | } | ||
282 | |||
283 | ret = do_sys_od_open(file, type, obj_id, config); | ||
284 | |||
285 | fput(file); | ||
286 | |||
287 | out: | ||
288 | return ret; | ||
289 | } | ||
290 | |||
291 | |||
292 | asmlinkage long sys_od_close(int od) | ||
293 | { | ||
294 | int ret = -EINVAL; | ||
295 | struct task_struct *t = current; | ||
296 | |||
297 | if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) | ||
298 | return ret; | ||
299 | |||
300 | if (!t->od_table || !t->od_table[od].used) | ||
301 | return ret; | ||
302 | |||
303 | |||
304 | ret = close_od_entry(t->od_table + od); | ||
305 | |||
306 | return ret; | ||
307 | } | ||
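For context, a user-space client of the two syscalls above might look roughly like the sketch below. This is not part of this patch: litmus_od_open()/litmus_od_close() are assumed thin wrappers around sys_od_open()/sys_od_close(), and the shared "namespace" file is an assumption; only the syscall signatures and the FMLP_SEM object type come from the code above (and the relevant LITMUS^RT user-space headers are assumed to be included).

#include <fcntl.h>
#include <unistd.h>

/* Rough user-space sketch (not part of this patch). */
int attach_fmlp_sem(const char *shared_file, unsigned int obj_id)
{
	int fd, od;

	fd = open(shared_file, O_RDONLY);   /* file whose inode names the object */
	if (fd < 0)
		return -1;

	/* looks up or creates the (FMLP_SEM, obj_id) object on fd's inode */
	od = litmus_od_open(fd, FMLP_SEM, obj_id, NULL);

	/* the fd was only needed to locate the inode */
	close(fd);

	return od;  /* object descriptor, or a negative error code */
}

void detach_fmlp_sem(int od)
{
	litmus_od_close(od);  /* drops the OD entry and its object reference */
}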
diff --git a/litmus/fp_common.c b/litmus/fp_common.c new file mode 100644 index 000000000000..964a4729deff --- /dev/null +++ b/litmus/fp_common.c | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * litmus/fp_common.c | ||
3 | * | ||
4 | * Common functions for fixed-priority scheduler. | ||
5 | */ | ||
6 | |||
7 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/list.h> | ||
10 | |||
11 | #include <litmus/litmus.h> | ||
12 | #include <litmus/sched_plugin.h> | ||
13 | #include <litmus/sched_trace.h> | ||
14 | |||
15 | #include <litmus/fp_common.h> | ||
16 | |||
17 | /* fp_higher_prio - returns true if first has a higher static priority | ||
18 | * than second. Ties are broken by PID. | ||
19 | * | ||
20 | * both first and second may be NULL | ||
21 | */ | ||
22 | int fp_higher_prio(struct task_struct* first, | ||
23 | struct task_struct* second) | ||
24 | { | ||
25 | struct task_struct *first_task = first; | ||
26 | struct task_struct *second_task = second; | ||
27 | |||
28 | /* There is no point in comparing a task to itself. */ | ||
29 | if (unlikely(first && first == second)) { | ||
30 | TRACE_TASK(first, | ||
31 | "WARNING: pointless FP priority comparison.\n"); | ||
32 | return 0; | ||
33 | } | ||
34 | |||
35 | |||
36 | /* check for NULL tasks */ | ||
37 | if (!first || !second) | ||
38 | return first && !second; | ||
39 | |||
40 | if (!is_realtime(second_task)) | ||
41 | return 1; | ||
42 | |||
43 | #ifdef CONFIG_LITMUS_LOCKING | ||
44 | |||
45 | /* Check for inherited priorities. Change task | ||
46 | * used for comparison in such a case. | ||
47 | */ | ||
48 | if (unlikely(first->rt_param.inh_task)) | ||
49 | first_task = first->rt_param.inh_task; | ||
50 | if (unlikely(second->rt_param.inh_task)) | ||
51 | second_task = second->rt_param.inh_task; | ||
52 | |||
53 | /* Check for priority boosting. Tie-break by start of boosting. | ||
54 | */ | ||
55 | if (unlikely(is_priority_boosted(first_task))) { | ||
56 | /* first_task is boosted, how about second_task? */ | ||
57 | if (is_priority_boosted(second_task)) | ||
58 | /* tie-break by earlier boosting start (priority point) */ | ||
59 | return lt_before(get_boost_start(first_task), | ||
60 | get_boost_start(second_task)); | ||
61 | else | ||
62 | /* priority boosting wins. */ | ||
63 | return 1; | ||
64 | } else if (unlikely(is_priority_boosted(second_task))) | ||
65 | /* second_task is boosted, first is not */ | ||
66 | return 0; | ||
67 | |||
68 | #endif | ||
69 | |||
70 | /* Comparisons to itself are not expected; priority inheritance | ||
71 | * should also not cause this to happen. */ | ||
72 | BUG_ON(first_task == second_task); | ||
73 | |||
74 | if (get_priority(first_task) < get_priority(second_task)) | ||
75 | return 1; | ||
76 | else if (get_priority(first_task) == get_priority(second_task)) | ||
77 | /* Break by PID. */ | ||
78 | return first_task->pid < second_task->pid; | ||
79 | else | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | int fp_ready_order(struct bheap_node* a, struct bheap_node* b) | ||
84 | { | ||
85 | return fp_higher_prio(bheap2task(a), bheap2task(b)); | ||
86 | } | ||
87 | |||
88 | void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched, | ||
89 | release_jobs_t release) | ||
90 | { | ||
91 | rt_domain_init(rt, fp_ready_order, resched, release); | ||
92 | } | ||
93 | |||
94 | /* fp_preemption_needed - check whether the task t needs to be preempted | ||
95 | */ | ||
96 | int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t) | ||
97 | { | ||
98 | struct task_struct *pending; | ||
99 | |||
100 | pending = fp_prio_peek(q); | ||
101 | |||
102 | if (!pending) | ||
103 | return 0; | ||
104 | if (!t) | ||
105 | return 1; | ||
106 | |||
107 | /* make sure to get non-rt stuff out of the way */ | ||
108 | return !is_realtime(t) || fp_higher_prio(pending, t); | ||
109 | } | ||
110 | |||
111 | void fp_prio_queue_init(struct fp_prio_queue* q) | ||
112 | { | ||
113 | int i; | ||
114 | |||
115 | for (i = 0; i < FP_PRIO_BIT_WORDS; i++) | ||
116 | q->bitmask[i] = 0; | ||
117 | for (i = 0; i < LITMUS_MAX_PRIORITY; i++) | ||
118 | bheap_init(&q->queue[i]); | ||
119 | } | ||
diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 000000000000..89a0810415fa --- /dev/null +++ b/litmus/jobs.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* litmus/jobs.c - common job control code | ||
2 | */ | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | |||
6 | #include <litmus/litmus.h> | ||
7 | #include <litmus/jobs.h> | ||
8 | |||
9 | static inline void setup_release(struct task_struct *t, lt_t release) | ||
10 | { | ||
11 | /* prepare next release */ | ||
12 | t->rt_param.job_params.release = release; | ||
13 | t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); | ||
14 | t->rt_param.job_params.exec_time = 0; | ||
15 | |||
16 | /* update job sequence number */ | ||
17 | t->rt_param.job_params.job_no++; | ||
18 | } | ||
19 | |||
20 | void prepare_for_next_period(struct task_struct *t) | ||
21 | { | ||
22 | BUG_ON(!t); | ||
23 | |||
24 | /* Record lateness before we set up the next job's | ||
25 | * release and deadline. Lateness may be negative. | ||
26 | */ | ||
27 | t->rt_param.job_params.lateness = | ||
28 | (long long)litmus_clock() - | ||
29 | (long long)t->rt_param.job_params.deadline; | ||
30 | |||
31 | setup_release(t, get_release(t) + get_rt_period(t)); | ||
32 | tsk_rt(t)->dont_requeue = 0; | ||
33 | } | ||
34 | |||
35 | void release_at(struct task_struct *t, lt_t start) | ||
36 | { | ||
37 | BUG_ON(!t); | ||
38 | setup_release(t, start); | ||
39 | tsk_rt(t)->completed = 0; | ||
40 | } | ||
41 | |||
42 | |||
43 | /* | ||
44 | * Deactivate current task until the beginning of the next period. | ||
45 | */ | ||
46 | long complete_job(void) | ||
47 | { | ||
48 | /* Mark that we do not execute anymore */ | ||
49 | tsk_rt(current)->completed = 1; | ||
50 | /* Call schedule(); this will return when a new job arrives. | ||
51 | * It also takes care of preparing for the next release. | ||
52 | */ | ||
53 | schedule(); | ||
54 | return 0; | ||
55 | } | ||
diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 000000000000..7417a8fbda74 --- /dev/null +++ b/litmus/litmus.c | |||
@@ -0,0 +1,576 @@ | |||
1 | /* | ||
2 | * litmus.c -- Implementation of the LITMUS syscalls, | ||
3 | * the LITMUS initialization code, | ||
4 | * and the procfs interface. | ||
5 | */ | ||
6 | #include <asm/uaccess.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/sysrq.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/reboot.h> | ||
13 | #include <linux/stop_machine.h> | ||
14 | |||
15 | #include <litmus/litmus.h> | ||
16 | #include <litmus/bheap.h> | ||
17 | #include <litmus/trace.h> | ||
18 | #include <litmus/rt_domain.h> | ||
19 | #include <litmus/litmus_proc.h> | ||
20 | #include <litmus/sched_trace.h> | ||
21 | |||
22 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
23 | #include <litmus/affinity.h> | ||
24 | #endif | ||
25 | |||
26 | /* Number of RT tasks that exist in the system */ | ||
27 | atomic_t rt_task_count = ATOMIC_INIT(0); | ||
28 | |||
29 | #ifdef CONFIG_RELEASE_MASTER | ||
30 | /* current master CPU for handling timer IRQs */ | ||
31 | atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); | ||
32 | #endif | ||
33 | |||
34 | static struct kmem_cache * bheap_node_cache; | ||
35 | extern struct kmem_cache * release_heap_cache; | ||
36 | |||
37 | struct bheap_node* bheap_node_alloc(int gfp_flags) | ||
38 | { | ||
39 | return kmem_cache_alloc(bheap_node_cache, gfp_flags); | ||
40 | } | ||
41 | |||
42 | void bheap_node_free(struct bheap_node* hn) | ||
43 | { | ||
44 | kmem_cache_free(bheap_node_cache, hn); | ||
45 | } | ||
46 | |||
47 | struct release_heap* release_heap_alloc(int gfp_flags); | ||
48 | void release_heap_free(struct release_heap* rh); | ||
49 | |||
50 | /* | ||
51 | * sys_set_task_rt_param | ||
52 | * @pid: PID of the task whose scheduling parameters are to be changed | ||
53 | * @param: New real-time extension parameters such as the execution cost and | ||
54 | * period | ||
55 | * Syscall for manipulating a task's RT extension parameters. | ||
56 | * Returns EINVAL if param is NULL, pid is negative, or the | ||
57 | * parameters fail validation (e.g., period or execution cost <= 0). | ||
58 | * EFAULT if param cannot be copied from user space. | ||
59 | * ESRCH if pid does not correspond to a valid task. | ||
60 | * EBUSY if the target is already a real-time task. | ||
61 | * 0 on success. | ||
62 | * | ||
63 | * Only non-real-time tasks may be configured with this system call | ||
64 | * to avoid races with the scheduler. In practice, this means that a | ||
65 | * task's parameters must be set _before_ calling sys_prepare_rt_task() | ||
66 | * | ||
67 | * find_task_by_vpid() assumes that we are in the same namespace as the | ||
68 | * target. | ||
69 | */ | ||
70 | asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
71 | { | ||
72 | struct rt_task tp; | ||
73 | struct task_struct *target; | ||
74 | int retval = -EINVAL; | ||
75 | |||
76 | printk("Setting up rt task parameters for process %d.\n", pid); | ||
77 | |||
78 | if (pid < 0 || param == 0) { | ||
79 | goto out; | ||
80 | } | ||
81 | if (copy_from_user(&tp, param, sizeof(tp))) { | ||
82 | retval = -EFAULT; | ||
83 | goto out; | ||
84 | } | ||
85 | |||
86 | /* Task search and manipulation must be protected */ | ||
87 | read_lock_irq(&tasklist_lock); | ||
88 | if (!(target = find_task_by_vpid(pid))) { | ||
89 | retval = -ESRCH; | ||
90 | goto out_unlock; | ||
91 | } | ||
92 | |||
93 | if (is_realtime(target)) { | ||
94 | /* The task is already a real-time task. | ||
95 | * We cannot allow parameter changes at this point. | ||
96 | */ | ||
97 | retval = -EBUSY; | ||
98 | goto out_unlock; | ||
99 | } | ||
100 | |||
101 | /* set relative deadline to be implicit if left unspecified */ | ||
102 | if (tp.relative_deadline == 0) | ||
103 | tp.relative_deadline = tp.period; | ||
104 | |||
105 | if (tp.exec_cost <= 0) | ||
106 | goto out_unlock; | ||
107 | if (tp.period <= 0) | ||
108 | goto out_unlock; | ||
109 | if (!cpu_online(tp.cpu)) | ||
110 | goto out_unlock; | ||
111 | if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/ | ||
112 | { | ||
113 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
114 | "because task density > 1.0\n", pid); | ||
115 | goto out_unlock; | ||
116 | } | ||
117 | if (tp.cls != RT_CLASS_HARD && | ||
118 | tp.cls != RT_CLASS_SOFT && | ||
119 | tp.cls != RT_CLASS_BEST_EFFORT) | ||
120 | { | ||
121 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
122 | "because its class is invalid\n", pid); | ||
123 | goto out_unlock; | ||
124 | } | ||
125 | if (tp.budget_policy != NO_ENFORCEMENT && | ||
126 | tp.budget_policy != QUANTUM_ENFORCEMENT && | ||
127 | tp.budget_policy != PRECISE_ENFORCEMENT) | ||
128 | { | ||
129 | printk(KERN_INFO "litmus: real-time task %d rejected " | ||
130 | "because unsupported budget enforcement policy " | ||
131 | "specified (%d)\n", | ||
132 | pid, tp.budget_policy); | ||
133 | goto out_unlock; | ||
134 | } | ||
135 | |||
136 | target->rt_param.task_params = tp; | ||
137 | |||
138 | retval = 0; | ||
139 | out_unlock: | ||
140 | read_unlock_irq(&tasklist_lock); | ||
141 | out: | ||
142 | return retval; | ||
143 | } | ||
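As a rough illustration of what the validation above accepts, user-space setup might look like the sketch below. This is not part of this patch: set_rt_task_param() and ms2ns() are assumed helpers, and the LITMUS^RT user-space headers defining struct rt_task are assumed to be included; the field names and constants mirror the checks in sys_set_rt_task_param().

/* Hypothetical user-space sketch (not part of this patch). */
static inline lt_t ms2ns(unsigned long ms)
{
	return (lt_t) ms * 1000000ULL;   /* milliseconds to nanoseconds */
}

int make_task_rt_ready(pid_t pid)
{
	struct rt_task tp = {
		.exec_cost         = ms2ns(10),      /* e = 10 ms */
		.period            = ms2ns(100),     /* p = 100 ms */
		.relative_deadline = 0,              /* 0 => implicit deadline (= period) */
		.cpu               = 0,              /* partition; must be online */
		.cls               = RT_CLASS_SOFT,
		.budget_policy     = NO_ENFORCEMENT,
	};

	/* must be called while pid is still a best-effort task */
	return set_rt_task_param(pid, &tp);   /* assumed wrapper around the syscall */
}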
144 | |||
145 | /* | ||
146 | * Getter of task's RT params | ||
147 | * returns EINVAL if param is NULL or pid is negative | ||
148 | * returns ESRCH if pid does not correspond to a valid task | ||
149 | * returns EFAULT if copying of parameters has failed. | ||
150 | * | ||
151 | * find_task_by_vpid() assumes that we are in the same namespace as the | ||
152 | * target. | ||
153 | */ | ||
154 | asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) | ||
155 | { | ||
156 | int retval = -EINVAL; | ||
157 | struct task_struct *source; | ||
158 | struct rt_task lp; | ||
159 | if (param == 0 || pid < 0) | ||
160 | goto out; | ||
161 | read_lock(&tasklist_lock); | ||
162 | if (!(source = find_task_by_vpid(pid))) { | ||
163 | retval = -ESRCH; | ||
164 | goto out_unlock; | ||
165 | } | ||
166 | lp = source->rt_param.task_params; | ||
167 | read_unlock(&tasklist_lock); | ||
168 | /* Do copying outside the lock */ | ||
169 | retval = | ||
170 | copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; | ||
171 | return retval; | ||
172 | out_unlock: | ||
173 | read_unlock(&tasklist_lock); | ||
174 | out: | ||
175 | return retval; | ||
176 | |||
177 | } | ||
178 | |||
179 | /* | ||
180 | * This is the central function for the periodic task implementation. | ||
181 | * It checks whether the task is periodic and whether this kind of sleep | ||
182 | * is permitted, and then calls the plugin-specific sleep code, which puts | ||
183 | * the task into a wait array. | ||
184 | * returns 0 on successful wakeup | ||
185 | * returns EPERM if current conditions do not permit such sleep | ||
186 | * returns EINVAL if current task is not able to go to sleep | ||
187 | */ | ||
188 | asmlinkage long sys_complete_job(void) | ||
189 | { | ||
190 | int retval = -EPERM; | ||
191 | if (!is_realtime(current)) { | ||
192 | retval = -EINVAL; | ||
193 | goto out; | ||
194 | } | ||
195 | /* Task with negative or zero period cannot sleep */ | ||
196 | if (get_rt_period(current) <= 0) { | ||
197 | retval = -EINVAL; | ||
198 | goto out; | ||
199 | } | ||
200 | /* The plugin has to put the task into an | ||
201 | * appropriate queue and call schedule | ||
202 | */ | ||
203 | retval = litmus->complete_job(); | ||
204 | out: | ||
205 | return retval; | ||
206 | } | ||
207 | |||
208 | /* This is an "improved" version of sys_complete_job that | ||
209 | * addresses the problem of unintentionally missing a job after | ||
210 | * an overrun. | ||
211 | * | ||
212 | * returns 0 on successful wakeup | ||
213 | * returns EPERM if current conditions do not permit such sleep | ||
214 | * returns EINVAL if current task is not able to go to sleep | ||
215 | */ | ||
216 | asmlinkage long sys_wait_for_job_release(unsigned int job) | ||
217 | { | ||
218 | int retval = -EPERM; | ||
219 | if (!is_realtime(current)) { | ||
220 | retval = -EINVAL; | ||
221 | goto out; | ||
222 | } | ||
223 | |||
224 | /* Task with negative or zero period cannot sleep */ | ||
225 | if (get_rt_period(current) <= 0) { | ||
226 | retval = -EINVAL; | ||
227 | goto out; | ||
228 | } | ||
229 | |||
230 | retval = 0; | ||
231 | |||
232 | /* first wait until we have "reached" the desired job | ||
233 | * | ||
234 | * This implementation has at least two problems: | ||
235 | * | ||
236 | * 1) It doesn't gracefully handle the wrap around of | ||
237 | * job_no. Since LITMUS is a prototype, this is not much | ||
238 | * of a problem right now. | ||
239 | * | ||
240 | * 2) It is theoretically racy if a job release occurs | ||
241 | * between checking job_no and calling sleep_next_period(). | ||
242 | * A proper solution would require adding another callback | ||
243 | * in the plugin structure and testing the condition with | ||
244 | * interrupts disabled. | ||
245 | * | ||
246 | * FIXME: At least problem 2 should be taken care of eventually. | ||
247 | */ | ||
248 | while (!retval && job > current->rt_param.job_params.job_no) | ||
249 | /* If the last job overran then job <= job_no and we | ||
250 | * don't send the task to sleep. | ||
251 | */ | ||
252 | retval = litmus->complete_job(); | ||
253 | out: | ||
254 | return retval; | ||
255 | } | ||
256 | |||
257 | /* This is a helper syscall to query the current job sequence number. | ||
258 | * | ||
259 | * returns 0 on successful query | ||
260 | * returns EPERM if task is not a real-time task. | ||
261 | * returns EFAULT if &job is not a valid pointer. | ||
262 | */ | ||
263 | asmlinkage long sys_query_job_no(unsigned int __user *job) | ||
264 | { | ||
265 | int retval = -EPERM; | ||
266 | if (is_realtime(current)) | ||
267 | retval = put_user(current->rt_param.job_params.job_no, job); | ||
268 | |||
269 | return retval; | ||
270 | } | ||
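Taken together, the three syscalls above support a job-driven main loop in user space. The following sketch is an assumption-laden illustration, not part of this patch: query_job_no(), wait_for_job_release(), should_exit(), and do_work_for_one_job() are hypothetical wrappers and work functions; only the syscall semantics come from the code above.

/* Hypothetical user-space sketch (not part of this patch): wait for each
 * release explicitly so that an overrun cannot silently skip a job. */
void periodic_main_loop(void)
{
	unsigned int job_no;

	query_job_no(&job_no);               /* wraps sys_query_job_no() */

	while (!should_exit()) {
		do_work_for_one_job();

		/* Wait for the release of the next job; if this job overran,
		 * the call returns immediately (job <= job_no). */
		wait_for_job_release(++job_no);  /* wraps sys_wait_for_job_release() */
	}
}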
271 | |||
272 | /* sys_null_call() is only used for determining raw system call | ||
273 | * overheads (kernel entry, kernel exit). It has no useful side effects. | ||
274 | * If ts is non-NULL, then the current Feather-Trace time is recorded. | ||
275 | */ | ||
276 | asmlinkage long sys_null_call(cycles_t __user *ts) | ||
277 | { | ||
278 | long ret = 0; | ||
279 | cycles_t now; | ||
280 | |||
281 | if (ts) { | ||
282 | now = get_cycles(); | ||
283 | ret = put_user(now, ts); | ||
284 | } | ||
285 | |||
286 | return ret; | ||
287 | } | ||
288 | |||
289 | /* p is a real-time task. Re-init its state as a best-effort task. */ | ||
290 | static void reinit_litmus_state(struct task_struct* p, int restore) | ||
291 | { | ||
292 | struct rt_task user_config = {}; | ||
293 | void* ctrl_page = NULL; | ||
294 | |||
295 | if (restore) { | ||
296 | /* Save user-space provided configuration data | ||
297 | * and the allocated control page. */ | ||
298 | user_config = p->rt_param.task_params; | ||
299 | ctrl_page = p->rt_param.ctrl_page; | ||
300 | } | ||
301 | |||
302 | /* We probably should not be inheriting any task's priority | ||
303 | * at this point in time. | ||
304 | */ | ||
305 | WARN_ON(p->rt_param.inh_task); | ||
306 | |||
307 | /* Cleanup everything else. */ | ||
308 | memset(&p->rt_param, 0, sizeof(p->rt_param)); | ||
309 | |||
310 | /* Restore preserved fields. */ | ||
311 | if (restore) { | ||
312 | p->rt_param.task_params = user_config; | ||
313 | p->rt_param.ctrl_page = ctrl_page; | ||
314 | } | ||
315 | } | ||
316 | |||
317 | long litmus_admit_task(struct task_struct* tsk) | ||
318 | { | ||
319 | long retval = 0; | ||
320 | |||
321 | BUG_ON(is_realtime(tsk)); | ||
322 | |||
323 | tsk_rt(tsk)->heap_node = NULL; | ||
324 | tsk_rt(tsk)->rel_heap = NULL; | ||
325 | |||
326 | if (get_rt_relative_deadline(tsk) == 0 || | ||
327 | get_exec_cost(tsk) > | ||
328 | min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { | ||
329 | TRACE_TASK(tsk, | ||
330 | "litmus admit: invalid task parameters " | ||
331 | "(e = %lu, p = %lu, d = %lu)\n", | ||
332 | get_exec_cost(tsk), get_rt_period(tsk), | ||
333 | get_rt_relative_deadline(tsk)); | ||
334 | retval = -EINVAL; | ||
335 | goto out; | ||
336 | } | ||
337 | |||
338 | if (!cpu_online(get_partition(tsk))) { | ||
339 | TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n", | ||
340 | get_partition(tsk)); | ||
341 | retval = -EINVAL; | ||
342 | goto out; | ||
343 | } | ||
344 | |||
345 | INIT_LIST_HEAD(&tsk_rt(tsk)->list); | ||
346 | |||
347 | /* allocate heap node for this task */ | ||
348 | tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); | ||
349 | tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); | ||
350 | |||
351 | if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { | ||
352 | printk(KERN_WARNING "litmus: no more heap node memory!?\n"); | ||
353 | |||
354 | retval = -ENOMEM; | ||
355 | goto out; | ||
356 | } else { | ||
357 | bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); | ||
358 | } | ||
359 | |||
360 | preempt_disable(); | ||
361 | |||
362 | retval = litmus->admit_task(tsk); | ||
363 | |||
364 | if (!retval) { | ||
365 | sched_trace_task_name(tsk); | ||
366 | sched_trace_task_param(tsk); | ||
367 | atomic_inc(&rt_task_count); | ||
368 | } | ||
369 | |||
370 | preempt_enable(); | ||
371 | |||
372 | out: | ||
373 | if (retval) { | ||
374 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
375 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
376 | } | ||
377 | return retval; | ||
378 | } | ||
379 | |||
380 | void litmus_exit_task(struct task_struct* tsk) | ||
381 | { | ||
382 | if (is_realtime(tsk)) { | ||
383 | sched_trace_task_completion(tsk, 1); | ||
384 | |||
385 | litmus->task_exit(tsk); | ||
386 | |||
387 | BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); | ||
388 | bheap_node_free(tsk_rt(tsk)->heap_node); | ||
389 | release_heap_free(tsk_rt(tsk)->rel_heap); | ||
390 | |||
391 | atomic_dec(&rt_task_count); | ||
392 | reinit_litmus_state(tsk, 1); | ||
393 | } | ||
394 | } | ||
395 | |||
396 | static int do_plugin_switch(void *_plugin) | ||
397 | { | ||
398 | int ret; | ||
399 | struct sched_plugin* plugin = _plugin; | ||
400 | |||
401 | /* don't switch if there are active real-time tasks */ | ||
402 | if (atomic_read(&rt_task_count) == 0) { | ||
403 | ret = litmus->deactivate_plugin(); | ||
404 | if (0 != ret) | ||
405 | goto out; | ||
406 | ret = plugin->activate_plugin(); | ||
407 | if (0 != ret) { | ||
408 | printk(KERN_INFO "Can't activate %s (%d).\n", | ||
409 | plugin->plugin_name, ret); | ||
410 | plugin = &linux_sched_plugin; | ||
411 | } | ||
412 | printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); | ||
413 | litmus = plugin; | ||
414 | } else | ||
415 | ret = -EBUSY; | ||
416 | out: | ||
417 | return ret; | ||
418 | } | ||
419 | |||
420 | /* Switching a plugin in use is tricky. | ||
421 | * We must watch out that no real-time tasks exist | ||
422 | * (and that none is created in parallel) and that the plugin is not | ||
423 | * currently in use on any processor (in theory). | ||
424 | */ | ||
425 | int switch_sched_plugin(struct sched_plugin* plugin) | ||
426 | { | ||
427 | BUG_ON(!plugin); | ||
428 | |||
429 | if (atomic_read(&rt_task_count) == 0) | ||
430 | return stop_machine(do_plugin_switch, plugin, NULL); | ||
431 | else | ||
432 | return -EBUSY; | ||
433 | } | ||
434 | |||
435 | /* Called upon fork. | ||
436 | * p is the newly forked task. | ||
437 | */ | ||
438 | void litmus_fork(struct task_struct* p) | ||
439 | { | ||
440 | if (is_realtime(p)) { | ||
441 | /* clean out any litmus related state, don't preserve anything */ | ||
442 | reinit_litmus_state(p, 0); | ||
443 | /* Don't let the child be a real-time task. */ | ||
444 | p->sched_reset_on_fork = 1; | ||
445 | } else | ||
446 | /* non-rt tasks might have ctrl_page set */ | ||
447 | tsk_rt(p)->ctrl_page = NULL; | ||
448 | |||
449 | /* od tables are never inherited across a fork */ | ||
450 | p->od_table = NULL; | ||
451 | } | ||
452 | |||
453 | /* Called upon execve(). | ||
454 | * current is doing the exec. | ||
455 | * Don't let address space specific stuff leak. | ||
456 | */ | ||
457 | void litmus_exec(void) | ||
458 | { | ||
459 | struct task_struct* p = current; | ||
460 | |||
461 | if (is_realtime(p)) { | ||
462 | WARN_ON(p->rt_param.inh_task); | ||
463 | if (tsk_rt(p)->ctrl_page) { | ||
464 | free_page((unsigned long) tsk_rt(p)->ctrl_page); | ||
465 | tsk_rt(p)->ctrl_page = NULL; | ||
466 | } | ||
467 | } | ||
468 | } | ||
469 | |||
470 | void exit_litmus(struct task_struct *dead_tsk) | ||
471 | { | ||
472 | /* We also allow non-RT tasks to | ||
473 | * allocate control pages to allow | ||
474 | * measurements with non-RT tasks. | ||
475 | * So check if we need to free the page | ||
476 | * in any case. | ||
477 | */ | ||
478 | if (tsk_rt(dead_tsk)->ctrl_page) { | ||
479 | TRACE_TASK(dead_tsk, | ||
480 | "freeing ctrl_page %p\n", | ||
481 | tsk_rt(dead_tsk)->ctrl_page); | ||
482 | free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); | ||
483 | } | ||
484 | |||
485 | /* main cleanup only for RT tasks */ | ||
486 | if (is_realtime(dead_tsk)) | ||
487 | litmus_exit_task(dead_tsk); | ||
488 | } | ||
489 | |||
490 | |||
491 | #ifdef CONFIG_MAGIC_SYSRQ | ||
492 | int sys_kill(int pid, int sig); | ||
493 | |||
494 | static void sysrq_handle_kill_rt_tasks(int key) | ||
495 | { | ||
496 | struct task_struct *t; | ||
497 | read_lock(&tasklist_lock); | ||
498 | for_each_process(t) { | ||
499 | if (is_realtime(t)) { | ||
500 | sys_kill(t->pid, SIGKILL); | ||
501 | } | ||
502 | } | ||
503 | read_unlock(&tasklist_lock); | ||
504 | } | ||
505 | |||
506 | static struct sysrq_key_op sysrq_kill_rt_tasks_op = { | ||
507 | .handler = sysrq_handle_kill_rt_tasks, | ||
508 | .help_msg = "quit-rt-tasks(X)", | ||
509 | .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", | ||
510 | }; | ||
511 | #endif | ||
512 | |||
513 | extern struct sched_plugin linux_sched_plugin; | ||
514 | |||
515 | static int litmus_shutdown_nb(struct notifier_block *unused1, | ||
516 | unsigned long unused2, void *unused3) | ||
517 | { | ||
518 | /* Attempt to switch back to regular Linux scheduling. | ||
519 | * Forces the active plugin to clean up. | ||
520 | */ | ||
521 | if (litmus != &linux_sched_plugin) { | ||
522 | int ret = switch_sched_plugin(&linux_sched_plugin); | ||
523 | if (ret) { | ||
524 | printk("Auto-shutdown of active Litmus plugin failed.\n"); | ||
525 | } | ||
526 | } | ||
527 | return NOTIFY_DONE; | ||
528 | } | ||
529 | |||
530 | static struct notifier_block shutdown_notifier = { | ||
531 | .notifier_call = litmus_shutdown_nb, | ||
532 | }; | ||
533 | |||
534 | static int __init _init_litmus(void) | ||
535 | { | ||
536 | /* Common initializers, | ||
537 | * mode change lock is used to enforce single mode change | ||
538 | * operation. | ||
539 | */ | ||
540 | printk("Starting LITMUS^RT kernel\n"); | ||
541 | |||
542 | register_sched_plugin(&linux_sched_plugin); | ||
543 | |||
544 | bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); | ||
545 | release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); | ||
546 | |||
547 | #ifdef CONFIG_MAGIC_SYSRQ | ||
548 | /* offer some debugging help */ | ||
549 | if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) | ||
550 | printk("Registered kill rt tasks magic sysrq.\n"); | ||
551 | else | ||
552 | printk("Could not register kill rt tasks magic sysrq.\n"); | ||
553 | #endif | ||
554 | |||
555 | init_litmus_proc(); | ||
556 | |||
557 | #ifdef CONFIG_SCHED_CPU_AFFINITY | ||
558 | init_topology(); | ||
559 | #endif | ||
560 | |||
561 | register_reboot_notifier(&shutdown_notifier); | ||
562 | |||
563 | return 0; | ||
564 | } | ||
565 | |||
566 | static void _exit_litmus(void) | ||
567 | { | ||
568 | unregister_reboot_notifier(&shutdown_notifier); | ||
569 | |||
570 | exit_litmus_proc(); | ||
571 | kmem_cache_destroy(bheap_node_cache); | ||
572 | kmem_cache_destroy(release_heap_cache); | ||
573 | } | ||
574 | |||
575 | module_init(_init_litmus); | ||
576 | module_exit(_exit_litmus); | ||
diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c new file mode 100644 index 000000000000..1ebf1277f5d3 --- /dev/null +++ b/litmus/litmus_proc.c | |||
@@ -0,0 +1,407 @@ | |||
1 | /* | ||
2 | * litmus_proc.c -- Implementation of the /proc/litmus directory tree. | ||
3 | */ | ||
4 | |||
5 | #include <linux/sched.h> | ||
6 | #include <linux/uaccess.h> | ||
7 | #include <linux/seq_file.h> | ||
8 | |||
9 | #include <litmus/litmus.h> | ||
10 | #include <litmus/litmus_proc.h> | ||
11 | |||
12 | #include <litmus/clustered.h> | ||
13 | |||
14 | /* in litmus/litmus.c */ | ||
15 | extern atomic_t rt_task_count; | ||
16 | |||
17 | static struct proc_dir_entry *litmus_dir = NULL, | ||
18 | *curr_file = NULL, | ||
19 | *stat_file = NULL, | ||
20 | *plugs_dir = NULL, | ||
21 | #ifdef CONFIG_RELEASE_MASTER | ||
22 | *release_master_file = NULL, | ||
23 | #endif | ||
24 | *plugs_file = NULL; | ||
25 | |||
26 | /* in litmus/sync.c */ | ||
27 | int count_tasks_waiting_for_release(void); | ||
28 | |||
29 | static int litmus_stats_proc_show(struct seq_file *m, void *v) | ||
30 | { | ||
31 | seq_printf(m, | ||
32 | "real-time tasks = %d\n" | ||
33 | "ready for release = %d\n", | ||
34 | atomic_read(&rt_task_count), | ||
35 | count_tasks_waiting_for_release()); | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int litmus_stats_proc_open(struct inode *inode, struct file *file) | ||
40 | { | ||
41 | return single_open(file, litmus_stats_proc_show, PDE_DATA(inode)); | ||
42 | } | ||
43 | |||
44 | static const struct file_operations litmus_stats_proc_fops = { | ||
45 | .open = litmus_stats_proc_open, | ||
46 | .read = seq_read, | ||
47 | .llseek = seq_lseek, | ||
48 | .release = single_release, | ||
49 | }; | ||
50 | |||
51 | |||
52 | static int litmus_loaded_proc_show(struct seq_file *m, void *v) | ||
53 | { | ||
54 | print_sched_plugins(m); | ||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | static int litmus_loaded_proc_open(struct inode *inode, struct file *file) | ||
59 | { | ||
60 | return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode)); | ||
61 | } | ||
62 | |||
63 | static const struct file_operations litmus_loaded_proc_fops = { | ||
64 | .open = litmus_loaded_proc_open, | ||
65 | .read = seq_read, | ||
66 | .llseek = seq_lseek, | ||
67 | .release = single_release, | ||
68 | }; | ||
69 | |||
70 | |||
71 | |||
72 | |||
73 | /* in litmus/litmus.c */ | ||
74 | int switch_sched_plugin(struct sched_plugin*); | ||
75 | |||
76 | static ssize_t litmus_active_proc_write(struct file *file, | ||
77 | const char __user *buffer, size_t count, | ||
78 | loff_t *ppos) | ||
79 | { | ||
80 | char name[65]; | ||
81 | struct sched_plugin* found; | ||
82 | ssize_t ret = -EINVAL; | ||
83 | int err; | ||
84 | |||
85 | |||
86 | ret = copy_and_chomp(name, sizeof(name), buffer, count); | ||
87 | if (ret < 0) | ||
88 | return ret; | ||
89 | |||
90 | found = find_sched_plugin(name); | ||
91 | |||
92 | if (found) { | ||
93 | err = switch_sched_plugin(found); | ||
94 | if (err) { | ||
95 | printk(KERN_INFO "Could not switch plugin: %d\n", err); | ||
96 | ret = err; | ||
97 | } | ||
98 | } else { | ||
99 | printk(KERN_INFO "Plugin '%s' is unknown.\n", name); | ||
100 | ret = -ESRCH; | ||
101 | } | ||
102 | |||
103 | return ret; | ||
104 | } | ||
105 | |||
106 | static int litmus_active_proc_show(struct seq_file *m, void *v) | ||
107 | { | ||
108 | seq_printf(m, "%s\n", litmus->plugin_name); | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | static int litmus_active_proc_open(struct inode *inode, struct file *file) | ||
113 | { | ||
114 | return single_open(file, litmus_active_proc_show, PDE_DATA(inode)); | ||
115 | } | ||
116 | |||
117 | static const struct file_operations litmus_active_proc_fops = { | ||
118 | .open = litmus_active_proc_open, | ||
119 | .read = seq_read, | ||
120 | .llseek = seq_lseek, | ||
121 | .release = single_release, | ||
122 | .write = litmus_active_proc_write, | ||
123 | }; | ||
124 | |||
125 | |||
126 | #ifdef CONFIG_RELEASE_MASTER | ||
127 | static ssize_t litmus_release_master_proc_write( | ||
128 | struct file *file, | ||
129 | const char __user *buffer, size_t count, | ||
130 | loff_t *ppos) | ||
131 | { | ||
132 | int cpu, err, online = 0; | ||
133 | char msg[64]; | ||
134 | ssize_t len; | ||
135 | |||
136 | len = copy_and_chomp(msg, sizeof(msg), buffer, count); | ||
137 | |||
138 | if (len < 0) | ||
139 | return len; | ||
140 | |||
141 | if (strcmp(msg, "NO_CPU") == 0) | ||
142 | atomic_set(&release_master_cpu, NO_CPU); | ||
143 | else { | ||
144 | err = sscanf(msg, "%d", &cpu); | ||
145 | if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { | ||
146 | atomic_set(&release_master_cpu, cpu); | ||
147 | } else { | ||
148 | TRACE("invalid release master: '%s' " | ||
149 | "(err:%d cpu:%d online:%d)\n", | ||
150 | msg, err, cpu, online); | ||
151 | len = -EINVAL; | ||
152 | } | ||
153 | } | ||
154 | return len; | ||
155 | } | ||
156 | |||
157 | static int litmus_release_master_proc_show(struct seq_file *m, void *v) | ||
158 | { | ||
159 | int master; | ||
160 | master = atomic_read(&release_master_cpu); | ||
161 | if (master == NO_CPU) | ||
162 | seq_printf(m, "NO_CPU\n"); | ||
163 | else | ||
164 | seq_printf(m, "%d\n", master); | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static int litmus_release_master_proc_open(struct inode *inode, struct file *file) | ||
169 | { | ||
170 | return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode)); | ||
171 | } | ||
172 | |||
173 | static const struct file_operations litmus_release_master_proc_fops = { | ||
174 | .open = litmus_release_master_proc_open, | ||
175 | .read = seq_read, | ||
176 | .llseek = seq_lseek, | ||
177 | .release = single_release, | ||
178 | .write = litmus_release_master_proc_write, | ||
179 | }; | ||
180 | #endif | ||
181 | |||
182 | int __init init_litmus_proc(void) | ||
183 | { | ||
184 | litmus_dir = proc_mkdir("litmus", NULL); | ||
185 | if (!litmus_dir) { | ||
186 | printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); | ||
187 | return -ENOMEM; | ||
188 | } | ||
189 | |||
190 | curr_file = proc_create("active_plugin", 0644, litmus_dir, | ||
191 | &litmus_active_proc_fops); | ||
192 | |||
193 | if (!curr_file) { | ||
194 | printk(KERN_ERR "Could not allocate active_plugin " | ||
195 | "procfs entry.\n"); | ||
196 | return -ENOMEM; | ||
197 | } | ||
198 | |||
199 | #ifdef CONFIG_RELEASE_MASTER | ||
200 | release_master_file = proc_create("release_master", 0644, litmus_dir, | ||
201 | &litmus_release_master_proc_fops); | ||
202 | if (!release_master_file) { | ||
203 | printk(KERN_ERR "Could not allocate release_master " | ||
204 | "procfs entry.\n"); | ||
205 | return -ENOMEM; | ||
206 | } | ||
207 | #endif | ||
208 | |||
209 | stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops); | ||
210 | |||
211 | plugs_dir = proc_mkdir("plugins", litmus_dir); | ||
212 | if (!plugs_dir){ | ||
213 | printk(KERN_ERR "Could not allocate plugins directory " | ||
214 | "procfs entry.\n"); | ||
215 | return -ENOMEM; | ||
216 | } | ||
217 | |||
218 | plugs_file = proc_create("loaded", 0444, plugs_dir, | ||
219 | &litmus_loaded_proc_fops); | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | void exit_litmus_proc(void) | ||
225 | { | ||
226 | if (plugs_file) | ||
227 | remove_proc_entry("loaded", plugs_dir); | ||
228 | if (plugs_dir) | ||
229 | remove_proc_entry("plugins", litmus_dir); | ||
230 | if (stat_file) | ||
231 | remove_proc_entry("stats", litmus_dir); | ||
232 | if (curr_file) | ||
233 | remove_proc_entry("active_plugin", litmus_dir); | ||
234 | #ifdef CONFIG_RELEASE_MASTER | ||
235 | if (release_master_file) | ||
236 | remove_proc_entry("release_master", litmus_dir); | ||
237 | #endif | ||
238 | if (litmus_dir) | ||
239 | remove_proc_entry("litmus", NULL); | ||
240 | } | ||
241 | |||
242 | long make_plugin_proc_dir(struct sched_plugin* plugin, | ||
243 | struct proc_dir_entry** pde_in) | ||
244 | { | ||
245 | struct proc_dir_entry *pde_new = NULL; | ||
246 | long rv; | ||
247 | |||
248 | if (!plugin || !plugin->plugin_name){ | ||
249 | printk(KERN_ERR "Invalid plugin struct passed to %s.\n", | ||
250 | __func__); | ||
251 | rv = -EINVAL; | ||
252 | goto out_no_pde; | ||
253 | } | ||
254 | |||
255 | if (!plugs_dir){ | ||
256 | printk(KERN_ERR "Could not make plugin sub-directory, because " | ||
257 | "/proc/litmus/plugins does not exist.\n"); | ||
258 | rv = -ENOENT; | ||
259 | goto out_no_pde; | ||
260 | } | ||
261 | |||
262 | pde_new = proc_mkdir(plugin->plugin_name, plugs_dir); | ||
263 | if (!pde_new){ | ||
264 | printk(KERN_ERR "Could not make plugin sub-directory: " | ||
265 | "out of memory?.\n"); | ||
266 | rv = -ENOMEM; | ||
267 | goto out_no_pde; | ||
268 | } | ||
269 | |||
270 | rv = 0; | ||
271 | *pde_in = pde_new; | ||
272 | goto out_ok; | ||
273 | |||
274 | out_no_pde: | ||
275 | *pde_in = NULL; | ||
276 | out_ok: | ||
277 | return rv; | ||
278 | } | ||
279 | |||
280 | void remove_plugin_proc_dir(struct sched_plugin* plugin) | ||
281 | { | ||
282 | if (!plugin || !plugin->plugin_name){ | ||
283 | printk(KERN_ERR "Invalid plugin struct passed to %s.\n", | ||
284 | __func__); | ||
285 | return; | ||
286 | } | ||
287 | remove_proc_entry(plugin->plugin_name, plugs_dir); | ||
288 | } | ||
289 | |||
290 | |||
291 | |||
292 | /* misc. I/O helper functions */ | ||
293 | |||
294 | int copy_and_chomp(char *kbuf, unsigned long ksize, | ||
295 | __user const char* ubuf, unsigned long ulength) | ||
296 | { | ||
297 | /* caller must provide buffer space */ | ||
298 | BUG_ON(!ksize); | ||
299 | |||
300 | ksize--; /* leave space for null byte */ | ||
301 | |||
302 | if (ksize > ulength) | ||
303 | ksize = ulength; | ||
304 | |||
305 | if(copy_from_user(kbuf, ubuf, ksize)) | ||
306 | return -EFAULT; | ||
307 | |||
308 | kbuf[ksize] = '\0'; | ||
309 | |||
310 | /* chomp kbuf */ | ||
311 | if (ksize > 0 && kbuf[ksize - 1] == '\n') | ||
312 | kbuf[ksize - 1] = '\0'; | ||
313 | |||
314 | return ksize; | ||
315 | } | ||
316 | |||
317 | /* helper functions for clustered plugins */ | ||
318 | static const char* cache_level_names[] = { | ||
319 | "ALL", | ||
320 | "L1", | ||
321 | "L2", | ||
322 | "L3", | ||
323 | }; | ||
324 | |||
325 | int parse_cache_level(const char *cache_name, enum cache_level *level) | ||
326 | { | ||
327 | int err = -EINVAL; | ||
328 | int i; | ||
329 | /* do a quick and dirty comparison to find the cluster size */ | ||
330 | for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++) | ||
331 | if (!strcmp(cache_name, cache_level_names[i])) { | ||
332 | *level = (enum cache_level) i; | ||
333 | err = 0; | ||
334 | break; | ||
335 | } | ||
336 | return err; | ||
337 | } | ||
338 | |||
339 | const char* cache_level_name(enum cache_level level) | ||
340 | { | ||
341 | int idx = level; | ||
342 | |||
343 | if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER) | ||
344 | return cache_level_names[idx]; | ||
345 | else | ||
346 | return "INVALID"; | ||
347 | } | ||
348 | |||
349 | |||
350 | |||
351 | |||
352 | /* proc file interface to configure the cluster size */ | ||
353 | |||
354 | static ssize_t litmus_cluster_proc_write(struct file *file, | ||
355 | const char __user *buffer, size_t count, | ||
356 | loff_t *ppos) | ||
357 | { | ||
358 | enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file)); | ||
359 | ssize_t len; | ||
360 | char cache_name[8]; | ||
361 | |||
362 | len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count); | ||
363 | |||
364 | if (len > 0 && parse_cache_level(cache_name, level)) { | ||
365 | printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); | ||
366 | len = -EINVAL; | ||
367 | } | ||
368 | |||
369 | return len; | ||
370 | } | ||
371 | |||
372 | static int litmus_cluster_proc_show(struct seq_file *m, void *v) | ||
373 | { | ||
374 | enum cache_level *level = (enum cache_level *) m->private; | ||
375 | |||
376 | seq_printf(m, "%s\n", cache_level_name(*level)); | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | static int litmus_cluster_proc_open(struct inode *inode, struct file *file) | ||
381 | { | ||
382 | return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode)); | ||
383 | } | ||
384 | |||
385 | static const struct file_operations litmus_cluster_proc_fops = { | ||
386 | .open = litmus_cluster_proc_open, | ||
387 | .read = seq_read, | ||
388 | .llseek = seq_lseek, | ||
389 | .release = single_release, | ||
390 | .write = litmus_cluster_proc_write, | ||
391 | }; | ||
392 | |||
393 | struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent, | ||
394 | enum cache_level* level) | ||
395 | { | ||
396 | struct proc_dir_entry* cluster_file; | ||
397 | |||
398 | |||
399 | cluster_file = proc_create_data("cluster", 0644, parent, | ||
400 | &litmus_cluster_proc_fops, | ||
401 | (void *) level); | ||
402 | if (!cluster_file) { | ||
403 | printk(KERN_ERR | ||
404 | "Could not cluster procfs entry.\n"); | ||
405 | } | ||
406 | return cluster_file; | ||
407 | } | ||
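A clustered plugin would typically combine make_plugin_proc_dir() and create_cluster_file() roughly as sketched below. This is an illustration under stated assumptions, not part of this patch: my_plugin, my_plugin_dir, cluster_config, and the activate/deactivate hooks are hypothetical; the helpers themselves are the ones defined above.

#include <litmus/litmus_proc.h>
#include <litmus/clustered.h>

/* Hypothetical plugin-side sketch (not part of this patch). */
static enum cache_level cluster_config = GLOBAL_CLUSTER;
static struct proc_dir_entry *my_plugin_dir;
extern struct sched_plugin my_plugin;    /* assumed plugin descriptor */

static long my_plugin_activate(void)
{
	/* creates /proc/litmus/plugins/<name>/ and its "cluster" file */
	long err = make_plugin_proc_dir(&my_plugin, &my_plugin_dir);
	if (!err && my_plugin_dir)
		create_cluster_file(my_plugin_dir, &cluster_config);
	return err;
}

static long my_plugin_deactivate(void)
{
	if (my_plugin_dir) {
		remove_proc_entry("cluster", my_plugin_dir);
		remove_plugin_proc_dir(&my_plugin);
		my_plugin_dir = NULL;
	}
	return 0;
}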
diff --git a/litmus/locking.c b/litmus/locking.c new file mode 100644 index 000000000000..43d9aece2e74 --- /dev/null +++ b/litmus/locking.c | |||
@@ -0,0 +1,188 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <litmus/litmus.h> | ||
3 | #include <litmus/fdso.h> | ||
4 | |||
5 | #ifdef CONFIG_LITMUS_LOCKING | ||
6 | |||
7 | #include <linux/sched.h> | ||
8 | #include <litmus/litmus.h> | ||
9 | #include <litmus/sched_plugin.h> | ||
10 | #include <litmus/trace.h> | ||
11 | #include <litmus/wait.h> | ||
12 | |||
13 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); | ||
14 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg); | ||
15 | static int close_generic_lock(struct od_table_entry* entry); | ||
16 | static void destroy_generic_lock(obj_type_t type, void* sem); | ||
17 | |||
18 | struct fdso_ops generic_lock_ops = { | ||
19 | .create = create_generic_lock, | ||
20 | .open = open_generic_lock, | ||
21 | .close = close_generic_lock, | ||
22 | .destroy = destroy_generic_lock | ||
23 | }; | ||
24 | |||
25 | static inline bool is_lock(struct od_table_entry* entry) | ||
26 | { | ||
27 | return entry->class == &generic_lock_ops; | ||
28 | } | ||
29 | |||
30 | static inline struct litmus_lock* get_lock(struct od_table_entry* entry) | ||
31 | { | ||
32 | BUG_ON(!is_lock(entry)); | ||
33 | return (struct litmus_lock*) entry->obj->obj; | ||
34 | } | ||
35 | |||
36 | static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) | ||
37 | { | ||
38 | struct litmus_lock* lock; | ||
39 | int err; | ||
40 | |||
41 | err = litmus->allocate_lock(&lock, type, arg); | ||
42 | if (err == 0) | ||
43 | *obj_ref = lock; | ||
44 | return err; | ||
45 | } | ||
46 | |||
47 | static int open_generic_lock(struct od_table_entry* entry, void* __user arg) | ||
48 | { | ||
49 | struct litmus_lock* lock = get_lock(entry); | ||
50 | if (lock->ops->open) | ||
51 | return lock->ops->open(lock, arg); | ||
52 | else | ||
53 | return 0; /* default: any task can open it */ | ||
54 | } | ||
55 | |||
56 | static int close_generic_lock(struct od_table_entry* entry) | ||
57 | { | ||
58 | struct litmus_lock* lock = get_lock(entry); | ||
59 | if (lock->ops->close) | ||
60 | return lock->ops->close(lock); | ||
61 | else | ||
62 | return 0; /* default: closing succeeds */ | ||
63 | } | ||
64 | |||
65 | static void destroy_generic_lock(obj_type_t type, void* obj) | ||
66 | { | ||
67 | struct litmus_lock* lock = (struct litmus_lock*) obj; | ||
68 | lock->ops->deallocate(lock); | ||
69 | } | ||
70 | |||
71 | asmlinkage long sys_litmus_lock(int lock_od) | ||
72 | { | ||
73 | long err = -EINVAL; | ||
74 | struct od_table_entry* entry; | ||
75 | struct litmus_lock* l; | ||
76 | |||
77 | TS_SYSCALL_IN_START; | ||
78 | |||
79 | TS_SYSCALL_IN_END; | ||
80 | |||
81 | TS_LOCK_START; | ||
82 | |||
83 | entry = get_entry_for_od(lock_od); | ||
84 | if (entry && is_lock(entry)) { | ||
85 | l = get_lock(entry); | ||
86 | TRACE_CUR("attempts to lock 0x%p\n", l); | ||
87 | err = l->ops->lock(l); | ||
88 | } | ||
89 | |||
90 | /* Note: task may have been suspended or preempted in between! Take | ||
91 | * this into account when computing overheads. */ | ||
92 | TS_LOCK_END; | ||
93 | |||
94 | TS_SYSCALL_OUT_START; | ||
95 | |||
96 | return err; | ||
97 | } | ||
98 | |||
99 | asmlinkage long sys_litmus_unlock(int lock_od) | ||
100 | { | ||
101 | long err = -EINVAL; | ||
102 | struct od_table_entry* entry; | ||
103 | struct litmus_lock* l; | ||
104 | |||
105 | TS_SYSCALL_IN_START; | ||
106 | |||
107 | TS_SYSCALL_IN_END; | ||
108 | |||
109 | TS_UNLOCK_START; | ||
110 | |||
111 | entry = get_entry_for_od(lock_od); | ||
112 | if (entry && is_lock(entry)) { | ||
113 | l = get_lock(entry); | ||
114 | TRACE_CUR("attempts to unlock 0x%p\n", l); | ||
115 | err = l->ops->unlock(l); | ||
116 | } | ||
117 | |||
118 | /* Note: task may have been preempted in between! Take this into | ||
119 | * account when computing overheads. */ | ||
120 | TS_UNLOCK_END; | ||
121 | |||
122 | TS_SYSCALL_OUT_START; | ||
123 | |||
124 | return err; | ||
125 | } | ||
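A user-space critical section built on the two syscalls above might look roughly like the sketch below. This is not part of this patch: litmus_lock()/litmus_unlock() are assumed wrappers around sys_litmus_lock()/sys_litmus_unlock(), access_shared_resource() is hypothetical, and lock_od is an object descriptor previously obtained via sys_od_open() (see litmus/fdso.c).

/* Hypothetical user-space sketch (not part of this patch). */
int with_resource(int lock_od)
{
	int ret;

	ret = litmus_lock(lock_od);      /* may suspend until the lock is granted */
	if (ret)
		return ret;

	access_shared_resource();        /* critical section */

	return litmus_unlock(lock_od);   /* wakes up the next waiter, if any */
}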
126 | |||
127 | struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) | ||
128 | { | ||
129 | wait_queue_t* q; | ||
130 | struct task_struct* t = NULL; | ||
131 | |||
132 | if (waitqueue_active(wq)) { | ||
133 | q = list_entry(wq->task_list.next, | ||
134 | wait_queue_t, task_list); | ||
135 | t = (struct task_struct*) q->private; | ||
136 | __remove_wait_queue(wq, q); | ||
137 | } | ||
138 | return(t); | ||
139 | } | ||
140 | |||
141 | unsigned int __add_wait_queue_prio_exclusive( | ||
142 | wait_queue_head_t* head, | ||
143 | prio_wait_queue_t *new) | ||
144 | { | ||
145 | struct list_head *pos; | ||
146 | unsigned int passed = 0; | ||
147 | |||
148 | new->wq.flags |= WQ_FLAG_EXCLUSIVE; | ||
149 | |||
150 | /* find the first queued entry that new should precede */ | ||
151 | list_for_each(pos, &head->task_list) { | ||
152 | prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t, | ||
153 | wq.task_list); | ||
154 | |||
155 | if (unlikely(lt_before(new->priority, queued->priority) || | ||
156 | (new->priority == queued->priority && | ||
157 | new->tie_breaker < queued->tie_breaker))) { | ||
158 | /* new has higher priority than the entry at pos, thus insert it here (before pos) */ | ||
159 | __list_add(&new->wq.task_list, pos->prev, pos); | ||
160 | goto out; | ||
161 | } | ||
162 | passed++; | ||
163 | } | ||
164 | |||
165 | /* If we get to this point, either the list is empty or every | ||
166 | * queued entry has equal or higher priority than new. | ||
167 | * Add new at the end. */ | ||
168 | list_add_tail(&new->wq.task_list, &head->task_list); | ||
169 | out: | ||
170 | return passed; | ||
171 | } | ||
172 | |||
173 | |||
174 | #else | ||
175 | |||
176 | struct fdso_ops generic_lock_ops = {}; | ||
177 | |||
178 | asmlinkage long sys_litmus_lock(int sem_od) | ||
179 | { | ||
180 | return -ENOSYS; | ||
181 | } | ||
182 | |||
183 | asmlinkage long sys_litmus_unlock(int sem_od) | ||
184 | { | ||
185 | return -ENOSYS; | ||
186 | } | ||
187 | |||
188 | #endif | ||
diff --git a/litmus/preempt.c b/litmus/preempt.c new file mode 100644 index 000000000000..6be2f26728b8 --- /dev/null +++ b/litmus/preempt.c | |||
@@ -0,0 +1,137 @@ | |||
1 | #include <linux/sched.h> | ||
2 | |||
3 | #include <litmus/litmus.h> | ||
4 | #include <litmus/preempt.h> | ||
5 | #include <litmus/trace.h> | ||
6 | |||
7 | /* The rescheduling state of each processor. | ||
8 | */ | ||
9 | DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); | ||
10 | |||
11 | void sched_state_will_schedule(struct task_struct* tsk) | ||
12 | { | ||
13 | /* Litmus hack: we only care about processor-local invocations of | ||
14 | * set_tsk_need_resched(). We can't reliably set the flag remotely | ||
15 | * since it might race with other updates to the scheduling state. We | ||
16 | * can't rely on the runqueue lock protecting updates to the sched | ||
17 | * state since processors do not acquire the runqueue locks for all | ||
18 | * updates to the sched state (to avoid acquiring two runqueue locks at | ||
19 | * the same time). Further, if tsk is residing on a remote processor, | ||
20 | * then that processor doesn't actually know yet that it is going to | ||
21 | * reschedule; it still must receive an IPI (unless a local invocation | ||
22 | * races). | ||
23 | */ | ||
24 | if (likely(task_cpu(tsk) == smp_processor_id())) { | ||
25 | VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE); | ||
26 | if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) | ||
27 | set_sched_state(PICKED_WRONG_TASK); | ||
28 | else | ||
29 | set_sched_state(WILL_SCHEDULE); | ||
30 | } else | ||
31 | /* Litmus tasks should never be subject to a remote | ||
32 | * set_tsk_need_resched(). */ | ||
33 | BUG_ON(is_realtime(tsk)); | ||
34 | #ifdef CONFIG_PREEMPT_STATE_TRACE | ||
35 | TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", | ||
36 | __builtin_return_address(0)); | ||
37 | #endif | ||
38 | } | ||
39 | |||
40 | /* Called by the IPI handler after another CPU called smp_send_reschedule(). */ | ||
41 | void sched_state_ipi(void) | ||
42 | { | ||
43 | /* If the IPI was slow, we might be in any state right now. The IPI is | ||
44 | * only meaningful if we are in SHOULD_SCHEDULE. */ | ||
45 | if (is_in_sched_state(SHOULD_SCHEDULE)) { | ||
46 | /* Cause scheduler to be invoked. | ||
47 | * This will cause a transition to WILL_SCHEDULE. */ | ||
48 | set_tsk_need_resched(current); | ||
49 | TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", | ||
50 | current->comm, current->pid); | ||
51 | TS_SEND_RESCHED_END; | ||
52 | } else { | ||
53 | /* ignore */ | ||
54 | TRACE_STATE("ignoring IPI in state %x (%s)\n", | ||
55 | get_sched_state(), | ||
56 | sched_state_name(get_sched_state())); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | /* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must | ||
61 | * hold the lock that is used to serialize scheduling decisions. */ | ||
62 | void litmus_reschedule(int cpu) | ||
63 | { | ||
64 | int picked_transition_ok = 0; | ||
65 | int scheduled_transition_ok = 0; | ||
66 | |||
67 | /* The (remote) CPU could be in any state. */ | ||
68 | |||
69 | /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU | ||
70 | * is not aware of the need to reschedule at this point. */ | ||
71 | |||
72 | /* is a context switch in progress? */ | ||
73 | if (cpu_is_in_sched_state(cpu, TASK_PICKED)) | ||
74 | picked_transition_ok = sched_state_transition_on( | ||
75 | cpu, TASK_PICKED, PICKED_WRONG_TASK); | ||
76 | |||
77 | if (!picked_transition_ok && | ||
78 | cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { | ||
79 | /* We either raced with the end of the context switch, or the | ||
80 | * CPU was in TASK_SCHEDULED anyway. */ | ||
81 | scheduled_transition_ok = sched_state_transition_on( | ||
82 | cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); | ||
83 | } | ||
84 | |||
85 | /* If the CPU was in state TASK_SCHEDULED, then we need to cause the | ||
86 | * scheduler to be invoked. */ | ||
87 | if (scheduled_transition_ok) { | ||
88 | if (smp_processor_id() == cpu) | ||
89 | set_tsk_need_resched(current); | ||
90 | else { | ||
91 | TS_SEND_RESCHED_START(cpu); | ||
92 | smp_send_reschedule(cpu); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | TRACE_STATE("%s picked-ok:%d sched-ok:%d\n", | ||
97 | __FUNCTION__, | ||
98 | picked_transition_ok, | ||
99 | scheduled_transition_ok); | ||
100 | } | ||
101 | |||
102 | void litmus_reschedule_local(void) | ||
103 | { | ||
104 | if (is_in_sched_state(TASK_PICKED)) | ||
105 | set_sched_state(PICKED_WRONG_TASK); | ||
106 | else if (is_in_sched_state(TASK_SCHEDULED | SHOULD_SCHEDULE)) { | ||
107 | set_sched_state(WILL_SCHEDULE); | ||
108 | set_tsk_need_resched(current); | ||
109 | } | ||
110 | } | ||
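As an informal summary, inferred from the code above rather than taken verbatim from this patch, the per-CPU rescheduling state machine behaves roughly as sketched in the following comment block (states as listed in sched_state_name() below):

/* Informal summary (assumption: inferred from sched_state_will_schedule(),
 * sched_state_ipi(), litmus_reschedule(), and litmus_reschedule_local()):
 *
 *   TASK_SCHEDULED  -- remote litmus_reschedule() ------> SHOULD_SCHEDULE
 *   SHOULD_SCHEDULE -- IPI / local resched -------------> WILL_SCHEDULE
 *   TASK_SCHEDULED  -- local resched -------------------> WILL_SCHEDULE
 *   TASK_PICKED     -- resched after a task was picked --> PICKED_WRONG_TASK
 *
 * WILL_SCHEDULE and PICKED_WRONG_TASK both cause the scheduler to be
 * (re-)invoked; plugins enter TASK_PICKED by calling
 * sched_state_task_picked() once a scheduling decision has been made.
 */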
111 | |||
112 | #ifdef CONFIG_DEBUG_KERNEL | ||
113 | |||
114 | void sched_state_plugin_check(void) | ||
115 | { | ||
116 | if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) { | ||
117 | TRACE("!!!! plugin did not call sched_state_task_picked()!" | ||
118 | "Calling sched_state_task_picked() is mandatory---fix this.\n"); | ||
119 | set_sched_state(TASK_PICKED); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | #define NAME_CHECK(x) case x: return #x | ||
124 | const char* sched_state_name(int s) | ||
125 | { | ||
126 | switch (s) { | ||
127 | NAME_CHECK(TASK_SCHEDULED); | ||
128 | NAME_CHECK(SHOULD_SCHEDULE); | ||
129 | NAME_CHECK(WILL_SCHEDULE); | ||
130 | NAME_CHECK(TASK_PICKED); | ||
131 | NAME_CHECK(PICKED_WRONG_TASK); | ||
132 | default: | ||
133 | return "UNKNOWN"; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | #endif | ||
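For orientation, the sketch below shows how a plugin is expected to use this state machine: litmus_reschedule() is invoked while holding the lock that serializes the plugin's scheduling decisions, and sched_state_task_picked() must be called from the plugin's schedule() callback (see sched_state_plugin_check() above). This is a hedged illustration only; the lock, demo_should_preempt(), and demo_job_arrival() are hypothetical names, not part of this patch.

#include <linux/spinlock.h>
#include <litmus/preempt.h>

static DEFINE_RAW_SPINLOCK(demo_lock);	/* serializes the plugin's scheduling decisions */

/* Illustrative placeholder: should the job running on "cpu" be preempted
 * by a newly arrived job? */
static int demo_should_preempt(int cpu)
{
	return 1;
}

/* Called on job arrival: request a reschedule on "cpu" while the decision
 * is still protected by demo_lock, as litmus_reschedule() requires. */
static void demo_job_arrival(int cpu)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	if (demo_should_preempt(cpu))
		litmus_reschedule(cpu);
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}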
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c new file mode 100644 index 000000000000..bfadf1284291 --- /dev/null +++ b/litmus/rt_domain.c | |||
@@ -0,0 +1,348 @@ | |||
1 | /* | ||
2 | * litmus/rt_domain.c | ||
3 | * | ||
4 | * LITMUS real-time infrastructure. This file contains the | ||
5 | * functions that manipulate RT domains. RT domains are an abstraction | ||
6 | * of a ready queue and a release queue. | ||
7 | */ | ||
8 | |||
9 | #include <linux/percpu.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/slab.h> | ||
13 | |||
14 | #include <litmus/litmus.h> | ||
15 | #include <litmus/sched_plugin.h> | ||
16 | #include <litmus/sched_trace.h> | ||
17 | |||
18 | #include <litmus/rt_domain.h> | ||
19 | |||
20 | #include <litmus/trace.h> | ||
21 | |||
22 | #include <litmus/bheap.h> | ||
23 | |||
24 | /* Uncomment when debugging timer races... */ | ||
25 | #if 0 | ||
26 | #define VTRACE_TASK TRACE_TASK | ||
27 | #define VTRACE TRACE | ||
28 | #else | ||
29 | #define VTRACE_TASK(t, fmt, args...) /* shut up */ | ||
30 | #define VTRACE(fmt, args...) /* be quiet already */ | ||
31 | #endif | ||
32 | |||
33 | static int dummy_resched(rt_domain_t *rt) | ||
34 | { | ||
35 | return 0; | ||
36 | } | ||
37 | |||
38 | static int dummy_order(struct bheap_node* a, struct bheap_node* b) | ||
39 | { | ||
40 | return 0; | ||
41 | } | ||
42 | |||
43 | /* default implementation: use default lock */ | ||
44 | static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
45 | { | ||
46 | merge_ready(rt, tasks); | ||
47 | } | ||
48 | |||
49 | static unsigned int time2slot(lt_t time) | ||
50 | { | ||
51 | return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; | ||
52 | } | ||
53 | |||
54 | static enum hrtimer_restart on_release_timer(struct hrtimer *timer) | ||
55 | { | ||
56 | unsigned long flags; | ||
57 | struct release_heap* rh; | ||
58 | rh = container_of(timer, struct release_heap, timer); | ||
59 | |||
60 | TS_RELEASE_LATENCY(rh->release_time); | ||
61 | |||
62 | VTRACE("on_release_timer(0x%p) starts.\n", timer); | ||
63 | |||
64 | TS_RELEASE_START; | ||
65 | |||
66 | |||
67 | raw_spin_lock_irqsave(&rh->dom->release_lock, flags); | ||
68 | VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); | ||
69 | /* remove from release queue */ | ||
70 | list_del(&rh->list); | ||
71 | raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags); | ||
72 | VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); | ||
73 | |||
74 | /* call release callback */ | ||
75 | rh->dom->release_jobs(rh->dom, &rh->heap); | ||
76 | /* WARNING: rh can be referenced from other CPUs from now on. */ | ||
77 | |||
78 | TS_RELEASE_END; | ||
79 | |||
80 | VTRACE("on_release_timer(0x%p) ends.\n", timer); | ||
81 | |||
82 | return HRTIMER_NORESTART; | ||
83 | } | ||
84 | |||
85 | /* allocated in litmus.c */ | ||
86 | struct kmem_cache * release_heap_cache; | ||
87 | |||
88 | struct release_heap* release_heap_alloc(int gfp_flags) | ||
89 | { | ||
90 | struct release_heap* rh; | ||
91 | rh = kmem_cache_alloc(release_heap_cache, gfp_flags); | ||
92 | if (rh) { | ||
93 | /* initialize timer */ | ||
94 | hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
95 | rh->timer.function = on_release_timer; | ||
96 | } | ||
97 | return rh; | ||
98 | } | ||
99 | |||
100 | void release_heap_free(struct release_heap* rh) | ||
101 | { | ||
102 | /* make sure timer is no longer in use */ | ||
103 | hrtimer_cancel(&rh->timer); | ||
104 | kmem_cache_free(release_heap_cache, rh); | ||
105 | } | ||
106 | |||
107 | /* Caller must hold release lock. | ||
108 | * Will return the heap for the given time. If no such heap exists prior to | ||
109 | * the invocation, it will be created. | ||
110 | */ | ||
111 | static struct release_heap* get_release_heap(rt_domain_t *rt, | ||
112 | struct task_struct* t, | ||
113 | int use_task_heap) | ||
114 | { | ||
115 | struct list_head* pos; | ||
116 | struct release_heap* heap = NULL; | ||
117 | struct release_heap* rh; | ||
118 | lt_t release_time = get_release(t); | ||
119 | unsigned int slot = time2slot(release_time); | ||
120 | |||
121 | /* initialize pos for the case that the list is empty */ | ||
122 | pos = rt->release_queue.slot[slot].next; | ||
123 | list_for_each(pos, &rt->release_queue.slot[slot]) { | ||
124 | rh = list_entry(pos, struct release_heap, list); | ||
125 | if (release_time == rh->release_time) { | ||
126 | /* perfect match -- this happens on hyperperiod | ||
127 | * boundaries | ||
128 | */ | ||
129 | heap = rh; | ||
130 | break; | ||
131 | } else if (lt_before(release_time, rh->release_time)) { | ||
132 | /* we need to insert a new node since rh is | ||
133 | * already in the future | ||
134 | */ | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | if (!heap && use_task_heap) { | ||
139 | /* use pre-allocated release heap */ | ||
140 | rh = tsk_rt(t)->rel_heap; | ||
141 | |||
142 | rh->dom = rt; | ||
143 | rh->release_time = release_time; | ||
144 | |||
145 | /* add to release queue */ | ||
146 | list_add(&rh->list, pos->prev); | ||
147 | heap = rh; | ||
148 | } | ||
149 | return heap; | ||
150 | } | ||
151 | |||
152 | static void reinit_release_heap(struct task_struct* t) | ||
153 | { | ||
154 | struct release_heap* rh; | ||
155 | |||
156 | /* use pre-allocated release heap */ | ||
157 | rh = tsk_rt(t)->rel_heap; | ||
158 | |||
159 | /* Make sure it is safe to use. The timer callback could still | ||
160 | * be executing on another CPU; hrtimer_cancel() will wait | ||
161 | * until the timer callback has completed. However, under no | ||
162 | * circumstances should the timer be active (= yet to be | ||
163 | * triggered). | ||
164 | * | ||
165 | * WARNING: If the CPU still holds the release_lock at this point, | ||
166 | * deadlock may occur! | ||
167 | */ | ||
168 | BUG_ON(hrtimer_cancel(&rh->timer)); | ||
169 | |||
170 | /* initialize */ | ||
171 | bheap_init(&rh->heap); | ||
172 | #ifdef CONFIG_RELEASE_MASTER | ||
173 | atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); | ||
174 | #endif | ||
175 | } | ||
176 | /* arm_release_timer() - start local release timer or trigger | ||
177 | * remote timer (pull timer) | ||
178 | * | ||
179 | * Called by add_release() with: | ||
180 | * - tobe_lock taken | ||
181 | * - IRQ disabled | ||
182 | */ | ||
183 | #ifdef CONFIG_RELEASE_MASTER | ||
184 | #define arm_release_timer(t) arm_release_timer_on((t), NO_CPU) | ||
185 | static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu) | ||
186 | #else | ||
187 | static void arm_release_timer(rt_domain_t *_rt) | ||
188 | #endif | ||
189 | { | ||
190 | rt_domain_t *rt = _rt; | ||
191 | struct list_head list; | ||
192 | struct list_head *pos, *safe; | ||
193 | struct task_struct* t; | ||
194 | struct release_heap* rh; | ||
195 | |||
196 | VTRACE("arm_release_timer() at %llu\n", litmus_clock()); | ||
197 | list_replace_init(&rt->tobe_released, &list); | ||
198 | |||
199 | list_for_each_safe(pos, safe, &list) { | ||
200 | /* pick task off the work list */ | ||
201 | t = list_entry(pos, struct task_struct, rt_param.list); | ||
202 | sched_trace_task_release(t); | ||
203 | list_del(pos); | ||
204 | |||
205 | /* put into release heap while holding release_lock */ | ||
206 | raw_spin_lock(&rt->release_lock); | ||
207 | VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock); | ||
208 | |||
209 | rh = get_release_heap(rt, t, 0); | ||
210 | if (!rh) { | ||
211 | /* need to use our own, but drop lock first */ | ||
212 | raw_spin_unlock(&rt->release_lock); | ||
213 | VTRACE_TASK(t, "Dropped release_lock 0x%p\n", | ||
214 | &rt->release_lock); | ||
215 | |||
216 | reinit_release_heap(t); | ||
217 | VTRACE_TASK(t, "release_heap ready\n"); | ||
218 | |||
219 | raw_spin_lock(&rt->release_lock); | ||
220 | VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n", | ||
221 | &rt->release_lock); | ||
222 | |||
223 | rh = get_release_heap(rt, t, 1); | ||
224 | } | ||
225 | bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); | ||
226 | VTRACE_TASK(t, "arm_release_timer(): added to release heap\n"); | ||
227 | |||
228 | raw_spin_unlock(&rt->release_lock); | ||
229 | VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); | ||
230 | |||
231 | /* To avoid arming the timer multiple times, we only let the | ||
232 | * owner do the arming (which is the "first" task to reference | ||
233 | * this release_heap anyway). | ||
234 | */ | ||
235 | if (rh == tsk_rt(t)->rel_heap) { | ||
236 | VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer); | ||
237 | /* we cannot arm the timer using hrtimer_start() | ||
238 | * as it may deadlock on rq->lock | ||
239 | * | ||
240 | * PINNED mode is ok on both local and remote CPU | ||
241 | */ | ||
242 | #ifdef CONFIG_RELEASE_MASTER | ||
243 | if (rt->release_master == NO_CPU && | ||
244 | target_cpu == NO_CPU) | ||
245 | #endif | ||
246 | __hrtimer_start_range_ns(&rh->timer, | ||
247 | ns_to_ktime(rh->release_time), | ||
248 | 0, HRTIMER_MODE_ABS_PINNED, 0); | ||
249 | #ifdef CONFIG_RELEASE_MASTER | ||
250 | else | ||
251 | hrtimer_start_on( | ||
252 | /* target_cpu overrides release master */ | ||
253 | (target_cpu != NO_CPU ? | ||
254 | target_cpu : rt->release_master), | ||
255 | &rh->info, &rh->timer, | ||
256 | ns_to_ktime(rh->release_time), | ||
257 | HRTIMER_MODE_ABS_PINNED); | ||
258 | #endif | ||
259 | } else | ||
260 | VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer); | ||
261 | } | ||
262 | } | ||
263 | |||
264 | void rt_domain_init(rt_domain_t *rt, | ||
265 | bheap_prio_t order, | ||
266 | check_resched_needed_t check, | ||
267 | release_jobs_t release | ||
268 | ) | ||
269 | { | ||
270 | int i; | ||
271 | |||
272 | BUG_ON(!rt); | ||
273 | if (!check) | ||
274 | check = dummy_resched; | ||
275 | if (!release) | ||
276 | release = default_release_jobs; | ||
277 | if (!order) | ||
278 | order = dummy_order; | ||
279 | |||
280 | #ifdef CONFIG_RELEASE_MASTER | ||
281 | rt->release_master = NO_CPU; | ||
282 | #endif | ||
283 | |||
284 | bheap_init(&rt->ready_queue); | ||
285 | INIT_LIST_HEAD(&rt->tobe_released); | ||
286 | for (i = 0; i < RELEASE_QUEUE_SLOTS; i++) | ||
287 | INIT_LIST_HEAD(&rt->release_queue.slot[i]); | ||
288 | |||
289 | raw_spin_lock_init(&rt->ready_lock); | ||
290 | raw_spin_lock_init(&rt->release_lock); | ||
291 | raw_spin_lock_init(&rt->tobe_lock); | ||
292 | |||
293 | rt->check_resched = check; | ||
294 | rt->release_jobs = release; | ||
295 | rt->order = order; | ||
296 | } | ||
297 | |||
298 | /* add_ready - add a real-time task to the rt ready queue. It must be runnable. | ||
299 | * @new: the newly released task | ||
300 | */ | ||
301 | void __add_ready(rt_domain_t* rt, struct task_struct *new) | ||
302 | { | ||
303 | TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " | ||
304 | "to ready queue at %llu\n", | ||
305 | new->comm, new->pid, | ||
306 | get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), | ||
307 | get_release(new), litmus_clock()); | ||
308 | |||
309 | BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); | ||
310 | |||
311 | bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); | ||
312 | rt->check_resched(rt); | ||
313 | } | ||
314 | |||
315 | /* merge_ready - add a sorted set of tasks to the rt ready queue. They must be runnable. | ||
316 | * @tasks: the newly released tasks | ||
317 | */ | ||
318 | void __merge_ready(rt_domain_t* rt, struct bheap* tasks) | ||
319 | { | ||
320 | bheap_union(rt->order, &rt->ready_queue, tasks); | ||
321 | rt->check_resched(rt); | ||
322 | } | ||
323 | |||
324 | |||
325 | #ifdef CONFIG_RELEASE_MASTER | ||
326 | void __add_release_on(rt_domain_t* rt, struct task_struct *task, | ||
327 | int target_cpu) | ||
328 | { | ||
329 | TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n", | ||
330 | get_release(task), target_cpu); | ||
331 | list_add(&tsk_rt(task)->list, &rt->tobe_released); | ||
332 | task->rt_param.domain = rt; | ||
333 | |||
334 | arm_release_timer_on(rt, target_cpu); | ||
335 | } | ||
336 | #endif | ||
337 | |||
338 | /* add_release - add a real-time task to the rt release queue. | ||
339 | * @task: the sleeping task | ||
340 | */ | ||
341 | void __add_release(rt_domain_t* rt, struct task_struct *task) | ||
342 | { | ||
343 | TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); | ||
344 | list_add(&tsk_rt(task)->list, &rt->tobe_released); | ||
345 | task->rt_param.domain = rt; | ||
346 | |||
347 | arm_release_timer(rt); | ||
348 | } | ||
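To make the intended use of this API concrete, here is a hedged sketch of how a plugin might instantiate an rt_domain and requeue a runnable job. It assumes the usual convention that the __-prefixed functions expect the caller to hold the corresponding lock (here, ready_lock). demo_order(), demo_check_resched(), demo_domain_setup(), and demo_requeue() are illustrative placeholders, not part of this patch; a real plugin would supply a proper priority order (e.g., EDF).

#include <linux/sched.h>
#include <linux/spinlock.h>

#include <litmus/rt_domain.h>
#include <litmus/preempt.h>

static rt_domain_t demo_domain;

/* Illustrative placeholder priority order (a real plugin would compare the
 * deadlines or fixed priorities of the two heap nodes). */
static int demo_order(struct bheap_node *a, struct bheap_node *b)
{
	return 0;
}

/* Illustrative resched callback: invoked by __add_ready()/__merge_ready()
 * after the ready queue has changed. */
static int demo_check_resched(rt_domain_t *rt)
{
	litmus_reschedule_local();
	return 1;
}

static void demo_domain_setup(void)
{
	/* NULL release callback => default_release_jobs() merges released
	 * jobs into the ready queue. */
	rt_domain_init(&demo_domain, demo_order, demo_check_resched, NULL);
}

/* Requeue a runnable job; __add_ready() is called with ready_lock held. */
static void demo_requeue(struct task_struct *t)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_domain.ready_lock, flags);
	__add_ready(&demo_domain, t);
	raw_spin_unlock_irqrestore(&demo_domain.ready_lock, flags);
}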
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 000000000000..c4747e0ef2ab --- /dev/null +++ b/litmus/sched_plugin.c | |||
@@ -0,0 +1,224 @@ | |||
1 | /* sched_plugin.c -- core infrastructure for the scheduler plugin system | ||
2 | * | ||
3 | * This file includes the initialization of the plugin system, the no-op Linux | ||
4 | * scheduler plugin, some dummy functions, and some helper functions. | ||
5 | */ | ||
6 | |||
7 | #include <linux/list.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/seq_file.h> | ||
11 | |||
12 | #include <litmus/litmus.h> | ||
13 | #include <litmus/sched_plugin.h> | ||
14 | #include <litmus/preempt.h> | ||
15 | #include <litmus/jobs.h> | ||
16 | |||
17 | /* | ||
18 | * Generic function to trigger preemption on either local or remote cpu | ||
19 | * from scheduler plugins. The key feature is that this function is | ||
20 | * non-preemptive section aware and does not invoke the scheduler / send | ||
21 | * IPIs if the to-be-preempted task is actually non-preemptive. | ||
22 | */ | ||
23 | void preempt_if_preemptable(struct task_struct* t, int cpu) | ||
24 | { | ||
25 | /* t is the real-time task executing on CPU cpu. If t is NULL, then | ||
26 | * cpu is currently scheduling background work. | ||
27 | */ | ||
28 | |||
29 | int reschedule = 0; | ||
30 | |||
31 | if (!t) | ||
32 | /* move non-real-time task out of the way */ | ||
33 | reschedule = 1; | ||
34 | else { | ||
35 | if (smp_processor_id() == cpu) { | ||
36 | /* local CPU case */ | ||
37 | /* check if we need to poke userspace */ | ||
38 | if (is_user_np(t)) | ||
39 | /* Yes, poke it. This doesn't have to be atomic since | ||
40 | * the task is definitely not executing. */ | ||
41 | request_exit_np(t); | ||
42 | else if (!is_kernel_np(t)) | ||
43 | /* only if we are allowed to preempt the | ||
44 | * currently-executing task */ | ||
45 | reschedule = 1; | ||
46 | } else { | ||
47 | /* Remote CPU case. Only notify if it's not a kernel | ||
48 | * NP section and if we didn't set the userspace | ||
49 | * flag. */ | ||
50 | reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); | ||
51 | } | ||
52 | } | ||
53 | if (likely(reschedule)) | ||
54 | litmus_reschedule(cpu); | ||
55 | } | ||
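As a small hedged illustration of the comment above: a plugin that tracks which real-time task is linked to each CPU could route preemption requests through preempt_if_preemptable() rather than calling litmus_reschedule() directly, so that non-preemptive sections are honored. demo_linked_on() and demo_preempt_cpu() are hypothetical helpers (relying on the surrounding file's context), not part of this patch.

/* Hypothetical helper: the real-time task currently linked to "cpu",
 * or NULL if the CPU is running background (non-real-time) work. */
static struct task_struct *demo_linked_on(int cpu)
{
	return NULL;	/* placeholder */
}

static void demo_preempt_cpu(int cpu)
{
	/* NP-section aware: no IPI is sent if the task cannot be preempted
	 * right now (it is merely asked to exit its NP section). */
	preempt_if_preemptable(demo_linked_on(cpu), cpu);
}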
56 | |||
57 | |||
58 | /************************************************************* | ||
59 | * Dummy plugin functions * | ||
60 | *************************************************************/ | ||
61 | |||
62 | static void litmus_dummy_finish_switch(struct task_struct * prev) | ||
63 | { | ||
64 | } | ||
65 | |||
66 | static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) | ||
67 | { | ||
68 | sched_state_task_picked(); | ||
69 | return NULL; | ||
70 | } | ||
71 | |||
72 | static void litmus_dummy_tick(struct task_struct* tsk) | ||
73 | { | ||
74 | } | ||
75 | |||
76 | static long litmus_dummy_admit_task(struct task_struct* tsk) | ||
77 | { | ||
78 | printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", | ||
79 | tsk->comm, tsk->pid); | ||
80 | return -EINVAL; | ||
81 | } | ||
82 | |||
83 | static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) | ||
84 | { | ||
85 | } | ||
86 | |||
87 | static void litmus_dummy_task_wake_up(struct task_struct *task) | ||
88 | { | ||
89 | } | ||
90 | |||
91 | static void litmus_dummy_task_block(struct task_struct *task) | ||
92 | { | ||
93 | } | ||
94 | |||
95 | static void litmus_dummy_task_exit(struct task_struct *task) | ||
96 | { | ||
97 | } | ||
98 | |||
99 | static long litmus_dummy_complete_job(void) | ||
100 | { | ||
101 | return -ENOSYS; | ||
102 | } | ||
103 | |||
104 | static long litmus_dummy_activate_plugin(void) | ||
105 | { | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | static long litmus_dummy_deactivate_plugin(void) | ||
110 | { | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | #ifdef CONFIG_LITMUS_LOCKING | ||
115 | |||
116 | static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, | ||
117 | void* __user config) | ||
118 | { | ||
119 | return -ENXIO; | ||
120 | } | ||
121 | |||
122 | #endif | ||
123 | |||
124 | |||
125 | /* The default scheduler plugin. It doesn't do anything and lets Linux do its | ||
126 | * job. | ||
127 | */ | ||
128 | struct sched_plugin linux_sched_plugin = { | ||
129 | .plugin_name = "Linux", | ||
130 | .tick = litmus_dummy_tick, | ||
131 | .task_new = litmus_dummy_task_new, | ||
132 | .task_exit = litmus_dummy_task_exit, | ||
133 | .task_wake_up = litmus_dummy_task_wake_up, | ||
134 | .task_block = litmus_dummy_task_block, | ||
135 | .complete_job = litmus_dummy_complete_job, | ||
136 | .schedule = litmus_dummy_schedule, | ||
137 | .finish_switch = litmus_dummy_finish_switch, | ||
138 | .activate_plugin = litmus_dummy_activate_plugin, | ||
139 | .deactivate_plugin = litmus_dummy_deactivate_plugin, | ||
140 | #ifdef CONFIG_LITMUS_LOCKING | ||
141 | .allocate_lock = litmus_dummy_allocate_lock, | ||
142 | #endif | ||
143 | .admit_task = litmus_dummy_admit_task | ||
144 | }; | ||
145 | |||
146 | /* | ||
147 | * The reference to the current plugin that is used to schedule tasks within | ||
148 | * the system. It stores references to the actual function implementations | ||
149 | * and should be initialized by calling "init_***_plugin()". | ||
150 | */ | ||
151 | struct sched_plugin *litmus = &linux_sched_plugin; | ||
152 | |||
153 | /* the list of registered scheduling plugins */ | ||
154 | static LIST_HEAD(sched_plugins); | ||
155 | static DEFINE_RAW_SPINLOCK(sched_plugins_lock); | ||
156 | |||
157 | #define CHECK(func) do { \ | ||
158 | if (!plugin->func) \ | ||
159 | plugin->func = litmus_dummy_ ## func; } while (0) | ||
160 | |||
161 | /* FIXME: get reference to module */ | ||
162 | int register_sched_plugin(struct sched_plugin* plugin) | ||
163 | { | ||
164 | printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", | ||
165 | plugin->plugin_name); | ||
166 | |||
167 | /* make sure we don't trip over null pointers later */ | ||
168 | CHECK(finish_switch); | ||
169 | CHECK(schedule); | ||
170 | CHECK(tick); | ||
171 | CHECK(task_wake_up); | ||
172 | CHECK(task_exit); | ||
173 | CHECK(task_block); | ||
174 | CHECK(task_new); | ||
175 | CHECK(complete_job); | ||
176 | CHECK(activate_plugin); | ||
177 | CHECK(deactivate_plugin); | ||
178 | #ifdef CONFIG_LITMUS_LOCKING | ||
179 | CHECK(allocate_lock); | ||
180 | #endif | ||
181 | CHECK(admit_task); | ||
182 | |||
183 | if (!plugin->release_at) | ||
184 | plugin->release_at = release_at; | ||
185 | |||
186 | raw_spin_lock(&sched_plugins_lock); | ||
187 | list_add(&plugin->list, &sched_plugins); | ||
188 | raw_spin_unlock(&sched_plugins_lock); | ||
189 | |||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | |||
194 | /* FIXME: reference counting, etc. */ | ||
195 | struct sched_plugin* find_sched_plugin(const char* name) | ||
196 | { | ||
197 | struct list_head *pos; | ||
198 | struct sched_plugin *plugin; | ||
199 | |||
200 | raw_spin_lock(&sched_plugins_lock); | ||
201 | list_for_each(pos, &sched_plugins) { | ||
202 | plugin = list_entry(pos, struct sched_plugin, list); | ||
203 | if (!strcmp(plugin->plugin_name, name)) | ||
204 | goto out_unlock; | ||
205 | } | ||
206 | plugin = NULL; | ||
207 | |||
208 | out_unlock: | ||
209 | raw_spin_unlock(&sched_plugins_lock); | ||
210 | return plugin; | ||
211 | } | ||
212 | |||
213 | void print_sched_plugins(struct seq_file *m) | ||
214 | { | ||
215 | struct list_head *pos; | ||
216 | struct sched_plugin *plugin; | ||
217 | |||
218 | raw_spin_lock(&sched_plugins_lock); | ||
219 | list_for_each(pos, &sched_plugins) { | ||
220 | plugin = list_entry(pos, struct sched_plugin, list); | ||
221 | seq_printf(m, "%s\n", plugin->plugin_name); | ||
222 | } | ||
223 | raw_spin_unlock(&sched_plugins_lock); | ||
224 | } | ||
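To make the registration flow concrete, here is a hedged sketch of a minimal plugin: only plugin_name, schedule, and admit_task are provided, and register_sched_plugin() fills in the litmus_dummy_* defaults for the remaining callbacks via CHECK(). The "DEMO" plugin and its callbacks are illustrative only, not part of this patch.

#include <linux/module.h>
#include <linux/sched.h>

#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	/* Every plugin must report its decision to the preemption state
	 * machine (see sched_state_plugin_check() in litmus/preempt.c). */
	sched_state_task_picked();
	return NULL;	/* nothing to run: let Linux schedule background work */
}

static long demo_admit_task(struct task_struct *tsk)
{
	return 0;	/* accept every task (illustration only) */
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "DEMO",
	.schedule	= demo_schedule,
	.admit_task	= demo_admit_task,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}

module_init(init_demo_plugin);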
diff --git a/litmus/srp.c b/litmus/srp.c new file mode 100644 index 000000000000..c88dbf2f580f --- /dev/null +++ b/litmus/srp.c | |||
@@ -0,0 +1,305 @@ | |||
1 | /* ************************************************************************** */ | ||
2 | /* STACK RESOURCE POLICY */ | ||
3 | /* ************************************************************************** */ | ||
4 | |||
5 | #include <asm/atomic.h> | ||
6 | #include <linux/sched.h> | ||
7 | #include <linux/wait.h> | ||
8 | |||
9 | #include <litmus/litmus.h> | ||
10 | #include <litmus/sched_plugin.h> | ||
11 | #include <litmus/fdso.h> | ||
12 | #include <litmus/trace.h> | ||
13 | |||
14 | |||
15 | #ifdef CONFIG_LITMUS_LOCKING | ||
16 | |||
17 | #include <litmus/srp.h> | ||
18 | |||
19 | srp_prioritization_t get_srp_prio; | ||
20 | |||
21 | struct srp { | ||
22 | struct list_head ceiling; | ||
23 | wait_queue_head_t ceiling_blocked; | ||
24 | }; | ||
25 | #define system_ceiling(srp) list2prio(srp->ceiling.next) | ||
26 | #define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling) | ||
27 | |||
28 | #define UNDEF_SEM -2 | ||
29 | |||
30 | atomic_t srp_objects_in_use = ATOMIC_INIT(0); | ||
31 | |||
32 | DEFINE_PER_CPU(struct srp, srp); | ||
33 | |||
34 | /* Initialize SRP semaphores at boot time. */ | ||
35 | static int __init srp_init(void) | ||
36 | { | ||
37 | int i; | ||
38 | |||
39 | printk("Initializing SRP per-CPU ceilings..."); | ||
40 | for (i = 0; i < NR_CPUS; i++) { | ||
41 | init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked); | ||
42 | INIT_LIST_HEAD(&per_cpu(srp, i).ceiling); | ||
43 | } | ||
44 | printk(" done!\n"); | ||
45 | |||
46 | return 0; | ||
47 | } | ||
48 | module_init(srp_init); | ||
49 | |||
50 | /* SRP task priority comparison function. Smaller numeric values have higher | ||
51 | * priority, tie-break is PID. Special case: priority == 0 <=> no priority | ||
52 | */ | ||
53 | static int srp_higher_prio(struct srp_priority* first, | ||
54 | struct srp_priority* second) | ||
55 | { | ||
56 | if (!first->priority) | ||
57 | return 0; | ||
58 | else | ||
59 | return !second->priority || | ||
60 | first->priority < second->priority || ( | ||
61 | first->priority == second->priority && | ||
62 | first->pid < second->pid); | ||
63 | } | ||
64 | |||
65 | |||
66 | static int srp_exceeds_ceiling(struct task_struct* first, | ||
67 | struct srp* srp) | ||
68 | { | ||
69 | struct srp_priority prio; | ||
70 | |||
71 | if (list_empty(&srp->ceiling)) | ||
72 | return 1; | ||
73 | else { | ||
74 | prio.pid = first->pid; | ||
75 | prio.priority = get_srp_prio(first); | ||
76 | return srp_higher_prio(&prio, system_ceiling(srp)) || | ||
77 | ceiling2sem(system_ceiling(srp))->owner == first; | ||
78 | } | ||
79 | } | ||
80 | |||
81 | static void srp_add_prio(struct srp* srp, struct srp_priority* prio) | ||
82 | { | ||
83 | struct list_head *pos; | ||
84 | if (in_list(&prio->list)) { | ||
85 | printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in " | ||
86 | "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio)); | ||
87 | return; | ||
88 | } | ||
89 | list_for_each(pos, &srp->ceiling) | ||
90 | if (unlikely(srp_higher_prio(prio, list2prio(pos)))) { | ||
91 | __list_add(&prio->list, pos->prev, pos); | ||
92 | return; | ||
93 | } | ||
94 | |||
95 | list_add_tail(&prio->list, &srp->ceiling); | ||
96 | } | ||
97 | |||
98 | |||
99 | static int lock_srp_semaphore(struct litmus_lock* l) | ||
100 | { | ||
101 | struct task_struct* t = current; | ||
102 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
103 | |||
104 | if (!is_realtime(t)) | ||
105 | return -EPERM; | ||
106 | |||
107 | /* prevent acquisition of local locks in global critical sections */ | ||
108 | if (tsk_rt(t)->num_locks_held) | ||
109 | return -EBUSY; | ||
110 | |||
111 | preempt_disable(); | ||
112 | |||
113 | /* Update ceiling. */ | ||
114 | srp_add_prio(&__get_cpu_var(srp), &sem->ceiling); | ||
115 | |||
116 | /* SRP invariant: all resources available */ | ||
117 | BUG_ON(sem->owner != NULL); | ||
118 | |||
119 | sem->owner = t; | ||
120 | TRACE_CUR("acquired srp 0x%p\n", sem); | ||
121 | |||
122 | tsk_rt(t)->num_local_locks_held++; | ||
123 | |||
124 | preempt_enable(); | ||
125 | |||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static int unlock_srp_semaphore(struct litmus_lock* l) | ||
130 | { | ||
131 | struct task_struct* t = current; | ||
132 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
133 | int err = 0; | ||
134 | |||
135 | preempt_disable(); | ||
136 | |||
137 | if (sem->owner != t) { | ||
138 | err = -EINVAL; | ||
139 | } else { | ||
140 | /* Determine new system priority ceiling for this CPU. */ | ||
141 | BUG_ON(!in_list(&sem->ceiling.list)); | ||
142 | |||
143 | list_del(&sem->ceiling.list); | ||
144 | sem->owner = NULL; | ||
145 | |||
146 | /* Wake tasks on this CPU, if they exceed current ceiling. */ | ||
147 | TRACE_CUR("released srp 0x%p\n", sem); | ||
148 | wake_up_all(&__get_cpu_var(srp).ceiling_blocked); | ||
149 | |||
150 | tsk_rt(t)->num_local_locks_held--; | ||
151 | } | ||
152 | |||
153 | preempt_enable(); | ||
154 | return err; | ||
155 | } | ||
156 | |||
157 | static int open_srp_semaphore(struct litmus_lock* l, void* __user arg) | ||
158 | { | ||
159 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
160 | int err = 0; | ||
161 | struct task_struct* t = current; | ||
162 | struct srp_priority t_prio; | ||
163 | |||
164 | if (!is_realtime(t)) | ||
165 | return -EPERM; | ||
166 | |||
167 | TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu); | ||
168 | |||
169 | preempt_disable(); | ||
170 | |||
171 | if (sem->owner != NULL) | ||
172 | err = -EBUSY; | ||
173 | |||
174 | if (err == 0) { | ||
175 | if (sem->cpu == UNDEF_SEM) | ||
176 | sem->cpu = get_partition(t); | ||
177 | else if (sem->cpu != get_partition(t)) | ||
178 | err = -EPERM; | ||
179 | } | ||
180 | |||
181 | if (err == 0) { | ||
182 | t_prio.priority = get_srp_prio(t); | ||
183 | t_prio.pid = t->pid; | ||
184 | if (srp_higher_prio(&t_prio, &sem->ceiling)) { | ||
185 | sem->ceiling.priority = t_prio.priority; | ||
186 | sem->ceiling.pid = t_prio.pid; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | preempt_enable(); | ||
191 | |||
192 | return err; | ||
193 | } | ||
194 | |||
195 | static int close_srp_semaphore(struct litmus_lock* l) | ||
196 | { | ||
197 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
198 | int err = 0; | ||
199 | |||
200 | preempt_disable(); | ||
201 | |||
202 | if (sem->owner == current) | ||
203 | unlock_srp_semaphore(l); | ||
204 | |||
205 | preempt_enable(); | ||
206 | |||
207 | return err; | ||
208 | } | ||
209 | |||
210 | static void deallocate_srp_semaphore(struct litmus_lock* l) | ||
211 | { | ||
212 | struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); | ||
213 | atomic_dec(&srp_objects_in_use); | ||
214 | kfree(sem); | ||
215 | } | ||
216 | |||
217 | static struct litmus_lock_ops srp_lock_ops = { | ||
218 | .open = open_srp_semaphore, | ||
219 | .close = close_srp_semaphore, | ||
220 | .lock = lock_srp_semaphore, | ||
221 | .unlock = unlock_srp_semaphore, | ||
222 | .deallocate = deallocate_srp_semaphore, | ||
223 | }; | ||
224 | |||
225 | struct srp_semaphore* allocate_srp_semaphore(void) | ||
226 | { | ||
227 | struct srp_semaphore* sem; | ||
228 | |||
229 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
230 | if (!sem) | ||
231 | return NULL; | ||
232 | |||
233 | INIT_LIST_HEAD(&sem->ceiling.list); | ||
234 | sem->ceiling.priority = 0; | ||
235 | sem->cpu = UNDEF_SEM; | ||
236 | sem->owner = NULL; | ||
237 | |||
238 | sem->litmus_lock.ops = &srp_lock_ops; | ||
239 | |||
240 | atomic_inc(&srp_objects_in_use); | ||
241 | return sem; | ||
242 | } | ||
243 | |||
244 | static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, | ||
245 | void *key) | ||
246 | { | ||
247 | int cpu = smp_processor_id(); | ||
248 | struct task_struct *tsk = wait->private; | ||
249 | if (cpu != get_partition(tsk)) | ||
250 | TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n", | ||
251 | get_partition(tsk)); | ||
252 | else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
253 | return default_wake_function(wait, mode, sync, key); | ||
254 | return 0; | ||
255 | } | ||
256 | |||
257 | static void do_ceiling_block(struct task_struct *tsk) | ||
258 | { | ||
259 | wait_queue_t wait = { | ||
260 | .private = tsk, | ||
261 | .func = srp_wake_up, | ||
262 | .task_list = {NULL, NULL} | ||
263 | }; | ||
264 | |||
265 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
266 | add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
267 | tsk->rt_param.srp_non_recurse = 1; | ||
268 | preempt_enable_no_resched(); | ||
269 | schedule(); | ||
270 | preempt_disable(); | ||
271 | tsk->rt_param.srp_non_recurse = 0; | ||
272 | remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait); | ||
273 | } | ||
274 | |||
275 | /* Wait for current task priority to exceed system-wide priority ceiling. | ||
276 | * FIXME: the hotpath should be inline. | ||
277 | */ | ||
278 | void srp_ceiling_block(void) | ||
279 | { | ||
280 | struct task_struct *tsk = current; | ||
281 | |||
283 | /* Only applies to real-time tasks; callers are expected to usually be RT tasks. */ | ||
283 | if (unlikely(!is_realtime(tsk))) | ||
284 | return; | ||
285 | |||
286 | /* Avoid recursive ceiling blocking. */ | ||
287 | if (unlikely(tsk->rt_param.srp_non_recurse)) | ||
288 | return; | ||
289 | |||
290 | /* Bail out early if there aren't any SRP resources around. */ | ||
291 | if (likely(!atomic_read(&srp_objects_in_use))) | ||
292 | return; | ||
293 | |||
294 | preempt_disable(); | ||
295 | if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) { | ||
296 | TRACE_CUR("is priority ceiling blocked.\n"); | ||
297 | while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) | ||
298 | do_ceiling_block(tsk); | ||
299 | TRACE_CUR("finally exceeds system ceiling.\n"); | ||
300 | } else | ||
301 | TRACE_CUR("is not priority ceiling blocked\n"); | ||
302 | preempt_enable(); | ||
303 | } | ||
304 | |||
305 | #endif | ||
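The priority rule implemented by srp_higher_prio() (smaller numeric value wins, PID breaks ties, 0 means "no priority") can be checked with a small user-space program. The struct and values below are illustrative only and mirror, rather than reuse, the kernel definitions.

#include <stdio.h>

/* Mirrors the comparison in srp_higher_prio(): lower numeric value means
 * higher priority, ties are broken by PID, and priority 0 means "none". */
struct demo_prio {
	unsigned int priority;
	int pid;
};

static int demo_higher_prio(const struct demo_prio *a, const struct demo_prio *b)
{
	if (!a->priority)
		return 0;
	return !b->priority ||
	       a->priority < b->priority ||
	       (a->priority == b->priority && a->pid < b->pid);
}

int main(void)
{
	struct demo_prio t1   = { 10, 42 };	/* higher priority (smaller value) */
	struct demo_prio t2   = { 20, 17 };
	struct demo_prio none = {  0, 99 };	/* "no priority" */

	printf("t1 > t2:   %d\n", demo_higher_prio(&t1, &t2));		/* 1 */
	printf("t2 > t1:   %d\n", demo_higher_prio(&t2, &t1));		/* 0 */
	printf("t1 > none: %d\n", demo_higher_prio(&t1, &none));	/* 1 */
	printf("none > t1: %d\n", demo_higher_prio(&none, &t1));	/* 0 */
	return 0;
}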
diff --git a/litmus/sync.c b/litmus/sync.c new file mode 100644 index 000000000000..61a95463e4d2 --- /dev/null +++ b/litmus/sync.c | |||
@@ -0,0 +1,166 @@ | |||
1 | /* litmus/sync.c - Support for synchronous and asynchronous task system releases. | ||
2 | * | ||
3 | * | ||
4 | */ | ||
5 | |||
6 | #include <asm/atomic.h> | ||
7 | #include <asm/uaccess.h> | ||
8 | #include <linux/spinlock.h> | ||
9 | #include <linux/list.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/completion.h> | ||
12 | |||
13 | #include <litmus/litmus.h> | ||
14 | #include <litmus/sched_plugin.h> | ||
15 | #include <litmus/jobs.h> | ||
16 | |||
17 | #include <litmus/sched_trace.h> | ||
18 | |||
19 | struct ts_release_wait { | ||
20 | struct list_head list; | ||
21 | struct completion completion; | ||
22 | lt_t ts_release_time; | ||
23 | }; | ||
24 | |||
25 | #define DECLARE_TS_RELEASE_WAIT(symb) \ | ||
26 | struct ts_release_wait symb = \ | ||
27 | { \ | ||
28 | LIST_HEAD_INIT(symb.list), \ | ||
29 | COMPLETION_INITIALIZER_ONSTACK(symb.completion), \ | ||
30 | 0 \ | ||
31 | } | ||
32 | |||
33 | static LIST_HEAD(task_release_list); | ||
34 | static DEFINE_MUTEX(task_release_lock); | ||
35 | |||
36 | static long do_wait_for_ts_release(void) | ||
37 | { | ||
38 | DECLARE_TS_RELEASE_WAIT(wait); | ||
39 | |||
40 | long ret = -ERESTARTSYS; | ||
41 | |||
42 | if (mutex_lock_interruptible(&task_release_lock)) | ||
43 | goto out; | ||
44 | |||
45 | list_add(&wait.list, &task_release_list); | ||
46 | |||
47 | mutex_unlock(&task_release_lock); | ||
48 | |||
49 | /* We are enqueued; now we wait for someone to wake us up. */ | ||
50 | ret = wait_for_completion_interruptible(&wait.completion); | ||
51 | |||
52 | if (!ret) { | ||
53 | /* Setting this flag before releasing ensures that this CPU | ||
54 | * will be the next CPU to requeue the task on a ready or | ||
55 | * release queue. Cleared by prepare_for_next_period() | ||
56 | */ | ||
57 | tsk_rt(current)->dont_requeue = 1; | ||
58 | |||
59 | /* Completion succeeded; set up the release time. complete_job() | ||
60 | * will indirectly cause the period to be added to the next | ||
61 | * release time, so subtract it here. */ | ||
62 | litmus->release_at(current, wait.ts_release_time | ||
63 | + current->rt_param.task_params.phase | ||
64 | - current->rt_param.task_params.period); | ||
65 | |||
66 | /* Advance to next job --- when complete_job() returns, the | ||
67 | * first job has been released. Since we patched up the release | ||
68 | * time, this occurs when all tasks synchronously release their | ||
69 | * first job. */ | ||
70 | ret = complete_job(); | ||
71 | } else { | ||
72 | /* We were interrupted, must cleanup list. */ | ||
73 | mutex_lock(&task_release_lock); | ||
74 | if (!wait.completion.done) | ||
75 | list_del(&wait.list); | ||
76 | mutex_unlock(&task_release_lock); | ||
77 | } | ||
78 | |||
79 | out: | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | int count_tasks_waiting_for_release(void) | ||
84 | { | ||
85 | int task_count = 0; | ||
86 | struct list_head *pos; | ||
87 | |||
88 | mutex_lock(&task_release_lock); | ||
89 | |||
90 | list_for_each(pos, &task_release_list) { | ||
91 | task_count++; | ||
92 | } | ||
93 | |||
94 | mutex_unlock(&task_release_lock); | ||
95 | |||
96 | |||
97 | return task_count; | ||
98 | } | ||
99 | |||
100 | static long do_release_ts(lt_t start) | ||
101 | { | ||
102 | long task_count = 0; | ||
103 | |||
104 | struct list_head *pos, *safe; | ||
105 | struct ts_release_wait *wait; | ||
106 | |||
107 | if (mutex_lock_interruptible(&task_release_lock)) { | ||
108 | task_count = -ERESTARTSYS; | ||
109 | goto out; | ||
110 | } | ||
111 | |||
112 | TRACE("<<<<<< synchronous task system release >>>>>>\n"); | ||
113 | sched_trace_sys_release(&start); | ||
114 | |||
115 | task_count = 0; | ||
116 | list_for_each_safe(pos, safe, &task_release_list) { | ||
117 | wait = (struct ts_release_wait*) | ||
118 | list_entry(pos, struct ts_release_wait, list); | ||
119 | |||
120 | task_count++; | ||
121 | wait->ts_release_time = start; | ||
122 | complete(&wait->completion); | ||
123 | } | ||
124 | |||
125 | /* clear stale list */ | ||
126 | INIT_LIST_HEAD(&task_release_list); | ||
127 | |||
128 | mutex_unlock(&task_release_lock); | ||
129 | |||
130 | out: | ||
131 | return task_count; | ||
132 | } | ||
133 | |||
134 | |||
135 | asmlinkage long sys_wait_for_ts_release(void) | ||
136 | { | ||
137 | long ret = -EPERM; | ||
138 | struct task_struct *t = current; | ||
139 | |||
140 | if (is_realtime(t)) | ||
141 | ret = do_wait_for_ts_release(); | ||
142 | |||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | #define ONE_MS 1000000 | ||
147 | |||
148 | asmlinkage long sys_release_ts(lt_t __user *__delay) | ||
149 | { | ||
150 | long ret; | ||
151 | lt_t delay; | ||
152 | lt_t start_time; | ||
153 | |||
154 | /* FIXME: check capabilities... */ | ||
155 | |||
156 | ret = copy_from_user(&delay, __delay, sizeof(delay)); | ||
157 | if (ret == 0) { | ||
158 | /* truncate the current time to an integral millisecond boundary */ | ||
159 | start_time = litmus_clock(); | ||
160 | do_div(start_time, ONE_MS); | ||
161 | start_time *= ONE_MS; | ||
162 | ret = do_release_ts(start_time + delay); | ||
163 | } | ||
164 | |||
165 | return ret; | ||
166 | } | ||
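The release-time patch-up in do_wait_for_ts_release() can be verified with a small user-space calculation: subtracting one period before complete_job() adds it back means the first job is released exactly at ts_release_time + phase. The numbers below are illustrative only.

#include <stdio.h>

typedef unsigned long long lt_t;	/* nanoseconds, as in LITMUS^RT */

int main(void)
{
	lt_t t0     = 1000000000ULL;	/* synchronous release time: 1 s  */
	lt_t phase  =    2000000ULL;	/* task phase:               2 ms */
	lt_t period =   10000000ULL;	/* task period:             10 ms */

	/* What do_wait_for_ts_release() passes to litmus->release_at(): */
	lt_t patched = t0 + phase - period;

	/* complete_job() then advances the job by one period: */
	lt_t first_release = patched + period;

	printf("patched release time:  %llu\n", patched);
	printf("first job released at: %llu (t0 + phase = %llu)\n",
	       first_release, t0 + phase);
	return 0;
}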