-rw-r--r--  include/litmus/litmus.h        |    2
-rw-r--r--  include/litmus/locking.h       |    1
-rw-r--r--  include/litmus/rt_param.h      |    2
-rw-r--r--  include/litmus/sched_plugin.h  |    5
-rw-r--r--  include/litmus/sched_trace.h   |   60
-rw-r--r--  include/trace/events/litmus.h  |  232
-rw-r--r--  include/trace/ftrace.h         |    5
-rw-r--r--  litmus/Makefile                |    5
-rw-r--r--  litmus/locking.c               |    6
-rw-r--r--  litmus/sched_gsn_edf.c         |    6
-rw-r--r--  litmus/sched_litmus.c          |    4
-rw-r--r--  litmus/sched_mc.c              | 1369
-rw-r--r--  litmus/sched_mc_ce.c           | 1052
-rw-r--r--  litmus/sched_plugin.c          |    6
-rw-r--r--  litmus/sched_psn_edf.c         |    3
-rw-r--r--  litmus/sync.c                  |    3
16 files changed, 2723 insertions, 38 deletions
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 0b071fd359f9..2776470bb897 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -44,6 +44,8 @@ void litmus_exit_task(struct task_struct *tsk);
44 44
45#define tsk_rt(t) (&(t)->rt_param) 45#define tsk_rt(t) (&(t)->rt_param)
46 46
47#define get_server_job(t) (tsk_rt(t)->job_params.fake_job_no)
48
47/* Realtime utility macros */ 49/* Realtime utility macros */
48#define get_rt_flags(t) (tsk_rt(t)->flags) 50#define get_rt_flags(t) (tsk_rt(t)->flags)
49#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f)) 51#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
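The new get_server_job() accessor exposes a per-task server job counter (job_params.fake_job_no, not visible in the rt_param.h hunk below, so presumably introduced elsewhere in this series) so that server-granularity events can be emitted for ordinary tasks. A minimal sketch of the intended use, mirroring how sched_mc.c later in this patch reports server budget exhaustion; the helper name is illustrative:

/* Sketch only; relies on the headers as patched in this series. */
#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/sched_trace.h>

static void report_server_exhaustion(struct task_struct *t)
{
	/* Task-backed servers are identified by the negated PID
	 * throughout this patch; the job number comes from the new
	 * accessor rather than job_params.job_no. */
	sched_trace_server_completion(-t->pid, get_server_job(t));
}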
diff --git a/include/litmus/locking.h b/include/litmus/locking.h
index 4d7b870cb443..41991d5af01b 100644
--- a/include/litmus/locking.h
+++ b/include/litmus/locking.h
@@ -9,6 +9,7 @@ struct litmus_lock_ops;
9struct litmus_lock { 9struct litmus_lock {
10 struct litmus_lock_ops *ops; 10 struct litmus_lock_ops *ops;
11 int type; 11 int type;
12 int id;
12}; 13};
13 14
14struct litmus_lock_ops { 15struct litmus_lock_ops {
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index d6d799174160..ba62e10d6f2c 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -89,7 +89,7 @@ struct rt_job {
89 lt_t exec_time; 89 lt_t exec_time;
90 90
91 /* Which job is this. This is used to let user space 91 /* Which job is this. This is used to let user space
 92 * specify which job to wait for, which is important if jobs 92 * specify which job to wait for, which is important if jobs
93 * overrun. If we just call sys_sleep_next_period() then we 93 * overrun. If we just call sys_sleep_next_period() then we
94 * will unintentionally miss jobs after an overrun. 94 * will unintentionally miss jobs after an overrun.
95 * 95 *
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
index 6e7cabdddae8..01786b57a4a9 100644
--- a/include/litmus/sched_plugin.h
+++ b/include/litmus/sched_plugin.h
@@ -67,6 +67,9 @@ typedef long (*admit_task_t)(struct task_struct* tsk);
67 67
68typedef void (*release_at_t)(struct task_struct *t, lt_t start); 68typedef void (*release_at_t)(struct task_struct *t, lt_t start);
69 69
70/* TODO remove me */
71typedef void (*release_ts_t)(lt_t time);
72
70struct sched_plugin { 73struct sched_plugin {
71 struct list_head list; 74 struct list_head list;
72 /* basic info */ 75 /* basic info */
@@ -93,6 +96,8 @@ struct sched_plugin {
93 task_block_t task_block; 96 task_block_t task_block;
94 task_exit_t task_exit; 97 task_exit_t task_exit;
95 98
99 release_ts_t release_ts;
100
96#ifdef CONFIG_LITMUS_LOCKING 101#ifdef CONFIG_LITMUS_LOCKING
97 /* locking protocols */ 102 /* locking protocols */
98 allocate_lock_t allocate_lock; 103 allocate_lock_t allocate_lock;
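The release_ts_t hook (flagged TODO above, so likely temporary) lets a plugin observe the synchronous release of the whole task set; litmus/sync.c and litmus/sched_plugin.c are also touched by this patch, which suggests that the call site and a default implementation live there. A hedged sketch of how such a hook would be invoked; the NULL check and the function name are assumptions, not shown in this diff:

/* Sketch: 'litmus' is the active plugin pointer already used elsewhere
 * in litmus/ (e.g. litmus->allocate_lock in litmus/locking.c). */
static void notify_synchronous_release(lt_t when)
{
	if (litmus->release_ts)
		litmus->release_ts(when);
}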
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
index 82bde8241298..49972d75ef38 100644
--- a/include/litmus/sched_trace.h
+++ b/include/litmus/sched_trace.h
@@ -180,6 +180,13 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
180#define trace_litmus_task_resume(t) 180#define trace_litmus_task_resume(t)
181#define trace_litmus_sys_release(start) 181#define trace_litmus_sys_release(start)
182 182
183#define trace_litmus_container_param(cid, name)
184#define trace_litmus_server_param(sid, cid, wcet, time)
185#define trace_litmus_server_switch_to(sid, job, tid)
186#define trace_litmus_server_switch_away(sid, job, tid)
187#define trace_litmus_server_release(sid, job, release, deadline)
188#define trace_litmus_server_completion(sid, job)
189
183#endif 190#endif
184 191
185 192
@@ -226,18 +233,28 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
226 trace_litmus_task_completion(t, forced); \ 233 trace_litmus_task_completion(t, forced); \
227 } while (0) 234 } while (0)
228 235
229#define sched_trace_task_block(t) \ 236#define sched_trace_task_block(t, i) \
230 do { \ 237 do { \
231 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, \ 238 SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, \
232 do_sched_trace_task_block, t); \ 239 do_sched_trace_task_block, t); \
233 trace_litmus_task_block(t); \ 240 trace_litmus_task_block(t, i); \
234 } while (0) 241 } while (0)
235 242
236#define sched_trace_task_resume(t) \ 243#define sched_trace_task_resume(t, i) \
237 do { \ 244 do { \
238 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, \ 245 SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, \
239 do_sched_trace_task_resume, t); \ 246 do_sched_trace_task_resume, t); \
240 trace_litmus_task_resume(t); \ 247 trace_litmus_task_resume(t, i); \
248 } while (0)
249
250#define sched_trace_resource_acquire(t, i) \
251 do { \
252 trace_litmus_resource_acquire(t, i); \
253 } while (0)
254
255#define sched_trace_resource_released(t, i) \
256 do { \
 257 trace_litmus_resource_release(t, i); \
241 } while (0) 258 } while (0)
242 259
243#define sched_trace_action(t, action) \ 260#define sched_trace_action(t, action) \
@@ -252,6 +269,41 @@ feather_callback void do_sched_trace_sys_release(unsigned long id,
252 trace_litmus_sys_release(when); \ 269 trace_litmus_sys_release(when); \
253 } while (0) 270 } while (0)
254 271
272#define QT_START lt_t _qt_start = litmus_clock()
273#define QT_END \
274 sched_trace_log_message("%d P%d [%s@%s:%d]: Took %llu\n\n", \
275 TRACE_ARGS, litmus_clock() - _qt_start)
276
277#define sched_trace_container_param(cid, name) \
278 do { \
279 trace_litmus_container_param(cid, name); \
280 } while (0)
281
282#define sched_trace_server_param(sid, cid, wcet, period) \
283 do { \
284 trace_litmus_server_param(sid, cid, wcet, period); \
285 } while(0)
286
287#define sched_trace_server_switch_to(sid, job, tid) \
288 do { \
289 trace_litmus_server_switch_to(sid, job, tid); \
290 } while(0)
291
292#define sched_trace_server_switch_away(sid, job, tid) \
293 do { \
294 trace_litmus_server_switch_away(sid, job, tid); \
295 } while (0)
296
297#define sched_trace_server_release(sid, job, rel, dead) \
298 do { \
299 trace_litmus_server_release(sid, job, rel, dead); \
300 } while (0)
301
302#define sched_trace_server_completion(sid, job) \
303 do { \
304 trace_litmus_server_completion(sid, job); \
305 } while (0)
306
255#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */ 307#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
256 308
257#endif /* __KERNEL__ */ 309#endif /* __KERNEL__ */
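Taken together, the new macros describe a two-level hierarchy: a container groups servers, and a server executes task jobs. A condensed sketch of the call sequence a plugin is expected to emit, patterned on mc_task_new() and mc_release_ts() in litmus/sched_mc.c later in this patch; the container id, name, and timing values are placeholders:

/* Illustrative only; all ids below are made up. */
static void trace_task_server(struct task_struct *t, lt_t now)
{
	sched_trace_container_param(1, "LVL-X");		/* once per container */
	sched_trace_server_param(-t->pid, 1,
				 get_exec_cost(t), get_rt_period(t));	/* once per server */
	sched_trace_server_release(-t->pid, get_server_job(t),
				   now, now + get_rt_period(t));	/* per server job */
	sched_trace_server_switch_to(-t->pid, get_server_job(t), t->pid);
	/* ... the task runs inside its server ... */
	sched_trace_server_switch_away(-t->pid, get_server_job(t), t->pid);
	sched_trace_server_completion(-t->pid, get_server_job(t));
}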
diff --git a/include/trace/events/litmus.h b/include/trace/events/litmus.h
index 0fffcee02be0..b3a8f166e65f 100644
--- a/include/trace/events/litmus.h
+++ b/include/trace/events/litmus.h
@@ -11,10 +11,6 @@
11 11
12#include <litmus/litmus.h> 12#include <litmus/litmus.h>
13#include <litmus/rt_param.h> 13#include <litmus/rt_param.h>
14
15/*
16 * Tracing task admission
17 */
18TRACE_EVENT(litmus_task_param, 14TRACE_EVENT(litmus_task_param,
19 15
20 TP_PROTO(struct task_struct *t), 16 TP_PROTO(struct task_struct *t),
@@ -24,9 +20,9 @@ TRACE_EVENT(litmus_task_param,
24 TP_STRUCT__entry( 20 TP_STRUCT__entry(
25 __field( pid_t, pid ) 21 __field( pid_t, pid )
26 __field( unsigned int, job ) 22 __field( unsigned int, job )
27 __field( lt_t, wcet ) 23 __field( unsigned long long, wcet )
28 __field( lt_t, period ) 24 __field( unsigned long long, period )
29 __field( lt_t, phase ) 25 __field( unsigned long long, phase )
30 __field( int, partition ) 26 __field( int, partition )
31 ), 27 ),
32 28
@@ -56,8 +52,8 @@ TRACE_EVENT(litmus_task_release,
56 TP_STRUCT__entry( 52 TP_STRUCT__entry(
57 __field( pid_t, pid ) 53 __field( pid_t, pid )
58 __field( unsigned int, job ) 54 __field( unsigned int, job )
59 __field( lt_t, release ) 55 __field( unsigned long long, release )
60 __field( lt_t, deadline ) 56 __field( unsigned long long, deadline )
61 ), 57 ),
62 58
63 TP_fast_assign( 59 TP_fast_assign(
@@ -84,8 +80,8 @@ TRACE_EVENT(litmus_switch_to,
84 TP_STRUCT__entry( 80 TP_STRUCT__entry(
85 __field( pid_t, pid ) 81 __field( pid_t, pid )
86 __field( unsigned int, job ) 82 __field( unsigned int, job )
87 __field( lt_t, when ) 83 __field( unsigned long long, when )
88 __field( lt_t, exec_time ) 84 __field( unsigned long long, exec_time )
89 ), 85 ),
90 86
91 TP_fast_assign( 87 TP_fast_assign(
@@ -112,8 +108,8 @@ TRACE_EVENT(litmus_switch_away,
112 TP_STRUCT__entry( 108 TP_STRUCT__entry(
113 __field( pid_t, pid ) 109 __field( pid_t, pid )
114 __field( unsigned int, job ) 110 __field( unsigned int, job )
115 __field( lt_t, when ) 111 __field( unsigned long long, when )
116 __field( lt_t, exec_time ) 112 __field( unsigned long long, exec_time )
117 ), 113 ),
118 114
119 TP_fast_assign( 115 TP_fast_assign(
@@ -140,7 +136,7 @@ TRACE_EVENT(litmus_task_completion,
140 TP_STRUCT__entry( 136 TP_STRUCT__entry(
141 __field( pid_t, pid ) 137 __field( pid_t, pid )
142 __field( unsigned int, job ) 138 __field( unsigned int, job )
143 __field( lt_t, when ) 139 __field( unsigned long long, when )
144 __field( unsigned long, forced ) 140 __field( unsigned long, forced )
145 ), 141 ),
146 142
@@ -161,21 +157,71 @@ TRACE_EVENT(litmus_task_completion,
161 */ 157 */
162TRACE_EVENT(litmus_task_block, 158TRACE_EVENT(litmus_task_block,
163 159
164 TP_PROTO(struct task_struct *t), 160 TP_PROTO(struct task_struct *t, int lid),
165 161
166 TP_ARGS(t), 162 TP_ARGS(t, lid),
167 163
168 TP_STRUCT__entry( 164 TP_STRUCT__entry(
169 __field( pid_t, pid ) 165 __field( pid_t, pid )
170 __field( lt_t, when ) 166 __field( int, lid )
167 __field( unsigned long long, when )
171 ), 168 ),
172 169
173 TP_fast_assign( 170 TP_fast_assign(
174 __entry->pid = t ? t->pid : 0; 171 __entry->pid = t ? t->pid : 0;
172 __entry->lid = lid;
175 __entry->when = litmus_clock(); 173 __entry->when = litmus_clock();
176 ), 174 ),
177 175
178 TP_printk("(%u) blocks: %Lu\n", __entry->pid, __entry->when) 176 TP_printk("(%u) blocks on %d: %Lu\n", __entry->pid,
177 __entry->lid, __entry->when)
178);
179
180/*
181 * Lock events
182 */
183TRACE_EVENT(litmus_resource_acquire,
184
185 TP_PROTO(struct task_struct *t, int lid),
186
187 TP_ARGS(t, lid),
188
189 TP_STRUCT__entry(
190 __field( pid_t, pid )
191 __field( int, lid )
192 __field( unsigned long long, when )
193 ),
194
195 TP_fast_assign(
196 __entry->pid = t ? t->pid : 0;
197 __entry->lid = lid;
198 __entry->when = litmus_clock();
199 ),
200
201 TP_printk("(%u) acquires %d: %Lu\n", __entry->pid,
202 __entry->lid, __entry->when)
203);
204
205TRACE_EVENT(litmus_resource_release,
206
207 TP_PROTO(struct task_struct *t, int lid),
208
209 TP_ARGS(t, lid),
210
211 TP_STRUCT__entry(
212 __field( pid_t, pid )
213 __field( int, lid )
214 __field( unsigned long long, when )
215 ),
216
217 TP_fast_assign(
218 __entry->pid = t ? t->pid : 0;
219 __entry->lid = lid;
220 __entry->when = litmus_clock();
221 ),
222
223 TP_printk("(%u) releases %d: %Lu\n", __entry->pid,
224 __entry->lid, __entry->when)
179); 225);
180 226
181/* 227/*
@@ -183,24 +229,27 @@ TRACE_EVENT(litmus_task_block,
183 */ 229 */
184TRACE_EVENT(litmus_task_resume, 230TRACE_EVENT(litmus_task_resume,
185 231
186 TP_PROTO(struct task_struct *t), 232 TP_PROTO(struct task_struct *t, int lid),
187 233
188 TP_ARGS(t), 234 TP_ARGS(t, lid),
189 235
190 TP_STRUCT__entry( 236 TP_STRUCT__entry(
191 __field( pid_t, pid ) 237 __field( pid_t, pid )
238 __field( int, lid )
192 __field( unsigned int, job ) 239 __field( unsigned int, job )
193 __field( lt_t, when ) 240 __field( unsigned long long, when )
194 ), 241 ),
195 242
196 TP_fast_assign( 243 TP_fast_assign(
197 __entry->pid = t ? t->pid : 0; 244 __entry->pid = t ? t->pid : 0;
198 __entry->job = t ? t->rt_param.job_params.job_no : 0; 245 __entry->job = t ? t->rt_param.job_params.job_no : 0;
199 __entry->when = litmus_clock(); 246 __entry->when = litmus_clock();
247 __entry->lid = lid;
200 ), 248 ),
201 249
202 TP_printk("resume(job(%u, %u)): %Lu\n", 250 TP_printk("resume(job(%u, %u)) on %d: %Lu\n",
203 __entry->pid, __entry->job, __entry->when) 251 __entry->pid, __entry->job,
252 __entry->lid, __entry->when)
204); 253);
205 254
206/* 255/*
@@ -208,13 +257,13 @@ TRACE_EVENT(litmus_task_resume,
208 */ 257 */
209TRACE_EVENT(litmus_sys_release, 258TRACE_EVENT(litmus_sys_release,
210 259
211 TP_PROTO(lt_t *start), 260 TP_PROTO(unsigned long long *start),
212 261
213 TP_ARGS(start), 262 TP_ARGS(start),
214 263
215 TP_STRUCT__entry( 264 TP_STRUCT__entry(
216 __field( lt_t, rel ) 265 __field( unsigned long long, rel )
217 __field( lt_t, when ) 266 __field( unsigned long long, when )
218 ), 267 ),
219 268
220 TP_fast_assign( 269 TP_fast_assign(
@@ -225,6 +274,137 @@ TRACE_EVENT(litmus_sys_release,
225 TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when) 274 TP_printk("SynRelease(%Lu) at %Lu\n", __entry->rel, __entry->when)
226); 275);
227 276
277/*
278 * Containers
279 */
280TRACE_EVENT(litmus_container_param,
281
282 TP_PROTO(int cid, const char *name),
283
284 TP_ARGS(cid, name),
285
286 TP_STRUCT__entry(
287 __field( int, cid )
288 __array( char, name, TASK_COMM_LEN )
289 ),
290
291 TP_fast_assign(
292 memcpy(__entry->name, name, TASK_COMM_LEN);
293 __entry->cid = cid;
294 ),
295
296 TP_printk("container, name: %s, id: %d\n", __entry->name, __entry->cid)
297);
298
299TRACE_EVENT(litmus_server_param,
300
301 TP_PROTO(int sid, int cid, unsigned long long wcet, unsigned long long period),
302
303 TP_ARGS(sid, cid, wcet, period),
304
305 TP_STRUCT__entry(
306 __field( int, sid )
307 __field( int, cid )
308 __field( unsigned long long, wcet )
309 __field( unsigned long long, period )
310 ),
311
312 TP_fast_assign(
313 __entry->cid = cid;
314 __entry->sid = sid;
315 __entry->wcet = wcet;
316 __entry->period = period;
317 ),
318
 319 TP_printk("server(%llu, %llu), sid: %d, cont: %d\n",
320 __entry->wcet, __entry->period, __entry->sid, __entry->cid)
321);
322
323TRACE_EVENT(litmus_server_switch_to,
324
325 TP_PROTO(int sid, unsigned int job, int tid),
326
327 TP_ARGS(sid, job, tid),
328
329 TP_STRUCT__entry(
330 __field( int, sid)
331 __field( unsigned int, job)
332 __field( int, tid)
333 ),
334
335 TP_fast_assign(
336 __entry->sid = sid;
337 __entry->tid = tid;
338 __entry->job = job;
339 ),
340
341 TP_printk("switch_to(server(%d, %u)): %d\n", __entry->sid, __entry->job, __entry->tid)
342);
343
344TRACE_EVENT(litmus_server_switch_away,
345
346 TP_PROTO(int sid, unsigned int job, int tid),
347
348 TP_ARGS(sid, job, tid),
349
350 TP_STRUCT__entry(
351 __field( int, sid)
352 __field( unsigned int, job)
353 __field( int, tid)
354 ),
355
356 TP_fast_assign(
357 __entry->sid = sid;
 358 __entry->tid = tid; __entry->job = job;
359 ),
360
361 TP_printk("switch_away(server(%d, %u)): %d\n", __entry->sid, __entry->job, __entry->tid)
362);
363
364TRACE_EVENT(litmus_server_release,
365
366 TP_PROTO(int sid, unsigned int job,
367 unsigned long long release,
368 unsigned long long deadline),
369
370 TP_ARGS(sid, job, release, deadline),
371
372 TP_STRUCT__entry(
373 __field( int, sid)
374 __field( unsigned int, job)
375 __field( unsigned long long, release)
376 __field( unsigned long long, deadline)
377 ),
378
379 TP_fast_assign(
380 __entry->sid = sid;
381 __entry->job = job;
382 __entry->release = release;
383 __entry->deadline = deadline;
384 ),
385
386 TP_printk("release(server(%d, %u)), release: %llu, deadline: %llu\n", __entry->sid, __entry->job, __entry->release, __entry->deadline)
387);
388
389TRACE_EVENT(litmus_server_completion,
390
391 TP_PROTO(int sid, int job),
392
393 TP_ARGS(sid, job),
394
395 TP_STRUCT__entry(
396 __field( int, sid)
397 __field( unsigned int, job)
398 ),
399
400 TP_fast_assign(
401 __entry->sid = sid;
402 __entry->job = job;
403 ),
404
405 TP_printk("completion(server(%d, %d))\n", __entry->sid, __entry->job)
406);
407
228#endif /* _SCHED_TASK_TRACEPOINT_H */ 408#endif /* _SCHED_TASK_TRACEPOINT_H */
229 409
230/* Must stay outside the protection */ 410/* Must stay outside the protection */
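The litmus_resource_acquire/release events have wrapper macros in sched_trace.h but no caller in this patch. A hedged sketch of where a locking protocol could emit them, using the per-lock id that litmus/locking.c below assigns at lock creation; the function names are illustrative and assume litmus/locking.h and litmus/sched_trace.h as patched here:

/* Sketch: l is the protocol's struct litmus_lock, t the calling task. */
static void trace_lock_acquired(struct litmus_lock *l, struct task_struct *t)
{
	sched_trace_resource_acquire(t, l->id);
}

static void trace_lock_released(struct litmus_lock *l, struct task_struct *t)
{
	sched_trace_resource_released(t, l->id);
}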
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 533c49f48047..4d6f3474e8fa 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/ftrace_event.h> 19#include <linux/ftrace_event.h>
20#include <litmus/litmus.h>
20 21
21/* 22/*
22 * DECLARE_EVENT_CLASS can be used to add a generic function 23 * DECLARE_EVENT_CLASS can be used to add a generic function
@@ -54,7 +55,7 @@
54#define __string(item, src) __dynamic_array(char, item, -1) 55#define __string(item, src) __dynamic_array(char, item, -1)
55 56
56#undef TP_STRUCT__entry 57#undef TP_STRUCT__entry
57#define TP_STRUCT__entry(args...) args 58#define TP_STRUCT__entry(args...) args __field( unsigned long long, __rt_ts )
58 59
59#undef DECLARE_EVENT_CLASS 60#undef DECLARE_EVENT_CLASS
60#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ 61#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
@@ -507,7 +508,7 @@ static inline notrace int ftrace_get_offsets_##call( \
507 strcpy(__get_str(dst), src); 508 strcpy(__get_str(dst), src);
508 509
509#undef TP_fast_assign 510#undef TP_fast_assign
510#define TP_fast_assign(args...) args 511#define TP_fast_assign(args...) args; __entry->__rt_ts = litmus_clock();
511 512
512#undef TP_perf_assign 513#undef TP_perf_assign
513#define TP_perf_assign(args...) 514#define TP_perf_assign(args...)
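These two overrides append a LITMUS^RT timestamp to every ftrace event: TP_STRUCT__entry grows a trailing __rt_ts field, and TP_fast_assign stamps it with litmus_clock(). For an event carrying pid and job fields, the generated record then looks roughly like the sketch below; the struct name and the leading trace_entry header follow the usual ftrace macro expansion and are shown only for illustration:

/* Illustrative expansion, not generated verbatim by the macros. */
struct ftrace_raw_litmus_example {
	struct trace_entry	ent;		/* common ftrace header */
	pid_t			pid;		/* fields from TP_STRUCT__entry */
	unsigned int		job;
	unsigned long long	__rt_ts;	/* appended by this patch */
};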
diff --git a/litmus/Makefile b/litmus/Makefile
index 7338180f196f..3487dfe8df05 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -16,8 +16,9 @@ obj-y = sched_plugin.o litmus.o \
16 srp.o \ 16 srp.o \
17 bheap.o \ 17 bheap.o \
18 ctrldev.o \ 18 ctrldev.o \
19 sched_gsn_edf.o \ 19 domain.o \
20 sched_psn_edf.o 20 sched_psn_edf.o \
21 sched_gsn_edf.o
21 22
22obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o 23obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
23obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o 24obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/locking.c b/litmus/locking.c
index 0c1aa6aa40b7..447b8aaee8dc 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -28,14 +28,18 @@ static inline struct litmus_lock* get_lock(struct od_table_entry* entry)
28 return (struct litmus_lock*) entry->obj->obj; 28 return (struct litmus_lock*) entry->obj->obj;
29} 29}
30 30
31atomic_t lock_id = ATOMIC_INIT(0);
32
31static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) 33static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg)
32{ 34{
33 struct litmus_lock* lock; 35 struct litmus_lock* lock;
34 int err; 36 int err;
35 37
36 err = litmus->allocate_lock(&lock, type, arg); 38 err = litmus->allocate_lock(&lock, type, arg);
37 if (err == 0) 39 if (err == 0) {
40 lock->id = atomic_add_return(1, &lock_id);
38 *obj_ref = lock; 41 *obj_ref = lock;
42 }
39 return err; 43 return err;
40} 44}
41 45
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
index 6ed504f4750e..f50b58c37b31 100644
--- a/litmus/sched_gsn_edf.c
+++ b/litmus/sched_gsn_edf.c
@@ -805,6 +805,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
805 /* release lock before sleeping */ 805 /* release lock before sleeping */
806 spin_unlock_irqrestore(&sem->wait.lock, flags); 806 spin_unlock_irqrestore(&sem->wait.lock, flags);
807 807
808 sched_trace_task_block(t, l->id);
809
808 /* We depend on the FIFO order. Thus, we don't need to recheck 810 /* We depend on the FIFO order. Thus, we don't need to recheck
809 * when we wake up; we are guaranteed to have the lock since 811 * when we wake up; we are guaranteed to have the lock since
810 * there is only one wake up per release. 812 * there is only one wake up per release.
@@ -812,7 +814,8 @@ int gsnedf_fmlp_lock(struct litmus_lock* l)
 812 814
 813 schedule(); 815 schedule();
 814 816
 815 TS_LOCK_RESUME; 817 TS_LOCK_RESUME;
 818 sched_trace_task_resume(t, l->id);
 816 819
 817 /* Since we hold the lock, no other task will change 820 /* Since we hold the lock, no other task will change
 818 * ->owner. We can thus check it without acquiring the spin 821 * ->owner. We can thus check it without acquiring the spin
diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
index 5a15ce938984..659dc13d3fa9 100644
--- a/litmus/sched_litmus.c
+++ b/litmus/sched_litmus.c
@@ -160,7 +160,7 @@ static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
160 int flags) 160 int flags)
161{ 161{
162 if (flags & ENQUEUE_WAKEUP) { 162 if (flags & ENQUEUE_WAKEUP) {
163 sched_trace_task_resume(p); 163 sched_trace_task_resume(p, 0);
164 tsk_rt(p)->present = 1; 164 tsk_rt(p)->present = 1;
165 /* LITMUS^RT plugins need to update the state 165 /* LITMUS^RT plugins need to update the state
166 * _before_ making it available in global structures. 166 * _before_ making it available in global structures.
@@ -185,7 +185,7 @@ static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
185 if (flags & DEQUEUE_SLEEP) { 185 if (flags & DEQUEUE_SLEEP) {
186 litmus->task_block(p); 186 litmus->task_block(p);
187 tsk_rt(p)->present = 0; 187 tsk_rt(p)->present = 0;
188 sched_trace_task_block(p); 188 sched_trace_task_block(p, 0);
189 189
190 rq->litmus.nr_running--; 190 rq->litmus.nr_running--;
191 } else 191 } else
diff --git a/litmus/sched_mc.c b/litmus/sched_mc.c
new file mode 100644
index 000000000000..41f02ee3e6ca
--- /dev/null
+++ b/litmus/sched_mc.c
@@ -0,0 +1,1369 @@
1/**
2 * litmus/sched_mc.c
3 *
4 * Implementation of the Mixed Criticality scheduling algorithm.
5 *
6 * (Per Mollison, Erickson, Anderson, Baruah, Scoredos 2010)
7 *
8 * Absolute first: relative time spent doing different parts of release
9 * and scheduling overhead needs to be measured and graphed.
10 *
11 * Domain locks should be more fine-grained. There is no reason to hold the
12 * ready-queue lock when adding a task to the release-queue.
13 *
14 * The levels should be converted to linked-lists so that they are more
15 * adaptable and need not be identical on all processors.
16 *
17 * The interaction between remove_from_all and other concurrent operations
18 * should be re-examined. If a job_completion and a preemption happen
19 * simultaneously, a task could be requeued, removed, then requeued again.
20 *
21 * Level-C tasks should be able to swap CPUs a-la GSN-EDF. They should also
22 * try and swap with the last CPU they were on. This could be complicated for
23 * ghost tasks.
24 *
25 * Locking for timer-merging could be infinitely more fine-grained. A second
26 * hash could select a lock to use based on queue slot. This approach might
27 * also help with add_release in rt_domains.
28 *
29 * It should be possible to reserve a CPU for ftdumping.
30 *
31 * The real_deadline business seems sloppy.
32 *
33 * The amount of data in the header file should be cut down. The use of the
34 * header file in general needs to be re-examined.
35 *
36 * The plugin needs to be modified so that it doesn't freeze when it is
37 * deactivated in a VM.
38 *
39 * The locking in check_for_preempt is not fine-grained enough.
40 *
41 * The size of the structures could be smaller. Debugging info might be
42 * excessive as things currently stand.
43 *
44 * The macro can_requeue has been expanded too much. Anything beyond
45 * scheduled_on is a hack!
46 *
47 * Domain names (rt_domain) are still clumsy.
48 *
49 * Should BE be moved into the kernel? This will require benchmarking.
50 */
51
52#include <linux/spinlock.h>
53#include <linux/percpu.h>
54#include <linux/sched.h>
55#include <linux/hrtimer.h>
56#include <linux/slab.h>
57#include <linux/module.h>
58#include <linux/poison.h>
59#include <linux/pid.h>
60
61#include <litmus/litmus.h>
62#include <litmus/trace.h>
63#include <litmus/jobs.h>
64#include <litmus/sched_plugin.h>
65#include <litmus/edf_common.h>
66#include <litmus/sched_trace.h>
67#include <litmus/domain.h>
68#include <litmus/bheap.h>
69#include <litmus/event_group.h>
70#include <litmus/budget.h>
71
72#include <litmus/sched_mc.h>
73#include <litmus/ce_domain.h>
74
75/**
76 * struct cpu_entry - State of a CPU for the entire MC system
77 * @cpu CPU id
78 * @scheduled Task that is physically running
79 * @linked Task that should be running / is logically running
80 * @lock For serialization
81 * @crit_entries Array of CPU state per criticality level
82 * @redir List of redirected work for this CPU.
83 * @redir_lock Lock for @redir.
84 * @event_group Event group for timer merging.
85 */
86struct cpu_entry {
87 int cpu;
88 struct task_struct* scheduled;
89 struct task_struct* will_schedule;
90 struct task_struct* linked;
91 raw_spinlock_t lock;
92 struct crit_entry crit_entries[NUM_CRIT_LEVELS];
93#ifdef CONFIG_PLUGIN_MC_REDIRECT
94 struct list_head redir;
95 raw_spinlock_t redir_lock;
96#endif
97#ifdef CONFIG_MERGE_TIMERS
98 struct event_group *event_group;
99#endif
100};
101
102DEFINE_PER_CPU(struct cpu_entry, cpus);
103#ifdef CONFIG_RELEASE_MASTER
104static int interrupt_cpu;
105#endif
106
107#define domain_data(dom) (container_of(dom, struct domain_data, domain))
108#define is_global(dom) (domain_data(dom)->heap)
109#define is_global_task(t) (is_global(get_task_domain(t)))
110#define can_use(ce) \
 111 ((ce)->state == CS_ACTIVE || (ce)->state == CS_ACTIVATE)
112#define can_requeue(t) \
113 ((t)->rt_param.linked_on == NO_CPU && /* Not linked anywhere */ \
114 !is_queued(t) && /* Not gonna be linked */ \
115 (!is_global_task(t) || (t)->rt_param.scheduled_on == NO_CPU))
116#define entry_level(e) \
117 (((e)->linked) ? tsk_mc_crit((e)->linked) : NUM_CRIT_LEVELS - 1)
118#define crit_cpu(ce) \
119 (container_of((void*)((ce) - (ce)->level), struct cpu_entry, crit_entries))
120#define get_crit_entry_for(cpu, level) (&per_cpu(cpus, cpu).crit_entries[level])
121#define TRACE_ENTRY(e, fmt, args...) \
122 STRACE("P%d, linked=" TS " " fmt, e->cpu, TA(e->linked), ##args)
123#define TRACE_CRIT_ENTRY(ce, fmt, args...) \
124 STRACE("%s P%d, linked=" TS " " fmt, \
125 (ce)->domain->name, crit_cpu(ce)->cpu, TA((ce)->linked), ##args)
126
127static int sid(struct crit_entry *ce)
128{
129 int level = ce->level * num_online_cpus() + crit_cpu(ce)->cpu + 1;
 130 BUG_ON(level <= 0);
131 return -level;
132}
133
134/*
135 * Sort CPUs within a global domain's heap.
136 */
137static int cpu_lower_prio(struct bheap_node *a, struct bheap_node *b)
138{
139 struct domain *domain;
140 struct crit_entry *first, *second;
141 struct task_struct *first_link, *second_link;
142
143 first = a->value;
144 second = b->value;
145 first_link = first->linked;
146 second_link = second->linked;
147
148 if (first->state == CS_REMOVED || second->state == CS_REMOVED) {
149 /* Removed entries go at the back of the heap */
150 return first->state != CS_REMOVED &&
151 second->state != CS_REMOVED;
152 } else if (!first_link || !second_link) {
153 /* Entry with nothing scheduled is lowest priority */
154 return second_link && !first_link;
155 } else {
156 /* Sort by deadlines of tasks */
157 domain = get_task_domain(first_link);
158 return domain->higher_prio(second_link, first_link);
159 }
160}
161
162/*
163 * Return true if the domain has a higher priority ready task. The @curr
164 * task must belong to the domain.
165 */
166static int mc_preempt_needed(struct domain *dom, struct task_struct* curr)
167{
168 struct task_struct *next = dom->peek_ready(dom);
169 if (!next || !curr) {
170 return next && !curr;
171 } else {
172 BUG_ON(tsk_mc_crit(next) != tsk_mc_crit(curr));
173 return get_task_domain(next)->higher_prio(next, curr);
174 }
175}
176
177/*
178 * Update crit entry position in a global heap. Caller must hold
179 * @ce's domain lock.
180 */
181static inline void update_crit_position(struct crit_entry *ce)
182{
183 struct bheap *heap;
184 if (is_global(ce->domain)) {
185 heap = domain_data(ce->domain)->heap;
186 BUG_ON(!heap);
187 BUG_ON(!bheap_node_in_heap(ce->node));
188 bheap_delete(cpu_lower_prio, heap, ce->node);
189 bheap_insert(cpu_lower_prio, heap, ce->node);
190 }
191}
192
193/*
194 * Update crit entry position in a global heap if it has been marked
195 * for update. Caller must hold @ce's domain lock.
196 */
197static void fix_crit_position(struct crit_entry *ce)
198{
199 if (is_global(ce->domain)) {
200 if (CS_ACTIVATE == ce->state) {
201 ce->state = CS_ACTIVE;
202 update_crit_position(ce);
203 } else if (CS_REMOVE == ce->state) {
204 ce->state = CS_REMOVED;
205 update_crit_position(ce);
206 }
207 }
208}
209
210/*
 211 * Return next CPU which should be preempted or NULL if the domain has no
212 * preemptable CPUs. Caller must hold the @dom lock.
213 */
214static inline struct crit_entry* lowest_prio_cpu(struct domain *dom)
215{
216 struct bheap *heap = domain_data(dom)->heap;
217 struct bheap_node* hn;
218 struct crit_entry *ce, *res = NULL;
219 do {
220 hn = bheap_peek(cpu_lower_prio, heap);
221 ce = (hn) ? hn->value : NULL;
222 if (ce) {
223 if (ce->state == CS_ACTIVE)
224 res = ce;
225 else if (ce->state == CS_REMOVED)
226 ce = NULL;
227 else
228 fix_crit_position(ce);
229 }
230 } while (ce && !res);
231 return res;
232}
233
234/*
235 * Cancel ghost timer.
236 */
237static inline void cancel_ghost(struct crit_entry *ce)
238{
239#ifdef CONFIG_MERGE_TIMERS
240 cancel_event(&ce->event);
241#else
242 hrtimer_try_to_cancel(&ce->timer);
243#endif
244}
245
246/*
247 * Arm ghost timer. Will merge timers if the option is specified.
248 */
249static inline void arm_ghost(struct crit_entry *ce, lt_t fire)
250{
251#ifdef CONFIG_MERGE_TIMERS
252 add_event(crit_cpu(ce)->event_group, &ce->event, fire);
253#else
254 __hrtimer_start_range_ns(&ce->timer,
255 ns_to_ktime(fire),
256 0 /* delta */,
257 HRTIMER_MODE_ABS_PINNED,
258 0 /* no wakeup */);
259#endif
260}
261
262/*
263 * Time accounting for ghost tasks.
264 * Must be called before a decision is made involving the task's budget.
265 */
266static void update_ghost_time(struct task_struct *p)
267{
268 u64 clock = litmus_clock();
269 u64 delta = clock - p->se.exec_start;
270 BUG_ON(!is_ghost(p));
271 if (unlikely ((s64)delta < 0)) {
272 delta = 0;
273 TRACE_MC_TASK(p, "WARNING: negative time delta\n");
274 }
275 if (tsk_mc_data(p)->mc_job.ghost_budget <= delta) {
276 TRACE_MC_TASK(p, "Ghost job could have ended\n");
277 tsk_mc_data(p)->mc_job.ghost_budget = 0;
278 p->se.exec_start = clock;
279 } else {
280 TRACE_MC_TASK(p, "Ghost job updated, but didn't finish\n");
281 tsk_mc_data(p)->mc_job.ghost_budget -= delta;
282 p->se.exec_start = clock;
283 }
284}
285
286/**
287 * link_task_to_crit() - Logically run a task at a criticality level.
288 * Caller must hold @ce's CPU lock.
289 */
290static void link_task_to_crit(struct crit_entry *ce,
291 struct task_struct *task)
292{
293 lt_t when_to_fire;
294
295 TRACE_CRIT_ENTRY(ce, "Linking " TS "\n", TA(task));
296 BUG_ON(!can_use(ce) && task);
297 BUG_ON(task && tsk_rt(task)->linked_on != NO_CPU);
298 BUG_ON(task && is_global(ce->domain) &&
299 !bheap_node_in_heap(ce->node));
300
301 /* Unlink last task */
302 if (ce->linked) {
303 TRACE_MC_TASK(ce->linked, "Unlinking\n");
304 ce->linked->rt_param.linked_on = NO_CPU;
305 if (is_ghost(ce->linked)) {
306 cancel_ghost(ce);
307 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget > 0) {
308 /* Job isn't finished, so do accounting */
309 update_ghost_time(ce->linked);
310 }
311 }
312 sched_trace_server_switch_away(sid(ce), 0, ce->linked->pid);
313 }
314
315 /* Actually link task */
316 ce->linked = task;
317 if (task) {
318 task->rt_param.linked_on = crit_cpu(ce)->cpu;
319 if (is_ghost(task) && CRIT_LEVEL_A != tsk_mc_crit(task)) {
320 /* There is a level-A timer that will force a
321 * preemption, so we don't set this for level-A
322 * tasks. Otherwise reset the budget timer.
323 */
324 task->se.exec_start = litmus_clock();
325 when_to_fire = task->se.exec_start +
326 tsk_mc_data(task)->mc_job.ghost_budget;
327 arm_ghost(ce, when_to_fire);
328
329 sched_trace_server_switch_to(sid(ce), 0, 0);
330 } else {
331 sched_trace_server_switch_to(sid(ce), 0, task->pid);
332 }
333 }
334}
335
336static void check_for_preempt(struct domain*);
337
338/**
339 * job_arrival() - Called when a task re-enters the system.
340 * Caller must hold no locks.
341 */
342static void job_arrival(struct task_struct *task)
343{
344 struct domain *dom = get_task_domain(task);
345
346 TRACE_MC_TASK(task, "Job arriving\n");
347 BUG_ON(!task);
348
349 raw_spin_lock(dom->lock);
350 if (can_requeue(task)) {
351 BUG_ON(task->rt_param.linked_on != NO_CPU);
352 dom->requeue(dom, task);
353 check_for_preempt(dom);
354 } else {
355 /* If a global task is scheduled on one cpu, it CANNOT
356 * be requeued into a global domain. Another cpu might
357 * dequeue the global task before it is descheduled,
358 * causing the system to crash when the task is scheduled
359 * in two places simultaneously.
360 */
361 TRACE_MC_TASK(task, "Delayed arrival of scheduled task\n");
362 }
363 raw_spin_unlock(dom->lock);
364}
365
366/**
367 * low_prio_arrival() - If CONFIG_PLUGIN_MC_REDIRECT is enabled, will
368 * redirect a lower priority job_arrival work to the interrupt_cpu.
369 */
370static void low_prio_arrival(struct task_struct *task)
371{
372 struct cpu_entry *entry;
373
374 /* Race conditions! */
375 if (!can_requeue(task)) return;
376
377#ifdef CONFIG_PLUGIN_MC_REDIRECT
378 if (!is_global_task(task))
379 goto arrive;
380 if (smp_processor_id() != interrupt_cpu) {
381 entry = &__get_cpu_var(cpus);
382 raw_spin_lock(&entry->redir_lock);
383 TRACE_MC_TASK(task, "Adding to redirect queue\n");
384 list_add(&tsk_rt(task)->list, &entry->redir);
385 raw_spin_unlock(&entry->redir_lock);
386 litmus_reschedule(interrupt_cpu);
387 } else
388#endif
389 {
390arrive:
391 job_arrival(task);
392 }
393}
394
395#ifdef CONFIG_PLUGIN_MC_REDIRECT
396/**
397 * fix_global_levels() - Execute redirected job arrivals on this cpu.
398 */
399static void fix_global_levels(void)
400{
401 int c;
402 struct cpu_entry *e;
403 struct list_head *pos, *safe;
404 struct task_struct *t;
405
406 STRACE("Fixing global levels\n");
407 for_each_online_cpu(c) {
408 e = &per_cpu(cpus, c);
409 raw_spin_lock(&e->redir_lock);
410 list_for_each_safe(pos, safe, &e->redir) {
411 t = list_entry(pos, struct task_struct, rt_param.list);
412 BUG_ON(!t);
413 TRACE_MC_TASK(t, "Dequeued redirected job\n");
414 list_del_init(pos);
415 job_arrival(t);
416 }
417 raw_spin_unlock(&e->redir_lock);
418 }
419}
420#endif
421
422/**
423 * link_task_to_cpu() - Logically run a task on a CPU.
424 * The task must first have been linked to one of the CPU's crit_entries.
425 * Caller must hold the entry lock.
426 */
427static void link_task_to_cpu(struct cpu_entry *entry, struct task_struct *task)
428{
429 int i = entry_level(entry);
430 struct crit_entry *ce;
431 TRACE_MC_TASK(task, "Linking to P%d\n", entry->cpu);
432 BUG_ON(task && tsk_rt(task)->linked_on != entry->cpu);
433 BUG_ON(task && is_ghost(task));
434
435 if (entry->linked) {
436 sched_trace_server_switch_away(-entry->linked->pid,
437 get_server_job(entry->linked),
438 entry->linked->pid);
439 }
440
441 if (task){
442 set_rt_flags(task, RT_F_RUNNING);
443 sched_trace_server_switch_to(-task->pid,
444 get_server_job(task),
445 task->pid);
446 }
447 entry->linked = task;
448
449 /* Higher criticality crit entries are now usable */
450 for (; i < entry_level(entry) + 1; i++) {
451 ce = &entry->crit_entries[i];
452 if (!can_use(ce)) {
453 ce->state = CS_ACTIVATE;
454 }
455 }
456}
457
458/**
459 * preempt() - Preempt a logically running task with a higher priority one.
460 * @dom Domain from which to draw higher priority task
461 * @ce CPU criticality level to preempt
462 *
463 * Caller must hold the lock for @dom and @ce's CPU lock.
464 */
465static void preempt(struct domain *dom, struct crit_entry *ce)
466{
467 struct task_struct *task = dom->take_ready(dom);
468 struct cpu_entry *entry = crit_cpu(ce);
469 struct task_struct *old = ce->linked;
470
471 BUG_ON(!task);
472 TRACE_CRIT_ENTRY(ce, "Preempted by " TS "\n", TA(task));
473
474 /* Per-domain preemption */
475 link_task_to_crit(ce, task);
476 if (old && can_requeue(old)) {
477 dom->requeue(dom, old);
478 }
479 update_crit_position(ce);
480
481 /* Preempt actual execution if this is a running task */
482 if (!is_ghost(task)) {
483 link_task_to_cpu(entry, task);
484 preempt_if_preemptable(entry->scheduled, entry->cpu);
485 } else if (old && old == entry->linked) {
486 /* Preempted a running task with a ghost job. Null needs to be
487 * running.
488 */
489 link_task_to_cpu(entry, NULL);
490 preempt_if_preemptable(entry->scheduled, entry->cpu);
491 }
492}
493
494/**
495 * update_crit_levels() - Update criticality entries for the new cpu state.
496 * This should be called after a new task has been linked to @entry.
497 * The caller must hold the @entry->lock, but this method will release it.
498 */
499static void update_crit_levels(struct cpu_entry *entry)
500{
501 int i, global_preempted;
502 struct crit_entry *ce;
503 struct task_struct *readmit[NUM_CRIT_LEVELS];
504 enum crit_level level = entry_level(entry);
505
506 /* Remove lower priority tasks from the entry */
507 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
508 ce = &entry->crit_entries[i];
509
510 global_preempted = ce->linked &&
511 /* This task is running on a cpu */
512 ce->linked->rt_param.scheduled_on == entry->cpu &&
513 /* But it was preempted */
514 ce->linked != entry->linked &&
515 /* And it is an eligible global task */
516 !is_ghost(ce->linked) && is_global(ce->domain);
517
518 /* Do not readmit global tasks which are preempted! These can't
519 * ever be re-admitted until they are descheduled for reasons
520 * explained in job_arrival.
521 */
522 readmit[i] = (!global_preempted) ? ce->linked : NULL;
523
524 ce->state = CS_REMOVE;
525 if (ce->linked)
526 link_task_to_crit(ce, NULL);
527 }
528 /* Need to unlock so we can access domains */
529 raw_spin_unlock(&entry->lock);
530
531 /* Re-admit tasks to the system */
532 for (i = level + 1; i < NUM_CRIT_LEVELS; i++) {
533 ce = &entry->crit_entries[i];
534 if (readmit[i]) {
535 low_prio_arrival(readmit[i]);
536 }
537 }
538}
539
540/**
541 * check_for_preempt() - Causes a preemption if higher-priority tasks are ready.
542 * Caller must hold domain lock.
543 * Makes gigantic nasty assumption that there is 1 global criticality level,
544 * and it is the last one in each list, so it doesn't call update_crit..
545 */
546static void check_for_preempt(struct domain *dom)
547{
548 int recheck = 1;
549 struct cpu_entry *entry;
550 struct crit_entry *ce;
551
552 if (is_global(dom)) {
553 /* Loop until we find a non-preemptable CPU */
554 while ((ce = lowest_prio_cpu(dom)) && recheck) {
555 entry = crit_cpu(ce);
556 recheck = 1;
557
558 /* Cache next task */
559 dom->peek_ready(dom);
560
561 raw_spin_lock(&entry->lock);
562 if (!can_use(ce))
563 /* CPU disabled while locking! */
564 fix_crit_position(ce);
565 else if (dom->preempt_needed(dom, ce->linked))
566 /* Success! Check for more preemptions */
567 preempt(dom, ce);
568 else {
569 /* Failure! */
570 recheck = 0;
571 TRACE_CRIT_ENTRY(ce, "Stopped global check\n");
572 }
573 raw_spin_unlock(&entry->lock);
574 }
575 } else /* Partitioned */ {
576 ce = domain_data(dom)->crit_entry;
577 entry = crit_cpu(ce);
578
579 /* Cache next task */
580 dom->peek_ready(dom);
581
582 raw_spin_lock(&entry->lock);
583 if (can_use(ce) && dom->preempt_needed(dom, ce->linked)) {
584 preempt(dom, ce);
585 update_crit_levels(entry);
586 } else {
587 raw_spin_unlock(&entry->lock);
588 }
589 }
590}
591
592/**
593 * remove_from_all() - Logically remove a task from all structures.
594 * Caller must hold no locks.
595 */
596static void remove_from_all(struct task_struct* task)
597{
598 int update = 0;
599 struct cpu_entry *entry;
600 struct crit_entry *ce;
601 struct domain *dom = get_task_domain(task);
602
603 TRACE_MC_TASK(task, "Removing from everything\n");
604 BUG_ON(!task);
605
606 raw_spin_lock(dom->lock);
607
608 /* Remove the task from any CPU state */
609 if (task->rt_param.linked_on != NO_CPU) {
610 entry = &per_cpu(cpus, task->rt_param.linked_on);
611 raw_spin_lock(&entry->lock);
612
613 /* Unlink only if task is still linked post lock */
614 ce = &entry->crit_entries[tsk_mc_crit(task)];
615 if (task->rt_param.linked_on != NO_CPU) {
616 BUG_ON(ce->linked != task);
617 link_task_to_crit(ce, NULL);
618 update_crit_position(ce);
619 if (!is_ghost(task) && entry->linked == task) {
620 update = 1;
621 link_task_to_cpu(entry, NULL);
622 }
623 } else {
624 TRACE_MC_TASK(task, "Unlinked before we got lock!\n");
625 }
626 if (update)
627 update_crit_levels(entry);
628 else
629 raw_spin_unlock(&entry->lock);
630 } else {
631 TRACE_MC_TASK(task, "Not linked to anything\n");
632 }
633
634 /* Ensure the task isn't returned by its domain */
635 dom->remove(dom, task);
636
637 raw_spin_unlock(dom->lock);
638}
639
640/**
641 * job_completion() - Update task state and re-enter it into the system.
642 * Converts tasks which have completed their execution early into ghost jobs.
643 * Caller must hold no locks.
644 */
645static void job_completion(struct task_struct *task, int forced)
646{
647 int behind;
648 TRACE_MC_TASK(task, "Completed\n");
649
650 /* Logically stop the task execution */
651 set_rt_flags(task, RT_F_SLEEP);
652 remove_from_all(task);
653
654 /* Level-A tasks cannot ever get behind */
655 behind = tsk_mc_crit(task) != CRIT_LEVEL_A && behind_server(task);
656
657 if (!forced && !is_ghost(task)) {
658 /* Task voluntarily ceased execution. Move on to next period */
659 task_release(task);
660 sched_trace_task_completion(task, forced);
661
662 /* Convert to ghost job */
663 tsk_mc_data(task)->mc_job.ghost_budget = budget_remaining(task);
664 tsk_mc_data(task)->mc_job.is_ghost = 1;
665 }
666
667 /* If the task has no ghost budget, convert back from ghost.
668 * If the task is behind, undo ghost conversion so that it
669 * can catch up.
670 */
671 if (behind || tsk_mc_data(task)->mc_job.ghost_budget == 0) {
672 TRACE_MC_TASK(task, "Not a ghost task\n");
673 tsk_mc_data(task)->mc_job.is_ghost = 0;
674 tsk_mc_data(task)->mc_job.ghost_budget = 0;
675 }
676
677 /* If server has run out of budget, wait until next release */
678 if (budget_exhausted(task)) {
679 sched_trace_server_completion(-task->pid,
680 get_server_job(task));
681 server_release(task);
682 }
683
684 /* Requeue non-blocking tasks */
685 if (is_running(task))
686 job_arrival(task);
687}
688
689/**
690 * mc_ghost_exhausted() - Complete logically running ghost task.
691 */
692#ifdef CONFIG_MERGE_TIMERS
693static void mc_ghost_exhausted(struct rt_event *e)
694{
695 struct crit_entry *ce = container_of(e, struct crit_entry, event);
696#else
697static enum hrtimer_restart mc_ghost_exhausted(struct hrtimer *timer)
698{
699 struct crit_entry *ce = container_of(timer, struct crit_entry, timer);
700#endif
701
702 unsigned long flags;
703 struct task_struct *tmp = NULL;
704
705 local_irq_save(flags);
706 TRACE("Ghost exhausted\n");
707 TRACE_CRIT_ENTRY(ce, "Firing here\n");
708
709 /* Due to race conditions, we cannot just set the linked
710 * task's budget to 0 as it may no longer be the task
711 * for which this timer was armed. Instead, update the running
712 * task time and see if this causes exhaustion.
713 */
714 raw_spin_lock(&crit_cpu(ce)->lock);
715 if (ce->linked && is_ghost(ce->linked)) {
716 update_ghost_time(ce->linked);
717 if (tsk_mc_data(ce->linked)->mc_job.ghost_budget == 0) {
718 tmp = ce->linked;
719 }
720 }
721 raw_spin_unlock(&crit_cpu(ce)->lock);
722
723 if (tmp)
724 job_completion(tmp, 0);
725
726 local_irq_restore(flags);
727#ifndef CONFIG_MERGE_TIMERS
728 return HRTIMER_NORESTART;
729#endif
730}
731
732/*
733 * The MC-CE common timer callback code for merged and non-merged timers.
734 * Returns the next time the timer should fire.
735 */
736static lt_t __ce_timer_function(struct ce_dom_data *ce_data)
737{
738 struct crit_entry *ce = get_crit_entry_for(ce_data->cpu, CRIT_LEVEL_A);
739 struct domain *dom = ce->domain;
740 struct task_struct *old_link = NULL;
741 lt_t next_timer_abs;
742
743 TRACE("MC level-A timer callback for CPU %d\n", ce_data->cpu);
744
745 raw_spin_lock(dom->lock);
746
747 raw_spin_lock(&crit_cpu(ce)->lock);
748 if (ce->linked &&
749 ce->linked == ce_data->should_schedule &&
750 is_ghost(ce->linked))
751 {
752 old_link = ce->linked;
753 tsk_mc_data(ce->linked)->mc_job.ghost_budget = 0;
754 link_task_to_crit(ce, NULL);
755 }
756 raw_spin_unlock(&crit_cpu(ce)->lock);
757
758 next_timer_abs = mc_ce_timer_callback_common(dom);
759
760 /* Job completion will check for preemptions by means of calling job
761 * arrival if the task is not blocked */
762 if (NULL != old_link) {
763 STRACE("old_link " TS " so will call job completion\n", TA(old_link));
764 raw_spin_unlock(dom->lock);
765 job_completion(old_link, 0);
766 } else {
767 STRACE("old_link was null, so will call check for preempt\n");
768 raw_spin_unlock(dom->lock);
769 check_for_preempt(dom);
770 }
771 return next_timer_abs;
772}
773
774#ifdef CONFIG_MERGE_TIMERS
775static void ce_timer_function(struct rt_event *e)
776{
777 struct ce_dom_data *ce_data =
778 container_of(e, struct ce_dom_data, event);
779 unsigned long flags;
780 lt_t next_timer_abs;
781
782 TS_LVLA_RELEASE_START;
783
784 local_irq_save(flags);
785 next_timer_abs = __ce_timer_function(ce_data);
786 add_event(per_cpu(cpus, ce_data->cpu).event_group, e, next_timer_abs);
787 local_irq_restore(flags);
788
789 TS_LVLA_RELEASE_END;
790}
791#else /* else to CONFIG_MERGE_TIMERS */
792static enum hrtimer_restart ce_timer_function(struct hrtimer *timer)
793{
794 struct ce_dom_data *ce_data =
795 container_of(timer, struct ce_dom_data, timer);
796 unsigned long flags;
797 lt_t next_timer_abs;
798
799 TS_LVLA_RELEASE_START;
800
801 local_irq_save(flags);
802 next_timer_abs = __ce_timer_function(ce_data);
803 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
804 local_irq_restore(flags);
805
806 TS_LVLA_RELEASE_END;
807
808 return HRTIMER_RESTART;
809}
810#endif /* CONFIG_MERGE_TIMERS */
811
812
813/**
814 * mc_release_jobs() - Add heap of tasks to the system, check for preemptions.
815 */
816static void mc_release_jobs(rt_domain_t* rt, struct bheap* tasks)
817{
818 unsigned long flags;
819 struct task_struct *first = bheap_peek(rt->order, tasks)->value;
820 struct domain *dom = get_task_domain(first);
821
822 raw_spin_lock_irqsave(dom->lock, flags);
823 TRACE(TS "Jobs released\n", TA(first));
824 __merge_ready(rt, tasks);
825 check_for_preempt(dom);
826 raw_spin_unlock_irqrestore(dom->lock, flags);
827}
828
829/**
830 * ms_task_new() - Setup new mixed-criticality task.
831 * Assumes that there are no partitioned domains after level B.
832 */
833static void mc_task_new(struct task_struct *t, int on_rq, int running)
834{
835 unsigned long flags;
836 struct cpu_entry* entry;
837 enum crit_level level = tsk_mc_crit(t);
838 char name[TASK_COMM_LEN];
839 strcpy(name, "rtspin");
840
841 local_irq_save(flags);
842 TRACE("New mixed criticality task %d\n", t->pid);
843
844 /* Assign domain */
845 if (level < CRIT_LEVEL_C)
846 entry = &per_cpu(cpus, get_partition(t));
847 else
848 entry = &per_cpu(cpus, task_cpu(t));
849 t->rt_param._domain = entry->crit_entries[level].domain;
850
851 sched_trace_container_param(t->pid, name);
852 sched_trace_server_param(-t->pid, t->pid,
853 get_exec_cost(t), get_rt_period(t));
854
855 /* Setup job params */
856 release_at(t, litmus_clock());
857 tsk_mc_data(t)->mc_job.ghost_budget = 0;
858 tsk_mc_data(t)->mc_job.is_ghost = 0;
859 if (running) {
860 BUG_ON(entry->scheduled);
861 entry->scheduled = t;
862 tsk_rt(t)->scheduled_on = entry->cpu;
863 } else {
864 t->rt_param.scheduled_on = NO_CPU;
865 }
866 t->rt_param.linked_on = NO_CPU;
867
868
869 job_arrival(t);
870
871 local_irq_restore(flags);
872}
873
874/**
 875 * mc_task_wake_up() - Add task back into its domain and check for preemptions.
876 */
877static void mc_task_wake_up(struct task_struct *task)
878{
879 unsigned long flags;
880 lt_t now = litmus_clock();
881 local_irq_save(flags);
882
883 TRACE(TS " wakes up\n", TA(task));
884 if (is_tardy(task, now)) {
885 /* Task missed its last release */
886 release_at(task, now);
887 sched_trace_task_release(task);
888 }
889 if (!is_ghost(task))
890 job_arrival(task);
891
892 local_irq_restore(flags);
893}
894
895/**
896 * mc_task_block() - Remove task from state to prevent it being run anywhere.
897 */
898static void mc_task_block(struct task_struct *task)
899{
900 unsigned long flags;
901 local_irq_save(flags);
902 TRACE(TS " blocks\n", TA(task));
903 remove_from_all(task);
904 local_irq_restore(flags);
905}
906
907/**
908 * mc_task_exit() - Remove task from the system.
909 */
910static void mc_task_exit(struct task_struct *task)
911{
912 unsigned long flags;
913 local_irq_save(flags);
914 BUG_ON(!is_realtime(task));
915 TRACE(TS " RIP\n", TA(task));
916
917 remove_from_all(task);
918 if (tsk_rt(task)->scheduled_on != NO_CPU) {
919 per_cpu(cpus, tsk_rt(task)->scheduled_on).scheduled = NULL;
920 tsk_rt(task)->scheduled_on = NO_CPU;
921 }
922
923 if (CRIT_LEVEL_A == tsk_mc_crit(task))
924 mc_ce_task_exit_common(task);
925
926 local_irq_restore(flags);
927}
928
929/**
930 * mc_admit_task() - Return true if the task is valid.
931 * Assumes there are no partitioned levels after level B.
932 */
933static long mc_admit_task(struct task_struct* task)
934{
935 const enum crit_level crit = tsk_mc_crit(task);
936 long ret;
937 if (!tsk_mc_data(task)) {
938 printk(KERN_WARNING "Tried to admit task with no criticality "
939 "level\n");
940 ret = -EINVAL;
941 goto out;
942 }
943 if (crit < CRIT_LEVEL_C && get_partition(task) == NO_CPU) {
944 printk(KERN_WARNING "Tried to admit partitioned task with no "
945 "partition\n");
946 ret = -EINVAL;
947 goto out;
948 }
949 if (crit == CRIT_LEVEL_A) {
950 ret = mc_ce_admit_task_common(task);
951 if (ret)
952 goto out;
953 }
954 printk(KERN_INFO "Admitted task with criticality level %d\n",
955 tsk_mc_crit(task));
956 ret = 0;
957out:
958 return ret;
959}
960
961/**
962 * mc_schedule() - Return next task which should be scheduled.
963 */
964static struct task_struct* mc_schedule(struct task_struct* prev)
965{
966 unsigned long flags;
967 struct domain *dom;
968 struct crit_entry *ce;
969 struct cpu_entry* entry = &__get_cpu_var(cpus);
970 int i, out_of_time, sleep, preempt, exists, blocks, global, lower;
971 struct task_struct *dtask = NULL, *ready_task = NULL, *next = NULL;
972
973 local_irq_save(flags);
974
975 /* Litmus gave up because it couldn't access the stack of the CPU
976 * on which will_schedule was migrating from. Requeue it.
977 * This really only happens in VMs.
978 */
979 if (entry->will_schedule && entry->will_schedule != prev) {
980 entry->will_schedule->rt_param.scheduled_on = NO_CPU;
981 low_prio_arrival(entry->will_schedule);
982 }
983
984 raw_spin_lock(&entry->lock);
985
986 /* Sanity checking */
987 BUG_ON(entry->scheduled && entry->scheduled != prev);
988 BUG_ON(entry->scheduled && !is_realtime(prev));
989 BUG_ON(is_realtime(prev) && !entry->scheduled);
990
991 /* Determine state */
992 exists = entry->scheduled != NULL;
993 blocks = exists && !is_running(entry->scheduled);
994 out_of_time = exists && budget_enforced(entry->scheduled) &&
995 budget_exhausted(entry->scheduled);
996 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
997 global = exists && is_global_task(entry->scheduled);
998 preempt = entry->scheduled != entry->linked;
999 lower = exists && preempt && entry->linked &&
1000 tsk_mc_crit(entry->scheduled) > tsk_mc_crit(entry->linked);
1001
1002 TRACE(TS " blocks:%d out_of_time:%d sleep:%d preempt:%d\n",
1003 TA(prev), blocks, out_of_time, sleep, preempt);
1004
1005 if (exists)
1006 prev->rt_param.scheduled_on = NO_CPU;
1007
1008 raw_spin_unlock(&entry->lock);
1009
1010
1011#ifdef CONFIG_PLUGIN_MC_REDIRECT
1012 if (smp_processor_id() == interrupt_cpu)
1013 fix_global_levels();
1014#endif
1015
1016 /* If a task blocks we have no choice but to reschedule */
1017 if (blocks)
1018 remove_from_all(entry->scheduled);
1019 /* Any task which exhausts its budget or sleeps waiting for its next
1020 * period completes unless its execution has been forcibly stopped.
1021 */
1022 if ((out_of_time || sleep) && !blocks)/* && !preempt)*/
1023 job_completion(entry->scheduled, !sleep);
1024 /* Global scheduled tasks must wait for a deschedule before they
1025 * can rejoin the global state. Rejoin them here.
1026 */
1027 else if (global && preempt && !blocks) {
1028 if (lower)
1029 low_prio_arrival(entry->scheduled);
1030 else
1031 job_arrival(entry->scheduled);
1032 }
1033
1034 /* Pick next task if none is linked */
1035 raw_spin_lock(&entry->lock);
1036 for (i = 0; i < NUM_CRIT_LEVELS && !entry->linked; i++) {
1037 ce = &entry->crit_entries[i];
1038 dom = ce->domain;
1039
1040 /* Swap locks. We cannot acquire a domain lock while
1041 * holding an entry lock or deadlocks will happen.
1042 */
1043 raw_spin_unlock(&entry->lock);
1044 raw_spin_lock(dom->lock);
1045
1046 /* Do domain stuff before grabbing CPU locks */
1047 dtask = dom->peek_ready(dom);
1048 fix_crit_position(ce);
1049
1050 raw_spin_lock(&entry->lock);
1051
1052 if (!entry->linked && !ce->linked && dtask && can_use(ce)) {
1053 dom->take_ready(dom);
1054 link_task_to_crit(ce, dtask);
1055 update_crit_position(ce);
1056 ready_task = (is_ghost(dtask)) ? NULL : dtask;
1057
1058 /* Task found! */
1059 if (ready_task) {
1060 link_task_to_cpu(entry, ready_task);
1061 raw_spin_unlock(dom->lock);
1062 update_crit_levels(entry);
1063 raw_spin_lock(&entry->lock);
1064 continue;
1065 }
1066 }
1067 raw_spin_unlock(dom->lock);
1068 }
1069
1070 /* Schedule next task */
1071 next = entry->linked;
1072 if (entry->linked)
1073 entry->linked->rt_param.scheduled_on = entry->cpu;
1074 entry->will_schedule = entry->linked;
1075 sched_state_task_picked();
1076
1077 raw_spin_unlock(&entry->lock);
1078 local_irq_restore(flags);
1079 if (next) {
1080 TRACE_MC_TASK(next, "Picked this task\n");
1081 } else if (exists && !next)
1082 TRACE_ENTRY(entry, "Becomes idle at %llu\n", litmus_clock());
1083 return next;
1084}
1085
1086void mc_finish_switch(struct task_struct *prev)
1087{
1088 struct cpu_entry* entry = &__get_cpu_var(cpus);
1089 entry->scheduled = is_realtime(current) ? current : NULL;
 1090 TRACE_TASK(prev, "Switched away; now scheduled: " TS "\n",
1091 TA(entry->scheduled));
1092}
1093
1094/*
1095 * This is the plugin's release at function, called by the release task-set
1096 * system call. Other places in the file use the generic LITMUS release_at(),
1097 * which is not this.
1098 */
1099void mc_release_at(struct task_struct *ts, lt_t start)
1100{
1101 /* hack so that we can have CE timers start at the right time */
1102 if (CRIT_LEVEL_A == tsk_mc_crit(ts))
1103 mc_ce_release_at_common(ts, start);
1104 else
1105 release_at(ts, start);
1106}
1107
1108long mc_deactivate_plugin(void)
1109{
1110 return mc_ce_deactivate_plugin_common();
1111}
1112
1113/* **************************************************************************
1114 * Initialization
1115 * ************************************************************************** */
1116
1117/* Initialize values here so that they are allocated with the module
1118 * and destroyed when the module is unloaded.
1119 */
1120
1121/* LVL-A */
1122DEFINE_PER_CPU(struct domain_data, _mc_crit_a);
1123DEFINE_PER_CPU(raw_spinlock_t, _mc_crit_a_lock);
1124DEFINE_PER_CPU(struct ce_dom_data, _mc_crit_a_ce_data);
1125/* LVL-B */
1126DEFINE_PER_CPU(struct domain_data, _mc_crit_b);
1127DEFINE_PER_CPU(rt_domain_t, _mc_crit_b_rt);
1128/* LVL-C */
1129static struct domain_data _mc_crit_c;
1130static rt_domain_t _mc_crit_c_rt;
1131struct bheap _mc_heap_c;
1132struct bheap_node _mc_nodes_c[NR_CPUS];
1133
1134static long mc_activate_plugin(void)
1135{
1136 struct domain_data *dom_data;
1137 struct domain *dom;
1138 struct domain_data *our_domains[NR_CPUS];
1139 int cpu, n = 0;
1140 long ret;
1141
1142#ifdef CONFIG_RELEASE_MASTER
1143 interrupt_cpu = atomic_read(&release_master_cpu);
1144#if defined(CONFIG_PLUGIN_MC_REDIRECT) || defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1145 if (NO_CPU == interrupt_cpu) {
1146 printk(KERN_ERR "LITMUS-MC: need a release master\n");
1147 ret = -EINVAL;
1148 goto out;
1149 }
1150#endif
1151#endif
1152
1153 for_each_online_cpu(cpu) {
1154 BUG_ON(NR_CPUS <= n);
1155 dom = per_cpu(cpus, cpu).crit_entries[CRIT_LEVEL_A].domain;
1156 dom_data = domain_data(dom);
1157 our_domains[cpu] = dom_data;
1158#if defined(CONFIG_MERGE_TIMERS) && defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1159 per_cpu(cpus, cpu).event_group =
1160 get_event_group_for(interrupt_cpu);
1161#elif defined(CONFIG_MERGE_TIMERS) && !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER)
1162 per_cpu(cpus, cpu).event_group = get_event_group_for(cpu);
1163#endif
1164 n++;
1165 }
1166 ret = mc_ce_set_domains(n, our_domains);
1167 if (ret)
1168 goto out;
1169 ret = mc_ce_activate_plugin_common();
1170out:
1171 return ret;
1172}
1173
1174
1175static void mc_release_ts(lt_t time)
1176{
1177 int i, cpu, base_id = 0, cont_id = -1;
1178 char name[TASK_COMM_LEN];
1179 enum crit_level level;
1180 struct cpu_entry *entry;
1181 struct crit_entry *ce;
1182
1183 level = CRIT_LEVEL_A;
1184 strcpy(name, "LVL-A");
1185 for_each_online_cpu(cpu) {
1186 entry = &per_cpu(cpus, cpu);
1187 trace_litmus_container_param(++cont_id, &name);
1188 ce = &entry->crit_entries[level];
1189 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1190 }
1191
1192 level = CRIT_LEVEL_B;
1193 strcpy(name, "LVL-B");
1194 for_each_online_cpu(cpu) {
1195 entry = &per_cpu(cpus, cpu);
1196 trace_litmus_container_param(++cont_id, &name);
1197 ce = &entry->crit_entries[level];
1198 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1199 }
1200
1201 level = CRIT_LEVEL_C;
1202 strcpy(name, "LVL-C");
1203 trace_litmus_container_param(++cont_id, &name);
1204 for_each_online_cpu(cpu) {
1205 entry = &per_cpu(cpus, cpu);
1206 ce = &entry->crit_entries[level];
1207 sched_trace_server_param(sid(ce), cont_id, 0, 0);
1208 }
1209
1210
1211
1212}
1213
1214static struct sched_plugin mc_plugin __cacheline_aligned_in_smp = {
1215 .plugin_name = "MC",
1216 .task_new = mc_task_new,
1217 .complete_job = complete_job,
1218 .task_exit = mc_task_exit,
1219 .schedule = mc_schedule,
1220 .task_wake_up = mc_task_wake_up,
1221 .task_block = mc_task_block,
1222 .admit_task = mc_admit_task,
1223 .activate_plugin = mc_activate_plugin,
1224 .release_at = mc_release_at,
1225 .deactivate_plugin = mc_deactivate_plugin,
1226 .finish_switch = mc_finish_switch,
1227 .release_ts = mc_release_ts,
1228};
1229
1230static void init_crit_entry(struct crit_entry *ce, enum crit_level level,
1231 struct domain_data *dom_data,
1232 struct bheap_node *node)
1233{
1234 ce->level = level;
1235 ce->linked = NULL;
1236 ce->node = node;
1237 ce->domain = &dom_data->domain;
1238 ce->state = CS_ACTIVE;
1239#ifdef CONFIG_MERGE_TIMERS
1240 init_event(&ce->event, level, mc_ghost_exhausted,
1241 event_list_alloc(GFP_ATOMIC));
1242#else
1243 hrtimer_init(&ce->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1244 ce->timer.function = mc_ghost_exhausted;
1245#endif
1246
1247}
1248
1249static void init_local_domain(struct cpu_entry *entry, struct domain_data *dom_data,
1250 enum crit_level level)
1251{
1252 dom_data->heap = NULL;
1253 dom_data->crit_entry = &entry->crit_entries[level];
1254 init_crit_entry(dom_data->crit_entry, level, dom_data, NULL);
1255}
1256
1257static void init_global_domain(struct domain_data *dom_data, enum crit_level level,
1258 struct bheap *heap, struct bheap_node *nodes)
1259{
1260 int cpu;
1261 struct cpu_entry *entry;
1262 struct crit_entry *ce;
1263 struct bheap_node *node;
1264
1265 dom_data->crit_entry = NULL;
1266 dom_data->heap = heap;
1267 bheap_init(heap);
1268
1269 for_each_online_cpu(cpu) {
1270 entry = &per_cpu(cpus, cpu);
1271 node = &nodes[cpu];
1272 ce = &entry->crit_entries[level];
1273 init_crit_entry(ce, level, dom_data, node);
1274 bheap_node_init(&ce->node, ce);
1275 bheap_insert(cpu_lower_prio, heap, node);
1276 }
1277}
1278
1279static inline void init_edf_domain(struct domain *dom, rt_domain_t *rt,
1280 enum crit_level prio, int is_partitioned, int cpu)
1281{
1282 pd_domain_init(dom, rt, edf_ready_order, NULL,
1283 mc_release_jobs, mc_preempt_needed,
1284 edf_higher_prio);
1285 rt->level = prio;
1286#if defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1287 /* All timers are on one CPU and release-master is using the event
1288 * merging interface as well. */
1289 BUG_ON(NO_CPU == interrupt_cpu);
1290 rt->event_group = get_event_group_for(interrupt_cpu);
1291 rt->prio = prio;
1292#elif defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && !defined(CONFIG_MERGE_TIMERS)
1293 /* Using release master, but not merging timers. */
1294 rt->release_master = interrupt_cpu;
1295#elif !defined(CONFIG_PLUGIN_MC_RELEASE_MASTER) && defined(CONFIG_MERGE_TIMERS)
1296 /* Merge the timers, but don't move them to the release master. */
1297 if (is_partitioned) {
1298 rt->event_group = get_event_group_for(cpu);
1299 } else {
1300 /* Global timers are added to the event group of whichever CPU
1301 * the code is executing on when add_event() is called.
1302 */
1303 rt->event_group = NULL;
1304 }
1305 rt->prio = prio;
1306#endif
1307}
1308
1309struct domain_data *ce_domain_for(int);
1310static int __init init_mc(void)
1311{
1312 int cpu;
1313 struct cpu_entry *entry;
1314 struct domain_data *dom_data;
1315 rt_domain_t *rt;
1316 raw_spinlock_t *a_dom_lock, *b_dom_lock, *c_dom_lock; /* For lock debugger */
1317 struct ce_dom_data *ce_data;
1318
1319 for_each_online_cpu(cpu) {
1320 entry = &per_cpu(cpus, cpu);
1321
1322 /* CPU */
1323 entry->cpu = cpu;
1324 entry->scheduled = NULL;
1325 entry->linked = NULL;
1326
1327 raw_spin_lock_init(&entry->lock);
1328
1329#ifdef CONFIG_PLUGIN_MC_REDIRECT
1330 raw_spin_lock_init(&entry->redir_lock);
1331 INIT_LIST_HEAD(&entry->redir);
1332#endif
1333
1334 /* CRIT_LEVEL_A */
1335 dom_data = &per_cpu(_mc_crit_a, cpu);
1336 ce_data = &per_cpu(_mc_crit_a_ce_data, cpu);
1337 a_dom_lock = &per_cpu(_mc_crit_a_lock, cpu);
1338 raw_spin_lock_init(a_dom_lock);
1339 ce_domain_init(&dom_data->domain,
1340 a_dom_lock, ce_requeue, ce_peek_and_take_ready,
1341 ce_peek_and_take_ready, mc_preempt_needed,
1342 ce_higher_prio, ce_data, cpu,
1343 ce_timer_function);
1344 init_local_domain(entry, dom_data, CRIT_LEVEL_A);
1345 dom_data->domain.name = "LVL-A";
1346
1347 /* CRIT_LEVEL_B */
1348 dom_data = &per_cpu(_mc_crit_b, cpu);
1349 rt = &per_cpu(_mc_crit_b_rt, cpu);
1350 init_local_domain(entry, dom_data, CRIT_LEVEL_B);
1351 init_edf_domain(&dom_data->domain, rt, CRIT_LEVEL_B, 1, cpu);
1352 b_dom_lock = dom_data->domain.lock;
1353 raw_spin_lock_init(b_dom_lock);
1354 dom_data->domain.name = "LVL-B";
1355 }
1356
1357 /* CRIT_LEVEL_C */
1358 init_global_domain(&_mc_crit_c, CRIT_LEVEL_C,
1359 &_mc_heap_c, _mc_nodes_c);
1360 init_edf_domain(&_mc_crit_c.domain, &_mc_crit_c_rt, CRIT_LEVEL_C,
1361 0, NO_CPU);
1362 c_dom_lock = _mc_crit_c.domain.lock;
1363 raw_spin_lock_init(c_dom_lock);
1364 _mc_crit_c.domain.name = "LVL-C";
1365
1366 return register_sched_plugin(&mc_plugin);
1367}
1368
1369module_init(init_mc);
diff --git a/litmus/sched_mc_ce.c b/litmus/sched_mc_ce.c
new file mode 100644
index 000000000000..702b46da93d5
--- /dev/null
+++ b/litmus/sched_mc_ce.c
@@ -0,0 +1,1052 @@
1/**
2 * litmus/sched_mc_ce.c
3 *
4 * The Cyclic Executive (CE) scheduler used by the mixed criticality scheduling
5 * algorithm.
6 */
7
8#include <asm/atomic.h>
9#include <asm/uaccess.h>
10
11#include <linux/module.h>
12#include <linux/percpu.h>
13#include <linux/hrtimer.h>
14#include <linux/pid.h>
15#include <linux/sched.h>
16#include <linux/proc_fs.h>
17
18#include <litmus/litmus.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/rt_domain.h>
21#include <litmus/rt_param.h>
22#include <litmus/litmus_proc.h>
23#include <litmus/sched_trace.h>
24#include <litmus/jobs.h>
25#include <litmus/sched_mc.h>
26#include <litmus/ce_domain.h>
27
28static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp;
29
30#define using_linux_plugin() (litmus == &linux_sched_plugin)
31
32/* get a reference to struct domain for a CPU */
33#define get_domain_for(cpu) (&per_cpu(domains, cpu)->domain)
34
35#define get_pid_table(cpu) (&per_cpu(ce_pid_table, cpu))
36#define get_pid_entry(cpu, idx) (&(get_pid_table(cpu)->entries[idx]))
37
38static atomic_t start_time_set = ATOMIC_INIT(-1);
39static atomic64_t start_time = ATOMIC64_INIT(0);
40static struct proc_dir_entry *mc_ce_dir = NULL, *ce_file = NULL;
41
42/*
43 * Cache the budget along with the struct PID for a task so that we don't need
44 * to fetch its task_struct every time we check to see what should be
45 * scheduled.
46 */
47struct ce_pid_entry {
48 struct pid *pid;
49 lt_t budget;
50 /* accumulated (summed) budgets, including this one */
51 lt_t acc_time;
52 unsigned int expected_job;
53};
54
55/*
56 * Each CPU needs a mapping of level A ID (integer) to struct pid so that we
57 * can get its task struct.
58 */
59struct ce_pid_table {
60 struct ce_pid_entry entries[CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS];
61 int num_pid_entries;
62 lt_t cycle_time;
63};
64
65DEFINE_PER_CPU(struct ce_pid_table, ce_pid_table);
66
67/*
68 * How we get the domain for a given CPU locally. Set with the
69 * mc_ce_set_domains function. Must be done before activating plugins. Be
70 * careful when using domains as a variable elsewhere in this file.
71 */
72
73DEFINE_PER_CPU(struct domain_data*, domains);
74
75/*
76 * The domains and other data used by the MC-CE plugin when it runs alone.
77 */
78DEFINE_PER_CPU(struct domain_data, _mc_ce_doms);
79DEFINE_PER_CPU(struct ce_dom_data, _mc_ce_dom_data);
80DEFINE_PER_CPU(raw_spinlock_t, _mc_ce_dom_locks);
81
82#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
83static int interrupt_cpu;
84#endif
85
86long mc_ce_set_domains(const int n, struct domain_data *domains_in[])
87{
88 const int max = (NR_CPUS < n) ? NR_CPUS : n;
89 struct domain_data *new_dom = NULL;
90 int i, ret;
91 if (!using_linux_plugin()) {
92 printk(KERN_WARNING "can't set MC-CE domains when not using "
93 "Linux scheduler.\n");
94 ret = -EINVAL;
95 goto out;
96 }
97 for (i = 0; i < max; ++i) {
98 new_dom = domains_in[i];
99 per_cpu(domains, i) = new_dom;
100 }
101 ret = 0;
102out:
103 return ret;
104}
105
106unsigned int mc_ce_get_expected_job(const int cpu, const int idx)
107{
108 const struct ce_pid_table *pid_table = get_pid_table(cpu);
109 BUG_ON(0 > cpu);
110 BUG_ON(0 > idx);
111 BUG_ON(pid_table->num_pid_entries <= idx);
112 return pid_table->entries[idx].expected_job;
113}
114
115/*
116 * Get the offset into the cycle taking the start time into account.
117 */
118static inline lt_t get_cycle_offset(const lt_t when, const lt_t cycle_time)
119{
120 long long st = atomic64_read(&start_time);
121 lt_t offset = (when - st) % cycle_time;
122 TRACE("when: %llu cycle_time: %llu start_time: %lld offset %llu\n",
123 when, cycle_time, st, offset);
124 return offset;
125}
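
As a quick sanity check of the modulo arithmetic above, a standalone user-space sketch; the start time, cycle time and "now" values are made up:

#include <stdio.h>

typedef unsigned long long lt_t;	/* LITMUS time values are in ns */

int main(void)
{
	lt_t start_time = 1000000000ULL;	/* cycle origin, assumed     */
	lt_t cycle_time =   30000000ULL;	/* 30 ms major cycle         */
	lt_t when       = 1077000000ULL;	/* "now", 77 ms after origin */
	lt_t offset     = (when - start_time) % cycle_time;

	/* 77 ms past the origin is 17 ms into the third cycle */
	printf("offset = %llu ns\n", offset);	/* prints 17000000 */
	return 0;
}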
126
127/*
128 * The user-land job completion call sets the RT_F_SLEEP flag and then
129 * calls schedule(); this function runs when schedule() puts that task to sleep.
130 *
131 * Do not call prepare_for_next_period on Level-A tasks!
132 */
133static void mc_ce_job_completion(struct domain *dom, struct task_struct *ts)
134{
135 const int cpu = task_cpu(ts);
136 const int idx = tsk_mc_data(ts)->mc_task.lvl_a_id;
137 const struct ce_pid_entry *pid_entry = get_pid_entry(cpu, idx);
138 unsigned int just_finished;
139
140 TRACE_TASK(ts, "Completed\n");
141
142 /* sched_trace_task_completion(ts, 0); */
143 /* post-increment is important here */
144 just_finished = (tsk_rt(ts)->job_params.job_no)++;
145
146 /* Job completes in expected window: everything is normal.
147 * Job completes in an earlier window: BUG(), that's wrong.
148 * Job completes in a later window: The job is behind.
149 */
150 if (just_finished < pid_entry->expected_job) {
151 /* this job is already released because it's running behind */
152 set_rt_flags(ts, RT_F_RUNNING);
153 TRACE_TASK(ts, "appears behind: the expected job is %u but "
154 "job %u just completed\n",
155 pid_entry->expected_job, just_finished);
156 } else if (pid_entry->expected_job < just_finished) {
157 printk(KERN_CRIT "job %u completed but the expected job is only "
158 "%u, which is too early\n", just_finished,
159 pid_entry->expected_job);
160 BUG();
161 }
162}
163
164
165/*
166 * Return the index into the PID entries table of what to schedule next.
167 * Don't call if the table is empty. Assumes the caller has the domain lock.
168 * The offset parameter is the offset into the cycle.
169 *
170 * TODO Currently O(n) in the number of tasks on the CPU. Binary search?
171 */
172static int mc_ce_schedule_at(const struct domain *dom, lt_t offset)
173{
174 const struct ce_dom_data *ce_data = dom->data;
175 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
176 const struct ce_pid_entry *pid_entry = NULL;
177 int idx;
178
179 BUG_ON(pid_table->cycle_time < 1);
180 BUG_ON(pid_table->num_pid_entries < 1);
181
182 for (idx = 0; idx < pid_table->num_pid_entries; ++idx) {
183 pid_entry = &pid_table->entries[idx];
184 if (offset < pid_entry->acc_time) {
185 /* found task to schedule in this window */
186 break;
187 }
188 }
189 /* can only happen if cycle_time is not right */
190 BUG_ON(pid_entry->acc_time > pid_table->cycle_time);
191 TRACE("schedule at returning task %d for CPU %d\n", idx, ce_data->cpu);
192 return idx;
193}
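
Since acc_time is a non-decreasing prefix sum of the slot budgets, the TODO above could be addressed by a binary search for the first entry whose acc_time exceeds the offset. A hedged sketch against the same ce_pid_table layout (not part of this patch; assumes at least one entry and offset < cycle_time, as the BUG_ONs above already guarantee):

/* Hypothetical O(log n) replacement for the linear scan in
 * mc_ce_schedule_at(): return the first index with offset < acc_time. */
static int mc_ce_schedule_at_bsearch(const struct ce_pid_table *pid_table,
				     lt_t offset)
{
	int lo = 0, hi = pid_table->num_pid_entries - 1, mid, idx = hi;

	while (lo <= hi) {
		mid = lo + (hi - lo) / 2;
		if (offset < pid_table->entries[mid].acc_time) {
			idx = mid;	/* candidate window; keep looking left */
			hi = mid - 1;
		} else {
			lo = mid + 1;	/* offset lies past this window */
		}
	}
	return idx;
}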
194
195static struct task_struct *mc_ce_schedule(struct task_struct *prev)
196{
197 struct domain *dom = get_domain_for(smp_processor_id());
198 struct ce_dom_data *ce_data = dom->data;
199 struct task_struct *next = NULL;
200 int exists, sleep, should_sched_exists, should_sched_blocked,
201 should_sched_asleep;
202
203 raw_spin_lock(dom->lock);
204
205 /* sanity checking */
206 BUG_ON(ce_data->scheduled && ce_data->scheduled != prev);
207 BUG_ON(ce_data->scheduled && !is_realtime(prev));
208 BUG_ON(is_realtime(prev) && !ce_data->scheduled);
209
210 exists = NULL != ce_data->scheduled;
211 sleep = exists && RT_F_SLEEP == get_rt_flags(ce_data->scheduled);
212
213 TRACE("exists: %d, sleep: %d\n", exists, sleep);
214
215 if (sleep)
216 mc_ce_job_completion(dom, ce_data->scheduled);
217
218 /* these checks must go after the call to mc_ce_job_completion in case
219 * a late task needs to be scheduled again right away and it's the only
220 * task on a core
221 */
222 should_sched_exists = NULL != ce_data->should_schedule;
223 should_sched_blocked = should_sched_exists &&
224 !is_running(ce_data->should_schedule);
225 should_sched_asleep = should_sched_exists &&
226 RT_F_SLEEP == get_rt_flags(ce_data->should_schedule);
227
228 TRACE("should_sched_exists: %d, should_sched_blocked: %d, "
229 "should_sched_asleep: %d\n", should_sched_exists,
230 should_sched_blocked, should_sched_asleep);
231
232 if (should_sched_exists && !should_sched_blocked &&
233 !should_sched_asleep) {
234 /*
235 * schedule the task that should be executing in the cyclic
236 * schedule if it is not blocked and not sleeping
237 */
238 next = ce_data->should_schedule;
239 }
240 sched_state_task_picked();
241 raw_spin_unlock(dom->lock);
242 return next;
243}
244
245static void mc_ce_finish_switch(struct task_struct *prev)
246{
247 struct domain *dom = get_domain_for(smp_processor_id());
248 struct ce_dom_data *ce_data = dom->data;
249
250 TRACE("finish switch\n");
251
252 if (is_realtime(current) && CRIT_LEVEL_A == tsk_mc_crit(current))
253 ce_data->scheduled = current;
254 else
255 ce_data->scheduled = NULL;
256}
257
258/*
259 * The admit-task callback checks whether this task is permitted to enter the system.
260 * Here we look up the task's PID structure and save it in the proper slot on
261 * the CPU this task will run on.
262 */
263long mc_ce_admit_task_common(struct task_struct *ts)
264{
265 struct domain *dom = get_domain_for(get_partition(ts));
266 struct ce_dom_data *ce_data = dom->data;
267 struct mc_data *mcd = tsk_mc_data(ts);
268 struct pid *pid = NULL;
269 long retval = -EINVAL;
270 const int lvl_a_id = mcd->mc_task.lvl_a_id;
271 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
272
273 BUG_ON(get_partition(ts) != ce_data->cpu);
274
275 /* check the task has migrated to the right CPU (like in sched_cedf) */
276 if (task_cpu(ts) != get_partition(ts)) {
277 printk(KERN_INFO "litmus: %d admitted on CPU %d but wants %d\n",
278 ts->pid, task_cpu(ts), get_partition(ts));
279 goto out;
280 }
281
282 /* only level A tasks can be CE */
283 if (!mcd || CRIT_LEVEL_A != tsk_mc_crit(ts)) {
284 printk(KERN_INFO "litmus: non-MC or non level A task %d\n",
285 ts->pid);
286 goto out;
287 }
288
289 /* try and get the task's PID structure */
290 pid = get_task_pid(ts, PIDTYPE_PID);
291 if (IS_ERR_OR_NULL(pid)) {
292 printk(KERN_INFO "litmus: couldn't get pid struct for %d\n",
293 ts->pid);
294 goto out;
295 }
296
297 if (lvl_a_id >= pid_table->num_pid_entries) {
298 printk(KERN_INFO "litmus: level A id exceeds the expected "
299 "number of tasks %d for task %d on cpu %d\n",
300 pid_table->num_pid_entries, ts->pid,
301 get_partition(ts));
302 goto out_put_pid;
303 }
304 if (pid_table->entries[lvl_a_id].pid) {
305 printk(KERN_INFO "litmus: have saved pid info id: %d cpu: %d\n",
306 lvl_a_id, get_partition(ts));
307 goto out_put_pid;
308 }
309 if (get_exec_cost(ts) >= pid_table->entries[lvl_a_id].budget) {
310 printk(KERN_INFO "litmus: execution cost %llu is larger than "
311 "the budget %llu\n",
312 get_exec_cost(ts),
313 pid_table->entries[lvl_a_id].budget);
314 goto out_put_pid;
315 }
316 pid_table->entries[lvl_a_id].pid = pid;
317 retval = 0;
318 /* don't call put_pid if we are successful */
319 goto out;
320
321out_put_pid:
322 put_pid(pid);
323out:
324 return retval;
325}
326
327static long mc_ce_admit_task(struct task_struct *ts)
328{
329 struct domain *dom = get_domain_for(get_partition(ts));
330 unsigned long flags, retval;
331 raw_spin_lock_irqsave(dom->lock, flags);
332 retval = mc_ce_admit_task_common(ts);
333 raw_spin_unlock_irqrestore(dom->lock, flags);
334 return retval;
335}
336
337/*
338 * Called to set up a new real-time task (after the admit_task callback).
339 * At this point the task's struct PID is already hooked up on the destination
340 * CPU. The task may already be running.
341 */
342static void mc_ce_task_new(struct task_struct *ts, int on_rq, int running)
343{
344 const int cpu = task_cpu(ts);
345 struct domain *dom = get_domain_for(cpu);
346 struct ce_dom_data *ce_data = dom->data;
347 struct ce_pid_table *pid_table = get_pid_table(cpu);
348 struct pid *pid_should_be_running;
349 struct ce_pid_entry *pid_entry;
350 unsigned long flags;
351 int idx, should_be_running;
352 lt_t offset;
353
354 raw_spin_lock_irqsave(dom->lock, flags);
355 pid_entry = get_pid_entry(cpu, tsk_mc_data(ts)->mc_task.lvl_a_id);
356 /* initialize some task state */
357 set_rt_flags(ts, RT_F_RUNNING);
358
359 /* have to call mc_ce_schedule_at because the task only gets a PID
360 * entry after calling admit_task */
361 offset = get_cycle_offset(litmus_clock(), pid_table->cycle_time);
362 idx = mc_ce_schedule_at(dom, offset);
363 pid_should_be_running = get_pid_entry(cpu, idx)->pid;
364 rcu_read_lock();
365 should_be_running = (ts == pid_task(pid_should_be_running, PIDTYPE_PID));
366 rcu_read_unlock();
367 if (running) {
368 /* admit task checks that the task is not on the wrong CPU */
369 BUG_ON(task_cpu(ts) != get_partition(ts));
370 BUG_ON(ce_data->scheduled);
371 ce_data->scheduled = ts;
372
373 if (should_be_running)
374 ce_data->should_schedule = ts;
375 else
376 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
377 } else if (!running && should_be_running) {
378 ce_data->should_schedule = ts;
379 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
380 }
381 raw_spin_unlock_irqrestore(dom->lock, flags);
382}
383
384/*
385 * Called to re-introduce a task after blocking.
386 * Can potentially be called multiple times.
387 */
388static void mc_ce_task_wake_up(struct task_struct *ts)
389{
390 struct domain *dom = get_domain_for(get_partition(ts));
391 struct ce_dom_data *ce_data = dom->data;
392 unsigned long flags;
393
394 TRACE_TASK(ts, "wake up\n");
395
396 raw_spin_lock_irqsave(dom->lock, flags);
397 if (ts == ce_data->should_schedule && ts != ce_data->scheduled)
398 preempt_if_preemptable(ts, ce_data->cpu);
399 raw_spin_unlock_irqrestore(dom->lock, flags);
400}
401
402/*
403 * Called to notify the plugin of a blocking real-time task. Only called for
404 * real-time tasks and before schedule is called.
405 */
406static void mc_ce_task_block(struct task_struct *ts)
407{
408 /* nothing to do because it will be taken care of in schedule */
409 TRACE_TASK(ts, "blocked\n");
410}
411
412/*
413 * Called when a task switches from RT mode back to normal mode.
414 */
415void mc_ce_task_exit_common(struct task_struct *ts)
416{
417 struct domain *dom = get_domain_for(get_partition(ts));
418 struct ce_dom_data *ce_data = dom->data;
419 unsigned long flags;
420 struct pid *pid;
421 const int lvl_a_id = tsk_mc_data(ts)->mc_task.lvl_a_id;
422 struct ce_pid_table *pid_table = get_pid_table(ce_data->cpu);
423
424 BUG_ON(CRIT_LEVEL_A != tsk_mc_crit(ts));
425 BUG_ON(lvl_a_id >= pid_table->num_pid_entries);
426
427 raw_spin_lock_irqsave(dom->lock, flags);
428 pid = pid_table->entries[lvl_a_id].pid;
429 BUG_ON(!pid);
430 put_pid(pid);
431 pid_table->entries[lvl_a_id].pid = NULL;
432 if (ce_data->scheduled == ts)
433 ce_data->scheduled = NULL;
434 if (ce_data->should_schedule == ts)
435 ce_data->should_schedule = NULL;
436 raw_spin_unlock_irqrestore(dom->lock, flags);
437}
438
439/***********************************************************
440 * Timer stuff
441 **********************************************************/
442
443/*
444 * Returns the next absolute time that the timer should fire.
445 */
446lt_t mc_ce_timer_callback_common(struct domain *dom)
447{
448 /* relative and absolute times for cycles */
449 lt_t now, offset_rel, cycle_start_abs, next_timer_abs;
450 struct task_struct *should_schedule;
451 struct ce_pid_table *pid_table;
452 struct ce_pid_entry *pid_entry;
453 struct ce_dom_data *ce_data;
454 int idx, budget_overrun;
455
456 ce_data = dom->data;
457 pid_table = get_pid_table(ce_data->cpu);
458
459 /* Based off of the current time, figure out the offset into the cycle
460 * and the cycle's start time, and determine what should be scheduled.
461 */
462 now = litmus_clock();
463 offset_rel = get_cycle_offset(now, pid_table->cycle_time);
464 cycle_start_abs = now - offset_rel;
465 idx = mc_ce_schedule_at(dom, offset_rel);
466 pid_entry = get_pid_entry(ce_data->cpu, idx);
467 next_timer_abs = cycle_start_abs + pid_entry->acc_time;
468
469 STRACE("timer: now: %llu offset_rel: %llu cycle_start_abs: %llu "
470 "next_timer_abs: %llu\n", now, offset_rel,
471 cycle_start_abs, next_timer_abs);
472
473 /* get the task_struct (pid_task can accept a NULL) */
474 rcu_read_lock();
475 should_schedule = pid_task(pid_entry->pid, PIDTYPE_PID);
476 rcu_read_unlock();
477 ce_data->should_schedule = should_schedule;
478
479 if (should_schedule && 0 == atomic_read(&start_time_set)) {
480 /*
481 * If jobs are not overrunning their budgets, the expected
482 * job number should match the task's current job number.
483 */
484 pid_entry->expected_job++;
485 budget_overrun = pid_entry->expected_job !=
486 tsk_rt(should_schedule)->job_params.job_no;
487 if (budget_overrun)
488 TRACE_MC_TASK(should_schedule,
489 "timer expected job number: %u "
490 "but current job: %u\n",
491 pid_entry->expected_job,
492 tsk_rt(should_schedule)->job_params.job_no);
493 }
494
495 if (ce_data->should_schedule) {
496 tsk_rt(should_schedule)->job_params.deadline =
497 cycle_start_abs + pid_entry->acc_time;
498 tsk_rt(should_schedule)->job_params.release =
499 tsk_rt(should_schedule)->job_params.deadline -
500 pid_entry->budget;
501 tsk_rt(should_schedule)->job_params.exec_time = 0;
502 /* sched_trace_task_release(should_schedule); */
503 set_rt_flags(ce_data->should_schedule, RT_F_RUNNING);
504 }
505 return next_timer_abs;
506}
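
To make the cycle arithmetic above concrete, a standalone user-space sketch with an assumed three-slot table (budgets of 5, 10 and 15 ms, so acc_time = {5, 15, 30} ms and a 30 ms cycle):

#include <stdio.h>

typedef unsigned long long lt_t;

int main(void)
{
	lt_t acc_time[] = { 5000000ULL, 15000000ULL, 30000000ULL };
	lt_t cycle_time = 30000000ULL;
	lt_t start_time = 0, now = 97000000ULL;			/* 97 ms, assumed */
	lt_t offset_rel = (now - start_time) % cycle_time;	/*  7 ms */
	lt_t cycle_start_abs = now - offset_rel;		/* 90 ms */
	int idx = 0;

	while (offset_rel >= acc_time[idx])	/* same rule as mc_ce_schedule_at */
		idx++;

	/* slot 1 owns offsets [5, 15) ms; its window ends at 90 + 15 = 105 ms */
	printf("idx = %d, next_timer_abs = %llu ns\n",
	       idx, cycle_start_abs + acc_time[idx]);
	return 0;
}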
507
508/*
509 * What to do when a timer fires. The timer should only be armed if the number
510 * of PID entries is positive.
511 */
512#ifdef CONFIG_MERGE_TIMERS
513static void mc_ce_timer_callback(struct rt_event *e)
514#else
515static enum hrtimer_restart mc_ce_timer_callback(struct hrtimer *timer)
516#endif
517{
518 struct ce_dom_data *ce_data;
519 unsigned long flags;
520 struct domain *dom;
521 lt_t next_timer_abs;
522#ifdef CONFIG_MERGE_TIMERS
523 struct event_group *event_group;
524 ce_data = container_of(e, struct ce_dom_data, event);
525 /* use the same CPU the callback is executing on by passing NO_CPU */
526 event_group = get_event_group_for(NO_CPU);
527#else /* CONFIG_MERGE_TIMERS */
528 ce_data = container_of(timer, struct ce_dom_data, timer);
529#endif
530 dom = get_domain_for(ce_data->cpu);
531
532 TRACE("timer callback on CPU %d (before lock)\n", ce_data->cpu);
533
534 raw_spin_lock_irqsave(dom->lock, flags);
535 next_timer_abs = mc_ce_timer_callback_common(dom);
536
537 /* setup an event or timer for the next release in the CE schedule */
538#ifdef CONFIG_MERGE_TIMERS
539 add_event(event_group, e, next_timer_abs);
540#else
541 hrtimer_set_expires(timer, ns_to_ktime(next_timer_abs));
542#endif
543
544 if (ce_data->scheduled != ce_data->should_schedule)
545 preempt_if_preemptable(ce_data->scheduled, ce_data->cpu);
546
547 raw_spin_unlock_irqrestore(dom->lock, flags);
548
549#ifndef CONFIG_MERGE_TIMERS
550 return HRTIMER_RESTART;
551#endif
552}
553
554/*
555 * Cancel timers on all CPUs. Returns 1 if any were active.
556 */
557static int cancel_all_timers(void)
558{
559 struct ce_dom_data *ce_data;
560 struct domain *dom;
561 int cpu, ret = 0;
562#ifndef CONFIG_MERGE_TIMERS
563 int cancel_res;
564#endif
565
566 TRACE("cancel all timers\n");
567
568 for_each_online_cpu(cpu) {
569 dom = get_domain_for(cpu);
570 ce_data = dom->data;
571 ce_data->should_schedule = NULL;
572#ifdef CONFIG_MERGE_TIMERS
573 cancel_event(&ce_data->event);
574#else
575 cancel_res = hrtimer_cancel(&ce_data->timer);
576 atomic_set(&ce_data->timer_info.state,
577 HRTIMER_START_ON_INACTIVE);
578 ret = ret || cancel_res;
579#endif
580 }
581 return ret;
582}
583
584/*
585 * Arm all timers so that they start at the new value of start time.
586 * Any CPU without CE PID entries won't have a timer armed.
587 * All timers should be canceled before calling this.
588 */
589static void arm_all_timers(void)
590{
591 struct domain *dom;
592 struct ce_dom_data *ce_data;
593 struct ce_pid_table *pid_table;
594 int cpu, idx, cpu_for_timer;
595 const lt_t start = atomic64_read(&start_time);
596
597 TRACE("arm all timers\n");
598
599 for_each_online_cpu(cpu) {
600 dom = get_domain_for(cpu);
601 ce_data = dom->data;
602 pid_table = get_pid_table(cpu);
603 if (0 == pid_table->num_pid_entries)
604 continue;
605 for (idx = 0; idx < pid_table->num_pid_entries; idx++) {
606 pid_table->entries[idx].expected_job = 0;
607 }
608#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
609 cpu_for_timer = interrupt_cpu;
610#else
611 cpu_for_timer = cpu;
612#endif
613
614#ifdef CONFIG_MERGE_TIMERS
615 add_event(get_event_group_for(cpu_for_timer),
616 &ce_data->event, start);
617#else
618 hrtimer_start_on(cpu_for_timer, &ce_data->timer_info,
619 &ce_data->timer, ns_to_ktime(start),
620 HRTIMER_MODE_ABS_PINNED);
621#endif
622 }
623}
624
625/*
626 * There are no real releases in the CE, but the task release syscall still
627 * calls this. We reset our notion of the CE cycle start so that the
628 * schedule lines up with the release time.
629 */
630void mc_ce_release_at_common(struct task_struct *ts, lt_t start)
631{
632 TRACE_TASK(ts, "release at\n");
633 if (atomic_inc_and_test(&start_time_set)) {
634 /* in this case, we won the race */
635 cancel_all_timers();
636 atomic64_set(&start_time, start);
637 arm_all_timers();
638 } else
639 atomic_dec(&start_time_set);
640}
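
The start_time_set counter is initialized to -1, so atomic_inc_and_test() returns true only for the first caller; every other caller undoes its increment. The same idiom in isolation (a sketch, not part of the patch):

#include <asm/atomic.h>

static atomic_t example_flag = ATOMIC_INIT(-1);

/* Returns 1 only for the first caller; later callers back their
 * increment out so the counter stays at 0 until it is reset to -1. */
static int example_try_first(void)
{
	if (atomic_inc_and_test(&example_flag))
		return 1;
	atomic_dec(&example_flag);
	return 0;
}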
641
642long mc_ce_activate_plugin_common(void)
643{
644 struct ce_dom_data *ce_data;
645 struct domain *dom;
646 long ret;
647 int cpu;
648
649#ifdef CONFIG_PLUGIN_MC_RELEASE_MASTER
650 interrupt_cpu = atomic_read(&release_master_cpu);
651 if (NO_CPU == interrupt_cpu) {
652 printk(KERN_ERR "LITMUS: MC-CE needs a release master\n");
653 ret = -EINVAL;
654 goto out;
655 }
656#endif
657
658 for_each_online_cpu(cpu) {
659 dom = get_domain_for(cpu);
660 ce_data = dom->data;
661 ce_data->scheduled = NULL;
662 ce_data->should_schedule = NULL;
663 }
664
665 atomic_set(&start_time_set, -1);
666 atomic64_set(&start_time, litmus_clock());
667 /* may not want to arm timers on activation, just after release */
668 arm_all_timers();
669 ret = 0;
670out:
671 return ret;
672}
673
674static long mc_ce_activate_plugin(void)
675{
676 struct domain_data *our_domains[NR_CPUS];
677 int cpu, n = 0;
678 long ret;
679
680 for_each_online_cpu(cpu) {
681 BUG_ON(NR_CPUS <= n);
682 our_domains[cpu] = &per_cpu(_mc_ce_doms, cpu);
683 n++;
684 }
685 ret = mc_ce_set_domains(n, our_domains);
686 if (ret)
687 goto out;
688 ret = mc_ce_activate_plugin_common();
689out:
690 return ret;
691}
692
693static void clear_pid_entries(void)
694{
695 struct ce_pid_table *pid_table = NULL;
696 int cpu, entry;
697
698 for_each_online_cpu(cpu) {
699 pid_table = get_pid_table(cpu);
700 pid_table->num_pid_entries = 0;
701 pid_table->cycle_time = 0;
702 for (entry = 0; entry < CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS;
703 ++entry) {
704 if (NULL != pid_table->entries[entry].pid) {
705 put_pid(pid_table->entries[entry].pid);
706 pid_table->entries[entry].pid = NULL;
707 }
708 pid_table->entries[entry].budget = 0;
709 pid_table->entries[entry].acc_time = 0;
710 pid_table->entries[entry].expected_job = 0;
711 }
712 }
713}
714
715long mc_ce_deactivate_plugin_common(void)
716{
717 int cpu;
718 cancel_all_timers();
719 for_each_online_cpu(cpu) {
720 per_cpu(domains, cpu) = NULL;
721 }
722 return 0;
723}
724
725/* Plugin object */
726static struct sched_plugin mc_ce_plugin __cacheline_aligned_in_smp = {
727 .plugin_name = "MC-CE",
728 .admit_task = mc_ce_admit_task,
729 .task_new = mc_ce_task_new,
730 .complete_job = complete_job,
731 .release_at = mc_ce_release_at_common,
732 .task_exit = mc_ce_task_exit_common,
733 .schedule = mc_ce_schedule,
734 .finish_switch = mc_ce_finish_switch,
735 .task_wake_up = mc_ce_task_wake_up,
736 .task_block = mc_ce_task_block,
737 .activate_plugin = mc_ce_activate_plugin,
738 .deactivate_plugin = mc_ce_deactivate_plugin_common,
739};
740
741static int setup_proc(void);
742static int __init init_sched_mc_ce(void)
743{
744 raw_spinlock_t *ce_lock;
745 struct domain_data *dom_data;
746 struct domain *dom;
747 int cpu, err;
748
749 for_each_online_cpu(cpu) {
750 per_cpu(domains, cpu) = NULL;
751 ce_lock = &per_cpu(_mc_ce_dom_locks, cpu);
752 raw_spin_lock_init(ce_lock);
753 dom_data = &per_cpu(_mc_ce_doms, cpu);
754 dom = &dom_data->domain;
755 ce_domain_init(dom, ce_lock, NULL, NULL, NULL, NULL, NULL,
756 &per_cpu(_mc_ce_dom_data, cpu), cpu,
757 mc_ce_timer_callback);
758 }
759 clear_pid_entries();
760 err = setup_proc();
761 if (!err)
762 err = register_sched_plugin(&mc_ce_plugin);
763 return err;
764}
765
766#define BUF_SIZE PAGE_SIZE
767static int write_into_proc(char *proc_buf, const int proc_size, char *fmt, ...)
768{
769 static char buf[BUF_SIZE];
770 int n;
771 va_list args;
772
773 /* When writing to procfs, we don't care about the trailing null that
774 * is not included in the count returned by vsnprintf.
775 */
776 va_start(args, fmt);
777 n = vsnprintf(buf, BUF_SIZE, fmt, args);
778 va_end(args);
779 if (BUF_SIZE <= n || proc_size <= n) {
780 /* too big for the formatting buffer or for proc (excluding the null byte) */
781 n = -EINVAL;
782 goto out;
783 }
784 memcpy(proc_buf, buf, n);
785out:
786 return n;
787}
788#undef BUF_SIZE
789
790/*
791 * Writes a PID entry to the procfs.
792 *
793 * @page buffer to write into.
794 * @count bytes available in the buffer
795 */
796#define PID_SPACE 15
797#define TASK_INFO_BUF (PID_SPACE + TASK_COMM_LEN)
798static int write_pid_entry(char *page, const int count, const int cpu,
799 const int task, struct ce_pid_entry *pid_entry)
800{
801 static char task_info[TASK_INFO_BUF];
802 struct task_struct *ts;
803 int n = 0, err, ti_n;
804 char *ti_b;
805
806 if (pid_entry->pid) {
807 rcu_read_lock();
808 ts = pid_task(pid_entry->pid, PIDTYPE_PID);
809 rcu_read_unlock();
810
811 /* get some information about the task */
812 if (ts) {
813 ti_b = task_info;
814 ti_n = snprintf(ti_b, PID_SPACE, "%d", ts->pid);
815 if (PID_SPACE <= ti_n)
816 ti_n = PID_SPACE - 1;
817 ti_b += ti_n;
818 *ti_b = ' '; /* nuke the null byte */
819 ti_b++;
820 get_task_comm(ti_b, ts);
821 } else {
822 strncpy(task_info, "pid_task() failed :(",
823 TASK_INFO_BUF);
824 }
825
826 } else
827 strncpy(task_info, "no", TASK_INFO_BUF);
828 task_info[TASK_INFO_BUF - 1] = '\0'; /* just to be sure */
829
830 err = write_into_proc(page + n, count - n, "# task: %s\n", task_info);
831 if (err < 0) {
832 n = -ENOSPC;
833 goto out;
834 }
835 n += err;
836 err = write_into_proc(page + n, count - n, "%d, %d, %llu\n",
837 cpu, task, pid_entry->budget);
838 if (err < 0) {
839 n = -ENOSPC;
840 goto out;
841 }
842 n += err;
843out:
844 return n;
845}
846#undef PID_SPACE
847#undef TASK_INFO_BUF
848
849/*
850 * Called when the user-land reads from proc.
851 */
852static int proc_read_ce_file(char *page, char **start, off_t off, int count,
853 int *eof, void *data)
854{
855 int n = 0, err, cpu, t;
856 struct ce_pid_table *pid_table;
857
858 if (off > 0) {
859 printk(KERN_INFO "litmus: MC-CE called read with off > 0\n");
860 goto out;
861 }
862
863 for_each_online_cpu(cpu) {
864 pid_table = get_pid_table(cpu);
865 for (t = 0; t < pid_table->num_pid_entries; ++t) {
866 err = write_pid_entry(page + n, count - n,
867 cpu, t, get_pid_entry(cpu, t));
868 if (err < 0) {
869 n = -ENOSPC;
870 goto out;
871 }
872 n += err;
873 }
874 }
875out:
876 *eof = 1;
877 return n;
878}
879
880/*
881 * Skip a commented line.
882 */
883static int skip_comment(const char *buf, const unsigned long max)
884{
885 unsigned long i = 0;
886 const char *c = buf;
887 if (0 == max || !c || *c != '#')
888 return 0;
889 ++c; ++i;
890 for (; i < max; ++i) {
891 if (*c == '\n') {
892 ++c; ++i;
893 break;
894 }
895 ++c;
896 }
897 return i;
898}
899
900/* a budget of 5 milliseconds is probably reasonable */
901#define BUDGET_THRESHOLD 5000000ULL
902static int setup_pid_entry(const int cpu, const int task, const lt_t budget)
903{
904 struct ce_pid_table *pid_table = get_pid_table(cpu);
905 struct ce_pid_entry *new_entry = NULL;
906 int err = 0;
907
908 /* check the inputs */
909 if (cpu < 0 || NR_CPUS <= cpu || task < 0 ||
910 CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= task ||
911 budget < 1) {
912 printk(KERN_INFO "litmus: bad cpu, task ID, or budget sent to "
913 "MC-CE proc\n");
914 err = -EINVAL;
915 goto out;
916 }
917 /* check for small budgets */
918 printk(KERN_WARNING "litmus: you gave a small budget for an "
919 "MC-CE task; that might be an issue.\n");
920 "MC-CE task; that might be an issue.\n");
921 }
922 /* check that we have space for a new entry */
923 if (CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS <= pid_table->num_pid_entries) {
924 printk(KERN_INFO "litmus: too many MC-CE tasks for cpu "
925 "%d\n", cpu);
926 err = -EINVAL;
927 goto out;
928 }
929 /* add the new entry */
930 new_entry = get_pid_entry(cpu, pid_table->num_pid_entries);
931 BUG_ON(NULL != new_entry->pid);
932 new_entry->budget = budget;
933 new_entry->acc_time = pid_table->cycle_time + budget;
934 /* update the domain entry */
935 pid_table->cycle_time += budget;
936 pid_table->num_pid_entries++;
937out:
938 return err;
939}
940#undef BUDGET_THRESHOLD
941
942/*
943 * Called when the user-land writes to proc.
944 *
945 * Error checking is quite minimal. Format is:
946 * <cpu>, <level A task id>, <budget>
947 */
948#define PROCFS_MAX_SIZE PAGE_SIZE
949static int proc_write_ce_file(struct file *file, const char __user *buffer,
950 unsigned long count, void *data)
951{
952 static char kbuf[PROCFS_MAX_SIZE];
953 char *c = kbuf, *c_skipped;
954 int cpu, task, cnt = 0, chars_read, converted, err;
955 lt_t budget;
956
957 if (!using_linux_plugin()) {
958 printk(KERN_INFO "litmus: can only edit MC-CE proc under Linux "
959 "plugin\n");
960 cnt = -EINVAL;
961 goto out;
962 }
963
964 if (count > PROCFS_MAX_SIZE) {
965 printk(KERN_INFO "litmus: MC-CE procfs got too many bytes "
966 "from user-space.\n");
967 cnt = -EINVAL;
968 goto out;
969 }
970
971 if (copy_from_user(kbuf, buffer, count)) {
972 printk(KERN_INFO "litmus: couldn't copy from user %s\n",
973 __FUNCTION__);
974 cnt = -EFAULT;
975 goto out;
976 }
977 clear_pid_entries();
978 while (cnt < count) {
979 c_skipped = skip_spaces(c);
980 if (c_skipped != c) {
981 chars_read = c_skipped - c;
982 cnt += chars_read;
983 c += chars_read;
984 continue;
985 }
986 if (*c == '#') {
987 chars_read = skip_comment(c, count - cnt);
988 cnt += chars_read;
989 c += chars_read;
990 continue;
991 }
992 converted = sscanf(c, "%d, %d, %llu%n", &cpu, &task, &budget,
993 &chars_read);
994 if (3 != converted) {
995 printk(KERN_INFO "litmus: MC-CE procfs expected three "
996 "arguments, but got %d.\n", converted);
997 cnt = -EINVAL;
998 goto out;
999 }
1000 cnt += chars_read;
1001 c += chars_read;
1002 err = setup_pid_entry(cpu, task, budget);
1003 if (err) {
1004 cnt = -EINVAL;
1005 goto out;
1006 }
1007 }
1008out:
1009 return cnt;
1010}
1011#undef PROCFS_MAX_SIZE
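
A hedged user-space sketch of feeding this interface; the proc path is an assumption based on make_plugin_proc_dir() and CE_FILE_PROC_NAME, and the budget values are arbitrary (the write must happen while the Linux plugin is active, as checked above):

#include <stdio.h>

int main(void)
{
	/* assumed path: /proc/litmus/plugins/MC-CE/ce_file */
	FILE *f = fopen("/proc/litmus/plugins/MC-CE/ce_file", "w");

	if (!f) {
		perror("open ce_file");
		return 1;
	}
	/* format: <cpu>, <level A task id>, <budget in ns>; '#' starts a comment */
	fprintf(f, "# CPU 0: two level-A slots\n");
	fprintf(f, "0, 0, 5000000\n");
	fprintf(f, "0, 1, 10000000\n");
	fclose(f);
	return 0;
}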
1012
1013#define CE_FILE_PROC_NAME "ce_file"
1014static void tear_down_proc(void)
1015{
1016 if (ce_file)
1017 remove_proc_entry(CE_FILE_PROC_NAME, mc_ce_dir);
1018 if (mc_ce_dir)
1019 remove_plugin_proc_dir(&mc_ce_plugin);
1020}
1021
1022static int setup_proc(void)
1023{
1024 int err;
1025 err = make_plugin_proc_dir(&mc_ce_plugin, &mc_ce_dir);
1026 if (err) {
1027 printk(KERN_ERR "could not create MC-CE procfs dir.\n");
1028 goto out;
1029 }
1030 ce_file = create_proc_entry(CE_FILE_PROC_NAME, 0644, mc_ce_dir);
1031 if (!ce_file) {
1032 printk(KERN_ERR "could not create MC-CE procfs file.\n");
1033 err = -EIO;
1034 goto out_remove_proc;
1035 }
1036 ce_file->read_proc = proc_read_ce_file;
1037 ce_file->write_proc = proc_write_ce_file;
1038 goto out;
1039out_remove_proc:
1040 tear_down_proc();
1041out:
1042 return err;
1043}
1044#undef CE_FILE_PROC_NAME
1045
1046static void clean_sched_mc_ce(void)
1047{
1048 tear_down_proc();
1049}
1050
1051module_init(init_sched_mc_ce);
1052module_exit(clean_sched_mc_ce);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
index 00a1900d6457..123c7516fb76 100644
--- a/litmus/sched_plugin.c
+++ b/litmus/sched_plugin.c
@@ -95,6 +95,10 @@ static void litmus_dummy_task_exit(struct task_struct *task)
95{ 95{
96} 96}
97 97
98static void litmus_dummy_release_ts(lt_t time)
99{
100}
101
98static long litmus_dummy_complete_job(void) 102static long litmus_dummy_complete_job(void)
99{ 103{
100 return -ENOSYS; 104 return -ENOSYS;
@@ -136,6 +140,7 @@ struct sched_plugin linux_sched_plugin = {
136 .finish_switch = litmus_dummy_finish_switch, 140 .finish_switch = litmus_dummy_finish_switch,
137 .activate_plugin = litmus_dummy_activate_plugin, 141 .activate_plugin = litmus_dummy_activate_plugin,
138 .deactivate_plugin = litmus_dummy_deactivate_plugin, 142 .deactivate_plugin = litmus_dummy_deactivate_plugin,
143 .release_ts = litmus_dummy_release_ts,
139#ifdef CONFIG_LITMUS_LOCKING 144#ifdef CONFIG_LITMUS_LOCKING
140 .allocate_lock = litmus_dummy_allocate_lock, 145 .allocate_lock = litmus_dummy_allocate_lock,
141#endif 146#endif
@@ -174,6 +179,7 @@ int register_sched_plugin(struct sched_plugin* plugin)
174 CHECK(complete_job); 179 CHECK(complete_job);
175 CHECK(activate_plugin); 180 CHECK(activate_plugin);
176 CHECK(deactivate_plugin); 181 CHECK(deactivate_plugin);
182 CHECK(release_ts);
177#ifdef CONFIG_LITMUS_LOCKING 183#ifdef CONFIG_LITMUS_LOCKING
178 CHECK(allocate_lock); 184 CHECK(allocate_lock);
179#endif 185#endif
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
index 8e4a22dd8d6a..eaaec38f43da 100644
--- a/litmus/sched_psn_edf.c
+++ b/litmus/sched_psn_edf.c
@@ -284,6 +284,9 @@ static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
284 TRACE_TASK(t, "psn edf: task new, cpu = %d\n", 284 TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
285 t->rt_param.task_params.cpu); 285 t->rt_param.task_params.cpu);
286 286
287 trace_litmus_server_param(0 - t->pid, -1 - get_partition(t),
288 get_exec_time(t), get_rt_period(t));
289
287 /* setup job parameters */ 290 /* setup job parameters */
288 release_at(t, litmus_clock()); 291 release_at(t, litmus_clock());
289 292
diff --git a/litmus/sync.c b/litmus/sync.c
index bf75fde5450b..f3c9262f7022 100644
--- a/litmus/sync.c
+++ b/litmus/sync.c
@@ -73,6 +73,9 @@ static long do_release_ts(lt_t start)
73 73
74 complete_n(&ts_release, task_count); 74 complete_n(&ts_release, task_count);
75 75
76 /* TODO: remove this hack */
77 litmus->release_ts(start);
78
76 return task_count; 79 return task_count;
77} 80}
78 81