author     Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu>  2008-07-19 05:34:11 -0400
committer  Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu>  2008-07-19 05:34:11 -0400
commit     e42e6ebad6e516497b34e6ee0a3ff49839a2caca (patch)
tree       f94caa2e842e1db088ca2a6f7c5661eb6da41818
parent     e794a7a2ada5773cc95924e310c756f35aaf5e63 (diff)
working on the new release
-rw-r--r--  download/2008.1/SHA256SUMS              2
-rw-r--r--  download/2008.1/liblitmus-2008.1.tgz    bin 0 -> 11901 bytes
-rw-r--r--  download/2008.1/litmus-rt-2008.1.patch  8895
-rw-r--r--  index2.html                             11
4 files changed, 8904 insertions, 4 deletions
diff --git a/download/2008.1/SHA256SUMS b/download/2008.1/SHA256SUMS
new file mode 100644
index 0000000..62cb9bc
--- /dev/null
+++ b/download/2008.1/SHA256SUMS
@@ -0,0 +1,2 @@
1db11c6375a1a539e7910814e56bd79ec54127a7906307ff539c7fe29e4055874 liblitmus-2008.1.tgz
27d97a505c61e80968772c6fd25935f8ee7002a486c41f4ec0d55992f7d827118 litmus-rt-2008.1.patch
diff --git a/download/2008.1/liblitmus-2008.1.tgz b/download/2008.1/liblitmus-2008.1.tgz
new file mode 100644
index 0000000..0c3816e
--- /dev/null
+++ b/download/2008.1/liblitmus-2008.1.tgz
Binary files differ
diff --git a/download/2008.1/litmus-rt-2008.1.patch b/download/2008.1/litmus-rt-2008.1.patch
new file mode 100644
index 0000000..44c942d
--- /dev/null
+++ b/download/2008.1/litmus-rt-2008.1.patch
@@ -0,0 +1,8895 @@
1 Makefile | 2 +-
2 arch/sparc64/Kconfig | 2 +
3 arch/sparc64/kernel/smp.c | 1 +
4 arch/sparc64/kernel/systbls.S | 20 +-
5 arch/x86/Kconfig | 2 +
6 arch/x86/kernel/Makefile_32 | 3 +
7 arch/x86/kernel/ft_event.c | 104 +++++
8 arch/x86/kernel/smp_32.c | 1 +
9 arch/x86/kernel/syscall_table_32.S | 16 +
10 fs/exec.c | 3 +
11 fs/inode.c | 2 +
12 include/asm-sparc64/feather_trace.h | 22 +
13 include/asm-sparc64/unistd.h | 6 +-
14 include/asm-x86/feather_trace.h | 104 +++++
15 include/asm-x86/unistd_32.h | 6 +-
16 include/linux/completion.h | 2 +-
17 include/linux/fs.h | 5 +
18 include/linux/sched.h | 11 +
19 include/linux/tick.h | 3 +
20 include/linux/uaccess.h | 16 +
21 include/litmus/edf_common.h | 25 +
22 include/litmus/fdso.h | 69 +++
23 include/litmus/feather_buffer.h | 94 ++++
24 include/litmus/feather_trace.h | 37 ++
25 include/litmus/heap.h | 301 +++++++++++++
26 include/litmus/jobs.h | 9 +
27 include/litmus/litmus.h | 205 +++++++++
28 include/litmus/norqlock.h | 26 ++
29 include/litmus/rt_domain.h | 127 ++++++
30 include/litmus/rt_param.h | 170 +++++++
31 include/litmus/sched_plugin.h | 144 ++++++
32 include/litmus/sched_trace.h | 31 ++
33 include/litmus/trace.h | 107 +++++
34 include/litmus/unistd.h | 20 +
35 kernel/exit.c | 4 +
36 kernel/fork.c | 8 +
37 kernel/printk.c | 10 +-
38 kernel/sched.c | 93 ++++-
39 kernel/sched_fair.c | 2 +-
40 kernel/sched_rt.c | 2 +-
41 kernel/time/tick-sched.c | 37 ++-
42 litmus/Kconfig | 78 ++++
43 litmus/Makefile | 14 +
44 litmus/edf_common.c | 94 ++++
45 litmus/fdso.c | 282 ++++++++++++
46 litmus/fmlp.c | 262 +++++++++++
47 litmus/ft_event.c | 43 ++
48 litmus/jobs.c | 43 ++
49 litmus/litmus.c | 826 +++++++++++++++++++++++++++++++++++
50 litmus/norqlock.c | 56 +++
51 litmus/rt_domain.c | 138 ++++++
52 litmus/sched_cedf.c | 717 ++++++++++++++++++++++++++++++
53 litmus/sched_gsn_edf.c | 731 +++++++++++++++++++++++++++++++
54 litmus/sched_litmus.c | 228 ++++++++++
55 litmus/sched_pfair.c | 785 +++++++++++++++++++++++++++++++++
56 litmus/sched_plugin.c | 185 ++++++++
57 litmus/sched_psn_edf.c | 454 +++++++++++++++++++
58 litmus/sched_trace.c | 569 ++++++++++++++++++++++++
59 litmus/srp.c | 318 ++++++++++++++
60 litmus/sync.c | 86 ++++
61 litmus/trace.c | 335 ++++++++++++++
62 61 files changed, 8075 insertions(+), 21 deletions(-)
63
64diff --git a/Makefile b/Makefile
65index 189d8ef..d9e4495 100644
66--- a/Makefile
67+++ b/Makefile
68@@ -597,7 +597,7 @@ export mod_strip_cmd
69
70
71 ifeq ($(KBUILD_EXTMOD),)
72-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
73+core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
74
75 vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
76 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
77diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
78index 10b212a..8d90b5a 100644
79--- a/arch/sparc64/Kconfig
80+++ b/arch/sparc64/Kconfig
81@@ -471,3 +471,5 @@ source "security/Kconfig"
82 source "crypto/Kconfig"
83
84 source "lib/Kconfig"
85+
86+source "litmus/Kconfig"
87diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
88index c399449..cd2bc7e 100644
89--- a/arch/sparc64/kernel/smp.c
90+++ b/arch/sparc64/kernel/smp.c
91@@ -1033,6 +1033,7 @@ void smp_receive_signal(int cpu)
92 void smp_receive_signal_client(int irq, struct pt_regs *regs)
93 {
94 clear_softint(1 << irq);
95+ set_tsk_need_resched(current);
96 }
97
98 void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
99diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
100index 06d1090..7fc7615 100644
101--- a/arch/sparc64/kernel/systbls.S
102+++ b/arch/sparc64/kernel/systbls.S
103@@ -82,6 +82,13 @@ sys_call_table32:
104 .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
105 /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, compat_sys_timerfd, sys_eventfd, compat_sys_fallocate
106
107+/*LITMUS, 315*/
108+ .word sys_set_rt_task_param, sys_get_rt_task_param, sys_complete_job, sys_register_np_flag, sys_exit_np
109+/*320*/
110+ .word sys_od_open, sys_od_close, sys_fmlp_down, sys_fmlp_up, sys_srp_down
111+/*325*/ .word sys_srp_up, sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts
112+
113+
114 #endif /* CONFIG_COMPAT */
115
116 /* Now the 64-bit native Linux syscall table. */
117@@ -154,6 +161,12 @@ sys_call_table:
118 .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
119 /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd, sys_eventfd, sys_fallocate
120
121+/*LITMUS, 315*/
122+ .word sys_set_rt_task_param, sys_get_rt_task_param, sys_complete_job, sys_register_np_flag, sys_exit_np
123+/*320*/
124+ .word sys_od_open, sys_od_close, sys_fmlp_down, sys_fmlp_up, sys_srp_down
125+/*325*/ .word sys_srp_up, sys_query_job_no, sys_wait_for_job_release, sys_wait_for_ts_release, sys_release_ts
126+
127 #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \
128 defined(CONFIG_SOLARIS_EMUL_MODULE)
129 /* Now the 32-bit SunOS syscall table. */
130@@ -271,6 +284,11 @@ sunos_sys_table:
131 .word sunos_nosys, sunos_nosys, sunos_nosys
132 .word sunos_nosys
133 /*310*/ .word sunos_nosys, sunos_nosys, sunos_nosys
134- .word sunos_nosys, sunos_nosys
135+ .word sunos_nosys, sunos_nosys, sunos_nosys
136+ .word sunos_nosys, sunos_nosys, sunos_nosys
137+ .word sunos_nosys
138+/*320*/ .word sunos_nosys, sunos_nosys, sunos_nosys
139+ .word sunos_nosys, sunos_nosys, sunos_nosys
140+ .word sunos_nosys, sunos_nosys, sunos_nosys
141
142 #endif
143diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
144index 80b7ba4..f99330f 100644
145--- a/arch/x86/Kconfig
146+++ b/arch/x86/Kconfig
147@@ -1620,3 +1620,5 @@ source "security/Kconfig"
148 source "crypto/Kconfig"
149
150 source "lib/Kconfig"
151+
152+source "litmus/Kconfig"
153diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
154index a7bc93c..5f87f32 100644
155--- a/arch/x86/kernel/Makefile_32
156+++ b/arch/x86/kernel/Makefile_32
157@@ -49,6 +49,9 @@ obj-y += pcspeaker.o
158
159 obj-$(CONFIG_SCx200) += scx200_32.o
160
161+obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
162+
163+
164 # vsyscall_32.o contains the vsyscall DSO images as __initdata.
165 # We must build both images before we can assemble it.
166 # Note: kbuild does not track this dependency due to usage of .incbin
167diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
168new file mode 100644
169index 0000000..b1d80c5
170--- /dev/null
171+++ b/arch/x86/kernel/ft_event.c
172@@ -0,0 +1,104 @@
173+#include <linux/types.h>
174+
175+#include <litmus/feather_trace.h>
176+
177+/* the feather trace management functions assume
178+ * exclusive access to the event table
179+ */
180+
181+
182+#define BYTE_JUMP 0xeb
183+#define BYTE_JUMP_LEN 0x02
184+
185+/* for each event, there is an entry in the event table */
186+struct trace_event {
187+ long id;
188+ long count;
189+ long start_addr;
190+ long end_addr;
191+};
192+
193+extern struct trace_event __start___event_table[];
194+extern struct trace_event __stop___event_table[];
195+
196+int ft_enable_event(unsigned long id)
197+{
198+ struct trace_event* te = __start___event_table;
199+ int count = 0;
200+ char* delta;
201+ unsigned char* instr;
202+
203+ while (te < __stop___event_table) {
204+ if (te->id == id && ++te->count == 1) {
205+ instr = (unsigned char*) te->start_addr;
206+ /* make sure we don't clobber something wrong */
207+ if (*instr == BYTE_JUMP) {
208+ delta = (((unsigned char*) te->start_addr) + 1);
209+ *delta = 0;
210+ }
211+ }
212+ if (te->id == id)
213+ count++;
214+ te++;
215+ }
216+ return count;
217+}
218+
219+int ft_disable_event(unsigned long id)
220+{
221+ struct trace_event* te = __start___event_table;
222+ int count = 0;
223+ char* delta;
224+ unsigned char* instr;
225+
226+ while (te < __stop___event_table) {
227+ if (te->id == id && --te->count == 0) {
228+ instr = (unsigned char*) te->start_addr;
229+ if (*instr == BYTE_JUMP) {
230+ delta = (((unsigned char*) te->start_addr) + 1);
231+ *delta = te->end_addr - te->start_addr -
232+ BYTE_JUMP_LEN;
233+ }
234+ }
235+ if (te->id == id)
236+ count++;
237+ te++;
238+ }
239+ return count;
240+}
241+
242+int ft_disable_all_events(void)
243+{
244+ struct trace_event* te = __start___event_table;
245+ int count = 0;
246+ char* delta;
247+ unsigned char* instr;
248+
249+ while (te < __stop___event_table) {
250+ if (te->count) {
251+ instr = (unsigned char*) te->start_addr;
252+ if (*instr == BYTE_JUMP) {
253+ delta = (((unsigned char*) te->start_addr)
254+ + 1);
255+ *delta = te->end_addr - te->start_addr -
256+ BYTE_JUMP_LEN;
257+ te->count = 0;
258+ count++;
259+ }
260+ }
261+ te++;
262+ }
263+ return count;
264+}
265+
266+int ft_is_event_enabled(unsigned long id)
267+{
268+ struct trace_event* te = __start___event_table;
269+
270+ while (te < __stop___event_table) {
271+ if (te->id == id)
272+ return te->count;
273+ te++;
274+ }
275+ return 0;
276+}
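
For orientation, a minimal hypothetical sketch (not part of the patch) of how these management functions are meant to be driven; only the interface introduced above is assumed, and event id 100 corresponds to the TS_SCHED_START timestamp defined later in include/litmus/trace.h.

#include <linux/kernel.h>
#include <litmus/feather_trace.h>

/* Hypothetical helper: turn event 100 on, report how many table entries
 * matched, then turn it off again. */
static void ft_demo_toggle(void)
{
	int matched;

	/* rewrites the jump offset so the callback is reached */
	matched = ft_enable_event(100);
	printk(KERN_INFO "ft: event 100 enabled, %d entries\n", matched);

	if (ft_is_event_enabled(100))
		printk(KERN_INFO "ft: event 100 is active\n");

	/* restores the short jump that skips the callback */
	ft_disable_event(100);
}
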
277diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
278index fcaa026..1063dfc 100644
279--- a/arch/x86/kernel/smp_32.c
280+++ b/arch/x86/kernel/smp_32.c
281@@ -641,6 +641,7 @@ static void native_smp_send_stop(void)
282 fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
283 {
284 ack_APIC_irq();
285+ set_tsk_need_resched(current);
286 __get_cpu_var(irq_stat).irq_resched_count++;
287 }
288
289diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
290index 8344c70..f6fdb0a 100644
291--- a/arch/x86/kernel/syscall_table_32.S
292+++ b/arch/x86/kernel/syscall_table_32.S
293@@ -324,3 +324,19 @@ ENTRY(sys_call_table)
294 .long sys_timerfd
295 .long sys_eventfd
296 .long sys_fallocate
297+ /* LITMUS */
298+ .long sys_set_rt_task_param /* 325 */
299+ .long sys_get_rt_task_param
300+ .long sys_complete_job
301+ .long sys_register_np_flag
302+ .long sys_exit_np
303+ .long sys_od_open /* 330 */
304+ .long sys_od_close
305+ .long sys_fmlp_down
306+ .long sys_fmlp_up
307+ .long sys_srp_down
308+ .long sys_srp_up /* 335 */
309+ .long sys_query_job_no
310+ .long sys_wait_for_job_release
311+ .long sys_wait_for_ts_release
312+ .long sys_release_ts /* 339 */
313diff --git a/fs/exec.c b/fs/exec.c
314index 282240a..6f47786 100644
315--- a/fs/exec.c
316+++ b/fs/exec.c
317@@ -56,6 +56,8 @@
318 #include <asm/mmu_context.h>
319 #include <asm/tlb.h>
320
321+#include <litmus/litmus.h>
322+
323 #ifdef CONFIG_KMOD
324 #include <linux/kmod.h>
325 #endif
326@@ -1309,6 +1311,7 @@ int do_execve(char * filename,
327 goto out_kfree;
328
329 sched_exec();
330+ litmus_exec();
331
332 bprm->file = file;
333 bprm->filename = filename;
334diff --git a/fs/inode.c b/fs/inode.c
335index ed35383..ef71ea0 100644
336--- a/fs/inode.c
337+++ b/fs/inode.c
338@@ -220,6 +220,8 @@ void inode_init_once(struct inode *inode)
339 INIT_LIST_HEAD(&inode->inotify_watches);
340 mutex_init(&inode->inotify_mutex);
341 #endif
342+ INIT_LIST_HEAD(&inode->i_obj_list);
343+ mutex_init(&inode->i_obj_mutex);
344 }
345
346 EXPORT_SYMBOL(inode_init_once);
347diff --git a/include/asm-sparc64/feather_trace.h b/include/asm-sparc64/feather_trace.h
348new file mode 100644
349index 0000000..35ec70f
350--- /dev/null
351+++ b/include/asm-sparc64/feather_trace.h
352@@ -0,0 +1,22 @@
353+#ifndef _ARCH_FEATHER_TRACE_H
354+#define _ARCH_FEATHER_TRACE_H
355+
356+#include <asm/atomic.h>
357+#include <asm/timex.h>
358+
359+static inline int fetch_and_inc(int *val)
360+{
361+ return atomic_add_ret(1, (atomic_t*) val) - 1;
362+}
363+
364+static inline int fetch_and_dec(int *val)
365+{
366+ return atomic_sub_ret(1, (atomic_t*) val) + 1;
367+}
368+
369+static inline unsigned long long ft_timestamp(void)
370+{
371+ return get_cycles();
372+}
373+
374+#endif
375diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h
376index cb751b4..ebebde6 100644
377--- a/include/asm-sparc64/unistd.h
378+++ b/include/asm-sparc64/unistd.h
379@@ -333,7 +333,11 @@
380 #define __NR_eventfd 313
381 #define __NR_fallocate 314
382
383-#define NR_SYSCALLS 315
384+#define __NR_LITMUS 315
385+
386+#include "litmus/unistd.h"
387+
388+#define NR_SYSCALLS 315 + NR_litmus_syscalls
389
390 #ifdef __KERNEL__
391 /* sysconf options, for SunOS compatibility */
392diff --git a/include/asm-x86/feather_trace.h b/include/asm-x86/feather_trace.h
393new file mode 100644
394index 0000000..253067e
395--- /dev/null
396+++ b/include/asm-x86/feather_trace.h
397@@ -0,0 +1,104 @@
398+#ifndef _ARCH_FEATHER_TRACE_H
399+#define _ARCH_FEATHER_TRACE_H
400+
401+static inline int fetch_and_inc(int *val)
402+{
403+ int ret = 1;
404+ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
405+ return ret;
406+}
407+
408+static inline int fetch_and_dec(int *val)
409+{
410+ int ret = -1;
411+ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
412+ return ret;
413+}
414+
415+#define feather_callback __attribute__((regparm(0)))
416+
417+/* make the compiler reload any register that is not saved in
418+ * a cdecl function call
419+ */
420+#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
421+
422+#define ft_event(id, callback) \
423+ __asm__ __volatile__( \
424+ "1: jmp 2f \n\t" \
425+ " call " #callback " \n\t" \
426+ ".section __event_table, \"aw\" \n\t" \
427+ ".long " #id ", 0, 1b, 2f \n\t" \
428+ ".previous \n\t" \
429+ "2: \n\t" \
430+ : : : CLOBBER_LIST)
431+
432+#define ft_event0(id, callback) \
433+ __asm__ __volatile__( \
434+ "1: jmp 2f \n\t" \
435+ " subl $4, %%esp \n\t" \
436+ " movl $" #id ", (%%esp) \n\t" \
437+ " call " #callback " \n\t" \
438+ " addl $4, %%esp \n\t" \
439+ ".section __event_table, \"aw\" \n\t" \
440+ ".long " #id ", 0, 1b, 2f \n\t" \
441+ ".previous \n\t" \
442+ "2: \n\t" \
443+ : : : CLOBBER_LIST)
444+
445+#define ft_event1(id, callback, param) \
446+ __asm__ __volatile__( \
447+ "1: jmp 2f \n\t" \
448+ " subl $8, %%esp \n\t" \
449+ " movl %0, 4(%%esp) \n\t" \
450+ " movl $" #id ", (%%esp) \n\t" \
451+ " call " #callback " \n\t" \
452+ " addl $8, %%esp \n\t" \
453+ ".section __event_table, \"aw\" \n\t" \
454+ ".long " #id ", 0, 1b, 2f \n\t" \
455+ ".previous \n\t" \
456+ "2: \n\t" \
457+ : : "r" (param) : CLOBBER_LIST)
458+
459+#define ft_event2(id, callback, param, param2) \
460+ __asm__ __volatile__( \
461+ "1: jmp 2f \n\t" \
462+ " subl $12, %%esp \n\t" \
463+ " movl %1, 8(%%esp) \n\t" \
464+ " movl %0, 4(%%esp) \n\t" \
465+ " movl $" #id ", (%%esp) \n\t" \
466+ " call " #callback " \n\t" \
467+ " addl $12, %%esp \n\t" \
468+ ".section __event_table, \"aw\" \n\t" \
469+ ".long " #id ", 0, 1b, 2f \n\t" \
470+ ".previous \n\t" \
471+ "2: \n\t" \
472+ : : "r" (param), "r" (param2) : CLOBBER_LIST)
473+
474+
475+#define ft_event3(id, callback, p, p2, p3) \
476+ __asm__ __volatile__( \
477+ "1: jmp 2f \n\t" \
478+ " subl $16, %%esp \n\t" \
479+ " movl %1, 12(%%esp) \n\t" \
480+ " movl %1, 8(%%esp) \n\t" \
481+ " movl %0, 4(%%esp) \n\t" \
482+ " movl $" #id ", (%%esp) \n\t" \
483+ " call " #callback " \n\t" \
484+ " addl $16, %%esp \n\t" \
485+ ".section __event_table, \"aw\" \n\t" \
486+ ".long " #id ", 0, 1b, 2f \n\t" \
487+ ".previous \n\t" \
488+ "2: \n\t" \
489+ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
490+
491+
492+static inline unsigned long long ft_timestamp(void)
493+{
494+ unsigned long long ret;
495+ __asm__ __volatile__("rdtsc" : "=A" (ret));
496+ return ret;
497+}
498+
499+#define __ARCH_HAS_FEATHER_TRACE
500+
501+#endif
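
To make the calling convention concrete, here is a hypothetical callback/trigger pair; it relies only on the macros above, and save_timestamp() in litmus/trace.c is the real counterpart wired up through include/litmus/trace.h.

#include <litmus/feather_trace.h>

/* The callback must be cdecl (regparm(0)): ft_event0 pushes the event id
 * onto the stack before the call. Hypothetical example, not in the patch. */
feather_callback void demo_count_hits(unsigned long event_id)
{
	static unsigned long hits;
	hits++;	/* executed only while the event's jump is patched out */
}

static inline void demo_hot_path(void)
{
	/* id 42 is arbitrary; the triple also lands in __event_table */
	ft_event0(42, demo_count_hits);
}
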
502diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
503index 9b15545..36fec84 100644
504--- a/include/asm-x86/unistd_32.h
505+++ b/include/asm-x86/unistd_32.h
506@@ -331,9 +331,13 @@
507 #define __NR_eventfd 323
508 #define __NR_fallocate 324
509
510+#define __NR_LITMUS 325
511+
512+#include "litmus/unistd.h"
513+
514 #ifdef __KERNEL__
515
516-#define NR_syscalls 325
517+#define NR_syscalls 325 + NR_litmus_syscalls
518
519 #define __ARCH_WANT_IPC_PARSE_VERSION
520 #define __ARCH_WANT_OLD_READDIR
521diff --git a/include/linux/completion.h b/include/linux/completion.h
522index 33d6aaf..5b55e97 100644
523--- a/include/linux/completion.h
524+++ b/include/linux/completion.h
525@@ -51,7 +51,7 @@ extern unsigned long wait_for_completion_interruptible_timeout(
526
527 extern void complete(struct completion *);
528 extern void complete_all(struct completion *);
529-
530+extern void complete_n(struct completion *, int n);
531 #define INIT_COMPLETION(x) ((x).done = 0)
532
533 #endif
534diff --git a/include/linux/fs.h b/include/linux/fs.h
535index b3ec4a4..22f856c 100644
536--- a/include/linux/fs.h
537+++ b/include/linux/fs.h
538@@ -588,6 +588,8 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
539 #define i_size_ordered_init(inode) do { } while (0)
540 #endif
541
542+struct inode_obj_id_table;
543+
544 struct inode {
545 struct hlist_node i_hash;
546 struct list_head i_list;
547@@ -653,6 +655,9 @@ struct inode {
548 void *i_security;
549 #endif
550 void *i_private; /* fs or device private pointer */
551+
552+ struct list_head i_obj_list;
553+ struct mutex i_obj_mutex;
554 };
555
556 /*
557diff --git a/include/linux/sched.h b/include/linux/sched.h
558index cc14656..76e28f1 100644
559--- a/include/linux/sched.h
560+++ b/include/linux/sched.h
561@@ -37,6 +37,7 @@
562 #define SCHED_BATCH 3
563 /* SCHED_ISO: reserved but not implemented yet */
564 #define SCHED_IDLE 5
565+#define SCHED_LITMUS 6
566
567 #ifdef __KERNEL__
568
569@@ -91,6 +92,8 @@ struct sched_param {
570
571 #include <asm/processor.h>
572
573+#include <litmus/rt_param.h>
574+
575 struct exec_domain;
576 struct futex_pi_state;
577 struct bio;
578@@ -914,6 +917,8 @@ struct sched_entity {
579 #endif
580 };
581
582+struct od_table_entry;
583+
584 struct task_struct {
585 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
586 void *stack;
587@@ -1178,6 +1183,12 @@ struct task_struct {
588 int make_it_fail;
589 #endif
590 struct prop_local_single dirties;
591+
592+ /* litmus parameters and state */
593+ struct rt_param rt_param;
594+
595+ /* references to PI semaphores, etc. */
596+ struct od_table_entry* od_table;
597 };
598
599 /*
600diff --git a/include/linux/tick.h b/include/linux/tick.h
601index f4a1395..7eae358 100644
602--- a/include/linux/tick.h
603+++ b/include/linux/tick.h
604@@ -64,6 +64,9 @@ extern int tick_is_oneshot_available(void);
605 extern struct tick_device *tick_get_device(int cpu);
606
607 # ifdef CONFIG_HIGH_RES_TIMERS
608+#define LINUX_DEFAULT_TICKS 0
609+#define LITMUS_ALIGNED_TICKS 1
610+#define LITMUS_STAGGERED_TICKS 2
611 extern int tick_init_highres(void);
612 extern int tick_program_event(ktime_t expires, int force);
613 extern void tick_setup_sched_timer(void);
614diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
615index 975c963..6ae0ff9 100644
616--- a/include/linux/uaccess.h
617+++ b/include/linux/uaccess.h
618@@ -84,4 +84,20 @@ static inline unsigned long __copy_from_user_nocache(void *to,
619 ret; \
620 })
621
622+/* This is a naive attempt at a write version of the above native Linux macro.
623+ */
624+#define poke_kernel_address(val, addr) \
625+ ({ \
626+ long ret; \
627+ mm_segment_t old_fs = get_fs(); \
628+ \
629+ set_fs(KERNEL_DS); \
630+ pagefault_disable(); \
631+ ret = __put_user(val, (__force typeof(val) __user *)(addr)); \
632+ pagefault_enable(); \
633+ set_fs(old_fs); \
634+ ret; \
635+ })
636+
637+
638 #endif /* __LINUX_UACCESS_H__ */
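
A hypothetical caller of poke_kernel_address() (not part of this patch; ft_event.c above currently writes its jump offsets directly) could patch a single byte through the fault-safe path like this:

#include <linux/errno.h>
#include <linux/uaccess.h>

/* Hypothetical sketch: overwrite the one-byte jump offset that follows a
 * 0xeb short-jump opcode, returning 0 on success. */
static int demo_patch_jump_offset(unsigned char *instr, unsigned char off)
{
	long err;

	err = poke_kernel_address(off, instr + 1);
	return err ? -EFAULT : 0;
}
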
639diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
640new file mode 100644
641index 0000000..4dff77a
642--- /dev/null
643+++ b/include/litmus/edf_common.h
644@@ -0,0 +1,25 @@
645+/* EDF common data structures and utility functions shared by all EDF
646+ * based scheduler plugins
647+ */
648+
649+/* CLEANUP: Add comments and make it less messy.
650+ *
651+ */
652+
653+#ifndef __UNC_EDF_COMMON_H__
654+#define __UNC_EDF_COMMON_H__
655+
656+#include <litmus/rt_domain.h>
657+
658+
659+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
660+ release_job_t release);
661+
662+int edf_higher_prio(struct task_struct* first,
663+ struct task_struct* second);
664+
665+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
666+
667+int edf_set_hp_task(struct pi_semaphore *sem);
668+int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu);
669+#endif
670diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
671new file mode 100644
672index 0000000..286e10f
673--- /dev/null
674+++ b/include/litmus/fdso.h
675@@ -0,0 +1,69 @@
676+/* fdso.h - file descriptor attached shared objects
677+ *
678+ * (c) 2007 B. Brandenburg, LITMUS^RT project
679+ */
680+
681+#ifndef _LINUX_FDSO_H_
682+#define _LINUX_FDSO_H_
683+
684+#include <linux/list.h>
685+#include <asm/atomic.h>
686+
687+#include <linux/fs.h>
688+
689+#define MAX_OBJECT_DESCRIPTORS 32
690+
691+typedef enum {
692+ MIN_OBJ_TYPE = 0,
693+
694+ FMLP_SEM = 0,
695+ SRP_SEM = 1,
696+
697+ MAX_OBJ_TYPE = 1
698+} obj_type_t;
699+
700+struct inode_obj_id {
701+ struct list_head list;
702+ atomic_t count;
703+ struct inode* inode;
704+
705+ obj_type_t type;
706+ void* obj;
707+ unsigned int id;
708+};
709+
710+
711+struct od_table_entry {
712+ unsigned int used;
713+
714+ struct inode_obj_id* obj;
715+ void* extra;
716+};
717+
718+struct fdso_ops {
719+ void* (*create) (void);
720+ void (*destroy)(void*);
721+ int (*open) (struct od_table_entry*, void* __user);
722+ int (*close) (struct od_table_entry*);
723+};
724+
725+/* translate a userspace supplied od into the raw table entry
726+ * returns NULL if od is invalid
727+ */
728+struct od_table_entry* __od_lookup(int od);
729+
730+/* translate a userspace supplied od into the associated object
731+ * returns NULL if od is invalid
732+ */
733+static inline void* od_lookup(int od, obj_type_t type)
734+{
735+ struct od_table_entry* e = __od_lookup(od);
736+ return e && e->obj->type == type ? e->obj->obj : NULL;
737+}
738+
739+#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
740+#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
741+#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
742+
743+
744+#endif
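
For illustration, a hypothetical system-call backend resolving a user-supplied object descriptor with the helpers above; the FMLP/SRP syscalls in litmus/ follow this pattern.

#include <linux/errno.h>
#include <litmus/fdso.h>
#include <litmus/sched_plugin.h>	/* for struct pi_semaphore */

static long demo_use_fmlp(int od)
{
	struct pi_semaphore *sem = lookup_fmlp_sem(od);

	if (!sem)
		return -EINVAL;	/* od invalid, or not an FMLP_SEM object */

	/* ... down()/up() style operations on sem would go here ... */
	return 0;
}
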
745diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
746new file mode 100644
747index 0000000..6c18277
748--- /dev/null
749+++ b/include/litmus/feather_buffer.h
750@@ -0,0 +1,94 @@
751+#ifndef _FEATHER_BUFFER_H_
752+#define _FEATHER_BUFFER_H_
753+
754+/* requires UINT_MAX and memcpy */
755+
756+#define SLOT_FREE 0
757+#define SLOT_BUSY 1
758+#define SLOT_READY 2
759+
760+struct ft_buffer {
761+ unsigned int slot_count;
762+ unsigned int slot_size;
763+
764+ int free_count;
765+ unsigned int write_idx;
766+ unsigned int read_idx;
767+
768+ char* slots;
769+ void* buffer_mem;
770+ unsigned int failed_writes;
771+};
772+
773+static inline int init_ft_buffer(struct ft_buffer* buf,
774+ unsigned int slot_count,
775+ unsigned int slot_size,
776+ char* slots,
777+ void* buffer_mem)
778+{
779+ int i = 0;
780+ if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
781+ /* The slot count must divide UINT_MAX + 1 so that when it
782+ * wraps around the index correctly points to 0.
783+ */
784+ return 0;
785+ } else {
786+ buf->slot_count = slot_count;
787+ buf->slot_size = slot_size;
788+ buf->slots = slots;
789+ buf->buffer_mem = buffer_mem;
790+ buf->free_count = slot_count;
791+ buf->write_idx = 0;
792+ buf->read_idx = 0;
793+ buf->failed_writes = 0;
794+ for (i = 0; i < slot_count; i++)
795+ buf->slots[i] = SLOT_FREE;
796+ return 1;
797+ }
798+}
799+
800+static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
801+{
802+ int free = fetch_and_dec(&buf->free_count);
803+ unsigned int idx;
804+ if (free <= 0) {
805+ fetch_and_inc(&buf->free_count);
806+ *ptr = 0;
807+ fetch_and_inc(&buf->failed_writes);
808+ return 0;
809+ } else {
810+ idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
811+ buf->slots[idx] = SLOT_BUSY;
812+ *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
813+ return 1;
814+ }
815+}
816+
817+static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
818+{
819+ unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
820+ buf->slots[idx] = SLOT_READY;
821+}
822+
823+
824+/* exclusive reader access is assumed */
825+static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
826+{
827+ unsigned int idx;
828+ if (buf->free_count == buf->slot_count)
829+ /* nothing available */
830+ return 0;
831+ idx = buf->read_idx % buf->slot_count;
832+ if (buf->slots[idx] == SLOT_READY) {
833+ memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
834+ buf->slot_size);
835+ buf->slots[idx] = SLOT_FREE;
836+ buf->read_idx++;
837+ fetch_and_inc(&buf->free_count);
838+ return 1;
839+ } else
840+ return 0;
841+}
842+
843+
844+#endif
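
A minimal sketch of the intended single-writer/single-reader usage, assuming CONFIG_FEATHER_TRACE so that struct timestamp (defined later in include/litmus/trace.h) is available; litmus/trace.c sets the buffer up in a similar way behind a character device. The demo_* names are hypothetical.

#include <litmus/feather_buffer.h>
#include <litmus/trace.h>

#define DEMO_SLOTS 128	/* must divide UINT_MAX + 1, i.e. a power of two */

static struct ft_buffer demo_buf;
static char demo_flags[DEMO_SLOTS];
static struct timestamp demo_mem[DEMO_SLOTS];

static int demo_setup(void)
{
	/* returns 1 on success, 0 if the slot count is unusable */
	return init_ft_buffer(&demo_buf, DEMO_SLOTS,
			      sizeof(struct timestamp),
			      demo_flags, demo_mem);
}

static void demo_write(uint64_t now)
{
	struct timestamp *ts;

	if (ft_buffer_start_write(&demo_buf, (void **) &ts)) {
		ts->timestamp = now;
		ft_buffer_finish_write(&demo_buf, ts);
	}
	/* else: no free slot, failed_writes was incremented */
}

static int demo_read_one(struct timestamp *dest)
{
	return ft_buffer_read(&demo_buf, dest);	/* 1 if a slot was copied */
}
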
845diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
846new file mode 100644
847index 0000000..f8fb7ba
848--- /dev/null
849+++ b/include/litmus/feather_trace.h
850@@ -0,0 +1,37 @@
851+#ifndef _FEATHER_TRACE_H_
852+#define _FEATHER_TRACE_H_
853+
854+#include <asm/feather_trace.h>
855+
856+int ft_enable_event(unsigned long id);
857+int ft_disable_event(unsigned long id);
858+int ft_is_event_enabled(unsigned long id);
859+int ft_disable_all_events(void);
860+
861+#ifndef __ARCH_HAS_FEATHER_TRACE
862+/* provide default implementation */
863+
864+#define feather_callback
865+
866+#define MAX_EVENTS 1024
867+
868+extern int ft_events[MAX_EVENTS];
869+
870+#define ft_event(id, callback) \
871+ if (ft_events[id]) callback();
872+
873+#define ft_event0(id, callback) \
874+ if (ft_events[id]) callback(id);
875+
876+#define ft_event1(id, callback, param) \
877+ if (ft_events[id]) callback(id, param);
878+
879+#define ft_event2(id, callback, param, param2) \
880+ if (ft_events[id]) callback(id, param, param2);
881+
882+#define ft_event3(id, callback, p, p2, p3) \
883+ if (ft_events[id]) callback(id, p, p2, p3);
884+#endif
885+
886+
887+#endif
888diff --git a/include/litmus/heap.h b/include/litmus/heap.h
889new file mode 100644
890index 0000000..b26804f
891--- /dev/null
892+++ b/include/litmus/heap.h
893@@ -0,0 +1,301 @@
894+/* heap.h -- Binomial Heaps
895+ *
896+ * (c) 2008 Bjoern Brandenburg
897+ */
898+
899+#ifndef HEAP_H
900+#define HEAP_H
901+
902+#define NOT_IN_HEAP UINT_MAX
903+
904+struct heap_node {
905+ struct heap_node* parent;
906+ struct heap_node* next;
907+ struct heap_node* child;
908+
909+ unsigned int degree;
910+ void* value;
911+ struct heap_node** ref;
912+};
913+
914+struct heap {
915+ struct heap_node* head;
916+ /* We cache the minimum of the heap.
917+ * This speeds up repeated peek operations.
918+ */
919+ struct heap_node* min;
920+};
921+
922+typedef int (*heap_prio_t)(struct heap_node* a, struct heap_node* b);
923+
924+static inline void heap_init(struct heap* heap)
925+{
926+ heap->head = NULL;
927+ heap->min = NULL;
928+}
929+
930+static inline void heap_node_init(struct heap_node** _h, void* value)
931+{
932+ struct heap_node* h = *_h;
933+ h->parent = NULL;
934+ h->next = NULL;
935+ h->child = NULL;
936+ h->degree = NOT_IN_HEAP;
937+ h->value = value;
938+ h->ref = _h;
939+}
940+
941+static inline int heap_node_in_heap(struct heap_node* h)
942+{
943+ return h->degree != NOT_IN_HEAP;
944+}
945+
946+static inline int heap_empty(struct heap* heap)
947+{
948+ return heap->head == NULL && heap->min == NULL;
949+}
950+
951+/* make child a subtree of root */
952+static inline void __heap_link(struct heap_node* root,
953+ struct heap_node* child)
954+{
955+ child->parent = root;
956+ child->next = root->child;
957+ root->child = child;
958+ root->degree++;
959+}
960+
961+/* merge root lists */
962+static inline struct heap_node* __heap_merge(struct heap_node* a,
963+ struct heap_node* b)
964+{
965+ struct heap_node* head = NULL;
966+ struct heap_node** pos = &head;
967+
968+ while (a && b) {
969+ if (a->degree < b->degree) {
970+ *pos = a;
971+ a = a->next;
972+ } else {
973+ *pos = b;
974+ b = b->next;
975+ }
976+ pos = &(*pos)->next;
977+ }
978+ if (a)
979+ *pos = a;
980+ else
981+ *pos = b;
982+ return head;
983+}
984+
985+/* reverse a linked list of nodes. also clears parent pointer */
986+static inline struct heap_node* __heap_reverse(struct heap_node* h)
987+{
988+ struct heap_node* tail = NULL;
989+ struct heap_node* next;
990+
991+ if (!h)
992+ return h;
993+
994+ h->parent = NULL;
995+ while (h->next) {
996+ next = h->next;
997+ h->next = tail;
998+ tail = h;
999+ h = next;
1000+ h->parent = NULL;
1001+ }
1002+ h->next = tail;
1003+ return h;
1004+}
1005+
1006+static inline void __heap_min(heap_prio_t higher_prio, struct heap* heap,
1007+ struct heap_node** prev, struct heap_node** node)
1008+{
1009+ struct heap_node *_prev, *cur;
1010+ *prev = NULL;
1011+
1012+ if (!heap->head) {
1013+ *node = NULL;
1014+ return;
1015+ }
1016+
1017+ *node = heap->head;
1018+ _prev = heap->head;
1019+ cur = heap->head->next;
1020+ while (cur) {
1021+ if (higher_prio(cur, *node)) {
1022+ *node = cur;
1023+ *prev = _prev;
1024+ }
1025+ _prev = cur;
1026+ cur = cur->next;
1027+ }
1028+}
1029+
1030+static inline void __heap_union(heap_prio_t higher_prio, struct heap* heap,
1031+ struct heap_node* h2)
1032+{
1033+ struct heap_node* h1;
1034+ struct heap_node *prev, *x, *next;
1035+ if (!h2)
1036+ return;
1037+ h1 = heap->head;
1038+ if (!h1) {
1039+ heap->head = h2;
1040+ return;
1041+ }
1042+ h1 = __heap_merge(h1, h2);
1043+ prev = NULL;
1044+ x = h1;
1045+ next = x->next;
1046+ while (next) {
1047+ if (x->degree != next->degree ||
1048+ (next->next && next->next->degree == x->degree)) {
1049+ /* nothing to do, advance */
1050+ prev = x;
1051+ x = next;
1052+ } else if (higher_prio(x, next)) {
1053+ /* x becomes the root of next */
1054+ x->next = next->next;
1055+ __heap_link(x, next);
1056+ } else {
1057+ /* next becomes the root of x */
1058+ if (prev)
1059+ prev->next = next;
1060+ else
1061+ h1 = next;
1062+ __heap_link(next, x);
1063+ x = next;
1064+ }
1065+ next = x->next;
1066+ }
1067+ heap->head = h1;
1068+}
1069+
1070+static inline struct heap_node* __heap_extract_min(heap_prio_t higher_prio,
1071+ struct heap* heap)
1072+{
1073+ struct heap_node *prev, *node;
1074+ __heap_min(higher_prio, heap, &prev, &node);
1075+ if (!node)
1076+ return NULL;
1077+ if (prev)
1078+ prev->next = node->next;
1079+ else
1080+ heap->head = node->next;
1081+ __heap_union(higher_prio, heap, __heap_reverse(node->child));
1082+ return node;
1083+}
1084+
1085+/* insert (and reinitialize) a node into the heap */
1086+static inline void heap_insert(heap_prio_t higher_prio, struct heap* heap,
1087+ struct heap_node* node)
1088+{
1089+ struct heap_node *min;
1090+ node->child = NULL;
1091+ node->parent = NULL;
1092+ node->next = NULL;
1093+ node->degree = 0;
1094+ if (heap->min && higher_prio(node, heap->min)) {
1095+ /* swap min cache */
1096+ min = heap->min;
1097+ min->child = NULL;
1098+ min->parent = NULL;
1099+ min->next = NULL;
1100+ min->degree = 0;
1101+ __heap_union(higher_prio, heap, min);
1102+ heap->min = node;
1103+ } else
1104+ __heap_union(higher_prio, heap, node);
1105+}
1106+
1107+static inline void __uncache_min(heap_prio_t higher_prio, struct heap* heap)
1108+{
1109+ struct heap_node* min;
1110+ if (heap->min) {
1111+ min = heap->min;
1112+ heap->min = NULL;
1113+ heap_insert(higher_prio, heap, min);
1114+ }
1115+}
1116+
1117+/* merge addition into target */
1118+static inline void heap_union(heap_prio_t higher_prio,
1119+ struct heap* target, struct heap* addition)
1120+{
1121+ /* first insert any cached minima, if necessary */
1122+ __uncache_min(higher_prio, target);
1123+ __uncache_min(higher_prio, addition);
1124+ __heap_union(higher_prio, target, addition->head);
1125+ /* this is a destructive merge */
1126+ addition->head = NULL;
1127+}
1128+
1129+static inline struct heap_node* heap_peek(heap_prio_t higher_prio,
1130+ struct heap* heap)
1131+{
1132+ if (!heap->min)
1133+ heap->min = __heap_extract_min(higher_prio, heap);
1134+ return heap->min;
1135+}
1136+
1137+static inline struct heap_node* heap_take(heap_prio_t higher_prio,
1138+ struct heap* heap)
1139+{
1140+ struct heap_node *node;
1141+ if (!heap->min)
1142+ heap->min = __heap_extract_min(higher_prio, heap);
1143+ node = heap->min;
1144+ heap->min = NULL;
1145+ if (node)
1146+ node->degree = NOT_IN_HEAP;
1147+ return node;
1148+}
1149+
1150+static inline void heap_delete(heap_prio_t higher_prio, struct heap* heap,
1151+ struct heap_node* node)
1152+{
1153+ struct heap_node *parent, *prev, *pos;
1154+ struct heap_node** tmp_ref;
1155+ void* tmp;
1156+
1157+ if (heap->min != node) {
1158+ /* bubble up */
1159+ parent = node->parent;
1160+ while (parent) {
1161+ /* swap parent and node */
1162+ tmp = parent->value;
1163+ parent->value = node->value;
1164+ node->value = tmp;
1165+ /* swap references */
1166+ *(parent->ref) = node;
1167+ *(node->ref) = parent;
1168+ tmp_ref = parent->ref;
1169+ parent->ref = node->ref;
1170+ node->ref = tmp_ref;
1171+ /* step up */
1172+ node = parent;
1173+ parent = node->parent;
1174+ }
1175+ /* now delete:
1176+ * first find prev */
1177+ prev = NULL;
1178+ pos = heap->head;
1179+ while (pos != node) {
1180+ prev = pos;
1181+ pos = pos->next;
1182+ }
1183+ /* we have prev, now remove node */
1184+ if (prev)
1185+ prev->next = node->next;
1186+ else
1187+ heap->head = node->next;
1188+ __heap_union(higher_prio, heap, __heap_reverse(node->child));
1189+ } else
1190+ heap->min = NULL;
1191+ node->degree = NOT_IN_HEAP;
1192+}
1193+
1194+#endif
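
A hypothetical sketch of how a plugin drives this binomial heap: the heap_node lives outside the payload (see the warning in rt_param.h below) and nodes are ordered by a heap_prio_t callback, EDF deadline order in the real plugins. All demo_* names are illustrative only.

#include <litmus/heap.h>

/* Placeholder order for the sketch; the EDF plugins compare the deadlines
 * of heap2task(a) and heap2task(b) here. */
static int demo_order(struct heap_node *a, struct heap_node *b)
{
	return a->value < b->value;
}

static void demo_heap_usage(struct heap *h,
			    struct heap_node **node_ref, void *payload)
{
	struct heap_node *top;

	heap_init(h);
	heap_node_init(node_ref, payload);  /* ties *node_ref to payload */
	heap_insert(demo_order, h, *node_ref);

	top = heap_peek(demo_order, h);     /* cached minimum, stays queued */
	top = heap_take(demo_order, h);     /* removes the minimum */
	if (top)
		payload = top->value;       /* recover the payload pointer */
}
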
1195diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
1196new file mode 100644
1197index 0000000..9bd361e
1198--- /dev/null
1199+++ b/include/litmus/jobs.h
1200@@ -0,0 +1,9 @@
1201+#ifndef __LITMUS_JOBS_H__
1202+#define __LITMUS_JOBS_H__
1203+
1204+void prepare_for_next_period(struct task_struct *t);
1205+void release_at(struct task_struct *t, lt_t start);
1206+long complete_job(void);
1207+
1208+#endif
1209+
1210diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
1211new file mode 100644
1212index 0000000..de2a3c2
1213--- /dev/null
1214+++ b/include/litmus/litmus.h
1215@@ -0,0 +1,205 @@
1216+/*
1217+ * Constant definitions related to
1218+ * scheduling policy.
1219+ */
1220+
1221+#ifndef _LINUX_LITMUS_H_
1222+#define _LINUX_LITMUS_H_
1223+
1224+#include <linux/jiffies.h>
1225+#include <litmus/sched_trace.h>
1226+
1227+/* RT mode start time */
1228+extern volatile unsigned long rt_start_time;
1229+
1230+extern atomic_t __log_seq_no;
1231+
1232+#define TRACE(fmt, args...) \
1233+ sched_trace_log_message("%d P%d: " fmt, atomic_add_return(1, &__log_seq_no), \
1234+ raw_smp_processor_id(), ## args)
1235+
1236+#define TRACE_TASK(t, fmt, args...) \
1237+ TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args)
1238+
1239+#define TRACE_CUR(fmt, args...) \
1240+ TRACE_TASK(current, fmt, ## args)
1241+
1242+#define TRACE_BUG_ON(cond) \
1243+ do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \
1244+ "called from %p current=%s/%d state=%d " \
1245+ "flags=%x partition=%d cpu=%d rtflags=%d"\
1246+ " job=%u knp=%d timeslice=%u\n", \
1247+ #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \
1248+ current->pid, current->state, current->flags, \
1249+ get_partition(current), smp_processor_id(), get_rt_flags(current), \
1250+ current->rt_param.job_params.job_no, current->rt_param.kernel_np, \
1251+ current->time_slice\
1252+ ); } while(0);
1253+
1254+
1255+/* in_list - is a given list_head queued on some list?
1256+ */
1257+static inline int in_list(struct list_head* list)
1258+{
1259+ return !( /* case 1: deleted */
1260+ (list->next == LIST_POISON1 &&
1261+ list->prev == LIST_POISON2)
1262+ ||
1263+ /* case 2: initialized */
1264+ (list->next == list &&
1265+ list->prev == list)
1266+ );
1267+}
1268+
1269+typedef int (*list_cmp_t)(struct list_head*, struct list_head*);
1270+
1271+static inline unsigned int list_insert(struct list_head* new,
1272+ struct list_head* head,
1273+ list_cmp_t order_before)
1274+{
1275+ struct list_head *pos;
1276+ unsigned int passed = 0;
1277+
1278+ BUG_ON(!new);
1279+
1280+ /* find a spot where the new entry is less than the next */
1281+ list_for_each(pos, head) {
1282+ if (unlikely(order_before(new, pos))) {
1283+ /* pos is not less than new, thus insert here */
1284+ __list_add(new, pos->prev, pos);
1285+ goto out;
1286+ }
1287+ passed++;
1288+ }
1289+ /* if we get to this point either the list is empty or every
1290+ * queued element is less than new.
1291+ * Let's add new to the end. */
1292+ list_add_tail(new, head);
1293+ out:
1294+ return passed;
1295+}
1296+
1297+void list_qsort(struct list_head* list, list_cmp_t less_than);
1298+
1299+
1300+#define RT_PREEMPTIVE 0x2050 /* = NP */
1301+#define RT_NON_PREEMPTIVE 0x4e50 /* = P */
1302+#define RT_EXIT_NP_REQUESTED 0x5251 /* = RQ */
1303+
1304+
1305+/* kill naughty tasks
1306+ */
1307+void scheduler_signal(struct task_struct *t, unsigned int signal);
1308+void send_scheduler_signals(void);
1309+void np_mem_kill(struct task_struct *t);
1310+
1311+void litmus_fork(struct task_struct *tsk);
1312+void litmus_exec(void);
1313+/* clean up real-time state of a task */
1314+void exit_litmus(struct task_struct *dead_tsk);
1315+
1316+long litmus_admit_task(struct task_struct *tsk);
1317+void litmus_exit_task(struct task_struct *tsk);
1318+
1319+#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
1320+#define rt_transition_pending(t) \
1321+ ((t)->rt_param.transition_pending)
1322+
1323+#define tsk_rt(t) (&(t)->rt_param)
1324+
1325+/* Realtime utility macros */
1326+#define get_rt_flags(t) (tsk_rt(t)->flags)
1327+#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
1328+#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
1329+#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
1330+#define get_rt_period(t) (tsk_rt(t)->task_params.period)
1331+#define get_partition(t) (tsk_rt(t)->task_params.cpu)
1332+#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
1333+#define get_class(t) (tsk_rt(t)->task_params.cls)
1334+
1335+inline static int budget_exhausted(struct task_struct* t)
1336+{
1337+ return get_exec_time(t) >= get_exec_cost(t);
1338+}
1339+
1340+
1341+#define is_hrt(t) \
1342+ (tsk_rt(t)->task_params.class == RT_CLASS_HARD)
1343+#define is_srt(t) \
1344+ (tsk_rt(t)->task_params.class == RT_CLASS_SOFT)
1345+#define is_be(t) \
1346+ (tsk_rt(t)->task_params.class == RT_CLASS_BEST_EFFORT)
1347+
1348+#define get_release(t) (tsk_rt(t)->job_params.release)
1349+
1350+/* Our notion of time within LITMUS: kernel monotonic time. */
1351+static inline lt_t litmus_clock(void)
1352+{
1353+ return ktime_to_ns(ktime_get());
1354+}
1355+
1356+/* A macro to convert from nanoseconds to ktime_t. */
1357+#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
1358+
1359+/* The high-resolution release timer for a task. */
1360+#define release_timer(t) (tsk_rt(t)->release_timer)
1361+
1362+/* The rt_domain (ready queue) this task belongs to. */
1363+#define get_domain(t) (tsk_rt(t)->domain)
1364+
1365+/* Honor the flag in the preempt_count variable that is set
1366+ * when scheduling is in progress.
1367+ */
1368+#define is_running(t) \
1369+ ((t)->state == TASK_RUNNING || \
1370+ task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
1371+
1372+#define is_blocked(t) \
1373+ (!is_running(t))
1374+#define is_released(t, now) \
1375+ (lt_before_eq(get_release(t), now))
1376+#define is_tardy(t, now) \
1377+ (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
1378+
1379+/* real-time comparison macros */
1380+#define earlier_deadline(a, b) (lt_before(\
1381+ (a)->rt_param.job_params.deadline,\
1382+ (b)->rt_param.job_params.deadline))
1383+#define earlier_release(a, b) (lt_before(\
1384+ (a)->rt_param.job_params.release,\
1385+ (b)->rt_param.job_params.release))
1386+
1387+#define make_np(t) do {t->rt_param.kernel_np++;} while(0);
1388+#define take_np(t) do {t->rt_param.kernel_np--;} while(0);
1389+
1390+#ifdef CONFIG_SRP
1391+void srp_ceiling_block(void);
1392+#else
1393+#define srp_ceiling_block() /* nothing */
1394+#endif
1395+
1396+#define heap2task(hn) ((struct task_struct*) hn->value)
1397+
1398+
1399+#ifdef CONFIG_NP_SECTION
1400+/* returns 1 if task t has registered np flag and set it to RT_NON_PREEMPTIVE
1401+ */
1402+int is_np(struct task_struct *t);
1403+
1404+/* request that the task should call sys_exit_np()
1405+ */
1406+void request_exit_np(struct task_struct *t);
1407+
1408+#else
1409+
1410+static inline int is_np(struct task_struct *t)
1411+{
1412+ return tsk_rt(t)->kernel_np;
1413+}
1414+
1415+#define request_exit_np(t)
1416+
1417+#endif
1418+
1419+
1420+#endif
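
As a small illustration of the helpers above, a hypothetical admission check in the style of the scheduler plugins, combining the parameter accessors with the TRACE facility (demo_admit_check is not part of the patch):

#include <linux/errno.h>
#include <litmus/litmus.h>

static long demo_admit_check(struct task_struct *tsk)
{
	if (!get_exec_cost(tsk) || get_exec_cost(tsk) > get_rt_period(tsk)) {
		TRACE_TASK(tsk, "rejected: invalid exec cost/period\n");
		return -EINVAL;
	}
	TRACE_TASK(tsk, "admitted on partition %u\n", get_partition(tsk));
	return 0;
}
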
1421diff --git a/include/litmus/norqlock.h b/include/litmus/norqlock.h
1422new file mode 100644
1423index 0000000..e4c1d06
1424--- /dev/null
1425+++ b/include/litmus/norqlock.h
1426@@ -0,0 +1,26 @@
1427+#ifndef NORQLOCK_H
1428+#define NORQLOCK_H
1429+
1430+typedef void (*work_t)(unsigned long arg);
1431+
1432+struct no_rqlock_work {
1433+ int active;
1434+ work_t work;
1435+ unsigned long arg;
1436+ struct no_rqlock_work* next;
1437+};
1438+
1439+void init_no_rqlock_work(struct no_rqlock_work* w, work_t work,
1440+ unsigned long arg);
1441+
1442+void __do_without_rqlock(struct no_rqlock_work *work);
1443+
1444+static inline void do_without_rqlock(struct no_rqlock_work *work)
1445+{
1446+ if (!test_and_set_bit(0, (void*)&work->active))
1447+ __do_without_rqlock(work);
1448+}
1449+
1450+void tick_no_rqlock(void);
1451+
1452+#endif
1453diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
1454new file mode 100644
1455index 0000000..47cd123
1456--- /dev/null
1457+++ b/include/litmus/rt_domain.h
1458@@ -0,0 +1,127 @@
1459+/* CLEANUP: Add comments and make it less messy.
1460+ *
1461+ */
1462+
1463+#ifndef __UNC_RT_DOMAIN_H__
1464+#define __UNC_RT_DOMAIN_H__
1465+
1466+#include <litmus/norqlock.h>
1467+#include <litmus/heap.h>
1468+
1469+struct _rt_domain;
1470+
1471+typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
1472+typedef void (*release_job_t)(struct task_struct *t, struct _rt_domain *rt);
1473+
1474+typedef struct _rt_domain {
1475+ struct no_rqlock_work arm_timers;
1476+
1477+ /* runnable rt tasks are in here */
1478+ spinlock_t ready_lock;
1479+ struct heap ready_queue;
1480+
1481+ /* real-time tasks waiting for release are in here */
1482+ spinlock_t release_lock;
1483+ struct list_head release_queue;
1484+
1485+ /* how do we check if we need to kick another CPU? */
1486+ check_resched_needed_t check_resched;
1487+
1488+ /* how do we release a job? */
1489+ release_job_t release_job;
1490+
1491+ /* how are tasks ordered in the ready queue? */
1492+ heap_prio_t order;
1493+} rt_domain_t;
1494+
1495+static inline struct task_struct* __next_ready(rt_domain_t* rt)
1496+{
1497+ struct heap_node *hn = heap_peek(rt->order, &rt->ready_queue);
1498+ if (hn)
1499+ return heap2task(hn);
1500+ else
1501+ return NULL;
1502+}
1503+
1504+void rt_domain_init(rt_domain_t *rt, heap_prio_t order,
1505+ check_resched_needed_t check,
1506+ release_job_t release);
1507+
1508+void __add_ready(rt_domain_t* rt, struct task_struct *new);
1509+void __add_release(rt_domain_t* rt, struct task_struct *task);
1510+
1511+static inline struct task_struct* __take_ready(rt_domain_t* rt)
1512+{
1513+ struct heap_node* hn = heap_take(rt->order, &rt->ready_queue);
1514+ if (hn)
1515+ return heap2task(hn);
1516+ else
1517+ return NULL;
1518+}
1519+
1520+static inline struct task_struct* __peek_ready(rt_domain_t* rt)
1521+{
1522+ struct heap_node* hn = heap_peek(rt->order, &rt->ready_queue);
1523+ if (hn)
1524+ return heap2task(hn);
1525+ else
1526+ return NULL;
1527+}
1528+
1529+static inline int is_queued(struct task_struct *t)
1530+{
1531+ return heap_node_in_heap(tsk_rt(t)->heap_node);
1532+}
1533+
1534+static inline void remove(rt_domain_t* rt, struct task_struct *t)
1535+{
1536+ heap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
1537+}
1538+
1539+static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
1540+{
1541+ unsigned long flags;
1542+ /* first we need the write lock for rt_ready_queue */
1543+ spin_lock_irqsave(&rt->ready_lock, flags);
1544+ __add_ready(rt, new);
1545+ spin_unlock_irqrestore(&rt->ready_lock, flags);
1546+}
1547+
1548+static inline struct task_struct* take_ready(rt_domain_t* rt)
1549+{
1550+ unsigned long flags;
1551+ struct task_struct* ret;
1552+ /* first we need the write lock for rt_ready_queue */
1553+ spin_lock_irqsave(&rt->ready_lock, flags);
1554+ ret = __take_ready(rt);
1555+ spin_unlock_irqrestore(&rt->ready_lock, flags);
1556+ return ret;
1557+}
1558+
1559+
1560+static inline void add_release(rt_domain_t* rt, struct task_struct *task)
1561+{
1562+ unsigned long flags;
1563+ /* first we need the lock for the release queue */
1564+ spin_lock_irqsave(&rt->release_lock, flags);
1565+ __add_release(rt, task);
1566+ spin_unlock_irqrestore(&rt->release_lock, flags);
1567+}
1568+
1569+static inline int __jobs_pending(rt_domain_t* rt)
1570+{
1571+ return !heap_empty(&rt->ready_queue);
1572+}
1573+
1574+static inline int jobs_pending(rt_domain_t* rt)
1575+{
1576+ unsigned long flags;
1577+ int ret;
1578+ /* first we need the write lock for rt_ready_queue */
1579+ spin_lock_irqsave(&rt->ready_lock, flags);
1580+ ret = !heap_empty(&rt->ready_queue);
1581+ spin_unlock_irqrestore(&rt->ready_lock, flags);
1582+ return ret;
1583+}
1584+
1585+#endif
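
A hypothetical sketch of the rt_domain life cycle as the plugins below use it: initialize with an order and two callbacks, enqueue jobs as they are released, and pull the highest-priority job from the ready queue. Locking details vary between the real plugins; the demo_* functions are assumptions for illustration.

#include <litmus/rt_domain.h>

/* a real plugin compares priorities here and kicks a remote CPU */
static int demo_check_resched(rt_domain_t *rt)
{
	return 0;
}

/* invoked when a job's release time is reached */
static void demo_release_job(struct task_struct *t, rt_domain_t *rt)
{
	add_ready(rt, t);	/* takes ready_lock internally */
	rt->check_resched(rt);
}

static void demo_domain_setup(rt_domain_t *rt, heap_prio_t order)
{
	rt_domain_init(rt, order, demo_check_resched, demo_release_job);
}

static struct task_struct *demo_pick_next(rt_domain_t *rt)
{
	return take_ready(rt);	/* NULL if no job is ready */
}
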
1586diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
1587new file mode 100644
1588index 0000000..7bb5684
1589--- /dev/null
1590+++ b/include/litmus/rt_param.h
1591@@ -0,0 +1,170 @@
1592+/*
1593+ * Definitions of the LITMUS^RT time type (lt_t) and the per-task real-time parameters.
1594+ *
1595+ */
1596+#ifndef _LINUX_RT_PARAM_H_
1597+#define _LINUX_RT_PARAM_H_
1598+
1599+/* Litmus time type. */
1600+typedef unsigned long long lt_t;
1601+
1602+static inline int lt_after(lt_t a, lt_t b)
1603+{
1604+ return ((long long) b) - ((long long) a) < 0;
1605+}
1606+#define lt_before(a, b) lt_after(b, a)
1607+
1608+static inline int lt_after_eq(lt_t a, lt_t b)
1609+{
1610+ return ((long long) a) - ((long long) b) >= 0;
1611+}
1612+#define lt_before_eq(a, b) lt_after_eq(b, a)
1613+
1614+/* different types of clients */
1615+typedef enum {
1616+ RT_CLASS_HARD,
1617+ RT_CLASS_SOFT,
1618+ RT_CLASS_BEST_EFFORT
1619+} task_class_t;
1620+
1621+struct rt_task {
1622+ lt_t exec_cost;
1623+ lt_t period;
1624+ lt_t phase;
1625+ unsigned int cpu;
1626+ task_class_t cls;
1627+};
1628+
1629+/* don't export internal data structures to user space (liblitmus) */
1630+#ifdef __KERNEL__
1631+
1632+struct _rt_domain;
1633+struct heap_node;
1634+
1635+struct rt_job {
1636+ /* Time instant the job was or will be released. */
1637+ lt_t release;
1638+ /* What is the current deadline? */
1639+ lt_t deadline;
1640+
1641+ /* How much service has this job received so far? */
1642+ lt_t exec_time;
1643+
1644+ /* Which job is this. This is used to let user space
1645+ * specify which job to wait for, which is important if jobs
1646+ * overrun. If we just call sys_sleep_next_period() then we
1647+ * will unintentionally miss jobs after an overrun.
1648+ *
1649+ * Increase this sequence number when a job is released.
1650+ */
1651+ unsigned int job_no;
1652+
1653+ /* when did this job start executing? */
1654+ lt_t exec_start;
1655+};
1656+
1657+
1658+struct pfair_param;
1659+
1660+/* RT task parameters for scheduling extensions
1661+ * These parameters are inherited during clone and therefore must
1662+ * be explicitly set up before the task set is launched.
1663+ */
1664+struct rt_param {
1665+ /* is the task sleeping? */
1666+ unsigned int flags:8;
1667+
1668+ /* do we need to check for srp blocking? */
1669+ unsigned int srp_non_recurse:1;
1670+
1671+ /* user controlled parameters */
1672+ struct rt_task task_params;
1673+
1674+ /* timing parameters */
1675+ struct rt_job job_params;
1676+
1677+ /* task representing the current "inherited" task
1678+ * priority, assigned by inherit_priority and
1679+ * return_priority in the scheduler plugins.
1680+ * Could point to self if PI does not result in
1681+ * an increased task priority.
1682+ */
1683+ struct task_struct* inh_task;
1684+
1685+ /* Don't just dereference this pointer in kernel space!
1686+ * It might very well point to junk or nothing at all.
1687+ * NULL indicates that the task has not requested any non-preemptable
1688+ * section support.
1689+ * Not inherited upon fork.
1690+ */
1691+ short* np_flag;
1692+
1693+ /* For the FMLP under PSN-EDF, it is required to make the task
1694+ * non-preemptive from kernel space. In order not to interfere with
1695+ * user space, this counter indicates the kernel space np setting.
1696+ * kernel_np > 0 => task is non-preemptive
1697+ */
1698+ unsigned int kernel_np;
1699+
1700+ /* This field can be used by plugins to store where the task
1701+ * is currently scheduled. It is the responsibility of the
1702+ * plugin to avoid race conditions.
1703+ *
1704+ * This used by GSN-EDF and PFAIR.
1705+ */
1706+ volatile int scheduled_on;
1707+
1708+ /* Is the stack of the task currently in use? This is updated by
1709+ * the LITMUS core.
1710+ *
1711+ * Be careful to avoid deadlocks!
1712+ */
1713+ volatile int stack_in_use;
1714+
1715+ /* This field can be used by plugins to store where the task
1716+ * is currently linked. It is the responsibility of the plugin
1717+ * to avoid race conditions.
1718+ *
1719+ * Used by GSN-EDF.
1720+ */
1721+ volatile int linked_on;
1722+
1723+ /* PFAIR/PD^2 state. Allocated on demand. */
1724+ struct pfair_param* pfair;
1725+
1726+ /* Fields saved before BE->RT transition.
1727+ */
1728+ int old_policy;
1729+ int old_prio;
1730+
1731+ /* The high-resolution timer used to control its release. */
1732+ struct hrtimer release_timer;
1733+
1734+ /* ready queue for this task */
1735+ struct _rt_domain* domain;
1736+
1737+ /* heap element for this task
1738+ *
1739+ * Warning: Don't statically allocate this node. The heap
1740+ * implementation swaps these between tasks, thus after
1741+ * dequeuing from a heap you may end up with a different node
1742+ * than the one you had when enqueuing the task. For the same
1743+ * reason, don't obtain and store references to this node
1744+ * other than this pointer (which is updated by the heap
1745+ * implementation).
1746+ */
1747+ struct heap_node* heap_node;
1748+
1749+ /* Used by rt_domain to queue task in release list.
1750+ */
1751+ struct list_head list;
1752+};
1753+
1754+/* Possible RT flags */
1755+#define RT_F_RUNNING 0x00000000
1756+#define RT_F_SLEEP 0x00000001
1757+#define RT_F_EXIT_SEM 0x00000008
1758+
1759+#endif
1760+
1761+#endif
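
For reference, a hypothetical example of how the user-visible part (struct rt_task) is typically filled in before a task is handed to SCHED_LITMUS via sys_set_rt_task_param; the values assume the nanosecond-based lt_t clock (see litmus_clock()).

#include <litmus/rt_param.h>

static void demo_fill_params(struct rt_task *p)
{
	p->exec_cost = 10000000ULL;	/* 10 ms worst-case execution cost */
	p->period    = 100000000ULL;	/* 100 ms period */
	p->phase     = 0;		/* release offset */
	p->cpu       = 0;		/* only used by partitioned plugins */
	p->cls       = RT_CLASS_SOFT;
}
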
1762diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
1763new file mode 100644
1764index 0000000..aba7522
1765--- /dev/null
1766+++ b/include/litmus/sched_plugin.h
1767@@ -0,0 +1,144 @@
1768+/*
1769+ * Definition of the scheduler plugin interface.
1770+ *
1771+ */
1772+#ifndef _LINUX_SCHED_PLUGIN_H_
1773+#define _LINUX_SCHED_PLUGIN_H_
1774+
1775+#include <linux/sched.h>
1776+
1777+/* struct for semaphore with priority inheritance */
1778+struct pi_semaphore {
1779+ atomic_t count;
1780+ int sleepers;
1781+ wait_queue_head_t wait;
1782+ union {
1783+ /* highest-prio holder/waiter */
1784+ struct task_struct *task;
1785+ struct task_struct* cpu_task[NR_CPUS];
1786+ } hp;
1787+ /* current lock holder */
1788+ struct task_struct *holder;
1789+};
1790+
1791+
1792+/********************* scheduler invocation ******************/
1793+
1794+/* Plugin-specific realtime tick handler */
1795+typedef void (*scheduler_tick_t) (struct task_struct *cur);
1796+/* Plugin-specific scheduling decision function */
1797+typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
1798+/* Clean up after the task switch has occurred.
1799+ * This function is called after every (even non-rt) task switch.
1800+ */
1801+typedef void (*finish_switch_t)(struct task_struct *prev);
1802+
1803+
1804+/********************* task state changes ********************/
1805+
1806+/* Called to setup a new real-time task.
1807+ * Release the first job, enqueue, etc.
1808+ * Task may already be running.
1809+ */
1810+typedef void (*task_new_t) (struct task_struct *task,
1811+ int on_rq,
1812+ int running);
1813+
1814+/* Called to re-introduce a task after blocking.
1815+ * Can potentially be called multiple times.
1816+ */
1817+typedef void (*task_wake_up_t) (struct task_struct *task);
1818+/* Called to notify the plugin of a blocking real-time task.
1819+ * It will only be called for real-time tasks and before schedule() is called. */
1820+typedef void (*task_block_t) (struct task_struct *task);
1821+/* Called when a real-time task exits or changes to a different scheduling
1822+ * class.
1823+ * Free any allocated resources
1824+ */
1825+typedef void (*task_exit_t) (struct task_struct *);
1826+
1827+/* Called when the new_owner is released from the wait queue.
1828+ * It should now inherit the priority from sem, _before_ it gets re-added
1829+ * to any queue.
1830+ */
1831+typedef long (*inherit_priority_t) (struct pi_semaphore *sem,
1832+ struct task_struct *new_owner);
1833+
1834+/* Called when the current task releases a semaphore from which it might
1835+ * have inherited a priority.
1836+ */
1837+typedef long (*return_priority_t) (struct pi_semaphore *sem);
1838+
1839+/* Called when a task tries to acquire a semaphore and fails. Check if its
1840+ * priority is higher than that of the current holder.
1841+ */
1842+typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t);
1843+
1844+
1845+/********************* sys call backends ********************/
1846+/* This function causes the caller to sleep until the next release */
1847+typedef long (*complete_job_t) (void);
1848+
1849+typedef long (*admit_task_t)(struct task_struct* tsk);
1850+
1851+typedef void (*release_at_t)(struct task_struct *t, lt_t start);
1852+
1853+struct sched_plugin {
1854+ struct list_head list;
1855+ /* basic info */
1856+ char *plugin_name;
1857+#ifdef CONFIG_SRP
1858+ unsigned int srp_active;
1859+#endif
1860+
1861+ /* scheduler invocation */
1862+ scheduler_tick_t tick;
1863+ schedule_t schedule;
1864+ finish_switch_t finish_switch;
1865+
1866+ /* syscall backend */
1867+ complete_job_t complete_job;
1868+ release_at_t release_at;
1869+
1870+ /* task state changes */
1871+ admit_task_t admit_task;
1872+
1873+ task_new_t task_new;
1874+ task_wake_up_t task_wake_up;
1875+ task_block_t task_block;
1876+ task_exit_t task_exit;
1877+
1878+#ifdef CONFIG_FMLP
1879+ /* priority inheritance */
1880+ unsigned int fmlp_active;
1881+ inherit_priority_t inherit_priority;
1882+ return_priority_t return_priority;
1883+ pi_block_t pi_block;
1884+#endif
1885+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
1886+
1887+
1888+extern struct sched_plugin *litmus;
1889+
1890+int register_sched_plugin(struct sched_plugin* plugin);
1891+struct sched_plugin* find_sched_plugin(const char* name);
1892+int print_sched_plugins(char* buf, int max);
1893+
1894+static inline int srp_active(void)
1895+{
1896+#ifdef CONFIG_SRP
1897+ return litmus->srp_active;
1898+#else
1899+ return 0;
1900+#endif
1901+}
1902+static inline int fmlp_active(void)
1903+{
1904+#ifdef CONFIG_FMLP
1905+ return litmus->fmlp_active;
1906+#else
1907+ return 0;
1908+#endif
1909+}
1910+
1911+#endif
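
A hypothetical skeleton of a plugin registering itself against this interface; the real plugins in litmus/sched_*.c follow the same pattern and additionally fill in tick, task_new, task_wake_up, task_block, task_exit, complete_job and release_at. The "DEMO" plugin and demo_* functions are illustrative assumptions, not part of the patch.

#include <linux/init.h>
#include <litmus/sched_plugin.h>

static struct task_struct *demo_schedule(struct task_struct *prev)
{
	return NULL;	/* nothing to schedule: let Linux tasks run */
}

static long demo_admit_task(struct task_struct *tsk)
{
	return 0;	/* accept everything in this sketch */
}

static struct sched_plugin demo_plugin = {
	.plugin_name = "DEMO",
	.schedule    = demo_schedule,
	.admit_task  = demo_admit_task,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
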
1912diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
1913new file mode 100644
1914index 0000000..60dcbfb
1915--- /dev/null
1916+++ b/include/litmus/sched_trace.h
1917@@ -0,0 +1,31 @@
1918+/* sched_trace.h -- record scheduler events to a byte stream for offline analysis.
1919+ */
1920+#ifndef _LINUX_SCHED_TRACE_H_
1921+#define _LINUX_SCHED_TRACE_H_
1922+
1923+#include <linux/sched.h>
1924+
1925+/* dummies, need to be re-implemented */
1926+
1927+/* used in sched.c */
1928+#define sched_trace_task_arrival(t)
1929+#define sched_trace_task_departure(t)
1930+#define sched_trace_task_preemption(t, by)
1931+#define sched_trace_task_scheduled(t)
1932+
1933+/* used in scheduler plugins */
1934+#define sched_trace_job_release(t)
1935+#define sched_trace_job_completion(t)
1936+
1937+
1938+#ifdef CONFIG_SCHED_DEBUG_TRACE
1939+void sched_trace_log_message(const char* fmt, ...);
1940+
1941+#else
1942+
1943+#define sched_trace_log_message(fmt, ...)
1944+
1945+#endif
1946+
1947+
1948+#endif
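
A quick usage sketch, not part of the patch: plugins normally go through the TRACE() wrapper defined in litmus/litmus.h, but the underlying hook is the sched_trace_log_message() declared above. The helper name and message below are hypothetical; when CONFIG_SCHED_DEBUG_TRACE is disabled the call compiles away via the empty macro.

/* Hypothetical debug-trace usage -- illustration only. */
#include <litmus/sched_trace.h>

static void report_decision(struct task_struct *next)
{
	/* expands to nothing when CONFIG_SCHED_DEBUG_TRACE is off */
	sched_trace_log_message("picked %s/%d\n",
				next ? next->comm : "(idle)",
				next ? next->pid : 0);
}
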
1949diff --git a/include/litmus/trace.h b/include/litmus/trace.h
1950new file mode 100644
1951index 0000000..2c8e141
1952--- /dev/null
1953+++ b/include/litmus/trace.h
1954@@ -0,0 +1,107 @@
1955+#ifndef _SYS_TRACE_H_
1956+#define _SYS_TRACE_H_
1957+
1958+#ifdef CONFIG_FEATHER_TRACE
1959+
1960+#include <litmus/feather_trace.h>
1961+#include <litmus/feather_buffer.h>
1962+
1963+
1964+/*********************** TIMESTAMPS ************************/
1965+
1966+enum task_type_marker {
1967+ TSK_BE,
1968+ TSK_RT,
1969+ TSK_UNKNOWN
1970+};
1971+
1972+struct timestamp {
1973+ uint64_t timestamp;
1974+ uint32_t seq_no;
1975+ uint8_t cpu;
1976+ uint8_t event;
1977+ uint8_t task_type;
1978+};
1979+
1980+
1981+/* buffer holding time stamps - will be provided by driver */
1982+extern struct ft_buffer* trace_ts_buf;
1983+
1984+/* tracing callbacks */
1985+feather_callback void save_timestamp(unsigned long event);
1986+feather_callback void save_timestamp_def(unsigned long event, unsigned long type);
1987+feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr);
1988+
1989+#define TIMESTAMP(id) ft_event0(id, save_timestamp)
1990+
1991+#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, def)
1992+
1993+#define TTIMESTAMP(id, task) ft_event1(id, save_timestamp_task, (unsigned long) task)
1994+
1995+#else /* !CONFIG_FEATHER_TRACE */
1996+
1997+#define TIMESTAMP(id) /* no tracing */
1998+
1999+#define DTIMESTAMP(id, def) /* no tracing */
2000+
2001+#define TTIMESTAMP(id, task) /* no tracing */
2002+
2003+#endif
2004+
2005+
2006+/* Convention for timestamps
2007+ * =========================
2008+ *
2009+ * In order to process the trace files with a common tool, we use the following
2010+ * convention to measure execution times: The end time id of a code segment is
2011+ * always the next number after the start time event id.
2012+ */
2013+
2014+#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
2015+ * care
2016+ * about
2017+ * next */
2018+#define TS_SCHED_END(t) TTIMESTAMP(101, t)
2019+#define TS_SCHED2_START(t) TTIMESTAMP(102, t)
2020+#define TS_SCHED2_END(t) TTIMESTAMP(103, t)
2021+
2022+#define TS_CXS_START(t) TTIMESTAMP(104, t)
2023+#define TS_CXS_END(t) TTIMESTAMP(105, t)
2024+
2025+#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT)
2026+#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT)
2027+
2028+#define TS_TICK_START(t) TTIMESTAMP(110, t)
2029+#define TS_TICK_END(t) TTIMESTAMP(111, t)
2030+
2031+
2032+#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
2033+#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
2034+
2035+#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */
2036+#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */
2037+
2038+#define TS_ENTER_NP_START TIMESTAMP(140)
2039+#define TS_ENTER_NP_END TIMESTAMP(141)
2040+
2041+#define TS_EXIT_NP_START TIMESTAMP(150)
2042+#define TS_EXIT_NP_END TIMESTAMP(151)
2043+
2044+#define TS_SRP_UP_START TIMESTAMP(160)
2045+#define TS_SRP_UP_END TIMESTAMP(161)
2046+#define TS_SRP_DOWN_START TIMESTAMP(162)
2047+#define TS_SRP_DOWN_END TIMESTAMP(163)
2048+
2049+#define TS_PI_UP_START TIMESTAMP(170)
2050+#define TS_PI_UP_END TIMESTAMP(171)
2051+#define TS_PI_DOWN_START TIMESTAMP(172)
2052+#define TS_PI_DOWN_END TIMESTAMP(173)
2053+
2054+#define TS_FIFO_UP_START TIMESTAMP(180)
2055+#define TS_FIFO_UP_END TIMESTAMP(181)
2056+#define TS_FIFO_DOWN_START TIMESTAMP(182)
2057+#define TS_FIFO_DOWN_END TIMESTAMP(183)
2058+
2059+
2060+
2061+#endif /* !_SYS_TRACE_H_ */
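
To illustrate the convention above (not part of the patch): when adding a new probe pair, the end event id must be the start id plus one so that the offline tools can match start and end samples. The ids 190/191 and the measured function are hypothetical and currently unassigned above.

/* Hypothetical probe pair -- illustration only. */
#include <litmus/trace.h>

#define TS_DEMO_START	TIMESTAMP(190)
#define TS_DEMO_END	TIMESTAMP(191)	/* end id = start id + 1 */

static void measured_section(void)
{
	TS_DEMO_START;
	/* ... code whose overhead is being measured ... */
	TS_DEMO_END;
}
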
2062diff --git a/include/litmus/unistd.h b/include/litmus/unistd.h
2063new file mode 100644
2064index 0000000..8224235
2065--- /dev/null
2066+++ b/include/litmus/unistd.h
2067@@ -0,0 +1,20 @@
2068+
2069+#define __LSC(x) (__NR_LITMUS + x)
2070+
2071+#define __NR_set_rt_task_param __LSC(0)
2072+#define __NR_get_rt_task_param __LSC(1)
2073+#define __NR_sleep_next_period __LSC(2)
2074+#define __NR_register_np_flag __LSC(3)
2075+#define __NR_exit_np __LSC(4)
2076+#define __NR_od_open __LSC(5)
2077+#define __NR_od_close __LSC(6)
2078+#define __NR_fmlp_down __LSC(7)
2079+#define __NR_fmlp_up __LSC(8)
2080+#define __NR_srp_down __LSC(9)
2081+#define __NR_srp_up __LSC(10)
2082+#define __NR_query_job_no __LSC(11)
2083+#define __NR_wait_for_job_release __LSC(12)
2084+#define __NR_wait_for_ts_release __LSC(13)
2085+#define __NR_release_ts __LSC(14)
2086+
2087+#define NR_litmus_syscalls 15
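
For orientation, not part of the patch: __LSC() offsets every LITMUS syscall from the per-architecture base __NR_LITMUS, which this patch adds to the x86 and sparc64 unistd headers. A user-space wrapper therefore reduces to a plain syscall() invocation, as in the hedged sketch below; the wrapper name is hypothetical and a library such as liblitmus ships the real ones.

/* User-space sketch -- illustration only; assumes the patched kernel headers
 * are on the include path so that __NR_sleep_next_period (= __NR_LITMUS + 2)
 * is visible. */
#include <unistd.h>
#include <sys/syscall.h>
#include <litmus/unistd.h>

static long sleep_next_period(void)
{
	/* suspends the calling task until its next job release */
	return syscall(__NR_sleep_next_period);
}
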
2088diff --git a/kernel/exit.c b/kernel/exit.c
2089index 549c055..bc313b7 100644
2090--- a/kernel/exit.c
2091+++ b/kernel/exit.c
2092@@ -52,6 +52,8 @@
2093
2094 extern void sem_exit (void);
2095
2096+extern void exit_od_table(struct task_struct* t);
2097+
2098 static void exit_mm(struct task_struct * tsk);
2099
2100 static void __unhash_process(struct task_struct *p)
2101@@ -987,6 +989,8 @@ fastcall NORET_TYPE void do_exit(long code)
2102 if (unlikely(tsk->audit_context))
2103 audit_free(tsk);
2104
2105+ exit_od_table(tsk);
2106+
2107 tsk->exit_code = code;
2108 taskstats_exit(tsk, group_dead);
2109
2110diff --git a/kernel/fork.c b/kernel/fork.c
2111index 8dd8ff2..4c322d4 100644
2112--- a/kernel/fork.c
2113+++ b/kernel/fork.c
2114@@ -59,6 +59,9 @@
2115 #include <asm/cacheflush.h>
2116 #include <asm/tlbflush.h>
2117
2118+#include <litmus/litmus.h>
2119+#include <litmus/sched_plugin.h>
2120+
2121 /*
2122 * Protected counters by write_lock_irq(&tasklist_lock)
2123 */
2124@@ -121,6 +124,8 @@ void __put_task_struct(struct task_struct *tsk)
2125 WARN_ON(atomic_read(&tsk->usage));
2126 WARN_ON(tsk == current);
2127
2128+ exit_litmus(tsk);
2129+
2130 security_task_free(tsk);
2131 free_uid(tsk->user);
2132 put_group_info(tsk->group_info);
2133@@ -182,6 +187,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
2134 *tsk = *orig;
2135 tsk->stack = ti;
2136
2137+ /* Don't let the new task be a real-time task. */
2138+ memset(&tsk->rt_param, 0, sizeof(struct rt_task));
2139+
2140 err = prop_local_init_single(&tsk->dirties);
2141 if (err) {
2142 free_thread_info(ti);
2143diff --git a/kernel/printk.c b/kernel/printk.c
2144index 89011bf..9eb2dc5 100644
2145--- a/kernel/printk.c
2146+++ b/kernel/printk.c
2147@@ -54,6 +54,12 @@ int console_printk[4] = {
2148 DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */
2149 };
2150
2151+/* divert printk() messages when we have a LITMUS^RT
2152+ * debug listener
2153+ */
2154+#include <litmus/litmus.h>
2155+int trace_override = 0;
2156+
2157 /*
2158 * Low level drivers may need that to know if they can schedule in
2159 * their unblank() callback or not. So let's export it.
2160@@ -652,6 +658,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
2161
2162 /* Emit the output into the temporary buffer */
2163 printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
2164+ if (trace_override)
2165+ TRACE("%s", printk_buf);
2166
2167 /*
2168 * Copy the output into log_buf. If the caller didn't provide
2169@@ -932,7 +940,7 @@ int is_console_locked(void)
2170
2171 void wake_up_klogd(void)
2172 {
2173- if (!oops_in_progress && waitqueue_active(&log_wait))
2174+ if (!trace_override && !oops_in_progress && waitqueue_active(&log_wait))
2175 wake_up_interruptible(&log_wait);
2176 }
2177
2178diff --git a/kernel/sched.c b/kernel/sched.c
2179index e76b11c..9ee07ba 100644
2180--- a/kernel/sched.c
2181+++ b/kernel/sched.c
2182@@ -67,6 +67,10 @@
2183 #include <asm/tlb.h>
2184 #include <asm/irq_regs.h>
2185
2186+#include <litmus/trace.h>
2187+
2188+#include <litmus/norqlock.h>
2189+
2190 /*
2191 * Scheduler clock - returns current time in nanosec units.
2192 * This is default implementation.
2193@@ -324,6 +328,8 @@ struct rq {
2194
2195 atomic_t nr_iowait;
2196
2197+ struct task_struct* litmus_next;
2198+
2199 #ifdef CONFIG_SMP
2200 struct sched_domain *sd;
2201
2202@@ -875,11 +881,12 @@ static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
2203 #include "sched_idletask.c"
2204 #include "sched_fair.c"
2205 #include "sched_rt.c"
2206+#include "../litmus/sched_litmus.c"
2207 #ifdef CONFIG_SCHED_DEBUG
2208 # include "sched_debug.c"
2209 #endif
2210
2211-#define sched_class_highest (&rt_sched_class)
2212+#define sched_class_highest (&litmus_sched_class)
2213
2214 /*
2215 * Update delta_exec, delta_fair fields for rq.
2216@@ -1516,6 +1523,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2217 int new_cpu;
2218 #endif
2219
2220+ if (is_realtime(p))
2221+ TRACE_TASK(p, "try_to_wake_up()\n");
2222 rq = task_rq_lock(p, &flags);
2223 old_state = p->state;
2224 if (!(old_state & state))
2225@@ -1529,7 +1538,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2226 this_cpu = smp_processor_id();
2227
2228 #ifdef CONFIG_SMP
2229- if (unlikely(task_running(rq, p)))
2230+ if (unlikely(task_running(rq, p) || is_realtime(p)))
2231 goto out_activate;
2232
2233 new_cpu = cpu;
2234@@ -1650,8 +1659,10 @@ out_activate:
2235 out_running:
2236 p->state = TASK_RUNNING;
2237 out:
2238+ if (is_realtime(p))
2239+ TRACE_TASK(p, "try_to_wake_up() done, p->state=%d\n", p->state);
2240 task_rq_unlock(rq, &flags);
2241-
2242+ tick_no_rqlock();
2243 return success;
2244 }
2245
2246@@ -1890,6 +1901,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
2247 */
2248 prev_state = prev->state;
2249 finish_arch_switch(prev);
2250+ litmus->finish_switch(prev);
2251+ prev->rt_param.stack_in_use = NO_CPU;
2252 finish_lock_switch(rq, prev);
2253 fire_sched_in_preempt_notifiers(current);
2254 if (mm)
2255@@ -3480,6 +3493,7 @@ void scheduler_tick(void)
2256 struct task_struct *curr = rq->curr;
2257 u64 next_tick = rq->tick_timestamp + TICK_NSEC;
2258
2259+ TS_TICK_START(current);
2260 spin_lock(&rq->lock);
2261 __update_rq_clock(rq);
2262 /*
2263@@ -3491,12 +3505,17 @@ void scheduler_tick(void)
2264 update_cpu_load(rq);
2265 if (curr != rq->idle) /* FIXME: needed? */
2266 curr->sched_class->task_tick(rq, curr);
2267+ TS_PLUGIN_TICK_START;
2268+ litmus_tick(rq, curr);
2269+ TS_PLUGIN_TICK_END;
2270 spin_unlock(&rq->lock);
2271
2272 #ifdef CONFIG_SMP
2273 rq->idle_at_tick = idle_cpu(cpu);
2274- trigger_load_balance(rq, cpu);
2275+ if (!is_realtime(current))
2276+ trigger_load_balance(rq, cpu);
2277 #endif
2278+ TS_TICK_END(current);
2279 }
2280
2281 #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
2282@@ -3594,11 +3613,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
2283 * Optimization: we know that if all tasks are in
2284 * the fair class we can call that function directly:
2285 */
2286- if (likely(rq->nr_running == rq->cfs.nr_running)) {
2287+ /* Don't do that for LITMUS.
2288+ if (likely(rq->nr_running == rq->cfs.nr_running)) {
2289 p = fair_sched_class.pick_next_task(rq);
2290 if (likely(p))
2291 return p;
2292 }
2293+ */
2294
2295 class = sched_class_highest;
2296 for ( ; ; ) {
2297@@ -3633,6 +3654,7 @@ need_resched:
2298
2299 release_kernel_lock(prev);
2300 need_resched_nonpreemptible:
2301+ TS_SCHED_START;
2302
2303 schedule_debug(prev);
2304
2305@@ -3643,6 +3665,9 @@ need_resched_nonpreemptible:
2306 __update_rq_clock(rq);
2307 spin_lock(&rq->lock);
2308 clear_tsk_need_resched(prev);
2309+ TS_PLUGIN_SCHED_START;
2310+ litmus_schedule(rq, prev);
2311+ TS_PLUGIN_SCHED_END;
2312
2313 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
2314 if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
2315@@ -3667,18 +3692,32 @@ need_resched_nonpreemptible:
2316 rq->curr = next;
2317 ++*switch_count;
2318
2319+ TS_SCHED_END(next);
2320+ TS_CXS_START(next);
2321 context_switch(rq, prev, next); /* unlocks the rq */
2322- } else
2323+ TS_CXS_END(current);
2324+ } else {
2325+ TS_SCHED_END(prev);
2326 spin_unlock_irq(&rq->lock);
2327+ }
2328+ TS_SCHED2_START(current);
2329+
2330+ tick_no_rqlock();
2331
2332 if (unlikely(reacquire_kernel_lock(current) < 0)) {
2333 cpu = smp_processor_id();
2334 rq = cpu_rq(cpu);
2335+ TS_SCHED2_END(current);
2336 goto need_resched_nonpreemptible;
2337 }
2338 preempt_enable_no_resched();
2339- if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
2340+ if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) {
2341+ TS_SCHED2_END(current);
2342 goto need_resched;
2343+ }
2344+ TS_SCHED2_END(current);
2345+ if (srp_active())
2346+ srp_ceiling_block();
2347 }
2348 EXPORT_SYMBOL(schedule);
2349
2350@@ -3886,6 +3925,18 @@ void complete_all(struct completion *x)
2351 }
2352 EXPORT_SYMBOL(complete_all);
2353
2354+void complete_n(struct completion *x, int n)
2355+{
2356+ unsigned long flags;
2357+
2358+ spin_lock_irqsave(&x->wait.lock, flags);
2359+ x->done += n;
2360+ __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2361+ n, 0, NULL);
2362+ spin_unlock_irqrestore(&x->wait.lock, flags);
2363+}
2364+EXPORT_SYMBOL(complete_n);
2365+
2366 static inline long __sched
2367 do_wait_for_common(struct completion *x, long timeout, int state)
2368 {
2369@@ -4236,6 +4287,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
2370 case SCHED_RR:
2371 p->sched_class = &rt_sched_class;
2372 break;
2373+ case SCHED_LITMUS:
2374+ p->sched_class = &litmus_sched_class;
2375+ break;
2376 }
2377
2378 p->rt_priority = prio;
2379@@ -4268,7 +4322,7 @@ recheck:
2380 policy = oldpolicy = p->policy;
2381 else if (policy != SCHED_FIFO && policy != SCHED_RR &&
2382 policy != SCHED_NORMAL && policy != SCHED_BATCH &&
2383- policy != SCHED_IDLE)
2384+ policy != SCHED_IDLE && policy != SCHED_LITMUS)
2385 return -EINVAL;
2386 /*
2387 * Valid priorities for SCHED_FIFO and SCHED_RR are
2388@@ -4282,6 +4336,9 @@ recheck:
2389 if (rt_policy(policy) != (param->sched_priority != 0))
2390 return -EINVAL;
2391
2392+ if (policy == SCHED_LITMUS && policy == p->policy)
2393+ return -EINVAL;
2394+
2395 /*
2396 * Allow unprivileged RT tasks to decrease priority:
2397 */
2398@@ -4316,6 +4373,12 @@ recheck:
2399 return -EPERM;
2400 }
2401
2402+ if (policy == SCHED_LITMUS) {
2403+ retval = litmus_admit_task(p);
2404+ if (retval)
2405+ return retval;
2406+ }
2407+
2408 retval = security_task_setscheduler(p, policy, param);
2409 if (retval)
2410 return retval;
2411@@ -4345,9 +4408,17 @@ recheck:
2412 p->sched_class->put_prev_task(rq, p);
2413 }
2414
2415+ if (p->policy == SCHED_LITMUS)
2416+ litmus_exit_task(p);
2417+
2418 oldprio = p->prio;
2419 __setscheduler(rq, p, policy, param->sched_priority);
2420
2421+ if (policy == SCHED_LITMUS) {
2422+ p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
2423+ litmus->task_new(p, on_rq, running);
2424+ }
2425+
2426 if (on_rq) {
2427 if (running)
2428 p->sched_class->set_curr_task(rq);
2429@@ -4364,6 +4435,7 @@ recheck:
2430 check_preempt_curr(rq, p);
2431 }
2432 }
2433+
2434 __task_rq_unlock(rq);
2435 spin_unlock_irqrestore(&p->pi_lock, flags);
2436
2437@@ -4494,10 +4566,11 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
2438 read_lock(&tasklist_lock);
2439
2440 p = find_process_by_pid(pid);
2441- if (!p) {
2442+ if (!p || is_realtime(p)) {
2443+ /* LITMUS tasks don't get to do this, transition to BE first */
2444 read_unlock(&tasklist_lock);
2445 mutex_unlock(&sched_hotcpu_mutex);
2446- return -ESRCH;
2447+ return p ? -EPERM : -ESRCH;
2448 }
2449
2450 /*
2451diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
2452index da7c061..de30496 100644
2453--- a/kernel/sched_fair.c
2454+++ b/kernel/sched_fair.c
2455@@ -845,7 +845,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
2456 struct sched_entity *se = &curr->se, *pse = &p->se;
2457 unsigned long gran;
2458
2459- if (unlikely(rt_prio(p->prio))) {
2460+ if (unlikely(rt_prio(p->prio) || p->policy == SCHED_LITMUS)) {
2461 update_rq_clock(rq);
2462 update_curr(cfs_rq);
2463 resched_task(curr);
2464diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
2465index 9ba3daa..c7c938c 100644
2466--- a/kernel/sched_rt.c
2467+++ b/kernel/sched_rt.c
2468@@ -70,7 +70,7 @@ yield_task_rt(struct rq *rq)
2469 */
2470 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
2471 {
2472- if (p->prio < rq->curr->prio)
2473+ if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS)
2474 resched_task(rq->curr);
2475 }
2476
2477diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
2478index cb89fa8..3b1936f 100644
2479--- a/kernel/time/tick-sched.c
2480+++ b/kernel/time/tick-sched.c
2481@@ -568,6 +568,22 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
2482 }
2483
2484 /**
2485+ * tick_set_quanta_type - set the quanta type via the "quanta=" boot option
2486+ * The default is the standard Linux setup, with ticks staggered over the
2487+ * first half of the tick period.
2488+ */
2489+int quanta_type = LINUX_DEFAULT_TICKS;
2490+static int __init tick_set_quanta_type(char *str)
2491+{
2492+ if (strcmp("aligned", str) == 0)
2493+ quanta_type = LITMUS_ALIGNED_TICKS;
2494+ else if (strcmp("staggered", str) == 0)
2495+ quanta_type = LITMUS_STAGGERED_TICKS;
2496+ return 1;
2497+}
2498+__setup("quanta=", tick_set_quanta_type);
2499+
2500+/**
2501 * tick_setup_sched_timer - setup the tick emulation timer
2502 */
2503 void tick_setup_sched_timer(void)
2504@@ -585,9 +601,24 @@ void tick_setup_sched_timer(void)
2505
2506 /* Get the next period (per cpu) */
2507 ts->sched_timer.expires = tick_init_jiffy_update();
2508- offset = ktime_to_ns(tick_period) >> 1;
2509- do_div(offset, num_possible_cpus());
2510- offset *= smp_processor_id();
2511+
2512+ /* Offset must be set correctly to achieve desired quanta type. */
2513+ switch (quanta_type) {
2514+ case LITMUS_ALIGNED_TICKS:
2515+ offset = 0;
2516+ break;
2517+ case LITMUS_STAGGERED_TICKS:
2518+ offset = ktime_to_ns(tick_period);
2519+ do_div(offset, num_possible_cpus());
2520+ offset *= smp_processor_id();
2521+ break;
2522+ default:
2523+ offset = ktime_to_ns(tick_period) >> 1;
2524+ do_div(offset, num_possible_cpus());
2525+ offset *= smp_processor_id();
2526+ }
2527+
2528+ /* Add correct offset to expiration time. */
2529 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
2530
2531 for (;;) {
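
To make the offset arithmetic above concrete, here is a small user-space sketch (not part of the patch) for a hypothetical 4-CPU machine with a 1 ms tick: "quanta=aligned" puts every CPU's tick at offset 0, "quanta=staggered" spreads them at 0, 250, 500 and 750 microseconds, and the Linux default spreads them over the first half of the period.

/* Reproduces the three offset cases above -- illustration only. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t tick_period_ns = 1000000;	/* 1 ms tick */
	const unsigned int cpus = 4;			/* hypothetical machine */

	for (unsigned int cpu = 0; cpu < cpus; cpu++) {
		uint64_t aligned   = 0;
		uint64_t staggered = tick_period_ns / cpus * cpu;
		uint64_t linux_def = (tick_period_ns >> 1) / cpus * cpu;
		printf("cpu %u: aligned=%llu staggered=%llu default=%llu (ns)\n",
		       cpu, (unsigned long long) aligned,
		       (unsigned long long) staggered,
		       (unsigned long long) linux_def);
	}
	return 0;
}
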
2532diff --git a/litmus/Kconfig b/litmus/Kconfig
2533new file mode 100644
2534index 0000000..9a2ab90
2535--- /dev/null
2536+++ b/litmus/Kconfig
2537@@ -0,0 +1,78 @@
2538+menu "LITMUS^RT"
2539+
2540+menu "Real-Time Synchronization"
2541+
2542+config NP_SECTION
2543+ bool "Non-preemptive section support"
2544+ depends on !SPARC64
2545+ default n
2546+ help
2547+ Include support for flag-based non-preemptive section signaling
2548+ from userspace.
2549+
2550+ (currently broken on SPARC64)
2551+
2552+ Say Yes if you want FMLP short critical section synchronization support.
2553+
2554+
2555+config SRP
2556+ bool "Stack Resource Policy (SRP)"
2557+ default n
2558+ help
2559+ Include support for Baker's Stack Resource Policy.
2560+
2561+ Say Yes if you want FMLP local long critical section synchronization support.
2562+
2563+config FMLP
2564+ bool "FMLP support"
2565+ depends on NP_SECTION
2566+ default n
2567+ help
2568+	  Include support for deterministic multiprocessor real-time
2569+	  synchronization.
2570+
2571+ Say Yes if you want FMLP long critical section synchronization support.
2572+
2573+endmenu
2574+
2575+menu "Tracing"
2576+
2577+config SCHED_TASK_TRACE
2578+ bool "Trace real-time tasks"
2579+ default y
2580+ help
2581+ Include support for the sched_trace_XXX() tracing functions. This
2582+ allows the collection of real-time task events such as job
2583+ completions, job releases, early completions, etc. This results in a
2584+ small overhead in the scheduling code. Disable if the overhead is not
2585+ acceptable (e.g., benchmarking).
2586+
2587+ Say Yes for debugging.
2588+ Say No for overhead tracing.
2589+
2590+config SCHED_DEBUG_TRACE
2591+ bool "TRACE() debugging"
2592+ default y
2593+ help
2594+	  Include support for sched_trace_log_message(), which is used to
2595+ implement TRACE(). If disabled, no TRACE() messages will be included
2596+ in the kernel, and no overheads due to debugging statements will be
2597+ incurred by the scheduler. Disable if the overhead is not acceptable
2598+ (e.g. benchmarking).
2599+
2600+ Say Yes for debugging.
2601+ Say No for overhead tracing.
2602+
2603+config FEATHER_TRACE
2604+ bool "Feather-Trace Instrumentation Support"
2605+ default y
2606+ help
2607+ Include Feather-Trace trace points. Currently not supported on
2608+ sparc64.
2609+
2610+ Say Yes for overhead tracing.
2611+
2612+
2613+endmenu
2614+
2615+endmenu
2616diff --git a/litmus/Makefile b/litmus/Makefile
2617new file mode 100644
2618index 0000000..5452038
2619--- /dev/null
2620+++ b/litmus/Makefile
2621@@ -0,0 +1,14 @@
2622+#
2623+# Makefile for LITMUS^RT
2624+#
2625+
2626+obj-y = sched_plugin.o litmus.o sched_trace.o \
2627+ edf_common.o jobs.o \
2628+ rt_domain.o fdso.o sync.o \
2629+ fmlp.o srp.o norqlock.o \
2630+ sched_gsn_edf.o \
2631+ sched_psn_edf.o \
2632+ sched_cedf.o \
2633+ sched_pfair.o
2634+
2635+obj-$(CONFIG_FEATHER_TRACE) += trace.o ft_event.o
2636diff --git a/litmus/edf_common.c b/litmus/edf_common.c
2637new file mode 100644
2638index 0000000..d7567ac
2639--- /dev/null
2640+++ b/litmus/edf_common.c
2641@@ -0,0 +1,94 @@
2642+/*
2643+ * kernel/edf_common.c
2644+ *
2645+ * Common functions for EDF based scheduler.
2646+ */
2647+
2648+#include <linux/percpu.h>
2649+#include <linux/sched.h>
2650+#include <linux/list.h>
2651+
2652+#include <litmus/litmus.h>
2653+#include <litmus/sched_plugin.h>
2654+#include <litmus/sched_trace.h>
2655+
2656+
2657+#include <litmus/edf_common.h>
2658+
2659+/* edf_higher_prio - returns true if first has a higher EDF priority
2660+ * than second. Deadline ties are broken by PID.
2661+ *
2662+ * first must not be NULL and must be a real-time task.
2663+ * second may be NULL or a non-rt task.
2664+ */
2665+int edf_higher_prio(struct task_struct* first,
2666+ struct task_struct* second)
2667+{
2668+ struct task_struct *first_task = first;
2669+ struct task_struct *second_task = second;
2670+
2671+ /* Check for inherited priorities. Change task
2672+ * used for comparison in such a case.
2673+ */
2674+ if (first && first->rt_param.inh_task)
2675+ first_task = first->rt_param.inh_task;
2676+ if (second && second->rt_param.inh_task)
2677+ second_task = second->rt_param.inh_task;
2678+
2679+ return
2680+ /* does the second task exist and is it a real-time task? If
2681+ * not, the first task (which is a RT task) has higher
2682+ * priority.
2683+ */
2684+ !second_task || !is_realtime(second_task) ||
2685+
2686+ /* is the deadline of the first task earlier?
2687+ * Then it has higher priority.
2688+ */
2689+ earlier_deadline(first_task, second_task) ||
2690+
2691+ /* Do we have a deadline tie?
2692+ * Then break by PID.
2693+ */
2694+ (get_deadline(first_task) == get_deadline(second_task) &&
2695+ (first_task->pid < second_task->pid ||
2696+
2697+ /* If the PIDs are the same then the task with the inherited
2698+ * priority wins.
2699+ */
2700+ (first_task->pid == second_task->pid &&
2701+ !second->rt_param.inh_task)));
2702+}
2703+
2704+int edf_ready_order(struct heap_node* a, struct heap_node* b)
2705+{
2706+ return edf_higher_prio(heap2task(a), heap2task(b));
2707+}
2708+
2709+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
2710+ release_job_t release)
2711+{
2712+ rt_domain_init(rt, edf_ready_order, resched, release);
2713+}
2714+
2715+/* edf_preemption_needed - check whether the task t needs to be preempted.
2716+ * Call only with irqs disabled and with the ready_lock acquired.
2717+ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
2718+ */
2719+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
2720+{
2721+ /* we need the read lock for edf_ready_queue */
2722+ /* no need to preempt if there is nothing pending */
2723+ if (!__jobs_pending(rt))
2724+ return 0;
2725+ /* we need to reschedule if t doesn't exist */
2726+ if (!t)
2727+ return 1;
2728+
2729+ /* NOTE: We cannot check for non-preemptibility since we
2730+ * don't know what address space we're currently in.
2731+ */
2732+
2733+ /* make sure to get non-rt stuff out of the way */
2734+ return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
2735+}
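
To illustrate the ordering that edf_higher_prio() establishes (earlier deadline wins, deadline ties are broken by the lower PID, and on a PID tie the inherited priority wins), here is a stand-alone model written against plain structs rather than the kernel's task_struct. It mirrors the comparison logic only and is not code from the patch.

/* Stand-alone model of the EDF comparison -- illustration only. */
#include <stdio.h>

struct fake_task {
	unsigned long long deadline;
	int pid;
	const struct fake_task *inh_task;	/* inherited priority, may be NULL */
};

static int model_edf_higher_prio(const struct fake_task *a, const struct fake_task *b)
{
	const struct fake_task *first  = a->inh_task ? a->inh_task : a;
	const struct fake_task *second = b ? (b->inh_task ? b->inh_task : b) : NULL;

	if (!second)
		return 1;				/* nothing to compete with */
	if (first->deadline != second->deadline)
		return first->deadline < second->deadline; /* earlier deadline wins */
	if (first->pid != second->pid)
		return first->pid < second->pid;	/* deadline tie: lower PID wins */
	return !b->inh_task;				/* same PID: inheritance wins */
}

int main(void)
{
	struct fake_task t1 = { .deadline = 100, .pid = 10 };
	struct fake_task t2 = { .deadline = 100, .pid = 20 };

	printf("t1 beats t2: %d\n", model_edf_higher_prio(&t1, &t2));	/* 1 */
	printf("t2 beats t1: %d\n", model_edf_higher_prio(&t2, &t1));	/* 0 */
	return 0;
}
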
2736diff --git a/litmus/fdso.c b/litmus/fdso.c
2737new file mode 100644
2738index 0000000..81ab0af
2739--- /dev/null
2740+++ b/litmus/fdso.c
2741@@ -0,0 +1,282 @@
2742+/* fdso.c - file descriptor attached shared objects
2743+ *
2744+ * (c) 2007 B. Brandenburg, LITMUS^RT project
2745+ *
2746+ * Notes:
2747+ * - object descriptor (OD) tables are not cloned during a fork.
2748+ * - objects are created on-demand, and freed after the last reference
2749+ * is dropped.
2750+ * - for now, object types are hard coded.
2751+ * - As long as we have live objects, we keep a reference to the inode.
2752+ */
2753+
2754+#include <linux/errno.h>
2755+#include <linux/sched.h>
2756+#include <linux/mutex.h>
2757+#include <linux/file.h>
2758+#include <asm/uaccess.h>
2759+
2760+#include <litmus/fdso.h>
2761+
2762+extern struct fdso_ops fmlp_sem_ops;
2763+extern struct fdso_ops srp_sem_ops;
2764+
2765+static const struct fdso_ops* fdso_ops[] = {
2766+ &fmlp_sem_ops,
2767+ &srp_sem_ops,
2768+};
2769+
2770+static void* fdso_create(obj_type_t type)
2771+{
2772+ if (fdso_ops[type]->create)
2773+ return fdso_ops[type]->create();
2774+ else
2775+ return NULL;
2776+}
2777+
2778+static void fdso_destroy(obj_type_t type, void* obj)
2779+{
2780+ fdso_ops[type]->destroy(obj);
2781+}
2782+
2783+static int fdso_open(struct od_table_entry* entry, void* __user config)
2784+{
2785+ if (fdso_ops[entry->obj->type]->open)
2786+ return fdso_ops[entry->obj->type]->open(entry, config);
2787+ else
2788+ return 0;
2789+}
2790+
2791+static int fdso_close(struct od_table_entry* entry)
2792+{
2793+ if (fdso_ops[entry->obj->type]->close)
2794+ return fdso_ops[entry->obj->type]->close(entry);
2795+ else
2796+ return 0;
2797+}
2798+
2799+/* inode must be locked already */
2800+static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
2801+ obj_type_t type,
2802+ unsigned int id)
2803+{
2804+ struct inode_obj_id* obj;
2805+ void* raw_obj;
2806+
2807+ raw_obj = fdso_create(type);
2808+ if (!raw_obj)
2809+ return NULL;
2810+
2811+ obj = kmalloc(sizeof(struct inode_obj_id), GFP_KERNEL);
2812+ if (!obj)
2813+ return NULL;
2814+ INIT_LIST_HEAD(&obj->list);
2815+ atomic_set(&obj->count, 1);
2816+ obj->type = type;
2817+ obj->id = id;
2818+ obj->obj = raw_obj;
2819+ obj->inode = inode;
2820+
2821+ list_add(&obj->list, &inode->i_obj_list);
2822+ atomic_inc(&inode->i_count);
2823+
2824+ printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
2825+ return obj;
2826+}
2827+
2828+/* inode must be locked already */
2829+static struct inode_obj_id* get_inode_obj(struct inode* inode,
2830+ obj_type_t type,
2831+ unsigned int id)
2832+{
2833+ struct list_head* pos;
2834+ struct inode_obj_id* obj = NULL;
2835+
2836+ list_for_each(pos, &inode->i_obj_list) {
2837+ obj = list_entry(pos, struct inode_obj_id, list);
2838+ if (obj->id == id && obj->type == type) {
2839+ atomic_inc(&obj->count);
2840+ return obj;
2841+ }
2842+ }
2843+ printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
2844+ return NULL;
2845+}
2846+
2847+
2848+static void put_inode_obj(struct inode_obj_id* obj)
2849+{
2850+ struct inode* inode;
2851+ int let_go = 0;
2852+
2853+ inode = obj->inode;
2854+ if (atomic_dec_and_test(&obj->count)) {
2855+
2856+ mutex_lock(&inode->i_obj_mutex);
2857+ /* no new references can be obtained */
2858+ if (!atomic_read(&obj->count)) {
2859+ list_del(&obj->list);
2860+ fdso_destroy(obj->type, obj->obj);
2861+ kfree(obj);
2862+ let_go = 1;
2863+ }
2864+ mutex_unlock(&inode->i_obj_mutex);
2865+ if (let_go)
2866+ iput(inode);
2867+ }
2868+}
2869+
2870+static struct od_table_entry* get_od_entry(struct task_struct* t)
2871+{
2872+ struct od_table_entry* table;
2873+ int i;
2874+
2875+
2876+ table = t->od_table;
2877+ if (!table) {
2878+ table = (struct od_table_entry*)
2879+ kzalloc(sizeof(struct od_table_entry) *
2880+ MAX_OBJECT_DESCRIPTORS, GFP_KERNEL);
2881+ t->od_table = table;
2882+ }
2883+
2884+ for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
2885+ if (!table[i].used) {
2886+ table[i].used = 1;
2887+ return table + i;
2888+ }
2889+ return NULL;
2890+}
2891+
2892+static int put_od_entry(struct od_table_entry* od)
2893+{
2894+ put_inode_obj(od->obj);
2895+ od->used = 0;
2896+ return 0;
2897+}
2898+
2899+void exit_od_table(struct task_struct* t)
2900+{
2901+ int i;
2902+
2903+ if (t->od_table) {
2904+ for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
2905+ if (t->od_table[i].used)
2906+ put_od_entry(t->od_table + i);
2907+ kfree(t->od_table);
2908+ t->od_table = NULL;
2909+ }
2910+}
2911+
2912+static int do_sys_od_open(struct file* file, obj_type_t type, int id,
2913+ void* __user config)
2914+{
2915+ int idx = 0, err;
2916+ struct inode* inode;
2917+ struct inode_obj_id* obj = NULL;
2918+ struct od_table_entry* entry;
2919+
2920+ inode = file->f_dentry->d_inode;
2921+
2922+ entry = get_od_entry(current);
2923+ if (!entry)
2924+ return -ENOMEM;
2925+
2926+ mutex_lock(&inode->i_obj_mutex);
2927+ obj = get_inode_obj(inode, type, id);
2928+ if (!obj)
2929+ obj = alloc_inode_obj(inode, type, id);
2930+ if (!obj) {
2931+ idx = -ENOMEM;
2932+ entry->used = 0;
2933+ } else {
2934+ entry->obj = obj;
2935+ entry->extra = NULL;
2936+ idx = entry - current->od_table;
2937+ }
2938+
2939+ mutex_unlock(&inode->i_obj_mutex);
2940+
2941+ err = fdso_open(entry, config);
2942+ if (err < 0) {
2943+ /* The class rejected the open call.
2944+ * We need to clean up and tell user space.
2945+ */
2946+ put_od_entry(entry);
2947+ idx = err;
2948+ }
2949+
2950+ return idx;
2951+}
2952+
2953+
2954+struct od_table_entry* __od_lookup(int od)
2955+{
2956+ struct task_struct *t = current;
2957+
2958+ if (!t->od_table)
2959+ return NULL;
2960+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
2961+ return NULL;
2962+ if (!t->od_table[od].used)
2963+ return NULL;
2964+ return t->od_table + od;
2965+}
2966+
2967+
2968+asmlinkage int sys_od_open(int fd, int type, int obj_id, void* __user config)
2969+{
2970+ int ret = 0;
2971+ struct file* file;
2972+
2973+ /*
2974+ 1) get file from fd, get inode from file
2975+ 2) lock inode
2976+ 3) try to lookup object
2977+ 4) if not present create and enqueue object, inc inode refcnt
2978+ 5) increment refcnt of object
2979+ 6) alloc od_table_entry, setup ptrs
2980+ 7) unlock inode
2981+ 8) return offset in od_table as OD
2982+ */
2983+
2984+ if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
2985+ ret = -EINVAL;
2986+ goto out;
2987+ }
2988+
2989+ file = fget(fd);
2990+ if (!file) {
2991+ ret = -EBADF;
2992+ goto out;
2993+ }
2994+
2995+ ret = do_sys_od_open(file, type, obj_id, config);
2996+
2997+ fput(file);
2998+
2999+out:
3000+ return ret;
3001+}
3002+
3003+
3004+asmlinkage int sys_od_close(int od)
3005+{
3006+ int ret = -EINVAL;
3007+ struct task_struct *t = current;
3008+
3009+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
3010+ return ret;
3011+
3012+ if (!t->od_table || !t->od_table[od].used)
3013+ return ret;
3014+
3015+
3016+ /* give the class a chance to reject the close
3017+ */
3018+ ret = fdso_close(t->od_table + od);
3019+ if (ret == 0)
3020+ ret = put_od_entry(t->od_table + od);
3021+
3022+ return ret;
3023+}
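
For orientation, and not part of the patch: the user-space side of the sequence in the comment above boils down to obtaining a file descriptor for any inode and attaching a shared object to it. The object type 0 is assumed to be the FMLP semaphore because fmlp_sem_ops is the first entry of fdso_ops[]; the file name is arbitrary and the syscall numbers come from include/litmus/unistd.h in this patch (assumed to be on the include path).

/* User-space sketch -- illustration only. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <litmus/unistd.h>	/* __NR_od_open, __NR_od_close */

int main(void)
{
	int fd, od;

	/* Any inode may carry shared objects; the namespace is (inode, type, id). */
	fd = open("/tmp/locks", O_RDONLY | O_CREAT, 0666);
	if (fd < 0)
		return 1;

	/* type 0 = FMLP semaphore (first fdso_ops[] entry), object id 0 */
	od = syscall(__NR_od_open, fd, 0, 0, NULL);
	if (od < 0) {
		perror("od_open");
		return 1;
	}
	printf("attached object, od = %d\n", od);

	syscall(__NR_od_close, od);
	close(fd);
	return 0;
}
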
3024diff --git a/litmus/fmlp.c b/litmus/fmlp.c
3025new file mode 100644
3026index 0000000..f34eeea
3027--- /dev/null
3028+++ b/litmus/fmlp.c
3029@@ -0,0 +1,262 @@
3030+/*
3031+ * FMLP implementation.
3032+ * Much of the code here is borrowed from include/asm-i386/semaphore.h.
3033+ */
3034+
3035+#include <asm/atomic.h>
3036+#include <asm/semaphore.h>
3037+#include <linux/sched.h>
3038+#include <linux/wait.h>
3039+#include <linux/spinlock.h>
3040+#include <litmus/litmus.h>
3041+#include <litmus/sched_plugin.h>
3042+#include <litmus/edf_common.h>
3043+
3044+#include <litmus/fdso.h>
3045+
3046+#include <litmus/trace.h>
3047+
3048+#ifdef CONFIG_FMLP
3049+
3050+static void* create_fmlp_semaphore(void)
3051+{
3052+ struct pi_semaphore* sem;
3053+ int i;
3054+
3055+ sem = kmalloc(sizeof(struct pi_semaphore), GFP_KERNEL);
3056+ if (!sem)
3057+ return NULL;
3058+ atomic_set(&sem->count, 1);
3059+ sem->sleepers = 0;
3060+ init_waitqueue_head(&sem->wait);
3061+ sem->hp.task = NULL;
3062+ sem->holder = NULL;
3063+ for (i = 0; i < NR_CPUS; i++)
3064+ sem->hp.cpu_task[i] = NULL;
3065+ return sem;
3066+}
3067+
3068+static int open_fmlp_semaphore(struct od_table_entry* entry, void* __user arg)
3069+{
3070+ if (!fmlp_active())
3071+ return -EBUSY;
3072+ return 0;
3073+}
3074+
3075+static void destroy_fmlp_semaphore(void* sem)
3076+{
3077+ /* XXX assert invariants */
3078+ kfree(sem);
3079+}
3080+
3081+struct fdso_ops fmlp_sem_ops = {
3082+ .create = create_fmlp_semaphore,
3083+ .open = open_fmlp_semaphore,
3084+ .destroy = destroy_fmlp_semaphore
3085+};
3086+
3087+struct wq_pair {
3088+ struct task_struct* tsk;
3089+ struct pi_semaphore* sem;
3090+};
3091+
3092+static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync,
3093+ void *key)
3094+{
3095+ struct wq_pair* wqp = (struct wq_pair*) wait->private;
3096+ set_rt_flags(wqp->tsk, RT_F_EXIT_SEM);
3097+ litmus->inherit_priority(wqp->sem, wqp->tsk);
3098+ TRACE_TASK(wqp->tsk,
3099+ "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n");
3100+ /* point to task for default_wake_function() */
3101+ wait->private = wqp->tsk;
3102+ default_wake_function(wait, mode, sync, key);
3103+
3104+	/* Always return true since we know that if we encountered a task
3105+	 * that was already running, the wake_up raced with the schedule() in
3106+	 * do_fmlp_down(). In that case the task in do_fmlp_down() will be
3107+	 * scheduled immediately and own the lock. We must not wake up another
3108+	 * task in any case.
3109+	 */
3110+ return 1;
3111+}
3112+
3113+/* caller is responsible for locking */
3114+int edf_set_hp_task(struct pi_semaphore *sem)
3115+{
3116+ struct list_head *tmp, *next;
3117+ struct task_struct *queued;
3118+ int ret = 0;
3119+
3120+ sem->hp.task = NULL;
3121+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
3122+ queued = ((struct wq_pair*)
3123+ list_entry(tmp, wait_queue_t,
3124+ task_list)->private)->tsk;
3125+
3126+ /* Compare task prios, find high prio task. */
3127+ if (edf_higher_prio(queued, sem->hp.task)) {
3128+ sem->hp.task = queued;
3129+ ret = 1;
3130+ }
3131+ }
3132+ return ret;
3133+}
3134+
3135+/* caller is responsible for locking */
3136+int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu)
3137+{
3138+ struct list_head *tmp, *next;
3139+ struct task_struct *queued;
3140+ int ret = 0;
3141+
3142+ sem->hp.cpu_task[cpu] = NULL;
3143+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
3144+ queued = ((struct wq_pair*)
3145+ list_entry(tmp, wait_queue_t,
3146+ task_list)->private)->tsk;
3147+
3148+ /* Compare task prios, find high prio task. */
3149+ if (get_partition(queued) == cpu &&
3150+ edf_higher_prio(queued, sem->hp.cpu_task[cpu])) {
3151+ sem->hp.cpu_task[cpu] = queued;
3152+ ret = 1;
3153+ }
3154+ }
3155+ return ret;
3156+}
3157+
3158+static int do_fmlp_down(struct pi_semaphore* sem)
3159+{
3160+ unsigned long flags;
3161+ struct task_struct *tsk = current;
3162+ struct wq_pair pair;
3163+ int suspended = 1;
3164+ wait_queue_t wait = {
3165+ .private = &pair,
3166+ .func = rt_pi_wake_up,
3167+ .task_list = {NULL, NULL}
3168+ };
3169+
3170+ pair.tsk = tsk;
3171+ pair.sem = sem;
3172+ spin_lock_irqsave(&sem->wait.lock, flags);
3173+
3174+ if (atomic_dec_return(&sem->count) < 0 ||
3175+ waitqueue_active(&sem->wait)) {
3176+ /* we need to suspend */
3177+ tsk->state = TASK_UNINTERRUPTIBLE;
3178+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
3179+
3180+ TRACE_CUR("suspends on PI lock %p\n", sem);
3181+ litmus->pi_block(sem, tsk);
3182+
3183+ /* release lock before sleeping */
3184+ spin_unlock_irqrestore(&sem->wait.lock, flags);
3185+
3186+ TS_PI_DOWN_END;
3187+ preempt_enable_no_resched();
3188+
3189+
3190+		/* We depend on the FIFO order.
3191+		 * Thus, we don't need to recheck when we wake up; we
3192+		 * are guaranteed to have the lock since there is only one
3193+		 * wake-up per release.
3194+		 */
3195+ schedule();
3196+
3197+ TRACE_CUR("woke up, now owns PI lock %p\n", sem);
3198+
3199+ /* try_to_wake_up() set our state to TASK_RUNNING,
3200+ * all we need to do is to remove our wait queue entry
3201+ */
3202+ remove_wait_queue(&sem->wait, &wait);
3203+ } else {
3204+ /* no priority inheritance necessary, since there are no queued
3205+ * tasks.
3206+ */
3207+ suspended = 0;
3208+ TRACE_CUR("acquired PI lock %p, no contention\n", sem);
3209+ sem->holder = tsk;
3210+ sem->hp.task = tsk;
3211+ litmus->inherit_priority(sem, tsk);
3212+ spin_unlock_irqrestore(&sem->wait.lock, flags);
3213+ }
3214+ return suspended;
3215+}
3216+
3217+static void do_fmlp_up(struct pi_semaphore* sem)
3218+{
3219+ unsigned long flags;
3220+
3221+ spin_lock_irqsave(&sem->wait.lock, flags);
3222+
3223+ TRACE_CUR("releases PI lock %p\n", sem);
3224+ litmus->return_priority(sem);
3225+ sem->holder = NULL;
3226+ if (atomic_inc_return(&sem->count) < 1)
3227+ /* there is a task queued */
3228+ wake_up_locked(&sem->wait);
3229+
3230+ spin_unlock_irqrestore(&sem->wait.lock, flags);
3231+}
3232+
3233+asmlinkage long sys_fmlp_down(int sem_od)
3234+{
3235+ long ret = 0;
3236+ struct pi_semaphore * sem;
3237+ int suspended = 0;
3238+
3239+ preempt_disable();
3240+ TS_PI_DOWN_START;
3241+
3242+ sem = lookup_fmlp_sem(sem_od);
3243+ if (sem)
3244+ suspended = do_fmlp_down(sem);
3245+ else
3246+ ret = -EINVAL;
3247+
3248+ if (!suspended) {
3249+ TS_PI_DOWN_END;
3250+ preempt_enable();
3251+ }
3252+
3253+ return ret;
3254+}
3255+
3256+asmlinkage long sys_fmlp_up(int sem_od)
3257+{
3258+ long ret = 0;
3259+ struct pi_semaphore * sem;
3260+
3261+ preempt_disable();
3262+ TS_PI_UP_START;
3263+
3264+ sem = lookup_fmlp_sem(sem_od);
3265+ if (sem)
3266+ do_fmlp_up(sem);
3267+ else
3268+ ret = -EINVAL;
3269+
3270+
3271+ TS_PI_UP_END;
3272+ preempt_enable();
3273+
3274+ return ret;
3275+}
3276+
3277+#else
3278+
3279+struct fdso_ops fmlp_sem_ops = {};
3280+
3281+asmlinkage long sys_fmlp_down(int sem_od)
3282+{
3283+ return -ENOSYS;
3284+}
3285+
3286+asmlinkage long sys_fmlp_up(int sem_od)
3287+{
3288+ return -ENOSYS;
3289+}
3290+
3291+#endif
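
Again purely for illustration (not part of the patch): once a task holds an object descriptor for an FMLP semaphore, obtained as in the od_open sketch above, the critical section is bracketed by the two syscalls implemented here. The thin wrappers below are assumptions; a library such as liblitmus provides the real ones.

/* User-space sketch -- illustration only. */
#include <unistd.h>
#include <sys/syscall.h>
#include <litmus/unistd.h>	/* __NR_fmlp_down, __NR_fmlp_up */

static long fmlp_down(int od)	{ return syscall(__NR_fmlp_down, od); }
static long fmlp_up(int od)	{ return syscall(__NR_fmlp_up, od); }

void critical_section(int od)
{
	if (fmlp_down(od) == 0) {	/* may suspend until the semaphore is free */
		/* ... access the shared resource ... */
		fmlp_up(od);		/* wakes the next waiter in FIFO order */
	}
}
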
3292diff --git a/litmus/ft_event.c b/litmus/ft_event.c
3293new file mode 100644
3294index 0000000..6084b6d
3295--- /dev/null
3296+++ b/litmus/ft_event.c
3297@@ -0,0 +1,43 @@
3298+#include <linux/types.h>
3299+
3300+#include <litmus/feather_trace.h>
3301+
3302+#ifndef __ARCH_HAS_FEATHER_TRACE
3303+/* provide dummy implementation */
3304+
3305+int ft_events[MAX_EVENTS];
3306+
3307+int ft_enable_event(unsigned long id)
3308+{
3309+ if (id < MAX_EVENTS) {
3310+ ft_events[id]++;
3311+ return 1;
3312+ } else
3313+ return 0;
3314+}
3315+
3316+int ft_disable_event(unsigned long id)
3317+{
3318+ if (id < MAX_EVENTS && ft_events[id]) {
3319+ ft_events[id]--;
3320+ return 1;
3321+ } else
3322+ return 0;
3323+}
3324+
3325+int ft_disable_all_events(void)
3326+{
3327+ int i;
3328+
3329+ for (i = 0; i < MAX_EVENTS; i++)
3330+ ft_events[i] = 0;
3331+
3332+ return MAX_EVENTS;
3333+}
3334+
3335+int ft_is_event_enabled(unsigned long id)
3336+{
3337+ return id < MAX_EVENTS && ft_events[id];
3338+}
3339+
3340+#endif
3341diff --git a/litmus/jobs.c b/litmus/jobs.c
3342new file mode 100644
3343index 0000000..e294bc5
3344--- /dev/null
3345+++ b/litmus/jobs.c
3346@@ -0,0 +1,43 @@
3347+/* litmus/jobs.c - common job control code
3348+ */
3349+
3350+#include <linux/sched.h>
3351+
3352+#include <litmus/litmus.h>
3353+#include <litmus/jobs.h>
3354+
3355+void prepare_for_next_period(struct task_struct *t)
3356+{
3357+ BUG_ON(!t);
3358+ /* prepare next release */
3359+ t->rt_param.job_params.release = t->rt_param.job_params.deadline;
3360+ t->rt_param.job_params.deadline += get_rt_period(t);
3361+ t->rt_param.job_params.exec_time = 0;
3362+ /* update job sequence number */
3363+ t->rt_param.job_params.job_no++;
3364+
3365+ /* don't confuse Linux */
3366+ t->time_slice = 1;
3367+}
3368+
3369+void release_at(struct task_struct *t, lt_t start)
3370+{
3371+ t->rt_param.job_params.deadline = start;
3372+ prepare_for_next_period(t);
3373+ set_rt_flags(t, RT_F_RUNNING);
3374+}
3375+
3376+
3377+/*
3378+ * Deactivate current task until the beginning of the next period.
3379+ */
3380+long complete_job(void)
3381+{
3382+	/* Mark that we do not execute anymore */
3383+	set_rt_flags(current, RT_F_SLEEP);
3384+	/* Call schedule(); this will return when a new job arrives.
3385+	 * It also takes care of preparing for the next release.
3386+	 */
3387+ schedule();
3388+ return 0;
3389+}
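
To make the release/deadline bookkeeping above concrete, the stand-alone sketch below (not part of the patch) replays the same arithmetic for a hypothetical task with a 100 ms period released at t = 0: job n is released at (n - 1) * period and has its deadline one period later. The struct is a simplified stand-in for rt_param.job_params.

/* Stand-alone model of release_at()/prepare_for_next_period() -- illustration only. */
#include <stdio.h>
#include <stdint.h>

struct job_params { uint64_t release, deadline; unsigned int job_no; };

static void prepare_for_next_period(struct job_params *j, uint64_t period)
{
	j->release   = j->deadline;	/* next release = previous deadline */
	j->deadline += period;
	j->job_no++;
}

int main(void)
{
	const uint64_t period = 100000000ULL;		/* 100 ms in ns */
	struct job_params j = { .deadline = 0 };	/* deadline = start, as in release_at() */

	for (int i = 0; i < 3; i++) {
		prepare_for_next_period(&j, period);
		printf("job %u: release=%llu deadline=%llu\n", j.job_no,
		       (unsigned long long) j.release,
		       (unsigned long long) j.deadline);
	}
	return 0;
}
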
3390diff --git a/litmus/litmus.c b/litmus/litmus.c
3391new file mode 100644
3392index 0000000..979985e
3393--- /dev/null
3394+++ b/litmus/litmus.c
3395@@ -0,0 +1,826 @@
3396+/* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS initialization code,
3397+ * and the procfs interface.
3398+ */
3399+#include <asm/uaccess.h>
3400+#include <linux/uaccess.h>
3401+#include <linux/sysrq.h>
3402+
3403+#include <linux/module.h>
3404+#include <linux/proc_fs.h>
3405+#include <linux/slab.h>
3406+
3407+#include <litmus/litmus.h>
3408+#include <linux/sched.h>
3409+#include <litmus/sched_plugin.h>
3410+
3411+#include <litmus/heap.h>
3412+
3413+#include <litmus/trace.h>
3414+
3415+/* Number of RT tasks that exist in the system */
3416+atomic_t rt_task_count = ATOMIC_INIT(0);
3417+static DEFINE_SPINLOCK(task_transition_lock);
3418+
3419+/* Give log messages sequential IDs. */
3420+atomic_t __log_seq_no = ATOMIC_INIT(0);
3421+
3422+/* To send signals from the scheduler
3423+ * Must drop locks first.
3424+ */
3425+static LIST_HEAD(sched_sig_list);
3426+static DEFINE_SPINLOCK(sched_sig_list_lock);
3427+
3428+static struct kmem_cache * heap_node_cache;
3429+
3430+/*
3431+ * sys_set_rt_task_param
3432+ * @pid: Pid of the task whose scheduling parameters are to be changed
3433+ * @param: New real-time extension parameters such as the execution cost and
3434+ * period
3435+ * Syscall for manipulating a task's real-time extension parameters.
3436+ * Returns EFAULT if param is NULL.
3437+ * ESRCH if pid does not correspond
3438+ * to a valid task.
3439+ * EINVAL if either period or execution cost is <= 0
3440+ * EBUSY if pid refers to a task that is already real-time
3441+ * 0 if success
3442+ *
3443+ * Only non-real-time tasks may be configured with this system call
3444+ * to avoid races with the scheduler. In practice, this means that a
3445+ * task's parameters must be set _before_ it transitions to SCHED_LITMUS
3446+ */
3447+asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
3448+{
3449+ struct rt_task tp;
3450+ struct task_struct *target;
3451+ int retval = -EINVAL;
3452+
3453+ printk("Setting up rt task parameters for process %d.\n", pid);
3454+
3455+ if (pid < 0 || param == 0) {
3456+ goto out;
3457+ }
3458+ if (copy_from_user(&tp, param, sizeof(tp))) {
3459+ retval = -EFAULT;
3460+ goto out;
3461+ }
3462+
3463+ /* Task search and manipulation must be protected */
3464+ read_lock_irq(&tasklist_lock);
3465+ if (!(target = find_task_by_pid(pid))) {
3466+ retval = -ESRCH;
3467+ goto out_unlock;
3468+ }
3469+
3470+ if (is_realtime(target)) {
3471+ /* The task is already a real-time task.
3472+		 * We cannot allow parameter changes at this point.
3473+ */
3474+ retval = -EBUSY;
3475+ goto out_unlock;
3476+ }
3477+
3478+ if (tp.exec_cost <= 0)
3479+ goto out_unlock;
3480+ if (tp.period <= 0)
3481+ goto out_unlock;
3482+ if (!cpu_online(tp.cpu))
3483+ goto out_unlock;
3484+ if (tp.period < tp.exec_cost)
3485+ {
3486+ printk(KERN_INFO "litmus: real-time task %d rejected "
3487+ "because wcet > period\n", pid);
3488+ goto out_unlock;
3489+ }
3490+
3491+ target->rt_param.task_params = tp;
3492+
3493+ retval = 0;
3494+ out_unlock:
3495+ read_unlock_irq(&tasklist_lock);
3496+ out:
3497+ return retval;
3498+}
3499+
3500+/* Getter of task's RT params
3501+ *	returns EINVAL if param is NULL or pid is negative
3502+ * returns ESRCH if pid does not correspond to a valid task
3503+ * returns EFAULT if copying of parameters has failed.
3504+ */
3505+asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
3506+{
3507+ int retval = -EINVAL;
3508+ struct task_struct *source;
3509+ struct rt_task lp;
3510+ if (param == 0 || pid < 0)
3511+ goto out;
3512+ read_lock(&tasklist_lock);
3513+ if (!(source = find_task_by_pid(pid))) {
3514+ retval = -ESRCH;
3515+ goto out_unlock;
3516+ }
3517+ lp = source->rt_param.task_params;
3518+ read_unlock(&tasklist_lock);
3519+ /* Do copying outside the lock */
3520+ retval =
3521+ copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
3522+ return retval;
3523+ out_unlock:
3524+ read_unlock(&tasklist_lock);
3525+ out:
3526+ return retval;
3527+
3528+}
3529+
3530+/*
3531+ * This is the crucial function for the periodic task implementation.
3532+ * It checks if a task is periodic, checks if such kind of sleep
3533+ * is permitted, and calls the plugin-specific sleep, which puts the
3534+ * task into a wait array.
3535+ * returns 0 on successful wakeup
3536+ * returns EPERM if current conditions do not permit such sleep
3537+ * returns EINVAL if current task is not able to go to sleep
3538+ */
3539+asmlinkage long sys_complete_job(void)
3540+{
3541+ int retval = -EPERM;
3542+ if (!is_realtime(current)) {
3543+ retval = -EINVAL;
3544+ goto out;
3545+ }
3546+ /* Task with negative or zero period cannot sleep */
3547+ if (get_rt_period(current) <= 0) {
3548+ retval = -EINVAL;
3549+ goto out;
3550+ }
3551+ /* The plugin has to put the task into an
3552+ * appropriate queue and call schedule
3553+ */
3554+ retval = litmus->complete_job();
3555+ out:
3556+ return retval;
3557+}
3558+
3559+/* This is an "improved" version of sys_complete_job that
3560+ * addresses the problem of unintentionally missing a job after
3561+ * an overrun.
3562+ *
3563+ * returns 0 on successful wakeup
3564+ * returns EPERM if current conditions do not permit such sleep
3565+ * returns EINVAL if current task is not able to go to sleep
3566+ */
3567+asmlinkage long sys_wait_for_job_release(unsigned int job)
3568+{
3569+ int retval = -EPERM;
3570+ if (!is_realtime(current)) {
3571+ retval = -EINVAL;
3572+ goto out;
3573+ }
3574+
3575+ /* Task with negative or zero period cannot sleep */
3576+ if (get_rt_period(current) <= 0) {
3577+ retval = -EINVAL;
3578+ goto out;
3579+ }
3580+
3581+ retval = 0;
3582+
3583+ /* first wait until we have "reached" the desired job
3584+ *
3585+ * This implementation has at least two problems:
3586+ *
3587+ * 1) It doesn't gracefully handle the wrap around of
3588+ * job_no. Since LITMUS is a prototype, this is not much
3589+ * of a problem right now.
3590+ *
3591+ * 2) It is theoretically racy if a job release occurs
3592+ * between checking job_no and calling sleep_next_period().
3593+	 * A proper solution would require adding another callback
3594+ * in the plugin structure and testing the condition with
3595+ * interrupts disabled.
3596+ *
3597+ * FIXME: At least problem 2 should be taken care of eventually.
3598+ */
3599+ while (!retval && job > current->rt_param.job_params.job_no)
3600+ /* If the last job overran then job <= job_no and we
3601+ * don't send the task to sleep.
3602+ */
3603+ retval = litmus->complete_job();
3604+ out:
3605+ return retval;
3606+}
3607+
3608+/* This is a helper syscall to query the current job sequence number.
3609+ *
3610+ * returns 0 on successful query
3611+ * returns EPERM if task is not a real-time task.
3612+ * returns EFAULT if &job is not a valid pointer.
3613+ */
3614+asmlinkage long sys_query_job_no(unsigned int __user *job)
3615+{
3616+ int retval = -EPERM;
3617+ if (is_realtime(current))
3618+ retval = put_user(current->rt_param.job_params.job_no, job);
3619+
3620+ return retval;
3621+}
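
Not part of the patch, but for context: a periodic task typically drives these backends from user space as sketched below, using wait_for_job_release() to resynchronize after an overrun instead of blindly sleeping one extra period. The wrappers are assumptions built on the syscall numbers in include/litmus/unistd.h; a library such as liblitmus provides the real interface.

/* User-space sketch of a periodic job loop -- illustration only. */
#include <unistd.h>
#include <sys/syscall.h>
#include <litmus/unistd.h>	/* __NR_wait_for_job_release, __NR_query_job_no */

static long wait_for_job_release(unsigned int job)
{
	return syscall(__NR_wait_for_job_release, job);
}

static long query_job_no(unsigned int *job)
{
	return syscall(__NR_query_job_no, job);
}

void periodic_main(void (*do_work)(void))
{
	unsigned int job;

	while (query_job_no(&job) == 0) {
		do_work();
		/* If do_work() overran, job_no has already advanced past
		 * job + 1 and the call returns without sleeping. */
		if (wait_for_job_release(job + 1) != 0)
			break;
	}
}
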
3622+
3623+struct sched_sig {
3624+ struct list_head list;
3625+ struct task_struct* task;
3626+ unsigned int signal:31;
3627+ int force:1;
3628+};
3629+
3630+static void __scheduler_signal(struct task_struct *t, unsigned int signo,
3631+ int force)
3632+{
3633+ struct sched_sig* sig;
3634+
3635+	sig = kmalloc(sizeof(struct sched_sig), GFP_ATOMIC);
3636+	if (!sig) {
3637+		TRACE_TASK(t, "dropping signal: %u\n", signo);
3638+ return;
3639+ }
3640+
3641+ spin_lock(&sched_sig_list_lock);
3642+
3643+ sig->signal = signo;
3644+ sig->force = force;
3645+ sig->task = t;
3646+ get_task_struct(t);
3647+ list_add(&sig->list, &sched_sig_list);
3648+
3649+ spin_unlock(&sched_sig_list_lock);
3650+}
3651+
3652+void scheduler_signal(struct task_struct *t, unsigned int signo)
3653+{
3654+ __scheduler_signal(t, signo, 0);
3655+}
3656+
3657+void force_scheduler_signal(struct task_struct *t, unsigned int signo)
3658+{
3659+ __scheduler_signal(t, signo, 1);
3660+}
3661+
3662+/* FIXME: get rid of the locking and do this on a per-processor basis */
3663+void send_scheduler_signals(void)
3664+{
3665+ unsigned long flags;
3666+ struct list_head *p, *extra;
3667+ struct siginfo info;
3668+ struct sched_sig* sig;
3669+ struct task_struct* t;
3670+ struct list_head claimed;
3671+
3672+ if (spin_trylock_irqsave(&sched_sig_list_lock, flags)) {
3673+ if (list_empty(&sched_sig_list))
3674+ p = NULL;
3675+ else {
3676+ p = sched_sig_list.next;
3677+ list_del(&sched_sig_list);
3678+ INIT_LIST_HEAD(&sched_sig_list);
3679+ }
3680+ spin_unlock_irqrestore(&sched_sig_list_lock, flags);
3681+
3682+ /* abort if there are no signals */
3683+ if (!p)
3684+ return;
3685+
3686+ /* take signal list we just obtained */
3687+ list_add(&claimed, p);
3688+
3689+ list_for_each_safe(p, extra, &claimed) {
3690+ list_del(p);
3691+ sig = list_entry(p, struct sched_sig, list);
3692+ t = sig->task;
3693+ info.si_signo = sig->signal;
3694+ info.si_errno = 0;
3695+ info.si_code = SI_KERNEL;
3696+ info.si_pid = 1;
3697+ info.si_uid = 0;
3698+ TRACE("sending signal %d to %d\n", info.si_signo,
3699+ t->pid);
3700+ if (sig->force)
3701+ force_sig_info(sig->signal, &info, t);
3702+ else
3703+ send_sig_info(sig->signal, &info, t);
3704+ put_task_struct(t);
3705+ kfree(sig);
3706+ }
3707+ }
3708+
3709+}
3710+
3711+#ifdef CONFIG_NP_SECTION
3712+
3713+static inline void np_mem_error(struct task_struct* t, const char* reason)
3714+{
3715+ if (t->state != TASK_DEAD && !(t->flags & PF_EXITING)) {
3716+ TRACE("np section: %s => %s/%d killed\n",
3717+ reason, t->comm, t->pid);
3718+ force_scheduler_signal(t, SIGKILL);
3719+ }
3720+}
3721+
3722+/* sys_register_np_flag() allows real-time tasks to register an
3723+ * np section indicator.
3724+ * returns 0 if the flag was successfully registered
3725+ * returns EINVAL if current task is not a real-time task
3726+ * returns EFAULT if *flag couldn't be written
3727+ */
3728+asmlinkage long sys_register_np_flag(short __user *flag)
3729+{
3730+ int retval = -EINVAL;
3731+ short test_val = RT_PREEMPTIVE;
3732+
3733+ /* avoid races with the scheduler */
3734+ preempt_disable();
3735+ TRACE("reg_np_flag(%p) for %s/%d\n", flag,
3736+ current->comm, current->pid);
3737+
3738+ /* Let's first try to write to the address.
3739+ * That way it is initialized and any bugs
3740+	 * involving dangling pointers will be caught
3741+ * early.
3742+ * NULL indicates disabling np section support
3743+ * and should not be tested.
3744+ */
3745+ if (flag)
3746+ retval = poke_kernel_address(test_val, flag);
3747+ else
3748+ retval = 0;
3749+ TRACE("reg_np_flag: retval=%d\n", retval);
3750+ if (unlikely(0 != retval))
3751+ np_mem_error(current, "np flag: not writable");
3752+ else
3753+ /* the pointer is ok */
3754+ current->rt_param.np_flag = flag;
3755+
3756+ preempt_enable();
3757+ return retval;
3758+}
3759+
3760+
3761+void request_exit_np(struct task_struct *t)
3762+{
3763+ int ret;
3764+ short flag;
3765+
3766+ /* We can only do this if t is actually currently scheduled on this CPU
3767+ * because otherwise we are in the wrong address space. Thus make sure
3768+ * to check.
3769+ */
3770+ BUG_ON(t != current);
3771+
3772+ if (unlikely(!is_realtime(t) || !t->rt_param.np_flag)) {
3773+ TRACE_TASK(t, "request_exit_np(): BAD TASK!\n");
3774+ return;
3775+ }
3776+
3777+ flag = RT_EXIT_NP_REQUESTED;
3778+ ret = poke_kernel_address(flag, t->rt_param.np_flag + 1);
3779+ TRACE("request_exit_np(%s/%d)\n", t->comm, t->pid);
3780+ if (unlikely(0 != ret))
3781+ np_mem_error(current, "request_exit_np(): flag not writable");
3782+
3783+}
3784+
3785+
3786+int is_np(struct task_struct* t)
3787+{
3788+ int ret;
3789+ unsigned short flag = 0x5858; /* = XX, looks nicer in debug*/
3790+
3791+ BUG_ON(t != current);
3792+
3793+ if (unlikely(t->rt_param.kernel_np))
3794+ return 1;
3795+ else if (unlikely(t->rt_param.np_flag == NULL) ||
3796+ t->flags & PF_EXITING ||
3797+ t->state == TASK_DEAD)
3798+ return 0;
3799+ else {
3800+ /* This is the tricky part. The process has registered a
3801+ * non-preemptive section marker. We now need to check whether
3802+		 * it is set to RT_NON_PREEMPTIVE. Along the way we could
3803+ * discover that the pointer points to an unmapped region (=>
3804+ * kill the task) or that the location contains some garbage
3805+ * value (=> also kill the task). Killing the task in any case
3806+ * forces userspace to play nicely. Any bugs will be discovered
3807+ * immediately.
3808+ */
3809+ ret = probe_kernel_address(t->rt_param.np_flag, flag);
3810+ if (0 == ret && (flag == RT_NON_PREEMPTIVE ||
3811+ flag == RT_PREEMPTIVE))
3812+ return flag != RT_PREEMPTIVE;
3813+ else {
3814+ /* either we could not read from the address or
3815+ * it contained garbage => kill the process
3816+ * FIXME: Should we cause a SEGFAULT instead?
3817+ */
3818+ TRACE("is_np: ret=%d flag=%c%c (%x)\n", ret,
3819+ flag & 0xff, (flag >> 8) & 0xff, flag);
3820+ np_mem_error(t, "is_np() could not read");
3821+ return 0;
3822+ }
3823+ }
3824+}
3825+
3826+/*
3827+ * sys_exit_np() allows a real-time task to signal that it has left a
3828+ * non-preemptable section. It will be called after the kernel requested a
3829+ * callback in the preemption indicator flag.
3830+ * returns 0 if the signal was valid and processed.
3831+ * returns EINVAL if current task is not a real-time task
3832+ */
3833+asmlinkage long sys_exit_np(void)
3834+{
3835+ int retval = -EINVAL;
3836+
3837+ TS_EXIT_NP_START;
3838+
3839+ if (!is_realtime(current))
3840+ goto out;
3841+
3842+ TRACE("sys_exit_np(%s/%d)\n", current->comm, current->pid);
3843+ /* force rescheduling so that we can be preempted */
3844+ set_tsk_need_resched(current);
3845+ retval = 0;
3846+ out:
3847+
3848+ TS_EXIT_NP_END;
3849+ return retval;
3850+}
3851+
3852+#else /* !CONFIG_NP_SECTION */
3853+
3854+asmlinkage long sys_register_np_flag(short __user *flag)
3855+{
3856+ return -ENOSYS;
3857+}
3858+
3859+asmlinkage long sys_exit_np(void)
3860+{
3861+ return -ENOSYS;
3862+}
3863+
3864+#endif /* CONFIG_NP_SECTION */
3865+
3866+
3867+/* p is a real-time task. Re-init its state as a best-effort task. */
3868+static void reinit_litmus_state(struct task_struct* p, int restore)
3869+{
3870+ struct rt_task user_config = {};
3871+ __user short *np_flag = NULL;
3872+
3873+ if (restore) {
3874+		/* Save user-space provided configuration data. */
3875+ user_config = p->rt_param.task_params;
3876+ np_flag = p->rt_param.np_flag;
3877+ }
3878+
3879+ /* We probably should not be inheriting any task's priority
3880+ * at this point in time.
3881+ */
3882+ WARN_ON(p->rt_param.inh_task);
3883+
3884+ /* We need to restore the priority of the task. */
3885+// __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio);
3886+
3887+ /* Cleanup everything else. */
3888+ memset(&p->rt_param, 0, sizeof(struct rt_task));
3889+
3890+ /* Restore preserved fields. */
3891+ if (restore) {
3892+ p->rt_param.task_params = user_config;
3893+ p->rt_param.np_flag = np_flag;
3894+ }
3895+}
3896+
3897+long litmus_admit_task(struct task_struct* tsk)
3898+{
3899+ long retval;
3900+ long flags;
3901+
3902+ BUG_ON(is_realtime(tsk));
3903+
3904+ if (get_rt_period(tsk) == 0 ||
3905+ get_exec_cost(tsk) > get_rt_period(tsk)) {
3906+ TRACE_TASK(tsk, "litmus admit: invalid task parameters "
3907+ "(%lu, %lu)\n",
3908+ get_exec_cost(tsk), get_rt_period(tsk));
3909+ return -EINVAL;
3910+ }
3911+
3912+ if (!cpu_online(get_partition(tsk)))
3913+ {
3914+ TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
3915+ get_partition(tsk));
3916+ return -EINVAL;
3917+ }
3918+
3919+ INIT_LIST_HEAD(&tsk_rt(tsk)->list);
3920+
3921+ /* avoid scheduler plugin changing underneath us */
3922+ spin_lock_irqsave(&task_transition_lock, flags);
3923+ retval = litmus->admit_task(tsk);
3924+
3925+ /* allocate heap node for this task */
3926+ tsk_rt(tsk)->heap_node = kmem_cache_alloc(heap_node_cache, GFP_ATOMIC);
3927+ if (!tsk_rt(tsk)->heap_node)
3928+ retval = -ENOMEM;
3929+ else
3930+ heap_node_init(&tsk_rt(tsk)->heap_node, tsk);
3931+
3932+ if (!retval)
3933+ atomic_inc(&rt_task_count);
3934+
3935+ spin_unlock_irqrestore(&task_transition_lock, flags);
3936+
3937+ return retval;
3938+
3939+}
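The admission test above boils down to a simple rule; a standalone restatement (illustrative sketch only, not used by the patch):

/* A task's parameters are admissible iff the period is non-zero and the
 * worst-case execution cost fits within the period. */
static inline int params_admissible(unsigned long exec_cost,
                                    unsigned long period)
{
        return period != 0 && exec_cost <= period;
}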
3940+
3941+void litmus_exit_task(struct task_struct* tsk)
3942+{
3943+ if (is_realtime(tsk)) {
3944+ litmus->task_exit(tsk);
3945+ BUG_ON(heap_node_in_heap(tsk_rt(tsk)->heap_node));
3946+ kmem_cache_free(heap_node_cache, tsk_rt(tsk)->heap_node);
3947+ atomic_dec(&rt_task_count);
3948+ reinit_litmus_state(tsk, 1);
3949+ }
3950+}
3951+
3952+/* Switching a plugin in use is tricky.
3953+ * We must watch out that no real-time tasks exist
3954+ * (and that none are created in parallel) and that the plugin is not
3955+ * currently in use on any processor (in theory).
3956+ *
3957+ * For now, we don't enforce the second part since it is unlikely to cause
3958+ * any trouble by itself as long as we don't unload modules.
3959+ */
3960+int switch_sched_plugin(struct sched_plugin* plugin)
3961+{
3962+ long flags;
3963+ int ret = 0;
3964+
3965+ BUG_ON(!plugin);
3966+
3967+ /* stop task transitions */
3968+ spin_lock_irqsave(&task_transition_lock, flags);
3969+
3970+ /* don't switch if there are active real-time tasks */
3971+ if (atomic_read(&rt_task_count) == 0) {
3972+ printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
3973+ litmus = plugin;
3974+ } else
3975+ ret = -EBUSY;
3976+
3977+ spin_unlock_irqrestore(&task_transition_lock, flags);
3978+ return ret;
3979+}
3980+
3981+/* Called upon fork.
3982+ * p is the newly forked task.
3983+ */
3984+void litmus_fork(struct task_struct* p)
3985+{
3986+ if (is_realtime(p))
3987+ /* clean out any litmus related state, don't preserve anything*/
3988+ reinit_litmus_state(p, 0);
3989+}
3990+
3991+/* Called upon execve().
3992+ * current is doing the exec.
3993+ * Don't let address space specific stuff leak.
3994+ */
3995+void litmus_exec(void)
3996+{
3997+ struct task_struct* p = current;
3998+
3999+ if (is_realtime(p)) {
4000+ WARN_ON(p->rt_param.inh_task);
4001+ p->rt_param.np_flag = NULL;
4002+ }
4003+}
4004+
4005+void exit_litmus(struct task_struct *dead_tsk)
4006+{
4007+ if (is_realtime(dead_tsk))
4008+ litmus_exit_task(dead_tsk);
4009+}
4010+
4011+
4012+void list_qsort(struct list_head* list, list_cmp_t less_than)
4013+{
4014+ struct list_head lt;
4015+ struct list_head geq;
4016+ struct list_head *pos, *extra, *pivot;
4017+ int n_lt = 0, n_geq = 0;
4018+ BUG_ON(!list);
4019+
4020+ if (list->next == list)
4021+ return;
4022+
4023+ INIT_LIST_HEAD(&lt);
4024+ INIT_LIST_HEAD(&geq);
4025+
4026+ pivot = list->next;
4027+ list_del(pivot);
4028+ list_for_each_safe(pos, extra, list) {
4029+ list_del(pos);
4030+ if (less_than(pos, pivot)) {
4031+ list_add(pos, &lt);
4032+ n_lt++;
4033+ } else {
4034+ list_add(pos, &geq);
4035+ n_geq++;
4036+ }
4037+ }
4038+ if (n_lt < n_geq) {
4039+ list_qsort(&lt, less_than);
4040+ list_qsort(&geq, less_than);
4041+ } else {
4042+ list_qsort(&geq, less_than);
4043+ list_qsort(&lt, less_than);
4044+ }
4045+ list_splice(&geq, list);
4046+ list_add(pivot, list);
4047+ list_splice(&lt, list);
4048+}
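A usage sketch for list_qsort() (illustrative only; the element type and field names are made up, and the comparison callback is assumed to take two struct list_head pointers, matching the calls above):

struct demo_item {
        int key;
        struct list_head link;
};

/* returns non-zero iff a should be ordered before b */
static int demo_less_than(struct list_head *a, struct list_head *b)
{
        return list_entry(a, struct demo_item, link)->key <
               list_entry(b, struct demo_item, link)->key;
}

/* Given a list head populated with demo_item.link entries:
 *	list_qsort(&demo_list, demo_less_than);
 * leaves demo_list sorted by ascending key. */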
4049+
4050+#ifdef CONFIG_MAGIC_SYSRQ
4051+int sys_kill(int pid, int sig);
4052+
4053+static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty)
4054+{
4055+ struct task_struct *t;
4056+ read_lock(&tasklist_lock);
4057+ for_each_process(t) {
4058+ if (is_realtime(t)) {
4059+ sys_kill(t->pid, SIGKILL);
4060+ }
4061+ }
4062+ read_unlock(&tasklist_lock);
4063+}
4064+
4065+static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
4066+ .handler = sysrq_handle_kill_rt_tasks,
4067+ .help_msg = "Quit-rt-tasks",
4068+ .action_msg = "sent SIGKILL to all real-time tasks",
4069+};
4070+#endif
4071+
4072+static int proc_read_stats(char *page, char **start,
4073+ off_t off, int count,
4074+ int *eof, void *data)
4075+{
4076+ int len;
4077+
4078+ len = snprintf(page, PAGE_SIZE,
4079+ "real-time task count = %d\n",
4080+ atomic_read(&rt_task_count));
4081+ return len;
4082+}
4083+
4084+static int proc_read_plugins(char *page, char **start,
4085+ off_t off, int count,
4086+ int *eof, void *data)
4087+{
4088+ int len;
4089+
4090+ len = print_sched_plugins(page, PAGE_SIZE);
4091+ return len;
4092+}
4093+
4094+static int proc_read_curr(char *page, char **start,
4095+ off_t off, int count,
4096+ int *eof, void *data)
4097+{
4098+ int len;
4099+
4100+ len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name);
4101+ return len;
4102+}
4103+
4104+static int proc_write_curr(struct file *file,
4105+ const char *buffer,
4106+ unsigned long count,
4107+ void *data)
4108+{
4109+ int len, ret;
4110+ char name[65];
4111+ struct sched_plugin* found;
4112+
4113+ if(count > 64)
4114+ len = 64;
4115+ else
4116+ len = count;
4117+
4118+ if(copy_from_user(name, buffer, len))
4119+ return -EFAULT;
4120+
4121+ name[len] = '\0';
4122+ /* chomp name */
4123+ if (len > 1 && name[len - 1] == '\n')
4124+ name[len - 1] = '\0';
4125+
4126+ found = find_sched_plugin(name);
4127+
4128+ if (found) {
4129+ ret = switch_sched_plugin(found);
4130+ if (ret != 0)
4131+ printk(KERN_INFO "Could not switch plugin: %d\n", ret);
4132+ } else
4133+ printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
4134+
4135+ return len;
4136+}
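From userspace, switching plugins amounts to writing a registered plugin name into the proc file created in init_litmus_proc() below, e.g. the "C-EDF" plugin registered by litmus/sched_cedf.c (if built). An illustrative sketch in plain C, equivalent to `echo C-EDF > /proc/litmus/active_plugin`:

#include <stdio.h>

int set_active_plugin(const char *name)
{
        FILE *f = fopen("/proc/litmus/active_plugin", "w");
        if (!f)
                return -1;
        fprintf(f, "%s\n", name);
        return fclose(f);
}

Note that the switch only succeeds while no real-time tasks are active (see switch_sched_plugin() above).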
4137+
4138+
4139+static struct proc_dir_entry *litmus_dir = NULL,
4140+ *curr_file = NULL,
4141+ *stat_file = NULL,
4142+ *plugs_file = NULL;
4143+
4144+static int __init init_litmus_proc(void)
4145+{
4146+ litmus_dir = proc_mkdir("litmus", NULL);
4147+ if (!litmus_dir) {
4148+ printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
4149+ return -ENOMEM;
4150+ }
4151+ litmus_dir->owner = THIS_MODULE;
4152+
4153+ curr_file = create_proc_entry("active_plugin",
4154+ 0644, litmus_dir);
4155+ if (!curr_file) {
4156+ printk(KERN_ERR "Could not allocate active_plugin "
4157+ "procfs entry.\n");
4158+ return -ENOMEM;
4159+ }
4160+ curr_file->owner = THIS_MODULE;
4161+ curr_file->read_proc = proc_read_curr;
4162+ curr_file->write_proc = proc_write_curr;
4163+
4164+ stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
4165+ proc_read_stats, NULL);
4166+
4167+ plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir,
4168+ proc_read_plugins, NULL);
4169+
4170+ return 0;
4171+}
4172+
4173+static void exit_litmus_proc(void)
4174+{
4175+ if (plugs_file)
4176+ remove_proc_entry("plugins", litmus_dir);
4177+ if (stat_file)
4178+ remove_proc_entry("stats", litmus_dir);
4179+ if (curr_file)
4180+ remove_proc_entry("active_plugin", litmus_dir);
4181+ if (litmus_dir)
4182+ remove_proc_entry("litmus", NULL);
4183+}
4184+
4185+extern struct sched_plugin linux_sched_plugin;
4186+
4187+static int __init _init_litmus(void)
4188+{
4189+ /* Common initializers,
4190+ * mode change lock is used to enforce single mode change
4191+ * operation.
4192+ */
4193+ printk("Starting LITMUS^RT kernel\n");
4194+
4195+ register_sched_plugin(&linux_sched_plugin);
4196+
4197+ heap_node_cache = KMEM_CACHE(heap_node, 0);
4198+ if (!heap_node_cache)
4199+ return -ENOMEM;
4200+
4201+#ifdef CONFIG_MAGIC_SYSRQ
4202+ /* offer some debugging help */
4203+ if (!register_sysrq_key('q', &sysrq_kill_rt_tasks_op))
4204+ printk("Registered kill rt tasks magic sysrq.\n");
4205+ else
4206+ printk("Could not register kill rt tasks magic sysrq.\n");
4207+#endif
4208+
4209+ init_litmus_proc();
4210+
4211+ return 0;
4212+}
4213+
4214+static void _exit_litmus(void)
4215+{
4216+ exit_litmus_proc();
4217+ kmem_cache_destroy(heap_node_cache);
4218+}
4219+
4220+module_init(_init_litmus);
4221+module_exit(_exit_litmus);
4222diff --git a/litmus/norqlock.c b/litmus/norqlock.c
4223new file mode 100644
4224index 0000000..11f85d3
4225--- /dev/null
4226+++ b/litmus/norqlock.c
4227@@ -0,0 +1,56 @@
4228+#include <linux/list.h>
4229+#include <linux/bitops.h>
4230+#include <linux/percpu.h>
4231+#include <linux/module.h>
4232+#include <linux/smp.h>
4233+
4234+#include <litmus/norqlock.h>
4235+
4236+struct worklist {
4237+ struct no_rqlock_work* next;
4238+};
4239+
4240+static DEFINE_PER_CPU(struct worklist, norq_worklist) = {NULL};
4241+
4242+void init_no_rqlock_work(struct no_rqlock_work* w, work_t work,
4243+ unsigned long arg)
4244+{
4245+ w->active = 0;
4246+ w->work = work;
4247+ w->arg = arg;
4248+ w->next = NULL;
4249+}
4250+
4251+void __do_without_rqlock(struct no_rqlock_work *work)
4252+{
4253+ long flags;
4254+ struct worklist* wl;
4255+
4256+ local_irq_save(flags);
4257+ wl = &__get_cpu_var(norq_worklist);
4258+ work->next = wl->next;
4259+ wl->next = work;
4260+ local_irq_restore(flags);
4261+}
4262+
4263+void tick_no_rqlock(void)
4264+{
4265+ long flags;
4266+ struct no_rqlock_work *todo, *next;
4267+
4268+ local_irq_save(flags);
4269+
4270+ next = __get_cpu_var(norq_worklist).next;
4271+ __get_cpu_var(norq_worklist).next = NULL;
4272+
4273+ while (next) {
4274+ todo = next;
4275+ next = next->next;
4276+ todo->next = NULL;
4277+ smp_mb__before_clear_bit();
4278+ clear_bit(0, (void*) &todo->active);
4279+ todo->work(todo->arg);
4280+ }
4281+
4282+ local_irq_restore(flags);
4283+}
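rt_domain.c below uses this mechanism to defer timer arming; a minimal standalone sketch of the same pattern (illustrative only: do_without_rqlock() is assumed to be the test-and-set wrapper around __do_without_rqlock() declared in norqlock.h, and the callback signature is taken from arm_release_timers() below):

static void demo_work(unsigned long arg)
{
        /* runs later from tick_no_rqlock(), outside the runqueue lock */
        TRACE("deferred work, arg=%lu\n", arg);
}

static struct no_rqlock_work demo;

static void demo_defer(void)
{
        init_no_rqlock_work(&demo, demo_work, 42);
        do_without_rqlock(&demo);	/* executed on a later tick */
}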
4284diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
4285new file mode 100644
4286index 0000000..2880308
4287--- /dev/null
4288+++ b/litmus/rt_domain.c
4289@@ -0,0 +1,138 @@
4290+/*
4291+ * litmus/rt_domain.c
4292+ *
4293+ * LITMUS real-time infrastructure. This file contains the
4294+ * functions that manipulate RT domains. RT domains are an abstraction
4295+ * of a ready queue and a release queue.
4296+ */
4297+
4298+#include <linux/percpu.h>
4299+#include <linux/sched.h>
4300+#include <linux/list.h>
4301+
4302+#include <litmus/litmus.h>
4303+#include <litmus/sched_plugin.h>
4304+#include <litmus/sched_trace.h>
4305+
4306+#include <litmus/rt_domain.h>
4307+
4308+#include <litmus/trace.h>
4309+
4310+static int dummy_resched(rt_domain_t *rt)
4311+{
4312+ return 0;
4313+}
4314+
4315+static int dummy_order(struct heap_node* a, struct heap_node* b)
4316+{
4317+ return 0;
4318+}
4319+
4320+/* default implementation: use default lock */
4321+static void default_release_job(struct task_struct* t, rt_domain_t* rt)
4322+{
4323+ add_ready(rt, t);
4324+}
4325+
4326+static enum hrtimer_restart release_job_timer(struct hrtimer *timer)
4327+{
4328+ struct task_struct *t;
4329+
4330+ TS_RELEASE_START;
4331+
4332+ t = container_of(timer, struct task_struct,
4333+ rt_param.release_timer);
4334+
4335+ get_domain(t)->release_job(t, get_domain(t));
4336+
4337+ TS_RELEASE_END;
4338+
4339+ return HRTIMER_NORESTART;
4340+}
4341+
4342+static void setup_job_release_timer(struct task_struct *task)
4343+{
4344+ hrtimer_init(&release_timer(task), CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
4345+ release_timer(task).function = release_job_timer;
4346+#ifdef CONFIG_HIGH_RES_TIMERS
4347+ release_timer(task).cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART;
4348+#endif
4349+ /* Expiration time of timer is release time of task. */
4350+ release_timer(task).expires = ns_to_ktime(get_release(task));
4351+
4352+ TRACE_TASK(task, "arming release timer rel=%llu at %llu\n",
4353+ get_release(task), litmus_clock());
4354+
4355+ hrtimer_start(&release_timer(task), release_timer(task).expires,
4356+ HRTIMER_MODE_ABS);
4357+}
4358+
4359+static void arm_release_timers(unsigned long _rt)
4360+{
4361+ rt_domain_t *rt = (rt_domain_t*) _rt;
4362+ unsigned long flags;
4363+ struct list_head alt;
4364+ struct list_head *pos, *safe;
4365+ struct task_struct* t;
4366+
4367+ spin_lock_irqsave(&rt->release_lock, flags);
4368+ list_replace_init(&rt->release_queue, &alt);
4369+ spin_unlock_irqrestore(&rt->release_lock, flags);
4370+
4371+ list_for_each_safe(pos, safe, &alt) {
4372+ t = list_entry(pos, struct task_struct, rt_param.list);
4373+ list_del(pos);
4374+ setup_job_release_timer(t);
4375+ }
4376+}
4377+
4378+
4379+void rt_domain_init(rt_domain_t *rt,
4380+ heap_prio_t order,
4381+ check_resched_needed_t check,
4382+ release_job_t release
4383+ )
4384+{
4385+ BUG_ON(!rt);
4386+ if (!check)
4387+ check = dummy_resched;
4388+ if (!release)
4389+ release = default_release_job;
4390+ if (!order)
4391+ order = dummy_order;
4392+ heap_init(&rt->ready_queue);
4393+ INIT_LIST_HEAD(&rt->release_queue);
4394+ spin_lock_init(&rt->ready_lock);
4395+ spin_lock_init(&rt->release_lock);
4396+ rt->check_resched = check;
4397+ rt->release_job = release;
4398+ rt->order = order;
4399+ init_no_rqlock_work(&rt->arm_timers, arm_release_timers, (unsigned long) rt);
4400+}
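A sketch of how a plugin might set up its domain (illustrative only; a real plugin would pass an EDF ordering function from litmus/edf_common.c as the order callback, while NULL falls back to the defaults above):

static rt_domain_t demo_domain;

static int demo_check_resched(rt_domain_t *rt)
{
        /* e.g. compare the head of rt->ready_queue against the currently
         * scheduled job and trigger a reschedule if needed */
        return 0;
}

static void demo_domain_setup(void)
{
        rt_domain_init(&demo_domain,
                       NULL,                    /* order: dummy_order */
                       demo_check_resched,      /* check_resched */
                       NULL);                   /* release: default_release_job */
}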
4401+
4402+/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
4403+ * @new: the newly released task
4404+ */
4405+void __add_ready(rt_domain_t* rt, struct task_struct *new)
4406+{
4407+ TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
4408+ new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
4409+ get_release(new), litmus_clock());
4410+
4411+ BUG_ON(heap_node_in_heap(tsk_rt(new)->heap_node));
4412+
4413+ heap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
4414+ rt->check_resched(rt);
4415+}
4416+
4417+/* add_release - add a real-time task to the rt release queue.
4418+ * @task: the sleeping task
4419+ */
4420+void __add_release(rt_domain_t* rt, struct task_struct *task)
4421+{
4422+ TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
4423+ list_add(&tsk_rt(task)->list, &rt->release_queue);
4424+ task->rt_param.domain = rt;
4425+ do_without_rqlock(&rt->arm_timers);
4426+}
4427+
4428diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
4429new file mode 100755
4430index 0000000..2ac14cd
4431--- /dev/null
4432+++ b/litmus/sched_cedf.c
4433@@ -0,0 +1,717 @@
4434+/*
4435+ * litmus/sched_cedf.c
4436+ *
4437+ * Implementation of the Clustered EDF (C-EDF) scheduling algorithm.
4438+ * Linking is included so that support for synchronization (e.g., through
4439+ * the implementation of a "CSN-EDF" algorithm) can be added later if desired.
4440+ *
4441+ * This version uses the simple approach and serializes all scheduling
4442+ * decisions by the use of a queue lock. This is probably not the
4443+ * best way to do it, but it should suffice for now.
4444+ */
4445+
4446+#include <linux/spinlock.h>
4447+#include <linux/percpu.h>
4448+#include <linux/sched.h>
4449+#include <linux/list.h>
4450+
4451+#include <litmus/litmus.h>
4452+#include <litmus/jobs.h>
4453+#include <litmus/sched_plugin.h>
4454+#include <litmus/edf_common.h>
4455+#include <litmus/sched_trace.h>
4456+
4457+#include <linux/module.h>
4458+
4459+/* Overview of C-EDF operations.
4460+ *
4461+ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
4462+ * structure (NOT the actually scheduled
4463+ * task). If there is another linked task To
4464+ * already it will set To->linked_on = NO_CPU
4465+ * (thereby removing its association with this
4466+ * CPU). However, it will not requeue the
4467+ * previously linked task (if any). It will set
4468+ * T's state to RT_F_RUNNING and check whether
4469+ * it is already running somewhere else. If T
4470+ * is scheduled somewhere else it will link
4471+ * it to that CPU instead (and pull the linked
4472+ * task to cpu). T may be NULL.
4473+ *
4474+ * unlink(T) - Unlink removes T from all scheduler data
4475+ * structures. If it is linked to some CPU it
4476+ * will link NULL to that CPU. If it is
4477+ * currently queued in the cedf queue for
4478+ * a partition, it will be removed from
4479+ * the rt_domain. It is safe to call
4480+ * unlink(T) if T is not linked. T may not
4481+ * be NULL.
4482+ *
4483+ * requeue(T) - Requeue will insert T into the appropriate
4484+ * queue. If the system is in real-time mode and
4485+ * T is released already, it will go into the
4486+ * ready queue. If the system is not in
4487+ * real-time mode, then T will go into the
4488+ * release queue. If T's release time is in the
4489+ * future, it will go into the release
4490+ * queue. That means that T's release time/job
4491+ * no/etc. has to be updated before requeue(T) is
4492+ * called. It is not safe to call requeue(T)
4493+ * when T is already queued. T may not be NULL.
4494+ *
4495+ * cedf_job_arrival(T) - This is the catch-all function when T enters
4496+ * the system after either a suspension or at a
4497+ * job release. It will queue T (which means it
4498+ * is not safe to call cedf_job_arrival(T) if
4499+ * T is already queued) and then check whether a
4500+ * preemption is necessary. If a preemption is
4501+ * necessary it will update the linkage
4502+ * accordingly and cause scheduled to be called
4503+ * (either with an IPI or need_resched). It is
4504+ * safe to call cedf_job_arrival(T) if T's
4505+ * next job has not been actually released yet
4506+ * (release time in the future). T will be put
4507+ * on the release queue in that case.
4508+ *
4509+ * job_completion(T) - Take care of everything that needs to be done
4510+ * to prepare T for its next release and place
4511+ * it in the right queue with
4512+ * cedf_job_arrival().
4513+ *
4514+ *
4515+ * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
4516+ * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
4517+ * the functions will automatically propagate a pending task from the ready queue
4518+ * to a linked task. This is the job of the calling function (by means of
4519+ * __take_ready).
4520+ */
4521+
4522+/* cpu_entry_t - maintain the linked and scheduled state
4523+ */
4524+typedef struct {
4525+ int cpu;
4526+ struct task_struct* linked; /* only RT tasks */
4527+ struct task_struct* scheduled; /* only RT tasks */
4528+ struct list_head list;
4529+ atomic_t will_schedule; /* prevent unneeded IPIs */
4530+} cpu_entry_t;
4531+DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
4532+
4533+cpu_entry_t* cedf_cpu_entries_array[NR_CPUS];
4534+
4535+#define set_will_schedule() \
4536+ (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
4537+#define clear_will_schedule() \
4538+ (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
4539+#define test_will_schedule(cpu) \
4540+ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
4541+
4542+#define NO_CPU 0xffffffff
4543+
4544+/* Cluster size -- currently four. This is a variable to allow for
4545+ * the possibility of changing the cluster size online in the future.
4546+ */
4547+int cluster_size = 4;
4548+
4549+typedef struct {
4550+ rt_domain_t domain;
4551+ int first_cpu;
4552+ int last_cpu;
4553+
4554+ /* the cpus queue themselves according to priority in here */
4555+ struct list_head cedf_cpu_queue;
4556+
4557+ /* per-partition spinlock: protects the domain and
4558+ * serializes scheduling decisions
4559+ */
4560+#define slock domain.ready_lock
4561+} cedf_domain_t;
4562+
4563+DEFINE_PER_CPU(cedf_domain_t*, cedf_domains) = NULL;
4564+
4565+cedf_domain_t* cedf_domains_array[NR_CPUS];
4566+
4567+
4568+/* These are defined similarly to partitioning, except that a
4569+ * task's partition is any cpu of the cluster to which it
4570+ * is assigned, typically the lowest-numbered cpu.
4571+ */
4572+#define local_edf (&__get_cpu_var(cedf_domains)->domain)
4573+#define local_cedf __get_cpu_var(cedf_domains)
4574+#define remote_edf(cpu) (&per_cpu(cedf_domains, cpu)->domain)
4575+#define remote_cedf(cpu) per_cpu(cedf_domains, cpu)
4576+#define task_edf(task) remote_edf(get_partition(task))
4577+#define task_cedf(task) remote_cedf(get_partition(task))
4578+
4579+/* update_cpu_position - Move the cpu entry to the correct place to maintain
4580+ * order in the cpu queue. Caller must hold cedf lock.
4581+ *
4582+ * This really should be a heap.
4583+ */
4584+static void update_cpu_position(cpu_entry_t *entry)
4585+{
4586+ cpu_entry_t *other;
4587+ struct list_head *cedf_cpu_queue =
4588+ &(remote_cedf(entry->cpu))->cedf_cpu_queue;
4589+ struct list_head *pos;
4590+
4591+ BUG_ON(!cedf_cpu_queue);
4592+
4593+ if (likely(in_list(&entry->list)))
4594+ list_del(&entry->list);
4595+ /* if we do not execute real-time jobs we just move
4596+ * to the end of the queue
4597+ */
4598+ if (entry->linked) {
4599+ list_for_each(pos, cedf_cpu_queue) {
4600+ other = list_entry(pos, cpu_entry_t, list);
4601+ if (edf_higher_prio(entry->linked, other->linked)) {
4602+ __list_add(&entry->list, pos->prev, pos);
4603+ return;
4604+ }
4605+ }
4606+ }
4607+ /* if we get this far we have the lowest priority job */
4608+ list_add_tail(&entry->list, cedf_cpu_queue);
4609+}
4610+
4611+/* link_task_to_cpu - Update the link of a CPU.
4612+ * Handles the case where the to-be-linked task is already
4613+ * scheduled on a different CPU.
4614+ */
4615+static noinline void link_task_to_cpu(struct task_struct* linked,
4616+ cpu_entry_t *entry)
4617+{
4618+ cpu_entry_t *sched;
4619+ struct task_struct* tmp;
4620+ int on_cpu;
4621+
4622+ BUG_ON(linked && !is_realtime(linked));
4623+
4624+ /* Cannot link task to a CPU that doesn't belong to its partition... */
4625+ BUG_ON(linked && remote_cedf(entry->cpu) != task_cedf(linked));
4626+
4627+ /* Currently linked task is set to be unlinked. */
4628+ if (entry->linked) {
4629+ entry->linked->rt_param.linked_on = NO_CPU;
4630+ }
4631+
4632+ /* Link new task to CPU. */
4633+ if (linked) {
4634+ set_rt_flags(linked, RT_F_RUNNING);
4635+ /* handle task is already scheduled somewhere! */
4636+ on_cpu = linked->rt_param.scheduled_on;
4637+ if (on_cpu != NO_CPU) {
4638+ sched = &per_cpu(cedf_cpu_entries, on_cpu);
4639+ /* this should only happen if not linked already */
4640+ BUG_ON(sched->linked == linked);
4641+
4642+ /* If we are already scheduled on the CPU to which we
4643+ * wanted to link, we don't need to do the swap --
4644+ * we just link ourselves to the CPU and depend on
4645+ * the caller to get things right.
4646+ */
4647+ if (entry != sched) {
4648+ tmp = sched->linked;
4649+ linked->rt_param.linked_on = sched->cpu;
4650+ sched->linked = linked;
4651+ update_cpu_position(sched);
4652+ linked = tmp;
4653+ }
4654+ }
4655+ if (linked) /* might be NULL due to swap */
4656+ linked->rt_param.linked_on = entry->cpu;
4657+ }
4658+ entry->linked = linked;
4659+
4660+ if (entry->linked)
4661+ TRACE_TASK(entry->linked, "linked to CPU %d, state:%d\n",
4662+ entry->cpu, entry->linked->state);
4663+ else
4664+ TRACE("NULL linked to CPU %d\n", entry->cpu);
4665+
4666+ update_cpu_position(entry);
4667+}
4668+
4669+/* unlink - Make sure a task is not linked any longer to an entry
4670+ * where it was linked before. Must hold cedf_lock.
4671+ */
4672+static noinline void unlink(struct task_struct* t)
4673+{
4674+ cpu_entry_t *entry;
4675+
4676+ if (unlikely(!t)) {
4677+ TRACE_BUG_ON(!t);
4678+ return;
4679+ }
4680+
4681+ if (t->rt_param.linked_on != NO_CPU) {
4682+ /* unlink */
4683+ entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
4684+ t->rt_param.linked_on = NO_CPU;
4685+ link_task_to_cpu(NULL, entry);
4686+ } else if (is_queued(t)) {
4687+ /* This is an interesting situation: t is scheduled,
4688+ * but was just recently unlinked. It cannot be
4689+ * linked anywhere else (because then it would have
4690+ * been relinked to this CPU), thus it must be in some
4691+ * queue. We must remove it from the list in this
4692+ * case.
4693+ */
4694+ remove(task_edf(t), t);
4695+ }
4696+}
4697+
4698+
4699+/* preempt - force a CPU to reschedule
4700+ */
4701+static noinline void preempt(cpu_entry_t *entry)
4702+{
4703+ /* We cannot make the is_np() decision here if it is a remote CPU
4704+ * because requesting exit_np() requires that we currently use the
4705+ * address space of the task. Thus, in the remote case we just send
4706+ * the IPI and let schedule() handle the problem.
4707+ */
4708+
4709+ if (smp_processor_id() == entry->cpu) {
4710+ if (entry->scheduled && is_np(entry->scheduled))
4711+ request_exit_np(entry->scheduled);
4712+ else
4713+ set_tsk_need_resched(current);
4714+ } else
4715+ /* in case that it is a remote CPU we have to defer the
4716+ * the decision to the remote CPU
4717+ * FIXME: We could save a few IPI's here if we leave the flag
4718+ * set when we are waiting for a np_exit().
4719+ */
4720+ if (!test_will_schedule(entry->cpu))
4721+ smp_send_reschedule(entry->cpu);
4722+}
4723+
4724+/* requeue - Put an unlinked task into c-edf domain.
4725+ * Caller must hold cedf_lock.
4726+ */
4727+static noinline void requeue(struct task_struct* task)
4728+{
4729+ cedf_domain_t* cedf;
4730+ rt_domain_t* edf;
4731+
4732+ BUG_ON(!task);
4733+ /* sanity check rt_list before insertion */
4734+ BUG_ON(is_queued(task));
4735+
4736+ /* Get correct real-time domain. */
4737+ cedf = task_cedf(task);
4738+ edf = &cedf->domain;
4739+
4740+ if (get_rt_flags(task) == RT_F_SLEEP) {
4741+ /* this task has expired
4742+ * _schedule has already taken care of updating
4743+ * the release and
4744+		 * deadline. We just need to check whether it has been released.
4745+ */
4746+ if (is_released(task, litmus_clock()))
4747+ __add_ready(edf, task);
4748+ else {
4749+ /* it has got to wait */
4750+ add_release(edf, task);
4751+ }
4752+
4753+ } else
4754+ /* this is a forced preemption
4755+ * thus the task stays in the ready_queue
4756+ * we only must make it available to others
4757+ */
4758+ __add_ready(edf, task);
4759+}
4760+
4761+/* cedf_job_arrival: task is either resumed or released */
4762+static noinline void cedf_job_arrival(struct task_struct* task)
4763+{
4764+ cpu_entry_t* last;
4765+ cedf_domain_t* cedf;
4766+ rt_domain_t* edf;
4767+ struct list_head *cedf_cpu_queue;
4768+
4769+ BUG_ON(!task);
4770+
4771+ /* Get correct real-time domain. */
4772+ cedf = task_cedf(task);
4773+ edf = &cedf->domain;
4774+ cedf_cpu_queue = &cedf->cedf_cpu_queue;
4775+
4776+ BUG_ON(!cedf);
4777+ BUG_ON(!edf);
4778+ BUG_ON(!cedf_cpu_queue);
4779+ BUG_ON(list_empty(cedf_cpu_queue));
4780+
4781+ /* first queue arriving job */
4782+ requeue(task);
4783+
4784+ /* then check for any necessary preemptions */
4785+ last = list_entry(cedf_cpu_queue->prev, cpu_entry_t, list);
4786+ if (edf_preemption_needed(edf, last->linked)) {
4787+ /* preemption necessary */
4788+ task = __take_ready(edf);
4789+ TRACE("job_arrival: task %d linked to %d, state:%d\n",
4790+ task->pid, last->cpu, task->state);
4791+ if (last->linked)
4792+ requeue(last->linked);
4793+
4794+ link_task_to_cpu(task, last);
4795+ preempt(last);
4796+ }
4797+}
4798+
4799+/* check for current job releases */
4800+static void cedf_job_release(struct task_struct* t, rt_domain_t* _)
4801+{
4802+ cedf_domain_t* cedf = task_cedf(t);
4803+ unsigned long flags;
4804+
4805+ BUG_ON(!t);
4806+ BUG_ON(!cedf);
4807+
4808+ spin_lock_irqsave(&cedf->slock, flags);
4809+	sched_trace_job_release(t);
4810+ cedf_job_arrival(t);
4811+ spin_unlock_irqrestore(&cedf->slock, flags);
4812+}
4813+
4814+/* cedf_tick - this function is called for every local timer
4815+ * interrupt.
4816+ *
4817+ * checks whether the current task has expired and checks
4818+ * whether we need to preempt it if it has not expired
4819+ */
4820+static void cedf_tick(struct task_struct* t)
4821+{
4822+ BUG_ON(!t);
4823+
4824+ if (is_realtime(t) && budget_exhausted(t)) {
4825+ if (!is_np(t)) {
4826+ /* np tasks will be preempted when they become
4827+ * preemptable again
4828+ */
4829+ set_tsk_need_resched(t);
4830+ set_will_schedule();
4831+ TRACE("cedf_scheduler_tick: "
4832+ "%d is preemptable (state:%d) "
4833+ " => FORCE_RESCHED\n", t->pid, t->state);
4834+ } else {
4835+ TRACE("cedf_scheduler_tick: "
4836+ "%d is non-preemptable (state:%d), "
4837+ "preemption delayed.\n", t->pid, t->state);
4838+ request_exit_np(t);
4839+ }
4840+ }
4841+}
4842+
4843+/* caller holds cedf_lock */
4844+static noinline void job_completion(struct task_struct *t)
4845+{
4846+ BUG_ON(!t);
4847+
4848+ sched_trace_job_completion(t);
4849+
4850+ TRACE_TASK(t, "job_completion(). [state:%d]\n", t->state);
4851+
4852+ /* set flags */
4853+ set_rt_flags(t, RT_F_SLEEP);
4854+ /* prepare for next period */
4855+ prepare_for_next_period(t);
4856+ /* unlink */
4857+ unlink(t);
4858+ /* requeue
4859+ * But don't requeue a blocking task. */
4860+ if (is_running(t))
4861+ cedf_job_arrival(t);
4862+}
4863+
4864+/* Getting schedule() right is a bit tricky. schedule() may not make any
4865+ * assumptions on the state of the current task since it may be called for a
4866+ * number of reasons. The reasons include: a scheduler_tick() determined that it
4867+ * was necessary, sys_exit_np() was called, some Linux
4868+ * subsystem determined so, or even (in the worst case) there is a bug
4869+ * hidden somewhere. Thus, we must take extreme care to determine what the
4870+ * current state is.
4871+ *
4872+ * The CPU could currently be scheduling a task (or not), be linked (or not).
4873+ *
4874+ * The following assertions for the scheduled task could hold:
4875+ *
4876+ * - !is_running(scheduled) // the job blocks
4877+ * - scheduled->timeslice == 0 // the job completed (forcefully)
4878+ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
4879+ * - linked != scheduled // we need to reschedule (for any reason)
4880+ * - is_np(scheduled) // rescheduling must be delayed,
4881+ * sys_exit_np must be requested
4882+ *
4883+ * Any of these can occur together.
4884+ */
4885+static struct task_struct* cedf_schedule(struct task_struct * prev)
4886+{
4887+ cedf_domain_t* cedf = local_cedf;
4888+ rt_domain_t* edf = &cedf->domain;
4889+ cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
4890+ int out_of_time, sleep, preempt, np,
4891+ exists, blocks;
4892+ struct task_struct* next = NULL;
4893+
4894+ BUG_ON(!prev);
4895+ BUG_ON(!cedf);
4896+ BUG_ON(!edf);
4897+ BUG_ON(!entry);
4898+ BUG_ON(cedf != remote_cedf(entry->cpu));
4899+ BUG_ON(is_realtime(prev) && cedf != task_cedf(prev));
4900+
4901+ /* Will be released in finish_switch. */
4902+ spin_lock(&cedf->slock);
4903+ clear_will_schedule();
4904+
4905+ /* sanity checking */
4906+ BUG_ON(entry->scheduled && entry->scheduled != prev);
4907+ BUG_ON(entry->scheduled && !is_realtime(prev));
4908+ BUG_ON(is_realtime(prev) && !entry->scheduled);
4909+
4910+ /* (0) Determine state */
4911+ exists = entry->scheduled != NULL;
4912+ blocks = exists && !is_running(entry->scheduled);
4913+ out_of_time = exists && budget_exhausted(entry->scheduled);
4914+ np = exists && is_np(entry->scheduled);
4915+ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
4916+ preempt = entry->scheduled != entry->linked;
4917+
4918+ /* If a task blocks we have no choice but to reschedule.
4919+ */
4920+ if (blocks)
4921+ unlink(entry->scheduled);
4922+
4923+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
4924+ * We need to make sure to update the link structure anyway in case
4925+ * that we are still linked. Multiple calls to request_exit_np() don't
4926+ * hurt.
4927+ */
4928+ if (np && (out_of_time || preempt || sleep)) {
4929+ unlink(entry->scheduled);
4930+ request_exit_np(entry->scheduled);
4931+ }
4932+
4933+ /* Any task that is preemptable and either exhausts its execution
4934+ * budget or wants to sleep completes. We may have to reschedule after
4935+	 * this. Don't do a job completion if it blocks (can't have timers
4936+	 * running for blocked jobs). Preemptions go first for the same reason.
4937+ */
4938+ if (!np && (out_of_time || sleep) && !blocks && !preempt)
4939+ job_completion(entry->scheduled);
4940+
4941+ /* Link pending task if we became unlinked.
4942+ */
4943+ if (!entry->linked)
4944+ link_task_to_cpu(__take_ready(edf), entry);
4945+
4946+ /* The final scheduling decision. Do we need to switch for some reason?
4947+	 * If linked is different from scheduled, then select linked as next.
4948+ */
4949+ if ((!np || blocks) &&
4950+ entry->linked != entry->scheduled) {
4951+ /* Schedule a linked job? */
4952+ if (entry->linked) {
4953+ entry->linked->rt_param.scheduled_on = entry->cpu;
4954+ next = entry->linked;
4955+ }
4956+ if (entry->scheduled) {
4957+ /* not gonna be scheduled soon */
4958+ entry->scheduled->rt_param.scheduled_on = NO_CPU;
4959+ TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
4960+ }
4961+ } else
4962+		/* Only override Linux scheduler if we have a real-time task
4963+ * scheduled that needs to continue.
4964+ */
4965+ if (exists)
4966+ next = prev;
4967+
4968+ spin_unlock(&cedf->slock);
4969+
4970+ return next;
4971+}
4972+
4973+/* _finish_switch - we just finished the switch away from prev
4974+ */
4975+static void cedf_finish_switch(struct task_struct *prev)
4976+{
4977+ cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
4978+
4979+ BUG_ON(!prev);
4980+ BUG_ON(!entry);
4981+
4982+ entry->scheduled = is_realtime(current) ? current : NULL;
4983+}
4984+
4985+/* Prepare a task for running in RT mode
4986+ */
4987+static void cedf_task_new(struct task_struct *t, int on_rq, int running)
4988+{
4989+ unsigned long flags;
4990+ cedf_domain_t* cedf = task_cedf(t);
4991+ cpu_entry_t* entry;
4992+
4993+ BUG_ON(!cedf);
4994+
4995+ spin_lock_irqsave(&cedf->slock, flags);
4996+ if (running) {
4997+ entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
4998+ BUG_ON(!entry);
4999+ BUG_ON(entry->scheduled);
5000+ entry->scheduled = t;
5001+ t->rt_param.scheduled_on = task_cpu(t);
5002+ } else
5003+ t->rt_param.scheduled_on = NO_CPU;
5004+ t->rt_param.linked_on = NO_CPU;
5005+
5006+ /* setup job params */
5007+ release_at(t, litmus_clock());
5008+
5009+ cedf_job_arrival(t);
5010+ spin_unlock_irqrestore(&cedf->slock, flags);
5011+}
5012+
5013+
5014+static void cedf_task_wake_up(struct task_struct *task)
5015+{
5016+ unsigned long flags;
5017+ cedf_domain_t* cedf;
5018+ lt_t now;
5019+
5020+ BUG_ON(!task);
5021+
5022+ cedf = task_cedf(task);
5023+ BUG_ON(!cedf);
5024+
5025+ spin_lock_irqsave(&cedf->slock, flags);
5026+ /* We need to take suspensions because of semaphores into
5027+ * account! If a job resumes after being suspended due to acquiring
5028+ * a semaphore, it should never be treated as a new job release.
5029+ */
5030+ if (get_rt_flags(task) == RT_F_EXIT_SEM) {
5031+ set_rt_flags(task, RT_F_RUNNING);
5032+ } else {
5033+ now = litmus_clock();
5034+ if (is_tardy(task, now)) {
5035+ /* new sporadic release */
5036+ release_at(task, now);
5037+ sched_trace_job_release(task);
5038+ }
5039+ else if (task->time_slice)
5040+ /* came back in time before deadline
5041+ */
5042+ set_rt_flags(task, RT_F_RUNNING);
5043+ }
5044+ cedf_job_arrival(task);
5045+ spin_unlock_irqrestore(&cedf->slock, flags);
5046+}
5047+
5048+
5049+static void cedf_task_block(struct task_struct *t)
5050+{
5051+ unsigned long flags;
5052+
5053+ BUG_ON(!t);
5054+
5055+ /* unlink if necessary */
5056+ spin_lock_irqsave(&task_cedf(t)->slock, flags);
5057+ unlink(t);
5058+ spin_unlock_irqrestore(&task_cedf(t)->slock, flags);
5059+
5060+ BUG_ON(!is_realtime(t));
5061+}
5062+
5063+static void cedf_task_exit(struct task_struct * t)
5064+{
5065+ unsigned long flags;
5066+
5067+ BUG_ON(!t);
5068+
5069+ /* unlink if necessary */
5070+ spin_lock_irqsave(&task_cedf(t)->slock, flags);
5071+ unlink(t);
5072+ if (tsk_rt(t)->scheduled_on != NO_CPU) {
5073+ cedf_cpu_entries_array[tsk_rt(t)->scheduled_on]->
5074+ scheduled = NULL;
5075+ tsk_rt(t)->scheduled_on = NO_CPU;
5076+ }
5077+ spin_unlock_irqrestore(&task_cedf(t)->slock, flags);
5078+
5079+ BUG_ON(!is_realtime(t));
5080+ TRACE_TASK(t, "RIP\n");
5081+}
5082+
5083+static long cedf_admit_task(struct task_struct* tsk)
5084+{
5085+ return (task_cpu(tsk) >= task_cedf(tsk)->first_cpu &&
5086+ task_cpu(tsk) <= task_cedf(tsk)->last_cpu) ? 0 : -EINVAL;
5087+}
5088+
5089+
5090+/* Plugin object */
5091+static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
5092+ .plugin_name = "C-EDF",
5093+ .finish_switch = cedf_finish_switch,
5094+ .tick = cedf_tick,
5095+ .task_new = cedf_task_new,
5096+ .complete_job = complete_job,
5097+ .task_exit = cedf_task_exit,
5098+ .schedule = cedf_schedule,
5099+ .task_wake_up = cedf_task_wake_up,
5100+ .task_block = cedf_task_block,
5101+ .admit_task = cedf_admit_task
5102+};
5103+
5104+static void cedf_domain_init(int first_cpu, int last_cpu)
5105+{
5106+ int cpu;
5107+
5108+ /* Create new domain for this cluster. */
5109+ cedf_domain_t *new_cedf_domain = kmalloc(sizeof(cedf_domain_t),
5110+ GFP_KERNEL);
5111+
5112+ /* Initialize cluster domain. */
5113+ edf_domain_init(&new_cedf_domain->domain, NULL,
5114+ cedf_job_release);
5115+ new_cedf_domain->first_cpu = first_cpu;
5116+ new_cedf_domain->last_cpu = last_cpu;
5117+ INIT_LIST_HEAD(&new_cedf_domain->cedf_cpu_queue);
5118+
5119+ /* Assign all cpus in cluster to point to this domain. */
5120+ for (cpu = first_cpu; cpu <= last_cpu; cpu++) {
5121+ remote_cedf(cpu) = new_cedf_domain;
5122+ cedf_domains_array[cpu] = new_cedf_domain;
5123+ }
5124+}
5125+
5126+static int __init init_cedf(void)
5127+{
5128+ int cpu;
5129+ cpu_entry_t *entry;
5130+
5131+ /* initialize CPU state */
5132+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
5133+ entry = &per_cpu(cedf_cpu_entries, cpu);
5134+ cedf_cpu_entries_array[cpu] = entry;
5135+ atomic_set(&entry->will_schedule, 0);
5136+ entry->linked = NULL;
5137+ entry->scheduled = NULL;
5138+ entry->cpu = cpu;
5139+ INIT_LIST_HEAD(&entry->list);
5140+ }
5141+
5142+ /* initialize all cluster domains */
5143+ for (cpu = 0; cpu < NR_CPUS; cpu += cluster_size)
5144+ cedf_domain_init(cpu, cpu+cluster_size-1);
5145+
5146+ return register_sched_plugin(&cedf_plugin);
5147+}
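For example, with NR_CPUS == 8 and the default cluster_size of 4, the loop above creates two domains via cedf_domain_init(0, 3) and cedf_domain_init(4, 7); every CPU's cedf_domains pointer then refers to its own cluster's domain.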
5148+
5149+module_init(init_cedf);
5150+
5151diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
5152new file mode 100644
5153index 0000000..e32a4e8
5154--- /dev/null
5155+++ b/litmus/sched_gsn_edf.c
5156@@ -0,0 +1,731 @@
5157+/*
5158+ * litmus/sched_gsn_edf.c
5159+ *
5160+ * Implementation of the GSN-EDF scheduling algorithm.
5161+ *
5162+ * This version uses the simple approach and serializes all scheduling
5163+ * decisions by the use of a queue lock. This is probably not the
5164+ * best way to do it, but it should suffice for now.
5165+ */
5166+
5167+#include <linux/spinlock.h>
5168+#include <linux/percpu.h>
5169+#include <linux/sched.h>
5170+#include <linux/list.h>
5171+
5172+#include <litmus/litmus.h>
5173+#include <litmus/jobs.h>
5174+#include <litmus/sched_plugin.h>
5175+#include <litmus/edf_common.h>
5176+#include <litmus/sched_trace.h>
5177+
5178+#include <linux/module.h>
5179+
5180+/* Overview of GSN-EDF operations.
5181+ *
5182+ * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
5183+ * description only covers how the individual operations are implemented in
5184+ * LITMUS.
5185+ *
5186+ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
5187+ * structure (NOT the actually scheduled
5188+ * task). If there is another linked task To
5189+ * already it will set To->linked_on = NO_CPU
5190+ * (thereby removing its association with this
5191+ * CPU). However, it will not requeue the
5192+ * previously linked task (if any). It will set
5193+ * T's state to RT_F_RUNNING and check whether
5194+ * it is already running somewhere else. If T
5195+ * is scheduled somewhere else it will link
5196+ * it to that CPU instead (and pull the linked
5197+ * task to cpu). T may be NULL.
5198+ *
5199+ * unlink(T) - Unlink removes T from all scheduler data
5200+ * structures. If it is linked to some CPU it
5201+ * will link NULL to that CPU. If it is
5202+ * currently queued in the gsnedf queue it will
5203+ * be removed from the rt_domain. It is safe to
5204+ * call unlink(T) if T is not linked. T may not
5205+ * be NULL.
5206+ *
5207+ * requeue(T) - Requeue will insert T into the appropriate
5208+ * queue. If the system is in real-time mode and
5209+ * T is released already, it will go into the
5210+ * ready queue. If the system is not in
5211+ * real-time mode, then T will go into the
5212+ * release queue. If T's release time is in the
5213+ * future, it will go into the release
5214+ * queue. That means that T's release time/job
5215+ * no/etc. has to be updated before requeue(T) is
5216+ * called. It is not safe to call requeue(T)
5217+ * when T is already queued. T may not be NULL.
5218+ *
5219+ * gsnedf_job_arrival(T) - This is the catch-all function when T enters
5220+ * the system after either a suspension or at a
5221+ * job release. It will queue T (which means it
5222+ * is not safe to call gsnedf_job_arrival(T) if
5223+ * T is already queued) and then check whether a
5224+ * preemption is necessary. If a preemption is
5225+ * necessary it will update the linkage
5226+ * accordingly and cause scheduled to be called
5227+ * (either with an IPI or need_resched). It is
5228+ * safe to call gsnedf_job_arrival(T) if T's
5229+ * next job has not been actually released yet
5230+ * (release time in the future). T will be put
5231+ * on the release queue in that case.
5232+ *
5233+ * job_completion(T) - Take care of everything that needs to be done
5234+ * to prepare T for its next release and place
5235+ * it in the right queue with
5236+ * gsnedf_job_arrival().
5237+ *
5238+ *
5239+ * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
5240+ * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
5241+ * the functions will automatically propagate a pending task from the ready queue
5242+ * to a linked task. This is the job of the calling function (by means of
5243+ * __take_ready).
5244+ */
5245+
5246+
5247+/* cpu_entry_t - maintain the linked and scheduled state
5248+ */
5249+typedef struct {
5250+ int cpu;
5251+ struct task_struct* linked; /* only RT tasks */
5252+ struct task_struct* scheduled; /* only RT tasks */
5253+ struct list_head list;
5254+ atomic_t will_schedule; /* prevent unneeded IPIs */
5255+} cpu_entry_t;
5256+DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
5257+
5258+cpu_entry_t* gsnedf_cpus[NR_CPUS];
5259+
5260+#define set_will_schedule() \
5261+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
5262+#define clear_will_schedule() \
5263+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
5264+#define test_will_schedule(cpu) \
5265+ (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
5266+
5267+
5268+#define NO_CPU 0xffffffff
5269+
5270+/* the cpus queue themselves according to priority in here */
5271+static LIST_HEAD(gsnedf_cpu_queue);
5272+
5273+static rt_domain_t gsnedf;
5274+#define gsnedf_lock (gsnedf.ready_lock)
5275+
5276+/* update_cpu_position - Move the cpu entry to the correct place to maintain
5277+ * order in the cpu queue. Caller must hold gsnedf lock.
5278+ *
5279+ * This really should be a heap.
5280+ */
5281+static void update_cpu_position(cpu_entry_t *entry)
5282+{
5283+ cpu_entry_t *other;
5284+ struct list_head *pos;
5285+
5286+ if (likely(in_list(&entry->list)))
5287+ list_del(&entry->list);
5288+ /* if we do not execute real-time jobs we just move
5289+ * to the end of the queue
5290+ */
5291+ if (entry->linked) {
5292+ list_for_each(pos, &gsnedf_cpu_queue) {
5293+ other = list_entry(pos, cpu_entry_t, list);
5294+ if (edf_higher_prio(entry->linked, other->linked)) {
5295+ __list_add(&entry->list, pos->prev, pos);
5296+ return;
5297+ }
5298+ }
5299+ }
5300+ /* if we get this far we have the lowest priority job */
5301+ list_add_tail(&entry->list, &gsnedf_cpu_queue);
5302+}
5303+
5304+/* link_task_to_cpu - Update the link of a CPU.
5305+ * Handles the case where the to-be-linked task is already
5306+ * scheduled on a different CPU.
5307+ */
5308+static noinline void link_task_to_cpu(struct task_struct* linked,
5309+ cpu_entry_t *entry)
5310+{
5311+ cpu_entry_t *sched;
5312+ struct task_struct* tmp;
5313+ int on_cpu;
5314+
5315+ BUG_ON(linked && !is_realtime(linked));
5316+
5317+ /* Currently linked task is set to be unlinked. */
5318+ if (entry->linked) {
5319+ entry->linked->rt_param.linked_on = NO_CPU;
5320+ }
5321+
5322+ /* Link new task to CPU. */
5323+ if (linked) {
5324+ set_rt_flags(linked, RT_F_RUNNING);
5325+ /* handle task is already scheduled somewhere! */
5326+ on_cpu = linked->rt_param.scheduled_on;
5327+ if (on_cpu != NO_CPU) {
5328+ sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
5329+ /* this should only happen if not linked already */
5330+ BUG_ON(sched->linked == linked);
5331+
5332+ /* If we are already scheduled on the CPU to which we
5333+ * wanted to link, we don't need to do the swap --
5334+ * we just link ourselves to the CPU and depend on
5335+ * the caller to get things right.
5336+ */
5337+ if (entry != sched) {
5338+ TRACE_TASK(linked,
5339+ "already scheduled on %d, updating link.\n",
5340+ sched->cpu);
5341+ tmp = sched->linked;
5342+ linked->rt_param.linked_on = sched->cpu;
5343+ sched->linked = linked;
5344+ update_cpu_position(sched);
5345+ linked = tmp;
5346+ }
5347+ }
5348+ if (linked) /* might be NULL due to swap */
5349+ linked->rt_param.linked_on = entry->cpu;
5350+ }
5351+ entry->linked = linked;
5352+ if (linked)
5353+ TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
5354+ else
5355+ TRACE("NULL linked to %d.\n", entry->cpu);
5356+ update_cpu_position(entry);
5357+}
5358+
5359+/* unlink - Make sure a task is not linked any longer to an entry
5360+ * where it was linked before. Must hold gsnedf_lock.
5361+ */
5362+static noinline void unlink(struct task_struct* t)
5363+{
5364+ cpu_entry_t *entry;
5365+
5366+ if (unlikely(!t)) {
5367+ TRACE_BUG_ON(!t);
5368+ return;
5369+ }
5370+
5371+ if (t->rt_param.linked_on != NO_CPU) {
5372+ /* unlink */
5373+ entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
5374+ t->rt_param.linked_on = NO_CPU;
5375+ link_task_to_cpu(NULL, entry);
5376+ } else if (is_queued(t)) {
5377+ /* This is an interesting situation: t is scheduled,
5378+ * but was just recently unlinked. It cannot be
5379+ * linked anywhere else (because then it would have
5380+ * been relinked to this CPU), thus it must be in some
5381+ * queue. We must remove it from the list in this
5382+ * case.
5383+ */
5384+ remove(&gsnedf, t);
5385+ }
5386+}
5387+
5388+
5389+/* preempt - force a CPU to reschedule
5390+ */
5391+static noinline void preempt(cpu_entry_t *entry)
5392+{
5393+ /* We cannot make the is_np() decision here if it is a remote CPU
5394+ * because requesting exit_np() requires that we currently use the
5395+ * address space of the task. Thus, in the remote case we just send
5396+ * the IPI and let schedule() handle the problem.
5397+ */
5398+
5399+ if (smp_processor_id() == entry->cpu) {
5400+ if (entry->scheduled && is_np(entry->scheduled))
5401+ request_exit_np(entry->scheduled);
5402+ else
5403+ set_tsk_need_resched(current);
5404+ } else
5405+		/* in case that it is a remote CPU we have to defer
5406+ * the decision to the remote CPU
5407+ * FIXME: We could save a few IPI's here if we leave the flag
5408+ * set when we are waiting for a np_exit().
5409+ */
5410+ if (!test_will_schedule(entry->cpu))
5411+ smp_send_reschedule(entry->cpu);
5412+}
5413+
5414+/* requeue - Put an unlinked task into gsn-edf domain.
5415+ * Caller must hold gsnedf_lock.
5416+ */
5417+static noinline void requeue(struct task_struct* task)
5418+{
5419+ BUG_ON(!task);
5420+ /* sanity check before insertion */
5421+ BUG_ON(is_queued(task));
5422+
5423+ if (get_rt_flags(task) == RT_F_SLEEP) {
5424+ /* this task has expired
5425+ * _schedule has already taken care of updating
5426+ * the release and
5427+		 * deadline. We just need to check whether it has been released.
5428+ */
5429+ if (is_released(task, litmus_clock()))
5430+ __add_ready(&gsnedf, task);
5431+ else {
5432+ /* it has got to wait */
5433+ add_release(&gsnedf, task);
5434+ }
5435+
5436+ } else
5437+ /* this is a forced preemption
5438+ * thus the task stays in the ready_queue
5439+ * we only must make it available to others
5440+ */
5441+ __add_ready(&gsnedf, task);
5442+}
5443+
5444+/* gsnedf_job_arrival: task is either resumed or released */
5445+static noinline void gsnedf_job_arrival(struct task_struct* task)
5446+{
5447+ cpu_entry_t* last;
5448+
5449+ BUG_ON(list_empty(&gsnedf_cpu_queue));
5450+ BUG_ON(!task);
5451+
5452+ /* first queue arriving job */
5453+ requeue(task);
5454+
5455+ /* then check for any necessary preemptions */
5456+ last = list_entry(gsnedf_cpu_queue.prev, cpu_entry_t, list);
5457+ if (edf_preemption_needed(&gsnedf, last->linked)) {
5458+ /* preemption necessary */
5459+ task = __take_ready(&gsnedf);
5460+ TRACE("job_arrival: attempting to link task %d to %d\n",
5461+ task->pid, last->cpu);
5462+ if (last->linked)
5463+ requeue(last->linked);
5464+
5465+ link_task_to_cpu(task, last);
5466+ preempt(last);
5467+ }
5468+}
5469+
5470+/* check for current job releases */
5471+static void gsnedf_job_release(struct task_struct* t, rt_domain_t* _)
5472+{
5473+ unsigned long flags;
5474+
5475+ spin_lock_irqsave(&gsnedf_lock, flags);
5476+
5477+	sched_trace_job_release(t);
5478+ gsnedf_job_arrival(t);
5479+
5480+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5481+}
5482+
5483+/* caller holds gsnedf_lock */
5484+static noinline void job_completion(struct task_struct *t)
5485+{
5486+ BUG_ON(!t);
5487+
5488+ sched_trace_job_completion(t);
5489+
5490+ TRACE_TASK(t, "job_completion().\n");
5491+
5492+ /* set flags */
5493+ set_rt_flags(t, RT_F_SLEEP);
5494+ /* prepare for next period */
5495+ prepare_for_next_period(t);
5496+ /* unlink */
5497+ unlink(t);
5498+ /* requeue
5499+ * But don't requeue a blocking task. */
5500+ if (is_running(t))
5501+ gsnedf_job_arrival(t);
5502+}
5503+
5504+/* gsnedf_tick - this function is called for every local timer
5505+ * interrupt.
5506+ *
5507+ * checks whether the current task has expired and checks
5508+ * whether we need to preempt it if it has not expired
5509+ */
5510+static void gsnedf_tick(struct task_struct* t)
5511+{
5512+ if (is_realtime(t) && budget_exhausted(t)) {
5513+ if (!is_np(t)) {
5514+ /* np tasks will be preempted when they become
5515+ * preemptable again
5516+ */
5517+ set_tsk_need_resched(t);
5518+ set_will_schedule();
5519+ TRACE("gsnedf_scheduler_tick: "
5520+ "%d is preemptable "
5521+ " => FORCE_RESCHED\n", t->pid);
5522+ } else {
5523+ TRACE("gsnedf_scheduler_tick: "
5524+ "%d is non-preemptable, "
5525+ "preemption delayed.\n", t->pid);
5526+ request_exit_np(t);
5527+ }
5528+ }
5529+}
5530+
5531+/* Getting schedule() right is a bit tricky. schedule() may not make any
5532+ * assumptions on the state of the current task since it may be called for a
5533+ * number of reasons. The reasons include: a scheduler_tick() determined that it
5534+ * was necessary, sys_exit_np() was called, some Linux
5535+ * subsystem determined so, or even (in the worst case) there is a bug
5536+ * hidden somewhere. Thus, we must take extreme care to determine what the
5537+ * current state is.
5538+ *
5539+ * The CPU could currently be scheduling a task (or not), be linked (or not).
5540+ *
5541+ * The following assertions for the scheduled task could hold:
5542+ *
5543+ * - !is_running(scheduled) // the job blocks
5544+ * - scheduled->timeslice == 0 // the job completed (forcefully)
5545+ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
5546+ * - linked != scheduled // we need to reschedule (for any reason)
5547+ * - is_np(scheduled) // rescheduling must be delayed,
5548+ * sys_exit_np must be requested
5549+ *
5550+ * Any of these can occur together.
5551+ */
5552+static struct task_struct* gsnedf_schedule(struct task_struct * prev)
5553+{
5554+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
5555+ int out_of_time, sleep, preempt, np, exists, blocks;
5556+ struct task_struct* next = NULL;
5557+
5558+ /* Will be released in finish_switch. */
5559+ spin_lock(&gsnedf_lock);
5560+ clear_will_schedule();
5561+
5562+ /* sanity checking */
5563+ BUG_ON(entry->scheduled && entry->scheduled != prev);
5564+ BUG_ON(entry->scheduled && !is_realtime(prev));
5565+ BUG_ON(is_realtime(prev) && !entry->scheduled);
5566+
5567+ /* (0) Determine state */
5568+ exists = entry->scheduled != NULL;
5569+ blocks = exists && !is_running(entry->scheduled);
5570+ out_of_time = exists && budget_exhausted(entry->scheduled);
5571+ np = exists && is_np(entry->scheduled);
5572+ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
5573+ preempt = entry->scheduled != entry->linked;
5574+
5575+ TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
5576+
5577+ if (exists)
5578+ TRACE_TASK(prev,
5579+ "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
5580+ "state:%d sig:%d\n",
5581+ blocks, out_of_time, np, sleep, preempt,
5582+ prev->state, signal_pending(prev));
5583+ if (entry->linked && preempt)
5584+ TRACE_TASK(prev, "will be preempted by %s/%d\n",
5585+ entry->linked->comm, entry->linked->pid);
5586+
5587+
5588+ /* If a task blocks we have no choice but to reschedule.
5589+ */
5590+ if (blocks)
5591+ unlink(entry->scheduled);
5592+
5593+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
5594+ * We need to make sure to update the link structure anyway in case
5595+ * that we are still linked. Multiple calls to request_exit_np() don't
5596+ * hurt.
5597+ */
5598+ if (np && (out_of_time || preempt || sleep)) {
5599+ unlink(entry->scheduled);
5600+ request_exit_np(entry->scheduled);
5601+ }
5602+
5603+ /* Any task that is preemptable and either exhausts its execution
5604+ * budget or wants to sleep completes. We may have to reschedule after
5605+ * this. Don't do a job completion if we block (can't have timers running
5606+	 * for blocked jobs). Preemptions go first for the same reason.
5607+ */
5608+ if (!np && (out_of_time || sleep) && !blocks && !preempt)
5609+ job_completion(entry->scheduled);
5610+
5611+ /* Link pending task if we became unlinked.
5612+ */
5613+ if (!entry->linked)
5614+ link_task_to_cpu(__take_ready(&gsnedf), entry);
5615+
5616+ /* The final scheduling decision. Do we need to switch for some reason?
5617+ * If linked is different from scheduled, then select linked as next.
5618+ */
5619+ if ((!np || blocks) &&
5620+ entry->linked != entry->scheduled) {
5621+ /* Schedule a linked job? */
5622+ if (entry->linked) {
5623+ entry->linked->rt_param.scheduled_on = entry->cpu;
5624+ next = entry->linked;
5625+ }
5626+ if (entry->scheduled) {
5627+ /* not gonna be scheduled soon */
5628+ entry->scheduled->rt_param.scheduled_on = NO_CPU;
5629+ TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
5630+ }
5631+ } else
5632+ /* Only override Linux scheduler if we have a real-time task
5633+ * scheduled that needs to continue.
5634+ */
5635+ if (exists)
5636+ next = prev;
5637+
5638+ spin_unlock(&gsnedf_lock);
5639+
5640+ TRACE("gsnedf_lock released, next=0x%p\n", next);
5641+
5642+
5643+ if (next)
5644+ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
5645+ else if (exists && !next)
5646+ TRACE("becomes idle at %llu.\n", litmus_clock());
5647+
5648+
5649+ return next;
5650+}
5651+
5652+
5653+/* _finish_switch - we just finished the switch away from prev
5654+ */
5655+static void gsnedf_finish_switch(struct task_struct *prev)
5656+{
5657+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
5658+
5659+ entry->scheduled = is_realtime(current) ? current : NULL;
5660+ TRACE_TASK(prev, "switched away from\n");
5661+}
5662+
5663+
5664+/* Prepare a task for running in RT mode
5665+ */
5666+static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
5667+{
5668+ unsigned long flags;
5669+ cpu_entry_t* entry;
5670+
5671+ TRACE("gsn edf: task new %d\n", t->pid);
5672+
5673+ spin_lock_irqsave(&gsnedf_lock, flags);
5674+ if (running) {
5675+ entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
5676+ BUG_ON(entry->scheduled);
5677+ entry->scheduled = t;
5678+ t->rt_param.scheduled_on = task_cpu(t);
5679+ } else
5680+ t->rt_param.scheduled_on = NO_CPU;
5681+ t->rt_param.linked_on = NO_CPU;
5682+
5683+ /* setup job params */
5684+ release_at(t, litmus_clock());
5685+
5686+ gsnedf_job_arrival(t);
5687+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5688+}
5689+
5690+static void gsnedf_task_wake_up(struct task_struct *task)
5691+{
5692+ unsigned long flags;
5693+ lt_t now;
5694+
5695+ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
5696+
5697+ spin_lock_irqsave(&gsnedf_lock, flags);
5698+ /* We need to take suspensions because of semaphores into
5699+ * account! If a job resumes after being suspended due to acquiring
5700+ * a semaphore, it should never be treated as a new job release.
5701+ */
5702+ if (get_rt_flags(task) == RT_F_EXIT_SEM) {
5703+ set_rt_flags(task, RT_F_RUNNING);
5704+ } else {
5705+ now = litmus_clock();
5706+ if (is_tardy(task, now)) {
5707+ /* new sporadic release */
5708+ release_at(task, now);
5709+ sched_trace_job_release(task);
5710+ }
5711+ else if (task->time_slice)
5712+ /* came back in time before deadline
5713+ */
5714+ set_rt_flags(task, RT_F_RUNNING);
5715+ }
5716+ gsnedf_job_arrival(task);
5717+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5718+}
5719+
5720+static void gsnedf_task_block(struct task_struct *t)
5721+{
5722+ unsigned long flags;
5723+
5724+ TRACE_TASK(t, "block at %llu\n", litmus_clock());
5725+
5726+ /* unlink if necessary */
5727+ spin_lock_irqsave(&gsnedf_lock, flags);
5728+ unlink(t);
5729+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5730+
5731+ BUG_ON(!is_realtime(t));
5732+}
5733+
5734+
5735+static void gsnedf_task_exit(struct task_struct * t)
5736+{
5737+ unsigned long flags;
5738+
5739+ /* unlink if necessary */
5740+ spin_lock_irqsave(&gsnedf_lock, flags);
5741+ unlink(t);
5742+ if (tsk_rt(t)->scheduled_on != NO_CPU) {
5743+ gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
5744+ tsk_rt(t)->scheduled_on = NO_CPU;
5745+ }
5746+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5747+
5748+ BUG_ON(!is_realtime(t));
5749+ TRACE_TASK(t, "RIP\n");
5750+}
5751+
5752+#ifdef CONFIG_FMLP
5753+static long gsnedf_pi_block(struct pi_semaphore *sem,
5754+ struct task_struct *new_waiter)
5755+{
5756+ /* This callback has to handle the situation where a new waiter is
5757+ * added to the wait queue of the semaphore.
5758+ *
5759+ * We must check if it has a higher priority than the currently
5760+ * highest-priority task, and then potentially reschedule.
5761+ */
5762+
5763+ BUG_ON(!new_waiter);
5764+
5765+ if (edf_higher_prio(new_waiter, sem->hp.task)) {
5766+ TRACE_TASK(new_waiter, " boosts priority\n");
5767+ /* called with IRQs disabled */
5768+ spin_lock(&gsnedf_lock);
5769+ /* store new highest-priority task */
5770+ sem->hp.task = new_waiter;
5771+ if (sem->holder) {
5772+ /* let holder inherit */
5773+ sem->holder->rt_param.inh_task = new_waiter;
5774+ unlink(sem->holder);
5775+ gsnedf_job_arrival(sem->holder);
5776+ }
5777+ spin_unlock(&gsnedf_lock);
5778+ }
5779+
5780+ return 0;
5781+}
5782+
5783+static long gsnedf_inherit_priority(struct pi_semaphore *sem,
5784+ struct task_struct *new_owner)
5785+{
5786+ /* We don't need to acquire the gsnedf_lock since at the time of this
5787+ * call new_owner isn't actually scheduled yet (it's still sleeping)
5788+ * and since the calling function already holds sem->wait.lock, which
5789+ * prevents concurrent sem->hp.task changes.
5790+ */
5791+
5792+ if (sem->hp.task && sem->hp.task != new_owner) {
5793+ new_owner->rt_param.inh_task = sem->hp.task;
5794+ TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
5795+ sem->hp.task->comm, sem->hp.task->pid);
5796+ } else
5797+ TRACE_TASK(new_owner,
5798+ "cannot inherit priority, "
5799+ "no higher priority job waits.\n");
5800+ return 0;
5801+}
5802+
5803+/* This function is called on a semaphore release, and assumes that
5804+ * the current task is also the semaphore holder.
5805+ */
5806+static long gsnedf_return_priority(struct pi_semaphore *sem)
5807+{
5808+ struct task_struct* t = current;
5809+ int ret = 0;
5810+
5811+ /* Find new highest-priority semaphore task
5812+ * if holder task is the current hp.task.
5813+ *
5814+ * Calling function holds sem->wait.lock.
5815+ */
5816+ if (t == sem->hp.task)
5817+ edf_set_hp_task(sem);
5818+
5819+ TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
5820+
5821+ if (t->rt_param.inh_task) {
5822+ /* interrupts already disabled by PI code */
5823+ spin_lock(&gsnedf_lock);
5824+
5825+ /* Reset inh_task to NULL. */
5826+ t->rt_param.inh_task = NULL;
5827+
5828+ /* Check if rescheduling is necessary */
5829+ unlink(t);
5830+ gsnedf_job_arrival(t);
5831+ spin_unlock(&gsnedf_lock);
5832+ }
5833+
5834+ return ret;
5835+}
5836+
5837+#endif
5838+
5839+static long gsnedf_admit_task(struct task_struct* tsk)
5840+{
5841+ return 0;
5842+}
5843+
5844+
5845+/* Plugin object */
5846+static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
5847+ .plugin_name = "GSN-EDF",
5848+ .finish_switch = gsnedf_finish_switch,
5849+ .tick = gsnedf_tick,
5850+ .task_new = gsnedf_task_new,
5851+ .complete_job = complete_job,
5852+ .task_exit = gsnedf_task_exit,
5853+ .schedule = gsnedf_schedule,
5854+ .task_wake_up = gsnedf_task_wake_up,
5855+ .task_block = gsnedf_task_block,
5856+#ifdef CONFIG_FMLP
5857+ .fmlp_active = 1,
5858+ .pi_block = gsnedf_pi_block,
5859+ .inherit_priority = gsnedf_inherit_priority,
5860+ .return_priority = gsnedf_return_priority,
5861+#endif
5862+ .admit_task = gsnedf_admit_task
5863+};
5864+
5865+
5866+static int __init init_gsn_edf(void)
5867+{
5868+ int cpu;
5869+ cpu_entry_t *entry;
5870+
5871+ /* initialize CPU state */
5872+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
5873+ entry = &per_cpu(gsnedf_cpu_entries, cpu);
5874+ gsnedf_cpus[cpu] = entry;
5875+ atomic_set(&entry->will_schedule, 0);
5876+ entry->linked = NULL;
5877+ entry->scheduled = NULL;
5878+ entry->cpu = cpu;
5879+ INIT_LIST_HEAD(&entry->list);
5880+ }
5881+
5882+ edf_domain_init(&gsnedf, NULL, gsnedf_job_release);
5883+ return register_sched_plugin(&gsn_edf_plugin);
5884+}
5885+
5886+
5887+module_init(init_gsn_edf);
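/* Illustrative sketch (not part of the patch): link_task_to_cpu(), unlink()
 * and the FMLP callbacks above all rely on an EDF priority test of roughly
 * the following shape. This is a simplified stand-in for edf_common.c's
 * edf_higher_prio(), not the in-tree implementation; it assumes that
 * get_deadline() exposes the job's absolute deadline and that inh_task
 * carries the inherited priority, as suggested by the code above.
 */
static inline int sketch_edf_higher_prio(struct task_struct* first,
					 struct task_struct* second)
{
	/* honor priority inheritance by comparing the effective tasks */
	struct task_struct* f = first && first->rt_param.inh_task ?
		first->rt_param.inh_task : first;
	struct task_struct* s = second && second->rt_param.inh_task ?
		second->rt_param.inh_task : second;

	if (!f)
		return 0;
	if (!s || !is_realtime(s))
		return 1;
	/* earlier absolute deadline wins; deadline ties are broken by PID
	 * so that the order is total and consistent across CPUs */
	return lt_before(get_deadline(f), get_deadline(s)) ||
		(get_deadline(f) == get_deadline(s) && f->pid < s->pid);
}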
5888diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
5889new file mode 100644
5890index 0000000..1e541e4
5891--- /dev/null
5892+++ b/litmus/sched_litmus.c
5893@@ -0,0 +1,228 @@
5894+/* This file is included from kernel/sched.c */
5895+
5896+#include <litmus/litmus.h>
5897+#include <litmus/sched_plugin.h>
5898+
5899+static void update_time_litmus(struct rq *rq, struct task_struct *p)
5900+{
5901+ lt_t now = litmus_clock();
5902+ p->rt_param.job_params.exec_time +=
5903+ now - p->rt_param.job_params.exec_start;
5904+ p->rt_param.job_params.exec_start = now;
5905+}
5906+
5907+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
5908+static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
5909+
5910+static void litmus_tick(struct rq *rq, struct task_struct *p)
5911+{
5912+ if (is_realtime(p))
5913+ update_time_litmus(rq, p);
5914+ litmus->tick(p);
5915+}
5916+
5917+#define NO_CPU -1
5918+
5919+static void litmus_schedule(struct rq *rq, struct task_struct *prev)
5920+{
5921+ struct rq* other_rq;
5922+ long prev_state;
5923+ lt_t _maybe_deadlock = 0;
5924+ /* WARNING: rq is _not_ locked! */
5925+ if (is_realtime(prev))
5926+ update_time_litmus(rq, prev);
5927+
5928+ /* let the plugin schedule */
5929+ rq->litmus_next = litmus->schedule(prev);
5930+
5931+ /* check if a global plugin pulled a task from a different RQ */
5932+ if (rq->litmus_next && task_rq(rq->litmus_next) != rq) {
5933+ /* we need to migrate the task */
5934+ other_rq = task_rq(rq->litmus_next);
5935+ TRACE_TASK(rq->litmus_next, "migrate from %d\n", other_rq->cpu);
5936+
5937+ /* while we drop the lock, the prev task could change its
5938+ * state
5939+ */
5940+ prev_state = prev->state;
5941+ mb();
5942+ spin_unlock(&rq->lock);
5943+
5944+ /* Don't race with a concurrent switch.
5945+ * This could deadlock in the case of cross or circular migrations.
5946+ * It's the job of the plugin to make sure that doesn't happen.
5947+ */
5948+ TRACE_TASK(rq->litmus_next, "stack_in_use=%d\n",
5949+ rq->litmus_next->rt_param.stack_in_use);
5950+ if (rq->litmus_next->rt_param.stack_in_use != NO_CPU) {
5951+ TRACE_TASK(rq->litmus_next, "waiting to deschedule\n");
5952+ _maybe_deadlock = litmus_clock();
5953+ }
5954+ while (rq->litmus_next->rt_param.stack_in_use != NO_CPU) {
5955+ cpu_relax();
5956+ mb();
5957+ if (rq->litmus_next->rt_param.stack_in_use == NO_CPU)
5958+ TRACE_TASK(rq->litmus_next,
5959+ "descheduled. Proceeding.\n");
5960+ if (lt_before(_maybe_deadlock + 10000000, litmus_clock())) {
5961+ /* We've been spinning for 10ms.
5962+ * Something can't be right!
5963+ * Let's abandon the task and bail out; at least
5964+ * we will have debug info instead of a hard
5965+ * deadlock.
5966+ */
5967+ TRACE_TASK(rq->litmus_next,
5968+ "stack too long in use. Deadlock?\n");
5969+ rq->litmus_next = NULL;
5970+
5971+ /* bail out */
5972+ spin_lock(&rq->lock);
5973+ return;
5974+ }
5975+ }
5976+#ifdef __ARCH_WANT_UNLOCKED_CTXSW
5977+ if (rq->litmus_next->oncpu)
5978+ TRACE_TASK(rq->litmus_next, "waiting for !oncpu");
5979+ while (rq->litmus_next->oncpu) {
5980+ cpu_relax();
5981+ mb();
5982+ }
5983+#endif
5984+ double_rq_lock(rq, other_rq);
5985+ mb();
5986+ if (prev->state != prev_state && is_realtime(prev)) {
5987+ TRACE_TASK(prev,
5988+ "state changed while we dropped"
5989+ " the lock: now=%d, old=%d\n",
5990+ prev->state, prev_state);
5991+ if (prev_state && !prev->state) {
5992+ /* prev task became unblocked;
5993+ * we need to simulate the normal sequence of events
5994+ * for the scheduler plugin.
5995+ */
5996+ litmus->task_block(prev);
5997+ litmus->task_wake_up(prev);
5998+ }
5999+ }
6000+
6001+ set_task_cpu(rq->litmus_next, smp_processor_id());
6002+
6003+ /* DEBUG: now that we have the lock we need to make sure a
6004+ * couple of things still hold:
6005+ * - it is still a real-time task
6006+ * - it is still runnable (could have been stopped)
6007+ */
6008+ if (!is_realtime(rq->litmus_next) ||
6009+ !is_running(rq->litmus_next)) {
6010+ /* BAD BAD BAD */
6011+ TRACE_TASK(rq->litmus_next,
6012+ "migration invariant FAILED: rt=%d running=%d\n",
6013+ is_realtime(rq->litmus_next),
6014+ is_running(rq->litmus_next));
6015+ /* drop the task */
6016+ rq->litmus_next = NULL;
6017+ }
6018+ /* release the other CPU's runqueue, but keep ours */
6019+ spin_unlock(&other_rq->lock);
6020+ }
6021+ if (rq->litmus_next)
6022+ rq->litmus_next->rt_param.stack_in_use = rq->cpu;
6023+}
6024+
6025+static void enqueue_task_litmus(struct rq *rq, struct task_struct *p, int wakeup)
6026+{
6027+ if (wakeup)
6028+ litmus->task_wake_up(p);
6029+ else
6030+ TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
6031+}
6032+
6033+static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep)
6034+{
6035+ if (sleep)
6036+ litmus->task_block(p);
6037+ else
6038+ TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
6039+}
6040+
6041+static void yield_task_litmus(struct rq *rq)
6042+{
6043+ BUG_ON(rq->curr != current);
6044+ litmus->complete_job();
6045+}
6046+
6047+/* Plugins are responsible for this.
6048+ */
6049+static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p)
6050+{
6051+}
6052+
6053+/* has already been taken care of */
6054+static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
6055+{
6056+}
6057+
6058+static struct task_struct *pick_next_task_litmus(struct rq *rq)
6059+{
6060+ struct task_struct* picked = rq->litmus_next;
6061+ rq->litmus_next = NULL;
6062+ if (picked)
6063+ picked->rt_param.job_params.exec_start = litmus_clock();
6064+ return picked;
6065+}
6066+
6067+static void task_tick_litmus(struct rq *rq, struct task_struct *p)
6068+{
6069+}
6070+
6071+/* This is called when a task becomes a real-time task, either due
6072+ * to a SCHED_* class transition or due to PI mutex inheritance.
6073+ * We don't handle Linux PI mutex inheritance yet. Use LITMUS-provided
6074+ * synchronization primitives instead.
6075+ */
6076+static void set_curr_task_litmus(struct rq *rq)
6077+{
6078+ rq->curr->rt_param.job_params.exec_start = litmus_clock();
6079+}
6080+
6081+
6082+#ifdef CONFIG_SMP
6083+
6084+/* we don't repartition at runtime */
6085+
6086+static unsigned long
6087+load_balance_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest,
6088+ unsigned long max_load_move,
6089+ struct sched_domain *sd, enum cpu_idle_type idle,
6090+ int *all_pinned, int *this_best_prio)
6091+{
6092+ return 0;
6093+}
6094+
6095+static int
6096+move_one_task_litmus(struct rq *this_rq, int this_cpu, struct rq *busiest,
6097+ struct sched_domain *sd, enum cpu_idle_type idle)
6098+{
6099+ return 0;
6100+}
6101+#endif
6102+
6103+const struct sched_class litmus_sched_class = {
6104+ .next = &rt_sched_class,
6105+ .enqueue_task = enqueue_task_litmus,
6106+ .dequeue_task = dequeue_task_litmus,
6107+ .yield_task = yield_task_litmus,
6108+
6109+ .check_preempt_curr = check_preempt_curr_litmus,
6110+
6111+ .pick_next_task = pick_next_task_litmus,
6112+ .put_prev_task = put_prev_task_litmus,
6113+
6114+#ifdef CONFIG_SMP
6115+ .load_balance = load_balance_litmus,
6116+ .move_one_task = move_one_task_litmus,
6117+#endif
6118+
6119+ .set_curr_task = set_curr_task_litmus,
6120+ .task_tick = task_tick_litmus,
6121+};
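/* Illustrative sketch (not part of the patch): the accounting above charges a
 * job only for the wall-clock time between an exec_start stamp (taken in
 * pick_next_task_litmus() and set_curr_task_litmus()) and the next call to
 * update_time_litmus(). The stand-alone analogue below uses a fake clock to
 * make the charging pattern explicit; all names are invented for the example.
 */
#include <stdio.h>

struct fake_job { unsigned long long exec_time, exec_start; };

static unsigned long long fake_clock;		/* stands in for litmus_clock() */

static void charge(struct fake_job *j)		/* like update_time_litmus() */
{
	j->exec_time += fake_clock - j->exec_start;
	j->exec_start = fake_clock;
}

int main(void)
{
	struct fake_job j = { 0, 0 };

	j.exec_start = fake_clock = 100;	/* job picked */
	fake_clock = 150; charge(&j);		/* tick: 50 units charged */
	fake_clock = 180; charge(&j);		/* descheduled: 30 more */
	/* the suspension from 180 to 400 is not charged, because exec_start
	 * is re-stamped when the job is picked again */
	j.exec_start = fake_clock = 400;	/* picked again */
	fake_clock = 425; charge(&j);		/* 25 more */
	printf("charged %llu units\n", j.exec_time);	/* prints "charged 105 units" */
	return 0;
}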
6122diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
6123new file mode 100755
6124index 0000000..6f95688
6125--- /dev/null
6126+++ b/litmus/sched_pfair.c
6127@@ -0,0 +1,785 @@
6128+/*
6129+ * kernel/sched_pfair.c
6130+ *
6131+ * Implementation of the (global) Pfair scheduling algorithm.
6132+ *
6133+ */
6134+
6135+#include <asm/div64.h>
6136+#include <linux/delay.h>
6137+#include <linux/module.h>
6138+#include <linux/spinlock.h>
6139+#include <linux/percpu.h>
6140+#include <linux/sched.h>
6141+#include <linux/list.h>
6142+
6143+#include <litmus/litmus.h>
6144+#include <litmus/jobs.h>
6145+#include <litmus/rt_domain.h>
6146+#include <litmus/sched_plugin.h>
6147+#include <litmus/sched_trace.h>
6148+
6149+#include <litmus/heap.h>
6150+
6151+/* Tick period is used to convert ns-specified execution
6152+ * costs and periods into tick-based equivalents.
6153+ */
6154+extern ktime_t tick_period;
6155+
6156+/* make the unit explicit */
6157+typedef unsigned long quanta_t;
6158+
6159+struct subtask {
6160+ /* measured in quanta relative to job release */
6161+ quanta_t release;
6162+ quanta_t deadline;
6163+ quanta_t overlap; /* called "b bit" by PD^2 */
6164+ quanta_t group_deadline;
6165+};
6166+
6167+struct pfair_param {
6168+ quanta_t quanta; /* number of subtasks */
6169+ quanta_t cur; /* index of current subtask */
6170+
6171+ quanta_t release; /* in quanta */
6172+ quanta_t period; /* in quanta */
6173+
6174+ quanta_t last_quantum; /* when scheduled last */
6175+ int last_cpu; /* where scheduled last */
6176+
6177+ unsigned int present; /* Can the task be scheduled? */
6178+
6179+ struct subtask subtasks[0]; /* allocate together with pfair_param */
6180+};
6181+
6182+#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
6183+
6184+struct pfair_state {
6185+ int cpu;
6186+ volatile quanta_t cur_tick; /* updated by the CPU that is advancing
6187+ * the time */
6188+ volatile quanta_t local_tick; /* What tick is the local CPU currently
6189+ * executing? Updated only by the local
6190+ * CPU. In QEMU, this may lag behind the
6191+ * current tick. In a real system, with
6192+ * proper timers and aligned quanta,
6193+ * that should only be the
6194+ * case for a very short time after the
6195+ * time advanced. With staggered quanta,
6196+ * it will lag for the duration of the
6197+ * offset.
6198+ */
6199+
6200+ struct task_struct* linked; /* the task that should be executing */
6201+ struct task_struct* local; /* the local copy of linked */
6202+ struct task_struct* scheduled; /* what is actually scheduled */
6203+
6204+ unsigned long missed_quanta;
6205+};
6206+
6207+/* Currently, we limit the maximum period of any task to 1000 quanta.
6208+ * The reason is that it makes the implementation easier since we do not
6209+ * need to reallocate the release wheel on task arrivals.
6210+ * In the future, this restriction may be lifted.
6211+ */
6212+#define PFAIR_MAX_PERIOD 1000
6213+
6214+/* This is the release queue wheel. It is indexed by pfair_time %
6215+ * PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR priority, so that it can be
6216+ * merged with the ready queue.
6217+ */
6218+static struct heap release_queue[PFAIR_MAX_PERIOD];
6219+
6220+DEFINE_PER_CPU(struct pfair_state, pfair_state);
6221+struct pfair_state* pstate[NR_CPUS]; /* short cut */
6222+
6223+#define NO_CPU 0xffffffff
6224+
6225+static quanta_t pfair_time = 0; /* the "official" PFAIR clock */
6226+static quanta_t merge_time = 0; /* Updated after the release queue has been
6227+ * merged. Used by drop_all_references().
6228+ */
6229+
6230+static rt_domain_t pfair;
6231+
6232+/* The pfair_lock is used to serialize all scheduling events.
6233+ */
6234+#define pfair_lock pfair.ready_lock
6235+
6236+/* Enable for lots of trace info.
6237+ * #define PFAIR_DEBUG
6238+ */
6239+
6240+#ifdef PFAIR_DEBUG
6241+#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
6242+#define PTRACE(f, args...) TRACE(f, ## args)
6243+#else
6244+#define PTRACE_TASK(t, f, args...)
6245+#define PTRACE(f, args...)
6246+#endif
6247+
6248+/* gcc will inline all of these accessor functions... */
6249+static struct subtask* cur_subtask(struct task_struct* t)
6250+{
6251+ return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
6252+}
6253+
6254+static quanta_t cur_deadline(struct task_struct* t)
6255+{
6256+ return cur_subtask(t)->deadline + tsk_pfair(t)->release;
6257+}
6258+
6259+static quanta_t cur_release(struct task_struct* t)
6260+{
6261+#ifdef EARLY_RELEASE
6262+ /* only the release of the first subtask counts when we early
6263+ * release */
6264+ return tsk_pfair(t)->release;
6265+#else
6266+ return cur_subtask(t)->release + tsk_pfair(t)->release;
6267+#endif
6268+}
6269+
6270+static quanta_t cur_sub_release(struct task_struct* t)
6271+{
6272+ return cur_subtask(t)->release + tsk_pfair(t)->release;
6273+}
6274+
6275+static quanta_t cur_overlap(struct task_struct* t)
6276+{
6277+ return cur_subtask(t)->overlap;
6278+}
6279+
6280+static quanta_t cur_group_deadline(struct task_struct* t)
6281+{
6282+ quanta_t gdl = cur_subtask(t)->group_deadline;
6283+ if (gdl)
6284+ return gdl + tsk_pfair(t)->release;
6285+ else
6286+ return gdl;
6287+}
6288+
6289+enum round {
6290+ FLOOR,
6291+ CEIL
6292+};
6293+
6294+static quanta_t time2quanta(lt_t time, enum round round)
6295+{
6296+ s64 quantum_length = ktime_to_ns(tick_period);
6297+
6298+ if (do_div(time, quantum_length) && round == CEIL)
6299+ time++;
6300+ return (quanta_t) time;
6301+}
6302+
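/* Example (not part of the patch): with a 1 ms tick_period, a 2.5 ms value
 * converts to 2 quanta under FLOOR and 3 quanta under CEIL, which is the
 * behaviour pfair_admit_task() below relies on (ceiling for the execution
 * cost, floor for the period). Stand-alone analogue with plain division
 * in place of do_div():
 */
#include <stdio.h>

int main(void)
{
	unsigned long long quantum_ns = 1000000ULL;	/* assumed 1 ms quantum */
	unsigned long long t = 2500000ULL;		/* 2.5 ms */
	unsigned long long floor_q = t / quantum_ns;
	unsigned long long ceil_q = floor_q + (t % quantum_ns ? 1 : 0);

	printf("FLOOR=%llu CEIL=%llu\n", floor_q, ceil_q);	/* FLOOR=2 CEIL=3 */
	return 0;
}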
6303+static int pfair_higher_prio(struct task_struct* first,
6304+ struct task_struct* second)
6305+{
6306+ return /* first task must exist */
6307+ first && (
6308+ /* Does the second task exist and is it a real-time task? If
6309+ * not, the first task (which is a RT task) has higher
6310+ * priority.
6311+ */
6312+ !second || !is_realtime(second) ||
6313+
6314+ /* Is the (subtask) deadline of the first task earlier?
6315+ * Then it has higher priority.
6316+ */
6317+ time_before(cur_deadline(first), cur_deadline(second)) ||
6318+
6319+ /* Do we have a deadline tie?
6320+ * Then break by B-bit.
6321+ */
6322+ (cur_deadline(first) == cur_deadline(second) &&
6323+ cur_overlap(first) > cur_overlap(second)) ||
6324+
6325+ /* Do we have a B-bit tie?
6326+ * Then break by group deadline.
6327+ */
6328+ (cur_overlap(first) == cur_overlap(second) &&
6329+ time_after(cur_group_deadline(first),
6330+ cur_group_deadline(second))) ||
6331+
6332+ /* Do we have a group deadline tie?
6333+ * Then break by PID, which are unique.
6334+ */
6335+ (cur_group_deadline(first) ==
6336+ cur_group_deadline(second) &&
6337+ first->pid < second->pid));
6338+}
6339+
6340+int pfair_ready_order(struct heap_node* a, struct heap_node* b)
6341+{
6342+ return pfair_higher_prio(heap2task(a), heap2task(b));
6343+}
6344+
6345+/* return the proper release queue for time t */
6346+static struct heap* relq(quanta_t t)
6347+{
6348+ struct heap* rq = &release_queue[t % PFAIR_MAX_PERIOD];
6349+ return rq;
6350+}
6351+
6352+static void prepare_release(struct task_struct* t, quanta_t at)
6353+{
6354+ tsk_pfair(t)->release = at;
6355+ tsk_pfair(t)->cur = 0;
6356+}
6357+
6358+static void __pfair_add_release(struct task_struct* t, struct heap* queue)
6359+{
6360+ heap_insert(pfair_ready_order, queue,
6361+ tsk_rt(t)->heap_node);
6362+}
6363+
6364+static void pfair_add_release(struct task_struct* t)
6365+{
6366+ BUG_ON(heap_node_in_heap(tsk_rt(t)->heap_node));
6367+ __pfair_add_release(t, relq(cur_release(t)));
6368+}
6369+
6370+/* pull released tasks from the release queue */
6371+static void poll_releases(quanta_t time)
6372+{
6373+ heap_union(pfair_ready_order, &pfair.ready_queue, relq(time));
6374+ merge_time = time;
6375+}
6376+
6377+static void check_preempt(struct task_struct* t)
6378+{
6379+ int cpu = NO_CPU;
6380+ if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
6381+ tsk_pfair(t)->present) {
6382+ /* the task can be scheduled and
6383+ * is not scheduled where it ought to be scheduled
6384+ */
6385+ cpu = tsk_rt(t)->linked_on != NO_CPU ?
6386+ tsk_rt(t)->linked_on :
6387+ tsk_rt(t)->scheduled_on;
6388+ PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
6389+ tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
6390+ /* preempt */
6391+ if (cpu == smp_processor_id())
6392+ set_tsk_need_resched(current);
6393+ else {
6394+ smp_send_reschedule(cpu);
6395+ }
6396+ }
6397+}
6398+
6399+/* returns 1 if the task needs to go the release queue */
6400+static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
6401+{
6402+ struct pfair_param* p = tsk_pfair(t);
6403+
6404+ p->cur = (p->cur + 1) % p->quanta;
6405+ TRACE_TASK(t, "on %d advanced to subtask %lu\n",
6406+ cpu,
6407+ p->cur);
6408+ if (!p->cur) {
6409+ /* we start a new job */
6410+ set_rt_flags(t, RT_F_RUNNING);
6411+ prepare_for_next_period(t);
6412+ p->release += p->period;
6413+ }
6414+ return time_after(cur_release(t), time);
6415+}
6416+
6417+static void advance_subtasks(quanta_t time)
6418+{
6419+ int cpu, missed;
6420+ struct task_struct* l;
6421+ struct pfair_param* p;
6422+
6423+ for_each_online_cpu(cpu) {
6424+ l = pstate[cpu]->linked;
6425+ missed = pstate[cpu]->linked != pstate[cpu]->local;
6426+ if (l) {
6427+ p = tsk_pfair(l);
6428+ p->last_quantum = time;
6429+ p->last_cpu = cpu;
6430+ if (advance_subtask(time, l, cpu)) {
6431+ pstate[cpu]->linked = NULL;
6432+ pfair_add_release(l);
6433+ }
6434+ }
6435+ }
6436+}
6437+
6438+static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
6439+{
6440+ int cpu;
6441+ if (tsk_rt(t)->scheduled_on != NO_CPU) {
6442+ /* always observe scheduled_on linkage */
6443+ default_cpu = tsk_rt(t)->scheduled_on;
6444+ PTRACE_TASK(t, "forced on %d (scheduled on)\n", default_cpu);
6445+ } else if (tsk_pfair(t)->last_quantum == time - 1) {
6446+ /* back2back quanta */
6447+ /* Only observe last_quantum if no scheduled_on is in the way.
6448+ * This should only kick in if a CPU missed quanta, and that
6449+ * *should* only happen in QEMU.
6450+ */
6451+ cpu = tsk_pfair(t)->last_cpu;
6452+ if (!pstate[cpu]->linked ||
6453+ tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
6454+ default_cpu = cpu;
6455+ PTRACE_TASK(t, "forced on %d (linked on)\n",
6456+ default_cpu);
6457+ } else {
6458+ PTRACE_TASK(t, "DID NOT force on %d (linked on)\n",
6459+ default_cpu);
6460+ }
6461+ }
6462+ return default_cpu;
6463+}
6464+
6465+/* returns one if linking was redirected */
6466+static int pfair_link(quanta_t time, int cpu,
6467+ struct task_struct* t)
6468+{
6469+ int target = target_cpu(time, t, cpu);
6470+ struct task_struct* prev = pstate[cpu]->linked;
6471+ struct task_struct* other;
6472+
6473+ PTRACE_TASK(t, "linked to %d for quantum %lu\n", target, time);
6474+ if (target != cpu) {
6475+ other = pstate[target]->linked;
6476+ pstate[target]->linked = t;
6477+ tsk_rt(t)->linked_on = target;
6478+ if (!other)
6479+ /* linked ok, but reschedule this CPU */
6480+ return 1;
6481+ if (target < cpu) {
6482+ /* link other to cpu instead */
6483+ tsk_rt(other)->linked_on = cpu;
6484+ pstate[cpu]->linked = other;
6485+ if (prev) {
6486+ /* prev got pushed back into the ready queue */
6487+ tsk_rt(prev)->linked_on = NO_CPU;
6488+ __add_ready(&pfair, prev);
6489+ }
6490+ /* we are done with this cpu */
6491+ return 0;
6492+ } else {
6493+ /* re-add other, its original CPU was not considered yet */
6494+ tsk_rt(other)->linked_on = NO_CPU;
6495+ __add_ready(&pfair, other);
6496+ /* reschedule this CPU */
6497+ return 1;
6498+ }
6499+ } else {
6500+ pstate[cpu]->linked = t;
6501+ tsk_rt(t)->linked_on = cpu;
6502+ if (prev) {
6503+ /* prev got pushed back into the ready queue */
6504+ tsk_rt(prev)->linked_on = NO_CPU;
6505+ __add_ready(&pfair, prev);
6506+ }
6507+ /* we are done with this CPU */
6508+ return 0;
6509+ }
6510+}
6511+
6512+static void schedule_subtasks(quanta_t time)
6513+{
6514+ int cpu, retry;
6515+
6516+ for_each_online_cpu(cpu) {
6517+ retry = 1;
6518+ while (retry) {
6519+ if (pfair_higher_prio(__peek_ready(&pfair),
6520+ pstate[cpu]->linked))
6521+ retry = pfair_link(time, cpu,
6522+ __take_ready(&pfair));
6523+ else
6524+ retry = 0;
6525+ }
6526+ }
6527+}
6528+
6529+static void schedule_next_quantum(quanta_t time)
6530+{
6531+ int cpu;
6532+
6533+ PTRACE("<<< Q %lu at %llu\n",
6534+ time, litmus_clock());
6535+
6536+ /* called with interrupts disabled */
6537+ spin_lock(&pfair_lock);
6538+
6539+ advance_subtasks(time);
6540+ poll_releases(time);
6541+ schedule_subtasks(time);
6542+
6543+ spin_unlock(&pfair_lock);
6544+
6545+ /* We are done. Advance time. */
6546+ mb();
6547+ for (cpu = 0; cpu < NR_CPUS; cpu++)
6548+ pstate[cpu]->cur_tick = pfair_time;
6549+ PTRACE(">>> Q %lu at %llu\n",
6550+ time, litmus_clock());
6551+}
6552+
6553+/* pfair_tick - this function is called for every local timer
6554+ * interrupt.
6555+ */
6556+static void pfair_tick(struct task_struct* t)
6557+{
6558+ struct pfair_state* state = &__get_cpu_var(pfair_state);
6559+ quanta_t time, loc, cur;
6560+
6561+ /* Attempt to advance time. The first CPU to get here
6562+ * will prepare the next quantum.
6563+ */
6564+ time = cmpxchg(&pfair_time,
6565+ state->local_tick, /* expected */
6566+ state->local_tick + 1 /* next */
6567+ );
6568+ if (time == state->local_tick)
6569+ /* exchange succeeded */
6570+ schedule_next_quantum(time + 1);
6571+
6572+ /* Spin locally until time advances. */
6573+ while (1) {
6574+ mb();
6575+ cur = state->cur_tick;
6576+ loc = state->local_tick;
6577+ if (time_before(loc, cur)) {
6578+ if (loc + 1 != cur) {
6579+ TRACE("MISSED quantum! loc:%lu -> cur:%lu\n",
6580+ loc, cur);
6581+ state->missed_quanta++;
6582+ }
6583+ break;
6584+ }
6585+ cpu_relax();
6586+ }
6587+
6588+ /* copy state info */
6589+ state->local_tick = state->cur_tick;
6590+ state->local = state->linked;
6591+ if (state->local && tsk_pfair(state->local)->present &&
6592+ state->local != current)
6593+ set_tsk_need_resched(current);
6594+}
6595+
6596+static int safe_to_schedule(struct task_struct* t, int cpu)
6597+{
6598+ int where = tsk_rt(t)->scheduled_on;
6599+ if (where != NO_CPU && where != cpu) {
6600+ TRACE_TASK(t, "BAD: can't be scheduled on %d, "
6601+ "scheduled already on %d.\n", cpu, where);
6602+ return 0;
6603+ } else
6604+ return tsk_pfair(t)->present && get_rt_flags(t) == RT_F_RUNNING;
6605+}
6606+
6607+static struct task_struct* pfair_schedule(struct task_struct * prev)
6608+{
6609+ struct pfair_state* state = &__get_cpu_var(pfair_state);
6610+ int blocks;
6611+ struct task_struct* next = NULL;
6612+
6613+ spin_lock(&pfair_lock);
6614+
6615+ blocks = is_realtime(prev) && !is_running(prev);
6616+
6617+ if (blocks)
6618+ tsk_pfair(prev)->present = 0;
6619+
6620+ if (state->local && safe_to_schedule(state->local, state->cpu))
6621+ next = state->local;
6622+
6623+ if (prev != next) {
6624+ tsk_rt(prev)->scheduled_on = NO_CPU;
6625+ if (next)
6626+ tsk_rt(next)->scheduled_on = state->cpu;
6627+ }
6628+
6629+ spin_unlock(&pfair_lock);
6630+
6631+ if (next)
6632+ TRACE_TASK(next, "scheduled rel=%lu at %lu\n",
6633+ tsk_pfair(next)->release, pfair_time);
6634+ else if (is_realtime(prev))
6635+ TRACE("Becomes idle at %lu\n", pfair_time);
6636+
6637+ return next;
6638+}
6639+
6640+static void pfair_task_new(struct task_struct * t, int on_rq, int running)
6641+{
6642+ unsigned long flags;
6643+
6644+ TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
6645+
6646+ spin_lock_irqsave(&pfair_lock, flags);
6647+ if (running)
6648+ t->rt_param.scheduled_on = task_cpu(t);
6649+ else
6650+ t->rt_param.scheduled_on = NO_CPU;
6651+
6652+ prepare_release(t, pfair_time + 1);
6653+ tsk_pfair(t)->present = running;
6654+ pfair_add_release(t);
6655+ check_preempt(t);
6656+
6657+ spin_unlock_irqrestore(&pfair_lock, flags);
6658+}
6659+
6660+static void pfair_task_wake_up(struct task_struct *t)
6661+{
6662+ unsigned long flags;
6663+
6664+ TRACE_TASK(t, "wakes at %lld, release=%lu, pfair_time:%lu\n",
6665+ litmus_clock(), cur_release(t), pfair_time);
6666+
6667+ spin_lock_irqsave(&pfair_lock, flags);
6668+
6669+ tsk_pfair(t)->present = 1;
6670+
6671+ /* It is a little unclear how to deal with Pfair
6672+ * tasks that block for a while and then wake.
6673+ * For now, we assume that such suspensions are included
6674+ * in the stated execution time of the task, and thus
6675+ * count as execution time for our purposes. Thus, if the
6676+ * task is currently linked somewhere, it may resume, otherwise
6677+ * it has to wait for its next quantum allocation.
6678+ */
6679+
6680+ check_preempt(t);
6681+
6682+ spin_unlock_irqrestore(&pfair_lock, flags);
6683+}
6684+
6685+static void pfair_task_block(struct task_struct *t)
6686+{
6687+ BUG_ON(!is_realtime(t));
6688+ TRACE_TASK(t, "blocks at %lld, state:%d\n",
6689+ (lt_t) jiffies, t->state);
6690+}
6691+
6692+/* caller must hold pfair_lock */
6693+static void drop_all_references(struct task_struct *t)
6694+{
6695+ int cpu;
6696+ struct pfair_state* s;
6697+ struct heap* q;
6698+ if (heap_node_in_heap(tsk_rt(t)->heap_node)) {
6699+ /* figure out what queue the node is in */
6700+ if (time_before_eq(cur_release(t), merge_time))
6701+ q = &pfair.ready_queue;
6702+ else
6703+ q = relq(cur_release(t));
6704+ heap_delete(pfair_ready_order, q,
6705+ tsk_rt(t)->heap_node);
6706+ }
6707+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
6708+ s = &per_cpu(pfair_state, cpu);
6709+ if (s->linked == t)
6710+ s->linked = NULL;
6711+ if (s->local == t)
6712+ s->local = NULL;
6713+ if (s->scheduled == t)
6714+ s->scheduled = NULL;
6715+ }
6716+}
6717+
6718+static void pfair_task_exit(struct task_struct * t)
6719+{
6720+ unsigned long flags;
6721+
6722+ BUG_ON(!is_realtime(t));
6723+
6724+ /* Remove the task from the release or ready queue, and ensure
6725+ * that it is not the scheduled task for ANY CPU. We
6726+ * do this blanket check because occasionally when
6727+ * tasks exit while blocked, the task_cpu of the task
6728+ * might not be the same as the CPU that the PFAIR scheduler
6729+ * has chosen for it.
6730+ */
6731+ spin_lock_irqsave(&pfair_lock, flags);
6732+
6733+ TRACE_TASK(t, "RIP, state:%d\n", t->state);
6734+ drop_all_references(t);
6735+
6736+ spin_unlock_irqrestore(&pfair_lock, flags);
6737+
6738+ kfree(t->rt_param.pfair);
6739+ t->rt_param.pfair = NULL;
6740+}
6741+
6742+
6743+static void pfair_release_at(struct task_struct* task, lt_t start)
6744+{
6745+ unsigned long flags;
6746+ lt_t now = litmus_clock();
6747+ quanta_t release, delta;
6748+
6749+ BUG_ON(!is_realtime(task));
6750+
6751+ spin_lock_irqsave(&pfair_lock, flags);
6752+ if (lt_before(now, start)) {
6753+ delta = time2quanta((long long) start - (long long) now, CEIL);
6754+ if (delta >= PFAIR_MAX_PERIOD)
6755+ delta = PFAIR_MAX_PERIOD - 1;
6756+ } else
6757+ delta = 10; /* release in 10 ticks */
6758+
6759+ release = pfair_time + delta;
6760+
6761+ drop_all_references(task);
6762+ prepare_release(task, release);
6763+ pfair_add_release(task);
6764+ spin_unlock_irqrestore(&pfair_lock, flags);
6765+}
6766+
6767+static void init_subtask(struct subtask* sub, unsigned long i,
6768+ lt_t quanta, lt_t period)
6769+{
6770+ /* since i is zero-based, the formulas are shifted by one */
6771+ lt_t tmp;
6772+
6773+ /* release */
6774+ tmp = period * i;
6775+ do_div(tmp, quanta); /* floor */
6776+ sub->release = (quanta_t) tmp;
6777+
6778+ /* deadline */
6779+ tmp = period * (i + 1);
6780+ if (do_div(tmp, quanta)) /* ceil */
6781+ tmp++;
6782+ sub->deadline = (quanta_t) tmp;
6783+
6784+ /* next release */
6785+ tmp = period * (i + 1);
6786+ do_div(tmp, quanta); /* floor */
6787+ sub->overlap = sub->deadline - (quanta_t) tmp;
6788+
6789+ /* Group deadline.
6790+ * Based on the formula given in Uma's thesis.
6791+ */
6792+ if (2 * quanta >= period) {
6793+ /* heavy */
6794+ tmp = (sub->deadline - (i + 1)) * period;
6795+ if (do_div(tmp, (period - quanta))) /* ceil */
6796+ tmp++;
6797+ sub->group_deadline = (quanta_t) tmp;
6798+ } else
6799+ sub->group_deadline = 0;
6800+}
6801+
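/* Worked example (not part of the patch): for a task with quanta = 3 and
 * period = 5 (weight 3/5), the formulas in init_subtask() above yield the
 * PD^2 parameters printed by this stand-alone program; do_div() is replaced
 * by plain integer division here.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long quanta = 3, period = 5, i;

	for (i = 0; i < quanta; i++) {
		unsigned long long release = period * i / quanta;		/* floor */
		unsigned long long deadline =
			(period * (i + 1) + quanta - 1) / quanta;		/* ceil */
		unsigned long long bbit =
			deadline - period * (i + 1) / quanta;			/* overlap */
		unsigned long long gdl = 0;

		if (2 * quanta >= period)	/* heavy task */
			gdl = ((deadline - (i + 1)) * period + period - quanta - 1)
				/ (period - quanta);				/* ceil */
		printf("subtask %llu: rel=%llu dl=%llu bbit=%llu gdl=%llu\n",
		       i + 1, release, deadline, bbit, gdl);
	}
	/* prints: rel=0 dl=2 bbit=1 gdl=3
	 *         rel=1 dl=4 bbit=1 gdl=5
	 *         rel=3 dl=5 bbit=0 gdl=5 */
	return 0;
}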
6802+static void dump_subtasks(struct task_struct* t)
6803+{
6804+ unsigned long i;
6805+ for (i = 0; i < t->rt_param.pfair->quanta; i++)
6806+ TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
6807+ i + 1,
6808+ t->rt_param.pfair->subtasks[i].release,
6809+ t->rt_param.pfair->subtasks[i].deadline,
6810+ t->rt_param.pfair->subtasks[i].overlap,
6811+ t->rt_param.pfair->subtasks[i].group_deadline);
6812+}
6813+
6814+static long pfair_admit_task(struct task_struct* t)
6815+{
6816+ lt_t quanta;
6817+ lt_t period;
6818+ s64 quantum_length = ktime_to_ns(tick_period);
6819+ struct pfair_param* param;
6820+ unsigned long i;
6821+
6822+ /* Pfair is a tick-based method, so the time
6823+ * of interest is jiffies. Calculate tick-based
6824+ * times for everything.
6825+ * (Ceiling of exec cost, floor of period.)
6826+ */
6827+
6828+ quanta = get_exec_cost(t);
6829+ period = get_rt_period(t);
6830+
6831+ quanta = time2quanta(get_exec_cost(t), CEIL);
6832+
6833+ if (do_div(period, quantum_length))
6834+ printk(KERN_WARNING
6835+ "The period of %s/%d is not a multiple of %llu.\n",
6836+ t->comm, t->pid, (unsigned long long) quantum_length);
6837+
6838+ if (period >= PFAIR_MAX_PERIOD) {
6839+ printk(KERN_WARNING
6840+ "PFAIR: Rejecting task %s/%d; its period is too long.\n",
6841+ t->comm, t->pid);
6842+ return -EINVAL;
6843+ }
6844+
6845+ param = kmalloc(sizeof(struct pfair_param) +
6846+ quanta * sizeof(struct subtask), GFP_ATOMIC);
6847+
6848+ if (!param)
6849+ return -ENOMEM;
6850+
6851+ param->quanta = quanta;
6852+ param->cur = 0;
6853+ param->release = 0;
6854+ param->period = period;
6855+
6856+ for (i = 0; i < quanta; i++)
6857+ init_subtask(param->subtasks + i, i, quanta, period);
6858+
6859+ if (t->rt_param.pfair)
6860+ /* get rid of stale allocation */
6861+ kfree(t->rt_param.pfair);
6862+
6863+ t->rt_param.pfair = param;
6864+
6865+ /* spew out some debug info */
6866+ dump_subtasks(t);
6867+
6868+ return 0;
6869+}
6870+
6871+/* Plugin object */
6872+static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
6873+ .plugin_name = "PFAIR",
6874+ .tick = pfair_tick,
6875+ .task_new = pfair_task_new,
6876+ .task_exit = pfair_task_exit,
6877+ .schedule = pfair_schedule,
6878+ .task_wake_up = pfair_task_wake_up,
6879+ .task_block = pfair_task_block,
6880+ .admit_task = pfair_admit_task,
6881+ .release_at = pfair_release_at,
6882+ .complete_job = complete_job
6883+};
6884+
6885+static int __init init_pfair(void)
6886+{
6887+ int cpu, i;
6888+ struct pfair_state *state;
6889+
6890+ /* initialize release queue */
6891+ for (i = 0; i < PFAIR_MAX_PERIOD; i++)
6892+ heap_init(&release_queue[i]);
6893+
6894+ /* initialize CPU state */
6895+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
6896+ state = &per_cpu(pfair_state, cpu);
6897+ state->cpu = cpu;
6898+ state->cur_tick = 0;
6899+ state->local_tick = 0;
6900+ state->linked = NULL;
6901+ state->local = NULL;
6902+ state->scheduled = NULL;
6903+ state->missed_quanta = 0;
6904+ pstate[cpu] = state;
6905+ }
6906+
6907+ rt_domain_init(&pfair, pfair_ready_order, NULL, NULL);
6908+ return register_sched_plugin(&pfair_plugin);
6909+}
6910+
6911+module_init(init_pfair);
6912+
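/* Illustrative sketch (not part of the patch): pfair_tick() elects exactly
 * one CPU per quantum boundary to call schedule_next_quantum(), by having
 * every CPU attempt cmpxchg(&pfair_time, local_tick, local_tick + 1); only
 * the CPU whose expected value matches wins, and the others spin until
 * cur_tick advances. The stand-alone fragment below mimics that election
 * with GCC's __sync builtin; the variables are plain globals here, not the
 * per-CPU state used above.
 */
#include <stdio.h>

static unsigned long pfair_time_demo;

static int try_advance(unsigned long local_tick)
{
	/* returns the value observed before the exchange, like kernel cmpxchg() */
	unsigned long seen = __sync_val_compare_and_swap(&pfair_time_demo,
							 local_tick,
							 local_tick + 1);
	return seen == local_tick;	/* non-zero iff this caller won */
}

int main(void)
{
	/* two CPUs racing at the same local tick: only the first one wins */
	printf("cpu0 wins: %d\n", try_advance(0));	/* 1, pfair_time_demo is now 1 */
	printf("cpu1 wins: %d\n", try_advance(0));	/* 0, value no longer matches */
	return 0;
}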
6913diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
6914new file mode 100644
6915index 0000000..497d703
6916--- /dev/null
6917+++ b/litmus/sched_plugin.c
6918@@ -0,0 +1,185 @@
6919+/* sched_plugin.c -- core infrastructure for the scheduler plugin system
6920+ *
6921+ * This file includes the initialization of the plugin system, the no-op Linux
6922+ * scheduler plugin and some dummy functions.
6923+ */
6924+
6925+#include <linux/list.h>
6926+#include <linux/spinlock.h>
6927+
6928+#include <litmus/litmus.h>
6929+#include <litmus/sched_plugin.h>
6930+
6931+#include <litmus/jobs.h>
6932+
6933+/*************************************************************
6934+ * Dummy plugin functions *
6935+ *************************************************************/
6936+
6937+static void litmus_dummy_finish_switch(struct task_struct * prev)
6938+{
6939+}
6940+
6941+static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
6942+{
6943+ return NULL;
6944+}
6945+
6946+static void litmus_dummy_tick(struct task_struct* tsk)
6947+{
6948+}
6949+
6950+static long litmus_dummy_admit_task(struct task_struct* tsk)
6951+{
6952+ printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
6953+ tsk->comm, tsk->pid);
6954+ return -EINVAL;
6955+}
6956+
6957+static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
6958+{
6959+}
6960+
6961+static void litmus_dummy_task_wake_up(struct task_struct *task)
6962+{
6963+}
6964+
6965+static void litmus_dummy_task_block(struct task_struct *task)
6966+{
6967+}
6968+
6969+static void litmus_dummy_task_exit(struct task_struct *task)
6970+{
6971+}
6972+
6973+static long litmus_dummy_complete_job(void)
6974+{
6975+ return -ENOSYS;
6976+}
6977+
6978+#ifdef CONFIG_FMLP
6979+
6980+static long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
6981+ struct task_struct *new_owner)
6982+{
6983+ return -ENOSYS;
6984+}
6985+
6986+static long litmus_dummy_return_priority(struct pi_semaphore *sem)
6987+{
6988+ return -ENOSYS;
6989+}
6990+
6991+static long litmus_dummy_pi_block(struct pi_semaphore *sem,
6992+ struct task_struct *new_waiter)
6993+{
6994+ return -ENOSYS;
6995+}
6996+
6997+#endif
6998+
6999+
7000+/* The default scheduler plugin. It doesn't do anything and lets Linux do its
7001+ * job.
7002+ */
7003+struct sched_plugin linux_sched_plugin = {
7004+ .plugin_name = "Linux",
7005+ .tick = litmus_dummy_tick,
7006+ .task_new = litmus_dummy_task_new,
7007+ .task_exit = litmus_dummy_task_exit,
7008+ .task_wake_up = litmus_dummy_task_wake_up,
7009+ .task_block = litmus_dummy_task_block,
7010+ .complete_job = litmus_dummy_complete_job,
7011+ .schedule = litmus_dummy_schedule,
7012+ .finish_switch = litmus_dummy_finish_switch,
7013+#ifdef CONFIG_FMLP
7014+ .inherit_priority = litmus_dummy_inherit_priority,
7015+ .return_priority = litmus_dummy_return_priority,
7016+ .pi_block = litmus_dummy_pi_block,
7017+#endif
7018+ .admit_task = litmus_dummy_admit_task
7019+};
7020+
7021+/*
7022+ * The reference to current plugin that is used to schedule tasks within
7023+ * the system. It stores references to the actual function implementations.
7024+ * It should be initialized by calling "init_***_plugin()".
7025+ */
7026+struct sched_plugin *litmus = &linux_sched_plugin;
7027+
7028+/* the list of registered scheduling plugins */
7029+static LIST_HEAD(sched_plugins);
7030+static DEFINE_SPINLOCK(sched_plugins_lock);
7031+
7032+#define CHECK(func) {\
7033+ if (!plugin->func) \
7034+ plugin->func = litmus_dummy_ ## func;}
7035+
7036+/* FIXME: get reference to module */
7037+int register_sched_plugin(struct sched_plugin* plugin)
7038+{
7039+ printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
7040+ plugin->plugin_name);
7041+
7042+ /* make sure we don't trip over null pointers later */
7043+ CHECK(finish_switch);
7044+ CHECK(schedule);
7045+ CHECK(tick);
7046+ CHECK(task_wake_up);
7047+ CHECK(task_exit);
7048+ CHECK(task_block);
7049+ CHECK(task_new);
7050+ CHECK(complete_job);
7051+#ifdef CONFIG_FMLP
7052+ CHECK(inherit_priority);
7053+ CHECK(return_priority);
7054+ CHECK(pi_block);
7055+#endif
7056+ CHECK(admit_task);
7057+
7058+ if (!plugin->release_at)
7059+ plugin->release_at = release_at;
7060+
7061+ spin_lock(&sched_plugins_lock);
7062+ list_add(&plugin->list, &sched_plugins);
7063+ spin_unlock(&sched_plugins_lock);
7064+
7065+ return 0;
7066+}
7067+
7068+
7069+/* FIXME: reference counting, etc. */
7070+struct sched_plugin* find_sched_plugin(const char* name)
7071+{
7072+ struct list_head *pos;
7073+ struct sched_plugin *plugin;
7074+
7075+ spin_lock(&sched_plugins_lock);
7076+ list_for_each(pos, &sched_plugins) {
7077+ plugin = list_entry(pos, struct sched_plugin, list);
7078+ if (!strcmp(plugin->plugin_name, name))
7079+ goto out_unlock;
7080+ }
7081+ plugin = NULL;
7082+
7083+out_unlock:
7084+ spin_unlock(&sched_plugins_lock);
7085+ return plugin;
7086+}
7087+
7088+int print_sched_plugins(char* buf, int max)
7089+{
7090+ int count = 0;
7091+ struct list_head *pos;
7092+ struct sched_plugin *plugin;
7093+
7094+ spin_lock(&sched_plugins_lock);
7095+ list_for_each(pos, &sched_plugins) {
7096+ plugin = list_entry(pos, struct sched_plugin, list);
7097+ count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
7098+ if (max - count <= 0)
7099+ break;
7100+ }
7101+ spin_unlock(&sched_plugins_lock);
7102+ return count;
7103+}
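/* Illustrative sketch (not part of the patch): because register_sched_plugin()
 * substitutes a litmus_dummy_* callback for every hook a plugin leaves NULL
 * (via the CHECK() macro above) and falls back to the generic release_at(),
 * a minimal plugin only needs a name plus the hooks it actually implements.
 * The plugin below is a hypothetical example, not one shipped with LITMUS^RT.
 */
static struct task_struct* noop_schedule(struct task_struct *prev)
{
	return NULL;	/* never selects a real-time task */
}

static struct sched_plugin noop_plugin = {
	.plugin_name = "NOOP-DEMO",
	.schedule    = noop_schedule,
	/* tick, task_new, admit_task, ... fall back to the litmus_dummy_*
	 * implementations; admit_task therefore rejects all tasks. */
};

static int __init init_noop_demo(void)
{
	return register_sched_plugin(&noop_plugin);
}

module_init(init_noop_demo);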
7104diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
7105new file mode 100644
7106index 0000000..0e9c9dd
7107--- /dev/null
7108+++ b/litmus/sched_psn_edf.c
7109@@ -0,0 +1,454 @@
7110+
7111+/*
7112+ * kernel/sched_psn_edf.c
7113+ *
7114+ * Implementation of the PSN-EDF scheduler plugin.
7115+ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
7116+ *
7117+ * Suspensions and non-preemptable sections are supported.
7118+ * Priority inheritance is not supported.
7119+ */
7120+
7121+#include <linux/percpu.h>
7122+#include <linux/sched.h>
7123+#include <linux/list.h>
7124+#include <linux/spinlock.h>
7125+
7126+#include <linux/module.h>
7127+
7128+#include <litmus/litmus.h>
7129+#include <litmus/jobs.h>
7130+#include <litmus/sched_plugin.h>
7131+#include <litmus/edf_common.h>
7132+
7133+
7134+typedef struct {
7135+ rt_domain_t domain;
7136+ int cpu;
7137+ struct task_struct* scheduled; /* only RT tasks */
7138+
7139+/* scheduling lock
7140+ */
7141+#define slock domain.ready_lock
7142+/* protects the domain and
7143+ * serializes scheduling decisions
7144+ */
7145+} psnedf_domain_t;
7146+
7147+DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
7148+
7149+#define local_edf (&__get_cpu_var(psnedf_domains).domain)
7150+#define local_pedf (&__get_cpu_var(psnedf_domains))
7151+#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
7152+#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
7153+#define task_edf(task) remote_edf(get_partition(task))
7154+#define task_pedf(task) remote_pedf(get_partition(task))
7155+
7156+
7157+static void psnedf_domain_init(psnedf_domain_t* pedf,
7158+ check_resched_needed_t check,
7159+ release_job_t release,
7160+ int cpu)
7161+{
7162+ edf_domain_init(&pedf->domain, check, release);
7163+ pedf->cpu = cpu;
7164+ pedf->scheduled = NULL;
7165+}
7166+
7167+static void requeue(struct task_struct* t, rt_domain_t *edf)
7168+{
7169+ if (t->state != TASK_RUNNING)
7170+ TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
7171+
7172+ set_rt_flags(t, RT_F_RUNNING);
7173+ if (is_released(t, litmus_clock()))
7174+ __add_ready(edf, t);
7175+ else
7176+ add_release(edf, t); /* it has got to wait */
7177+}
7178+
7179+/* we assume the lock is being held */
7180+static void preempt(psnedf_domain_t *pedf)
7181+{
7182+ if (smp_processor_id() == pedf->cpu) {
7183+ if (pedf->scheduled && is_np(pedf->scheduled))
7184+ request_exit_np(pedf->scheduled);
7185+ else
7186+ set_tsk_need_resched(current);
7187+ } else
7188+ /* in case it is a remote CPU, we have to defer
7189+ * the decision to the remote CPU
7190+ */
7191+ smp_send_reschedule(pedf->cpu);
7192+}
7193+
7194+/* This check is trivial in partitioned systems as we only have to consider
7195+ * the CPU of the partition.
7196+ */
7197+static int psnedf_check_resched(rt_domain_t *edf)
7198+{
7199+ psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
7200+ int ret = 0;
7201+
7202+ /* because this is a callback from rt_domain_t we already hold
7203+ * the necessary lock for the ready queue
7204+ */
7205+ if (edf_preemption_needed(edf, pedf->scheduled)) {
7206+ preempt(pedf);
7207+ ret = 1;
7208+ }
7209+ return ret;
7210+}
7211+
7212+static void psnedf_tick(struct task_struct *t)
7213+{
7214+ psnedf_domain_t *pedf = local_pedf;
7215+
7216+ /* Check for inconsistency. We don't need the lock for this since
7217+ * ->scheduled is only changed in schedule, which obviously is not
7218+ * executing in parallel on this CPU
7219+ */
7220+ BUG_ON(is_realtime(t) && t != pedf->scheduled);
7221+
7222+ if (is_realtime(t) && budget_exhausted(t)) {
7223+ if (!is_np(t))
7224+ set_tsk_need_resched(t);
7225+ else {
7226+ TRACE("psnedf_scheduler_tick: "
7227+ "%d is non-preemptable, "
7228+ "preemption delayed.\n", t->pid);
7229+ request_exit_np(t);
7230+ }
7231+ }
7232+}
7233+
7234+static void job_completion(struct task_struct* t)
7235+{
7236+ TRACE_TASK(t, "job_completion().\n");
7237+ set_rt_flags(t, RT_F_SLEEP);
7238+ prepare_for_next_period(t);
7239+}
7240+
7241+static struct task_struct* psnedf_schedule(struct task_struct * prev)
7242+{
7243+ psnedf_domain_t* pedf = local_pedf;
7244+ rt_domain_t* edf = &pedf->domain;
7245+ struct task_struct* next;
7246+
7247+ int out_of_time, sleep, preempt,
7248+ np, exists, blocks, resched;
7249+
7250+ spin_lock(&pedf->slock);
7251+
7252+ /* sanity checking */
7253+ BUG_ON(pedf->scheduled && pedf->scheduled != prev);
7254+ BUG_ON(pedf->scheduled && !is_realtime(prev));
7255+
7256+ /* (0) Determine state */
7257+ exists = pedf->scheduled != NULL;
7258+ blocks = exists && !is_running(pedf->scheduled);
7259+ out_of_time = exists && budget_exhausted(pedf->scheduled);
7260+ np = exists && is_np(pedf->scheduled);
7261+ sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
7262+ preempt = edf_preemption_needed(edf, prev);
7263+
7264+ /* If we need to preempt do so.
7265+ * The following checks set resched to 1 in case of special
7266+ * circumstances.
7267+ */
7268+ resched = preempt;
7269+
7270+ /* If a task blocks we have no choice but to reschedule.
7271+ */
7272+ if (blocks)
7273+ resched = 1;
7274+
7275+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
7276+ * Multiple calls to request_exit_np() don't hurt.
7277+ */
7278+ if (np && (out_of_time || preempt || sleep))
7279+ request_exit_np(pedf->scheduled);
7280+
7281+ /* Any task that is preemptable and either exhausts its execution
7282+ * budget or wants to sleep completes. We may have to reschedule after
7283+ * this.
7284+ */
7285+ if (!np && (out_of_time || sleep) && !blocks) {
7286+ job_completion(pedf->scheduled);
7287+ resched = 1;
7288+ }
7289+
7290+ /* The final scheduling decision. Do we need to switch for some reason?
7291+ * Switch if we are in RT mode and have no task or if we need to
7292+ * resched.
7293+ */
7294+ next = NULL;
7295+ if ((!np || blocks) && (resched || !exists)) {
7296+ /* Take care of a previously scheduled
7297+ * job by taking it out of the Linux runqueue.
7298+ */
7299+ if (pedf->scheduled && !blocks)
7300+ requeue(pedf->scheduled, edf);
7301+ next = __take_ready(edf);
7302+ } else
7303+ /* Only override Linux scheduler if we have a real-time task
7304+ * scheduled that needs to continue.
7305+ */
7306+ if (exists)
7307+ next = prev;
7308+
7309+ if (next) {
7310+ TRACE_TASK(next, " == next\n");
7311+ set_rt_flags(next, RT_F_RUNNING);
7312+ } else {
7313+ TRACE("becoming idle.\n");
7314+ }
7315+
7316+ pedf->scheduled = next;
7317+ spin_unlock(&pedf->slock);
7318+
7319+ return next;
7320+}
7321+
7322+
7323+/* Prepare a task for running in RT mode
7324+ */
7325+static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
7326+{
7327+ rt_domain_t* edf = task_edf(t);
7328+ psnedf_domain_t* pedf = task_pedf(t);
7329+ unsigned long flags;
7330+
7331+ TRACE_TASK(t, "new\n");
7332+
7333+ /* setup job parameters */
7334+ release_at(t, litmus_clock());
7335+
7336+ /* The task should be running in the queue, otherwise the signal
7337+ * code will try to wake it up, with fatal consequences.
7338+ */
7339+ spin_lock_irqsave(&pedf->slock, flags);
7340+ if (running) {
7341+ /* there shouldn't be anything else running at the time */
7342+ BUG_ON(pedf->scheduled);
7343+ pedf->scheduled = t;
7344+ } else {
7345+ requeue(t, edf);
7346+ /* maybe we have to reschedule */
7347+ preempt(pedf);
7348+ }
7349+ spin_unlock_irqrestore(&pedf->slock, flags);
7350+}
7351+
7352+static void psnedf_task_wake_up(struct task_struct *task)
7353+{
7354+ unsigned long flags;
7355+ psnedf_domain_t* pedf = task_pedf(task);
7356+ rt_domain_t* edf = task_edf(task);
7357+ lt_t now;
7358+
7359+ TRACE_TASK(task, "wake up\n");
7360+ spin_lock_irqsave(&pedf->slock, flags);
7361+ BUG_ON(is_queued(task));
7362+ /* We need to take suspensions because of semaphores into
7363+ * account! If a job resumes after being suspended due to acquiring
7364+ * a semaphore, it should never be treated as a new job release.
7365+ *
7366+ * FIXME: This should be done in some more predictable and userspace-controlled way.
7367+ */
7368+ now = litmus_clock();
7369+ if (is_tardy(task, now) &&
7370+ get_rt_flags(task) != RT_F_EXIT_SEM) {
7371+ /* new sporadic release */
7372+ release_at(task, now);
7373+ sched_trace_job_release(task);
7374+ }
7375+ requeue(task, edf);
7376+ spin_unlock_irqrestore(&pedf->slock, flags);
7377+ TRACE_TASK(task, "wake up done\n");
7378+}
7379+
7380+static void psnedf_task_block(struct task_struct *t)
7381+{
7382+ /* only running tasks can block, thus t is in no queue */
7383+ TRACE_TASK(t, "block, state=%d\n", t->state);
7384+ BUG_ON(!is_realtime(t));
7385+ BUG_ON(is_queued(t));
7386+}
7387+
7388+static void psnedf_task_exit(struct task_struct * t)
7389+{
7390+ unsigned long flags;
7391+ psnedf_domain_t* pedf = task_pedf(t);
7392+ rt_domain_t* edf;
7393+
7394+ spin_lock_irqsave(&pedf->slock, flags);
7395+ if (is_queued(t)) {
7396+ /* dequeue */
7397+ edf = task_edf(t);
7398+ remove(edf, t);
7399+ }
7400+ if (pedf->scheduled == t)
7401+ pedf->scheduled = NULL;
7402+ preempt(pedf);
7403+ spin_unlock_irqrestore(&pedf->slock, flags);
7404+}
7405+
7406+#ifdef CONFIG_FMLP
7407+static long psnedf_pi_block(struct pi_semaphore *sem,
7408+ struct task_struct *new_waiter)
7409+{
7410+ psnedf_domain_t* pedf;
7411+ rt_domain_t* edf;
7412+ struct task_struct* t;
7413+ int cpu = get_partition(new_waiter);
7414+
7415+ BUG_ON(!new_waiter);
7416+
7417+ if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
7418+ TRACE_TASK(new_waiter, " boosts priority\n");
7419+ pedf = task_pedf(new_waiter);
7420+ edf = task_edf(new_waiter);
7421+
7422+ /* interrupts already disabled */
7423+ spin_lock(&pedf->slock);
7424+
7425+ /* store new highest-priority task */
7426+ sem->hp.cpu_task[cpu] = new_waiter;
7427+ if (sem->holder &&
7428+ get_partition(sem->holder) == get_partition(new_waiter)) {
7429+ /* let holder inherit */
7430+ sem->holder->rt_param.inh_task = new_waiter;
7431+ t = sem->holder;
7432+ if (is_queued(t)) {
7433+ /* queued in domain*/
7434+ remove(edf, t);
7435+ /* readd to make priority change take place */
7436+ /* FIXME: this looks outdated */
7437+ if (is_released(t, litmus_clock()))
7438+ __add_ready(edf, t);
7439+ else
7440+ add_release(edf, t);
7441+ }
7442+ }
7443+
7444+ /* check if we need to reschedule */
7445+ if (edf_preemption_needed(edf, current))
7446+ preempt(pedf);
7447+
7448+ spin_unlock(&pedf->slock);
7449+ }
7450+
7451+ return 0;
7452+}
7453+
7454+static long psnedf_inherit_priority(struct pi_semaphore *sem,
7455+ struct task_struct *new_owner)
7456+{
7457+ int cpu = get_partition(new_owner);
7458+
7459+ new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
7460+ if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
7461+ TRACE_TASK(new_owner,
7462+ "inherited priority from %s/%d\n",
7463+ sem->hp.cpu_task[cpu]->comm,
7464+ sem->hp.cpu_task[cpu]->pid);
7465+ } else
7466+ TRACE_TASK(new_owner,
7467+ "cannot inherit priority: "
7468+ "no higher priority job waits on this CPU!\n");
7469+ /* make new owner non-preemptable as required by FMLP under
7470+ * PSN-EDF.
7471+ */
7472+ make_np(new_owner);
7473+ return 0;
7474+}
7475+
7476+
7477+/* This function is called on a semaphore release, and assumes that
7478+ * the current task is also the semaphore holder.
7479+ */
7480+static long psnedf_return_priority(struct pi_semaphore *sem)
7481+{
7482+ struct task_struct* t = current;
7483+ psnedf_domain_t* pedf = task_pedf(t);
7484+ rt_domain_t* edf = task_edf(t);
7485+ int ret = 0;
7486+ int cpu = get_partition(current);
7487+
7488+
7489+ /* Find new highest-priority semaphore task
7490+ * if holder task is the current hp.cpu_task[cpu].
7491+ *
7492+ * Calling function holds sem->wait.lock.
7493+ */
7494+ if (t == sem->hp.cpu_task[cpu])
7495+ edf_set_hp_cpu_task(sem, cpu);
7496+
7497+ take_np(t);
7498+ if (current->rt_param.inh_task) {
7499+ TRACE_CUR("return priority of %s/%d\n",
7500+ current->rt_param.inh_task->comm,
7501+ current->rt_param.inh_task->pid);
7502+ spin_lock(&pedf->slock);
7503+
7504+ /* Reset inh_task to NULL. */
7505+ current->rt_param.inh_task = NULL;
7506+
7507+ /* check if we need to reschedule */
7508+ if (edf_preemption_needed(edf, current))
7509+ preempt(pedf);
7510+
7511+ spin_unlock(&pedf->slock);
7512+ } else
7513+ TRACE_CUR(" no priority to return %p\n", sem);
7514+
7515+ return ret;
7516+}
7517+
7518+#endif
7519+
7520+static long psnedf_admit_task(struct task_struct* tsk)
7521+{
7522+ return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
7523+}
7524+
7525+/* Plugin object */
7526+static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
7527+ .plugin_name = "PSN-EDF",
7528+#ifdef CONFIG_SRP
7529+ .srp_active = 1,
7530+#endif
7531+ .tick = psnedf_tick,
7532+ .task_new = psnedf_task_new,
7533+ .complete_job = complete_job,
7534+ .task_exit = psnedf_task_exit,
7535+ .schedule = psnedf_schedule,
7536+ .task_wake_up = psnedf_task_wake_up,
7537+ .task_block = psnedf_task_block,
7538+#ifdef CONFIG_FMLP
7539+ .fmlp_active = 1,
7540+ .pi_block = psnedf_pi_block,
7541+ .inherit_priority = psnedf_inherit_priority,
7542+ .return_priority = psnedf_return_priority,
7543+#endif
7544+ .admit_task = psnedf_admit_task
7545+};
7546+
7547+
7548+static int __init init_psn_edf(void)
7549+{
7550+ int i;
7551+
7552+ for (i = 0; i < NR_CPUS; i++)
7553+ {
7554+ psnedf_domain_init(remote_pedf(i),
7555+ psnedf_check_resched,
7556+ NULL, i);
7557+ }
7558+ return register_sched_plugin(&psn_edf_plugin);
7559+}
7560+
7561+
7562+
7563+module_init(init_psn_edf);
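/* Illustrative sketch (not part of the patch): gsnedf_task_wake_up() and
 * psnedf_task_wake_up() above apply the same wake-up policy, restated below
 * as a stand-alone decision function. It assumes, as the surrounding code
 * suggests, that is_tardy(t, now) means the job's absolute deadline lies at
 * or before `now`; the field and flag names are simplified for the example.
 */
#include <stdio.h>

enum { F_RUNNING, F_EXIT_SEM };

static int new_sporadic_release(unsigned long long now,
				unsigned long long deadline,
				int flags)
{
	int tardy = deadline <= now;
	/* a job resuming from a semaphore suspension is never re-released */
	return tardy && flags != F_EXIT_SEM;
}

int main(void)
{
	/* deadline passed during an ordinary suspension: new sporadic release */
	printf("%d\n", new_sporadic_release(100, 80, F_RUNNING));	/* 1 */
	/* resumed after blocking on an FMLP semaphore: keep the current job */
	printf("%d\n", new_sporadic_release(100, 80, F_EXIT_SEM));	/* 0 */
	/* woke up before its deadline: keep the current job */
	printf("%d\n", new_sporadic_release(100, 120, F_RUNNING));	/* 0 */
	return 0;
}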
7564diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
7565new file mode 100644
7566index 0000000..4344785
7567--- /dev/null
7568+++ b/litmus/sched_trace.c
7569@@ -0,0 +1,569 @@
7570+/* sched_trace.c -- record scheduling events to a byte stream.
7571+ *
7572+ * TODO: Move ring buffer to a lockfree implementation.
7573+ */
7574+
7575+#include <linux/spinlock.h>
7576+#include <linux/fs.h>
7577+#include <linux/cdev.h>
7578+#include <asm/semaphore.h>
7579+#include <asm/uaccess.h>
7580+#include <linux/module.h>
7581+
7582+#include <litmus/sched_trace.h>
7583+#include <litmus/litmus.h>
7584+
7585+
7586+typedef struct {
7587+ /* guard read and write pointers */
7588+ spinlock_t lock;
7589+ /* guard against concurrent freeing of buffer */
7590+ rwlock_t del_lock;
7591+
7592+ /* memory allocated for ring buffer */
7593+ unsigned long order;
7594+ char* buf;
7595+ char* end;
7596+
7597+ /* Read/write pointer. May not cross.
7598+ * They point to the position of next write and
7599+ * last read.
7600+ */
7601+ char* writep;
7602+ char* readp;
7603+
7604+} ring_buffer_t;
7605+
7606+#define EMPTY_RING_BUFFER { \
7607+ .lock = SPIN_LOCK_UNLOCKED, \
7608+ .del_lock = RW_LOCK_UNLOCKED, \
7609+ .buf = NULL, \
7610+ .end = NULL, \
7611+ .writep = NULL, \
7612+ .readp = NULL \
7613+}
7614+
7615+void rb_init(ring_buffer_t* buf)
7616+{
7617+ *buf = (ring_buffer_t) EMPTY_RING_BUFFER;
7618+}
7619+
7620+int rb_alloc_buf(ring_buffer_t* buf, unsigned long order)
7621+{
7622+ unsigned long flags;
7623+ int error = 0;
7624+ char *mem;
7625+
7626+ /* do memory allocation while not atomic */
7627+ mem = (char *) __get_free_pages(GFP_KERNEL, order);
7628+ if (!mem)
7629+ return -ENOMEM;
7630+ write_lock_irqsave(&buf->del_lock, flags);
7631+ BUG_ON(buf->buf);
7632+ buf->buf = mem;
7633+ buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1;
7634+ memset(buf->buf, 0xff, buf->end - buf->buf);
7635+ buf->order = order;
7636+ buf->writep = buf->buf + 1;
7637+ buf->readp = buf->buf;
7638+ write_unlock_irqrestore(&buf->del_lock, flags);
7639+ return error;
7640+}
7641+
7642+int rb_free_buf(ring_buffer_t* buf)
7643+{
7644+ unsigned long flags;
7645+ int error = 0;
7646+ write_lock_irqsave(&buf->del_lock, flags);
7647+ BUG_ON(!buf->buf);
7648+ free_pages((unsigned long) buf->buf, buf->order);
7649+ buf->buf = NULL;
7650+ buf->end = NULL;
7651+ buf->writep = NULL;
7652+ buf->readp = NULL;
7653+ write_unlock_irqrestore(&buf->del_lock, flags);
7654+ return error;
7655+}
7656+
7657+/* Assumption: concurrent writes are serialized externally
7658+ *
7659+ * Will only succeed if there is enough space for all len bytes.
7660+ */
7661+int rb_put(ring_buffer_t* buf, char* mem, size_t len)
7662+{
7663+ unsigned long flags;
7664+ char* r , *w;
7665+ int error = 0;
7666+ read_lock_irqsave(&buf->del_lock, flags);
7667+ if (!buf->buf) {
7668+ error = -ENODEV;
7669+ goto out;
7670+ }
7671+ spin_lock(&buf->lock);
7672+ r = buf->readp;
7673+ w = buf->writep;
7674+ spin_unlock(&buf->lock);
7675+ if (r < w && buf->end - w >= len - 1) {
7676+ /* easy case: there is enough space in the buffer
7677+ * to write it in one continuous chunk */
7678+ memcpy(w, mem, len);
7679+ w += len;
7680+ if (w > buf->end)
7681+ /* special case: fit exactly into buffer
7682+ * w is now buf->end + 1
7683+ */
7684+ w = buf->buf;
7685+ } else if (w < r && r - w >= len) { /* >= len because may not cross */
7686+ /* we are constrained by the read pointer,
7687+ * but there is enough space
7688+ */
7689+ memcpy(w, mem, len);
7690+ w += len;
7691+ } else if (r <= w && buf->end - w < len - 1) {
7692+ /* the wrap around case: there may or may not be space */
7693+ if ((buf->end - w) + (r - buf->buf) >= len - 1) {
7694+ /* copy chunk that fits at the end */
7695+ memcpy(w, mem, buf->end - w + 1);
7696+ mem += buf->end - w + 1;
7697+ len -= (buf->end - w + 1);
7698+ w = buf->buf;
7699+ /* copy the rest */
7700+ memcpy(w, mem, len);
7701+ w += len;
7702+ }
7703+ else
7704+ error = -ENOMEM;
7705+ } else {
7706+ error = -ENOMEM;
7707+ }
7708+ if (!error) {
7709+ spin_lock(&buf->lock);
7710+ buf->writep = w;
7711+ spin_unlock(&buf->lock);
7712+ }
7713+ out:
7714+ read_unlock_irqrestore(&buf->del_lock, flags);
7715+ return error;
7716+}
7717+
7718+/* Assumption: concurrent reads are serialized externally */
7719+int rb_get(ring_buffer_t* buf, char* mem, size_t len)
7720+{
7721+ unsigned long flags;
7722+ char* r , *w;
7723+ int error = 0;
7724+ read_lock_irqsave(&buf->del_lock, flags);
7725+ if (!buf->buf) {
7726+ error = -ENODEV;
7727+ goto out;
7728+ }
7729+ spin_lock(&buf->lock);
7730+ r = buf->readp;
7731+ w = buf->writep;
7732+ spin_unlock(&buf->lock);
7733+
7734+ if (w <= r && buf->end - r >= len) {
7735+ /* easy case: there is enough data in the buffer
7736+ * to get it in one chunk */
7737+ memcpy(mem, r + 1, len);
7738+ r += len;
7739+ error = len;
7740+
7741+ } else if (r + 1 < w && w - r - 1 >= len) {
7742+ /* we are constrained by the write pointer but
7743+ * there is enough data
7744+ */
7745+ memcpy(mem, r + 1, len);
7746+ r += len;
7747+ error = len;
7748+
7749+ } else if (r + 1 < w && w - r - 1 < len) {
7750+ /* we are constrained by the write pointer and
7751+ * there is not enough data
7752+ */
7753+ memcpy(mem, r + 1, w - r - 1);
7754+ error = w - r - 1;
7755+ r += w - r - 1;
7756+
7757+ } else if (w <= r && buf->end - r < len) {
7758+ /* the wrap around case: there may or may not be enough data
7759+ * first let's get what is available
7760+ */
7761+ memcpy(mem, r + 1, buf->end - r);
7762+ error += (buf->end - r);
7763+ mem += (buf->end - r);
7764+ len -= (buf->end - r);
7765+ r += (buf->end - r);
7766+
7767+ if (w > buf->buf) {
7768+ /* there is more to get */
7769+ r = buf->buf - 1;
7770+ if (w - r >= len) {
7771+ /* plenty */
7772+ memcpy(mem, r + 1, len);
7773+ error += len;
7774+ r += len;
7775+ } else {
7776+ memcpy(mem, r + 1, w - r - 1);
7777+ error += w - r - 1;
7778+ r += w - r - 1;
7779+ }
7780+ }
7781+ } /* nothing available */
7782+
7783+ if (error > 0) {
7784+ spin_lock(&buf->lock);
7785+ buf->readp = r;
7786+ spin_unlock(&buf->lock);
7787+ }
7788+ out:
7789+ read_unlock_irqrestore(&buf->del_lock, flags);
7790+ return error;
7791+}
7792+
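To illustrate the intended calling pattern of this ring buffer API, here is a minimal in-kernel sketch (hypothetical names; the real users are the trace devices further down in this file):

/* Hypothetical usage sketch, not part of the patch. */
static ring_buffer_t example_rb;

static int example_setup(void)
{
	rb_init(&example_rb);
	return rb_alloc_buf(&example_rb, 2);	/* order 2 = 4 pages */
}

/* Producer: callers must serialize concurrent writes themselves.
 * Returns 0 on success, -ENOMEM if the record does not fit. */
static int example_log(const char *rec, size_t len)
{
	return rb_put(&example_rb, (char *) rec, len);
}

/* Consumer: callers must serialize concurrent reads themselves.
 * Returns the number of bytes actually copied into 'to'. */
static ssize_t example_drain(char *to, size_t len)
{
	return rb_get(&example_rb, to, len);
}

static void example_teardown(void)
{
	rb_free_buf(&example_rb);
}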
7793+
7794+
7795+/******************************************************************************/
7796+/* DEVICE FILE DRIVER */
7797+/******************************************************************************/
7798+
7799+
7800+
7801+/* Allocate a buffer of about 1 MB per CPU
7802+ * (order 8 = 256 pages, i.e. 1 MB with 4 KB pages).
7803+ */
7804+#define BUFFER_ORDER 8
7805+
7806+typedef struct {
7807+ ring_buffer_t buf;
7808+ atomic_t reader_cnt;
7809+ struct semaphore reader_mutex;
7810+} trace_buffer_t;
7811+
7812+
7813+/* This does not initialize the semaphore!! */
7814+
7815+#define EMPTY_TRACE_BUFFER \
7816+ { .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0)}
7817+
7818+static DEFINE_PER_CPU(trace_buffer_t, trace_buffer);
7819+
7820+#ifdef CONFIG_SCHED_DEBUG_TRACE
7821+static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
7822+#endif
7823+static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER;
7824+
7825+static void init_buffers(void)
7826+{
7827+ int i;
7828+
7829+ for (i = 0; i < NR_CPUS; i++) {
7830+ rb_init(&per_cpu(trace_buffer, i).buf);
7831+ init_MUTEX(&per_cpu(trace_buffer, i).reader_mutex);
7832+ atomic_set(&per_cpu(trace_buffer, i).reader_cnt, 0);
7833+ }
7834+ /* only initialize the mutex, the rest was initialized as part
7835+ * of the static initialization macro
7836+ */
7837+ init_MUTEX(&log_buffer.reader_mutex);
7838+}
7839+
7840+static int trace_release(struct inode *in, struct file *filp)
7841+{
7842+ int error = -EINVAL;
7843+ trace_buffer_t* buf = filp->private_data;
7844+
7845+ BUG_ON(!filp->private_data);
7846+
7847+ if (down_interruptible(&buf->reader_mutex)) {
7848+ error = -ERESTARTSYS;
7849+ goto out;
7850+ }
7851+
7852+ /* last release must deallocate buffers */
7853+ if (atomic_dec_return(&buf->reader_cnt) == 0) {
7854+ error = rb_free_buf(&buf->buf);
7855+ }
7856+
7857+ up(&buf->reader_mutex);
7858+ out:
7859+ return error;
7860+}
7861+
7862+static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
7863+ loff_t *f_pos)
7864+{
7865+ /* we ignore f_pos, this is strictly sequential */
7866+
7867+ ssize_t error = -EINVAL;
7868+ char* mem;
7869+ trace_buffer_t *buf = filp->private_data;
7870+
7871+ if (down_interruptible(&buf->reader_mutex)) {
7872+ error = -ERESTARTSYS;
7873+ goto out;
7874+ }
7875+
7876+ if (len > 64 * 1024)
7877+ len = 64 * 1024;
7878+ mem = kmalloc(len, GFP_KERNEL);
7879+ if (!mem) {
7880+ error = -ENOMEM;
7881+ goto out_unlock;
7882+ }
7883+
7884+ error = rb_get(&buf->buf, mem, len);
7885+ while (!error) {
7886+ set_current_state(TASK_INTERRUPTIBLE);
7887+ schedule_timeout(110);
7888+ if (signal_pending(current))
7889+ error = -ERESTARTSYS;
7890+ else
7891+ error = rb_get(&buf->buf, mem, len);
7892+ }
7893+
7894+ if (error > 0 && copy_to_user(to, mem, error))
7895+ error = -EFAULT;
7896+
7897+ kfree(mem);
7898+ out_unlock:
7899+ up(&buf->reader_mutex);
7900+ out:
7901+ return error;
7902+}
7903+
7904+
7905+/* trace_open - Open one of the per-CPU sched_trace buffers.
7906+ */
7907+static int trace_open(struct inode *in, struct file *filp)
7908+{
7909+ int error = -EINVAL;
7910+ int cpu = MINOR(in->i_rdev);
7911+ trace_buffer_t* buf;
7912+
7913+ if (!cpu_online(cpu)) {
7914+ printk(KERN_WARNING "sched trace: "
7915+ "CPU #%d is not online. (open failed)\n", cpu);
7916+ error = -ENODEV;
7917+ goto out;
7918+ }
7919+
7920+ buf = &per_cpu(trace_buffer, cpu);
7921+
7922+ if (down_interruptible(&buf->reader_mutex)) {
7923+ error = -ERESTARTSYS;
7924+ goto out;
7925+ }
7926+
7927+ /* first open must allocate buffers */
7928+ if (atomic_inc_return(&buf->reader_cnt) == 1) {
7929+ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
7930+ {
7931+ atomic_dec(&buf->reader_cnt);
7932+ goto out_unlock;
7933+ }
7934+ }
7935+
7936+ error = 0;
7937+ filp->private_data = buf;
7938+
7939+ out_unlock:
7940+ up(&buf->reader_mutex);
7941+ out:
7942+ return error;
7943+}
7944+
7945+
7946+extern int trace_override;
7947+
7948+/* log_open - open the global log message ring buffer.
7949+ */
7950+static int log_open(struct inode *in, struct file *filp)
7951+{
7952+ int error = -EINVAL;
7953+ trace_buffer_t* buf;
7954+
7955+ buf = &log_buffer;
7956+
7957+ if (down_interruptible(&buf->reader_mutex)) {
7958+ error = -ERESTARTSYS;
7959+ goto out;
7960+ }
7961+
7962+ /* first open must allocate buffers */
7963+ if (atomic_inc_return(&buf->reader_cnt) == 1) {
7964+ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
7965+ {
7966+ atomic_dec(&buf->reader_cnt);
7967+ goto out_unlock;
7968+ }
7969+ }
7970+
7971+ error = 0;
7972+ filp->private_data = buf;
7973+ printk(KERN_DEBUG "sched_trace buf: from 0x%p to 0x%p length: %lx\n",
7974+ buf->buf.buf, buf->buf.end, buf->buf.end - buf->buf.buf);
7975+ trace_override++;
7976+ out_unlock:
7977+ up(&buf->reader_mutex);
7978+ out:
7979+ return error;
7980+}
7981+
7982+static int log_release(struct inode *in, struct file *filp)
7983+{
7984+ int error = -EINVAL;
7985+ trace_buffer_t* buf = filp->private_data;
7986+
7987+ BUG_ON(!filp->private_data);
7988+
7989+ if (down_interruptible(&buf->reader_mutex)) {
7990+ error = -ERESTARTSYS;
7991+ goto out;
7992+ }
7993+
7994+ /* last release must deallocate buffers */
7995+ if (atomic_dec_return(&buf->reader_cnt) == 0) {
7996+ error = rb_free_buf(&buf->buf);
7997+ }
7998+
7999+ trace_override--;
8000+ up(&buf->reader_mutex);
8001+ out:
8002+ return error;
8003+}
8004+
8005+/******************************************************************************/
8006+/* Device Registration */
8007+/******************************************************************************/
8008+
8009+/* the major numbers are from the unassigned/local use block
8010+ *
8011+ * This should be converted to dynamic allocation at some point...
8012+ */
8013+#define TRACE_MAJOR 250
8014+#define LOG_MAJOR 251
8015+
8016+/* trace_fops - The file operations for accessing the per-CPU scheduling event
8017+ * trace buffers.
8018+ */
8019+struct file_operations trace_fops = {
8020+ .owner = THIS_MODULE,
8021+ .open = trace_open,
8022+ .release = trace_release,
8023+ .read = trace_read,
8024+};
8025+
8026+/* log_fops - The file operations for accessing the global LITMUS log message
8027+ * buffer.
8028+ *
8029+ * Except for opening the device file, it uses the same operations as trace_fops.
8030+ */
8031+struct file_operations log_fops = {
8032+ .owner = THIS_MODULE,
8033+ .open = log_open,
8034+ .release = log_release,
8035+ .read = trace_read,
8036+};
8037+
8038+static int __init register_buffer_dev(const char* name,
8039+ struct file_operations* fops,
8040+ int major, int count)
8041+{
8042+ dev_t trace_dev;
8043+ struct cdev *cdev;
8044+ int error = 0;
8045+
8046+ trace_dev = MKDEV(major, 0);
8047+ error = register_chrdev_region(trace_dev, count, name);
8048+ if (error)
8049+ {
8050+ printk(KERN_WARNING "sched trace: "
8051+ "Could not register major/minor number %d\n", major);
8052+ return error;
8053+ }
8054+ cdev = cdev_alloc();
8055+ if (!cdev) {
8056+ printk(KERN_WARNING "sched trace: "
8057+ "Could not get a cdev for %s.\n", name);
8058+ return -ENOMEM;
8059+ }
8060+ cdev->owner = THIS_MODULE;
8061+ cdev->ops = fops;
8062+ error = cdev_add(cdev, trace_dev, count);
8063+ if (error) {
8064+ printk(KERN_WARNING "sched trace: "
8065+ "add_cdev failed for %s.\n", name);
8066+ return -ENOMEM;
8067+ }
8068+ return error;
8069+
8070+}
8071+
8072+static int __init init_sched_trace(void)
8073+{
8074+ int error1 = 0, error2 = 0;
8075+
8076+ printk("Initializing scheduler trace device\n");
8077+ init_buffers();
8078+
8079+ error1 = register_buffer_dev("schedtrace", &trace_fops,
8080+ TRACE_MAJOR, NR_CPUS);
8081+
8082+ error2 = register_buffer_dev("litmus_log", &log_fops,
8083+ LOG_MAJOR, 1);
8084+ if (error1 || error2)
8085+ return min(error1, error2);
8086+ else
8087+ return 0;
8088+}
8089+
8090+module_init(init_sched_trace);
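For completeness, this is roughly how the litmus_log device is consumed from user space (a hypothetical sketch: the device node path is illustrative and must be created by hand to match LOG_MAJOR; the per-CPU schedtrace devices are read the same way, with the minor number selecting the CPU):

/* Hypothetical user-space reader, e.g. after: mknod /dev/litmus_log c 251 0 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/litmus_log", O_RDONLY);	/* path is illustrative */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* trace_read() blocks until data arrives, so this simply streams the
	 * debug log to stdout until the reader is interrupted. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}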
8091+
8092+/******************************************************************************/
8093+/* KERNEL API */
8094+/******************************************************************************/
8095+
8096+/* The per-CPU buffer used to format LITMUS log messages. Don't put it on the stack;
8097+ * it is too big for that, and the kernel gets very picky with nested interrupts and small stacks.
8098+ */
8099+
8100+#ifdef CONFIG_SCHED_DEBUG_TRACE
8101+
8102+#define MSG_SIZE 255
8103+static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
8104+
8105+/* sched_trace_log_message - This is the only function that accesses the
8106+ * log buffer inside the kernel for writing.
8107+ * Concurrent access to it is serialized via the
8108+ * log_buffer_lock.
8109+ *
8110+ * The maximum length of a formatted message is 255.
8111+ */
8112+void sched_trace_log_message(const char* fmt, ...)
8113+{
8114+ unsigned long flags;
8115+ va_list args;
8116+ size_t len;
8117+ char* buf;
8118+
8119+ va_start(args, fmt);
8120+ local_irq_save(flags);
8121+
8122+ /* format message */
8123+ buf = __get_cpu_var(fmt_buffer);
8124+ len = vscnprintf(buf, MSG_SIZE, fmt, args);
8125+
8126+ spin_lock(&log_buffer_lock);
8127+ /* Don't copy the trailing null byte, we don't want null bytes
8128+ * in a text file.
8129+ */
8130+ rb_put(&log_buffer.buf, buf, len);
8131+ spin_unlock(&log_buffer_lock);
8132+
8133+ local_irq_restore(flags);
8134+ va_end(args);
8135+}
8136+
8137+#endif
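A hypothetical call site, for reference (in the rest of the patch this function is presumably reached through the TRACE()-style macros used elsewhere rather than called directly; the format string below is made up):

/* Hypothetical example call; arguments are illustrative only. */
sched_trace_log_message("(%s/%d) blocks on SRP ceiling at %llu\n",
			current->comm, current->pid,
			(unsigned long long) litmus_clock());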
8138+
8139diff --git a/litmus/srp.c b/litmus/srp.c
8140new file mode 100644
8141index 0000000..6e670f9
8142--- /dev/null
8143+++ b/litmus/srp.c
8144@@ -0,0 +1,318 @@
8145+/* ************************************************************************** */
8146+/* STACK RESOURCE POLICY */
8147+/* ************************************************************************** */
8148+
8149+#include <asm/atomic.h>
8150+#include <linux/wait.h>
8151+#include <litmus/litmus.h>
8152+#include <litmus/sched_plugin.h>
8153+
8154+#include <litmus/fdso.h>
8155+
8156+#include <litmus/trace.h>
8157+
8158+
8159+#ifdef CONFIG_SRP
8160+
8161+struct srp_priority {
8162+ struct list_head list;
8163+ unsigned int period;
8164+ pid_t pid;
8165+};
8166+
8167+#define list2prio(l) list_entry(l, struct srp_priority, list)
8168+
8169+/* SRP task priority comparison function. Smaller periods have higher
8170+ * priority; ties are broken by PID. Special case: period == 0 <=> no priority.
8171+ */
8172+static int srp_higher_prio(struct srp_priority* first,
8173+ struct srp_priority* second)
8174+{
8175+ if (!first->period)
8176+ return 0;
8177+ else
8178+ return !second->period ||
8179+ first->period < second->period || (
8180+ first->period == second->period &&
8181+ first->pid < second->pid);
8182+}
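A brief worked example of this ordering (the values are hypothetical, not taken from the code):

/* Illustrative values only -- not part of the patch. */
struct srp_priority a = { .period = 10, .pid = 42 };	/* short period  */
struct srp_priority b = { .period = 20, .pid = 17 };	/* longer period */
struct srp_priority c = { .period =  0, .pid =  5 };	/* "no priority" */

/* srp_higher_prio(&a, &b) == 1: the shorter period wins.
 * srp_higher_prio(&c, &a) == 0: period == 0 never has priority.
 * srp_higher_prio(&a, &c) == 1: any real period beats "no priority".
 * Equal periods fall back to the PID comparison: the lower PID wins. */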
8183+
8184+struct srp {
8185+ struct list_head ceiling;
8186+ wait_queue_head_t ceiling_blocked;
8187+};
8188+
8189+
8190+atomic_t srp_objects_in_use = ATOMIC_INIT(0);
8191+
8192+DEFINE_PER_CPU(struct srp, srp);
8193+
8194+
8195+/* Initialize SRP semaphores at boot time. */
8196+static int __init srp_init(void)
8197+{
8198+ int i;
8199+
8200+ printk("Initializing SRP per-CPU ceilings...");
8201+ for (i = 0; i < NR_CPUS; i++) {
8202+ init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
8203+ INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
8204+ }
8205+ printk(" done!\n");
8206+
8207+ return 0;
8208+}
8209+module_init(srp_init);
8210+
8211+
8212+#define system_ceiling(srp) list2prio(srp->ceiling.next)
8213+
8214+
8215+#define UNDEF_SEM -2
8216+
8217+
8218+/* struct for uniprocessor SRP "semaphore" */
8219+struct srp_semaphore {
8220+ struct srp_priority ceiling;
8221+ struct task_struct* owner;
8222+ int cpu; /* cpu associated with this "semaphore" and resource */
8223+};
8224+
8225+#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
8226+
8227+static int srp_exceeds_ceiling(struct task_struct* first,
8228+ struct srp* srp)
8229+{
8230+ return list_empty(&srp->ceiling) ||
8231+ get_rt_period(first) < system_ceiling(srp)->period ||
8232+ (get_rt_period(first) == system_ceiling(srp)->period &&
8233+ first->pid < system_ceiling(srp)->pid) ||
8234+ ceiling2sem(system_ceiling(srp))->owner == first;
8235+}
8236+
8237+static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
8238+{
8239+ struct list_head *pos;
8240+ if (in_list(&prio->list)) {
8241+ printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
8242+ "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
8243+ return;
8244+ }
8245+ list_for_each(pos, &srp->ceiling)
8246+ if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
8247+ __list_add(&prio->list, pos->prev, pos);
8248+ return;
8249+ }
8250+
8251+ list_add_tail(&prio->list, &srp->ceiling);
8252+}
8253+
8254+
8255+static void* create_srp_semaphore(void)
8256+{
8257+ struct srp_semaphore* sem;
8258+
8259+ sem = kmalloc(sizeof(*sem), GFP_KERNEL);
8260+ if (!sem)
8261+ return NULL;
8262+
8263+ INIT_LIST_HEAD(&sem->ceiling.list);
8264+ sem->ceiling.period = 0;
8265+ sem->cpu = UNDEF_SEM;
8266+ sem->owner = NULL;
8267+ atomic_inc(&srp_objects_in_use);
8268+ return sem;
8269+}
8270+
8271+static noinline int open_srp_semaphore(struct od_table_entry* entry, void* __user arg)
8272+{
8273+ struct srp_semaphore* sem = (struct srp_semaphore*) entry->obj->obj;
8274+ int ret = 0;
8275+ struct task_struct* t = current;
8276+ struct srp_priority t_prio;
8277+
8278+ TRACE("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
8279+ if (!srp_active())
8280+ return -EBUSY;
8281+
8282+ if (sem->cpu == UNDEF_SEM)
8283+ sem->cpu = get_partition(t);
8284+ else if (sem->cpu != get_partition(t))
8285+ ret = -EPERM;
8286+
8287+ if (ret == 0) {
8288+ t_prio.period = get_rt_period(t);
8289+ t_prio.pid = t->pid;
8290+ if (srp_higher_prio(&t_prio, &sem->ceiling)) {
8291+ sem->ceiling.period = t_prio.period;
8292+ sem->ceiling.pid = t_prio.pid;
8293+ }
8294+ }
8295+
8296+ return ret;
8297+}
8298+
8299+static void destroy_srp_semaphore(void* sem)
8300+{
8301+ /* XXX invariants */
8302+ atomic_dec(&srp_objects_in_use);
8303+ kfree(sem);
8304+}
8305+
8306+struct fdso_ops srp_sem_ops = {
8307+ .create = create_srp_semaphore,
8308+ .open = open_srp_semaphore,
8309+ .destroy = destroy_srp_semaphore
8310+};
8311+
8312+
8313+static void do_srp_down(struct srp_semaphore* sem)
8314+{
8315+ /* Update ceiling. */
8316+ srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
8317+ WARN_ON(sem->owner != NULL);
8318+ sem->owner = current;
8319+ TRACE_CUR("acquired srp 0x%p\n", sem);
8320+}
8321+
8322+static void do_srp_up(struct srp_semaphore* sem)
8323+{
8324+ /* Determine new system priority ceiling for this CPU. */
8325+ WARN_ON(!in_list(&sem->ceiling.list));
8326+ if (in_list(&sem->ceiling.list))
8327+ list_del(&sem->ceiling.list);
8328+
8329+ sem->owner = NULL;
8330+
8331+ /* Wake tasks on this CPU, if they exceed current ceiling. */
8332+ TRACE_CUR("released srp 0x%p\n", sem);
8333+ wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
8334+}
8335+
8336+/* Adjust the system-wide priority ceiling if resource is claimed. */
8337+asmlinkage long sys_srp_down(int sem_od)
8338+{
8339+ int cpu;
8340+ int ret = -EINVAL;
8341+ struct srp_semaphore* sem;
8342+
8343+ /* disabling preemptions is sufficient protection since
8344+ * SRP is strictly per CPU and we don't interfere with any
8345+ * interrupt handlers
8346+ */
8347+ preempt_disable();
8348+ TS_SRP_DOWN_START;
8349+
8350+ cpu = smp_processor_id();
8351+ sem = lookup_srp_sem(sem_od);
8352+ if (sem && sem->cpu == cpu) {
8353+ do_srp_down(sem);
8354+ ret = 0;
8355+ }
8356+
8357+ TS_SRP_DOWN_END;
8358+ preempt_enable();
8359+ return ret;
8360+}
8361+
8362+/* Adjust the system-wide priority ceiling if resource is freed. */
8363+asmlinkage long sys_srp_up(int sem_od)
8364+{
8365+ int cpu;
8366+ int ret = -EINVAL;
8367+ struct srp_semaphore* sem;
8368+
8369+ preempt_disable();
8370+ TS_SRP_UP_START;
8371+
8372+ cpu = smp_processor_id();
8373+ sem = lookup_srp_sem(sem_od);
8374+
8375+ if (sem && sem->cpu == cpu) {
8376+ do_srp_up(sem);
8377+ ret = 0;
8378+ }
8379+
8380+ TS_SRP_UP_END;
8381+ preempt_enable();
8382+ return ret;
8383+}
8384+
8385+static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
8386+ void *key)
8387+{
8388+ int cpu = smp_processor_id();
8389+ struct task_struct *tsk = wait->private;
8390+ if (cpu != get_partition(tsk))
8391+ TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
8392+ get_partition(tsk));
8393+ else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
8394+ return default_wake_function(wait, mode, sync, key);
8395+ return 0;
8396+}
8397+
8398+
8399+
8400+static void do_ceiling_block(struct task_struct *tsk)
8401+{
8402+ wait_queue_t wait = {
8403+ .private = tsk,
8404+ .func = srp_wake_up,
8405+ .task_list = {NULL, NULL}
8406+ };
8407+
8408+ tsk->state = TASK_UNINTERRUPTIBLE;
8409+ add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
8410+ tsk->rt_param.srp_non_recurse = 1;
8411+ preempt_enable_no_resched();
8412+ schedule();
8413+ preempt_disable();
8414+ tsk->rt_param.srp_non_recurse = 0;
8415+ remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
8416+}
8417+
8418+/* Wait for current task priority to exceed system-wide priority ceiling.
8419+ */
8420+void srp_ceiling_block(void)
8421+{
8422+ struct task_struct *tsk = current;
8423+
8424+ /* Only applies to real-time tasks; the check is marked unlikely since RT callers are the common case. */
8425+ if (unlikely(!is_realtime(tsk)))
8426+ return;
8427+
8428+ /* Avoid recursive ceiling blocking. */
8429+ if (unlikely(tsk->rt_param.srp_non_recurse))
8430+ return;
8431+
8432+ /* Bail out early if there aren't any SRP resources around. */
8433+ if (likely(!atomic_read(&srp_objects_in_use)))
8434+ return;
8435+
8436+ preempt_disable();
8437+ if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
8438+ TRACE_CUR("is priority ceiling blocked.\n");
8439+ while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
8440+ do_ceiling_block(tsk);
8441+ TRACE_CUR("finally exceeds system ceiling.\n");
8442+ } else
8443+ TRACE_CUR("is not priority ceiling blocked\n");
8444+ preempt_enable();
8445+}
8446+
8447+
8448+#else
8449+
8450+asmlinkage long sys_srp_down(int sem_od)
8451+{
8452+ return -ENOSYS;
8453+}
8454+
8455+asmlinkage long sys_srp_up(int sem_od)
8456+{
8457+ return -ENOSYS;
8458+}
8459+
8460+struct fdso_ops srp_sem_ops = {};
8461+
8462+#endif
8463diff --git a/litmus/sync.c b/litmus/sync.c
8464new file mode 100644
8465index 0000000..c16f1dd
8466--- /dev/null
8467+++ b/litmus/sync.c
8468@@ -0,0 +1,86 @@
8469+/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
8470+ *
8471+ *
8472+ */
8473+
8474+#include <asm/atomic.h>
8475+#include <asm/uaccess.h>
8476+#include <linux/spinlock.h>
8477+#include <linux/list.h>
8478+#include <linux/sched.h>
8479+#include <linux/completion.h>
8480+
8481+#include <litmus/litmus.h>
8482+#include <litmus/sched_plugin.h>
8483+#include <litmus/jobs.h>
8484+
8485+static DECLARE_COMPLETION(ts_release);
8486+
8487+static long do_wait_for_ts_release(void)
8488+{
8489+ long ret = 0;
8490+
8491+ /* If the interruption races with a release, the completion object
8492+ * may have a non-zero counter. To avoid this problem, this should
8493+ * be replaced by wait_for_completion().
8494+ *
8495+ * For debugging purposes, this is interruptible for now.
8496+ */
8497+ ret = wait_for_completion_interruptible(&ts_release);
8498+
8499+ return ret;
8500+}
8501+
8502+
8503+static long do_release_ts(lt_t start)
8504+{
8505+ int task_count = 0;
8506+ unsigned long flags;
8507+ struct list_head *pos;
8508+ struct task_struct *t;
8509+
8510+
8511+ spin_lock_irqsave(&ts_release.wait.lock, flags);
8512+ TRACE("<<<<<< synchronous task system release >>>>>>\n");
8513+
8514+ list_for_each(pos, &ts_release.wait.task_list) {
8515+ t = (struct task_struct*) list_entry(pos,
8516+ struct __wait_queue,
8517+ task_list)->private;
8518+ task_count++;
8519+ litmus->release_at(t, start + t->rt_param.task_params.phase);
8520+ }
8521+
8522+ spin_unlock_irqrestore(&ts_release.wait.lock, flags);
8523+
8524+ complete_n(&ts_release, task_count);
8525+
8526+ return task_count;
8527+}
8528+
8529+
8530+asmlinkage long sys_wait_for_ts_release(void)
8531+{
8532+ long ret = -EPERM;
8533+ struct task_struct *t = current;
8534+
8535+ if (is_realtime(t))
8536+ ret = do_wait_for_ts_release();
8537+
8538+ return ret;
8539+}
8540+
8541+
8542+asmlinkage long sys_release_ts(lt_t __user *__delay)
8543+{
8544+ long ret;
8545+ lt_t delay;
8546+
8547+ /* FIXME: check capabilities... */
8548+
8549+ ret = copy_from_user(&delay, __delay, sizeof(lt_t));
8550+ if (ret == 0)
8551+ ret = do_release_ts(litmus_clock() + delay);
8552+
8553+ return ret;
8554+}
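For context, a user-space sketch of how a task system is typically released: every real-time task blocks in sys_wait_for_ts_release() while a separate controller calls sys_release_ts() with a relative delay in nanoseconds (liblitmus presumably ships wrappers for both). The syscall numbers are assigned elsewhere in the patch and are therefore passed in as parameters here rather than invented:

/* Hypothetical user-space sketch; 'nr_wait' and 'nr_release' must be the
 * LITMUS^RT syscall numbers assigned elsewhere in the patch. */
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

typedef uint64_t lt_t;			/* kernel lt_t: time in nanoseconds */

/* Real-time task side: block until the synchronous release. */
static long task_wait_for_release(long nr_wait)
{
	return syscall(nr_wait);
}

/* Controller side: release all waiting tasks 'delay_ns' from now. */
static long release_task_system(long nr_release, lt_t delay_ns)
{
	return syscall(nr_release, &delay_ns);
}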
8555diff --git a/litmus/trace.c b/litmus/trace.c
8556new file mode 100644
8557index 0000000..dadb09d
8558--- /dev/null
8559+++ b/litmus/trace.c
8560@@ -0,0 +1,335 @@
8561+#include <linux/sched.h>
8562+#include <linux/fs.h>
8563+#include <linux/cdev.h>
8564+#include <asm/semaphore.h>
8565+#include <asm/uaccess.h>
8566+#include <linux/module.h>
8567+
8568+#include <litmus/litmus.h>
8569+#include <litmus/trace.h>
8570+
8571+/******************************************************************************/
8572+/* Allocation */
8573+/******************************************************************************/
8574+
8575+struct ft_buffer* trace_ts_buf = NULL;
8576+
8577+static unsigned int ts_seq_no = 0;
8578+
8579+static inline void __save_timestamp(unsigned long event, uint8_t type)
8580+{
8581+ unsigned int seq_no;
8582+ struct timestamp *ts;
8583+ seq_no = fetch_and_inc((int *) &ts_seq_no);
8584+ if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
8585+ ts->event = event;
8586+ ts->timestamp = ft_timestamp();
8587+ ts->seq_no = seq_no;
8588+ ts->cpu = raw_smp_processor_id();
8589+ ts->task_type = type;
8590+ ft_buffer_finish_write(trace_ts_buf, ts);
8591+ }
8592+}
8593+
8594+feather_callback void save_timestamp(unsigned long event)
8595+{
8596+ __save_timestamp(event, TSK_UNKNOWN);
8597+}
8598+
8599+feather_callback void save_timestamp_def(unsigned long event, unsigned long type)
8600+{
8601+ __save_timestamp(event, (uint8_t) type);
8602+}
8603+
8604+feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr)
8605+{
8606+ int rt = is_realtime((struct task_struct *) t_ptr);
8607+ __save_timestamp(event, rt ? TSK_RT : TSK_BE);
8608+}
8609+
8610+static struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
8611+{
8612+ struct ft_buffer* buf;
8613+ size_t total = (size + 1) * count;
8614+ char* mem;
8615+ int order = 0, pages = 1;
8616+
8617+ buf = kmalloc(sizeof(struct ft_buffer), GFP_KERNEL);
8618+ if (!buf)
8619+ return NULL;
8620+
8621+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
8622+ while (pages < total) {
8623+ order++;
8624+ pages *= 2;
8625+ }
8626+
8627+ mem = (char*) __get_free_pages(GFP_KERNEL, order);
8628+ if (!mem) {
8629+ kfree(buf);
8630+ return NULL;
8631+ }
8632+
8633+ if (!init_ft_buffer(buf, count, size,
8634+ mem + (count * size), /* markers at the end */
8635+ mem)) { /* buffer objects */
8636+ free_pages((unsigned long) mem, order);
8637+ kfree(buf);
8638+ return NULL;
8639+ }
8640+ return buf;
8641+}
8642+
8643+static void free_ft_buffer(struct ft_buffer* buf)
8644+{
8645+ int order = 0, pages = 1;
8646+ size_t total;
8647+
8648+ if (buf) {
8649+ total = (buf->slot_size + 1) * buf->slot_count;
8650+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
8651+ while (pages < total) {
8652+ order++;
8653+ pages *= 2;
8654+ }
8655+ free_pages((unsigned long) buf->buffer_mem, order);
8656+ kfree(buf);
8657+ }
8658+}
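To make the sizing logic above concrete, a small worked example (the numbers are hypothetical): for 1000 slots of 16 bytes each, total = (16 + 1) * 1000 = 17000 bytes, which rounds up to 5 pages of 4 KB; the loop then doubles pages (1, 2, 4, 8) until it covers that, settling on order 3, i.e. 8 pages. The extra byte per slot is the per-slot marker that init_ft_buffer() is told to place after the buffer objects.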
8659+
8660+
8661+/******************************************************************************/
8662+/* DEVICE FILE DRIVER */
8663+/******************************************************************************/
8664+
8665+#define NO_TIMESTAMPS (2 << 19) /* that should be about 8 MB of RAM;
8666+ * we may not get that much */
8667+
8668+static DECLARE_MUTEX(feather_lock);
8669+static int use_count = 0;
8670+
8671+/* used for draining the FT buffers */
8672+static int enabled_events = 0;
8673+
8674+static int trace_release(struct inode *in, struct file *filp)
8675+{
8676+ int err = -EINVAL;
8677+
8678+ if (down_interruptible(&feather_lock)) {
8679+ err = -ERESTARTSYS;
8680+ goto out;
8681+ }
8682+
8683+ printk(KERN_ALERT "%s/%d disconnects from feather trace device. "
8684+ "use_count=%d\n",
8685+ current->comm, current->pid, use_count);
8686+
8687+ if (use_count == 1) {
8688+ /* disable events */
8689+ ft_disable_all_events();
8690+ enabled_events = 0;
8691+
8692+ /* wait for any pending events to complete */
8693+ set_current_state(TASK_UNINTERRUPTIBLE);
8694+ schedule_timeout(HZ);
8695+
8696+ printk(KERN_ALERT "Failed trace writes: %u\n",
8697+ trace_ts_buf->failed_writes);
8698+
8699+ free_ft_buffer(trace_ts_buf);
8700+ trace_ts_buf = NULL;
8701+ }
8702+
8703+ /* dummy entry to make linker happy */
8704+ ft_event0(666, save_timestamp);
8705+
8706+ use_count--;
8707+ up(&feather_lock);
8708+out:
8709+ return err;
8710+}
8711+
8712+static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
8713+ loff_t *f_pos)
8714+{
8715+ /* we ignore f_pos, this is strictly sequential */
8716+ ssize_t error = 0;
8717+ struct timestamp ts;
8718+
8719+ if (down_interruptible(&feather_lock)) {
8720+ error = -ERESTARTSYS;
8721+ goto out;
8722+ }
8723+
8724+
8725+ while (len >= sizeof(struct timestamp)) {
8726+ if (ft_buffer_read(trace_ts_buf, &ts)) {
8727+ /* FIXME: avoid double copy */
8728+ if (copy_to_user(to, &ts, sizeof(struct timestamp))) {
8729+ error = -EFAULT;
8730+ break;
8731+ } else {
8732+ len -= sizeof(struct timestamp);
8733+ to += sizeof(struct timestamp);
8734+ error += sizeof(struct timestamp);
8735+ }
8736+ } else if (enabled_events) {
8737+ /* only wait if there are any events enabled */
8738+ set_current_state(TASK_INTERRUPTIBLE);
8739+ schedule_timeout(50);
8740+ if (signal_pending(current)) {
8741+ error = -ERESTARTSYS;
8742+ break;
8743+ }
8744+ } else
8745+ /* nothing left to get, return to user space */
8746+ break;
8747+ }
8748+ up(&feather_lock);
8749+out:
8750+ return error;
8751+}
8752+
8753+#define ENABLE_CMD 0
8754+#define DISABLE_CMD 1
8755+
8756+typedef uint32_t cmd_t;
8757+
8758+static ssize_t trace_write(struct file *filp, const char __user *from,
8759+ size_t len, loff_t *f_pos)
8760+{
8761+ ssize_t error = -EINVAL;
8762+ cmd_t cmd;
8763+ cmd_t id;
8764+
8765+ if (len % sizeof(cmd_t) || len < 2 * sizeof(cmd_t))
8766+ goto out;
8767+
8768+ if (copy_from_user(&cmd, from, sizeof(cmd_t))) {
8769+ error = -EFAULT;
8770+ goto out;
8771+ }
8772+ len -= sizeof(cmd_t);
8773+ from += sizeof(cmd_t);
8774+
8775+ if (cmd != ENABLE_CMD && cmd != DISABLE_CMD)
8776+ goto out;
8777+
8778+ if (down_interruptible(&feather_lock)) {
8779+ error = -ERESTARTSYS;
8780+ goto out;
8781+ }
8782+
8783+ error = sizeof(cmd_t);
8784+ while (len) {
8785+ if (copy_from_user(&id, from, sizeof(cmd_t))) {
8786+ error = -EFAULT;
8787+ break; /* must not return with feather_lock held */
8788+ }
8789+ len -= sizeof(cmd_t);
8790+ from += sizeof(cmd_t);
8791+ if (cmd) {
8792+ printk(KERN_INFO
8793+ "Disabling feather-trace event %d.\n", (int) id);
8794+ ft_disable_event(id);
8795+ enabled_events--;
8796+ } else {
8797+ printk(KERN_INFO
8798+ "Enabling feather-trace event %d.\n", (int) id);
8799+ ft_enable_event(id);
8800+ enabled_events++;
8801+ }
8802+ error += sizeof(cmd_t);
8803+ }
8804+
8805+ up(&feather_lock);
8806+ out:
8807+ return error;
8808+}
8809+
8810+static int trace_open(struct inode *in, struct file *filp)
8811+{
8812+ int err = 0;
8813+ unsigned int count = NO_TIMESTAMPS;
8814+
8815+ if (down_interruptible(&feather_lock)) {
8816+ err = -ERESTARTSYS;
8817+ goto out;
8818+ }
8819+
8820+ while (count && !trace_ts_buf) {
8821+ printk("trace: trying to allocate %u time stamps.\n", count);
8822+ trace_ts_buf = alloc_ft_buffer(count, sizeof(struct timestamp));
8823+ count /= 2;
8824+ }
8825+ if (!trace_ts_buf)
8826+ err = -ENOMEM;
8827+ else
8828+ use_count++;
8829+
8830+ up(&feather_lock);
8831+out:
8832+ return err;
8833+}
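Putting the device protocol together, a hypothetical user-space session: open the device (which allocates the timestamp buffer), write one ENABLE_CMD record listing the event ids of interest, then read back raw struct timestamp records. The device node path and the event ids are illustrative only:

/* Hypothetical sketch, e.g. after: mknod /dev/ft_trace c 252 0 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define ENABLE_CMD 0			/* must match the definition above */

int main(void)
{
	uint32_t cmd[3] = { ENABLE_CMD, 100, 101 };	/* event ids are made up */
	char buf[4096];
	ssize_t n;
	int fd = open("/dev/ft_trace", O_RDWR);		/* path is illustrative */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* One command word followed by the ids, all 32-bit, in a single write. */
	if (write(fd, cmd, sizeof(cmd)) != (ssize_t) sizeof(cmd)) {
		perror("write");
		return 1;
	}
	/* Each read returns whole binary struct timestamp records. */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}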
8834+
8835+/******************************************************************************/
8836+/* Device Registration */
8837+/******************************************************************************/
8838+
8839+#define FT_TRACE_MAJOR 252
8840+
8841+struct file_operations ft_trace_fops = {
8842+ .owner = THIS_MODULE,
8843+ .open = trace_open,
8844+ .release = trace_release,
8845+ .write = trace_write,
8846+ .read = trace_read,
8847+};
8848+
8849+
8850+static int __init register_buffer_dev(const char* name,
8851+ struct file_operations* fops,
8852+ int major, int count)
8853+{
8854+ dev_t trace_dev;
8855+ struct cdev *cdev;
8856+ int error = 0;
8857+
8858+ trace_dev = MKDEV(major, 0);
8859+ error = register_chrdev_region(trace_dev, count, name);
8860+ if (error)
8861+ {
8862+ printk(KERN_WARNING "trace: "
8863+ "Could not register major/minor number %d\n", major);
8864+ return error;
8865+ }
8866+ cdev = cdev_alloc();
8867+ if (!cdev) {
8868+ printk(KERN_WARNING "trace: "
8869+ "Could not get a cdev for %s.\n", name);
8870+ return -ENOMEM;
8871+ }
8872+ cdev->owner = THIS_MODULE;
8873+ cdev->ops = fops;
8874+ error = cdev_add(cdev, trace_dev, count);
8875+ if (error) {
8876+ printk(KERN_WARNING "trace: "
8877+ "add_cdev failed for %s.\n", name);
8878+ return -ENOMEM;
8879+ }
8880+ return error;
8881+
8882+}
8883+
8884+static int __init init_sched_trace(void)
8885+{
8886+ int error = 0;
8887+
8888+ printk("Initializing Feather-Trace device\n");
8889+
8890+ error = register_buffer_dev("ft_trace", &ft_trace_fops,
8891+ FT_TRACE_MAJOR, 1);
8892+ return error;
8893+}
8894+
8895+module_init(init_sched_trace);
diff --git a/index2.html b/index2.html
index f47ca11..883f9ef 100644
--- a/index2.html
+++ b/index2.html
@@ -216,15 +216,18 @@ Technology and Applications Symposium</cite>, pp. 342-353, April 2008.
 <li>
 Major changes (compared to LITMUS<sup>RT</sup> 2007.3):
 <ul>
-<li>LITMUS<sup>RT</sup> ported to Linux 2.6.24.
+<li>LITMUS<sup>RT</sup> was ported to Linux 2.6.24.
 </li>
-<li>LITMUS<sup>RT</sup> ported to <span style="src">sparc64</span>
+<li>LITMUS<sup>RT</sup> was ported to <span style="src">sparc64</span>.
 </li>
-<li>LITMUS<sup>RT</sup> is now a proper scheduling class (<span class="src">SCHED_LITMUS</span>)
+<li>LITMUS<sup>RT</sup> is now a proper scheduling class (<span class="src">SCHED_LITMUS</span>).
+</li>
+<li>
+LITMUS<sup>RT</sup> queues are now based on mergeable heaps.
 </li>
 <li>Support for multi-threaded real-time tasks.
 </li>
-<li>Scheduler plugins can be selected during runtime; no reboot required.
+<li>Scheduler plugins can be selected at runtime; no reboot required.
 </li>
 <li>
 Many bug fixes.