author    Björn B. Brandenburg <bbb@cs.unc.edu>    2010-10-21 16:08:46 -0400
committer Björn B. Brandenburg <bbb@cs.unc.edu>    2010-10-21 16:08:46 -0400
commit    0d769b3bb0fa07600a7d36d4e0b045e404f7e753 (patch)
tree      9fbeafc68d37c59f24418a80a1fd72189f43b032
parent    d76ce58bf5072779d7e1616cd42d2cd6bb7cf230 (diff)
Add 2010.2 release
-rw-r--r--  download/2010.2/SHA256SUMS               3
-rw-r--r--  download/2010.2/ft_tools-2010.2.tgz      bin 0 -> 5577 bytes
-rw-r--r--  download/2010.2/liblitmus-2010.2.tgz     bin 0 -> 17962 bytes
-rw-r--r--  download/2010.2/litmus-rt-2010.2.patch   11076
-rw-r--r--  index.html                               44
5 files changed, 11119 insertions, 4 deletions
diff --git a/download/2010.2/SHA256SUMS b/download/2010.2/SHA256SUMS
new file mode 100644
index 0000000..19f5f12
--- /dev/null
+++ b/download/2010.2/SHA256SUMS
@@ -0,0 +1,3 @@
b911c0a77b0bfd4d73928404338f6a1d98279340d9288a32deb0c5c1e4281469  ft_tools-2010.2.tgz
d2b772cd6c3a03c1329b259ad4a2bfbf9f7268a5699b8f988b85aa1eafe7600a  liblitmus-2010.2.tgz
c460952c4c91076392e889ef457cf231d5ecbcf7fbf72257ff84c0e63be7f9da  litmus-rt-2010.2.patch
diff --git a/download/2010.2/ft_tools-2010.2.tgz b/download/2010.2/ft_tools-2010.2.tgz
new file mode 100644
index 0000000..4d95abb
--- /dev/null
+++ b/download/2010.2/ft_tools-2010.2.tgz
Binary files differ
diff --git a/download/2010.2/liblitmus-2010.2.tgz b/download/2010.2/liblitmus-2010.2.tgz
new file mode 100644
index 0000000..abeb6c2
--- /dev/null
+++ b/download/2010.2/liblitmus-2010.2.tgz
Binary files differ
diff --git a/download/2010.2/litmus-rt-2010.2.patch b/download/2010.2/litmus-rt-2010.2.patch
new file mode 100644
index 0000000..6dcfc56
--- /dev/null
+++ b/download/2010.2/litmus-rt-2010.2.patch
@@ -0,0 +1,11076 @@
1 Makefile | 4 +-
2 arch/x86/Kconfig | 8 +
3 arch/x86/include/asm/entry_arch.h | 1 +
4 arch/x86/include/asm/feather_trace.h | 17 +
5 arch/x86/include/asm/feather_trace_32.h | 79 +++
6 arch/x86/include/asm/feather_trace_64.h | 67 +++
7 arch/x86/include/asm/hw_irq.h | 3 +
8 arch/x86/include/asm/irq_vectors.h | 5 +
9 arch/x86/include/asm/processor.h | 4 +
10 arch/x86/include/asm/unistd_32.h | 6 +-
11 arch/x86/include/asm/unistd_64.h | 4 +
12 arch/x86/kernel/Makefile | 2 +
13 arch/x86/kernel/cpu/intel_cacheinfo.c | 17 +
14 arch/x86/kernel/entry_64.S | 2 +
15 arch/x86/kernel/ft_event.c | 118 ++++
16 arch/x86/kernel/irqinit.c | 3 +
17 arch/x86/kernel/smp.c | 28 +
18 arch/x86/kernel/syscall_table_32.S | 14 +
19 fs/exec.c | 13 +-
20 fs/inode.c | 2 +
21 include/linux/completion.h | 1 +
22 include/linux/fs.h | 21 +-
23 include/linux/hrtimer.h | 32 ++
24 include/linux/sched.h | 17 +-
25 include/linux/smp.h | 5 +
26 include/linux/tick.h | 5 +
27 include/litmus/bheap.h | 77 +++
28 include/litmus/budget.h | 8 +
29 include/litmus/edf_common.h | 27 +
30 include/litmus/fdso.h | 70 +++
31 include/litmus/feather_buffer.h | 94 ++++
32 include/litmus/feather_trace.h | 65 +++
33 include/litmus/ftdev.h | 49 ++
34 include/litmus/jobs.h | 9 +
35 include/litmus/litmus.h | 267 +++++++++
36 include/litmus/rt_domain.h | 182 +++++++
37 include/litmus/rt_param.h | 196 +++++++
38 include/litmus/sched_plugin.h | 162 ++++++
39 include/litmus/sched_trace.h | 192 +++++++
40 include/litmus/trace.h | 113 ++++
41 include/litmus/unistd_32.h | 23 +
42 include/litmus/unistd_64.h | 37 ++
43 kernel/exit.c | 4 +
44 kernel/fork.c | 7 +
45 kernel/hrtimer.c | 95 ++++
46 kernel/printk.c | 14 +-
47 kernel/sched.c | 106 ++++-
48 kernel/sched_fair.c | 2 +-
49 kernel/sched_rt.c | 2 +-
50 kernel/time/tick-sched.c | 48 ++-
51 litmus/Kconfig | 134 +++++
52 litmus/Makefile | 25 +
53 litmus/bheap.c | 314 +++++++++++
54 litmus/budget.c | 109 ++++
55 litmus/ctrldev.c | 150 +++++
56 litmus/edf_common.c | 102 ++++
57 litmus/fdso.c | 281 ++++++++++
58 litmus/fmlp.c | 268 +++++++++
59 litmus/ft_event.c | 43 ++
60 litmus/ftdev.c | 360 +++++++++++++
61 litmus/jobs.c | 43 ++
62 litmus/litmus.c | 799 +++++++++++++++++++++++++++
63 litmus/rt_domain.c | 355 ++++++++++++
64 litmus/sched_cedf.c | 773 ++++++++++++++++++++++++++
65 litmus/sched_gsn_edf.c | 842 +++++++++++++++++++++++++++++
66 litmus/sched_litmus.c | 315 +++++++++++
67 litmus/sched_pfair.c | 897 +++++++++++++++++++++++++++++++
68 litmus/sched_plugin.c | 265 +++++++++
69 litmus/sched_psn_edf.c | 482 +++++++++++++++++
70 litmus/sched_task_trace.c | 204 +++++++
71 litmus/sched_trace.c | 378 +++++++++++++
72 litmus/srp.c | 318 +++++++++++
73 litmus/sync.c | 104 ++++
74 litmus/trace.c | 103 ++++
75 74 files changed, 9954 insertions(+), 37 deletions(-)
76
77diff --git a/Makefile b/Makefile
78index ebc8225..316557d 100644
79--- a/Makefile
80+++ b/Makefile
81@@ -1,7 +1,7 @@
82 VERSION = 2
83 PATCHLEVEL = 6
84 SUBLEVEL = 34
85-EXTRAVERSION =
86+EXTRAVERSION =-litmus2010
87 NAME = Sheep on Meth
88
89 # *DOCUMENTATION*
90@@ -650,7 +650,7 @@ export mod_strip_cmd
91
92
93 ifeq ($(KBUILD_EXTMOD),)
94-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
95+core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
96
97 vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
98 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
99diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
100index 9458685..7b2c8db 100644
101--- a/arch/x86/Kconfig
102+++ b/arch/x86/Kconfig
103@@ -2125,3 +2125,11 @@ source "crypto/Kconfig"
104 source "arch/x86/kvm/Kconfig"
105
106 source "lib/Kconfig"
107+
108+config ARCH_HAS_FEATHER_TRACE
109+ def_bool y
110+
111+config ARCH_HAS_SEND_PULL_TIMERS
112+ def_bool y
113+
114+source "litmus/Kconfig"
115diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
116index 8e8ec66..5d07dea 100644
117--- a/arch/x86/include/asm/entry_arch.h
118+++ b/arch/x86/include/asm/entry_arch.h
119@@ -13,6 +13,7 @@
120 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
121 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
122 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
123+BUILD_INTERRUPT(pull_timers_interrupt,PULL_TIMERS_VECTOR)
124 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
125 BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
126
127diff --git a/arch/x86/include/asm/feather_trace.h b/arch/x86/include/asm/feather_trace.h
128new file mode 100644
129index 0000000..4fd3163
130--- /dev/null
131+++ b/arch/x86/include/asm/feather_trace.h
132@@ -0,0 +1,17 @@
133+#ifndef _ARCH_FEATHER_TRACE_H
134+#define _ARCH_FEATHER_TRACE_H
135+
136+#include <asm/msr.h>
137+
138+static inline unsigned long long ft_timestamp(void)
139+{
140+ return __native_read_tsc();
141+}
142+
143+#ifdef CONFIG_X86_32
144+#include "feather_trace_32.h"
145+#else
146+#include "feather_trace_64.h"
147+#endif
148+
149+#endif
150diff --git a/arch/x86/include/asm/feather_trace_32.h b/arch/x86/include/asm/feather_trace_32.h
151new file mode 100644
152index 0000000..70202f9
153--- /dev/null
154+++ b/arch/x86/include/asm/feather_trace_32.h
155@@ -0,0 +1,79 @@
156+/* Do not directly include this file. Include feather_trace.h instead */
157+
158+#define feather_callback __attribute__((regparm(0)))
159+
160+/*
161+ * make the compiler reload any register that is not saved in
162+ * a cdecl function call
163+ */
164+#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
165+
166+#define ft_event(id, callback) \
167+ __asm__ __volatile__( \
168+ "1: jmp 2f \n\t" \
169+ " call " #callback " \n\t" \
170+ ".section __event_table, \"aw\" \n\t" \
171+ ".long " #id ", 0, 1b, 2f \n\t" \
172+ ".previous \n\t" \
173+ "2: \n\t" \
174+ : : : CLOBBER_LIST)
175+
176+#define ft_event0(id, callback) \
177+ __asm__ __volatile__( \
178+ "1: jmp 2f \n\t" \
179+ " subl $4, %%esp \n\t" \
180+ " movl $" #id ", (%%esp) \n\t" \
181+ " call " #callback " \n\t" \
182+ " addl $4, %%esp \n\t" \
183+ ".section __event_table, \"aw\" \n\t" \
184+ ".long " #id ", 0, 1b, 2f \n\t" \
185+ ".previous \n\t" \
186+ "2: \n\t" \
187+ : : : CLOBBER_LIST)
188+
189+#define ft_event1(id, callback, param) \
190+ __asm__ __volatile__( \
191+ "1: jmp 2f \n\t" \
192+ " subl $8, %%esp \n\t" \
193+ " movl %0, 4(%%esp) \n\t" \
194+ " movl $" #id ", (%%esp) \n\t" \
195+ " call " #callback " \n\t" \
196+ " addl $8, %%esp \n\t" \
197+ ".section __event_table, \"aw\" \n\t" \
198+ ".long " #id ", 0, 1b, 2f \n\t" \
199+ ".previous \n\t" \
200+ "2: \n\t" \
201+ : : "r" (param) : CLOBBER_LIST)
202+
203+#define ft_event2(id, callback, param, param2) \
204+ __asm__ __volatile__( \
205+ "1: jmp 2f \n\t" \
206+ " subl $12, %%esp \n\t" \
207+ " movl %1, 8(%%esp) \n\t" \
208+ " movl %0, 4(%%esp) \n\t" \
209+ " movl $" #id ", (%%esp) \n\t" \
210+ " call " #callback " \n\t" \
211+ " addl $12, %%esp \n\t" \
212+ ".section __event_table, \"aw\" \n\t" \
213+ ".long " #id ", 0, 1b, 2f \n\t" \
214+ ".previous \n\t" \
215+ "2: \n\t" \
216+ : : "r" (param), "r" (param2) : CLOBBER_LIST)
217+
218+
219+#define ft_event3(id, callback, p, p2, p3) \
220+ __asm__ __volatile__( \
221+ "1: jmp 2f \n\t" \
222+ " subl $16, %%esp \n\t" \
223+ " movl %2, 12(%%esp) \n\t" \
224+ " movl %1, 8(%%esp) \n\t" \
225+ " movl %0, 4(%%esp) \n\t" \
226+ " movl $" #id ", (%%esp) \n\t" \
227+ " call " #callback " \n\t" \
228+ " addl $16, %%esp \n\t" \
229+ ".section __event_table, \"aw\" \n\t" \
230+ ".long " #id ", 0, 1b, 2f \n\t" \
231+ ".previous \n\t" \
232+ "2: \n\t" \
233+ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
234+
235diff --git a/arch/x86/include/asm/feather_trace_64.h b/arch/x86/include/asm/feather_trace_64.h
236new file mode 100644
237index 0000000..54ac2ae
238--- /dev/null
239+++ b/arch/x86/include/asm/feather_trace_64.h
240@@ -0,0 +1,67 @@
241+/* Do not directly include this file. Include feather_trace.h instead */
242+
243+/* regparm is the default on x86_64 */
244+#define feather_callback
245+
246+# define _EVENT_TABLE(id,from,to) \
247+ ".section __event_table, \"aw\"\n\t" \
248+ ".balign 8\n\t" \
249+ ".quad " #id ", 0, " #from ", " #to " \n\t" \
250+ ".previous \n\t"
251+
252+/*
253+ * x86_64 callee only owns rbp, rbx, r12 -> r15
254+ * the callee can freely modify the others
255+ */
256+#define CLOBBER_LIST "memory", "cc", "rdi", "rsi", "rdx", "rcx", \
257+ "r8", "r9", "r10", "r11", "rax"
258+
259+#define ft_event(id, callback) \
260+ __asm__ __volatile__( \
261+ "1: jmp 2f \n\t" \
262+ " call " #callback " \n\t" \
263+ _EVENT_TABLE(id,1b,2f) \
264+ "2: \n\t" \
265+ : : : CLOBBER_LIST)
266+
267+#define ft_event0(id, callback) \
268+ __asm__ __volatile__( \
269+ "1: jmp 2f \n\t" \
270+ " movq $" #id ", %%rdi \n\t" \
271+ " call " #callback " \n\t" \
272+ _EVENT_TABLE(id,1b,2f) \
273+ "2: \n\t" \
274+ : : : CLOBBER_LIST)
275+
276+#define ft_event1(id, callback, param) \
277+ __asm__ __volatile__( \
278+ "1: jmp 2f \n\t" \
279+ " movq %0, %%rsi \n\t" \
280+ " movq $" #id ", %%rdi \n\t" \
281+ " call " #callback " \n\t" \
282+ _EVENT_TABLE(id,1b,2f) \
283+ "2: \n\t" \
284+ : : "r" (param) : CLOBBER_LIST)
285+
286+#define ft_event2(id, callback, param, param2) \
287+ __asm__ __volatile__( \
288+ "1: jmp 2f \n\t" \
289+ " movq %1, %%rdx \n\t" \
290+ " movq %0, %%rsi \n\t" \
291+ " movq $" #id ", %%rdi \n\t" \
292+ " call " #callback " \n\t" \
293+ _EVENT_TABLE(id,1b,2f) \
294+ "2: \n\t" \
295+ : : "r" (param), "r" (param2) : CLOBBER_LIST)
296+
297+#define ft_event3(id, callback, p, p2, p3) \
298+ __asm__ __volatile__( \
299+ "1: jmp 2f \n\t" \
300+ " movq %2, %%rcx \n\t" \
301+ " movq %1, %%rdx \n\t" \
302+ " movq %0, %%rsi \n\t" \
303+ " movq $" #id ", %%rdi \n\t" \
304+ " call " #callback " \n\t" \
305+ _EVENT_TABLE(id,1b,2f) \
306+ "2: \n\t" \
307+ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
308diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
309index 46c0fe0..c174115 100644
310--- a/arch/x86/include/asm/hw_irq.h
311+++ b/arch/x86/include/asm/hw_irq.h
312@@ -53,6 +53,8 @@ extern void threshold_interrupt(void);
313 extern void call_function_interrupt(void);
314 extern void call_function_single_interrupt(void);
315
316+extern void pull_timers_interrupt(void);
317+
318 /* IOAPIC */
319 #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
320 extern unsigned long io_apic_irqs;
321@@ -122,6 +124,7 @@ extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
322 extern void smp_reschedule_interrupt(struct pt_regs *);
323 extern void smp_call_function_interrupt(struct pt_regs *);
324 extern void smp_call_function_single_interrupt(struct pt_regs *);
325+extern void smp_pull_timers_interrupt(struct pt_regs *);
326 #ifdef CONFIG_X86_32
327 extern void smp_invalidate_interrupt(struct pt_regs *);
328 #else
329diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
330index 8767d99..bb5318b 100644
331--- a/arch/x86/include/asm/irq_vectors.h
332+++ b/arch/x86/include/asm/irq_vectors.h
333@@ -109,6 +109,11 @@
334 #define LOCAL_TIMER_VECTOR 0xef
335
336 /*
337+ * LITMUS^RT pull timers IRQ vector
338+ */
339+#define PULL_TIMERS_VECTOR 0xee
340+
341+/*
342 * Generic system vector for platform specific use
343 */
344 #define X86_PLATFORM_IPI_VECTOR 0xed
345diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
346index b753ea5..48426f9 100644
347--- a/arch/x86/include/asm/processor.h
348+++ b/arch/x86/include/asm/processor.h
349@@ -173,6 +173,10 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
350 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
351 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
352 extern unsigned short num_cache_leaves;
353+#ifdef CONFIG_SYSFS
354+extern int get_shared_cpu_map(cpumask_var_t mask,
355+ unsigned int cpu, int index);
356+#endif
357
358 extern void detect_extended_topology(struct cpuinfo_x86 *c);
359 extern void detect_ht(struct cpuinfo_x86 *c);
360diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
361index beb9b5f..987e523 100644
362--- a/arch/x86/include/asm/unistd_32.h
363+++ b/arch/x86/include/asm/unistd_32.h
364@@ -344,9 +344,13 @@
365 #define __NR_perf_event_open 336
366 #define __NR_recvmmsg 337
367
368+#define __NR_LITMUS 338
369+
370+#include "litmus/unistd_32.h"
371+
372 #ifdef __KERNEL__
373
374-#define NR_syscalls 338
375+#define NR_syscalls 338 + NR_litmus_syscalls
376
377 #define __ARCH_WANT_IPC_PARSE_VERSION
378 #define __ARCH_WANT_OLD_READDIR
379diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
380index ff4307b..b21c3b2 100644
381--- a/arch/x86/include/asm/unistd_64.h
382+++ b/arch/x86/include/asm/unistd_64.h
383@@ -664,6 +664,10 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
384 #define __NR_recvmmsg 299
385 __SYSCALL(__NR_recvmmsg, sys_recvmmsg)
386
387+#define __NR_LITMUS 299
388+
389+#include "litmus/unistd_64.h"
390+
391 #ifndef __NO_STUBS
392 #define __ARCH_WANT_OLD_READDIR
393 #define __ARCH_WANT_OLD_STAT
394diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
395index 4c58352..d09934e 100644
396--- a/arch/x86/kernel/Makefile
397+++ b/arch/x86/kernel/Makefile
398@@ -117,6 +117,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
399
400 obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
401
402+obj-$(CONFIG_FEATHER_TRACE) += ft_event.o
403+
404 ###
405 # 64 bit specific files
406 ifeq ($(CONFIG_X86_64),y)
407diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
408index 95962a9..94d8e47 100644
409--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
410+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
411@@ -632,6 +632,23 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
412 static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
413 #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
414
415+/* return (via mask) the CPUs that share the index-th cache level with cpu */
416+int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index)
417+{
418+ int ret = 0;
419+ struct _cpuid4_info *this_leaf;
420+
421+ if (index >= num_cache_leaves) {
422+ index = num_cache_leaves - 1;
423+ ret = index;
424+ }
425+
426+ this_leaf = CPUID4_INFO_IDX(cpu,index);
427+ cpumask_copy(mask, to_cpumask(this_leaf->shared_cpu_map));
428+
429+ return ret;
430+}
431+
432 #ifdef CONFIG_SMP
433 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
434 {
435diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
436index 0697ff1..b9ec6cd 100644
437--- a/arch/x86/kernel/entry_64.S
438+++ b/arch/x86/kernel/entry_64.S
439@@ -1016,6 +1016,8 @@ apicinterrupt CALL_FUNCTION_VECTOR \
440 call_function_interrupt smp_call_function_interrupt
441 apicinterrupt RESCHEDULE_VECTOR \
442 reschedule_interrupt smp_reschedule_interrupt
443+apicinterrupt PULL_TIMERS_VECTOR \
444+ pull_timers_interrupt smp_pull_timers_interrupt
445 #endif
446
447 apicinterrupt ERROR_APIC_VECTOR \
448diff --git a/arch/x86/kernel/ft_event.c b/arch/x86/kernel/ft_event.c
449new file mode 100644
450index 0000000..37cc332
451--- /dev/null
452+++ b/arch/x86/kernel/ft_event.c
453@@ -0,0 +1,118 @@
454+#include <linux/types.h>
455+
456+#include <litmus/feather_trace.h>
457+
458+/* the feather trace management functions assume
459+ * exclusive access to the event table
460+ */
461+
462+#ifndef CONFIG_DEBUG_RODATA
463+
464+#define BYTE_JUMP 0xeb
465+#define BYTE_JUMP_LEN 0x02
466+
467+/* for each event, there is an entry in the event table */
468+struct trace_event {
469+ long id;
470+ long count;
471+ long start_addr;
472+ long end_addr;
473+};
474+
475+extern struct trace_event __start___event_table[];
476+extern struct trace_event __stop___event_table[];
477+
478+/* Workaround: if no events are defined, then the event_table section does not
479+ * exist and the above references cause linker errors. This could probably be
480+ * fixed by adjusting the linker script, but it is easier to maintain for us if
481+ * we simply create a dummy symbol in the event table section.
482+ */
483+int __event_table_dummy[0] __attribute__ ((section("__event_table")));
484+
485+int ft_enable_event(unsigned long id)
486+{
487+ struct trace_event* te = __start___event_table;
488+ int count = 0;
489+ char* delta;
490+ unsigned char* instr;
491+
492+ while (te < __stop___event_table) {
493+ if (te->id == id && ++te->count == 1) {
494+ instr = (unsigned char*) te->start_addr;
495+ /* make sure we don't clobber something wrong */
496+ if (*instr == BYTE_JUMP) {
497+ delta = (((unsigned char*) te->start_addr) + 1);
498+ *delta = 0;
499+ }
500+ }
501+ if (te->id == id)
502+ count++;
503+ te++;
504+ }
505+
506+ printk(KERN_DEBUG "ft_enable_event: enabled %d events\n", count);
507+ return count;
508+}
509+
510+int ft_disable_event(unsigned long id)
511+{
512+ struct trace_event* te = __start___event_table;
513+ int count = 0;
514+ char* delta;
515+ unsigned char* instr;
516+
517+ while (te < __stop___event_table) {
518+ if (te->id == id && --te->count == 0) {
519+ instr = (unsigned char*) te->start_addr;
520+ if (*instr == BYTE_JUMP) {
521+ delta = (((unsigned char*) te->start_addr) + 1);
522+ *delta = te->end_addr - te->start_addr -
523+ BYTE_JUMP_LEN;
524+ }
525+ }
526+ if (te->id == id)
527+ count++;
528+ te++;
529+ }
530+
531+ printk(KERN_DEBUG "ft_disable_event: disabled %d events\n", count);
532+ return count;
533+}
534+
535+int ft_disable_all_events(void)
536+{
537+ struct trace_event* te = __start___event_table;
538+ int count = 0;
539+ char* delta;
540+ unsigned char* instr;
541+
542+ while (te < __stop___event_table) {
543+ if (te->count) {
544+ instr = (unsigned char*) te->start_addr;
545+ if (*instr == BYTE_JUMP) {
546+ delta = (((unsigned char*) te->start_addr)
547+ + 1);
548+ *delta = te->end_addr - te->start_addr -
549+ BYTE_JUMP_LEN;
550+ te->count = 0;
551+ count++;
552+ }
553+ }
554+ te++;
555+ }
556+ return count;
557+}
558+
559+int ft_is_event_enabled(unsigned long id)
560+{
561+ struct trace_event* te = __start___event_table;
562+
563+ while (te < __stop___event_table) {
564+ if (te->id == id)
565+ return te->count;
566+ te++;
567+ }
568+ return 0;
569+}
570+
571+#endif
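
For illustration only (not part of the patch): a minimal sketch of how a Feather-Trace trigger is laid down and toggled. The event ID 100 and the demo_* names are invented for this example. Each ft_event0() site emits a two-byte short jump over the call plus an __event_table record; ft_enable_event() rewrites the jump displacement to zero so the call is reached, and ft_disable_event() restores the displacement that skips it.

#include <linux/kernel.h>
#include <litmus/feather_trace.h>

/* probe that runs only while event 100 is enabled */
feather_callback void demo_probe(unsigned long id)
{
        printk(KERN_DEBUG "feather-trace: event %lu fired\n", id);
}

void demo_hot_path(void)
{
        /* disabled by default: the short jump skips the call entirely */
        ft_event0(100, demo_probe);
}

/* elsewhere: ft_enable_event(100); ...; ft_disable_event(100); */
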
572diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
573index 0ed2d30..a760ce1 100644
574--- a/arch/x86/kernel/irqinit.c
575+++ b/arch/x86/kernel/irqinit.c
576@@ -189,6 +189,9 @@ static void __init smp_intr_init(void)
577 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
578 call_function_single_interrupt);
579
580+ /* IPI for hrtimer pulling on remote cpus */
581+ alloc_intr_gate(PULL_TIMERS_VECTOR, pull_timers_interrupt);
582+
583 /* Low priority IPI to cleanup after moving an irq */
584 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
585 set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
586diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
587index d801210..97af589 100644
588--- a/arch/x86/kernel/smp.c
589+++ b/arch/x86/kernel/smp.c
590@@ -23,6 +23,9 @@
591 #include <linux/cpu.h>
592 #include <linux/gfp.h>
593
594+#include <litmus/litmus.h>
595+#include <litmus/trace.h>
596+
597 #include <asm/mtrr.h>
598 #include <asm/tlbflush.h>
599 #include <asm/mmu_context.h>
600@@ -118,6 +121,7 @@ static void native_smp_send_reschedule(int cpu)
601 WARN_ON(1);
602 return;
603 }
604+ TS_SEND_RESCHED_START(cpu);
605 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
606 }
607
608@@ -147,6 +151,16 @@ void native_send_call_func_ipi(const struct cpumask *mask)
609 free_cpumask_var(allbutself);
610 }
611
612+/* trigger timers on remote cpu */
613+void smp_send_pull_timers(int cpu)
614+{
615+ if (unlikely(cpu_is_offline(cpu))) {
616+ WARN_ON(1);
617+ return;
618+ }
619+ apic->send_IPI_mask(cpumask_of(cpu), PULL_TIMERS_VECTOR);
620+}
621+
622 /*
623 * this function calls the 'stop' function on all other CPUs in the system.
624 */
625@@ -198,7 +212,12 @@ static void native_smp_send_stop(void)
626 void smp_reschedule_interrupt(struct pt_regs *regs)
627 {
628 ack_APIC_irq();
629+ /* LITMUS^RT needs this interrupt to properly reschedule
630+ * on this cpu
631+ */
632+ set_tsk_need_resched(current);
633 inc_irq_stat(irq_resched_count);
634+ TS_SEND_RESCHED_END;
635 /*
636 * KVM uses this interrupt to force a cpu out of guest mode
637 */
638@@ -222,6 +241,15 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
639 irq_exit();
640 }
641
642+extern void hrtimer_pull(void);
643+
644+void smp_pull_timers_interrupt(struct pt_regs *regs)
645+{
646+ ack_APIC_irq();
647+ TRACE("pull timer interrupt\n");
648+ hrtimer_pull();
649+}
650+
651 struct smp_ops smp_ops = {
652 .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
653 .smp_prepare_cpus = native_smp_prepare_cpus,
654diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
655index 8b37293..5da9a68 100644
656--- a/arch/x86/kernel/syscall_table_32.S
657+++ b/arch/x86/kernel/syscall_table_32.S
658@@ -337,3 +337,17 @@ ENTRY(sys_call_table)
659 .long sys_rt_tgsigqueueinfo /* 335 */
660 .long sys_perf_event_open
661 .long sys_recvmmsg
662+ .long sys_set_rt_task_param /* LITMUS^RT 338 */
663+ .long sys_get_rt_task_param
664+ .long sys_complete_job
665+ .long sys_od_open
666+ .long sys_od_close
667+ .long sys_fmlp_down
668+ .long sys_fmlp_up
669+ .long sys_srp_down
670+ .long sys_srp_up
671+ .long sys_query_job_no
672+ .long sys_wait_for_job_release
673+ .long sys_wait_for_ts_release
674+ .long sys_release_ts
675+ .long sys_null_call
676diff --git a/fs/exec.c b/fs/exec.c
677index e6e94c6..0293087 100644
678--- a/fs/exec.c
679+++ b/fs/exec.c
680@@ -19,7 +19,7 @@
681 * current->executable is only used by the procfs. This allows a dispatch
682 * table to check for several different types of binary formats. We keep
683 * trying until we recognize the file or we run out of supported binary
684- * formats.
685+ * formats.
686 */
687
688 #include <linux/slab.h>
689@@ -56,6 +56,8 @@
690 #include <linux/fs_struct.h>
691 #include <linux/pipe_fs_i.h>
692
693+#include <litmus/litmus.h>
694+
695 #include <asm/uaccess.h>
696 #include <asm/mmu_context.h>
697 #include <asm/tlb.h>
698@@ -79,7 +81,7 @@ int __register_binfmt(struct linux_binfmt * fmt, int insert)
699 insert ? list_add(&fmt->lh, &formats) :
700 list_add_tail(&fmt->lh, &formats);
701 write_unlock(&binfmt_lock);
702- return 0;
703+ return 0;
704 }
705
706 EXPORT_SYMBOL(__register_binfmt);
707@@ -1045,7 +1047,7 @@ void setup_new_exec(struct linux_binprm * bprm)
708 group */
709
710 current->self_exec_id++;
711-
712+
713 flush_signal_handlers(current, 0);
714 flush_old_files(current->files);
715 }
716@@ -1135,8 +1137,8 @@ int check_unsafe_exec(struct linux_binprm *bprm)
717 return res;
718 }
719
720-/*
721- * Fill the binprm structure from the inode.
722+/*
723+ * Fill the binprm structure from the inode.
724 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
725 *
726 * This may be called multiple times for binary chains (scripts for example).
727@@ -1348,6 +1350,7 @@ int do_execve(char * filename,
728 goto out_unmark;
729
730 sched_exec();
731+ litmus_exec();
732
733 bprm->file = file;
734 bprm->filename = filename;
735diff --git a/fs/inode.c b/fs/inode.c
736index 407bf39..aaaaf09 100644
737--- a/fs/inode.c
738+++ b/fs/inode.c
739@@ -271,6 +271,8 @@ void inode_init_once(struct inode *inode)
740 #ifdef CONFIG_FSNOTIFY
741 INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
742 #endif
743+ INIT_LIST_HEAD(&inode->i_obj_list);
744+ mutex_init(&inode->i_obj_mutex);
745 }
746 EXPORT_SYMBOL(inode_init_once);
747
748diff --git a/include/linux/completion.h b/include/linux/completion.h
749index 4a6b604..258bec1 100644
750--- a/include/linux/completion.h
751+++ b/include/linux/completion.h
752@@ -88,6 +88,7 @@ extern bool completion_done(struct completion *x);
753
754 extern void complete(struct completion *);
755 extern void complete_all(struct completion *);
756+extern void complete_n(struct completion *, int n);
757
758 /**
759 * INIT_COMPLETION: - reinitialize a completion structure
760diff --git a/include/linux/fs.h b/include/linux/fs.h
761index 44f35ae..8949184 100644
762--- a/include/linux/fs.h
763+++ b/include/linux/fs.h
764@@ -15,8 +15,8 @@
765 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
766 * upper limit on files-per-process.
767 *
768- * Some programs (notably those using select()) may have to be
769- * recompiled to take full advantage of the new limits..
770+ * Some programs (notably those using select()) may have to be
771+ * recompiled to take full advantage of the new limits..
772 */
773
774 /* Fixed constants first: */
775@@ -173,7 +173,7 @@ struct inodes_stat_t {
776 #define SEL_EX 4
777
778 /* public flags for file_system_type */
779-#define FS_REQUIRES_DEV 1
780+#define FS_REQUIRES_DEV 1
781 #define FS_BINARY_MOUNTDATA 2
782 #define FS_HAS_SUBTYPE 4
783 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
784@@ -471,7 +471,7 @@ struct iattr {
785 */
786 #include <linux/quota.h>
787
788-/**
789+/**
790 * enum positive_aop_returns - aop return codes with specific semantics
791 *
792 * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
793@@ -481,7 +481,7 @@ struct iattr {
794 * be a candidate for writeback again in the near
795 * future. Other callers must be careful to unlock
796 * the page if they get this return. Returned by
797- * writepage();
798+ * writepage();
799 *
800 * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
801 * unlocked it and the page might have been truncated.
802@@ -720,6 +720,7 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
803
804 struct posix_acl;
805 #define ACL_NOT_CACHED ((void *)(-1))
806+struct inode_obj_id_table;
807
808 struct inode {
809 struct hlist_node i_hash;
810@@ -788,6 +789,8 @@ struct inode {
811 struct posix_acl *i_acl;
812 struct posix_acl *i_default_acl;
813 #endif
814+ struct list_head i_obj_list;
815+ struct mutex i_obj_mutex;
816 void *i_private; /* fs or device private pointer */
817 };
818
819@@ -1000,10 +1003,10 @@ static inline int file_check_writeable(struct file *filp)
820
821 #define MAX_NON_LFS ((1UL<<31) - 1)
822
823-/* Page cache limit. The filesystems should put that into their s_maxbytes
824- limits, otherwise bad things can happen in VM. */
825+/* Page cache limit. The filesystems should put that into their s_maxbytes
826+ limits, otherwise bad things can happen in VM. */
827 #if BITS_PER_LONG==32
828-#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
829+#define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
830 #elif BITS_PER_LONG==64
831 #define MAX_LFS_FILESIZE 0x7fffffffffffffffUL
832 #endif
833@@ -2129,7 +2132,7 @@ extern int may_open(struct path *, int, int);
834
835 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
836 extern struct file * open_exec(const char *);
837-
838+
839 /* fs/dcache.c -- generic fs support functions */
840 extern int is_subdir(struct dentry *, struct dentry *);
841 extern int path_is_under(struct path *, struct path *);
842diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
843index 5d86fb2..9470a9e 100644
844--- a/include/linux/hrtimer.h
845+++ b/include/linux/hrtimer.h
846@@ -167,6 +167,7 @@ struct hrtimer_clock_base {
847 * @nr_retries: Total number of hrtimer interrupt retries
848 * @nr_hangs: Total number of hrtimer interrupt hangs
849 * @max_hang_time: Maximum time spent in hrtimer_interrupt
850+ * @to_pull: LITMUS^RT list of timers to be pulled on this cpu
851 */
852 struct hrtimer_cpu_base {
853 raw_spinlock_t lock;
854@@ -180,8 +181,32 @@ struct hrtimer_cpu_base {
855 unsigned long nr_hangs;
856 ktime_t max_hang_time;
857 #endif
858+ struct list_head to_pull;
859 };
860
861+#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
862+
863+#define HRTIMER_START_ON_INACTIVE 0
864+#define HRTIMER_START_ON_QUEUED 1
865+
866+/*
867+ * struct hrtimer_start_on_info - save timer info on remote cpu
868+ * @list: list of hrtimer_start_on_info on remote cpu (to_pull)
869+ * @timer: timer to be triggered on remote cpu
870+ * @time: time event
871+ * @mode: timer mode
872+ * @state: activity flag
873+ */
874+struct hrtimer_start_on_info {
875+ struct list_head list;
876+ struct hrtimer *timer;
877+ ktime_t time;
878+ enum hrtimer_mode mode;
879+ atomic_t state;
880+};
881+
882+#endif
883+
884 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
885 {
886 timer->_expires = time;
887@@ -348,6 +373,13 @@ __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
888 unsigned long delta_ns,
889 const enum hrtimer_mode mode, int wakeup);
890
891+#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
892+extern void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info);
893+extern int hrtimer_start_on(int cpu, struct hrtimer_start_on_info *info,
894+ struct hrtimer *timer, ktime_t time,
895+ const enum hrtimer_mode mode);
896+#endif
897+
898 extern int hrtimer_cancel(struct hrtimer *timer);
899 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
900
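
For illustration only (not part of the patch): a sketch of the new remote-arming interface. demo_timer, demo_timer_fn, and the 1 ms offset are invented; the actual arming logic lives in the kernel/hrtimer.c hunk listed in the diffstat, and remote CPUs are reached via the PULL_TIMERS_VECTOR IPI added in the x86 hunks above.

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;
static struct hrtimer_start_on_info demo_info;

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
        return HRTIMER_NORESTART;
}

static void demo_arm_on(int cpu)
{
        hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
        demo_timer.function = demo_timer_fn;

        hrtimer_start_on_info_init(&demo_info);
        /* fire 1 ms from now on the given CPU; a remote CPU is reached
         * via the PULL_TIMERS_VECTOR IPI wired up earlier in this patch */
        hrtimer_start_on(cpu, &demo_info, &demo_timer,
                         ktime_add_ns(ktime_get(), 1000000ULL),
                         HRTIMER_MODE_ABS);
}
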
901diff --git a/include/linux/sched.h b/include/linux/sched.h
902index 2b7b81d..225347d 100644
903--- a/include/linux/sched.h
904+++ b/include/linux/sched.h
905@@ -38,6 +38,7 @@
906 #define SCHED_BATCH 3
907 /* SCHED_ISO: reserved but not implemented yet */
908 #define SCHED_IDLE 5
909+#define SCHED_LITMUS 6
910 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
911 #define SCHED_RESET_ON_FORK 0x40000000
912
913@@ -94,6 +95,8 @@ struct sched_param {
914
915 #include <asm/processor.h>
916
917+#include <litmus/rt_param.h>
918+
919 struct exec_domain;
920 struct futex_pi_state;
921 struct robust_list_head;
922@@ -1166,6 +1169,7 @@ struct sched_rt_entity {
923 };
924
925 struct rcu_node;
926+struct od_table_entry;
927
928 struct task_struct {
929 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
930@@ -1250,9 +1254,9 @@ struct task_struct {
931 unsigned long stack_canary;
932 #endif
933
934- /*
935+ /*
936 * pointers to (original) parent process, youngest child, younger sibling,
937- * older sibling, respectively. (p->father can be replaced with
938+ * older sibling, respectively. (p->father can be replaced with
939 * p->real_parent->pid)
940 */
941 struct task_struct *real_parent; /* real parent process */
942@@ -1464,6 +1468,13 @@ struct task_struct {
943 int make_it_fail;
944 #endif
945 struct prop_local_single dirties;
946+
947+ /* LITMUS RT parameters and state */
948+ struct rt_param rt_param;
949+
950+ /* references to PI semaphores, etc. */
951+ struct od_table_entry *od_table;
952+
953 #ifdef CONFIG_LATENCYTOP
954 int latency_record_count;
955 struct latency_record latency_record[LT_SAVECOUNT];
956@@ -2018,7 +2029,7 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s
957 spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
958
959 return ret;
960-}
961+}
962
963 extern void block_all_signals(int (*notifier)(void *priv), void *priv,
964 sigset_t *mask);
965diff --git a/include/linux/smp.h b/include/linux/smp.h
966index cfa2d20..f86d407 100644
967--- a/include/linux/smp.h
968+++ b/include/linux/smp.h
969@@ -80,6 +80,11 @@ int smp_call_function_any(const struct cpumask *mask,
970 void (*func)(void *info), void *info, int wait);
971
972 /*
973+ * sends a 'pull timer' event to a remote CPU
974+ */
975+extern void smp_send_pull_timers(int cpu);
976+
977+/*
978 * Generic and arch helpers
979 */
980 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
981diff --git a/include/linux/tick.h b/include/linux/tick.h
982index d2ae79e..25d0cf4 100644
983--- a/include/linux/tick.h
984+++ b/include/linux/tick.h
985@@ -73,6 +73,11 @@ extern int tick_is_oneshot_available(void);
986 extern struct tick_device *tick_get_device(int cpu);
987
988 # ifdef CONFIG_HIGH_RES_TIMERS
989+/* LITMUS^RT tick alignment */
990+#define LINUX_DEFAULT_TICKS 0
991+#define LITMUS_ALIGNED_TICKS 1
992+#define LITMUS_STAGGERED_TICKS 2
993+
994 extern int tick_init_highres(void);
995 extern int tick_program_event(ktime_t expires, int force);
996 extern void tick_setup_sched_timer(void);
997diff --git a/include/litmus/bheap.h b/include/litmus/bheap.h
998new file mode 100644
999index 0000000..cf4864a
1000--- /dev/null
1001+++ b/include/litmus/bheap.h
1002@@ -0,0 +1,77 @@
1003+/* bheap.h -- Binomial Heaps
1004+ *
1005+ * (c) 2008, 2009 Bjoern Brandenburg
1006+ */
1007+
1008+#ifndef BHEAP_H
1009+#define BHEAP_H
1010+
1011+#define NOT_IN_HEAP UINT_MAX
1012+
1013+struct bheap_node {
1014+ struct bheap_node* parent;
1015+ struct bheap_node* next;
1016+ struct bheap_node* child;
1017+
1018+ unsigned int degree;
1019+ void* value;
1020+ struct bheap_node** ref;
1021+};
1022+
1023+struct bheap {
1024+ struct bheap_node* head;
1025+ /* We cache the minimum of the heap.
1026+ * This speeds up repeated peek operations.
1027+ */
1028+ struct bheap_node* min;
1029+};
1030+
1031+typedef int (*bheap_prio_t)(struct bheap_node* a, struct bheap_node* b);
1032+
1033+void bheap_init(struct bheap* heap);
1034+void bheap_node_init(struct bheap_node** ref_to_bheap_node_ptr, void* value);
1035+
1036+static inline int bheap_node_in_heap(struct bheap_node* h)
1037+{
1038+ return h->degree != NOT_IN_HEAP;
1039+}
1040+
1041+static inline int bheap_empty(struct bheap* heap)
1042+{
1043+ return heap->head == NULL && heap->min == NULL;
1044+}
1045+
1046+/* insert (and reinitialize) a node into the heap */
1047+void bheap_insert(bheap_prio_t higher_prio,
1048+ struct bheap* heap,
1049+ struct bheap_node* node);
1050+
1051+/* merge addition into target */
1052+void bheap_union(bheap_prio_t higher_prio,
1053+ struct bheap* target,
1054+ struct bheap* addition);
1055+
1056+struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
1057+ struct bheap* heap);
1058+
1059+struct bheap_node* bheap_take(bheap_prio_t higher_prio,
1060+ struct bheap* heap);
1061+
1062+void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap);
1063+int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node);
1064+
1065+void bheap_delete(bheap_prio_t higher_prio,
1066+ struct bheap* heap,
1067+ struct bheap_node* node);
1068+
1069+/* allocate from memcache */
1070+struct bheap_node* bheap_node_alloc(int gfp_flags);
1071+void bheap_node_free(struct bheap_node* hn);
1072+
1073+/* allocate a heap node for value and insert into the heap */
1074+int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
1075+ void* value, int gfp_flags);
1076+
1077+void* bheap_take_del(bheap_prio_t higher_prio,
1078+ struct bheap* heap);
1079+#endif
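
For illustration only (not part of the patch): a sketch of the binomial-heap API with a made-up lower-value-wins order and demo_* names. In the scheduler plugins the comparator is typically edf_ready_order() and the node values are task pointers.

#include <linux/kernel.h>
#include <linux/gfp.h>
#include <litmus/bheap.h>

/* order: the node carrying the smaller encoded value has higher priority */
static int demo_order(struct bheap_node *a, struct bheap_node *b)
{
        return (unsigned long) a->value < (unsigned long) b->value;
}

static void demo_bheap(void)
{
        struct bheap heap;
        struct bheap_node *hn;

        bheap_init(&heap);
        bheap_add(demo_order, &heap, (void *) 3UL, GFP_ATOMIC);
        bheap_add(demo_order, &heap, (void *) 1UL, GFP_ATOMIC);

        hn = bheap_take(demo_order, &heap);     /* node carrying 1 */
        if (hn)
                bheap_node_free(hn);            /* return it to the memcache */
}
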
1080diff --git a/include/litmus/budget.h b/include/litmus/budget.h
1081new file mode 100644
1082index 0000000..732530e
1083--- /dev/null
1084+++ b/include/litmus/budget.h
1085@@ -0,0 +1,8 @@
1086+#ifndef _LITMUS_BUDGET_H_
1087+#define _LITMUS_BUDGET_H_
1088+
1089+/* Update the per-processor enforcement timer (arm/reprogram/cancel) for
1090+ * the next task. */
1091+void update_enforcement_timer(struct task_struct* t);
1092+
1093+#endif
1094diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
1095new file mode 100644
1096index 0000000..80d4321
1097--- /dev/null
1098+++ b/include/litmus/edf_common.h
1099@@ -0,0 +1,27 @@
1100+/*
1101+ * EDF common data structures and utility functions shared by all EDF
1102+ * based scheduler plugins
1103+ */
1104+
1105+/* CLEANUP: Add comments and make it less messy.
1106+ *
1107+ */
1108+
1109+#ifndef __UNC_EDF_COMMON_H__
1110+#define __UNC_EDF_COMMON_H__
1111+
1112+#include <litmus/rt_domain.h>
1113+
1114+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
1115+ release_jobs_t release);
1116+
1117+int edf_higher_prio(struct task_struct* first,
1118+ struct task_struct* second);
1119+
1120+int edf_ready_order(struct bheap_node* a, struct bheap_node* b);
1121+
1122+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
1123+
1124+int edf_set_hp_task(struct pi_semaphore *sem);
1125+int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu);
1126+#endif
1127diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
1128new file mode 100644
1129index 0000000..61f1b5b
1130--- /dev/null
1131+++ b/include/litmus/fdso.h
1132@@ -0,0 +1,70 @@
1133+/* fdso.h - file descriptor attached shared objects
1134+ *
1135+ * (c) 2007 B. Brandenburg, LITMUS^RT project
1136+ */
1137+
1138+#ifndef _LINUX_FDSO_H_
1139+#define _LINUX_FDSO_H_
1140+
1141+#include <linux/list.h>
1142+#include <asm/atomic.h>
1143+
1144+#include <linux/fs.h>
1145+#include <linux/slab.h>
1146+
1147+#define MAX_OBJECT_DESCRIPTORS 32
1148+
1149+typedef enum {
1150+ MIN_OBJ_TYPE = 0,
1151+
1152+ FMLP_SEM = 0,
1153+ SRP_SEM = 1,
1154+
1155+ MAX_OBJ_TYPE = 1
1156+} obj_type_t;
1157+
1158+struct inode_obj_id {
1159+ struct list_head list;
1160+ atomic_t count;
1161+ struct inode* inode;
1162+
1163+ obj_type_t type;
1164+ void* obj;
1165+ unsigned int id;
1166+};
1167+
1168+
1169+struct od_table_entry {
1170+ unsigned int used;
1171+
1172+ struct inode_obj_id* obj;
1173+ void* extra;
1174+};
1175+
1176+struct fdso_ops {
1177+ void* (*create) (void);
1178+ void (*destroy)(void*);
1179+ int (*open) (struct od_table_entry*, void* __user);
1180+ int (*close) (struct od_table_entry*);
1181+};
1182+
1183+/* translate a userspace supplied od into the raw table entry
1184+ * returns NULL if od is invalid
1185+ */
1186+struct od_table_entry* __od_lookup(int od);
1187+
1188+/* translate a userspace supplied od into the associated object
1189+ * returns NULL if od is invalid
1190+ */
1191+static inline void* od_lookup(int od, obj_type_t type)
1192+{
1193+ struct od_table_entry* e = __od_lookup(od);
1194+ return e && e->obj->type == type ? e->obj->obj : NULL;
1195+}
1196+
1197+#define lookup_fmlp_sem(od)((struct pi_semaphore*) od_lookup(od, FMLP_SEM))
1198+#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
1199+#define lookup_ics(od) ((struct ics*) od_lookup(od, ICS_ID))
1200+
1201+
1202+#endif
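
For illustration only (not part of the patch): resolving a user-supplied object descriptor into the attached shared object; demo_resolve_fmlp() is an invented helper.

#include <litmus/fdso.h>

/* returns NULL if od is invalid or does not refer to an FMLP semaphore */
static void *demo_resolve_fmlp(int od)
{
        return od_lookup(od, FMLP_SEM);
}
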
1203diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
1204new file mode 100644
1205index 0000000..6c18277
1206--- /dev/null
1207+++ b/include/litmus/feather_buffer.h
1208@@ -0,0 +1,94 @@
1209+#ifndef _FEATHER_BUFFER_H_
1210+#define _FEATHER_BUFFER_H_
1211+
1212+/* requires UINT_MAX and memcpy */
1213+
1214+#define SLOT_FREE 0
1215+#define SLOT_BUSY 1
1216+#define SLOT_READY 2
1217+
1218+struct ft_buffer {
1219+ unsigned int slot_count;
1220+ unsigned int slot_size;
1221+
1222+ int free_count;
1223+ unsigned int write_idx;
1224+ unsigned int read_idx;
1225+
1226+ char* slots;
1227+ void* buffer_mem;
1228+ unsigned int failed_writes;
1229+};
1230+
1231+static inline int init_ft_buffer(struct ft_buffer* buf,
1232+ unsigned int slot_count,
1233+ unsigned int slot_size,
1234+ char* slots,
1235+ void* buffer_mem)
1236+{
1237+ int i = 0;
1238+ if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
1239+ /* The slot count must divide UINT_MAX + 1 so that when it
1240+ * wraps around the index correctly points to 0.
1241+ */
1242+ return 0;
1243+ } else {
1244+ buf->slot_count = slot_count;
1245+ buf->slot_size = slot_size;
1246+ buf->slots = slots;
1247+ buf->buffer_mem = buffer_mem;
1248+ buf->free_count = slot_count;
1249+ buf->write_idx = 0;
1250+ buf->read_idx = 0;
1251+ buf->failed_writes = 0;
1252+ for (i = 0; i < slot_count; i++)
1253+ buf->slots[i] = SLOT_FREE;
1254+ return 1;
1255+ }
1256+}
1257+
1258+static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
1259+{
1260+ int free = fetch_and_dec(&buf->free_count);
1261+ unsigned int idx;
1262+ if (free <= 0) {
1263+ fetch_and_inc(&buf->free_count);
1264+ *ptr = 0;
1265+ fetch_and_inc(&buf->failed_writes);
1266+ return 0;
1267+ } else {
1268+ idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
1269+ buf->slots[idx] = SLOT_BUSY;
1270+ *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
1271+ return 1;
1272+ }
1273+}
1274+
1275+static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
1276+{
1277+ unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
1278+ buf->slots[idx] = SLOT_READY;
1279+}
1280+
1281+
1282+/* exclusive reader access is assumed */
1283+static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
1284+{
1285+ unsigned int idx;
1286+ if (buf->free_count == buf->slot_count)
1287+ /* nothing available */
1288+ return 0;
1289+ idx = buf->read_idx % buf->slot_count;
1290+ if (buf->slots[idx] == SLOT_READY) {
1291+ memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
1292+ buf->slot_size);
1293+ buf->slots[idx] = SLOT_FREE;
1294+ buf->read_idx++;
1295+ fetch_and_inc(&buf->free_count);
1296+ return 1;
1297+ } else
1298+ return 0;
1299+}
1300+
1301+
1302+#endif
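
For illustration only (not part of the patch): wiring an ft_buffer to statically allocated storage. The demo_* names are invented; 16 slots are used because the slot count must divide UINT_MAX + 1.

#include <linux/kernel.h>
#include <linux/string.h>
#include <litmus/feather_trace.h>      /* fetch_and_inc()/fetch_and_dec() */
#include <litmus/feather_buffer.h>

static struct ft_buffer demo_buf;
static char demo_flags[16];
static unsigned long long demo_mem[16];

static void demo_ft_buffer(void)
{
        unsigned long long *slot, sample = 42;

        init_ft_buffer(&demo_buf, 16, sizeof(*demo_mem),
                       demo_flags, demo_mem);

        /* writer side: reserve a slot, fill it, publish it */
        if (ft_buffer_start_write(&demo_buf, (void **) &slot)) {
                *slot = sample;
                ft_buffer_finish_write(&demo_buf, slot);
        }

        /* single reader drains all ready slots */
        while (ft_buffer_read(&demo_buf, &sample))
                ;
}
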
1303diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
1304new file mode 100644
1305index 0000000..028dfb2
1306--- /dev/null
1307+++ b/include/litmus/feather_trace.h
1308@@ -0,0 +1,65 @@
1309+#ifndef _FEATHER_TRACE_H_
1310+#define _FEATHER_TRACE_H_
1311+
1312+#include <asm/atomic.h>
1313+
1314+int ft_enable_event(unsigned long id);
1315+int ft_disable_event(unsigned long id);
1316+int ft_is_event_enabled(unsigned long id);
1317+int ft_disable_all_events(void);
1318+
1319+/* atomic_* functions are inline anyway */
1320+static inline int fetch_and_inc(int *val)
1321+{
1322+ return atomic_add_return(1, (atomic_t*) val) - 1;
1323+}
1324+
1325+static inline int fetch_and_dec(int *val)
1326+{
1327+ return atomic_sub_return(1, (atomic_t*) val) + 1;
1328+}
1329+
1330+/* Don't use rewriting implementation if kernel text pages are read-only.
1331+ * Ftrace gets around this by using the identity mapping, but that's more
1332+ * effort than is warranted right now for Feather-Trace.
1333+ * Eventually, it may make sense to replace Feather-Trace with ftrace.
1334+ */
1335+#if defined(CONFIG_ARCH_HAS_FEATHER_TRACE) && !defined(CONFIG_DEBUG_RODATA)
1336+
1337+#include <asm/feather_trace.h>
1338+
1339+#else /* !__ARCH_HAS_FEATHER_TRACE */
1340+
1341+/* provide default implementation */
1342+
1343+#include <asm/timex.h> /* for get_cycles() */
1344+
1345+static inline unsigned long long ft_timestamp(void)
1346+{
1347+ return get_cycles();
1348+}
1349+
1350+#define feather_callback
1351+
1352+#define MAX_EVENTS 1024
1353+
1354+extern int ft_events[MAX_EVENTS];
1355+
1356+#define ft_event(id, callback) \
1357+ if (ft_events[id]) callback();
1358+
1359+#define ft_event0(id, callback) \
1360+ if (ft_events[id]) callback(id);
1361+
1362+#define ft_event1(id, callback, param) \
1363+ if (ft_events[id]) callback(id, param);
1364+
1365+#define ft_event2(id, callback, param, param2) \
1366+ if (ft_events[id]) callback(id, param, param2);
1367+
1368+#define ft_event3(id, callback, p, p2, p3) \
1369+ if (ft_events[id]) callback(id, p, p2, p3);
1370+
1371+#endif /* __ARCH_HAS_FEATHER_TRACE */
1372+
1373+#endif
1374diff --git a/include/litmus/ftdev.h b/include/litmus/ftdev.h
1375new file mode 100644
1376index 0000000..7697b46
1377--- /dev/null
1378+++ b/include/litmus/ftdev.h
1379@@ -0,0 +1,49 @@
1380+#ifndef _LITMUS_FTDEV_H_
1381+#define _LITMUS_FTDEV_H_
1382+
1383+#include <litmus/feather_trace.h>
1384+#include <litmus/feather_buffer.h>
1385+#include <linux/mutex.h>
1386+#include <linux/cdev.h>
1387+
1388+#define MAX_FTDEV_MINORS NR_CPUS
1389+
1390+#define FTDEV_ENABLE_CMD 0
1391+#define FTDEV_DISABLE_CMD 1
1392+
1393+struct ftdev;
1394+
1395+/* return 0 if buffer can be opened, otherwise -$REASON */
1396+typedef int (*ftdev_can_open_t)(struct ftdev* dev, unsigned int buf_no);
1397+/* return 0 on success, otherwise -$REASON */
1398+typedef int (*ftdev_alloc_t)(struct ftdev* dev, unsigned int buf_no);
1399+typedef void (*ftdev_free_t)(struct ftdev* dev, unsigned int buf_no);
1400+
1401+
1402+struct ftdev_event;
1403+
1404+struct ftdev_minor {
1405+ struct ft_buffer* buf;
1406+ unsigned int readers;
1407+ struct mutex lock;
1408+ /* FIXME: filter for authorized events */
1409+ struct ftdev_event* events;
1410+};
1411+
1412+struct ftdev {
1413+ struct cdev cdev;
1414+ /* FIXME: don't waste memory, allocate dynamically */
1415+ struct ftdev_minor minor[MAX_FTDEV_MINORS];
1416+ unsigned int minor_cnt;
1417+ ftdev_alloc_t alloc;
1418+ ftdev_free_t free;
1419+ ftdev_can_open_t can_open;
1420+};
1421+
1422+struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size);
1423+void free_ft_buffer(struct ft_buffer* buf);
1424+
1425+void ftdev_init(struct ftdev* ftdev, struct module* owner);
1426+int register_ftdev(struct ftdev* ftdev, const char* name, int major);
1427+
1428+#endif
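
For illustration only (not part of the patch): a sketch of registering a Feather-Trace device. The demo_* names and DEMO_MAJOR value are invented, and how register_ftdev() interprets the major number is an assumption not visible in this header.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <litmus/ftdev.h>

#define DEMO_MAJOR 0    /* placeholder; major-number policy is up to register_ftdev() */

static struct ftdev demo_dev;

static int demo_alloc(struct ftdev *dev, unsigned int buf_no)
{
        dev->minor[buf_no].buf = alloc_ft_buffer(4096, sizeof(unsigned long));
        return dev->minor[buf_no].buf ? 0 : -ENOMEM;
}

static void demo_free(struct ftdev *dev, unsigned int buf_no)
{
        free_ft_buffer(dev->minor[buf_no].buf);
        dev->minor[buf_no].buf = NULL;
}

static int __init demo_ftdev_init(void)
{
        ftdev_init(&demo_dev, THIS_MODULE);
        demo_dev.minor_cnt = 1;
        demo_dev.alloc     = demo_alloc;
        demo_dev.free      = demo_free;
        return register_ftdev(&demo_dev, "demo_trace", DEMO_MAJOR);
}
module_init(demo_ftdev_init);
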
1429diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
1430new file mode 100644
1431index 0000000..9bd361e
1432--- /dev/null
1433+++ b/include/litmus/jobs.h
1434@@ -0,0 +1,9 @@
1435+#ifndef __LITMUS_JOBS_H__
1436+#define __LITMUS_JOBS_H__
1437+
1438+void prepare_for_next_period(struct task_struct *t);
1439+void release_at(struct task_struct *t, lt_t start);
1440+long complete_job(void);
1441+
1442+#endif
1443+
1444diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
1445new file mode 100644
1446index 0000000..5d20276
1447--- /dev/null
1448+++ b/include/litmus/litmus.h
1449@@ -0,0 +1,267 @@
1450+/*
1451+ * Constant definitions related to
1452+ * scheduling policy.
1453+ */
1454+
1455+#ifndef _LINUX_LITMUS_H_
1456+#define _LINUX_LITMUS_H_
1457+
1458+#include <linux/jiffies.h>
1459+#include <litmus/sched_trace.h>
1460+
1461+#ifdef CONFIG_RELEASE_MASTER
1462+extern atomic_t release_master_cpu;
1463+#endif
1464+
1465+extern atomic_t __log_seq_no;
1466+
1467+#define TRACE(fmt, args...) \
1468+ sched_trace_log_message("%d P%d: " fmt, atomic_add_return(1, &__log_seq_no), \
1469+ raw_smp_processor_id(), ## args)
1470+
1471+#define TRACE_TASK(t, fmt, args...) \
1472+ TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args)
1473+
1474+#define TRACE_CUR(fmt, args...) \
1475+ TRACE_TASK(current, fmt, ## args)
1476+
1477+#define TRACE_BUG_ON(cond) \
1478+ do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \
1479+ "called from %p current=%s/%d state=%d " \
1480+ "flags=%x partition=%d cpu=%d rtflags=%d"\
1481+ " job=%u timeslice=%u\n", \
1482+ #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \
1483+ current->pid, current->state, current->flags, \
1484+ get_partition(current), smp_processor_id(), get_rt_flags(current), \
1485+ current->rt_param.job_params.job_no, \
1486+ current->rt.time_slice\
1487+ ); } while(0);
1488+
1489+
1490+/* in_list - is a given list_head queued on some list?
1491+ */
1492+static inline int in_list(struct list_head* list)
1493+{
1494+ return !( /* case 1: deleted */
1495+ (list->next == LIST_POISON1 &&
1496+ list->prev == LIST_POISON2)
1497+ ||
1498+ /* case 2: initialized */
1499+ (list->next == list &&
1500+ list->prev == list)
1501+ );
1502+}
1503+
1504+#define NO_CPU 0xffffffff
1505+
1506+void litmus_fork(struct task_struct *tsk);
1507+void litmus_exec(void);
1508+/* clean up real-time state of a task */
1509+void exit_litmus(struct task_struct *dead_tsk);
1510+
1511+long litmus_admit_task(struct task_struct *tsk);
1512+void litmus_exit_task(struct task_struct *tsk);
1513+
1514+#define is_realtime(t) ((t)->policy == SCHED_LITMUS)
1515+#define rt_transition_pending(t) \
1516+ ((t)->rt_param.transition_pending)
1517+
1518+#define tsk_rt(t) (&(t)->rt_param)
1519+
1520+/* Realtime utility macros */
1521+#define get_rt_flags(t) (tsk_rt(t)->flags)
1522+#define set_rt_flags(t,f) (tsk_rt(t)->flags=(f))
1523+#define get_exec_cost(t) (tsk_rt(t)->task_params.exec_cost)
1524+#define get_exec_time(t) (tsk_rt(t)->job_params.exec_time)
1525+#define get_rt_period(t) (tsk_rt(t)->task_params.period)
1526+#define get_rt_phase(t) (tsk_rt(t)->task_params.phase)
1527+#define get_partition(t) (tsk_rt(t)->task_params.cpu)
1528+#define get_deadline(t) (tsk_rt(t)->job_params.deadline)
1529+#define get_release(t) (tsk_rt(t)->job_params.release)
1530+#define get_class(t) (tsk_rt(t)->task_params.cls)
1531+
1532+inline static int budget_exhausted(struct task_struct* t)
1533+{
1534+ return get_exec_time(t) >= get_exec_cost(t);
1535+}
1536+
1537+inline static lt_t budget_remaining(struct task_struct* t)
1538+{
1539+ if (!budget_exhausted(t))
1540+ return get_exec_cost(t) - get_exec_time(t);
1541+ else
1542+ /* avoid overflow */
1543+ return 0;
1544+}
1545+
1546+#define budget_enforced(t) (tsk_rt(t)->task_params.budget_policy != NO_ENFORCEMENT)
1547+
1548+#define budget_precisely_enforced(t) (tsk_rt(t)->task_params.budget_policy \
1549+ == PRECISE_ENFORCEMENT)
1550+
1551+#define is_hrt(t) \
1552+ (tsk_rt(t)->task_params.class == RT_CLASS_HARD)
1553+#define is_srt(t) \
1554+ (tsk_rt(t)->task_params.class == RT_CLASS_SOFT)
1555+#define is_be(t) \
1556+ (tsk_rt(t)->task_params.class == RT_CLASS_BEST_EFFORT)
1557+
1558+/* Our notion of time within LITMUS: kernel monotonic time. */
1559+static inline lt_t litmus_clock(void)
1560+{
1561+ return ktime_to_ns(ktime_get());
1562+}
1563+
1564+/* A macro to convert from nanoseconds to ktime_t. */
1565+#define ns_to_ktime(t) ktime_add_ns(ktime_set(0, 0), t)
1566+
1567+#define get_domain(t) (tsk_rt(t)->domain)
1568+
1569+/* Honor the flag in the preempt_count variable that is set
1570+ * when scheduling is in progress.
1571+ */
1572+#define is_running(t) \
1573+ ((t)->state == TASK_RUNNING || \
1574+ task_thread_info(t)->preempt_count & PREEMPT_ACTIVE)
1575+
1576+#define is_blocked(t) \
1577+ (!is_running(t))
1578+#define is_released(t, now) \
1579+ (lt_before_eq(get_release(t), now))
1580+#define is_tardy(t, now) \
1581+ (lt_before_eq(tsk_rt(t)->job_params.deadline, now))
1582+
1583+/* real-time comparison macros */
1584+#define earlier_deadline(a, b) (lt_before(\
1585+ (a)->rt_param.job_params.deadline,\
1586+ (b)->rt_param.job_params.deadline))
1587+#define earlier_release(a, b) (lt_before(\
1588+ (a)->rt_param.job_params.release,\
1589+ (b)->rt_param.job_params.release))
1590+
1591+void preempt_if_preemptable(struct task_struct* t, int on_cpu);
1592+
1593+#ifdef CONFIG_SRP
1594+void srp_ceiling_block(void);
1595+#else
1596+#define srp_ceiling_block() /* nothing */
1597+#endif
1598+
1599+#define bheap2task(hn) ((struct task_struct*) hn->value)
1600+
1601+#ifdef CONFIG_NP_SECTION
1602+
1603+static inline int is_kernel_np(struct task_struct *t)
1604+{
1605+ return tsk_rt(t)->kernel_np;
1606+}
1607+
1608+static inline int is_user_np(struct task_struct *t)
1609+{
1610+ return tsk_rt(t)->ctrl_page ? tsk_rt(t)->ctrl_page->np_flag : 0;
1611+}
1612+
1613+static inline void request_exit_np(struct task_struct *t)
1614+{
1615+ if (is_user_np(t)) {
1616+ /* Set the flag that tells user space to call
1617+ * into the kernel at the end of a critical section. */
1618+ if (likely(tsk_rt(t)->ctrl_page)) {
1619+ TRACE_TASK(t, "setting delayed_preemption flag\n");
1620+ tsk_rt(t)->ctrl_page->delayed_preemption = 1;
1621+ }
1622+ }
1623+}
1624+
1625+static inline void clear_exit_np(struct task_struct *t)
1626+{
1627+ if (likely(tsk_rt(t)->ctrl_page))
1628+ tsk_rt(t)->ctrl_page->delayed_preemption = 0;
1629+}
1630+
1631+static inline void make_np(struct task_struct *t)
1632+{
1633+ tsk_rt(t)->kernel_np++;
1634+}
1635+
1636+/* Caller should check if preemption is necessary when
1637+ * the function returns 0.
1638+ */
1639+static inline int take_np(struct task_struct *t)
1640+{
1641+ return --tsk_rt(t)->kernel_np;
1642+}
1643+
1644+#else
1645+
1646+static inline int is_kernel_np(struct task_struct* t)
1647+{
1648+ return 0;
1649+}
1650+
1651+static inline int is_user_np(struct task_struct* t)
1652+{
1653+ return 0;
1654+}
1655+
1656+static inline void request_exit_np(struct task_struct *t)
1657+{
1658+ /* request_exit_np() shouldn't be called if !CONFIG_NP_SECTION */
1659+ BUG();
1660+}
1661+
1662+static inline void clear_exit_np(struct task_struct* t)
1663+{
1664+}
1665+
1666+#endif
1667+
1668+static inline int is_np(struct task_struct *t)
1669+{
1670+#ifdef CONFIG_SCHED_DEBUG_TRACE
1671+ int kernel, user;
1672+ kernel = is_kernel_np(t);
1673+ user = is_user_np(t);
1674+ if (kernel || user)
1675+ TRACE_TASK(t, " is non-preemptive: kernel=%d user=%d\n",
1676+
1677+ kernel, user);
1678+ return kernel || user;
1679+#else
1680+ return unlikely(is_kernel_np(t) || is_user_np(t));
1681+#endif
1682+}
1683+
1684+static inline int is_present(struct task_struct* t)
1685+{
1686+ return t && tsk_rt(t)->present;
1687+}
1688+
1689+
1690+/* make the unit explicit */
1691+typedef unsigned long quanta_t;
1692+
1693+enum round {
1694+ FLOOR,
1695+ CEIL
1696+};
1697+
1698+
1699+/* Tick period is used to convert ns-specified execution
1700+ * costs and periods into tick-based equivalents.
1701+ */
1702+extern ktime_t tick_period;
1703+
1704+static inline quanta_t time2quanta(lt_t time, enum round round)
1705+{
1706+ s64 quantum_length = ktime_to_ns(tick_period);
1707+
1708+ if (do_div(time, quantum_length) && round == CEIL)
1709+ time++;
1710+ return (quanta_t) time;
1711+}
1712+
1713+/* By how much is cpu staggered behind CPU 0? */
1714+u64 cpu_stagger_offset(int cpu);
1715+
1716+#endif
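
For illustration only (not part of the patch): converting a nanosecond execution cost into scheduling quanta with time2quanta(); demo_quanta() is invented, and the arithmetic assumes the stock 1 ms tick_period (HZ=1000).

#include <litmus/litmus.h>

static void demo_quanta(void)
{
        /* a 2.5 ms worst-case execution time, given in nanoseconds */
        quanta_t q_floor = time2quanta(2500000, FLOOR); /* -> 2 quanta */
        quanta_t q_ceil  = time2quanta(2500000, CEIL);  /* -> 3 quanta */

        (void) q_floor;
        (void) q_ceil;
}
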
1717diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
1718new file mode 100644
1719index 0000000..ac24929
1720--- /dev/null
1721+++ b/include/litmus/rt_domain.h
1722@@ -0,0 +1,182 @@
1723+/* CLEANUP: Add comments and make it less messy.
1724+ *
1725+ */
1726+
1727+#ifndef __UNC_RT_DOMAIN_H__
1728+#define __UNC_RT_DOMAIN_H__
1729+
1730+#include <litmus/bheap.h>
1731+
1732+#define RELEASE_QUEUE_SLOTS 127 /* prime */
1733+
1734+struct _rt_domain;
1735+
1736+typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
1737+typedef void (*release_jobs_t)(struct _rt_domain *rt, struct bheap* tasks);
1738+
1739+struct release_queue {
1740+ /* each slot maintains a list of release heaps sorted
1741+ * by release time */
1742+ struct list_head slot[RELEASE_QUEUE_SLOTS];
1743+};
1744+
1745+typedef struct _rt_domain {
1746+ /* runnable rt tasks are in here */
1747+ raw_spinlock_t ready_lock;
1748+ struct bheap ready_queue;
1749+
1750+ /* real-time tasks waiting for release are in here */
1751+ raw_spinlock_t release_lock;
1752+ struct release_queue release_queue;
1753+
1754+#ifdef CONFIG_RELEASE_MASTER
1755+ int release_master;
1756+#endif
1757+
1758+ /* for moving tasks to the release queue */
1759+ raw_spinlock_t tobe_lock;
1760+ struct list_head tobe_released;
1761+
1762+ /* how do we check if we need to kick another CPU? */
1763+ check_resched_needed_t check_resched;
1764+
1765+ /* how do we release jobs? */
1766+ release_jobs_t release_jobs;
1767+
1768+ /* how are tasks ordered in the ready queue? */
1769+ bheap_prio_t order;
1770+} rt_domain_t;
1771+
1772+struct release_heap {
1773+ /* list_head for per-time-slot list */
1774+ struct list_head list;
1775+ lt_t release_time;
1776+ /* all tasks to be released at release_time */
1777+ struct bheap heap;
1778+ /* used to trigger the release */
1779+ struct hrtimer timer;
1780+
1781+#ifdef CONFIG_RELEASE_MASTER
1782+ /* used to delegate releases */
1783+ struct hrtimer_start_on_info info;
1784+#endif
1785+ /* required for the timer callback */
1786+ rt_domain_t* dom;
1787+};
1788+
1789+
1790+static inline struct task_struct* __next_ready(rt_domain_t* rt)
1791+{
1792+ struct bheap_node *hn = bheap_peek(rt->order, &rt->ready_queue);
1793+ if (hn)
1794+ return bheap2task(hn);
1795+ else
1796+ return NULL;
1797+}
1798+
1799+void rt_domain_init(rt_domain_t *rt, bheap_prio_t order,
1800+ check_resched_needed_t check,
1801+			release_jobs_t release);
1802+
1803+void __add_ready(rt_domain_t* rt, struct task_struct *new);
1804+void __merge_ready(rt_domain_t* rt, struct bheap *tasks);
1805+void __add_release(rt_domain_t* rt, struct task_struct *task);
1806+
1807+static inline struct task_struct* __take_ready(rt_domain_t* rt)
1808+{
1809+ struct bheap_node* hn = bheap_take(rt->order, &rt->ready_queue);
1810+ if (hn)
1811+ return bheap2task(hn);
1812+ else
1813+ return NULL;
1814+}
1815+
1816+static inline struct task_struct* __peek_ready(rt_domain_t* rt)
1817+{
1818+ struct bheap_node* hn = bheap_peek(rt->order, &rt->ready_queue);
1819+ if (hn)
1820+ return bheap2task(hn);
1821+ else
1822+ return NULL;
1823+}
1824+
1825+static inline int is_queued(struct task_struct *t)
1826+{
1827+ BUG_ON(!tsk_rt(t)->heap_node);
1828+ return bheap_node_in_heap(tsk_rt(t)->heap_node);
1829+}
1830+
1831+static inline void remove(rt_domain_t* rt, struct task_struct *t)
1832+{
1833+ bheap_delete(rt->order, &rt->ready_queue, tsk_rt(t)->heap_node);
1834+}
1835+
1836+static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
1837+{
1838+ unsigned long flags;
1839+ /* first we need the write lock for rt_ready_queue */
1840+ raw_spin_lock_irqsave(&rt->ready_lock, flags);
1841+ __add_ready(rt, new);
1842+ raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
1843+}
1844+
1845+static inline void merge_ready(rt_domain_t* rt, struct bheap* tasks)
1846+{
1847+ unsigned long flags;
1848+ raw_spin_lock_irqsave(&rt->ready_lock, flags);
1849+ __merge_ready(rt, tasks);
1850+ raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
1851+}
1852+
1853+static inline struct task_struct* take_ready(rt_domain_t* rt)
1854+{
1855+ unsigned long flags;
1856+ struct task_struct* ret;
1857+ /* first we need the write lock for rt_ready_queue */
1858+ raw_spin_lock_irqsave(&rt->ready_lock, flags);
1859+ ret = __take_ready(rt);
1860+ raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
1861+ return ret;
1862+}
1863+
1864+
1865+static inline void add_release(rt_domain_t* rt, struct task_struct *task)
1866+{
1867+ unsigned long flags;
1868+ raw_spin_lock_irqsave(&rt->tobe_lock, flags);
1869+ __add_release(rt, task);
1870+ raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
1871+}
1872+
1873+#ifdef CONFIG_RELEASE_MASTER
1874+void __add_release_on(rt_domain_t* rt, struct task_struct *task,
1875+ int target_cpu);
1876+
1877+static inline void add_release_on(rt_domain_t* rt,
1878+ struct task_struct *task,
1879+ int target_cpu)
1880+{
1881+ unsigned long flags;
1882+ raw_spin_lock_irqsave(&rt->tobe_lock, flags);
1883+ __add_release_on(rt, task, target_cpu);
1884+ raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
1885+}
1886+#endif
1887+
1888+static inline int __jobs_pending(rt_domain_t* rt)
1889+{
1890+ return !bheap_empty(&rt->ready_queue);
1891+}
1892+
1893+static inline int jobs_pending(rt_domain_t* rt)
1894+{
1895+ unsigned long flags;
1896+ int ret;
1897+ /* first we need the write lock for rt_ready_queue */
1898+ raw_spin_lock_irqsave(&rt->ready_lock, flags);
1899+ ret = !bheap_empty(&rt->ready_queue);
1900+ raw_spin_unlock_irqrestore(&rt->ready_lock, flags);
1901+ return ret;
1902+}
1903+
1904+#endif
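To clarify how the locked wrappers above are meant to be used, here is a hedged sketch of a plugin driving an rt_domain. The callbacks my_order, my_check_resched, and my_release_jobs are placeholders and not part of this patch; the real users are the plugins under litmus/ (e.g., sched_gsn_edf.c).

    #include <litmus/rt_domain.h>

    /* placeholder callbacks, implemented by the plugin itself */
    static int  my_order(struct bheap_node *a, struct bheap_node *b);
    static int  my_check_resched(rt_domain_t *rt);
    static void my_release_jobs(rt_domain_t *rt, struct bheap *tasks);

    static rt_domain_t my_domain;

    static void my_plugin_setup(void)
    {
        /* order defines the ready-queue priority; the other two callbacks
         * handle cross-CPU rescheduling and batched job releases */
        rt_domain_init(&my_domain, my_order, my_check_resched, my_release_jobs);
    }

    static void my_plugin_job_arrival(struct task_struct *t)
    {
        add_ready(&my_domain, t);       /* takes ready_lock internally */
    }

    static struct task_struct *my_plugin_pick_next(void)
    {
        return take_ready(&my_domain);  /* NULL if no job is pending */
    }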
1905diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
1906new file mode 100644
1907index 0000000..a7a183f
1908--- /dev/null
1909+++ b/include/litmus/rt_param.h
1910@@ -0,0 +1,196 @@
1911+/*
1912+ * Definition of the scheduler plugin interface.
1913+ *
1914+ */
1915+#ifndef _LINUX_RT_PARAM_H_
1916+#define _LINUX_RT_PARAM_H_
1917+
1918+/* Litmus time type. */
1919+typedef unsigned long long lt_t;
1920+
1921+static inline int lt_after(lt_t a, lt_t b)
1922+{
1923+ return ((long long) b) - ((long long) a) < 0;
1924+}
1925+#define lt_before(a, b) lt_after(b, a)
1926+
1927+static inline int lt_after_eq(lt_t a, lt_t b)
1928+{
1929+ return ((long long) a) - ((long long) b) >= 0;
1930+}
1931+#define lt_before_eq(a, b) lt_after_eq(b, a)
1932+
1933+/* different types of clients */
1934+typedef enum {
1935+ RT_CLASS_HARD,
1936+ RT_CLASS_SOFT,
1937+ RT_CLASS_BEST_EFFORT
1938+} task_class_t;
1939+
1940+typedef enum {
1941+ NO_ENFORCEMENT, /* job may overrun unhindered */
1942+ QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
1943+ PRECISE_ENFORCEMENT /* NOT IMPLEMENTED - enforced with hrtimers */
1944+} budget_policy_t;
1945+
1946+struct rt_task {
1947+ lt_t exec_cost;
1948+ lt_t period;
1949+ lt_t phase;
1950+ unsigned int cpu;
1951+ task_class_t cls;
1952+ budget_policy_t budget_policy; /* ignored by pfair */
1953+};
1954+
1955+/* The definition of the data that is shared between the kernel and real-time
1956+ * tasks via a shared page (see litmus/ctrldev.c).
1957+ *
1958+ * WARNING: User space can write to this, so don't trust
1959+ * the correctness of the fields!
1960+ *
1961+ * This serves two purposes: to enable efficient signaling
1962+ * of non-preemptive sections (user->kernel) and
1963+ * delayed preemptions (kernel->user), and to export
1964+ * some real-time relevant statistics such as preemption and
1965+ * migration data to user space. We can't use a device to export
1966+ * statistics because we want to avoid system call overhead when
1967+ * determining preemption/migration overheads.
1968+ */
1969+struct control_page {
1970+ /* Is the task currently in a non-preemptive section? */
1971+ int np_flag;
1972+ /* Should the task call into the kernel when it leaves
1973+ * its non-preemptive section? */
1974+ int delayed_preemption;
1975+
1976+ /* to be extended */
1977+};
1978+
1979+/* don't export internal data structures to user space (liblitmus) */
1980+#ifdef __KERNEL__
1981+
1982+struct _rt_domain;
1983+struct bheap_node;
1984+struct release_heap;
1985+
1986+struct rt_job {
1987+	/* Time instant the job was or will be released. */
1988+ lt_t release;
1989+ /* What is the current deadline? */
1990+ lt_t deadline;
1991+
1992+ /* How much service has this job received so far? */
1993+ lt_t exec_time;
1994+
1995+	/* Which job is this? This is used to let user space
1996+ * specify which job to wait for, which is important if jobs
1997+ * overrun. If we just call sys_sleep_next_period() then we
1998+ * will unintentionally miss jobs after an overrun.
1999+ *
2000+ * Increase this sequence number when a job is released.
2001+ */
2002+ unsigned int job_no;
2003+};
2004+
2005+struct pfair_param;
2006+
2007+/* RT task parameters for scheduling extensions
2008+ * These parameters are inherited during clone and therefore must
2009+ * be explicitly set up before the task set is launched.
2010+ */
2011+struct rt_param {
2012+ /* is the task sleeping? */
2013+ unsigned int flags:8;
2014+
2015+ /* do we need to check for srp blocking? */
2016+ unsigned int srp_non_recurse:1;
2017+
2018+ /* is the task present? (true if it can be scheduled) */
2019+ unsigned int present:1;
2020+
2021+ /* user controlled parameters */
2022+ struct rt_task task_params;
2023+
2024+ /* timing parameters */
2025+ struct rt_job job_params;
2026+
2027+	/* Task representing the current "inherited" task
2028+	 * priority, assigned by inherit_priority and
2029+	 * return_priority in the scheduler plugins.
2030+	 * May point to self if PI does not result in
2031+	 * an increased task priority.
2032+ */
2033+ struct task_struct* inh_task;
2034+
2035+#ifdef CONFIG_NP_SECTION
2036+ /* For the FMLP under PSN-EDF, it is required to make the task
2037+ * non-preemptive from kernel space. In order not to interfere with
2038+ * user space, this counter indicates the kernel space np setting.
2039+ * kernel_np > 0 => task is non-preemptive
2040+ */
2041+ unsigned int kernel_np;
2042+#endif
2043+
2044+ /* This field can be used by plugins to store where the task
2045+ * is currently scheduled. It is the responsibility of the
2046+ * plugin to avoid race conditions.
2047+ *
2048+	 * This is used by GSN-EDF and PFAIR.
2049+ */
2050+ volatile int scheduled_on;
2051+
2052+ /* Is the stack of the task currently in use? This is updated by
2053+ * the LITMUS core.
2054+ *
2055+ * Be careful to avoid deadlocks!
2056+ */
2057+ volatile int stack_in_use;
2058+
2059+ /* This field can be used by plugins to store where the task
2060+ * is currently linked. It is the responsibility of the plugin
2061+ * to avoid race conditions.
2062+ *
2063+ * Used by GSN-EDF.
2064+ */
2065+ volatile int linked_on;
2066+
2067+ /* PFAIR/PD^2 state. Allocated on demand. */
2068+ struct pfair_param* pfair;
2069+
2070+ /* Fields saved before BE->RT transition.
2071+ */
2072+ int old_policy;
2073+ int old_prio;
2074+
2075+ /* ready queue for this task */
2076+ struct _rt_domain* domain;
2077+
2078+ /* heap element for this task
2079+ *
2080+ * Warning: Don't statically allocate this node. The heap
2081+ * implementation swaps these between tasks, thus after
2082+ * dequeuing from a heap you may end up with a different node
2083+ * then the one you had when enqueuing the task. For the same
2084+	 * than the one you had when enqueuing the task. For the same
2085+ * other than this pointer (which is updated by the heap
2086+ * implementation).
2087+ */
2088+ struct bheap_node* heap_node;
2089+ struct release_heap* rel_heap;
2090+
2091+ /* Used by rt_domain to queue task in release list.
2092+ */
2093+ struct list_head list;
2094+
2095+ /* Pointer to the page shared between userspace and kernel. */
2096+ struct control_page * ctrl_page;
2097+};
2098+
2099+/* Possible RT flags */
2100+#define RT_F_RUNNING 0x00000000
2101+#define RT_F_SLEEP 0x00000001
2102+#define RT_F_EXIT_SEM 0x00000008
2103+
2104+#endif
2105+
2106+#endif
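The np_flag/delayed_preemption handshake described above is driven from user space. Below is a minimal hedged sketch of the user-space side; it assumes struct control_page is visible to user space (liblitmus ships the header) and that the page has already been mapped into the task's address space via the control device (litmus/ctrldev.c). yield_to_kernel() is a placeholder for whatever call the task uses to let the kernel act on a delayed preemption.

    #include <litmus/rt_param.h>   /* user-visible struct control_page */

    static void np_enter(struct control_page *ctrl)
    {
        ctrl->np_flag = 1;          /* tell the kernel: do not preempt me */
        __sync_synchronize();       /* flag must be visible before the CS */
    }

    static void np_exit(struct control_page *ctrl)
    {
        __sync_synchronize();
        ctrl->np_flag = 0;
        if (ctrl->delayed_preemption)
            yield_to_kernel();      /* placeholder: the kernel wanted to
                                     * preempt us while non-preemptive */
    }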
2107diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
2108new file mode 100644
2109index 0000000..9c1c9f2
2110--- /dev/null
2111+++ b/include/litmus/sched_plugin.h
2112@@ -0,0 +1,162 @@
2113+/*
2114+ * Definition of the scheduler plugin interface.
2115+ *
2116+ */
2117+#ifndef _LINUX_SCHED_PLUGIN_H_
2118+#define _LINUX_SCHED_PLUGIN_H_
2119+
2120+#include <linux/sched.h>
2121+
2122+/* struct for semaphore with priority inheritance */
2123+struct pi_semaphore {
2124+ atomic_t count;
2125+ int sleepers;
2126+ wait_queue_head_t wait;
2127+ struct {
2128+ /* highest-prio holder/waiter */
2129+ struct task_struct *task;
2130+ struct task_struct* cpu_task[NR_CPUS];
2131+ } hp;
2132+ /* current lock holder */
2133+ struct task_struct *holder;
2134+};
2135+
2136+/************************ setup/tear down ********************/
2137+
2138+typedef long (*activate_plugin_t) (void);
2139+typedef long (*deactivate_plugin_t) (void);
2140+
2141+
2142+
2143+/********************* scheduler invocation ******************/
2144+
2145+/* Plugin-specific realtime tick handler */
2146+typedef void (*scheduler_tick_t) (struct task_struct *cur);
2147+/* Plugin-specific scheduling decision function */
2148+typedef struct task_struct* (*schedule_t)(struct task_struct * prev);
2149+/* Clean up after the task switch has occurred.
2150+ * This function is called after every (even non-rt) task switch.
2151+ */
2152+typedef void (*finish_switch_t)(struct task_struct *prev);
2153+
2154+
2155+/********************* task state changes ********************/
2156+
2157+/* Called to setup a new real-time task.
2158+ * Release the first job, enqueue, etc.
2159+ * Task may already be running.
2160+ */
2161+typedef void (*task_new_t) (struct task_struct *task,
2162+ int on_rq,
2163+ int running);
2164+
2165+/* Called to re-introduce a task after blocking.
2166+ * Can potentially be called multiple times.
2167+ */
2168+typedef void (*task_wake_up_t) (struct task_struct *task);
2169+/* Called to notify the plugin of a blocking real-time task.
2170+ * It will only be called for real-time tasks and before schedule() is called. */
2171+typedef void (*task_block_t) (struct task_struct *task);
2172+/* Called when a real-time task exits or changes to a different scheduling
2173+ * class.
2174+ * Free any allocated resources
2175+ */
2176+typedef void (*task_exit_t) (struct task_struct *);
2177+
2178+/* Called when the new_owner is released from the wait queue.
2179+ * It should now inherit the priority from sem, _before_ it gets re-added
2180+ * to any queue.
2181+ */
2182+typedef long (*inherit_priority_t) (struct pi_semaphore *sem,
2183+ struct task_struct *new_owner);
2184+
2185+/* Called when the current task releases a semaphore from which it might
2186+ * have inherited a priority.
2187+ */
2188+typedef long (*return_priority_t) (struct pi_semaphore *sem);
2189+
2190+/* Called when a task tries to acquire a semaphore and fails. Check if its
2191+ * priority is higher than that of the current holder.
2192+ */
2193+typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t);
2194+
2195+
2196+
2197+
2198+/********************* sys call backends ********************/
2199+/* This function causes the caller to sleep until the next release */
2200+typedef long (*complete_job_t) (void);
2201+
2202+typedef long (*admit_task_t)(struct task_struct* tsk);
2203+
2204+typedef void (*release_at_t)(struct task_struct *t, lt_t start);
2205+
2206+struct sched_plugin {
2207+ struct list_head list;
2208+ /* basic info */
2209+ char *plugin_name;
2210+
2211+ /* setup */
2212+ activate_plugin_t activate_plugin;
2213+ deactivate_plugin_t deactivate_plugin;
2214+
2215+#ifdef CONFIG_SRP
2216+ unsigned int srp_active;
2217+#endif
2218+
2219+ /* scheduler invocation */
2220+ scheduler_tick_t tick;
2221+ schedule_t schedule;
2222+ finish_switch_t finish_switch;
2223+
2224+ /* syscall backend */
2225+ complete_job_t complete_job;
2226+ release_at_t release_at;
2227+
2228+ /* task state changes */
2229+ admit_task_t admit_task;
2230+
2231+ task_new_t task_new;
2232+ task_wake_up_t task_wake_up;
2233+ task_block_t task_block;
2234+ task_exit_t task_exit;
2235+
2236+#ifdef CONFIG_FMLP
2237+ /* priority inheritance */
2238+ unsigned int fmlp_active;
2239+ inherit_priority_t inherit_priority;
2240+ return_priority_t return_priority;
2241+ pi_block_t pi_block;
2242+#endif
2243+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
2244+
2245+
2246+extern struct sched_plugin *litmus;
2247+
2248+/* cluster size: cache_index = 2 clusters by L2 cache, cache_index = 3 by L3 */
2249+extern int cluster_cache_index;
2250+
2251+int register_sched_plugin(struct sched_plugin* plugin);
2252+struct sched_plugin* find_sched_plugin(const char* name);
2253+int print_sched_plugins(char* buf, int max);
2254+
2255+static inline int srp_active(void)
2256+{
2257+#ifdef CONFIG_SRP
2258+ return litmus->srp_active;
2259+#else
2260+ return 0;
2261+#endif
2262+}
2263+static inline int fmlp_active(void)
2264+{
2265+#ifdef CONFIG_FMLP
2266+ return litmus->fmlp_active;
2267+#else
2268+ return 0;
2269+#endif
2270+}
2271+
2272+extern struct sched_plugin linux_sched_plugin;
2273+
2274+#endif
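The interface above boils down to filling in a struct sched_plugin and registering it. A hedged skeleton follows; the name "DEMO" and the trivial callbacks are placeholders, and real plugins such as GSN-EDF implement actual policy and set more of the callbacks (the registration code is assumed to tolerate or default the unset ones).

    #include <linux/module.h>
    #include <linux/errno.h>
    #include <litmus/sched_plugin.h>

    static struct task_struct* demo_schedule(struct task_struct *prev)
    {
        return NULL;    /* nothing to schedule in this sketch */
    }

    static long demo_admit_task(struct task_struct *tsk)
    {
        return -EINVAL; /* reject all tasks in this sketch */
    }

    static struct sched_plugin demo_plugin = {
        .plugin_name = "DEMO",
        .schedule    = demo_schedule,
        .admit_task  = demo_admit_task,
        /* remaining callbacks omitted for brevity */
    };

    static int __init init_demo_plugin(void)
    {
        return register_sched_plugin(&demo_plugin);
    }
    module_init(init_demo_plugin);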
2275diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
2276new file mode 100644
2277index 0000000..e1b0c97
2278--- /dev/null
2279+++ b/include/litmus/sched_trace.h
2280@@ -0,0 +1,192 @@
2281+/*
2282+ * sched_trace.h -- record scheduler events to a byte stream for offline analysis.
2283+ */
2284+#ifndef _LINUX_SCHED_TRACE_H_
2285+#define _LINUX_SCHED_TRACE_H_
2286+
2287+/* all times in nanoseconds */
2288+
2289+struct st_trace_header {
2290+ u8 type; /* Of what type is this record? */
2291+ u8 cpu; /* On which CPU was it recorded? */
2292+ u16 pid; /* PID of the task. */
2293+ u32 job; /* The job sequence number. */
2294+};
2295+
2296+#define ST_NAME_LEN 16
2297+struct st_name_data {
2298+ char cmd[ST_NAME_LEN];/* The name of the executable of this process. */
2299+};
2300+
2301+struct st_param_data { /* regular params */
2302+ u32 wcet;
2303+ u32 period;
2304+ u32 phase;
2305+ u8 partition;
2306+ u8 __unused[3];
2307+};
2308+
2309+struct st_release_data { /* A job was or is about to be released. */
2310+ u64 release; /* What's the release time? */
2311+ u64 deadline; /* By when must it finish? */
2312+};
2313+
2314+struct st_assigned_data { /* A job was assigned to a CPU. */
2315+ u64 when;
2316+ u8 target; /* Where should it execute? */
2317+ u8 __unused[3];
2318+};
2319+
2320+struct st_switch_to_data { /* A process was switched to on a given CPU. */
2321+ u64 when; /* When did this occur? */
2322+ u32 exec_time; /* Time the current job has executed. */
2323+
2324+};
2325+
2326+struct st_switch_away_data { /* A process was switched away from on a given CPU. */
2327+ u64 when;
2328+ u64 exec_time;
2329+};
2330+
2331+struct st_completion_data { /* A job completed. */
2332+ u64 when;
2333+ u8 forced:1; /* Set to 1 if job overran and kernel advanced to the
2334+ * next task automatically; set to 0 otherwise.
2335+ */
2336+ u8 __uflags:7;
2337+ u8 __unused[3];
2338+};
2339+
2340+struct st_block_data { /* A task blocks. */
2341+ u64 when;
2342+ u64 __unused;
2343+};
2344+
2345+struct st_resume_data { /* A task resumes. */
2346+ u64 when;
2347+ u64 __unused;
2348+};
2349+
2350+struct st_sys_release_data {
2351+ u64 when;
2352+ u64 release;
2353+};
2354+
2355+#define DATA(x) struct st_ ## x ## _data x;
2356+
2357+typedef enum {
2358+ ST_NAME = 1, /* Start at one, so that we can spot
2359+ * uninitialized records. */
2360+ ST_PARAM,
2361+ ST_RELEASE,
2362+ ST_ASSIGNED,
2363+ ST_SWITCH_TO,
2364+ ST_SWITCH_AWAY,
2365+ ST_COMPLETION,
2366+ ST_BLOCK,
2367+ ST_RESUME,
2368+ ST_SYS_RELEASE,
2369+} st_event_record_type_t;
2370+
2371+struct st_event_record {
2372+ struct st_trace_header hdr;
2373+ union {
2374+ u64 raw[2];
2375+
2376+ DATA(name);
2377+ DATA(param);
2378+ DATA(release);
2379+ DATA(assigned);
2380+ DATA(switch_to);
2381+ DATA(switch_away);
2382+ DATA(completion);
2383+ DATA(block);
2384+ DATA(resume);
2385+ DATA(sys_release);
2386+
2387+ } data;
2388+};
2389+
2390+#undef DATA
2391+
2392+#ifdef __KERNEL__
2393+
2394+#include <linux/sched.h>
2395+#include <litmus/feather_trace.h>
2396+
2397+#ifdef CONFIG_SCHED_TASK_TRACE
2398+
2399+#define SCHED_TRACE(id, callback, task) \
2400+ ft_event1(id, callback, task)
2401+#define SCHED_TRACE2(id, callback, task, xtra) \
2402+ ft_event2(id, callback, task, xtra)
2403+
2404+/* provide prototypes; needed on sparc64 */
2405+#ifndef NO_TASK_TRACE_DECLS
2406+feather_callback void do_sched_trace_task_name(unsigned long id,
2407+ struct task_struct* task);
2408+feather_callback void do_sched_trace_task_param(unsigned long id,
2409+ struct task_struct* task);
2410+feather_callback void do_sched_trace_task_release(unsigned long id,
2411+ struct task_struct* task);
2412+feather_callback void do_sched_trace_task_switch_to(unsigned long id,
2413+ struct task_struct* task);
2414+feather_callback void do_sched_trace_task_switch_away(unsigned long id,
2415+ struct task_struct* task);
2416+feather_callback void do_sched_trace_task_completion(unsigned long id,
2417+ struct task_struct* task,
2418+ unsigned long forced);
2419+feather_callback void do_sched_trace_task_block(unsigned long id,
2420+ struct task_struct* task);
2421+feather_callback void do_sched_trace_task_resume(unsigned long id,
2422+ struct task_struct* task);
2423+feather_callback void do_sched_trace_sys_release(unsigned long id,
2424+ lt_t* start);
2425+#endif
2426+
2427+#else
2428+
2429+#define SCHED_TRACE(id, callback, task) /* no tracing */
2430+#define SCHED_TRACE2(id, callback, task, xtra) /* no tracing */
2431+
2432+#endif
2433+
2434+
2435+#define SCHED_TRACE_BASE_ID 500
2436+
2437+
2438+#define sched_trace_task_name(t) \
2439+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 1, do_sched_trace_task_name, t)
2440+#define sched_trace_task_param(t) \
2441+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 2, do_sched_trace_task_param, t)
2442+#define sched_trace_task_release(t) \
2443+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 3, do_sched_trace_task_release, t)
2444+#define sched_trace_task_switch_to(t) \
2445+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 4, do_sched_trace_task_switch_to, t)
2446+#define sched_trace_task_switch_away(t) \
2447+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 5, do_sched_trace_task_switch_away, t)
2448+#define sched_trace_task_completion(t, forced) \
2449+ SCHED_TRACE2(SCHED_TRACE_BASE_ID + 6, do_sched_trace_task_completion, t, \
2450+ (unsigned long) forced)
2451+#define sched_trace_task_block(t) \
2452+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 7, do_sched_trace_task_block, t)
2453+#define sched_trace_task_resume(t) \
2454+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 8, do_sched_trace_task_resume, t)
2455+/* when is a pointer, it does not need an explicit cast to unsigned long */
2456+#define sched_trace_sys_release(when) \
2457+ SCHED_TRACE(SCHED_TRACE_BASE_ID + 9, do_sched_trace_sys_release, when)
2458+
2459+#define sched_trace_quantum_boundary() /* NOT IMPLEMENTED */
2460+
2461+#ifdef CONFIG_SCHED_DEBUG_TRACE
2462+void sched_trace_log_message(const char* fmt, ...);
2463+void dump_trace_buffer(int max);
2464+#else
2465+
2466+#define sched_trace_log_message(fmt, ...)
2467+
2468+#endif
2469+
2470+#endif /* __KERNEL__ */
2471+
2472+#endif
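Each event ends up in the byte stream as one fixed-size record: the 8-byte header followed by the 16-byte data union. A hedged user-space sketch of an offline reader is shown below; it assumes the raw records were copied verbatim into a file and that the reader runs on a machine with the same endianness and layout (the struct mirror is an assumption for illustration, not an exported header).

    #include <stdio.h>
    #include <stdint.h>

    struct st_trace_header {
        uint8_t  type;   /* record type */
        uint8_t  cpu;    /* recording CPU */
        uint16_t pid;    /* task PID */
        uint32_t job;    /* job sequence number */
    };

    struct st_event_record {
        struct st_trace_header hdr;
        uint64_t raw[2];             /* data union, interpreted per hdr.type */
    };

    int main(int argc, char **argv)
    {
        struct st_event_record rec;
        FILE *f = fopen(argc > 1 ? argv[1] : "st_dump.bin", "rb");

        if (!f)
            return 1;
        while (fread(&rec, sizeof(rec), 1, f) == 1)
            printf("type=%u cpu=%u pid=%u job=%u\n",
                   rec.hdr.type, rec.hdr.cpu, rec.hdr.pid, rec.hdr.job);
        fclose(f);
        return 0;
    }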
2473diff --git a/include/litmus/trace.h b/include/litmus/trace.h
2474new file mode 100644
2475index 0000000..b32c711
2476--- /dev/null
2477+++ b/include/litmus/trace.h
2478@@ -0,0 +1,113 @@
2479+#ifndef _SYS_TRACE_H_
2480+#define _SYS_TRACE_H_
2481+
2482+#ifdef CONFIG_SCHED_OVERHEAD_TRACE
2483+
2484+#include <litmus/feather_trace.h>
2485+#include <litmus/feather_buffer.h>
2486+
2487+
2488+/*********************** TIMESTAMPS ************************/
2489+
2490+enum task_type_marker {
2491+ TSK_BE,
2492+ TSK_RT,
2493+ TSK_UNKNOWN
2494+};
2495+
2496+struct timestamp {
2497+ uint64_t timestamp;
2498+ uint32_t seq_no;
2499+ uint8_t cpu;
2500+ uint8_t event;
2501+ uint8_t task_type;
2502+};
2503+
2504+/* tracing callbacks */
2505+feather_callback void save_timestamp(unsigned long event);
2506+feather_callback void save_timestamp_def(unsigned long event, unsigned long type);
2507+feather_callback void save_timestamp_task(unsigned long event, unsigned long t_ptr);
2508+feather_callback void save_timestamp_cpu(unsigned long event, unsigned long cpu);
2509+
2510+
2511+#define TIMESTAMP(id) ft_event0(id, save_timestamp)
2512+
2513+#define DTIMESTAMP(id, def) ft_event1(id, save_timestamp_def, (unsigned long) def)
2514+
2515+#define TTIMESTAMP(id, task) \
2516+ ft_event1(id, save_timestamp_task, (unsigned long) task)
2517+
2518+#define CTIMESTAMP(id, cpu) \
2519+ ft_event1(id, save_timestamp_cpu, (unsigned long) cpu)
2520+
2521+#else /* !CONFIG_SCHED_OVERHEAD_TRACE */
2522+
2523+#define TIMESTAMP(id) /* no tracing */
2524+
2525+#define DTIMESTAMP(id, def) /* no tracing */
2526+
2527+#define TTIMESTAMP(id, task) /* no tracing */
2528+
2529+#define CTIMESTAMP(id, cpu) /* no tracing */
2530+
2531+#endif
2532+
2533+
2534+/* Convention for timestamps
2535+ * =========================
2536+ *
2537+ * In order to process the trace files with a common tool, we use the following
2538+ * convention to measure execution times: The end time id of a code segment is
2539+ * always the next number after the start time event id.
2540+ */
2541+
2542+#define TS_SCHED_START DTIMESTAMP(100, TSK_UNKNOWN) /* we only
2543+ * care
2544+ * about
2545+ * next */
2546+#define TS_SCHED_END(t) TTIMESTAMP(101, t)
2547+#define TS_SCHED2_START(t) TTIMESTAMP(102, t)
2548+#define TS_SCHED2_END(t) TTIMESTAMP(103, t)
2549+
2550+#define TS_CXS_START(t) TTIMESTAMP(104, t)
2551+#define TS_CXS_END(t) TTIMESTAMP(105, t)
2552+
2553+#define TS_RELEASE_START DTIMESTAMP(106, TSK_RT)
2554+#define TS_RELEASE_END DTIMESTAMP(107, TSK_RT)
2555+
2556+#define TS_TICK_START(t) TTIMESTAMP(110, t)
2557+#define TS_TICK_END(t) TTIMESTAMP(111, t)
2558+
2559+
2560+#define TS_PLUGIN_SCHED_START /* TIMESTAMP(120) */ /* currently unused */
2561+#define TS_PLUGIN_SCHED_END /* TIMESTAMP(121) */
2562+
2563+#define TS_PLUGIN_TICK_START /* TIMESTAMP(130) */
2564+#define TS_PLUGIN_TICK_END /* TIMESTAMP(131) */
2565+
2566+#define TS_ENTER_NP_START TIMESTAMP(140)
2567+#define TS_ENTER_NP_END TIMESTAMP(141)
2568+
2569+#define TS_EXIT_NP_START TIMESTAMP(150)
2570+#define TS_EXIT_NP_END TIMESTAMP(151)
2571+
2572+#define TS_SRP_UP_START TIMESTAMP(160)
2573+#define TS_SRP_UP_END TIMESTAMP(161)
2574+#define TS_SRP_DOWN_START TIMESTAMP(162)
2575+#define TS_SRP_DOWN_END TIMESTAMP(163)
2576+
2577+#define TS_PI_UP_START TIMESTAMP(170)
2578+#define TS_PI_UP_END TIMESTAMP(171)
2579+#define TS_PI_DOWN_START TIMESTAMP(172)
2580+#define TS_PI_DOWN_END TIMESTAMP(173)
2581+
2582+#define TS_FIFO_UP_START TIMESTAMP(180)
2583+#define TS_FIFO_UP_END TIMESTAMP(181)
2584+#define TS_FIFO_DOWN_START TIMESTAMP(182)
2585+#define TS_FIFO_DOWN_END TIMESTAMP(183)
2586+
2587+#define TS_SEND_RESCHED_START(c) CTIMESTAMP(190, c)
2588+#define TS_SEND_RESCHED_END DTIMESTAMP(191, TSK_UNKNOWN)
2589+
2590+
2591+#endif /* !_SYS_TRACE_H_ */
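As a concrete illustration of the start/end convention above, the pairing step of an offline tool reduces to checking that the next event id is the current id plus one. A small self-contained sketch with made-up cycle counts:

    #include <stdio.h>
    #include <stdint.h>

    struct sample { unsigned event; uint64_t cycles; };

    int main(void)
    {
        /* hypothetical extracted samples: SCHED and CXS start/end pairs */
        struct sample s[] = {
            { 100, 1000 }, { 101, 1450 },   /* TS_SCHED_START / TS_SCHED_END */
            { 104, 1500 }, { 105, 1900 },   /* TS_CXS_START   / TS_CXS_END   */
        };
        int i, n = sizeof(s) / sizeof(s[0]);

        for (i = 0; i + 1 < n; i++)
            if (s[i + 1].event == s[i].event + 1)   /* end id = start id + 1 */
                printf("event %u: %llu cycles\n", s[i].event,
                       (unsigned long long) (s[i + 1].cycles - s[i].cycles));
        return 0;
    }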
2592diff --git a/include/litmus/unistd_32.h b/include/litmus/unistd_32.h
2593new file mode 100644
2594index 0000000..dbddc65
2595--- /dev/null
2596+++ b/include/litmus/unistd_32.h
2597@@ -0,0 +1,23 @@
2598+/*
2599+ * included from arch/x86/include/asm/unistd_32.h
2600+ *
2601+ * LITMUS^RT syscalls with "relative" numbers
2602+ */
2603+#define __LSC(x) (__NR_LITMUS + x)
2604+
2605+#define __NR_set_rt_task_param __LSC(0)
2606+#define __NR_get_rt_task_param __LSC(1)
2607+#define __NR_complete_job __LSC(2)
2608+#define __NR_od_open __LSC(3)
2609+#define __NR_od_close __LSC(4)
2610+#define __NR_fmlp_down __LSC(5)
2611+#define __NR_fmlp_up __LSC(6)
2612+#define __NR_srp_down __LSC(7)
2613+#define __NR_srp_up __LSC(8)
2614+#define __NR_query_job_no __LSC(9)
2615+#define __NR_wait_for_job_release __LSC(10)
2616+#define __NR_wait_for_ts_release __LSC(11)
2617+#define __NR_release_ts __LSC(12)
2618+#define __NR_null_call __LSC(13)
2619+
2620+#define NR_litmus_syscalls 14
2621diff --git a/include/litmus/unistd_64.h b/include/litmus/unistd_64.h
2622new file mode 100644
2623index 0000000..f0618e7
2624--- /dev/null
2625+++ b/include/litmus/unistd_64.h
2626@@ -0,0 +1,37 @@
2627+/*
2628+ * included from arch/x86/include/asm/unistd_64.h
2629+ *
2630+ * LITMUS^RT syscalls with "relative" numbers
2631+ */
2632+#define __LSC(x) (__NR_LITMUS + x)
2633+
2634+#define __NR_set_rt_task_param __LSC(0)
2635+__SYSCALL(__NR_set_rt_task_param, sys_set_rt_task_param)
2636+#define __NR_get_rt_task_param __LSC(1)
2637+__SYSCALL(__NR_get_rt_task_param, sys_get_rt_task_param)
2638+#define __NR_complete_job __LSC(2)
2639+__SYSCALL(__NR_complete_job, sys_complete_job)
2640+#define __NR_od_open __LSC(3)
2641+__SYSCALL(__NR_od_open, sys_od_open)
2642+#define __NR_od_close __LSC(4)
2643+__SYSCALL(__NR_od_close, sys_od_close)
2644+#define __NR_fmlp_down __LSC(5)
2645+__SYSCALL(__NR_fmlp_down, sys_fmlp_down)
2646+#define __NR_fmlp_up __LSC(6)
2647+__SYSCALL(__NR_fmlp_up, sys_fmlp_up)
2648+#define __NR_srp_down __LSC(7)
2649+__SYSCALL(__NR_srp_down, sys_srp_down)
2650+#define __NR_srp_up __LSC(8)
2651+__SYSCALL(__NR_srp_up, sys_srp_up)
2652+#define __NR_query_job_no __LSC(9)
2653+__SYSCALL(__NR_query_job_no, sys_query_job_no)
2654+#define __NR_wait_for_job_release __LSC(10)
2655+__SYSCALL(__NR_wait_for_job_release, sys_wait_for_job_release)
2656+#define __NR_wait_for_ts_release __LSC(11)
2657+__SYSCALL(__NR_wait_for_ts_release, sys_wait_for_ts_release)
2658+#define __NR_release_ts __LSC(12)
2659+__SYSCALL(__NR_release_ts, sys_release_ts)
2660+#define __NR_null_call __LSC(13)
2661+__SYSCALL(__NR_null_call, sys_null_call)
2662+
2663+#define NR_litmus_syscalls 14
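Since these numbers are relative to __NR_LITMUS, a LITMUS^RT syscall can be issued directly with syscall(2) once kernel headers from a LITMUS^RT kernel are installed; liblitmus normally wraps this. A hedged sketch using complete_job, which takes no arguments per the complete_job_t backend type (on a stock kernel, or for a non-real-time caller, the call simply fails):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_complete_job
    #error "build against LITMUS^RT kernel headers to obtain the syscall numbers"
    #endif

    int main(void)
    {
        /* sleep until the next job release; fails (e.g. ENOSYS/EPERM) on a
         * stock kernel or if the caller is not a LITMUS^RT real-time task */
        long ret = syscall(__NR_complete_job);

        printf("complete_job returned %ld\n", ret);
        return ret ? 1 : 0;
    }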
2664diff --git a/kernel/exit.c b/kernel/exit.c
2665index 7f2683a..256ce8c 100644
2666--- a/kernel/exit.c
2667+++ b/kernel/exit.c
2668@@ -57,6 +57,8 @@
2669 #include <asm/mmu_context.h>
2670 #include "cred-internals.h"
2671
2672+extern void exit_od_table(struct task_struct *t);
2673+
2674 static void exit_mm(struct task_struct * tsk);
2675
2676 static void __unhash_process(struct task_struct *p)
2677@@ -968,6 +970,8 @@ NORET_TYPE void do_exit(long code)
2678 if (unlikely(tsk->audit_context))
2679 audit_free(tsk);
2680
2681+ exit_od_table(tsk);
2682+
2683 tsk->exit_code = code;
2684 taskstats_exit(tsk, group_dead);
2685
2686diff --git a/kernel/fork.c b/kernel/fork.c
2687index 4c14942..166eb78 100644
2688--- a/kernel/fork.c
2689+++ b/kernel/fork.c
2690@@ -75,6 +75,9 @@
2691
2692 #include <trace/events/sched.h>
2693
2694+#include <litmus/litmus.h>
2695+#include <litmus/sched_plugin.h>
2696+
2697 /*
2698 * Protected counters by write_lock_irq(&tasklist_lock)
2699 */
2700@@ -171,6 +174,7 @@ void __put_task_struct(struct task_struct *tsk)
2701 WARN_ON(atomic_read(&tsk->usage));
2702 WARN_ON(tsk == current);
2703
2704+ exit_litmus(tsk);
2705 exit_creds(tsk);
2706 delayacct_tsk_free(tsk);
2707
2708@@ -253,6 +257,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
2709
2710 tsk->stack = ti;
2711
2712+ /* Don't let the new task be a real-time task. */
2713+ litmus_fork(tsk);
2714+
2715 err = prop_local_init_single(&tsk->dirties);
2716 if (err)
2717 goto out;
2718diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
2719index 0086628..fdf9596 100644
2720--- a/kernel/hrtimer.c
2721+++ b/kernel/hrtimer.c
2722@@ -46,6 +46,8 @@
2723 #include <linux/sched.h>
2724 #include <linux/timer.h>
2725
2726+#include <litmus/litmus.h>
2727+
2728 #include <asm/uaccess.h>
2729
2730 #include <trace/events/timer.h>
2731@@ -1041,6 +1043,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
2732 }
2733 EXPORT_SYMBOL_GPL(hrtimer_start);
2734
2735+#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
2736+
2737+/**
2738+ * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info
2739+ */
2740+void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
2741+{
2742+ memset(info, 0, sizeof(struct hrtimer_start_on_info));
2743+ atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
2744+}
2745+
2746+/**
2747+ * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu
2748+ */
2749+void hrtimer_pull(void)
2750+{
2751+ struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
2752+ struct hrtimer_start_on_info *info;
2753+ struct list_head *pos, *safe, list;
2754+
2755+ raw_spin_lock(&base->lock);
2756+ list_replace_init(&base->to_pull, &list);
2757+ raw_spin_unlock(&base->lock);
2758+
2759+ list_for_each_safe(pos, safe, &list) {
2760+ info = list_entry(pos, struct hrtimer_start_on_info, list);
2761+		TRACE("pulled timer %p\n", info->timer);
2762+ list_del(pos);
2763+ hrtimer_start(info->timer, info->time, info->mode);
2764+ }
2765+}
2766+
2767+/**
2768+ * hrtimer_start_on - trigger timer arming on remote cpu
2769+ * @cpu: remote cpu
2770+ * @info: save timer information for enqueuing on remote cpu
2771+ * @timer: timer to be pulled
2772+ * @time: expire time
2773+ * @mode: timer mode
2774+ */
2775+int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
2776+ struct hrtimer *timer, ktime_t time,
2777+ const enum hrtimer_mode mode)
2778+{
2779+ unsigned long flags;
2780+ struct hrtimer_cpu_base* base;
2781+ int in_use = 0, was_empty;
2782+
2783+ /* serialize access to info through the timer base */
2784+ lock_hrtimer_base(timer, &flags);
2785+
2786+ in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE);
2787+ if (!in_use) {
2788+ INIT_LIST_HEAD(&info->list);
2789+ info->timer = timer;
2790+ info->time = time;
2791+ info->mode = mode;
2792+ /* mark as in use */
2793+ atomic_set(&info->state, HRTIMER_START_ON_QUEUED);
2794+ }
2795+
2796+ unlock_hrtimer_base(timer, &flags);
2797+
2798+ if (!in_use) {
2799+ /* initiate pull */
2800+ preempt_disable();
2801+ if (cpu == smp_processor_id()) {
2802+ /* start timer locally; we may get called
2803+ * with rq->lock held, do not wake up anything
2804+ */
2805+ TRACE("hrtimer_start_on: starting on local CPU\n");
2806+ __hrtimer_start_range_ns(info->timer, info->time,
2807+ 0, info->mode, 0);
2808+ } else {
2809+ TRACE("hrtimer_start_on: pulling to remote CPU\n");
2810+ base = &per_cpu(hrtimer_bases, cpu);
2811+ raw_spin_lock_irqsave(&base->lock, flags);
2812+ was_empty = list_empty(&base->to_pull);
2813+ list_add(&info->list, &base->to_pull);
2814+ raw_spin_unlock_irqrestore(&base->lock, flags);
2815+ if (was_empty)
2816+ /* only send IPI if other no else
2817+				/* only send IPI if no one else
2818+				 * has done so already
2819+ smp_send_pull_timers(cpu);
2820+ }
2821+ preempt_enable();
2822+ }
2823+ return in_use;
2824+}
2825+
2826+#endif
2827
2828 /**
2829 * hrtimer_try_to_cancel - try to deactivate a timer
2830@@ -1631,6 +1725,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
2831 cpu_base->clock_base[i].cpu_base = cpu_base;
2832
2833 hrtimer_init_hres(cpu_base);
2834+ INIT_LIST_HEAD(&cpu_base->to_pull);
2835 }
2836
2837 #ifdef CONFIG_HOTPLUG_CPU
2838diff --git a/kernel/printk.c b/kernel/printk.c
2839index 75077ad..ee54355 100644
2840--- a/kernel/printk.c
2841+++ b/kernel/printk.c
2842@@ -71,6 +71,13 @@ int console_printk[4] = {
2843 };
2844
2845 /*
2846+ * divert printk() messages when there is a LITMUS^RT debug listener
2847+ */
2848+#include <litmus/litmus.h>
2849+int trace_override = 0;
2850+int trace_recurse = 0;
2851+
2852+/*
2853 * Low level drivers may need that to know if they can schedule in
2854 * their unblank() callback or not. So let's export it.
2855 */
2856@@ -708,6 +715,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
2857 /* Emit the output into the temporary buffer */
2858 printed_len += vscnprintf(printk_buf + printed_len,
2859 sizeof(printk_buf) - printed_len, fmt, args);
2860+	/* if the LITMUS^RT tracer is active, divert printk() msgs */
2861+ if (trace_override && !trace_recurse)
2862+ TRACE("%s", printk_buf);
2863
2864
2865 p = printk_buf;
2866@@ -777,7 +787,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
2867 * Try to acquire and then immediately release the
2868 * console semaphore. The release will do all the
2869 * actual magic (print out buffers, wake up klogd,
2870- * etc).
2871+ * etc).
2872 *
2873 * The acquire_console_semaphore_for_printk() function
2874 * will release 'logbuf_lock' regardless of whether it
2875@@ -1014,7 +1024,7 @@ int printk_needs_cpu(int cpu)
2876
2877 void wake_up_klogd(void)
2878 {
2879- if (waitqueue_active(&log_wait))
2880+ if (!trace_override && waitqueue_active(&log_wait))
2881 __raw_get_cpu_var(printk_pending) = 1;
2882 }
2883
2884diff --git a/kernel/sched.c b/kernel/sched.c
2885index 3c2a54f..5e3c509 100644
2886--- a/kernel/sched.c
2887+++ b/kernel/sched.c
2888@@ -78,6 +78,9 @@
2889
2890 #include "sched_cpupri.h"
2891
2892+#include <litmus/sched_trace.h>
2893+#include <litmus/trace.h>
2894+
2895 #define CREATE_TRACE_POINTS
2896 #include <trace/events/sched.h>
2897
2898@@ -450,6 +453,12 @@ struct rt_rq {
2899 #endif
2900 };
2901
2902+/* Litmus related fields in a runqueue */
2903+struct litmus_rq {
2904+ unsigned long nr_running;
2905+ struct task_struct *prev;
2906+};
2907+
2908 #ifdef CONFIG_SMP
2909
2910 /*
2911@@ -512,6 +521,7 @@ struct rq {
2912
2913 struct cfs_rq cfs;
2914 struct rt_rq rt;
2915+ struct litmus_rq litmus;
2916
2917 #ifdef CONFIG_FAIR_GROUP_SCHED
2918 /* list of leaf cfs_rq on this cpu: */
2919@@ -1833,7 +1843,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
2920
2921 static const struct sched_class rt_sched_class;
2922
2923-#define sched_class_highest (&rt_sched_class)
2924+#define sched_class_highest (&litmus_sched_class)
2925 #define for_each_class(class) \
2926 for (class = sched_class_highest; class; class = class->next)
2927
2928@@ -1932,6 +1942,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
2929 #include "sched_idletask.c"
2930 #include "sched_fair.c"
2931 #include "sched_rt.c"
2932+#include "../litmus/sched_litmus.c"
2933 #ifdef CONFIG_SCHED_DEBUG
2934 # include "sched_debug.c"
2935 #endif
2936@@ -2372,6 +2383,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2937 unsigned long flags;
2938 struct rq *rq;
2939
2940+ if (is_realtime(p))
2941+ TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
2942+
2943 if (!sched_feat(SYNC_WAKEUPS))
2944 wake_flags &= ~WF_SYNC;
2945
2946@@ -2390,7 +2404,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2947 orig_cpu = cpu;
2948
2949 #ifdef CONFIG_SMP
2950- if (unlikely(task_running(rq, p)))
2951+ if (unlikely(task_running(rq, p)) || is_realtime(p))
2952 goto out_activate;
2953
2954 /*
2955@@ -2497,6 +2511,8 @@ out_running:
2956 }
2957 #endif
2958 out:
2959+ if (is_realtime(p))
2960+ TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
2961 task_rq_unlock(rq, &flags);
2962 put_cpu();
2963
2964@@ -2814,6 +2830,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
2965 */
2966 prev_state = prev->state;
2967 finish_arch_switch(prev);
2968+ litmus->finish_switch(prev);
2969+ prev->rt_param.stack_in_use = NO_CPU;
2970 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2971 local_irq_disable();
2972 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
2973@@ -2843,6 +2861,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
2974 {
2975 if (prev->sched_class->pre_schedule)
2976 prev->sched_class->pre_schedule(rq, prev);
2977+
2978+	/* LITMUS^RT (not a very clean hack): we need to save the prev task
2979+	 * because our scheduling decisions rely on it (since we drop the rq
2980+	 * lock, something in prev can change...); there is no way to escape
2981+	 * this hack apart from modifying pick_next_task(rq, _prev_) or
2982+	 * falling back on the previous solution of decoupling
2983+	 * scheduling decisions.
2984+	 */
2985+ rq->litmus.prev = prev;
2986 }
2987
2988 /* rq->lock is NOT held, but preemption is disabled */
2989@@ -3520,18 +3547,26 @@ void scheduler_tick(void)
2990
2991 sched_clock_tick();
2992
2993+ TS_TICK_START(current);
2994+
2995 raw_spin_lock(&rq->lock);
2996 update_rq_clock(rq);
2997 update_cpu_load(rq);
2998 curr->sched_class->task_tick(rq, curr, 0);
2999+
3000+ /* litmus_tick may force current to resched */
3001+ litmus_tick(rq, curr);
3002+
3003 raw_spin_unlock(&rq->lock);
3004
3005 perf_event_task_tick(curr);
3006
3007 #ifdef CONFIG_SMP
3008 rq->idle_at_tick = idle_cpu(cpu);
3009- trigger_load_balance(rq, cpu);
3010+ if (!is_realtime(current))
3011+ trigger_load_balance(rq, cpu);
3012 #endif
3013+ TS_TICK_END(current);
3014 }
3015
3016 notrace unsigned long get_parent_ip(unsigned long addr)
3017@@ -3672,12 +3707,20 @@ pick_next_task(struct rq *rq)
3018 /*
3019 * Optimization: we know that if all tasks are in
3020 * the fair class we can call that function directly:
3021- */
3022- if (likely(rq->nr_running == rq->cfs.nr_running)) {
3023+
3024+ * NOT IN LITMUS^RT!
3025+
3026+ * This breaks many assumptions in the plugins.
3027+ * Do not uncomment without thinking long and hard
3028+ * about how this affects global plugins such as GSN-EDF.
3029+
3030+ if (rq->nr_running == rq->cfs.nr_running) {
3031+ TRACE("taking shortcut in pick_next_task()\n");
3032 p = fair_sched_class.pick_next_task(rq);
3033 if (likely(p))
3034 return p;
3035 }
3036+ */
3037
3038 class = sched_class_highest;
3039 for ( ; ; ) {
3040@@ -3712,6 +3755,8 @@ need_resched:
3041
3042 release_kernel_lock(prev);
3043 need_resched_nonpreemptible:
3044+ TS_SCHED_START;
3045+ sched_trace_task_switch_away(prev);
3046
3047 schedule_debug(prev);
3048
3049@@ -3746,15 +3791,22 @@ need_resched_nonpreemptible:
3050 rq->curr = next;
3051 ++*switch_count;
3052
3053+ TS_SCHED_END(next);
3054+ TS_CXS_START(next);
3055 context_switch(rq, prev, next); /* unlocks the rq */
3056+ TS_CXS_END(current);
3057 /*
3058 * the context switch might have flipped the stack from under
3059 * us, hence refresh the local variables.
3060 */
3061 cpu = smp_processor_id();
3062 rq = cpu_rq(cpu);
3063- } else
3064+ } else {
3065+ TS_SCHED_END(prev);
3066 raw_spin_unlock_irq(&rq->lock);
3067+ }
3068+
3069+ sched_trace_task_switch_to(current);
3070
3071 post_schedule(rq);
3072
3073@@ -3767,6 +3819,9 @@ need_resched_nonpreemptible:
3074 preempt_enable_no_resched();
3075 if (need_resched())
3076 goto need_resched;
3077+
3078+ if (srp_active())
3079+ srp_ceiling_block();
3080 }
3081 EXPORT_SYMBOL(schedule);
3082
3083@@ -4043,6 +4098,17 @@ void complete_all(struct completion *x)
3084 }
3085 EXPORT_SYMBOL(complete_all);
3086
3087+void complete_n(struct completion *x, int n)
3088+{
3089+ unsigned long flags;
3090+
3091+ spin_lock_irqsave(&x->wait.lock, flags);
3092+ x->done += n;
3093+ __wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL);
3094+ spin_unlock_irqrestore(&x->wait.lock, flags);
3095+}
3096+EXPORT_SYMBOL(complete_n);
3097+
3098 static inline long __sched
3099 do_wait_for_common(struct completion *x, long timeout, int state)
3100 {
3101@@ -4471,7 +4537,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
3102 p->normal_prio = normal_prio(p);
3103 /* we are holding p->pi_lock already */
3104 p->prio = rt_mutex_getprio(p);
3105- if (rt_prio(p->prio))
3106+ if (p->policy == SCHED_LITMUS)
3107+ p->sched_class = &litmus_sched_class;
3108+ else if (rt_prio(p->prio))
3109 p->sched_class = &rt_sched_class;
3110 else
3111 p->sched_class = &fair_sched_class;
3112@@ -4516,7 +4584,7 @@ recheck:
3113
3114 if (policy != SCHED_FIFO && policy != SCHED_RR &&
3115 policy != SCHED_NORMAL && policy != SCHED_BATCH &&
3116- policy != SCHED_IDLE)
3117+ policy != SCHED_IDLE && policy != SCHED_LITMUS)
3118 return -EINVAL;
3119 }
3120
3121@@ -4531,6 +4599,8 @@ recheck:
3122 return -EINVAL;
3123 if (rt_policy(policy) != (param->sched_priority != 0))
3124 return -EINVAL;
3125+ if (policy == SCHED_LITMUS && policy == p->policy)
3126+ return -EINVAL;
3127
3128 /*
3129 * Allow unprivileged RT tasks to decrease priority:
3130@@ -4585,6 +4655,12 @@ recheck:
3131 return retval;
3132 }
3133
3134+ if (policy == SCHED_LITMUS) {
3135+ retval = litmus_admit_task(p);
3136+ if (retval)
3137+ return retval;
3138+ }
3139+
3140 /*
3141 * make sure no PI-waiters arrive (or leave) while we are
3142 * changing the priority of the task:
3143@@ -4612,10 +4688,19 @@ recheck:
3144
3145 p->sched_reset_on_fork = reset_on_fork;
3146
3147+ if (p->policy == SCHED_LITMUS)
3148+ litmus_exit_task(p);
3149+
3150 oldprio = p->prio;
3151 prev_class = p->sched_class;
3152 __setscheduler(rq, p, policy, param->sched_priority);
3153
3154+ if (policy == SCHED_LITMUS) {
3155+ p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
3156+ p->rt_param.present = running;
3157+ litmus->task_new(p, on_rq, running);
3158+ }
3159+
3160 if (running)
3161 p->sched_class->set_curr_task(rq);
3162 if (on_rq) {
3163@@ -4785,10 +4870,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
3164 rcu_read_lock();
3165
3166 p = find_process_by_pid(pid);
3167- if (!p) {
3168+	/* Don't set affinity if the task was not found or is a LITMUS task */
3169+ if (!p || is_realtime(p)) {
3170 rcu_read_unlock();
3171 put_online_cpus();
3172- return -ESRCH;
3173+ return p ? -EPERM : -ESRCH;
3174 }
3175
3176 /* Prevent p going away */
3177diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
3178index 5a5ea2c..b1af6d4 100644
3179--- a/kernel/sched_fair.c
3180+++ b/kernel/sched_fair.c
3181@@ -1708,7 +1708,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
3182 int sync = wake_flags & WF_SYNC;
3183 int scale = cfs_rq->nr_running >= sched_nr_latency;
3184
3185- if (unlikely(rt_prio(p->prio)))
3186+ if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS)
3187 goto preempt;
3188
3189 if (unlikely(p->sched_class != &fair_sched_class))
3190diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
3191index b5b920a..c2fbb02 100644
3192--- a/kernel/sched_rt.c
3193+++ b/kernel/sched_rt.c
3194@@ -1014,7 +1014,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
3195 */
3196 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
3197 {
3198- if (p->prio < rq->curr->prio) {
3199+ if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) {
3200 resched_task(rq->curr);
3201 return;
3202 }
3203diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
3204index f992762..0adc54b 100644
3205--- a/kernel/time/tick-sched.c
3206+++ b/kernel/time/tick-sched.c
3207@@ -721,6 +721,46 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
3208 }
3209
3210 /**
3211+ * tick_set_quanta_type - get the quanta type as a boot option
3212+ * Default is the standard setup with ticks staggered over the first
3213+ * half of the tick period.
3214+ */
3215+int quanta_type = LINUX_DEFAULT_TICKS;
3216+static int __init tick_set_quanta_type(char *str)
3217+{
3218+ if (strcmp("aligned", str) == 0) {
3219+ quanta_type = LITMUS_ALIGNED_TICKS;
3220+ printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n");
3221+ }
3222+ else if (strcmp("staggered", str) == 0) {
3223+ quanta_type = LITMUS_STAGGERED_TICKS;
3224+ printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n");
3225+ }
3226+ return 1;
3227+}
3228+__setup("quanta=", tick_set_quanta_type);
3229+
3230+u64 cpu_stagger_offset(int cpu)
3231+{
3232+ u64 offset = 0;
3233+ switch (quanta_type) {
3234+ case LITMUS_ALIGNED_TICKS:
3235+ offset = 0;
3236+ break;
3237+ case LITMUS_STAGGERED_TICKS:
3238+ offset = ktime_to_ns(tick_period);
3239+ do_div(offset, num_possible_cpus());
3240+ offset *= cpu;
3241+ break;
3242+ default:
3243+ offset = ktime_to_ns(tick_period) >> 1;
3244+ do_div(offset, num_possible_cpus());
3245+ offset *= cpu;
3246+ }
3247+ return offset;
3248+}
3249+
3250+/**
3251 * tick_setup_sched_timer - setup the tick emulation timer
3252 */
3253 void tick_setup_sched_timer(void)
3254@@ -737,9 +777,11 @@ void tick_setup_sched_timer(void)
3255
3256 /* Get the next period (per cpu) */
3257 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
3258- offset = ktime_to_ns(tick_period) >> 1;
3259- do_div(offset, num_possible_cpus());
3260- offset *= smp_processor_id();
3261+
3262+ /* Offset must be set correctly to achieve desired quanta type. */
3263+ offset = cpu_stagger_offset(smp_processor_id());
3264+
3265+ /* Add the correct offset to expiration time */
3266 hrtimer_add_expires_ns(&ts->sched_timer, offset);
3267
3268 for (;;) {
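The effect of cpu_stagger_offset() above is easiest to see with concrete numbers. A small stand-alone sketch reproduces the arithmetic for an assumed 1 ms tick and 4 CPUs: quanta=aligned yields offset 0 everywhere, quanta=staggered spreads the CPUs across the whole tick period, and the Linux default spreads them across the first half.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t tick_ns = 1000000;   /* assumption: HZ=1000 -> 1 ms tick */
        const int ncpus = 4;
        int cpu;

        for (cpu = 0; cpu < ncpus; cpu++) {
            uint64_t staggered = tick_ns / ncpus * cpu;      /* quanta=staggered */
            uint64_t def = (tick_ns >> 1) / ncpus * cpu;     /* Linux default    */
            printf("cpu %d: aligned=0 staggered=%llu default=%llu ns\n", cpu,
                   (unsigned long long) staggered, (unsigned long long) def);
        }
        return 0;
    }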
3269diff --git a/litmus/Kconfig b/litmus/Kconfig
3270new file mode 100644
3271index 0000000..9888589
3272--- /dev/null
3273+++ b/litmus/Kconfig
3274@@ -0,0 +1,134 @@
3275+menu "LITMUS^RT"
3276+
3277+menu "Scheduling"
3278+
3279+config PLUGIN_CEDF
3280+ bool "Clustered-EDF"
3281+ depends on X86 && SYSFS
3282+ default y
3283+ help
3284+ Include the Clustered EDF (C-EDF) plugin in the kernel.
3285+ This is appropriate for large platforms with shared caches.
3286+ On smaller platforms (e.g., ARM PB11MPCore), using C-EDF
3287+ makes little sense since there aren't any shared caches.
3288+
3289+config PLUGIN_PFAIR
3290+ bool "PFAIR"
3291+ depends on HIGH_RES_TIMERS && !NO_HZ
3292+ default y
3293+ help
3294+ Include the PFAIR plugin (i.e., the PD^2 scheduler) in the kernel.
3295+ The PFAIR plugin requires high resolution timers (for staggered quanta)
3296+ and does not support NO_HZ (quanta could be missed when the system is idle).
3297+
3298+ If unsure, say Yes.
3299+
3300+config RELEASE_MASTER
3301+ bool "Release-master Support"
3302+ depends on ARCH_HAS_SEND_PULL_TIMERS
3303+ default n
3304+ help
3305+ Allow one processor to act as a dedicated interrupt processor
3306+ that services all timer interrupts, but that does not schedule
3307+ real-time tasks. See RTSS'09 paper for details
3308+ (http://www.cs.unc.edu/~anderson/papers.html).
3309+ Currently only supported by GSN-EDF.
3310+
3311+endmenu
3312+
3313+menu "Real-Time Synchronization"
3314+
3315+config NP_SECTION
3316+ bool "Non-preemptive section support"
3317+ default n
3318+ help
3319+ Allow tasks to become non-preemptable.
3320+ Note that plugins still need to explicitly support non-preemptivity.
3321+ Currently, only GSN-EDF and PSN-EDF have such support.
3322+
3323+ This is required to support the FMLP.
3324+ If disabled, all tasks will be considered preemptable at all times.
3325+
3326+config SRP
3327+ bool "Stack Resource Policy (SRP)"
3328+ default n
3329+ help
3330+ Include support for Baker's Stack Resource Policy.
3331+
3332+ Say Yes if you want FMLP local long critical section
3333+ synchronization support.
3334+
3335+config FMLP
3336+ bool "FMLP support"
3337+ depends on NP_SECTION
3338+ default n
3339+ help
3340+	  Include support for deterministic multiprocessor real-time
3341+	  synchronization (the FMLP).
3342+
3343+ Say Yes if you want FMLP long critical section
3344+ synchronization support.
3345+
3346+endmenu
3347+
3348+menu "Tracing"
3349+
3350+config FEATHER_TRACE
3351+ bool "Feather-Trace Infrastructure"
3352+ default y
3353+ help
3354+ Feather-Trace basic tracing infrastructure. Includes device file
3355+ driver and instrumentation point support.
3356+
3357+ There are actually two implementations of Feather-Trace.
3358+ 1) A slower, but portable, default implementation.
3359+ 2) Architecture-specific implementations that rewrite kernel .text at runtime.
3360+
3361+ If enabled, Feather-Trace will be based on 2) if available (currently only for x86).
3362+ However, if DEBUG_RODATA=y, then Feather-Trace will choose option 1) in any case
3363+ to avoid problems with write-protected .text pages.
3364+
3365+ Bottom line: to avoid increased overheads, choose DEBUG_RODATA=n.
3366+
3367+ Note that this option only enables the basic Feather-Trace infrastructure;
3368+ you still need to enable SCHED_TASK_TRACE and/or SCHED_OVERHEAD_TRACE to
3369+ actually enable any events.
3370+
3371+config SCHED_TASK_TRACE
3372+ bool "Trace real-time tasks"
3373+ depends on FEATHER_TRACE
3374+ default y
3375+ help
3376+ Include support for the sched_trace_XXX() tracing functions. This
3377+ allows the collection of real-time task events such as job
3378+ completions, job releases, early completions, etc. This results in a
3379+ small overhead in the scheduling code. Disable if the overhead is not
3380+ acceptable (e.g., benchmarking).
3381+
3382+ Say Yes for debugging.
3383+ Say No for overhead tracing.
3384+
3385+config SCHED_OVERHEAD_TRACE
3386+ bool "Record timestamps for overhead measurements"
3387+ depends on FEATHER_TRACE
3388+ default n
3389+ help
3390+ Export event stream for overhead tracing.
3391+ Say Yes for overhead tracing.
3392+
3393+config SCHED_DEBUG_TRACE
3394+ bool "TRACE() debugging"
3395+ default y
3396+ help
3397+	  Include support for sched_trace_log_message(), which is used to
3398+ implement TRACE(). If disabled, no TRACE() messages will be included
3399+ in the kernel, and no overheads due to debugging statements will be
3400+ incurred by the scheduler. Disable if the overhead is not acceptable
3401+ (e.g. benchmarking).
3402+
3403+ Say Yes for debugging.
3404+ Say No for overhead tracing.
3405+
3406+endmenu
3407+
3408+endmenu
3409diff --git a/litmus/Makefile b/litmus/Makefile
3410new file mode 100644
3411index 0000000..f301d28
3412--- /dev/null
3413+++ b/litmus/Makefile
3414@@ -0,0 +1,25 @@
3415+#
3416+# Makefile for LITMUS^RT
3417+#
3418+
3419+obj-y = sched_plugin.o litmus.o \
3420+ budget.o \
3421+ jobs.o \
3422+ sync.o \
3423+ rt_domain.o \
3424+ edf_common.o \
3425+ fdso.o \
3426+ srp.o \
3427+ fmlp.o \
3428+ bheap.o \
3429+ ctrldev.o \
3430+ sched_gsn_edf.o \
3431+ sched_psn_edf.o
3432+
3433+obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
3434+obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
3435+
3436+obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
3437+obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
3438+obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o
3439+obj-$(CONFIG_SCHED_OVERHEAD_TRACE) += trace.o
3440diff --git a/litmus/bheap.c b/litmus/bheap.c
3441new file mode 100644
3442index 0000000..528af97
3443--- /dev/null
3444+++ b/litmus/bheap.c
3445@@ -0,0 +1,314 @@
3446+#include "linux/kernel.h"
3447+#include "litmus/bheap.h"
3448+
3449+void bheap_init(struct bheap* heap)
3450+{
3451+ heap->head = NULL;
3452+ heap->min = NULL;
3453+}
3454+
3455+void bheap_node_init(struct bheap_node** _h, void* value)
3456+{
3457+ struct bheap_node* h = *_h;
3458+ h->parent = NULL;
3459+ h->next = NULL;
3460+ h->child = NULL;
3461+ h->degree = NOT_IN_HEAP;
3462+ h->value = value;
3463+ h->ref = _h;
3464+}
3465+
3466+
3467+/* make child a subtree of root */
3468+static void __bheap_link(struct bheap_node* root,
3469+ struct bheap_node* child)
3470+{
3471+ child->parent = root;
3472+ child->next = root->child;
3473+ root->child = child;
3474+ root->degree++;
3475+}
3476+
3477+/* merge root lists */
3478+static struct bheap_node* __bheap_merge(struct bheap_node* a,
3479+ struct bheap_node* b)
3480+{
3481+ struct bheap_node* head = NULL;
3482+ struct bheap_node** pos = &head;
3483+
3484+ while (a && b) {
3485+ if (a->degree < b->degree) {
3486+ *pos = a;
3487+ a = a->next;
3488+ } else {
3489+ *pos = b;
3490+ b = b->next;
3491+ }
3492+ pos = &(*pos)->next;
3493+ }
3494+ if (a)
3495+ *pos = a;
3496+ else
3497+ *pos = b;
3498+ return head;
3499+}
3500+
3501+/* reverse a linked list of nodes. also clears parent pointer */
3502+static struct bheap_node* __bheap_reverse(struct bheap_node* h)
3503+{
3504+ struct bheap_node* tail = NULL;
3505+ struct bheap_node* next;
3506+
3507+ if (!h)
3508+ return h;
3509+
3510+ h->parent = NULL;
3511+ while (h->next) {
3512+ next = h->next;
3513+ h->next = tail;
3514+ tail = h;
3515+ h = next;
3516+ h->parent = NULL;
3517+ }
3518+ h->next = tail;
3519+ return h;
3520+}
3521+
3522+static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap,
3523+ struct bheap_node** prev, struct bheap_node** node)
3524+{
3525+ struct bheap_node *_prev, *cur;
3526+ *prev = NULL;
3527+
3528+ if (!heap->head) {
3529+ *node = NULL;
3530+ return;
3531+ }
3532+
3533+ *node = heap->head;
3534+ _prev = heap->head;
3535+ cur = heap->head->next;
3536+ while (cur) {
3537+ if (higher_prio(cur, *node)) {
3538+ *node = cur;
3539+ *prev = _prev;
3540+ }
3541+ _prev = cur;
3542+ cur = cur->next;
3543+ }
3544+}
3545+
3546+static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap,
3547+ struct bheap_node* h2)
3548+{
3549+ struct bheap_node* h1;
3550+ struct bheap_node *prev, *x, *next;
3551+ if (!h2)
3552+ return;
3553+ h1 = heap->head;
3554+ if (!h1) {
3555+ heap->head = h2;
3556+ return;
3557+ }
3558+ h1 = __bheap_merge(h1, h2);
3559+ prev = NULL;
3560+ x = h1;
3561+ next = x->next;
3562+ while (next) {
3563+ if (x->degree != next->degree ||
3564+ (next->next && next->next->degree == x->degree)) {
3565+ /* nothing to do, advance */
3566+ prev = x;
3567+ x = next;
3568+ } else if (higher_prio(x, next)) {
3569+ /* x becomes the root of next */
3570+ x->next = next->next;
3571+ __bheap_link(x, next);
3572+ } else {
3573+ /* next becomes the root of x */
3574+ if (prev)
3575+ prev->next = next;
3576+ else
3577+ h1 = next;
3578+ __bheap_link(next, x);
3579+ x = next;
3580+ }
3581+ next = x->next;
3582+ }
3583+ heap->head = h1;
3584+}
3585+
3586+static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio,
3587+ struct bheap* heap)
3588+{
3589+ struct bheap_node *prev, *node;
3590+ __bheap_min(higher_prio, heap, &prev, &node);
3591+ if (!node)
3592+ return NULL;
3593+ if (prev)
3594+ prev->next = node->next;
3595+ else
3596+ heap->head = node->next;
3597+ __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
3598+ return node;
3599+}
3600+
3601+/* insert (and reinitialize) a node into the heap */
3602+void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap,
3603+ struct bheap_node* node)
3604+{
3605+ struct bheap_node *min;
3606+ node->child = NULL;
3607+ node->parent = NULL;
3608+ node->next = NULL;
3609+ node->degree = 0;
3610+ if (heap->min && higher_prio(node, heap->min)) {
3611+ /* swap min cache */
3612+ min = heap->min;
3613+ min->child = NULL;
3614+ min->parent = NULL;
3615+ min->next = NULL;
3616+ min->degree = 0;
3617+ __bheap_union(higher_prio, heap, min);
3618+ heap->min = node;
3619+ } else
3620+ __bheap_union(higher_prio, heap, node);
3621+}
3622+
3623+void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap)
3624+{
3625+ struct bheap_node* min;
3626+ if (heap->min) {
3627+ min = heap->min;
3628+ heap->min = NULL;
3629+ bheap_insert(higher_prio, heap, min);
3630+ }
3631+}
3632+
3633+/* merge addition into target */
3634+void bheap_union(bheap_prio_t higher_prio,
3635+ struct bheap* target, struct bheap* addition)
3636+{
3637+ /* first insert any cached minima, if necessary */
3638+ bheap_uncache_min(higher_prio, target);
3639+ bheap_uncache_min(higher_prio, addition);
3640+ __bheap_union(higher_prio, target, addition->head);
3641+ /* this is a destructive merge */
3642+ addition->head = NULL;
3643+}
3644+
3645+struct bheap_node* bheap_peek(bheap_prio_t higher_prio,
3646+ struct bheap* heap)
3647+{
3648+ if (!heap->min)
3649+ heap->min = __bheap_extract_min(higher_prio, heap);
3650+ return heap->min;
3651+}
3652+
3653+struct bheap_node* bheap_take(bheap_prio_t higher_prio,
3654+ struct bheap* heap)
3655+{
3656+ struct bheap_node *node;
3657+ if (!heap->min)
3658+ heap->min = __bheap_extract_min(higher_prio, heap);
3659+ node = heap->min;
3660+ heap->min = NULL;
3661+ if (node)
3662+ node->degree = NOT_IN_HEAP;
3663+ return node;
3664+}
3665+
3666+int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node)
3667+{
3668+ struct bheap_node *parent;
3669+ struct bheap_node** tmp_ref;
3670+ void* tmp;
3671+
3672+ /* bubble up */
3673+ parent = node->parent;
3674+ while (parent && higher_prio(node, parent)) {
3675+ /* swap parent and node */
3676+ tmp = parent->value;
3677+ parent->value = node->value;
3678+ node->value = tmp;
3679+ /* swap references */
3680+ *(parent->ref) = node;
3681+ *(node->ref) = parent;
3682+ tmp_ref = parent->ref;
3683+ parent->ref = node->ref;
3684+ node->ref = tmp_ref;
3685+ /* step up */
3686+ node = parent;
3687+ parent = node->parent;
3688+ }
3689+
3690+ return parent != NULL;
3691+}
3692+
3693+void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap,
3694+ struct bheap_node* node)
3695+{
3696+ struct bheap_node *parent, *prev, *pos;
3697+ struct bheap_node** tmp_ref;
3698+ void* tmp;
3699+
3700+ if (heap->min != node) {
3701+ /* bubble up */
3702+ parent = node->parent;
3703+ while (parent) {
3704+ /* swap parent and node */
3705+ tmp = parent->value;
3706+ parent->value = node->value;
3707+ node->value = tmp;
3708+ /* swap references */
3709+ *(parent->ref) = node;
3710+ *(node->ref) = parent;
3711+ tmp_ref = parent->ref;
3712+ parent->ref = node->ref;
3713+ node->ref = tmp_ref;
3714+ /* step up */
3715+ node = parent;
3716+ parent = node->parent;
3717+ }
3718+ /* now delete:
3719+ * first find prev */
3720+ prev = NULL;
3721+ pos = heap->head;
3722+ while (pos != node) {
3723+ prev = pos;
3724+ pos = pos->next;
3725+ }
3726+ /* we have prev, now remove node */
3727+ if (prev)
3728+ prev->next = node->next;
3729+ else
3730+ heap->head = node->next;
3731+ __bheap_union(higher_prio, heap, __bheap_reverse(node->child));
3732+ } else
3733+ heap->min = NULL;
3734+ node->degree = NOT_IN_HEAP;
3735+}
3736+
3737+/* allocate a heap node for value and insert into the heap */
3738+int bheap_add(bheap_prio_t higher_prio, struct bheap* heap,
3739+ void* value, int gfp_flags)
3740+{
3741+ struct bheap_node* hn = bheap_node_alloc(gfp_flags);
3742+ if (likely(hn)) {
3743+ bheap_node_init(&hn, value);
3744+ bheap_insert(higher_prio, heap, hn);
3745+ }
3746+ return hn != NULL;
3747+}
3748+
3749+void* bheap_take_del(bheap_prio_t higher_prio,
3750+ struct bheap* heap)
3751+{
3752+ struct bheap_node* hn = bheap_take(higher_prio, heap);
3753+ void* ret = NULL;
3754+ if (hn) {
3755+ ret = hn->value;
3756+ bheap_node_free(hn);
3757+ }
3758+ return ret;
3759+}
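
/* Editorial sketch (not part of the patch): a minimal kernel-side use of the
 * bheap API defined above. The priority callback and the integer payload are
 * hypothetical; the helpers bheap_add() and bheap_take_del() and the fields
 * head, min, and value are taken from the code above. */
static int demo_prio(struct bheap_node *a, struct bheap_node *b)
{
	/* treat the stored pointer as an integer key; smaller key = higher prio */
	return (long) a->value < (long) b->value;
}

static void demo_bheap_usage(void)
{
	struct bheap h = { .head = NULL, .min = NULL };
	void *v;

	bheap_add(demo_prio, &h, (void *) 3L, GFP_ATOMIC); /* allocate node + insert */
	bheap_add(demo_prio, &h, (void *) 1L, GFP_ATOMIC);

	v = bheap_take_del(demo_prio, &h); /* extract minimum: here v == (void *) 1L */
}
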
3760diff --git a/litmus/budget.c b/litmus/budget.c
3761new file mode 100644
3762index 0000000..b99177a
3763--- /dev/null
3764+++ b/litmus/budget.c
3765@@ -0,0 +1,109 @@
3766+#include <linux/percpu.h>
3767+#include <linux/hrtimer.h>
3768+
3769+#include <litmus/litmus.h>
3770+
3771+struct enforcement_timer {
3772+ /* The enforcement timer is used to accurately police
3773+ * slice budgets. */
3774+ struct hrtimer timer;
3775+ int armed;
3776+};
3777+
3778+DEFINE_PER_CPU(struct enforcement_timer, budget_timer);
3779+
3780+static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
3781+{
3782+ struct enforcement_timer* et = container_of(timer,
3783+ struct enforcement_timer,
3784+ timer);
3785+ unsigned long flags;
3786+
3787+ local_irq_save(flags);
3788+ TRACE("enforcement timer fired.\n");
3789+ et->armed = 0;
3790+ /* activate scheduler */
3791+ set_tsk_need_resched(current);
3792+ local_irq_restore(flags);
3793+
3794+ return HRTIMER_NORESTART;
3795+}
3796+
3797+/* assumes called with IRQs off */
3798+static void cancel_enforcement_timer(struct enforcement_timer* et)
3799+{
3800+ int ret;
3801+
3802+ TRACE("cancelling enforcement timer.\n");
3803+
3804+ /* Since interrupts are disabled and et->armed is only
3805+ * modified locally, we do not need any locks.
3806+ */
3807+
3808+ if (et->armed) {
3809+ ret = hrtimer_try_to_cancel(&et->timer);
3810+ /* Should never be inactive. */
3811+ BUG_ON(ret == 0);
3812+ /* Should never be running concurrently. */
3813+ BUG_ON(ret == -1);
3814+
3815+ et->armed = 0;
3816+ }
3817+}
3818+
3819+/* assumes called with IRQs off */
3820+static void arm_enforcement_timer(struct enforcement_timer* et,
3821+ struct task_struct* t)
3822+{
3823+ lt_t when_to_fire;
3824+ TRACE_TASK(t, "arming enforcement timer.\n");
3825+
3826+ /* Calling this when there is no budget left for the task
3827+ * makes no sense, unless the task is non-preemptive. */
3828+ BUG_ON(budget_exhausted(t) && (!is_np(t)));
3829+
3830+ /* __hrtimer_start_range_ns() cancels the timer
3831+ * anyway, so we don't have to check whether it is still armed */
3832+
3833+ if (likely(!is_np(t))) {
3834+ when_to_fire = litmus_clock() + budget_remaining(t);
3835+ __hrtimer_start_range_ns(&et->timer,
3836+ ns_to_ktime(when_to_fire),
3837+ 0 /* delta */,
3838+ HRTIMER_MODE_ABS_PINNED,
3839+ 0 /* no wakeup */);
3840+ et->armed = 1;
3841+ }
3842+}
3843+
3844+
3845+/* expects to be called with IRQs off */
3846+void update_enforcement_timer(struct task_struct* t)
3847+{
3848+ struct enforcement_timer* et = &__get_cpu_var(budget_timer);
3849+
3850+ if (t && budget_precisely_enforced(t)) {
3851+ /* Make sure we call into the scheduler when this budget
3852+ * expires. */
3853+ arm_enforcement_timer(et, t);
3854+ } else if (et->armed) {
3855+ /* Make sure we don't cause unnecessary interrupts. */
3856+ cancel_enforcement_timer(et);
3857+ }
3858+}
3859+
3860+
3861+static int __init init_budget_enforcement(void)
3862+{
3863+ int cpu;
3864+ struct enforcement_timer* et;
3865+
3866+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
3867+ et = &per_cpu(budget_timer, cpu);
3868+ hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
3869+ et->timer.function = on_enforcement_timeout;
3870+ }
3871+ return 0;
3872+}
3873+
3874+module_init(init_budget_enforcement);
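
/* Editorial sketch (not part of the patch): how a scheduler plugin might drive
 * update_enforcement_timer() above. The plugin context (demo_schedule and the
 * way 'next' is chosen) is hypothetical; the only requirement carried over
 * from the code above is that the call is made with interrupts disabled. */
static struct task_struct* demo_schedule(void)
{
	struct task_struct *next;

	/* ... scheduling decision made while holding the run-queue lock,
	 * i.e., with IRQs off ... */
	next = NULL; /* placeholder for the plugin's choice */

	/* arms the per-CPU timer if 'next' uses PRECISE_ENFORCEMENT and
	 * cancels a previously armed timer otherwise */
	update_enforcement_timer(next);

	return next;
}
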
3875diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c
3876new file mode 100644
3877index 0000000..6677a67
3878--- /dev/null
3879+++ b/litmus/ctrldev.c
3880@@ -0,0 +1,150 @@
3881+#include <linux/sched.h>
3882+#include <linux/mm.h>
3883+#include <linux/fs.h>
3884+#include <linux/miscdevice.h>
3885+#include <linux/module.h>
3886+
3887+#include <litmus/litmus.h>
3888+
3889+/* only one page for now, but we might want to add a RO version at some point */
3890+
3891+#define CTRL_NAME "litmus/ctrl"
3892+
3893+/* allocate t->rt_param.ctrl_page */
3894+static int alloc_ctrl_page(struct task_struct *t)
3895+{
3896+ int err = 0;
3897+
3898+ /* only allocate if the task doesn't have one yet */
3899+ if (!tsk_rt(t)->ctrl_page) {
3900+ tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL);
3901+ if (!tsk_rt(t)->ctrl_page)
3902+ err = -ENOMEM;
3903+ /* will get de-allocated in task teardown */
3904+ TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__,
3905+ tsk_rt(t)->ctrl_page);
3906+ }
3907+ return err;
3908+}
3909+
3910+static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma)
3911+{
3912+ int err;
3913+ unsigned long pfn;
3914+
3915+ struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page);
3916+
3917+ /* Increase ref count. Is decreased when vma is destroyed. */
3918+ get_page(ctrl);
3919+
3920+ /* compute page frame number */
3921+ pfn = page_to_pfn(ctrl);
3922+
3923+ TRACE_CUR(CTRL_NAME
3924+ ": mapping %p (pfn:%lx, %lx) to 0x%lx (prot:%lx)\n",
3925+ tsk_rt(t)->ctrl_page, pfn, page_to_pfn(ctrl), vma->vm_start,
3926+ vma->vm_page_prot);
3927+
3928+ /* Map it into the vma. Make sure to use PAGE_SHARED, otherwise
3929+ * userspace actually gets a copy-on-write page. */
3930+ err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, PAGE_SHARED);
3931+
3932+ if (err)
3933+ TRACE_CUR(CTRL_NAME ": remap_pfn_range() failed (%d)\n", err);
3934+
3935+ return err;
3936+}
3937+
3938+static void litmus_ctrl_vm_close(struct vm_area_struct* vma)
3939+{
3940+ TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__,
3941+ vma->vm_flags, vma->vm_page_prot);
3942+
3943+ TRACE_CUR(CTRL_NAME
3944+		  ": %p:%p vma:%p vma->vm_private_data:%p closed by %s/%d.\n",
3945+ (void*) vma->vm_start, (void*) vma->vm_end, vma,
3946+ vma->vm_private_data, current->comm,
3947+ current->pid);
3948+}
3949+
3950+static int litmus_ctrl_vm_fault(struct vm_area_struct* vma,
3951+ struct vm_fault* vmf)
3952+{
3953+ /* This function should never be called, since
3954+ * all pages should have been mapped by mmap()
3955+ * already. */
3956+ TRACE_CUR("%s flags=0x%x\n", __FUNCTION__, vma->vm_flags);
3957+
3958+ /* nope, you only get one page */
3959+ return VM_FAULT_SIGBUS;
3960+}
3961+
3962+static struct vm_operations_struct litmus_ctrl_vm_ops = {
3963+ .close = litmus_ctrl_vm_close,
3964+ .fault = litmus_ctrl_vm_fault,
3965+};
3966+
3967+static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma)
3968+{
3969+ int err = 0;
3970+
3971+ /* first make sure mapper knows what he's doing */
3972+
3973+ /* you can only get one page */
3974+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
3975+ return -EINVAL;
3976+
3977+ /* you can only map the "first" page */
3978+ if (vma->vm_pgoff != 0)
3979+ return -EINVAL;
3980+
3981+ /* you can't share it with anyone */
3982+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
3983+ return -EINVAL;
3984+
3985+ vma->vm_ops = &litmus_ctrl_vm_ops;
3986+ /* this mapping should not be kept across forks,
3987+ * and cannot be expanded */
3988+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
3989+
3990+ err = alloc_ctrl_page(current);
3991+ if (!err)
3992+ err = map_ctrl_page(current, vma);
3993+
3994+ TRACE_CUR("%s flags=0x%x prot=0x%lx\n",
3995+ __FUNCTION__, vma->vm_flags, vma->vm_page_prot);
3996+
3997+ return err;
3998+}
3999+
4000+static struct file_operations litmus_ctrl_fops = {
4001+ .owner = THIS_MODULE,
4002+ .mmap = litmus_ctrl_mmap,
4003+};
4004+
4005+static struct miscdevice litmus_ctrl_dev = {
4006+ .name = CTRL_NAME,
4007+ .minor = MISC_DYNAMIC_MINOR,
4008+ .fops = &litmus_ctrl_fops,
4009+};
4010+
4011+static int __init init_litmus_ctrl_dev(void)
4012+{
4013+ int err;
4014+
4015+ BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE);
4016+
4017+ printk("Initializing LITMUS^RT control device.\n");
4018+ err = misc_register(&litmus_ctrl_dev);
4019+ if (err)
4020+ printk("Could not allocate %s device (%d).\n", CTRL_NAME, err);
4021+ return err;
4022+}
4023+
4024+static void __exit exit_litmus_ctrl_dev(void)
4025+{
4026+ misc_deregister(&litmus_ctrl_dev);
4027+}
4028+
4029+module_init(init_litmus_ctrl_dev);
4030+module_exit(exit_litmus_ctrl_dev);
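
/* Editorial sketch (not part of the patch): how user space might map the
 * control page exported above. The device path /dev/litmus/ctrl is an
 * assumption derived from the misc device name "litmus/ctrl"; the layout of
 * struct control_page is not shown in this excerpt. Note that
 * litmus_ctrl_mmap() rejects shared mappings, so MAP_PRIVATE is used and
 * exactly one page at offset 0 is requested. */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static void* demo_map_ctrl_page(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	int fd = open("/dev/litmus/ctrl", O_RDWR);
	void *page;

	if (fd < 0)
		return NULL;
	page = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	close(fd);
	return page == MAP_FAILED ? NULL : page;
}
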
4031diff --git a/litmus/edf_common.c b/litmus/edf_common.c
4032new file mode 100644
4033index 0000000..06daec6
4034--- /dev/null
4035+++ b/litmus/edf_common.c
4036@@ -0,0 +1,102 @@
4037+/*
4038+ * litmus/edf_common.c
4039+ *
4040+ * Common functions for EDF-based schedulers.
4041+ */
4042+
4043+#include <linux/percpu.h>
4044+#include <linux/sched.h>
4045+#include <linux/list.h>
4046+
4047+#include <litmus/litmus.h>
4048+#include <litmus/sched_plugin.h>
4049+#include <litmus/sched_trace.h>
4050+
4051+#include <litmus/edf_common.h>
4052+
4053+/* edf_higher_prio - returns true if first has a higher EDF priority
4054+ * than second. Deadline ties are broken by PID.
4055+ *
4056+ * both first and second may be NULL
4057+ */
4058+int edf_higher_prio(struct task_struct* first,
4059+ struct task_struct* second)
4060+{
4061+ struct task_struct *first_task = first;
4062+ struct task_struct *second_task = second;
4063+
4064+ /* There is no point in comparing a task to itself. */
4065+ if (first && first == second) {
4066+ TRACE_TASK(first,
4067+ "WARNING: pointless edf priority comparison.\n");
4068+ return 0;
4069+ }
4070+
4071+
4072+ /* Check for inherited priorities. Change task
4073+ * used for comparison in such a case.
4074+ */
4075+ if (first && first->rt_param.inh_task)
4076+ first_task = first->rt_param.inh_task;
4077+ if (second && second->rt_param.inh_task)
4078+ second_task = second->rt_param.inh_task;
4079+
4080+ return
4081+ /* it has to exist in order to have higher priority */
4082+ first_task && (
4083+ /* does the second task exist and is it a real-time task? If
4084+	 * not, the first task (which is an RT task) has higher
4085+ * priority.
4086+ */
4087+ !second_task || !is_realtime(second_task) ||
4088+
4089+ /* is the deadline of the first task earlier?
4090+ * Then it has higher priority.
4091+ */
4092+ earlier_deadline(first_task, second_task) ||
4093+
4094+ /* Do we have a deadline tie?
4095+ * Then break by PID.
4096+ */
4097+ (get_deadline(first_task) == get_deadline(second_task) &&
4098+ (first_task->pid < second_task->pid ||
4099+
4100+ /* If the PIDs are the same then the task with the inherited
4101+ * priority wins.
4102+ */
4103+ (first_task->pid == second_task->pid &&
4104+ !second->rt_param.inh_task))));
4105+}
4106+
4107+int edf_ready_order(struct bheap_node* a, struct bheap_node* b)
4108+{
4109+ return edf_higher_prio(bheap2task(a), bheap2task(b));
4110+}
4111+
4112+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
4113+ release_jobs_t release)
4114+{
4115+ rt_domain_init(rt, edf_ready_order, resched, release);
4116+}
4117+
4118+/* edf_preemption_needed - check whether the task t needs to be preempted
4119+ * call only with irqs disabled and with ready_lock acquired
4120+ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
4121+ */
4122+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
4123+{
4124+ /* we need the read lock for edf_ready_queue */
4125+ /* no need to preempt if there is nothing pending */
4126+ if (!__jobs_pending(rt))
4127+ return 0;
4128+ /* we need to reschedule if t doesn't exist */
4129+ if (!t)
4130+ return 1;
4131+
4132+ /* NOTE: We cannot check for non-preemptibility since we
4133+ * don't know what address space we're currently in.
4134+ */
4135+
4136+ /* make sure to get non-rt stuff out of the way */
4137+ return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t);
4138+}
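
/* Editorial worked example (not part of the patch) for edf_higher_prio()
 * above, using two hypothetical tasks A and B:
 *
 *   deadline(A) = 10 ms, deadline(B) = 12 ms           -> A has higher priority
 *   deadline(A) = deadline(B), pid(A) = 100 < pid(B)   -> A has higher priority
 *   B is not a real-time task                          -> A has higher priority
 *
 * If a task has an inherited priority (rt_param.inh_task), the comparison is
 * carried out with the parameters of the task it inherits from. */
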
4139diff --git a/litmus/fdso.c b/litmus/fdso.c
4140new file mode 100644
4141index 0000000..85be716
4142--- /dev/null
4143+++ b/litmus/fdso.c
4144@@ -0,0 +1,281 @@
4145+/* fdso.c - file descriptor attached shared objects
4146+ *
4147+ * (c) 2007 B. Brandenburg, LITMUS^RT project
4148+ *
4149+ * Notes:
4150+ * - object descriptor (OD) tables are not cloned during a fork.
4151+ * - objects are created on-demand, and freed after the last reference
4152+ * is dropped.
4153+ * - for now, object types are hard coded.
4154+ * - As long as we have live objects, we keep a reference to the inode.
4155+ */
4156+
4157+#include <linux/errno.h>
4158+#include <linux/sched.h>
4159+#include <linux/mutex.h>
4160+#include <linux/file.h>
4161+#include <asm/uaccess.h>
4162+
4163+#include <litmus/fdso.h>
4164+
4165+extern struct fdso_ops fmlp_sem_ops;
4166+extern struct fdso_ops srp_sem_ops;
4167+
4168+static const struct fdso_ops* fdso_ops[] = {
4169+ &fmlp_sem_ops,
4170+ &srp_sem_ops,
4171+};
4172+
4173+static void* fdso_create(obj_type_t type)
4174+{
4175+ if (fdso_ops[type]->create)
4176+ return fdso_ops[type]->create();
4177+ else
4178+ return NULL;
4179+}
4180+
4181+static void fdso_destroy(obj_type_t type, void* obj)
4182+{
4183+ fdso_ops[type]->destroy(obj);
4184+}
4185+
4186+static int fdso_open(struct od_table_entry* entry, void* __user config)
4187+{
4188+ if (fdso_ops[entry->obj->type]->open)
4189+ return fdso_ops[entry->obj->type]->open(entry, config);
4190+ else
4191+ return 0;
4192+}
4193+
4194+static int fdso_close(struct od_table_entry* entry)
4195+{
4196+ if (fdso_ops[entry->obj->type]->close)
4197+ return fdso_ops[entry->obj->type]->close(entry);
4198+ else
4199+ return 0;
4200+}
4201+
4202+/* inode must be locked already */
4203+static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
4204+ obj_type_t type,
4205+ unsigned int id)
4206+{
4207+ struct inode_obj_id* obj;
4208+ void* raw_obj;
4209+
4210+ raw_obj = fdso_create(type);
4211+ if (!raw_obj)
4212+ return NULL;
4213+
4214+ obj = kmalloc(sizeof(*obj), GFP_KERNEL);
4215+ if (!obj)
4216+ return NULL;
4217+ INIT_LIST_HEAD(&obj->list);
4218+ atomic_set(&obj->count, 1);
4219+ obj->type = type;
4220+ obj->id = id;
4221+ obj->obj = raw_obj;
4222+ obj->inode = inode;
4223+
4224+ list_add(&obj->list, &inode->i_obj_list);
4225+ atomic_inc(&inode->i_count);
4226+
4227+ printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id);
4228+ return obj;
4229+}
4230+
4231+/* inode must be locked already */
4232+static struct inode_obj_id* get_inode_obj(struct inode* inode,
4233+ obj_type_t type,
4234+ unsigned int id)
4235+{
4236+ struct list_head* pos;
4237+ struct inode_obj_id* obj = NULL;
4238+
4239+ list_for_each(pos, &inode->i_obj_list) {
4240+ obj = list_entry(pos, struct inode_obj_id, list);
4241+ if (obj->id == id && obj->type == type) {
4242+ atomic_inc(&obj->count);
4243+ return obj;
4244+ }
4245+ }
4246+ printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id);
4247+ return NULL;
4248+}
4249+
4250+
4251+static void put_inode_obj(struct inode_obj_id* obj)
4252+{
4253+ struct inode* inode;
4254+ int let_go = 0;
4255+
4256+ inode = obj->inode;
4257+ if (atomic_dec_and_test(&obj->count)) {
4258+
4259+ mutex_lock(&inode->i_obj_mutex);
4260+ /* no new references can be obtained */
4261+ if (!atomic_read(&obj->count)) {
4262+ list_del(&obj->list);
4263+ fdso_destroy(obj->type, obj->obj);
4264+ kfree(obj);
4265+ let_go = 1;
4266+ }
4267+ mutex_unlock(&inode->i_obj_mutex);
4268+ if (let_go)
4269+ iput(inode);
4270+ }
4271+}
4272+
4273+static struct od_table_entry* get_od_entry(struct task_struct* t)
4274+{
4275+ struct od_table_entry* table;
4276+ int i;
4277+
4278+
4279+ table = t->od_table;
4280+ if (!table) {
4281+ table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS,
4282+ GFP_KERNEL);
4283+ t->od_table = table;
4284+ }
4285+
4286+ for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
4287+ if (!table[i].used) {
4288+ table[i].used = 1;
4289+ return table + i;
4290+ }
4291+ return NULL;
4292+}
4293+
4294+static int put_od_entry(struct od_table_entry* od)
4295+{
4296+ put_inode_obj(od->obj);
4297+ od->used = 0;
4298+ return 0;
4299+}
4300+
4301+void exit_od_table(struct task_struct* t)
4302+{
4303+ int i;
4304+
4305+ if (t->od_table) {
4306+ for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
4307+ if (t->od_table[i].used)
4308+ put_od_entry(t->od_table + i);
4309+ kfree(t->od_table);
4310+ t->od_table = NULL;
4311+ }
4312+}
4313+
4314+static int do_sys_od_open(struct file* file, obj_type_t type, int id,
4315+ void* __user config)
4316+{
4317+ int idx = 0, err;
4318+ struct inode* inode;
4319+ struct inode_obj_id* obj = NULL;
4320+ struct od_table_entry* entry;
4321+
4322+ inode = file->f_dentry->d_inode;
4323+
4324+ entry = get_od_entry(current);
4325+ if (!entry)
4326+ return -ENOMEM;
4327+
4328+ mutex_lock(&inode->i_obj_mutex);
4329+ obj = get_inode_obj(inode, type, id);
4330+ if (!obj)
4331+ obj = alloc_inode_obj(inode, type, id);
4332+ if (!obj) {
4333+ idx = -ENOMEM;
4334+ entry->used = 0;
4335+ } else {
4336+ entry->obj = obj;
4337+ entry->extra = NULL;
4338+ idx = entry - current->od_table;
4339+ }
4340+
4341+ mutex_unlock(&inode->i_obj_mutex);
4342+
4343+ err = fdso_open(entry, config);
4344+ if (err < 0) {
4345+ /* The class rejected the open call.
4346+ * We need to clean up and tell user space.
4347+ */
4348+ put_od_entry(entry);
4349+ idx = err;
4350+ }
4351+
4352+ return idx;
4353+}
4354+
4355+
4356+struct od_table_entry* __od_lookup(int od)
4357+{
4358+ struct task_struct *t = current;
4359+
4360+ if (!t->od_table)
4361+ return NULL;
4362+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
4363+ return NULL;
4364+ if (!t->od_table[od].used)
4365+ return NULL;
4366+ return t->od_table + od;
4367+}
4368+
4369+
4370+asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config)
4371+{
4372+ int ret = 0;
4373+ struct file* file;
4374+
4375+ /*
4376+ 1) get file from fd, get inode from file
4377+ 2) lock inode
4378+ 3) try to lookup object
4379+ 4) if not present create and enqueue object, inc inode refcnt
4380+ 5) increment refcnt of object
4381+ 6) alloc od_table_entry, setup ptrs
4382+ 7) unlock inode
4383+ 8) return offset in od_table as OD
4384+ */
4385+
4386+ if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
4387+ ret = -EINVAL;
4388+ goto out;
4389+ }
4390+
4391+ file = fget(fd);
4392+ if (!file) {
4393+ ret = -EBADF;
4394+ goto out;
4395+ }
4396+
4397+ ret = do_sys_od_open(file, type, obj_id, config);
4398+
4399+ fput(file);
4400+
4401+out:
4402+ return ret;
4403+}
4404+
4405+
4406+asmlinkage long sys_od_close(int od)
4407+{
4408+ int ret = -EINVAL;
4409+ struct task_struct *t = current;
4410+
4411+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
4412+ return ret;
4413+
4414+ if (!t->od_table || !t->od_table[od].used)
4415+ return ret;
4416+
4417+
4418+ /* give the class a chance to reject the close
4419+ */
4420+ ret = fdso_close(t->od_table + od);
4421+ if (ret == 0)
4422+ ret = put_od_entry(t->od_table + od);
4423+
4424+ return ret;
4425+}
4426diff --git a/litmus/fmlp.c b/litmus/fmlp.c
4427new file mode 100644
4428index 0000000..03fa735
4429--- /dev/null
4430+++ b/litmus/fmlp.c
4431@@ -0,0 +1,268 @@
4432+/*
4433+ * FMLP implementation.
4434+ * Much of the code here is borrowed from include/asm-i386/semaphore.h
4435+ */
4436+
4437+#include <asm/atomic.h>
4438+
4439+#include <linux/semaphore.h>
4440+#include <linux/sched.h>
4441+#include <linux/wait.h>
4442+#include <linux/spinlock.h>
4443+
4444+#include <litmus/litmus.h>
4445+#include <litmus/sched_plugin.h>
4446+#include <litmus/edf_common.h>
4447+
4448+#include <litmus/fdso.h>
4449+
4450+#include <litmus/trace.h>
4451+
4452+#ifdef CONFIG_FMLP
4453+
4454+static void* create_fmlp_semaphore(void)
4455+{
4456+ struct pi_semaphore* sem;
4457+ int i;
4458+
4459+ sem = kmalloc(sizeof(*sem), GFP_KERNEL);
4460+ if (!sem)
4461+ return NULL;
4462+ atomic_set(&sem->count, 1);
4463+ sem->sleepers = 0;
4464+ init_waitqueue_head(&sem->wait);
4465+ sem->hp.task = NULL;
4466+ sem->holder = NULL;
4467+ for (i = 0; i < NR_CPUS; i++)
4468+ sem->hp.cpu_task[i] = NULL;
4469+ return sem;
4470+}
4471+
4472+static int open_fmlp_semaphore(struct od_table_entry* entry, void* __user arg)
4473+{
4474+ if (!fmlp_active())
4475+ return -EBUSY;
4476+ return 0;
4477+}
4478+
4479+static void destroy_fmlp_semaphore(void* sem)
4480+{
4481+ /* XXX assert invariants */
4482+ kfree(sem);
4483+}
4484+
4485+struct fdso_ops fmlp_sem_ops = {
4486+ .create = create_fmlp_semaphore,
4487+ .open = open_fmlp_semaphore,
4488+ .destroy = destroy_fmlp_semaphore
4489+};
4490+
4491+struct wq_pair {
4492+ struct task_struct* tsk;
4493+ struct pi_semaphore* sem;
4494+};
4495+
4496+static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync,
4497+ void *key)
4498+{
4499+ struct wq_pair* wqp = (struct wq_pair*) wait->private;
4500+ set_rt_flags(wqp->tsk, RT_F_EXIT_SEM);
4501+ litmus->inherit_priority(wqp->sem, wqp->tsk);
4502+ TRACE_TASK(wqp->tsk,
4503+		   "woken up by rt_pi_wake_up() (RT_F_EXIT_SEM, PI)\n");
4504+ /* point to task for default_wake_function() */
4505+ wait->private = wqp->tsk;
4506+ default_wake_function(wait, mode, sync, key);
4507+
4508+	/* Always return true, since we know that if we encountered a task
4509+	 * that was already running, then the wake_up raced with the schedule
4510+	 * in do_fmlp_down(). In that case the task in do_fmlp_down() will be
4511+	 * scheduled immediately and will own the lock. We must not wake up
4512+	 * another task in any case.
4513+ */
4514+ return 1;
4515+}
4516+
4517+/* caller is responsible for locking */
4518+int edf_set_hp_task(struct pi_semaphore *sem)
4519+{
4520+ struct list_head *tmp, *next;
4521+ struct task_struct *queued;
4522+ int ret = 0;
4523+
4524+ sem->hp.task = NULL;
4525+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
4526+ queued = ((struct wq_pair*)
4527+ list_entry(tmp, wait_queue_t,
4528+ task_list)->private)->tsk;
4529+
4530+ /* Compare task prios, find high prio task. */
4531+ if (edf_higher_prio(queued, sem->hp.task)) {
4532+ sem->hp.task = queued;
4533+ ret = 1;
4534+ }
4535+ }
4536+ return ret;
4537+}
4538+
4539+/* caller is responsible for locking */
4540+int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu)
4541+{
4542+ struct list_head *tmp, *next;
4543+ struct task_struct *queued;
4544+ int ret = 0;
4545+
4546+ sem->hp.cpu_task[cpu] = NULL;
4547+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
4548+ queued = ((struct wq_pair*)
4549+ list_entry(tmp, wait_queue_t,
4550+ task_list)->private)->tsk;
4551+
4552+ /* Compare task prios, find high prio task. */
4553+ if (get_partition(queued) == cpu &&
4554+ edf_higher_prio(queued, sem->hp.cpu_task[cpu])) {
4555+ sem->hp.cpu_task[cpu] = queued;
4556+ ret = 1;
4557+ }
4558+ }
4559+ return ret;
4560+}
4561+
4562+static int do_fmlp_down(struct pi_semaphore* sem)
4563+{
4564+ unsigned long flags;
4565+ struct task_struct *tsk = current;
4566+ struct wq_pair pair;
4567+ int suspended = 1;
4568+ wait_queue_t wait = {
4569+ .private = &pair,
4570+ .func = rt_pi_wake_up,
4571+ .task_list = {NULL, NULL}
4572+ };
4573+
4574+ pair.tsk = tsk;
4575+ pair.sem = sem;
4576+ spin_lock_irqsave(&sem->wait.lock, flags);
4577+
4578+ if (atomic_dec_return(&sem->count) < 0 ||
4579+ waitqueue_active(&sem->wait)) {
4580+ /* we need to suspend */
4581+ tsk->state = TASK_UNINTERRUPTIBLE;
4582+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
4583+
4584+ TRACE_CUR("suspends on PI lock %p\n", sem);
4585+ litmus->pi_block(sem, tsk);
4586+
4587+ /* release lock before sleeping */
4588+ spin_unlock_irqrestore(&sem->wait.lock, flags);
4589+
4590+ TS_PI_DOWN_END;
4591+ preempt_enable_no_resched();
4592+
4593+
4594+		/* We depend on FIFO order. Thus, we don't
4595+		 * need to re-check when we wake up; we are
4596+		 * guaranteed to hold the lock, since there is
4597+		 * only one wake-up per release.
4598+ */
4599+ schedule();
4600+
4601+ TRACE_CUR("woke up, now owns PI lock %p\n", sem);
4602+
4603+ /* try_to_wake_up() set our state to TASK_RUNNING,
4604+ * all we need to do is to remove our wait queue entry
4605+ */
4606+ remove_wait_queue(&sem->wait, &wait);
4607+ } else {
4608+ /* no priority inheritance necessary, since there are no queued
4609+ * tasks.
4610+ */
4611+ suspended = 0;
4612+ TRACE_CUR("acquired PI lock %p, no contention\n", sem);
4613+ sem->holder = tsk;
4614+
4615+ /* don't know if we're global or partitioned. */
4616+ sem->hp.task = tsk;
4617+ sem->hp.cpu_task[get_partition(tsk)] = tsk;
4618+
4619+ litmus->inherit_priority(sem, tsk);
4620+ spin_unlock_irqrestore(&sem->wait.lock, flags);
4621+ }
4622+ return suspended;
4623+}
4624+
4625+static void do_fmlp_up(struct pi_semaphore* sem)
4626+{
4627+ unsigned long flags;
4628+
4629+ spin_lock_irqsave(&sem->wait.lock, flags);
4630+
4631+ TRACE_CUR("releases PI lock %p\n", sem);
4632+ litmus->return_priority(sem);
4633+ sem->holder = NULL;
4634+ if (atomic_inc_return(&sem->count) < 1)
4635+ /* there is a task queued */
4636+ wake_up_locked(&sem->wait);
4637+
4638+ spin_unlock_irqrestore(&sem->wait.lock, flags);
4639+}
4640+
4641+asmlinkage long sys_fmlp_down(int sem_od)
4642+{
4643+ long ret = 0;
4644+ struct pi_semaphore * sem;
4645+ int suspended = 0;
4646+
4647+ preempt_disable();
4648+ TS_PI_DOWN_START;
4649+
4650+ sem = lookup_fmlp_sem(sem_od);
4651+ if (sem)
4652+ suspended = do_fmlp_down(sem);
4653+ else
4654+ ret = -EINVAL;
4655+
4656+ if (!suspended) {
4657+ TS_PI_DOWN_END;
4658+ preempt_enable();
4659+ }
4660+
4661+ return ret;
4662+}
4663+
4664+asmlinkage long sys_fmlp_up(int sem_od)
4665+{
4666+ long ret = 0;
4667+ struct pi_semaphore * sem;
4668+
4669+ preempt_disable();
4670+ TS_PI_UP_START;
4671+
4672+ sem = lookup_fmlp_sem(sem_od);
4673+ if (sem)
4674+ do_fmlp_up(sem);
4675+ else
4676+ ret = -EINVAL;
4677+
4678+
4679+ TS_PI_UP_END;
4680+ preempt_enable();
4681+
4682+ return ret;
4683+}
4684+
4685+#else
4686+
4687+struct fdso_ops fmlp_sem_ops = {};
4688+
4689+asmlinkage long sys_fmlp_down(int sem_od)
4690+{
4691+ return -ENOSYS;
4692+}
4693+
4694+asmlinkage long sys_fmlp_up(int sem_od)
4695+{
4696+ return -ENOSYS;
4697+}
4698+
4699+#endif
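
/* Editorial sketch (not part of the patch): the intended user-space flow for
 * the FMLP calls above. od_open(), fmlp_down(), fmlp_up(), and od_close() are
 * hypothetical wrappers for sys_od_open(), sys_fmlp_down(), sys_fmlp_up(),
 * and sys_od_close(); the object type constant FMLP_SEM comes from
 * litmus/fdso.h and is not shown in this excerpt. Tasks name the semaphore
 * through a shared file (its inode) plus an object id, and then use the
 * returned object descriptor (OD) for lock and unlock. */
#include <fcntl.h>
#include <unistd.h>

static void demo_fmlp_usage(void)
{
	int fd, od;

	fd = open("/tmp/fmlp_ns", O_RDWR | O_CREAT, 0666); /* shared namespace file */
	od = od_open(fd, FMLP_SEM, 0 /* object id */, NULL);

	fmlp_down(od);  /* acquire: may suspend; priority inheritance applies */
	/* ... critical section ... */
	fmlp_up(od);    /* release: wakes the next FIFO waiter, if any */

	od_close(od);
	close(fd);
}
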
4700diff --git a/litmus/ft_event.c b/litmus/ft_event.c
4701new file mode 100644
4702index 0000000..399a07b
4703--- /dev/null
4704+++ b/litmus/ft_event.c
4705@@ -0,0 +1,43 @@
4706+#include <linux/types.h>
4707+
4708+#include <litmus/feather_trace.h>
4709+
4710+#if !defined(CONFIG_ARCH_HAS_FEATHER_TRACE) || defined(CONFIG_DEBUG_RODATA)
4711+/* provide dummy implementation */
4712+
4713+int ft_events[MAX_EVENTS];
4714+
4715+int ft_enable_event(unsigned long id)
4716+{
4717+ if (id < MAX_EVENTS) {
4718+ ft_events[id]++;
4719+ return 1;
4720+ } else
4721+ return 0;
4722+}
4723+
4724+int ft_disable_event(unsigned long id)
4725+{
4726+ if (id < MAX_EVENTS && ft_events[id]) {
4727+ ft_events[id]--;
4728+ return 1;
4729+ } else
4730+ return 0;
4731+}
4732+
4733+int ft_disable_all_events(void)
4734+{
4735+ int i;
4736+
4737+ for (i = 0; i < MAX_EVENTS; i++)
4738+ ft_events[i] = 0;
4739+
4740+ return MAX_EVENTS;
4741+}
4742+
4743+int ft_is_event_enabled(unsigned long id)
4744+{
4745+ return id < MAX_EVENTS && ft_events[id];
4746+}
4747+
4748+#endif
4749diff --git a/litmus/ftdev.c b/litmus/ftdev.c
4750new file mode 100644
4751index 0000000..51dafae
4752--- /dev/null
4753+++ b/litmus/ftdev.c
4754@@ -0,0 +1,360 @@
4755+#include <linux/sched.h>
4756+#include <linux/fs.h>
4757+#include <linux/slab.h>
4758+#include <linux/cdev.h>
4759+#include <asm/uaccess.h>
4760+#include <linux/module.h>
4761+
4762+#include <litmus/litmus.h>
4763+#include <litmus/feather_trace.h>
4764+#include <litmus/ftdev.h>
4765+
4766+struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
4767+{
4768+ struct ft_buffer* buf;
4769+ size_t total = (size + 1) * count;
4770+ char* mem;
4771+ int order = 0, pages = 1;
4772+
4773+ buf = kmalloc(sizeof(*buf), GFP_KERNEL);
4774+ if (!buf)
4775+ return NULL;
4776+
4777+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
4778+ while (pages < total) {
4779+ order++;
4780+ pages *= 2;
4781+ }
4782+
4783+ mem = (char*) __get_free_pages(GFP_KERNEL, order);
4784+ if (!mem) {
4785+ kfree(buf);
4786+ return NULL;
4787+ }
4788+
4789+ if (!init_ft_buffer(buf, count, size,
4790+ mem + (count * size), /* markers at the end */
4791+ mem)) { /* buffer objects */
4792+ free_pages((unsigned long) mem, order);
4793+ kfree(buf);
4794+ return NULL;
4795+ }
4796+ return buf;
4797+}
4798+
4799+void free_ft_buffer(struct ft_buffer* buf)
4800+{
4801+ int order = 0, pages = 1;
4802+ size_t total;
4803+
4804+ if (buf) {
4805+ total = (buf->slot_size + 1) * buf->slot_count;
4806+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
4807+ while (pages < total) {
4808+ order++;
4809+ pages *= 2;
4810+ }
4811+ free_pages((unsigned long) buf->buffer_mem, order);
4812+ kfree(buf);
4813+ }
4814+}
4815+
4816+struct ftdev_event {
4817+ int id;
4818+ struct ftdev_event* next;
4819+};
4820+
4821+static int activate(struct ftdev_event** chain, int id)
4822+{
4823+ struct ftdev_event* ev = kmalloc(sizeof(*ev), GFP_KERNEL);
4824+ if (ev) {
4825+ printk(KERN_INFO
4826+ "Enabling feather-trace event %d.\n", (int) id);
4827+ ft_enable_event(id);
4828+ ev->id = id;
4829+ ev->next = *chain;
4830+ *chain = ev;
4831+ }
4832+ return ev ? 0 : -ENOMEM;
4833+}
4834+
4835+static void deactivate(struct ftdev_event** chain, int id)
4836+{
4837+ struct ftdev_event **cur = chain;
4838+ struct ftdev_event *nxt;
4839+ while (*cur) {
4840+ if ((*cur)->id == id) {
4841+ nxt = (*cur)->next;
4842+ kfree(*cur);
4843+ *cur = nxt;
4844+ printk(KERN_INFO
4845+ "Disabling feather-trace event %d.\n", (int) id);
4846+ ft_disable_event(id);
4847+ break;
4848+ }
4849+ cur = &(*cur)->next;
4850+ }
4851+}
4852+
4853+static int ftdev_open(struct inode *in, struct file *filp)
4854+{
4855+ struct ftdev* ftdev;
4856+ struct ftdev_minor* ftdm;
4857+ unsigned int buf_idx = iminor(in);
4858+ int err = 0;
4859+
4860+ ftdev = container_of(in->i_cdev, struct ftdev, cdev);
4861+
4862+ if (buf_idx >= ftdev->minor_cnt) {
4863+ err = -ENODEV;
4864+ goto out;
4865+ }
4866+ if (ftdev->can_open && (err = ftdev->can_open(ftdev, buf_idx)))
4867+ goto out;
4868+
4869+ ftdm = ftdev->minor + buf_idx;
4870+ filp->private_data = ftdm;
4871+
4872+ if (mutex_lock_interruptible(&ftdm->lock)) {
4873+ err = -ERESTARTSYS;
4874+ goto out;
4875+ }
4876+
4877+ if (!ftdm->readers && ftdev->alloc)
4878+ err = ftdev->alloc(ftdev, buf_idx);
4879+ if (0 == err)
4880+ ftdm->readers++;
4881+
4882+ mutex_unlock(&ftdm->lock);
4883+out:
4884+ return err;
4885+}
4886+
4887+static int ftdev_release(struct inode *in, struct file *filp)
4888+{
4889+ struct ftdev* ftdev;
4890+ struct ftdev_minor* ftdm;
4891+ unsigned int buf_idx = iminor(in);
4892+ int err = 0;
4893+
4894+ ftdev = container_of(in->i_cdev, struct ftdev, cdev);
4895+
4896+ if (buf_idx >= ftdev->minor_cnt) {
4897+ err = -ENODEV;
4898+ goto out;
4899+ }
4900+ ftdm = ftdev->minor + buf_idx;
4901+
4902+ if (mutex_lock_interruptible(&ftdm->lock)) {
4903+ err = -ERESTARTSYS;
4904+ goto out;
4905+ }
4906+
4907+ if (ftdm->readers == 1) {
4908+ while (ftdm->events)
4909+ deactivate(&ftdm->events, ftdm->events->id);
4910+
4911+ /* wait for any pending events to complete */
4912+ set_current_state(TASK_UNINTERRUPTIBLE);
4913+ schedule_timeout(HZ);
4914+
4915+ printk(KERN_ALERT "Failed trace writes: %u\n",
4916+ ftdm->buf->failed_writes);
4917+
4918+ if (ftdev->free)
4919+ ftdev->free(ftdev, buf_idx);
4920+ }
4921+
4922+ ftdm->readers--;
4923+ mutex_unlock(&ftdm->lock);
4924+out:
4925+ return err;
4926+}
4927+
4928+/* based on ft_buffer_read
4929+ * @returns < 0 : page fault
4930+ * = 0 : no data available
4931+ * = 1 : one slot copied
4932+ */
4933+static int ft_buffer_copy_to_user(struct ft_buffer* buf, char __user *dest)
4934+{
4935+ unsigned int idx;
4936+ int err = 0;
4937+ if (buf->free_count != buf->slot_count) {
4938+ /* data available */
4939+ idx = buf->read_idx % buf->slot_count;
4940+ if (buf->slots[idx] == SLOT_READY) {
4941+ err = copy_to_user(dest, ((char*) buf->buffer_mem) +
4942+ idx * buf->slot_size,
4943+ buf->slot_size);
4944+ if (err == 0) {
4945+ /* copy ok */
4946+ buf->slots[idx] = SLOT_FREE;
4947+ buf->read_idx++;
4948+ fetch_and_inc(&buf->free_count);
4949+ err = 1;
4950+ }
4951+ }
4952+ }
4953+ return err;
4954+}
4955+
4956+static ssize_t ftdev_read(struct file *filp,
4957+ char __user *to, size_t len, loff_t *f_pos)
4958+{
4959+ /* we ignore f_pos, this is strictly sequential */
4960+
4961+ ssize_t err = 0;
4962+ size_t chunk;
4963+ int copied;
4964+ struct ftdev_minor* ftdm = filp->private_data;
4965+
4966+ if (mutex_lock_interruptible(&ftdm->lock)) {
4967+ err = -ERESTARTSYS;
4968+ goto out;
4969+ }
4970+
4971+
4972+ chunk = ftdm->buf->slot_size;
4973+ while (len >= chunk) {
4974+ copied = ft_buffer_copy_to_user(ftdm->buf, to);
4975+ if (copied == 1) {
4976+ len -= chunk;
4977+ to += chunk;
4978+ err += chunk;
4979+ } else if (err == 0 && copied == 0 && ftdm->events) {
4980+ /* Only wait if there are any events enabled and only
4981+ * if we haven't copied some data yet. We cannot wait
4982+ * here with copied data because that data would get
4983+ * lost if the task is interrupted (e.g., killed).
4984+ */
4985+ set_current_state(TASK_INTERRUPTIBLE);
4986+ schedule_timeout(50);
4987+ if (signal_pending(current)) {
4988+ if (err == 0)
4989+ /* nothing read yet, signal problem */
4990+ err = -ERESTARTSYS;
4991+ break;
4992+ }
4993+ } else if (copied < 0) {
4994+ /* page fault */
4995+ err = copied;
4996+ break;
4997+ } else
4998+ /* nothing left to get, return to user space */
4999+ break;
5000+ }
5001+ mutex_unlock(&ftdm->lock);
5002+out:
5003+ return err;
5004+}
5005+
5006+typedef uint32_t cmd_t;
5007+
5008+static ssize_t ftdev_write(struct file *filp, const char __user *from,
5009+ size_t len, loff_t *f_pos)
5010+{
5011+ struct ftdev_minor* ftdm = filp->private_data;
5012+ ssize_t err = -EINVAL;
5013+ cmd_t cmd;
5014+ cmd_t id;
5015+
5016+ if (len % sizeof(cmd) || len < 2 * sizeof(cmd))
5017+ goto out;
5018+
5019+ if (copy_from_user(&cmd, from, sizeof(cmd))) {
5020+ err = -EFAULT;
5021+ goto out;
5022+ }
5023+ len -= sizeof(cmd);
5024+ from += sizeof(cmd);
5025+
5026+ if (cmd != FTDEV_ENABLE_CMD && cmd != FTDEV_DISABLE_CMD)
5027+ goto out;
5028+
5029+ if (mutex_lock_interruptible(&ftdm->lock)) {
5030+ err = -ERESTARTSYS;
5031+ goto out;
5032+ }
5033+
5034+ err = sizeof(cmd);
5035+ while (len) {
5036+ if (copy_from_user(&id, from, sizeof(cmd))) {
5037+ err = -EFAULT;
5038+ goto out_unlock;
5039+ }
5040+ /* FIXME: check id against list of acceptable events */
5041+ len -= sizeof(cmd);
5042+ from += sizeof(cmd);
5043+ if (cmd == FTDEV_DISABLE_CMD)
5044+ deactivate(&ftdm->events, id);
5045+ else if (activate(&ftdm->events, id) != 0) {
5046+ err = -ENOMEM;
5047+ goto out_unlock;
5048+ }
5049+ err += sizeof(cmd);
5050+ }
5051+
5052+out_unlock:
5053+ mutex_unlock(&ftdm->lock);
5054+out:
5055+ return err;
5056+}
5057+
5058+struct file_operations ftdev_fops = {
5059+ .owner = THIS_MODULE,
5060+ .open = ftdev_open,
5061+ .release = ftdev_release,
5062+ .write = ftdev_write,
5063+ .read = ftdev_read,
5064+};
5065+
5066+
5067+void ftdev_init(struct ftdev* ftdev, struct module* owner)
5068+{
5069+ int i;
5070+ cdev_init(&ftdev->cdev, &ftdev_fops);
5071+ ftdev->cdev.owner = owner;
5072+ ftdev->cdev.ops = &ftdev_fops;
5073+ ftdev->minor_cnt = 0;
5074+ for (i = 0; i < MAX_FTDEV_MINORS; i++) {
5075+ mutex_init(&ftdev->minor[i].lock);
5076+ ftdev->minor[i].readers = 0;
5077+ ftdev->minor[i].buf = NULL;
5078+ ftdev->minor[i].events = NULL;
5079+ }
5080+ ftdev->alloc = NULL;
5081+ ftdev->free = NULL;
5082+ ftdev->can_open = NULL;
5083+}
5084+
5085+int register_ftdev(struct ftdev* ftdev, const char* name, int major)
5086+{
5087+ dev_t trace_dev;
5088+ int error = 0;
5089+
5090+ if(major) {
5091+ trace_dev = MKDEV(major, 0);
5092+ error = register_chrdev_region(trace_dev, ftdev->minor_cnt,
5093+ name);
5094+ } else {
5095+ error = alloc_chrdev_region(&trace_dev, 0, ftdev->minor_cnt,
5096+ name);
5097+ major = MAJOR(trace_dev);
5098+ }
5099+ if (error)
5100+ {
5101+ printk(KERN_WARNING "ftdev(%s): "
5102+ "Could not register major/minor number %d/%u\n",
5103+ name, major, ftdev->minor_cnt);
5104+ return error;
5105+ }
5106+ error = cdev_add(&ftdev->cdev, trace_dev, ftdev->minor_cnt);
5107+ if (error) {
5108+ printk(KERN_WARNING "ftdev(%s): "
5109+ "Could not add cdev for major/minor = %d/%u.\n",
5110+ name, major, ftdev->minor_cnt);
5111+ return error;
5112+ }
5113+ return error;
5114+}
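
/* Editorial sketch (not part of the patch): the command format expected by
 * ftdev_write() above, as seen from user space. A command is a sequence of
 * 32-bit words: FTDEV_ENABLE_CMD or FTDEV_DISABLE_CMD followed by one or more
 * event IDs. The device path is a placeholder (the node depends on which
 * tracer registers the ftdev), and FTDEV_ENABLE_CMD must be taken from
 * litmus/ftdev.h; its value is not shown in this excerpt. */
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

static int demo_enable_event(const char *dev_path, uint32_t event_id)
{
	uint32_t cmd[2] = { FTDEV_ENABLE_CMD, event_id };
	int fd = open(dev_path, O_RDWR);

	if (fd < 0)
		return -1;
	/* ftdev_write() requires a multiple of sizeof(uint32_t), at least 2 words */
	if (write(fd, cmd, sizeof(cmd)) != sizeof(cmd)) {
		close(fd);
		return -1;
	}
	/* trace records can now be read back in fixed-size slots via read() */
	return fd;
}
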
5115diff --git a/litmus/jobs.c b/litmus/jobs.c
5116new file mode 100644
5117index 0000000..36e3146
5118--- /dev/null
5119+++ b/litmus/jobs.c
5120@@ -0,0 +1,43 @@
5121+/* litmus/jobs.c - common job control code
5122+ */
5123+
5124+#include <linux/sched.h>
5125+
5126+#include <litmus/litmus.h>
5127+#include <litmus/jobs.h>
5128+
5129+void prepare_for_next_period(struct task_struct *t)
5130+{
5131+ BUG_ON(!t);
5132+ /* prepare next release */
5133+ t->rt_param.job_params.release = t->rt_param.job_params.deadline;
5134+ t->rt_param.job_params.deadline += get_rt_period(t);
5135+ t->rt_param.job_params.exec_time = 0;
5136+ /* update job sequence number */
5137+ t->rt_param.job_params.job_no++;
5138+
5139+ /* don't confuse Linux */
5140+ t->rt.time_slice = 1;
5141+}
5142+
5143+void release_at(struct task_struct *t, lt_t start)
5144+{
5145+ t->rt_param.job_params.deadline = start;
5146+ prepare_for_next_period(t);
5147+ set_rt_flags(t, RT_F_RUNNING);
5148+}
5149+
5150+
5151+/*
5152+ * Deactivate current task until the beginning of the next period.
5153+ */
5154+long complete_job(void)
5155+{
5156+	/* Mark that we do not execute anymore */
5157+ set_rt_flags(current, RT_F_SLEEP);
5158+	/* Call schedule(); this will return when a new job arrives.
5159+	 * It also takes care of preparing for the next release.
5160+ */
5161+ schedule();
5162+ return 0;
5163+}
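
/* Editorial sketch (not part of the patch): the job model implemented by
 * complete_job() above, from the point of view of a user-space periodic task.
 * do_one_job() and sleep_next_period() are hypothetical; the latter stands in
 * for a wrapper around sys_complete_job() (the real wrapper lives in
 * liblitmus and is not part of this excerpt). */
static void demo_periodic_task(void)
{
	for (;;) {
		do_one_job();          /* per-job work (hypothetical) */
		sleep_next_period();   /* sets RT_F_SLEEP; the plugin suspends the
		                        * task until its next release and prepares
		                        * release/deadline/job_no for the next job */
	}
}
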
5164diff --git a/litmus/litmus.c b/litmus/litmus.c
5165new file mode 100644
5166index 0000000..b04a42b
5167--- /dev/null
5168+++ b/litmus/litmus.c
5169@@ -0,0 +1,799 @@
5170+/*
5171+ * litmus.c -- Implementation of the LITMUS syscalls,
5172+ *            the LITMUS initialization code,
5173+ *            and the procfs interface.
5174+ */
5175+#include <asm/uaccess.h>
5176+#include <linux/uaccess.h>
5177+#include <linux/sysrq.h>
5178+
5179+#include <linux/module.h>
5180+#include <linux/proc_fs.h>
5181+#include <linux/slab.h>
5182+
5183+#include <litmus/litmus.h>
5184+#include <linux/sched.h>
5185+#include <litmus/sched_plugin.h>
5186+
5187+#include <litmus/bheap.h>
5188+
5189+#include <litmus/trace.h>
5190+
5191+#include <litmus/rt_domain.h>
5192+
5193+/* Number of RT tasks that exist in the system */
5194+atomic_t rt_task_count = ATOMIC_INIT(0);
5195+static DEFINE_RAW_SPINLOCK(task_transition_lock);
5196+/* synchronize plugin switching */
5197+atomic_t cannot_use_plugin = ATOMIC_INIT(0);
5198+
5199+/* Give log messages sequential IDs. */
5200+atomic_t __log_seq_no = ATOMIC_INIT(0);
5201+
5202+#ifdef CONFIG_RELEASE_MASTER
5203+/* current master CPU for handling timer IRQs */
5204+atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
5205+#endif
5206+
5207+static struct kmem_cache * bheap_node_cache;
5208+extern struct kmem_cache * release_heap_cache;
5209+
5210+struct bheap_node* bheap_node_alloc(int gfp_flags)
5211+{
5212+ return kmem_cache_alloc(bheap_node_cache, gfp_flags);
5213+}
5214+
5215+void bheap_node_free(struct bheap_node* hn)
5216+{
5217+ kmem_cache_free(bheap_node_cache, hn);
5218+}
5219+
5220+struct release_heap* release_heap_alloc(int gfp_flags);
5221+void release_heap_free(struct release_heap* rh);
5222+
5223+/*
5224+ * sys_set_rt_task_param
5225+ * @pid: Pid of the task whose scheduling parameters must be changed
5226+ * @param: New real-time extension parameters such as the execution cost and
5227+ *         period
5228+ * Syscall for manipulating a task's rt extension params
5229+ * Returns EFAULT if param is NULL.
5230+ *         ESRCH  if pid does not correspond
5231+ *                to a valid task.
5232+ *         EINVAL if either period or execution cost is <= 0
5233+ *         EBUSY  if pid refers to a task that is already a real-time task
5234+ *         0      if success
5235+ *
5236+ * Only non-real-time tasks may be configured with this system call
5237+ * to avoid races with the scheduler. In practice, this means that a
5238+ * task's parameters must be set _before_ calling sys_prepare_rt_task()
5239+ *
5240+ * find_task_by_vpid() assumes that we are in the same namespace as the
5241+ * target.
5242+ */
5243+asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
5244+{
5245+ struct rt_task tp;
5246+ struct task_struct *target;
5247+ int retval = -EINVAL;
5248+
5249+ printk("Setting up rt task parameters for process %d.\n", pid);
5250+
5251+ if (pid < 0 || param == 0) {
5252+ goto out;
5253+ }
5254+ if (copy_from_user(&tp, param, sizeof(tp))) {
5255+ retval = -EFAULT;
5256+ goto out;
5257+ }
5258+
5259+ /* Task search and manipulation must be protected */
5260+ read_lock_irq(&tasklist_lock);
5261+ if (!(target = find_task_by_vpid(pid))) {
5262+ retval = -ESRCH;
5263+ goto out_unlock;
5264+ }
5265+
5266+ if (is_realtime(target)) {
5267+ /* The task is already a real-time task.
5268+		 * We cannot allow parameter changes at this point.
5269+ */
5270+ retval = -EBUSY;
5271+ goto out_unlock;
5272+ }
5273+
5274+ if (tp.exec_cost <= 0)
5275+ goto out_unlock;
5276+ if (tp.period <= 0)
5277+ goto out_unlock;
5278+ if (!cpu_online(tp.cpu))
5279+ goto out_unlock;
5280+ if (tp.period < tp.exec_cost)
5281+ {
5282+ printk(KERN_INFO "litmus: real-time task %d rejected "
5283+ "because wcet > period\n", pid);
5284+ goto out_unlock;
5285+ }
5286+ if (tp.budget_policy != NO_ENFORCEMENT &&
5287+ tp.budget_policy != QUANTUM_ENFORCEMENT &&
5288+ tp.budget_policy != PRECISE_ENFORCEMENT)
5289+ {
5290+ printk(KERN_INFO "litmus: real-time task %d rejected "
5291+ "because unsupported budget enforcement policy "
5292+ "specified (%d)\n",
5293+ pid, tp.budget_policy);
5294+ goto out_unlock;
5295+ }
5296+
5297+ target->rt_param.task_params = tp;
5298+
5299+ retval = 0;
5300+ out_unlock:
5301+ read_unlock_irq(&tasklist_lock);
5302+ out:
5303+ return retval;
5304+}
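
/* Editorial sketch (not part of the patch): filling in the parameters that
 * sys_set_rt_task_param() above validates. The field names and the budget
 * policy constants are taken from the code above; the time unit of
 * exec_cost/period and the set_rt_task_param() wrapper are assumptions (the
 * wrapper would normally be provided by liblitmus). The call must be made
 * before the task becomes a real-time task, otherwise -EBUSY is returned. */
#include <unistd.h>

static int demo_set_params(pid_t pid)
{
	struct rt_task tp = {
		.exec_cost     = 10 * 1000 * 1000,   /* assumed ns: 10 ms budget   */
		.period        = 100 * 1000 * 1000,  /* assumed ns: 100 ms period  */
		.cpu           = 0,                  /* partition; must be online  */
		.budget_policy = PRECISE_ENFORCEMENT /* or NO_/QUANTUM_ENFORCEMENT */
	};

	return set_rt_task_param(pid, &tp); /* hypothetical syscall wrapper */
}
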
5305+
5306+/*
5307+ * Getter of task's RT params
5308+ * returns EINVAL if param or pid is NULL
5309+ * returns ESRCH if pid does not correspond to a valid task
5310+ * returns EFAULT if copying of parameters has failed.
5311+ *
5312+ * find_task_by_vpid() assumes that we are in the same namespace as the
5313+ * target.
5314+ */
5315+asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
5316+{
5317+ int retval = -EINVAL;
5318+ struct task_struct *source;
5319+ struct rt_task lp;
5320+ if (param == 0 || pid < 0)
5321+ goto out;
5322+ read_lock(&tasklist_lock);
5323+ if (!(source = find_task_by_vpid(pid))) {
5324+ retval = -ESRCH;
5325+ goto out_unlock;
5326+ }
5327+ lp = source->rt_param.task_params;
5328+ read_unlock(&tasklist_lock);
5329+ /* Do copying outside the lock */
5330+ retval =
5331+ copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
5332+ return retval;
5333+ out_unlock:
5334+ read_unlock(&tasklist_lock);
5335+ out:
5336+ return retval;
5337+
5338+}
5339+
5340+/*
5341+ * This is the crucial function for the periodic task implementation.
5342+ * It checks whether a task is periodic, whether such a sleep is
5343+ * permitted, and calls the plugin-specific sleep, which puts the
5344+ * task into a wait queue.
5345+ * returns 0 on successful wakeup
5346+ * returns EPERM if current conditions do not permit such sleep
5347+ * returns EINVAL if current task is not able to go to sleep
5348+ */
5349+asmlinkage long sys_complete_job(void)
5350+{
5351+ int retval = -EPERM;
5352+ if (!is_realtime(current)) {
5353+ retval = -EINVAL;
5354+ goto out;
5355+ }
5356+ /* Task with negative or zero period cannot sleep */
5357+ if (get_rt_period(current) <= 0) {
5358+ retval = -EINVAL;
5359+ goto out;
5360+ }
5361+ /* The plugin has to put the task into an
5362+ * appropriate queue and call schedule
5363+ */
5364+ retval = litmus->complete_job();
5365+ out:
5366+ return retval;
5367+}
5368+
5369+/* This is an "improved" version of sys_complete_job that
5370+ * addresses the problem of unintentionally missing a job after
5371+ * an overrun.
5372+ *
5373+ * returns 0 on successful wakeup
5374+ * returns EPERM if current conditions do not permit such sleep
5375+ * returns EINVAL if current task is not able to go to sleep
5376+ */
5377+asmlinkage long sys_wait_for_job_release(unsigned int job)
5378+{
5379+ int retval = -EPERM;
5380+ if (!is_realtime(current)) {
5381+ retval = -EINVAL;
5382+ goto out;
5383+ }
5384+
5385+ /* Task with negative or zero period cannot sleep */
5386+ if (get_rt_period(current) <= 0) {
5387+ retval = -EINVAL;
5388+ goto out;
5389+ }
5390+
5391+ retval = 0;
5392+
5393+ /* first wait until we have "reached" the desired job
5394+ *
5395+ * This implementation has at least two problems:
5396+ *
5397+ * 1) It doesn't gracefully handle the wrap around of
5398+ * job_no. Since LITMUS is a prototype, this is not much
5399+ * of a problem right now.
5400+ *
5401+ * 2) It is theoretically racy if a job release occurs
5402+ * between checking job_no and calling sleep_next_period().
5403+	 * A proper solution would require adding another callback
5404+ * in the plugin structure and testing the condition with
5405+ * interrupts disabled.
5406+ *
5407+ * FIXME: At least problem 2 should be taken care of eventually.
5408+ */
5409+ while (!retval && job > current->rt_param.job_params.job_no)
5410+ /* If the last job overran then job <= job_no and we
5411+ * don't send the task to sleep.
5412+ */
5413+ retval = litmus->complete_job();
5414+ out:
5415+ return retval;
5416+}
5417+
5418+/* This is a helper syscall to query the current job sequence number.
5419+ *
5420+ * returns 0 on successful query
5421+ * returns EPERM if task is not a real-time task.
5422+ * returns EFAULT if &job is not a valid pointer.
5423+ */
5424+asmlinkage long sys_query_job_no(unsigned int __user *job)
5425+{
5426+ int retval = -EPERM;
5427+ if (is_realtime(current))
5428+ retval = put_user(current->rt_param.job_params.job_no, job);
5429+
5430+ return retval;
5431+}
5432+
5433+/* sys_null_call() is only used for determining raw system call
5434+ * overheads (kernel entry, kernel exit). It has no useful side effects.
5435+ * If ts is non-NULL, then the current Feather-Trace time is recorded.
5436+ */
5437+asmlinkage long sys_null_call(cycles_t __user *ts)
5438+{
5439+ long ret = 0;
5440+ cycles_t now;
5441+
5442+ if (ts) {
5443+ now = get_cycles();
5444+ ret = put_user(now, ts);
5445+ }
5446+
5447+ return ret;
5448+}
5449+
5450+/* p is a real-time task. Re-init its state as a best-effort task. */
5451+static void reinit_litmus_state(struct task_struct* p, int restore)
5452+{
5453+ struct rt_task user_config = {};
5454+ void* ctrl_page = NULL;
5455+
5456+ if (restore) {
5457+		/* Save user-space provided configuration data
5458+		 * and the allocated page. */
5459+ user_config = p->rt_param.task_params;
5460+ ctrl_page = p->rt_param.ctrl_page;
5461+ }
5462+
5463+ /* We probably should not be inheriting any task's priority
5464+ * at this point in time.
5465+ */
5466+ WARN_ON(p->rt_param.inh_task);
5467+
5468+ /* We need to restore the priority of the task. */
5469+// __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio); XXX why is this commented?
5470+
5471+ /* Cleanup everything else. */
5472+ memset(&p->rt_param, 0, sizeof(p->rt_param));
5473+
5474+ /* Restore preserved fields. */
5475+ if (restore) {
5476+ p->rt_param.task_params = user_config;
5477+ p->rt_param.ctrl_page = ctrl_page;
5478+ }
5479+}
5480+
5481+long litmus_admit_task(struct task_struct* tsk)
5482+{
5483+ long retval = 0;
5484+ unsigned long flags;
5485+
5486+ BUG_ON(is_realtime(tsk));
5487+
5488+ if (get_rt_period(tsk) == 0 ||
5489+ get_exec_cost(tsk) > get_rt_period(tsk)) {
5490+ TRACE_TASK(tsk, "litmus admit: invalid task parameters "
5491+ "(%lu, %lu)\n",
5492+ get_exec_cost(tsk), get_rt_period(tsk));
5493+ retval = -EINVAL;
5494+ goto out;
5495+ }
5496+
5497+ if (!cpu_online(get_partition(tsk))) {
5498+ TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
5499+ get_partition(tsk));
5500+ retval = -EINVAL;
5501+ goto out;
5502+ }
5503+
5504+ INIT_LIST_HEAD(&tsk_rt(tsk)->list);
5505+
5506+ /* avoid scheduler plugin changing underneath us */
5507+ raw_spin_lock_irqsave(&task_transition_lock, flags);
5508+
5509+ /* allocate heap node for this task */
5510+ tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
5511+ tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
5512+
5513+ if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
5514+ printk(KERN_WARNING "litmus: no more heap node memory!?\n");
5515+
5516+ bheap_node_free(tsk_rt(tsk)->heap_node);
5517+ release_heap_free(tsk_rt(tsk)->rel_heap);
5518+
5519+ retval = -ENOMEM;
5520+ goto out_unlock;
5521+ } else {
5522+ bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
5523+ }
5524+
5525+ retval = litmus->admit_task(tsk);
5526+
5527+ if (!retval) {
5528+ sched_trace_task_name(tsk);
5529+ sched_trace_task_param(tsk);
5530+ atomic_inc(&rt_task_count);
5531+ }
5532+
5533+out_unlock:
5534+ raw_spin_unlock_irqrestore(&task_transition_lock, flags);
5535+out:
5536+ return retval;
5537+}
5538+
5539+void litmus_exit_task(struct task_struct* tsk)
5540+{
5541+ if (is_realtime(tsk)) {
5542+ sched_trace_task_completion(tsk, 1);
5543+
5544+ litmus->task_exit(tsk);
5545+
5546+ BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
5547+ bheap_node_free(tsk_rt(tsk)->heap_node);
5548+ release_heap_free(tsk_rt(tsk)->rel_heap);
5549+
5550+ atomic_dec(&rt_task_count);
5551+ reinit_litmus_state(tsk, 1);
5552+ }
5553+}
5554+
5555+/* IPI callback to synchronize plugin switching */
5556+static void synch_on_plugin_switch(void* info)
5557+{
5558+ while (atomic_read(&cannot_use_plugin))
5559+ cpu_relax();
5560+}
5561+
5562+/* Switching a plugin in use is tricky.
5563+ * We must make sure that no real-time tasks exist
5564+ * (and that none are created in parallel) and that the plugin is not
5565+ * currently in use on any processor (in theory).
5566+ */
5567+int switch_sched_plugin(struct sched_plugin* plugin)
5568+{
5569+ unsigned long flags;
5570+ int ret = 0;
5571+
5572+ BUG_ON(!plugin);
5573+
5574+ /* forbid other cpus to use the plugin */
5575+ atomic_set(&cannot_use_plugin, 1);
5576+ /* send IPI to force other CPUs to synch with us */
5577+ smp_call_function(synch_on_plugin_switch, NULL, 0);
5578+
5579+ /* stop task transitions */
5580+ raw_spin_lock_irqsave(&task_transition_lock, flags);
5581+
5582+ /* don't switch if there are active real-time tasks */
5583+ if (atomic_read(&rt_task_count) == 0) {
5584+ ret = litmus->deactivate_plugin();
5585+ if (0 != ret)
5586+ goto out;
5587+ ret = plugin->activate_plugin();
5588+ if (0 != ret) {
5589+ printk(KERN_INFO "Can't activate %s (%d).\n",
5590+ plugin->plugin_name, ret);
5591+ plugin = &linux_sched_plugin;
5592+ }
5593+ printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
5594+ litmus = plugin;
5595+ } else
5596+ ret = -EBUSY;
5597+out:
5598+ raw_spin_unlock_irqrestore(&task_transition_lock, flags);
5599+ atomic_set(&cannot_use_plugin, 0);
5600+ return ret;
5601+}
5602+
5603+/* Called upon fork.
5604+ * p is the newly forked task.
5605+ */
5606+void litmus_fork(struct task_struct* p)
5607+{
5608+ if (is_realtime(p))
5609+ /* clean out any litmus related state, don't preserve anything */
5610+ reinit_litmus_state(p, 0);
5611+ else
5612+ /* non-rt tasks might have ctrl_page set */
5613+ tsk_rt(p)->ctrl_page = NULL;
5614+
5615+ /* od tables are never inherited across a fork */
5616+ p->od_table = NULL;
5617+}
5618+
5619+/* Called upon execve().
5620+ * current is doing the exec.
5621+ * Don't let address space specific stuff leak.
5622+ */
5623+void litmus_exec(void)
5624+{
5625+ struct task_struct* p = current;
5626+
5627+ if (is_realtime(p)) {
5628+ WARN_ON(p->rt_param.inh_task);
5629+ if (tsk_rt(p)->ctrl_page) {
5630+ free_page((unsigned long) tsk_rt(p)->ctrl_page);
5631+ tsk_rt(p)->ctrl_page = NULL;
5632+ }
5633+ }
5634+}
5635+
5636+void exit_litmus(struct task_struct *dead_tsk)
5637+{
5638+	/* We also allow non-RT tasks to
5639+	 * allocate control pages so that overheads
5640+	 * can be measured with non-RT tasks.
5641+	 * Hence, check in any case whether the
5642+	 * page needs to be freed.
5643+ */
5644+ if (tsk_rt(dead_tsk)->ctrl_page) {
5645+ TRACE_TASK(dead_tsk,
5646+ "freeing ctrl_page %p\n",
5647+ tsk_rt(dead_tsk)->ctrl_page);
5648+ free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page);
5649+ }
5650+
5651+ /* main cleanup only for RT tasks */
5652+ if (is_realtime(dead_tsk))
5653+ litmus_exit_task(dead_tsk);
5654+}
5655+
5656+
5657+#ifdef CONFIG_MAGIC_SYSRQ
5658+int sys_kill(int pid, int sig);
5659+
5660+static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty)
5661+{
5662+ struct task_struct *t;
5663+ read_lock(&tasklist_lock);
5664+ for_each_process(t) {
5665+ if (is_realtime(t)) {
5666+ sys_kill(t->pid, SIGKILL);
5667+ }
5668+ }
5669+ read_unlock(&tasklist_lock);
5670+}
5671+
5672+static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
5673+ .handler = sysrq_handle_kill_rt_tasks,
5674+ .help_msg = "quit-rt-tasks(X)",
5675+ .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks",
5676+};
5677+#endif
5678+
5679+/* in litmus/sync.c */
5680+int count_tasks_waiting_for_release(void);
5681+
5682+static int proc_read_stats(char *page, char **start,
5683+ off_t off, int count,
5684+ int *eof, void *data)
5685+{
5686+ int len;
5687+
5688+ len = snprintf(page, PAGE_SIZE,
5689+ "real-time tasks = %d\n"
5690+ "ready for release = %d\n",
5691+ atomic_read(&rt_task_count),
5692+ count_tasks_waiting_for_release());
5693+ return len;
5694+}
5695+
5696+static int proc_read_plugins(char *page, char **start,
5697+ off_t off, int count,
5698+ int *eof, void *data)
5699+{
5700+ int len;
5701+
5702+ len = print_sched_plugins(page, PAGE_SIZE);
5703+ return len;
5704+}
5705+
5706+static int proc_read_curr(char *page, char **start,
5707+ off_t off, int count,
5708+ int *eof, void *data)
5709+{
5710+ int len;
5711+
5712+ len = snprintf(page, PAGE_SIZE, "%s\n", litmus->plugin_name);
5713+ return len;
5714+}
5715+
5716+static int proc_write_curr(struct file *file,
5717+ const char *buffer,
5718+ unsigned long count,
5719+ void *data)
5720+{
5721+ int len, ret;
5722+ char name[65];
5723+ struct sched_plugin* found;
5724+
5725+ if(count > 64)
5726+ len = 64;
5727+ else
5728+ len = count;
5729+
5730+ if(copy_from_user(name, buffer, len))
5731+ return -EFAULT;
5732+
5733+ name[len] = '\0';
5734+ /* chomp name */
5735+ if (len > 1 && name[len - 1] == '\n')
5736+ name[len - 1] = '\0';
5737+
5738+ found = find_sched_plugin(name);
5739+
5740+ if (found) {
5741+ ret = switch_sched_plugin(found);
5742+ if (ret != 0)
5743+ printk(KERN_INFO "Could not switch plugin: %d\n", ret);
5744+ } else
5745+ printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
5746+
5747+ return len;
5748+}
5749+
5750+static int proc_read_cluster_size(char *page, char **start,
5751+ off_t off, int count,
5752+ int *eof, void *data)
5753+{
5754+ int len;
5755+ if (cluster_cache_index == 2)
5756+ len = snprintf(page, PAGE_SIZE, "L2\n");
5757+ else if (cluster_cache_index == 3)
5758+ len = snprintf(page, PAGE_SIZE, "L3\n");
5759+ else if (cluster_cache_index == 1)
5760+ len = snprintf(page, PAGE_SIZE, "L1\n");
5761+ else
5762+ len = snprintf(page, PAGE_SIZE, "ALL\n");
5763+
5764+ return len;
5765+}
5766+
5767+static int proc_write_cluster_size(struct file *file,
5768+ const char *buffer,
5769+ unsigned long count,
5770+ void *data)
5771+{
5772+ int len;
5773+ /* L2, L3 */
5774+ char cache_name[33];
5775+
5776+ if(count > 32)
5777+ len = 32;
5778+ else
5779+ len = count;
5780+
5781+ if(copy_from_user(cache_name, buffer, len))
5782+ return -EFAULT;
5783+
5784+ cache_name[len] = '\0';
5785+ /* chomp name */
5786+ if (len > 1 && cache_name[len - 1] == '\n')
5787+ cache_name[len - 1] = '\0';
5788+
5789+ /* do a quick and dirty comparison to find the cluster size */
5790+ if (!strcmp(cache_name, "L2"))
5791+ cluster_cache_index = 2;
5792+ else if (!strcmp(cache_name, "L3"))
5793+ cluster_cache_index = 3;
5794+ else if (!strcmp(cache_name, "L1"))
5795+ cluster_cache_index = 1;
5796+ else if (!strcmp(cache_name, "ALL"))
5797+ cluster_cache_index = num_online_cpus();
5798+ else
5799+ printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name);
5800+
5801+ return len;
5802+}
5803+
5804+#ifdef CONFIG_RELEASE_MASTER
5805+static int proc_read_release_master(char *page, char **start,
5806+ off_t off, int count,
5807+ int *eof, void *data)
5808+{
5809+ int len, master;
5810+ master = atomic_read(&release_master_cpu);
5811+ if (master == NO_CPU)
5812+ len = snprintf(page, PAGE_SIZE, "NO_CPU\n");
5813+ else
5814+ len = snprintf(page, PAGE_SIZE, "%d\n", master);
5815+ return len;
5816+}
5817+
5818+static int proc_write_release_master(struct file *file,
5819+ const char *buffer,
5820+ unsigned long count,
5821+ void *data)
5822+{
5823+ int cpu, err, online = 0;
5824+ char msg[64];
5825+
5826+ if (count > 63)
5827+ return -EINVAL;
5828+
5829+ if (copy_from_user(msg, buffer, count))
5830+ return -EFAULT;
5831+
5832+ /* terminate */
5833+ msg[count] = '\0';
5834+ /* chomp */
5835+ if (count > 1 && msg[count - 1] == '\n')
5836+ msg[count - 1] = '\0';
5837+
5838+ if (strcmp(msg, "NO_CPU") == 0) {
5839+ atomic_set(&release_master_cpu, NO_CPU);
5840+ return count;
5841+ } else {
5842+ err = sscanf(msg, "%d", &cpu);
5843+ if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) {
5844+ atomic_set(&release_master_cpu, cpu);
5845+ return count;
5846+ } else {
5847+ TRACE("invalid release master: '%s' "
5848+ "(err:%d cpu:%d online:%d)\n",
5849+ msg, err, cpu, online);
5850+ return -EINVAL;
5851+ }
5852+ }
5853+}
5854+#endif
5855+
5856+static struct proc_dir_entry *litmus_dir = NULL,
5857+ *curr_file = NULL,
5858+ *stat_file = NULL,
5859+ *plugs_file = NULL,
5860+#ifdef CONFIG_RELEASE_MASTER
5861+ *release_master_file = NULL,
5862+#endif
5863+ *clus_cache_idx_file = NULL;
5864+
5865+static int __init init_litmus_proc(void)
5866+{
5867+ litmus_dir = proc_mkdir("litmus", NULL);
5868+ if (!litmus_dir) {
5869+ printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
5870+ return -ENOMEM;
5871+ }
5872+
5873+ curr_file = create_proc_entry("active_plugin",
5874+ 0644, litmus_dir);
5875+ if (!curr_file) {
5876+ printk(KERN_ERR "Could not allocate active_plugin "
5877+ "procfs entry.\n");
5878+ return -ENOMEM;
5879+ }
5880+ curr_file->read_proc = proc_read_curr;
5881+ curr_file->write_proc = proc_write_curr;
5882+
5883+#ifdef CONFIG_RELEASE_MASTER
5884+ release_master_file = create_proc_entry("release_master",
5885+ 0644, litmus_dir);
5886+ if (!release_master_file) {
5887+ printk(KERN_ERR "Could not allocate release_master "
5888+ "procfs entry.\n");
5889+ return -ENOMEM;
5890+ }
5891+ release_master_file->read_proc = proc_read_release_master;
5892+ release_master_file->write_proc = proc_write_release_master;
5893+#endif
5894+
5895+ clus_cache_idx_file = create_proc_entry("cluster_cache",
5896+ 0644, litmus_dir);
5897+ if (!clus_cache_idx_file) {
5898+ printk(KERN_ERR "Could not allocate cluster_cache "
5899+ "procfs entry.\n");
5900+ return -ENOMEM;
5901+ }
5902+ clus_cache_idx_file->read_proc = proc_read_cluster_size;
5903+ clus_cache_idx_file->write_proc = proc_write_cluster_size;
5904+
5905+ stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
5906+ proc_read_stats, NULL);
5907+
5908+ plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir,
5909+ proc_read_plugins, NULL);
5910+
5911+ return 0;
5912+}
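/*
 * Illustrative userspace sketch (an assumption, not kernel code): the
 * /proc/litmus/ entries created above are plain text files, so the
 * active plugin and the cluster size can be selected with ordinary
 * file I/O, roughly as follows. Error handling is kept minimal.
 */
#if 0 /* userspace example only */
#include <stdio.h>

static int write_proc(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	char line[128];
	FILE *f;

	/* cluster_cache is consulted when C-EDF is activated,
	 * so set it before switching plugins */
	write_proc("/proc/litmus/cluster_cache", "L3");
	write_proc("/proc/litmus/active_plugin", "C-EDF");

	/* read back the currently active plugin */
	f = fopen("/proc/litmus/active_plugin", "r");
	if (f && fgets(line, sizeof(line), f))
		printf("active plugin: %s", line);
	if (f)
		fclose(f);
	return 0;
}
#endif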
5913+
5914+static void exit_litmus_proc(void)
5915+{
5916+ if (plugs_file)
5917+ remove_proc_entry("plugins", litmus_dir);
5918+ if (stat_file)
5919+ remove_proc_entry("stats", litmus_dir);
5920+ if (curr_file)
5921+ remove_proc_entry("active_plugin", litmus_dir);
5922+ if (clus_cache_idx_file)
5923+ remove_proc_entry("cluster_cache", litmus_dir);
5924+#ifdef CONFIG_RELEASE_MASTER
5925+ if (release_master_file)
5926+ remove_proc_entry("release_master", litmus_dir);
5927+#endif
5928+ if (litmus_dir)
5929+ remove_proc_entry("litmus", NULL);
5930+}
5931+
5932+extern struct sched_plugin linux_sched_plugin;
5933+
5934+static int __init _init_litmus(void)
5935+{
5936+	/* Common initializers;
5937+	 * the mode-change lock is used to enforce that only a single
5938+	 * mode change is in progress at any time.
5939+ */
5940+ printk("Starting LITMUS^RT kernel\n");
5941+
5942+ register_sched_plugin(&linux_sched_plugin);
5943+
5944+ bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
5945+ release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
5946+
5947+#ifdef CONFIG_MAGIC_SYSRQ
5948+ /* offer some debugging help */
5949+ if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
5950+ printk("Registered kill rt tasks magic sysrq.\n");
5951+ else
5952+ printk("Could not register kill rt tasks magic sysrq.\n");
5953+#endif
5954+
5955+ init_litmus_proc();
5956+
5957+ return 0;
5958+}
5959+
5960+static void _exit_litmus(void)
5961+{
5962+ exit_litmus_proc();
5963+ kmem_cache_destroy(bheap_node_cache);
5964+ kmem_cache_destroy(release_heap_cache);
5965+}
5966+
5967+module_init(_init_litmus);
5968+module_exit(_exit_litmus);
5969diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
5970new file mode 100644
5971index 0000000..81a5ac1
5972--- /dev/null
5973+++ b/litmus/rt_domain.c
5974@@ -0,0 +1,355 @@
5975+/*
5976+ * litmus/rt_domain.c
5977+ *
5978+ * LITMUS real-time infrastructure. This file contains the
5979+ * functions that manipulate RT domains. RT domains are an abstraction
5980+ * of a ready queue and a release queue.
5981+ */
5982+
5983+#include <linux/percpu.h>
5984+#include <linux/sched.h>
5985+#include <linux/list.h>
5986+#include <linux/slab.h>
5987+
5988+#include <litmus/litmus.h>
5989+#include <litmus/sched_plugin.h>
5990+#include <litmus/sched_trace.h>
5991+
5992+#include <litmus/rt_domain.h>
5993+
5994+#include <litmus/trace.h>
5995+
5996+#include <litmus/bheap.h>
5997+
5998+/* Uncomment when debugging timer races... */
5999+#if 0
6000+#define VTRACE_TASK TRACE_TASK
6001+#define VTRACE TRACE
6002+#else
6003+#define VTRACE_TASK(t, fmt, args...) /* shut up */
6004+#define VTRACE(fmt, args...) /* be quiet already */
6005+#endif
6006+
6007+static int dummy_resched(rt_domain_t *rt)
6008+{
6009+ return 0;
6010+}
6011+
6012+static int dummy_order(struct bheap_node* a, struct bheap_node* b)
6013+{
6014+ return 0;
6015+}
6016+
6017+/* default implementation: merge the released jobs into the ready queue via merge_ready() */
6018+static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks)
6019+{
6020+ merge_ready(rt, tasks);
6021+}
6022+
6023+static unsigned int time2slot(lt_t time)
6024+{
6025+ return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS;
6026+}
6027+
6028+static enum hrtimer_restart on_release_timer(struct hrtimer *timer)
6029+{
6030+ unsigned long flags;
6031+ struct release_heap* rh;
6032+
6033+ VTRACE("on_release_timer(0x%p) starts.\n", timer);
6034+
6035+ TS_RELEASE_START;
6036+
6037+ rh = container_of(timer, struct release_heap, timer);
6038+
6039+ raw_spin_lock_irqsave(&rh->dom->release_lock, flags);
6040+ VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock);
6041+ /* remove from release queue */
6042+ list_del(&rh->list);
6043+ raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags);
6044+ VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock);
6045+
6046+ /* call release callback */
6047+ rh->dom->release_jobs(rh->dom, &rh->heap);
6048+ /* WARNING: rh can be referenced from other CPUs from now on. */
6049+
6050+ TS_RELEASE_END;
6051+
6052+ VTRACE("on_release_timer(0x%p) ends.\n", timer);
6053+
6054+ return HRTIMER_NORESTART;
6055+}
6056+
6057+/* allocated in litmus.c */
6058+struct kmem_cache * release_heap_cache;
6059+
6060+struct release_heap* release_heap_alloc(int gfp_flags)
6061+{
6062+ struct release_heap* rh;
6063+	rh = kmem_cache_alloc(release_heap_cache, gfp_flags);
6064+ if (rh) {
6065+ /* initialize timer */
6066+ hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
6067+ rh->timer.function = on_release_timer;
6068+ }
6069+ return rh;
6070+}
6071+
6072+void release_heap_free(struct release_heap* rh)
6073+{
6074+ /* make sure timer is no longer in use */
6075+ hrtimer_cancel(&rh->timer);
6076+ kmem_cache_free(release_heap_cache, rh);
6077+}
6078+
6079+/* Caller must hold release lock.
6080+ * Returns the heap for the given time. If no such heap exists prior to
6081+ * the invocation, it will be created.
6082+ */
6083+static struct release_heap* get_release_heap(rt_domain_t *rt,
6084+ struct task_struct* t,
6085+ int use_task_heap)
6086+{
6087+ struct list_head* pos;
6088+ struct release_heap* heap = NULL;
6089+ struct release_heap* rh;
6090+ lt_t release_time = get_release(t);
6091+ unsigned int slot = time2slot(release_time);
6092+
6093+ /* initialize pos for the case that the list is empty */
6094+ pos = rt->release_queue.slot[slot].next;
6095+ list_for_each(pos, &rt->release_queue.slot[slot]) {
6096+ rh = list_entry(pos, struct release_heap, list);
6097+ if (release_time == rh->release_time) {
6098+ /* perfect match -- this happens on hyperperiod
6099+ * boundaries
6100+ */
6101+ heap = rh;
6102+ break;
6103+ } else if (lt_before(release_time, rh->release_time)) {
6104+ /* we need to insert a new node since rh is
6105+ * already in the future
6106+ */
6107+ break;
6108+ }
6109+ }
6110+ if (!heap && use_task_heap) {
6111+ /* use pre-allocated release heap */
6112+ rh = tsk_rt(t)->rel_heap;
6113+
6114+ rh->dom = rt;
6115+ rh->release_time = release_time;
6116+
6117+ /* add to release queue */
6118+ list_add(&rh->list, pos->prev);
6119+ heap = rh;
6120+ }
6121+ return heap;
6122+}
6123+
6124+static void reinit_release_heap(struct task_struct* t)
6125+{
6126+ struct release_heap* rh;
6127+
6128+ /* use pre-allocated release heap */
6129+ rh = tsk_rt(t)->rel_heap;
6130+
6131+ /* Make sure it is safe to use. The timer callback could still
6132+ * be executing on another CPU; hrtimer_cancel() will wait
6133+ * until the timer callback has completed. However, under no
6134+ * circumstances should the timer be active (= yet to be
6135+ * triggered).
6136+ *
6137+ * WARNING: If the CPU still holds the release_lock at this point,
6138+ * deadlock may occur!
6139+ */
6140+ BUG_ON(hrtimer_cancel(&rh->timer));
6141+
6142+ /* initialize */
6143+ bheap_init(&rh->heap);
6144+#ifdef CONFIG_RELEASE_MASTER
6145+ atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE);
6146+#endif
6147+}
6148+/* arm_release_timer() - start local release timer or trigger
6149+ * remote timer (pull timer)
6150+ *
6151+ * Called by add_release() with:
6152+ * - tobe_lock taken
6153+ * - IRQ disabled
6154+ */
6155+#ifdef CONFIG_RELEASE_MASTER
6156+#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
6157+static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
6158+#else
6159+static void arm_release_timer(rt_domain_t *_rt)
6160+#endif
6161+{
6162+ rt_domain_t *rt = _rt;
6163+ struct list_head list;
6164+ struct list_head *pos, *safe;
6165+ struct task_struct* t;
6166+ struct release_heap* rh;
6167+
6168+ VTRACE("arm_release_timer() at %llu\n", litmus_clock());
6169+ list_replace_init(&rt->tobe_released, &list);
6170+
6171+ list_for_each_safe(pos, safe, &list) {
6172+		/* pick task off the work list */
6173+ t = list_entry(pos, struct task_struct, rt_param.list);
6174+ sched_trace_task_release(t);
6175+ list_del(pos);
6176+
6177+ /* put into release heap while holding release_lock */
6178+ raw_spin_lock(&rt->release_lock);
6179+ VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
6180+
6181+ rh = get_release_heap(rt, t, 0);
6182+ if (!rh) {
6183+ /* need to use our own, but drop lock first */
6184+ raw_spin_unlock(&rt->release_lock);
6185+ VTRACE_TASK(t, "Dropped release_lock 0x%p\n",
6186+ &rt->release_lock);
6187+
6188+ reinit_release_heap(t);
6189+ VTRACE_TASK(t, "release_heap ready\n");
6190+
6191+ raw_spin_lock(&rt->release_lock);
6192+ VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
6193+ &rt->release_lock);
6194+
6195+ rh = get_release_heap(rt, t, 1);
6196+ }
6197+ bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
6198+ VTRACE_TASK(t, "arm_release_timer(): added to release heap\n");
6199+
6200+ raw_spin_unlock(&rt->release_lock);
6201+ VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
6202+
6203+ /* To avoid arming the timer multiple times, we only let the
6204+ * owner do the arming (which is the "first" task to reference
6205+ * this release_heap anyway).
6206+ */
6207+ if (rh == tsk_rt(t)->rel_heap) {
6208+ VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
6209+ /* we cannot arm the timer using hrtimer_start()
6210+ * as it may deadlock on rq->lock
6211+ *
6212+ * PINNED mode is ok on both local and remote CPU
6213+ */
6214+#ifdef CONFIG_RELEASE_MASTER
6215+ if (rt->release_master == NO_CPU &&
6216+ target_cpu == NO_CPU)
6217+#endif
6218+ __hrtimer_start_range_ns(&rh->timer,
6219+ ns_to_ktime(rh->release_time),
6220+ 0, HRTIMER_MODE_ABS_PINNED, 0);
6221+#ifdef CONFIG_RELEASE_MASTER
6222+ else
6223+ hrtimer_start_on(
6224+ /* target_cpu overrides release master */
6225+ (target_cpu != NO_CPU ?
6226+ target_cpu : rt->release_master),
6227+ &rh->info, &rh->timer,
6228+ ns_to_ktime(rh->release_time),
6229+ HRTIMER_MODE_ABS_PINNED);
6230+#endif
6231+ } else
6232+ VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
6233+ }
6234+}
6235+
6236+void rt_domain_init(rt_domain_t *rt,
6237+ bheap_prio_t order,
6238+ check_resched_needed_t check,
6239+ release_jobs_t release
6240+ )
6241+{
6242+ int i;
6243+
6244+ BUG_ON(!rt);
6245+ if (!check)
6246+ check = dummy_resched;
6247+ if (!release)
6248+ release = default_release_jobs;
6249+ if (!order)
6250+ order = dummy_order;
6251+
6252+#ifdef CONFIG_RELEASE_MASTER
6253+ rt->release_master = NO_CPU;
6254+#endif
6255+
6256+ bheap_init(&rt->ready_queue);
6257+ INIT_LIST_HEAD(&rt->tobe_released);
6258+ for (i = 0; i < RELEASE_QUEUE_SLOTS; i++)
6259+ INIT_LIST_HEAD(&rt->release_queue.slot[i]);
6260+
6261+ raw_spin_lock_init(&rt->ready_lock);
6262+ raw_spin_lock_init(&rt->release_lock);
6263+ raw_spin_lock_init(&rt->tobe_lock);
6264+
6265+ rt->check_resched = check;
6266+ rt->release_jobs = release;
6267+ rt->order = order;
6268+}
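/*
 * Illustrative sketch: a plugin that is content with the defaults may
 * pass NULL for any of the callbacks; rt_domain_init() then substitutes
 * dummy_resched(), dummy_order(), and default_release_jobs(), e.g.:
 */
#if 0 /* example only */
static rt_domain_t my_domain;

static void my_plugin_setup(void)
{
	/* no resched check, no custom ordering, default release handling */
	rt_domain_init(&my_domain, NULL, NULL, NULL);
}
#endif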
6269+
6270+/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
6271+ * @new: the newly released task
6272+ */
6273+void __add_ready(rt_domain_t* rt, struct task_struct *new)
6274+{
6275+ TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to ready queue at %llu\n",
6276+ new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
6277+ get_release(new), litmus_clock());
6278+
6279+ BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node));
6280+
6281+ bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node);
6282+ rt->check_resched(rt);
6283+}
6284+
6285+/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable.
6286+ * @tasks - the newly released tasks
6287+ */
6288+void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
6289+{
6290+ bheap_union(rt->order, &rt->ready_queue, tasks);
6291+ rt->check_resched(rt);
6292+}
6293+
6294+
6295+#ifdef CONFIG_RELEASE_MASTER
6296+void __add_release_on(rt_domain_t* rt, struct task_struct *task,
6297+ int target_cpu)
6298+{
6299+ TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
6300+ get_release(task), target_cpu);
6301+ list_add(&tsk_rt(task)->list, &rt->tobe_released);
6302+ task->rt_param.domain = rt;
6303+
6304+ /* start release timer */
6305+ TS_SCHED2_START(task);
6306+
6307+ arm_release_timer_on(rt, target_cpu);
6308+
6309+ TS_SCHED2_END(task);
6310+}
6311+#endif
6312+
6313+/* add_release - add a real-time task to the rt release queue.
6314+ * @task: the sleeping task
6315+ */
6316+void __add_release(rt_domain_t* rt, struct task_struct *task)
6317+{
6318+ TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
6319+ list_add(&tsk_rt(task)->list, &rt->tobe_released);
6320+ task->rt_param.domain = rt;
6321+
6322+ /* start release timer */
6323+ TS_SCHED2_START(task);
6324+
6325+ arm_release_timer(rt);
6326+
6327+ TS_SCHED2_END(task);
6328+}
6329+
6330diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
6331new file mode 100644
6332index 0000000..f5b7708
6333--- /dev/null
6334+++ b/litmus/sched_cedf.c
6335@@ -0,0 +1,773 @@
6336+/*
6337+ * litmus/sched_cedf.c
6338+ *
6339+ * Implementation of the C-EDF scheduling algorithm.
6340+ *
6341+ * This implementation is based on G-EDF:
6342+ * - CPUs are clustered around L2 or L3 caches.
6343+ * - Cluster topology is detected automatically (this is arch-dependent
6344+ *   and currently works only on x86 --- and only with modern
6345+ *   CPUs that export cpuid4 information).
6346+ * - The plugin _does not_ attempt to put tasks in the right cluster, i.e.,
6347+ *   the programmer needs to be aware of the topology to place tasks
6348+ *   in the desired cluster.
6349+ * - The default clustering is around the L2 cache (cache index = 2);
6350+ *   supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
6351+ *   online CPUs are placed in a single cluster).
6352+ *
6353+ * For details on functions, take a look at sched_gsn_edf.c
6354+ *
6355+ * Currently, we do not support changes in the number of online cpus.
6356+ * If num_online_cpus() changes dynamically, the plugin is broken.
6357+ *
6358+ * This version uses the simple approach and serializes all scheduling
6359+ * decisions by the use of a queue lock. This is probably not the
6360+ * best way to do it, but it should suffice for now.
6361+ */
6362+
6363+#include <linux/spinlock.h>
6364+#include <linux/percpu.h>
6365+#include <linux/sched.h>
6366+#include <linux/slab.h>
6367+
6368+#include <litmus/litmus.h>
6369+#include <litmus/jobs.h>
6370+#include <litmus/sched_plugin.h>
6371+#include <litmus/edf_common.h>
6372+#include <litmus/sched_trace.h>
6373+
6374+#include <litmus/bheap.h>
6375+
6376+#include <linux/module.h>
6377+
6378+/* forward declaration... a funny thing with C ;) */
6379+struct clusterdomain;
6380+
6381+/* cpu_entry_t - maintain the linked and scheduled state
6382+ *
6383+ * A cpu also contains a pointer to the cedf_domain_t cluster
6384+ * that owns it (struct clusterdomain*)
6385+ */
6386+typedef struct {
6387+ int cpu;
6388+ struct clusterdomain* cluster; /* owning cluster */
6389+ struct task_struct* linked; /* only RT tasks */
6390+ struct task_struct* scheduled; /* only RT tasks */
6391+ atomic_t will_schedule; /* prevent unneeded IPIs */
6392+ struct bheap_node* hn;
6393+} cpu_entry_t;
6394+
6395+/* one cpu_entry_t per CPU */
6396+DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
6397+
6398+#define set_will_schedule() \
6399+ (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
6400+#define clear_will_schedule() \
6401+ (atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
6402+#define test_will_schedule(cpu) \
6403+ (atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
6404+
6405+/*
6406+ * In C-EDF there is a cedf domain _per_ cluster.
6407+ * The number of clusters is determined dynamically according to the
6408+ * total number of CPUs and the cluster size.
6409+ */
6410+typedef struct clusterdomain {
6411+ /* rt_domain for this cluster */
6412+ rt_domain_t domain;
6413+ /* cpus in this cluster */
6414+ cpu_entry_t* *cpus;
6415+ /* map of this cluster cpus */
6416+ cpumask_var_t cpu_map;
6417+ /* the cpus queue themselves according to priority in here */
6418+ struct bheap_node *heap_node;
6419+ struct bheap cpu_heap;
6420+ /* lock for this cluster */
6421+#define lock domain.ready_lock
6422+} cedf_domain_t;
6423+
6424+/* a cedf_domain per cluster; allocation is done at init/activation time */
6425+cedf_domain_t *cedf;
6426+
6427+#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
6428+#define task_cpu_cluster(task) remote_cluster(get_partition(task))
6429+
6430+/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
6431+ * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
6432+ * information during the initialization of the plugin (e.g., topology)
6433+#define WANT_ALL_SCHED_EVENTS
6434+ */
6435+#define VERBOSE_INIT
6436+
6437+static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
6438+{
6439+ cpu_entry_t *a, *b;
6440+ a = _a->value;
6441+ b = _b->value;
6442+ /* Note that a and b are inverted: we want the lowest-priority CPU at
6443+ * the top of the heap.
6444+ */
6445+ return edf_higher_prio(b->linked, a->linked);
6446+}
6447+
6448+/* update_cpu_position - Move the cpu entry to the correct place to maintain
6449+ * order in the cpu queue. Caller must hold cedf lock.
6450+ */
6451+static void update_cpu_position(cpu_entry_t *entry)
6452+{
6453+ cedf_domain_t *cluster = entry->cluster;
6454+
6455+ if (likely(bheap_node_in_heap(entry->hn)))
6456+ bheap_delete(cpu_lower_prio,
6457+ &cluster->cpu_heap,
6458+ entry->hn);
6459+
6460+ bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
6461+}
6462+
6463+/* caller must hold cedf lock */
6464+static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
6465+{
6466+ struct bheap_node* hn;
6467+ hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
6468+ return hn->value;
6469+}
6470+
6471+
6472+/* link_task_to_cpu - Update the link of a CPU.
6473+ * Handles the case where the to-be-linked task is already
6474+ * scheduled on a different CPU.
6475+ */
6476+static noinline void link_task_to_cpu(struct task_struct* linked,
6477+ cpu_entry_t *entry)
6478+{
6479+ cpu_entry_t *sched;
6480+ struct task_struct* tmp;
6481+ int on_cpu;
6482+
6483+ BUG_ON(linked && !is_realtime(linked));
6484+
6485+ /* Currently linked task is set to be unlinked. */
6486+ if (entry->linked) {
6487+ entry->linked->rt_param.linked_on = NO_CPU;
6488+ }
6489+
6490+ /* Link new task to CPU. */
6491+ if (linked) {
6492+ set_rt_flags(linked, RT_F_RUNNING);
6493+		/* handle the case that the task is already scheduled somewhere! */
6494+ on_cpu = linked->rt_param.scheduled_on;
6495+ if (on_cpu != NO_CPU) {
6496+ sched = &per_cpu(cedf_cpu_entries, on_cpu);
6497+ /* this should only happen if not linked already */
6498+ BUG_ON(sched->linked == linked);
6499+
6500+ /* If we are already scheduled on the CPU to which we
6501+ * wanted to link, we don't need to do the swap --
6502+ * we just link ourselves to the CPU and depend on
6503+ * the caller to get things right.
6504+ */
6505+ if (entry != sched) {
6506+ TRACE_TASK(linked,
6507+ "already scheduled on %d, updating link.\n",
6508+ sched->cpu);
6509+ tmp = sched->linked;
6510+ linked->rt_param.linked_on = sched->cpu;
6511+ sched->linked = linked;
6512+ update_cpu_position(sched);
6513+ linked = tmp;
6514+ }
6515+ }
6516+ if (linked) /* might be NULL due to swap */
6517+ linked->rt_param.linked_on = entry->cpu;
6518+ }
6519+ entry->linked = linked;
6520+#ifdef WANT_ALL_SCHED_EVENTS
6521+ if (linked)
6522+ TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
6523+ else
6524+ TRACE("NULL linked to %d.\n", entry->cpu);
6525+#endif
6526+ update_cpu_position(entry);
6527+}
6528+
6529+/* unlink - Make sure a task is not linked any longer to an entry
6530+ * where it was linked before. Must hold cedf_lock.
6531+ */
6532+static noinline void unlink(struct task_struct* t)
6533+{
6534+ cpu_entry_t *entry;
6535+
6536+ if (unlikely(!t)) {
6537+ TRACE_BUG_ON(!t);
6538+ return;
6539+ }
6540+
6541+
6542+ if (t->rt_param.linked_on != NO_CPU) {
6543+ /* unlink */
6544+ entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
6545+ t->rt_param.linked_on = NO_CPU;
6546+ link_task_to_cpu(NULL, entry);
6547+ } else if (is_queued(t)) {
6548+ /* This is an interesting situation: t is scheduled,
6549+ * but was just recently unlinked. It cannot be
6550+ * linked anywhere else (because then it would have
6551+ * been relinked to this CPU), thus it must be in some
6552+ * queue. We must remove it from the list in this
6553+ * case.
6554+ *
6555+	 * In the C-EDF case it should be somewhere in the queue of
6556+	 * its domain; therefore we can get the domain using
6557+	 * task_cpu_cluster().
6558+ */
6559+ remove(&(task_cpu_cluster(t))->domain, t);
6560+ }
6561+}
6562+
6563+
6564+/* preempt - force a CPU to reschedule
6565+ */
6566+static void preempt(cpu_entry_t *entry)
6567+{
6568+ preempt_if_preemptable(entry->scheduled, entry->cpu);
6569+}
6570+
6571+/* requeue - Put an unlinked task into its cluster's cedf domain.
6572+ * Caller must hold cedf_lock.
6573+ */
6574+static noinline void requeue(struct task_struct* task)
6575+{
6576+ cedf_domain_t *cluster = task_cpu_cluster(task);
6577+ BUG_ON(!task);
6578+ /* sanity check before insertion */
6579+ BUG_ON(is_queued(task));
6580+
6581+ if (is_released(task, litmus_clock()))
6582+ __add_ready(&cluster->domain, task);
6583+ else {
6584+ /* it has got to wait */
6585+ add_release(&cluster->domain, task);
6586+ }
6587+}
6588+
6589+/* check for any necessary preemptions */
6590+static void check_for_preemptions(cedf_domain_t *cluster)
6591+{
6592+ struct task_struct *task;
6593+ cpu_entry_t* last;
6594+
6595+ for(last = lowest_prio_cpu(cluster);
6596+ edf_preemption_needed(&cluster->domain, last->linked);
6597+ last = lowest_prio_cpu(cluster)) {
6598+ /* preemption necessary */
6599+ task = __take_ready(&cluster->domain);
6600+ TRACE("check_for_preemptions: attempting to link task %d to %d\n",
6601+ task->pid, last->cpu);
6602+ if (last->linked)
6603+ requeue(last->linked);
6604+ link_task_to_cpu(task, last);
6605+ preempt(last);
6606+ }
6607+}
6608+
6609+/* cedf_job_arrival: task is either resumed or released */
6610+static noinline void cedf_job_arrival(struct task_struct* task)
6611+{
6612+ cedf_domain_t *cluster = task_cpu_cluster(task);
6613+ BUG_ON(!task);
6614+
6615+ requeue(task);
6616+ check_for_preemptions(cluster);
6617+}
6618+
6619+static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
6620+{
6621+ cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain);
6622+ unsigned long flags;
6623+
6624+ raw_spin_lock_irqsave(&cluster->lock, flags);
6625+
6626+ __merge_ready(&cluster->domain, tasks);
6627+ check_for_preemptions(cluster);
6628+
6629+ raw_spin_unlock_irqrestore(&cluster->lock, flags);
6630+}
6631+
6632+/* caller holds cedf_lock */
6633+static noinline void job_completion(struct task_struct *t, int forced)
6634+{
6635+ BUG_ON(!t);
6636+
6637+ sched_trace_task_completion(t, forced);
6638+
6639+ TRACE_TASK(t, "job_completion().\n");
6640+
6641+ /* set flags */
6642+ set_rt_flags(t, RT_F_SLEEP);
6643+ /* prepare for next period */
6644+ prepare_for_next_period(t);
6645+ if (is_released(t, litmus_clock()))
6646+ sched_trace_task_release(t);
6647+ /* unlink */
6648+ unlink(t);
6649+ /* requeue
6650+ * But don't requeue a blocking task. */
6651+ if (is_running(t))
6652+ cedf_job_arrival(t);
6653+}
6654+
6655+/* cedf_tick - this function is called for every local timer
6656+ * interrupt.
6657+ *
6658+ * Checks whether the current task has exhausted its budget and, if
6659+ * so, requests a reschedule (or an exit from its non-preemptive section).
6660+ */
6661+static void cedf_tick(struct task_struct* t)
6662+{
6663+ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
6664+ if (!is_np(t)) {
6665+ /* np tasks will be preempted when they become
6666+ * preemptable again
6667+ */
6668+ set_tsk_need_resched(t);
6669+ set_will_schedule();
6670+ TRACE("cedf_scheduler_tick: "
6671+ "%d is preemptable "
6672+ " => FORCE_RESCHED\n", t->pid);
6673+ } else if (is_user_np(t)) {
6674+ TRACE("cedf_scheduler_tick: "
6675+ "%d is non-preemptable, "
6676+ "preemption delayed.\n", t->pid);
6677+ request_exit_np(t);
6678+ }
6679+ }
6680+}
6681+
6682+/* Getting schedule() right is a bit tricky. schedule() may not make any
6683+ * assumptions on the state of the current task since it may be called for a
6684+ * number of reasons. The reasons include that a scheduler_tick() determined it
6685+ * was necessary, that sys_exit_np() was called, that some Linux
6686+ * subsystem determined so, or even (in the worst case) that there is a bug
6687+ * hidden somewhere. Thus, we must take extreme care to determine what the
6688+ * current state is.
6689+ *
6690+ * The CPU could currently be scheduling a task (or not), be linked (or not).
6691+ *
6692+ * The following assertions for the scheduled task could hold:
6693+ *
6694+ * - !is_running(scheduled) // the job blocks
6695+ * - scheduled->timeslice == 0 // the job completed (forcefully)
6696+ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
6697+ * - linked != scheduled // we need to reschedule (for any reason)
6698+ * - is_np(scheduled) // rescheduling must be delayed,
6699+ * sys_exit_np must be requested
6700+ *
6701+ * Any of these can occur together.
6702+ */
6703+static struct task_struct* cedf_schedule(struct task_struct * prev)
6704+{
6705+ cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
6706+ cedf_domain_t *cluster = entry->cluster;
6707+ int out_of_time, sleep, preempt, np, exists, blocks;
6708+ struct task_struct* next = NULL;
6709+
6710+ raw_spin_lock(&cluster->lock);
6711+ clear_will_schedule();
6712+
6713+ /* sanity checking */
6714+ BUG_ON(entry->scheduled && entry->scheduled != prev);
6715+ BUG_ON(entry->scheduled && !is_realtime(prev));
6716+ BUG_ON(is_realtime(prev) && !entry->scheduled);
6717+
6718+ /* (0) Determine state */
6719+ exists = entry->scheduled != NULL;
6720+ blocks = exists && !is_running(entry->scheduled);
6721+ out_of_time = exists &&
6722+ budget_enforced(entry->scheduled) &&
6723+ budget_exhausted(entry->scheduled);
6724+ np = exists && is_np(entry->scheduled);
6725+ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
6726+ preempt = entry->scheduled != entry->linked;
6727+
6728+#ifdef WANT_ALL_SCHED_EVENTS
6729+ TRACE_TASK(prev, "invoked cedf_schedule.\n");
6730+#endif
6731+
6732+ if (exists)
6733+ TRACE_TASK(prev,
6734+ "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
6735+ "state:%d sig:%d\n",
6736+ blocks, out_of_time, np, sleep, preempt,
6737+ prev->state, signal_pending(prev));
6738+ if (entry->linked && preempt)
6739+ TRACE_TASK(prev, "will be preempted by %s/%d\n",
6740+ entry->linked->comm, entry->linked->pid);
6741+
6742+
6743+ /* If a task blocks we have no choice but to reschedule.
6744+ */
6745+ if (blocks)
6746+ unlink(entry->scheduled);
6747+
6748+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
6749+ * We need to make sure to update the link structure anyway in case
6750+ * that we are still linked. Multiple calls to request_exit_np() don't
6751+ * hurt.
6752+ */
6753+ if (np && (out_of_time || preempt || sleep)) {
6754+ unlink(entry->scheduled);
6755+ request_exit_np(entry->scheduled);
6756+ }
6757+
6758+ /* Any task that is preemptable and either exhausts its execution
6759+ * budget or wants to sleep completes. We may have to reschedule after
6760+ * this. Don't do a job completion if we block (can't have timers running
6761+	 * for blocked jobs). Preemptions go first for the same reason.
6762+ */
6763+ if (!np && (out_of_time || sleep) && !blocks && !preempt)
6764+ job_completion(entry->scheduled, !sleep);
6765+
6766+ /* Link pending task if we became unlinked.
6767+ */
6768+ if (!entry->linked)
6769+ link_task_to_cpu(__take_ready(&cluster->domain), entry);
6770+
6771+ /* The final scheduling decision. Do we need to switch for some reason?
6772+ * If linked is different from scheduled, then select linked as next.
6773+ */
6774+ if ((!np || blocks) &&
6775+ entry->linked != entry->scheduled) {
6776+ /* Schedule a linked job? */
6777+ if (entry->linked) {
6778+ entry->linked->rt_param.scheduled_on = entry->cpu;
6779+ next = entry->linked;
6780+ }
6781+ if (entry->scheduled) {
6782+ /* not gonna be scheduled soon */
6783+ entry->scheduled->rt_param.scheduled_on = NO_CPU;
6784+ TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
6785+ }
6786+ } else
6787+ /* Only override Linux scheduler if we have a real-time task
6788+ * scheduled that needs to continue.
6789+ */
6790+ if (exists)
6791+ next = prev;
6792+
6793+ raw_spin_unlock(&cluster->lock);
6794+
6795+#ifdef WANT_ALL_SCHED_EVENTS
6796+ TRACE("cedf_lock released, next=0x%p\n", next);
6797+
6798+ if (next)
6799+ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
6800+ else if (exists && !next)
6801+ TRACE("becomes idle at %llu.\n", litmus_clock());
6802+#endif
6803+
6804+
6805+ return next;
6806+}
6807+
6808+
6809+/* _finish_switch - we just finished the switch away from prev
6810+ */
6811+static void cedf_finish_switch(struct task_struct *prev)
6812+{
6813+ cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries);
6814+
6815+ entry->scheduled = is_realtime(current) ? current : NULL;
6816+#ifdef WANT_ALL_SCHED_EVENTS
6817+ TRACE_TASK(prev, "switched away from\n");
6818+#endif
6819+}
6820+
6821+
6822+/* Prepare a task for running in RT mode
6823+ */
6824+static void cedf_task_new(struct task_struct * t, int on_rq, int running)
6825+{
6826+ unsigned long flags;
6827+ cpu_entry_t* entry;
6828+ cedf_domain_t* cluster;
6829+
6830+	TRACE("cedf: task new %d\n", t->pid);
6831+
6832+ /* the cluster doesn't change even if t is running */
6833+ cluster = task_cpu_cluster(t);
6834+
6835+ raw_spin_lock_irqsave(&cluster->domain.ready_lock, flags);
6836+
6837+ /* setup job params */
6838+ release_at(t, litmus_clock());
6839+
6840+ if (running) {
6841+ entry = &per_cpu(cedf_cpu_entries, task_cpu(t));
6842+ BUG_ON(entry->scheduled);
6843+
6844+ entry->scheduled = t;
6845+ tsk_rt(t)->scheduled_on = task_cpu(t);
6846+ } else {
6847+ t->rt_param.scheduled_on = NO_CPU;
6848+ }
6849+ t->rt_param.linked_on = NO_CPU;
6850+
6851+ cedf_job_arrival(t);
6852+ raw_spin_unlock_irqrestore(&(cluster->domain.ready_lock), flags);
6853+}
6854+
6855+static void cedf_task_wake_up(struct task_struct *task)
6856+{
6857+ unsigned long flags;
6858+ lt_t now;
6859+ cedf_domain_t *cluster;
6860+
6861+ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
6862+
6863+ cluster = task_cpu_cluster(task);
6864+
6865+ raw_spin_lock_irqsave(&cluster->lock, flags);
6866+ /* We need to take suspensions because of semaphores into
6867+ * account! If a job resumes after being suspended due to acquiring
6868+ * a semaphore, it should never be treated as a new job release.
6869+ */
6870+ if (get_rt_flags(task) == RT_F_EXIT_SEM) {
6871+ set_rt_flags(task, RT_F_RUNNING);
6872+ } else {
6873+ now = litmus_clock();
6874+ if (is_tardy(task, now)) {
6875+ /* new sporadic release */
6876+ release_at(task, now);
6877+ sched_trace_task_release(task);
6878+ }
6879+ else {
6880+ if (task->rt.time_slice) {
6881+ /* came back in time before deadline
6882+ */
6883+ set_rt_flags(task, RT_F_RUNNING);
6884+ }
6885+ }
6886+ }
6887+ cedf_job_arrival(task);
6888+ raw_spin_unlock_irqrestore(&cluster->lock, flags);
6889+}
6890+
6891+static void cedf_task_block(struct task_struct *t)
6892+{
6893+ unsigned long flags;
6894+ cedf_domain_t *cluster;
6895+
6896+ TRACE_TASK(t, "block at %llu\n", litmus_clock());
6897+
6898+ cluster = task_cpu_cluster(t);
6899+
6900+ /* unlink if necessary */
6901+ raw_spin_lock_irqsave(&cluster->lock, flags);
6902+ unlink(t);
6903+ raw_spin_unlock_irqrestore(&cluster->lock, flags);
6904+
6905+ BUG_ON(!is_realtime(t));
6906+}
6907+
6908+
6909+static void cedf_task_exit(struct task_struct * t)
6910+{
6911+ unsigned long flags;
6912+ cedf_domain_t *cluster = task_cpu_cluster(t);
6913+
6914+ /* unlink if necessary */
6915+ raw_spin_lock_irqsave(&cluster->lock, flags);
6916+ unlink(t);
6917+ if (tsk_rt(t)->scheduled_on != NO_CPU) {
6918+ cluster->cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
6919+ tsk_rt(t)->scheduled_on = NO_CPU;
6920+ }
6921+ raw_spin_unlock_irqrestore(&cluster->lock, flags);
6922+
6923+ BUG_ON(!is_realtime(t));
6924+ TRACE_TASK(t, "RIP\n");
6925+}
6926+
6927+static long cedf_admit_task(struct task_struct* tsk)
6928+{
6929+ return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
6930+}
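/*
 * Illustrative sketch (an assumption about typical usage, not taken
 * from this file): cedf_admit_task() only accepts a task that is
 * already executing on the CPU recorded in task_params.cpu. From
 * userspace this is commonly arranged by pinning the task to the
 * desired CPU before switching it to real-time mode, e.g.:
 */
#if 0 /* userspace example only */
#define _GNU_SOURCE
#include <sched.h>

static int pin_to_cpu(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	/* after this call task_cpu() matches cpu, so admission can
	 * succeed once task_params.cpu is set to the same value */
	return sched_setaffinity(0, sizeof(set), &set);
}
#endif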
6931+
6932+/* total number of clusters */
6933+static int num_clusters;
6934+/* we do not support clusters of different sizes */
6935+static unsigned int cluster_size;
6936+
6937+#ifdef VERBOSE_INIT
6938+static void print_cluster_topology(cpumask_var_t mask, int cpu)
6939+{
6940+ int chk;
6941+ char buf[255];
6942+
6943+ chk = cpulist_scnprintf(buf, 254, mask);
6944+ buf[chk] = '\0';
6945+ printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf);
6946+
6947+}
6948+#endif
6949+
6950+static int clusters_allocated = 0;
6951+
6952+static void cleanup_cedf(void)
6953+{
6954+ int i;
6955+
6956+ if (clusters_allocated) {
6957+ for (i = 0; i < num_clusters; i++) {
6958+ kfree(cedf[i].cpus);
6959+ kfree(cedf[i].heap_node);
6960+ free_cpumask_var(cedf[i].cpu_map);
6961+ }
6962+
6963+ kfree(cedf);
6964+ }
6965+}
6966+
6967+static long cedf_activate_plugin(void)
6968+{
6969+ int i, j, cpu, ccpu, cpu_count;
6970+ cpu_entry_t *entry;
6971+
6972+ cpumask_var_t mask;
6973+ int chk = 0;
6974+
6975+ /* de-allocate old clusters, if any */
6976+ cleanup_cedf();
6977+
6978+ printk(KERN_INFO "C-EDF: Activate Plugin, cache index = %d\n",
6979+ cluster_cache_index);
6980+
6981+ /* need to get cluster_size first */
6982+ if(!zalloc_cpumask_var(&mask, GFP_ATOMIC))
6983+ return -ENOMEM;
6984+
6985+ if (unlikely(cluster_cache_index == num_online_cpus())) {
6986+
6987+ cluster_size = num_online_cpus();
6988+ } else {
6989+
6990+ chk = get_shared_cpu_map(mask, 0, cluster_cache_index);
6991+ if (chk) {
6992+ /* if chk != 0 then it is the max allowed index */
6993+ printk(KERN_INFO "C-EDF: Cannot support cache index = %d\n",
6994+ cluster_cache_index);
6995+ printk(KERN_INFO "C-EDF: Using cache index = %d\n",
6996+ chk);
6997+ cluster_cache_index = chk;
6998+ }
6999+
7000+ cluster_size = cpumask_weight(mask);
7001+ }
7002+
7003+ if ((num_online_cpus() % cluster_size) != 0) {
7004+ /* this can't be right, some cpus are left out */
7005+ printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n",
7006+ num_online_cpus(), cluster_size);
7007+ return -1;
7008+ }
7009+
7010+ num_clusters = num_online_cpus() / cluster_size;
7011+ printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n",
7012+ num_clusters, cluster_size);
7013+
7014+ /* initialize clusters */
7015+ cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC);
7016+ for (i = 0; i < num_clusters; i++) {
7017+
7018+ cedf[i].cpus = kmalloc(cluster_size * sizeof(cpu_entry_t),
7019+ GFP_ATOMIC);
7020+ cedf[i].heap_node = kmalloc(
7021+ cluster_size * sizeof(struct bheap_node),
7022+ GFP_ATOMIC);
7023+ bheap_init(&(cedf[i].cpu_heap));
7024+ edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs);
7025+
7026+ if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC))
7027+ return -ENOMEM;
7028+ }
7029+
7030+	/* cycle through clusters and add cpus to them */
7031+ for (i = 0; i < num_clusters; i++) {
7032+
7033+ for_each_online_cpu(cpu) {
7034+ /* check if the cpu is already in a cluster */
7035+ for (j = 0; j < num_clusters; j++)
7036+ if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
7037+ break;
7038+ /* if it is in a cluster go to next cpu */
7039+ if (cpumask_test_cpu(cpu, cedf[j].cpu_map))
7040+ continue;
7041+
7042+ /* this cpu isn't in any cluster */
7043+ /* get the shared cpus */
7044+ if (unlikely(cluster_cache_index == num_online_cpus()))
7045+ cpumask_copy(mask, cpu_online_mask);
7046+ else
7047+ get_shared_cpu_map(mask, cpu, cluster_cache_index);
7048+
7049+ cpumask_copy(cedf[i].cpu_map, mask);
7050+#ifdef VERBOSE_INIT
7051+ print_cluster_topology(mask, cpu);
7052+#endif
7053+ /* add cpus to current cluster and init cpu_entry_t */
7054+ cpu_count = 0;
7055+ for_each_cpu(ccpu, cedf[i].cpu_map) {
7056+
7057+ entry = &per_cpu(cedf_cpu_entries, ccpu);
7058+ cedf[i].cpus[cpu_count] = entry;
7059+ atomic_set(&entry->will_schedule, 0);
7060+ entry->cpu = ccpu;
7061+ entry->cluster = &cedf[i];
7062+ entry->hn = &(cedf[i].heap_node[cpu_count]);
7063+ bheap_node_init(&entry->hn, entry);
7064+
7065+ cpu_count++;
7066+
7067+ entry->linked = NULL;
7068+ entry->scheduled = NULL;
7069+ update_cpu_position(entry);
7070+ }
7071+ /* done with this cluster */
7072+ break;
7073+ }
7074+ }
7075+
7076+ free_cpumask_var(mask);
7077+ clusters_allocated = 1;
7078+ return 0;
7079+}
7080+
7081+/* Plugin object */
7082+static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = {
7083+ .plugin_name = "C-EDF",
7084+ .finish_switch = cedf_finish_switch,
7085+ .tick = cedf_tick,
7086+ .task_new = cedf_task_new,
7087+ .complete_job = complete_job,
7088+ .task_exit = cedf_task_exit,
7089+ .schedule = cedf_schedule,
7090+ .task_wake_up = cedf_task_wake_up,
7091+ .task_block = cedf_task_block,
7092+ .admit_task = cedf_admit_task,
7093+ .activate_plugin = cedf_activate_plugin,
7094+};
7095+
7096+
7097+static int __init init_cedf(void)
7098+{
7099+ return register_sched_plugin(&cedf_plugin);
7100+}
7101+
7102+static void clean_cedf(void)
7103+{
7104+ cleanup_cedf();
7105+}
7106+
7107+module_init(init_cedf);
7108+module_exit(clean_cedf);
7109diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
7110new file mode 100644
7111index 0000000..e101768
7112--- /dev/null
7113+++ b/litmus/sched_gsn_edf.c
7114@@ -0,0 +1,842 @@
7115+/*
7116+ * litmus/sched_gsn_edf.c
7117+ *
7118+ * Implementation of the GSN-EDF scheduling algorithm.
7119+ *
7120+ * This version uses the simple approach and serializes all scheduling
7121+ * decisions by the use of a queue lock. This is probably not the
7122+ * best way to do it, but it should suffice for now.
7123+ */
7124+
7125+#include <linux/spinlock.h>
7126+#include <linux/percpu.h>
7127+#include <linux/sched.h>
7128+
7129+#include <litmus/litmus.h>
7130+#include <litmus/jobs.h>
7131+#include <litmus/sched_plugin.h>
7132+#include <litmus/edf_common.h>
7133+#include <litmus/sched_trace.h>
7134+
7135+#include <litmus/bheap.h>
7136+
7137+#include <linux/module.h>
7138+
7139+/* Overview of GSN-EDF operations.
7140+ *
7141+ * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
7142+ * description only covers how the individual operations are implemented in
7143+ * LITMUS.
7144+ *
7145+ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
7146+ * structure (NOT the actually scheduled
7147+ * task). If there is another linked task To
7148+ * already it will set To->linked_on = NO_CPU
7149+ * (thereby removing its association with this
7150+ * CPU). However, it will not requeue the
7151+ * previously linked task (if any). It will set
7152+ * T's state to RT_F_RUNNING and check whether
7153+ * it is already running somewhere else. If T
7154+ * is scheduled somewhere else it will link
7155+ * it to that CPU instead (and pull the linked
7156+ * task to cpu). T may be NULL.
7157+ *
7158+ * unlink(T) - Unlink removes T from all scheduler data
7159+ * structures. If it is linked to some CPU it
7160+ * will link NULL to that CPU. If it is
7161+ * currently queued in the gsnedf queue it will
7162+ * be removed from the rt_domain. It is safe to
7163+ * call unlink(T) if T is not linked. T may not
7164+ * be NULL.
7165+ *
7166+ * requeue(T) - Requeue will insert T into the appropriate
7167+ * queue. If the system is in real-time mode and
7168+ *			  T has been released already, it will go into the
7169+ * ready queue. If the system is not in
7170+ *			  real-time mode, then T will go into the
7171+ * release queue. If T's release time is in the
7172+ * future, it will go into the release
7173+ * queue. That means that T's release time/job
7174+ *			  no/etc. has to be updated before requeue(T) is
7175+ * called. It is not safe to call requeue(T)
7176+ * when T is already queued. T may not be NULL.
7177+ *
7178+ * gsnedf_job_arrival(T) - This is the catch all function when T enters
7179+ * the system after either a suspension or at a
7180+ * job release. It will queue T (which means it
7181+ * is not safe to call gsnedf_job_arrival(T) if
7182+ * T is already queued) and then check whether a
7183+ * preemption is necessary. If a preemption is
7184+ * necessary it will update the linkage
7185+ * accordingly and cause scheduled to be called
7186+ * (either with an IPI or need_resched). It is
7187+ * safe to call gsnedf_job_arrival(T) if T's
7188+ * next job has not been actually released yet
7189+ *			  (release time in the future). T will be put
7190+ * on the release queue in that case.
7191+ *
7192+ * job_completion(T) - Take care of everything that needs to be done
7193+ * to prepare T for its next release and place
7194+ * it in the right queue with
7195+ * gsnedf_job_arrival().
7196+ *
7197+ *
7198+ * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
7199+ * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
7200+ * the functions will automatically propagate a pending task from the ready queue
7201+ * to a linked task. This is the job of the calling function (by means of
7202+ * __take_ready).
7203+ */
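/*
 * Illustrative walk-through (derived from the functions below): when a
 * release hrtimer fires, on_release_timer() hands the heap of released
 * jobs to gsnedf_release_jobs(), which calls __merge_ready() and then
 * check_for_preemptions(). For each CPU whose linked task has lower
 * priority than some ready job, __take_ready() pulls the job,
 * link_task_to_cpu() updates the linkage, and preempt() triggers the
 * actual reschedule via preempt_if_preemptable().
 */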
7204+
7205+
7206+/* cpu_entry_t - maintain the linked and scheduled state
7207+ */
7208+typedef struct {
7209+ int cpu;
7210+ struct task_struct* linked; /* only RT tasks */
7211+ struct task_struct* scheduled; /* only RT tasks */
7212+ atomic_t will_schedule; /* prevent unneeded IPIs */
7213+ struct bheap_node* hn;
7214+} cpu_entry_t;
7215+DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
7216+
7217+cpu_entry_t* gsnedf_cpus[NR_CPUS];
7218+
7219+#define set_will_schedule() \
7220+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
7221+#define clear_will_schedule() \
7222+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
7223+#define test_will_schedule(cpu) \
7224+ (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
7225+
7226+
7227+/* the cpus queue themselves according to priority in here */
7228+static struct bheap_node gsnedf_heap_node[NR_CPUS];
7229+static struct bheap gsnedf_cpu_heap;
7230+
7231+static rt_domain_t gsnedf;
7232+#define gsnedf_lock (gsnedf.ready_lock)
7233+
7234+
7235+/* Uncomment this if you want to see all scheduling decisions in the
7236+ * TRACE() log.
7237+#define WANT_ALL_SCHED_EVENTS
7238+ */
7239+
7240+static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
7241+{
7242+ cpu_entry_t *a, *b;
7243+ a = _a->value;
7244+ b = _b->value;
7245+ /* Note that a and b are inverted: we want the lowest-priority CPU at
7246+ * the top of the heap.
7247+ */
7248+ return edf_higher_prio(b->linked, a->linked);
7249+}
7250+
7251+/* update_cpu_position - Move the cpu entry to the correct place to maintain
7252+ * order in the cpu queue. Caller must hold gsnedf lock.
7253+ */
7254+static void update_cpu_position(cpu_entry_t *entry)
7255+{
7256+ if (likely(bheap_node_in_heap(entry->hn)))
7257+ bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
7258+ bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
7259+}
7260+
7261+/* caller must hold gsnedf lock */
7262+static cpu_entry_t* lowest_prio_cpu(void)
7263+{
7264+ struct bheap_node* hn;
7265+ hn = bheap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
7266+ return hn->value;
7267+}
7268+
7269+
7270+/* link_task_to_cpu - Update the link of a CPU.
7271+ * Handles the case where the to-be-linked task is already
7272+ * scheduled on a different CPU.
7273+ */
7274+static noinline void link_task_to_cpu(struct task_struct* linked,
7275+ cpu_entry_t *entry)
7276+{
7277+ cpu_entry_t *sched;
7278+ struct task_struct* tmp;
7279+ int on_cpu;
7280+
7281+ BUG_ON(linked && !is_realtime(linked));
7282+
7283+ /* Currently linked task is set to be unlinked. */
7284+ if (entry->linked) {
7285+ entry->linked->rt_param.linked_on = NO_CPU;
7286+ }
7287+
7288+ /* Link new task to CPU. */
7289+ if (linked) {
7290+ set_rt_flags(linked, RT_F_RUNNING);
7291+		/* handle the case that the task is already scheduled somewhere! */
7292+ on_cpu = linked->rt_param.scheduled_on;
7293+ if (on_cpu != NO_CPU) {
7294+ sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
7295+ /* this should only happen if not linked already */
7296+ BUG_ON(sched->linked == linked);
7297+
7298+ /* If we are already scheduled on the CPU to which we
7299+ * wanted to link, we don't need to do the swap --
7300+ * we just link ourselves to the CPU and depend on
7301+ * the caller to get things right.
7302+ */
7303+ if (entry != sched) {
7304+ TRACE_TASK(linked,
7305+ "already scheduled on %d, updating link.\n",
7306+ sched->cpu);
7307+ tmp = sched->linked;
7308+ linked->rt_param.linked_on = sched->cpu;
7309+ sched->linked = linked;
7310+ update_cpu_position(sched);
7311+ linked = tmp;
7312+ }
7313+ }
7314+ if (linked) /* might be NULL due to swap */
7315+ linked->rt_param.linked_on = entry->cpu;
7316+ }
7317+ entry->linked = linked;
7318+#ifdef WANT_ALL_SCHED_EVENTS
7319+ if (linked)
7320+ TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
7321+ else
7322+ TRACE("NULL linked to %d.\n", entry->cpu);
7323+#endif
7324+ update_cpu_position(entry);
7325+}
7326+
7327+/* unlink - Make sure a task is not linked any longer to an entry
7328+ * where it was linked before. Must hold gsnedf_lock.
7329+ */
7330+static noinline void unlink(struct task_struct* t)
7331+{
7332+ cpu_entry_t *entry;
7333+
7334+ if (unlikely(!t)) {
7335+ TRACE_BUG_ON(!t);
7336+ return;
7337+ }
7338+
7339+ if (t->rt_param.linked_on != NO_CPU) {
7340+ /* unlink */
7341+ entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
7342+ t->rt_param.linked_on = NO_CPU;
7343+ link_task_to_cpu(NULL, entry);
7344+ } else if (is_queued(t)) {
7345+ /* This is an interesting situation: t is scheduled,
7346+ * but was just recently unlinked. It cannot be
7347+ * linked anywhere else (because then it would have
7348+ * been relinked to this CPU), thus it must be in some
7349+ * queue. We must remove it from the list in this
7350+ * case.
7351+ */
7352+ remove(&gsnedf, t);
7353+ }
7354+}
7355+
7356+
7357+/* preempt - force a CPU to reschedule
7358+ */
7359+static void preempt(cpu_entry_t *entry)
7360+{
7361+ preempt_if_preemptable(entry->scheduled, entry->cpu);
7362+}
7363+
7364+/* requeue - Put an unlinked task into gsn-edf domain.
7365+ * Caller must hold gsnedf_lock.
7366+ */
7367+static noinline void requeue(struct task_struct* task)
7368+{
7369+ BUG_ON(!task);
7370+ /* sanity check before insertion */
7371+ BUG_ON(is_queued(task));
7372+
7373+ if (is_released(task, litmus_clock()))
7374+ __add_ready(&gsnedf, task);
7375+ else {
7376+ /* it has got to wait */
7377+ add_release(&gsnedf, task);
7378+ }
7379+}
7380+
7381+/* check for any necessary preemptions */
7382+static void check_for_preemptions(void)
7383+{
7384+ struct task_struct *task;
7385+ cpu_entry_t* last;
7386+
7387+ for(last = lowest_prio_cpu();
7388+ edf_preemption_needed(&gsnedf, last->linked);
7389+ last = lowest_prio_cpu()) {
7390+ /* preemption necessary */
7391+ task = __take_ready(&gsnedf);
7392+ TRACE("check_for_preemptions: attempting to link task %d to %d\n",
7393+ task->pid, last->cpu);
7394+ if (last->linked)
7395+ requeue(last->linked);
7396+ link_task_to_cpu(task, last);
7397+ preempt(last);
7398+ }
7399+}
7400+
7401+/* gsnedf_job_arrival: task is either resumed or released */
7402+static noinline void gsnedf_job_arrival(struct task_struct* task)
7403+{
7404+ BUG_ON(!task);
7405+
7406+ requeue(task);
7407+ check_for_preemptions();
7408+}
7409+
7410+static void gsnedf_release_jobs(rt_domain_t* rt, struct bheap* tasks)
7411+{
7412+ unsigned long flags;
7413+
7414+ raw_spin_lock_irqsave(&gsnedf_lock, flags);
7415+
7416+ __merge_ready(rt, tasks);
7417+ check_for_preemptions();
7418+
7419+ raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
7420+}
7421+
7422+/* caller holds gsnedf_lock */
7423+static noinline void job_completion(struct task_struct *t, int forced)
7424+{
7425+ BUG_ON(!t);
7426+
7427+ sched_trace_task_completion(t, forced);
7428+
7429+ TRACE_TASK(t, "job_completion().\n");
7430+
7431+ /* set flags */
7432+ set_rt_flags(t, RT_F_SLEEP);
7433+ /* prepare for next period */
7434+ prepare_for_next_period(t);
7435+ if (is_released(t, litmus_clock()))
7436+ sched_trace_task_release(t);
7437+ /* unlink */
7438+ unlink(t);
7439+ /* requeue
7440+ * But don't requeue a blocking task. */
7441+ if (is_running(t))
7442+ gsnedf_job_arrival(t);
7443+}
7444+
7445+/* gsnedf_tick - this function is called for every local timer
7446+ * interrupt.
7447+ *
7448+ * Checks whether the current task has exhausted its budget and, if
7449+ * so, requests a reschedule (or an exit from its non-preemptive section).
7450+ */
7451+static void gsnedf_tick(struct task_struct* t)
7452+{
7453+ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
7454+ if (!is_np(t)) {
7455+ /* np tasks will be preempted when they become
7456+ * preemptable again
7457+ */
7458+ set_tsk_need_resched(t);
7459+ set_will_schedule();
7460+ TRACE("gsnedf_scheduler_tick: "
7461+ "%d is preemptable "
7462+ " => FORCE_RESCHED\n", t->pid);
7463+ } else if (is_user_np(t)) {
7464+ TRACE("gsnedf_scheduler_tick: "
7465+ "%d is non-preemptable, "
7466+ "preemption delayed.\n", t->pid);
7467+ request_exit_np(t);
7468+ }
7469+ }
7470+}
7471+
7472+/* Getting schedule() right is a bit tricky. schedule() may not make any
7473+ * assumptions on the state of the current task since it may be called for a
7474+ * number of reasons. The reasons include that a scheduler_tick() determined it
7475+ * was necessary, that sys_exit_np() was called, that some Linux
7476+ * subsystem determined so, or even (in the worst case) that there is a bug
7477+ * hidden somewhere. Thus, we must take extreme care to determine what the
7478+ * current state is.
7479+ *
7480+ * The CPU could currently be scheduling a task (or not), be linked (or not).
7481+ *
7482+ * The following assertions for the scheduled task could hold:
7483+ *
7484+ * - !is_running(scheduled) // the job blocks
7485+ * - scheduled->timeslice == 0 // the job completed (forcefully)
7486+ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
7487+ * - linked != scheduled // we need to reschedule (for any reason)
7488+ * - is_np(scheduled) // rescheduling must be delayed,
7489+ * sys_exit_np must be requested
7490+ *
7491+ * Any of these can occur together.
7492+ */
7493+static struct task_struct* gsnedf_schedule(struct task_struct * prev)
7494+{
7495+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
7496+ int out_of_time, sleep, preempt, np, exists, blocks;
7497+ struct task_struct* next = NULL;
7498+
7499+#ifdef CONFIG_RELEASE_MASTER
7500+ /* Bail out early if we are the release master.
7501+ * The release master never schedules any real-time tasks.
7502+ */
7503+ if (gsnedf.release_master == entry->cpu)
7504+ return NULL;
7505+#endif
7506+
7507+ raw_spin_lock(&gsnedf_lock);
7508+ clear_will_schedule();
7509+
7510+ /* sanity checking */
7511+ BUG_ON(entry->scheduled && entry->scheduled != prev);
7512+ BUG_ON(entry->scheduled && !is_realtime(prev));
7513+ BUG_ON(is_realtime(prev) && !entry->scheduled);
7514+
7515+ /* (0) Determine state */
7516+ exists = entry->scheduled != NULL;
7517+ blocks = exists && !is_running(entry->scheduled);
7518+ out_of_time = exists &&
7519+ budget_enforced(entry->scheduled) &&
7520+ budget_exhausted(entry->scheduled);
7521+ np = exists && is_np(entry->scheduled);
7522+ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
7523+ preempt = entry->scheduled != entry->linked;
7524+
7525+#ifdef WANT_ALL_SCHED_EVENTS
7526+ TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
7527+#endif
7528+
7529+ if (exists)
7530+ TRACE_TASK(prev,
7531+ "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
7532+ "state:%d sig:%d\n",
7533+ blocks, out_of_time, np, sleep, preempt,
7534+ prev->state, signal_pending(prev));
7535+ if (entry->linked && preempt)
7536+ TRACE_TASK(prev, "will be preempted by %s/%d\n",
7537+ entry->linked->comm, entry->linked->pid);
7538+
7539+
7540+ /* If a task blocks we have no choice but to reschedule.
7541+ */
7542+ if (blocks)
7543+ unlink(entry->scheduled);
7544+
7545+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
7546+ * We need to make sure to update the link structure anyway in case
7547+ * that we are still linked. Multiple calls to request_exit_np() don't
7548+ * hurt.
7549+ */
7550+ if (np && (out_of_time || preempt || sleep)) {
7551+ unlink(entry->scheduled);
7552+ request_exit_np(entry->scheduled);
7553+ }
7554+
7555+	/* Any task that is preemptable and either exhausts its execution
7556+	 * budget or wants to sleep completes its current job. We may have to
7557+	 * reschedule after this. Don't do a job completion if the task blocks
7558+	 * (can't have timers running for blocked jobs); preemptions go first for the same reason.
7559+ */
7560+ if (!np && (out_of_time || sleep) && !blocks && !preempt)
7561+ job_completion(entry->scheduled, !sleep);
7562+
7563+ /* Link pending task if we became unlinked.
7564+ */
7565+ if (!entry->linked)
7566+ link_task_to_cpu(__take_ready(&gsnedf), entry);
7567+
7568+ /* The final scheduling decision. Do we need to switch for some reason?
7569+ * If linked is different from scheduled, then select linked as next.
7570+ */
7571+ if ((!np || blocks) &&
7572+ entry->linked != entry->scheduled) {
7573+ /* Schedule a linked job? */
7574+ if (entry->linked) {
7575+ entry->linked->rt_param.scheduled_on = entry->cpu;
7576+ next = entry->linked;
7577+ }
7578+ if (entry->scheduled) {
7579+ /* not gonna be scheduled soon */
7580+ entry->scheduled->rt_param.scheduled_on = NO_CPU;
7581+ TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
7582+ }
7583+ } else
7584+		/* Only override the Linux scheduler if we have a real-time task
7585+ * scheduled that needs to continue.
7586+ */
7587+ if (exists)
7588+ next = prev;
7589+
7590+ raw_spin_unlock(&gsnedf_lock);
7591+
7592+#ifdef WANT_ALL_SCHED_EVENTS
7593+ TRACE("gsnedf_lock released, next=0x%p\n", next);
7594+
7595+ if (next)
7596+ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
7597+ else if (exists && !next)
7598+ TRACE("becomes idle at %llu.\n", litmus_clock());
7599+#endif
7600+
7601+
7602+ return next;
7603+}
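
For illustration, the state flags computed above (exists, blocks, out_of_time, np, sleep, preempt) can be read as a small decision table. The following stand-alone user-space sketch is not part of the patch; the decide() helper and its result strings are hypothetical and only approximate the branch structure of gsnedf_schedule(), which additionally updates the CPU/task link structure under gsnedf_lock.

#include <stdio.h>

struct sched_state {
	int exists, blocks, out_of_time, np, sleep, preempt;
};

/* Approximate mapping from state flags to the action taken by
 * gsnedf_schedule(); the real code also relinks tasks and CPUs. */
static const char *decide(struct sched_state s)
{
	if (s.np && !s.blocks && (s.out_of_time || s.preempt || s.sleep))
		return "non-preemptive section: request sys_exit_np(), keep running";
	if (!s.np && (s.out_of_time || s.sleep) && !s.blocks && !s.preempt)
		return "job completes; schedule whatever gets linked next";
	if (s.blocks || s.preempt)
		return "switch to the linked task (or go idle)";
	return s.exists ? "keep the currently scheduled task" : "stay idle";
}

int main(void)
{
	struct sched_state blocking = { .exists = 1, .blocks = 1, .preempt = 1 };
	struct sched_state overrun  = { .exists = 1, .out_of_time = 1 };

	printf("blocking task:  %s\n", decide(blocking));
	printf("budget overrun: %s\n", decide(overrun));
	return 0;
}
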
7604+
7605+
7606+/* _finish_switch - we just finished the switch away from prev
7607+ */
7608+static void gsnedf_finish_switch(struct task_struct *prev)
7609+{
7610+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
7611+
7612+ entry->scheduled = is_realtime(current) ? current : NULL;
7613+#ifdef WANT_ALL_SCHED_EVENTS
7614+ TRACE_TASK(prev, "switched away from\n");
7615+#endif
7616+}
7617+
7618+
7619+/* Prepare a task for running in RT mode
7620+ */
7621+static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
7622+{
7623+ unsigned long flags;
7624+ cpu_entry_t* entry;
7625+
7626+ TRACE("gsn edf: task new %d\n", t->pid);
7627+
7628+ raw_spin_lock_irqsave(&gsnedf_lock, flags);
7629+
7630+ /* setup job params */
7631+ release_at(t, litmus_clock());
7632+
7633+ if (running) {
7634+ entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
7635+ BUG_ON(entry->scheduled);
7636+
7637+#ifdef CONFIG_RELEASE_MASTER
7638+ if (entry->cpu != gsnedf.release_master) {
7639+#endif
7640+ entry->scheduled = t;
7641+ tsk_rt(t)->scheduled_on = task_cpu(t);
7642+#ifdef CONFIG_RELEASE_MASTER
7643+ } else {
7644+ /* do not schedule on release master */
7645+ preempt(entry); /* force resched */
7646+ tsk_rt(t)->scheduled_on = NO_CPU;
7647+ }
7648+#endif
7649+ } else {
7650+ t->rt_param.scheduled_on = NO_CPU;
7651+ }
7652+ t->rt_param.linked_on = NO_CPU;
7653+
7654+ gsnedf_job_arrival(t);
7655+ raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
7656+}
7657+
7658+static void gsnedf_task_wake_up(struct task_struct *task)
7659+{
7660+ unsigned long flags;
7661+ lt_t now;
7662+
7663+ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
7664+
7665+ raw_spin_lock_irqsave(&gsnedf_lock, flags);
7666+ /* We need to take suspensions because of semaphores into
7667+ * account! If a job resumes after being suspended due to acquiring
7668+ * a semaphore, it should never be treated as a new job release.
7669+ */
7670+ if (get_rt_flags(task) == RT_F_EXIT_SEM) {
7671+ set_rt_flags(task, RT_F_RUNNING);
7672+ } else {
7673+ now = litmus_clock();
7674+ if (is_tardy(task, now)) {
7675+ /* new sporadic release */
7676+ release_at(task, now);
7677+ sched_trace_task_release(task);
7678+ }
7679+ else {
7680+ if (task->rt.time_slice) {
7681+ /* came back in time before deadline
7682+ */
7683+ set_rt_flags(task, RT_F_RUNNING);
7684+ }
7685+ }
7686+ }
7687+ gsnedf_job_arrival(task);
7688+ raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
7689+}
7690+
7691+static void gsnedf_task_block(struct task_struct *t)
7692+{
7693+ unsigned long flags;
7694+
7695+ TRACE_TASK(t, "block at %llu\n", litmus_clock());
7696+
7697+ /* unlink if necessary */
7698+ raw_spin_lock_irqsave(&gsnedf_lock, flags);
7699+ unlink(t);
7700+ raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
7701+
7702+ BUG_ON(!is_realtime(t));
7703+}
7704+
7705+
7706+static void gsnedf_task_exit(struct task_struct * t)
7707+{
7708+ unsigned long flags;
7709+
7710+ /* unlink if necessary */
7711+ raw_spin_lock_irqsave(&gsnedf_lock, flags);
7712+ unlink(t);
7713+ if (tsk_rt(t)->scheduled_on != NO_CPU) {
7714+ gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
7715+ tsk_rt(t)->scheduled_on = NO_CPU;
7716+ }
7717+ raw_spin_unlock_irqrestore(&gsnedf_lock, flags);
7718+
7719+ BUG_ON(!is_realtime(t));
7720+ TRACE_TASK(t, "RIP\n");
7721+}
7722+
7723+#ifdef CONFIG_FMLP
7724+
7725+/* Update the queue position of a task that got its priority boosted via
7726+ * priority inheritance. */
7727+static void update_queue_position(struct task_struct *holder)
7728+{
7729+	/* We don't know whether holder is in the ready queue. It should be, but
7730+ * on a budget overrun it may already be in a release queue. Hence,
7731+ * calling unlink() is not possible since it assumes that the task is
7732+ * not in a release queue. However, we can safely check whether
7733+ * sem->holder is currently in a queue or scheduled after locking both
7734+ * the release and the ready queue lock. */
7735+
7736+ /* Assumption: caller holds gsnedf_lock */
7737+
7738+ int check_preempt = 0;
7739+
7740+ if (tsk_rt(holder)->linked_on != NO_CPU) {
7741+ TRACE_TASK(holder, "%s: linked on %d\n",
7742+ __FUNCTION__, tsk_rt(holder)->linked_on);
7743+ /* Holder is scheduled; need to re-order CPUs.
7744+ * We can't use heap_decrease() here since
7745+ * the cpu_heap is ordered in reverse direction, so
7746+ * it is actually an increase. */
7747+ bheap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
7748+ gsnedf_cpus[tsk_rt(holder)->linked_on]->hn);
7749+ bheap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
7750+ gsnedf_cpus[tsk_rt(holder)->linked_on]->hn);
7751+ } else {
7752+ /* holder may be queued: first stop queue changes */
7753+ raw_spin_lock(&gsnedf.release_lock);
7754+ if (is_queued(holder)) {
7755+ TRACE_TASK(holder, "%s: is queued\n",
7756+ __FUNCTION__);
7757+ /* We need to update the position
7758+ * of holder in some heap. Note that this
7759+ * may be a release heap. */
7760+ check_preempt =
7761+ !bheap_decrease(edf_ready_order,
7762+ tsk_rt(holder)->heap_node);
7763+ } else {
7764+ /* Nothing to do: if it is not queued and not linked
7765+ * then it is currently being moved by other code
7766+ * (e.g., a timer interrupt handler) that will use the
7767+ * correct priority when enqueuing the task. */
7768+ TRACE_TASK(holder, "%s: is NOT queued => Done.\n",
7769+ __FUNCTION__);
7770+ }
7771+ raw_spin_unlock(&gsnedf.release_lock);
7772+
7773+ /* If holder was enqueued in a release heap, then the following
7774+ * preemption check is pointless, but we can't easily detect
7775+ * that case. If you want to fix this, then consider that
7776+ * simply adding a state flag requires O(n) time to update when
7777+ * releasing n tasks, which conflicts with the goal to have
7778+ * O(log n) merges. */
7779+ if (check_preempt) {
7780+ /* heap_decrease() hit the top level of the heap: make
7781+ * sure preemption checks get the right task, not the
7782+ * potentially stale cache. */
7783+ bheap_uncache_min(edf_ready_order,
7784+ &gsnedf.ready_queue);
7785+ check_for_preemptions();
7786+ }
7787+ }
7788+}
7789+
7790+static long gsnedf_pi_block(struct pi_semaphore *sem,
7791+ struct task_struct *new_waiter)
7792+{
7793+ /* This callback has to handle the situation where a new waiter is
7794+ * added to the wait queue of the semaphore.
7795+ *
7796+	 * We must check whether it has a higher priority than the currently
7797+ * highest-priority task, and then potentially reschedule.
7798+ */
7799+
7800+ BUG_ON(!new_waiter);
7801+
7802+ if (edf_higher_prio(new_waiter, sem->hp.task)) {
7803+ TRACE_TASK(new_waiter, " boosts priority via %p\n", sem);
7804+ /* called with IRQs disabled */
7805+ raw_spin_lock(&gsnedf_lock);
7806+ /* store new highest-priority task */
7807+ sem->hp.task = new_waiter;
7808+ if (sem->holder) {
7809+ TRACE_TASK(sem->holder,
7810+ " holds %p and will inherit from %s/%d\n",
7811+ sem,
7812+ new_waiter->comm, new_waiter->pid);
7813+ /* let holder inherit */
7814+ sem->holder->rt_param.inh_task = new_waiter;
7815+ update_queue_position(sem->holder);
7816+ }
7817+ raw_spin_unlock(&gsnedf_lock);
7818+ }
7819+
7820+ return 0;
7821+}
7822+
7823+static long gsnedf_inherit_priority(struct pi_semaphore *sem,
7824+ struct task_struct *new_owner)
7825+{
7826+ /* We don't need to acquire the gsnedf_lock since at the time of this
7827+ * call new_owner isn't actually scheduled yet (it's still sleeping)
7828+ * and since the calling function already holds sem->wait.lock, which
7829+ * prevents concurrent sem->hp.task changes.
7830+ */
7831+
7832+ if (sem->hp.task && sem->hp.task != new_owner) {
7833+ new_owner->rt_param.inh_task = sem->hp.task;
7834+ TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
7835+ sem->hp.task->comm, sem->hp.task->pid);
7836+ } else
7837+ TRACE_TASK(new_owner,
7838+ "cannot inherit priority, "
7839+ "no higher priority job waits.\n");
7840+ return 0;
7841+}
7842+
7843+/* This function is called on a semaphore release, and assumes that
7844+ * the current task is also the semaphore holder.
7845+ */
7846+static long gsnedf_return_priority(struct pi_semaphore *sem)
7847+{
7848+ struct task_struct* t = current;
7849+ int ret = 0;
7850+
7851+ /* Find new highest-priority semaphore task
7852+ * if holder task is the current hp.task.
7853+ *
7854+ * Calling function holds sem->wait.lock.
7855+ */
7856+ if (t == sem->hp.task)
7857+ edf_set_hp_task(sem);
7858+
7859+ TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
7860+
7861+ if (t->rt_param.inh_task) {
7862+ /* interrupts already disabled by PI code */
7863+ raw_spin_lock(&gsnedf_lock);
7864+
7865+ /* Reset inh_task to NULL. */
7866+ t->rt_param.inh_task = NULL;
7867+
7868+ /* Check if rescheduling is necessary */
7869+ unlink(t);
7870+ gsnedf_job_arrival(t);
7871+ raw_spin_unlock(&gsnedf_lock);
7872+ }
7873+
7874+ return ret;
7875+}
7876+
7877+#endif
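
The FMLP callbacks above boil down to one idea: while a lock holder has inh_task set, priority comparisons use the inherited (earlier) deadline. Below is a minimal user-space sketch of that idea; struct fake_task and effective_deadline() are hypothetical simplifications, since in the patch the comparison happens inside edf_higher_prio() based on rt_param.inh_task.

#include <stdio.h>

struct fake_task {
	const char *name;
	unsigned long long deadline;		/* absolute deadline */
	const struct fake_task *inh_task;	/* NULL if nothing is inherited */
};

/* A task competes with the earlier of its own deadline and the deadline
 * of the task it inherits from. */
static unsigned long long effective_deadline(const struct fake_task *t)
{
	if (t->inh_task && t->inh_task->deadline < t->deadline)
		return t->inh_task->deadline;
	return t->deadline;
}

int main(void)
{
	struct fake_task waiter = { "waiter", 100, NULL };
	struct fake_task holder = { "holder", 500, &waiter };

	printf("%s competes with effective deadline %llu\n",
	       holder.name, effective_deadline(&holder));
	return 0;
}
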
7878+
7879+static long gsnedf_admit_task(struct task_struct* tsk)
7880+{
7881+ return 0;
7882+}
7883+
7884+static long gsnedf_activate_plugin(void)
7885+{
7886+ int cpu;
7887+ cpu_entry_t *entry;
7888+
7889+ bheap_init(&gsnedf_cpu_heap);
7890+#ifdef CONFIG_RELEASE_MASTER
7891+ gsnedf.release_master = atomic_read(&release_master_cpu);
7892+#endif
7893+
7894+ for_each_online_cpu(cpu) {
7895+ entry = &per_cpu(gsnedf_cpu_entries, cpu);
7896+ bheap_node_init(&entry->hn, entry);
7897+ atomic_set(&entry->will_schedule, 0);
7898+ entry->linked = NULL;
7899+ entry->scheduled = NULL;
7900+#ifdef CONFIG_RELEASE_MASTER
7901+ if (cpu != gsnedf.release_master) {
7902+#endif
7903+ TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
7904+ update_cpu_position(entry);
7905+#ifdef CONFIG_RELEASE_MASTER
7906+ } else {
7907+ TRACE("GSN-EDF: CPU %d is release master.\n", cpu);
7908+ }
7909+#endif
7910+ }
7911+ return 0;
7912+}
7913+
7914+/* Plugin object */
7915+static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
7916+ .plugin_name = "GSN-EDF",
7917+ .finish_switch = gsnedf_finish_switch,
7918+ .tick = gsnedf_tick,
7919+ .task_new = gsnedf_task_new,
7920+ .complete_job = complete_job,
7921+ .task_exit = gsnedf_task_exit,
7922+ .schedule = gsnedf_schedule,
7923+ .task_wake_up = gsnedf_task_wake_up,
7924+ .task_block = gsnedf_task_block,
7925+#ifdef CONFIG_FMLP
7926+ .fmlp_active = 1,
7927+ .pi_block = gsnedf_pi_block,
7928+ .inherit_priority = gsnedf_inherit_priority,
7929+ .return_priority = gsnedf_return_priority,
7930+#endif
7931+ .admit_task = gsnedf_admit_task,
7932+ .activate_plugin = gsnedf_activate_plugin,
7933+};
7934+
7935+
7936+static int __init init_gsn_edf(void)
7937+{
7938+ int cpu;
7939+ cpu_entry_t *entry;
7940+
7941+ bheap_init(&gsnedf_cpu_heap);
7942+ /* initialize CPU state */
7943+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
7944+ entry = &per_cpu(gsnedf_cpu_entries, cpu);
7945+ gsnedf_cpus[cpu] = entry;
7946+ atomic_set(&entry->will_schedule, 0);
7947+ entry->cpu = cpu;
7948+ entry->hn = &gsnedf_heap_node[cpu];
7949+ bheap_node_init(&entry->hn, entry);
7950+ }
7951+ edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
7952+ return register_sched_plugin(&gsn_edf_plugin);
7953+}
7954+
7955+
7956+module_init(init_gsn_edf);
7957diff --git a/litmus/sched_litmus.c b/litmus/sched_litmus.c
7958new file mode 100644
7959index 0000000..3ed713b
7960--- /dev/null
7961+++ b/litmus/sched_litmus.c
7962@@ -0,0 +1,315 @@
7963+/* This file is included from kernel/sched.c */
7964+
7965+#include <litmus/litmus.h>
7966+#include <litmus/budget.h>
7967+#include <litmus/sched_plugin.h>
7968+
7969+static void update_time_litmus(struct rq *rq, struct task_struct *p)
7970+{
7971+ u64 delta = rq->clock - p->se.exec_start;
7972+ if (unlikely((s64)delta < 0))
7973+ delta = 0;
7974+ /* per job counter */
7975+ p->rt_param.job_params.exec_time += delta;
7976+ /* task counter */
7977+ p->se.sum_exec_runtime += delta;
7978+ /* sched_clock() */
7979+ p->se.exec_start = rq->clock;
7980+ cpuacct_charge(p, delta);
7981+}
7982+
7983+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
7984+static void double_rq_unlock(struct rq *rq1, struct rq *rq2);
7985+
7986+/*
7987+ * litmus_tick() gets called by scheduler_tick() at HZ frequency.
7988+ * Interrupts are disabled.
7989+ */
7990+static void litmus_tick(struct rq *rq, struct task_struct *p)
7991+{
7992+ TS_PLUGIN_TICK_START;
7993+
7994+ if (is_realtime(p))
7995+ update_time_litmus(rq, p);
7996+
7997+ /* plugin tick */
7998+ litmus->tick(p);
7999+
8000+ TS_PLUGIN_TICK_END;
8001+
8002+ return;
8003+}
8004+
8005+static struct task_struct *
8006+litmus_schedule(struct rq *rq, struct task_struct *prev)
8007+{
8008+ struct rq* other_rq;
8009+ struct task_struct *next;
8010+
8011+ long was_running;
8012+ lt_t _maybe_deadlock = 0;
8013+
8014+ /* let the plugin schedule */
8015+ next = litmus->schedule(prev);
8016+
8017+ /* check if a global plugin pulled a task from a different RQ */
8018+ if (next && task_rq(next) != rq) {
8019+ /* we need to migrate the task */
8020+ other_rq = task_rq(next);
8021+ TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);
8022+
8023+ /* while we drop the lock, the prev task could change its
8024+ * state
8025+ */
8026+ was_running = is_running(prev);
8027+ mb();
8028+ raw_spin_unlock(&rq->lock);
8029+
8030+ /* Don't race with a concurrent switch. This could deadlock in
8031+ * the case of cross or circular migrations. It's the job of
8032+ * the plugin to make sure that doesn't happen.
8033+ */
8034+ TRACE_TASK(next, "stack_in_use=%d\n",
8035+ next->rt_param.stack_in_use);
8036+ if (next->rt_param.stack_in_use != NO_CPU) {
8037+ TRACE_TASK(next, "waiting to deschedule\n");
8038+ _maybe_deadlock = litmus_clock();
8039+ }
8040+ while (next->rt_param.stack_in_use != NO_CPU) {
8041+ cpu_relax();
8042+ mb();
8043+ if (next->rt_param.stack_in_use == NO_CPU)
8044+ TRACE_TASK(next,"descheduled. Proceeding.\n");
8045+
8046+ if (lt_before(_maybe_deadlock + 10000000,
8047+ litmus_clock())) {
8048+ /* We've been spinning for 10ms.
8049+ * Something can't be right!
8050+ * Let's abandon the task and bail out; at least
8051+ * we will have debug info instead of a hard
8052+ * deadlock.
8053+ */
8054+ TRACE_TASK(next,"stack too long in use. "
8055+ "Deadlock?\n");
8056+ next = NULL;
8057+
8058+ /* bail out */
8059+ raw_spin_lock(&rq->lock);
8060+ return next;
8061+ }
8062+ }
8063+#ifdef __ARCH_WANT_UNLOCKED_CTXSW
8064+ if (next->oncpu)
8065+ TRACE_TASK(next, "waiting for !oncpu");
8066+ while (next->oncpu) {
8067+ cpu_relax();
8068+ mb();
8069+ }
8070+#endif
8071+ double_rq_lock(rq, other_rq);
8072+ mb();
8073+ if (is_realtime(prev) && is_running(prev) != was_running) {
8074+ TRACE_TASK(prev,
8075+ "state changed while we dropped"
8076+ " the lock: is_running=%d, was_running=%d\n",
8077+ is_running(prev), was_running);
8078+ if (is_running(prev) && !was_running) {
8079+			/* The prev task became unblocked; we need to
8080+			 * simulate the normal sequence of events for the
8081+			 * scheduler plugin.
8082+ */
8083+ litmus->task_block(prev);
8084+ litmus->task_wake_up(prev);
8085+ }
8086+ }
8087+
8088+ set_task_cpu(next, smp_processor_id());
8089+
8090+ /* DEBUG: now that we have the lock we need to make sure a
8091+ * couple of things still hold:
8092+ * - it is still a real-time task
8093+ * - it is still runnable (could have been stopped)
8094+ * If either is violated, then the active plugin is
8095+ * doing something wrong.
8096+ */
8097+ if (!is_realtime(next) || !is_running(next)) {
8098+ /* BAD BAD BAD */
8099+ TRACE_TASK(next,"BAD: migration invariant FAILED: "
8100+ "rt=%d running=%d\n",
8101+ is_realtime(next),
8102+ is_running(next));
8103+ /* drop the task */
8104+ next = NULL;
8105+ }
8106+ /* release the other CPU's runqueue, but keep ours */
8107+ raw_spin_unlock(&other_rq->lock);
8108+ }
8109+ if (next) {
8110+ next->rt_param.stack_in_use = rq->cpu;
8111+ next->se.exec_start = rq->clock;
8112+ }
8113+
8114+ update_enforcement_timer(next);
8115+ return next;
8116+}
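
The busy-wait in litmus_schedule() above bounds how long a CPU spins on stack_in_use before giving up. A stand-alone sketch of that bounded-spin pattern follows; the names now_ns() and wait_for_stack_release() are hypothetical stand-ins for litmus_clock() and the kernel loop.

#define _POSIX_C_SOURCE 199309L
#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint64_t now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Spin until *flag clears or roughly 10 ms pass; returns -1 if we gave
 * up (the "Deadlock?" bail-out path in litmus_schedule()). */
static int wait_for_stack_release(volatile const int *flag)
{
	uint64_t start = now_ns();

	while (*flag) {
		if (now_ns() - start > 10000000ull)
			return -1;
	}
	return 0;
}

int main(void)
{
	volatile int stack_in_use = 1;	/* never cleared: simulates a stuck CPU */

	if (wait_for_stack_release(&stack_in_use))
		printf("stack too long in use -- bailing out\n");
	return 0;
}
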
8117+
8118+static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
8119+ int wakeup, bool head)
8120+{
8121+ if (wakeup) {
8122+ sched_trace_task_resume(p);
8123+ tsk_rt(p)->present = 1;
8124+ /* LITMUS^RT plugins need to update the state
8125+ * _before_ making it available in global structures.
8126+ * Linux gets away with being lazy about the task state
8127+ * update. We can't do that, hence we update the task
8128+ * state already here.
8129+ *
8130+ * WARNING: this needs to be re-evaluated when porting
8131+ * to newer kernel versions.
8132+ */
8133+ p->state = TASK_RUNNING;
8134+ litmus->task_wake_up(p);
8135+
8136+ rq->litmus.nr_running++;
8137+ } else
8138+ TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
8139+}
8140+
8141+static void dequeue_task_litmus(struct rq *rq, struct task_struct *p, int sleep)
8142+{
8143+ if (sleep) {
8144+ litmus->task_block(p);
8145+ tsk_rt(p)->present = 0;
8146+ sched_trace_task_block(p);
8147+
8148+ rq->litmus.nr_running--;
8149+ } else
8150+ TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
8151+}
8152+
8153+static void yield_task_litmus(struct rq *rq)
8154+{
8155+ BUG_ON(rq->curr != current);
8156+ /* sched_yield() is called to trigger delayed preemptions.
8157+ * Thus, mark the current task as needing to be rescheduled.
8158+ * This will cause the scheduler plugin to be invoked, which can
8159+ * then determine if a preemption is still required.
8160+ */
8161+ clear_exit_np(current);
8162+ set_tsk_need_resched(current);
8163+}
8164+
8165+/* Plugins are responsible for this.
8166+ */
8167+static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p, int flags)
8168+{
8169+}
8170+
8171+static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
8172+{
8173+}
8174+
8175+static void pre_schedule_litmus(struct rq *rq, struct task_struct *prev)
8176+{
8177+ update_time_litmus(rq, prev);
8178+ if (!is_running(prev))
8179+ tsk_rt(prev)->present = 0;
8180+}
8181+
8182+/* pick_next_task_litmus() - wrapper around litmus_schedule()
8183+ *
8184+ * returns the next task to be scheduled
8185+ */
8186+static struct task_struct *pick_next_task_litmus(struct rq *rq)
8187+{
8188+ /* get the to-be-switched-out task (prev) */
8189+ struct task_struct *prev = rq->litmus.prev;
8190+ struct task_struct *next;
8191+
8192+ /* if not called from schedule() but from somewhere
8193+ * else (e.g., migration), return now!
8194+ */
8195+ if(!rq->litmus.prev)
8196+ return NULL;
8197+
8198+ rq->litmus.prev = NULL;
8199+
8200+ TS_PLUGIN_SCHED_START;
8201+ next = litmus_schedule(rq, prev);
8202+ TS_PLUGIN_SCHED_END;
8203+
8204+ return next;
8205+}
8206+
8207+static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
8208+{
8209+ /* nothing to do; tick related tasks are done by litmus_tick() */
8210+ return;
8211+}
8212+
8213+static void switched_to_litmus(struct rq *rq, struct task_struct *p, int running)
8214+{
8215+}
8216+
8217+static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
8218+ int oldprio, int running)
8219+{
8220+}
8221+
8222+unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
8223+{
8224+ /* return infinity */
8225+ return 0;
8226+}
8227+
8228+/* This is called when a task becomes a real-time task, either due to a SCHED_*
8229+ * class transition or due to PI mutex inheritance. We don't handle Linux PI
8230+ * mutex inheritance yet (and probably never will). Use LITMUS-provided
8231+ * synchronization primitives instead.
8232+ */
8233+static void set_curr_task_litmus(struct rq *rq)
8234+{
8235+ rq->curr->se.exec_start = rq->clock;
8236+}
8237+
8238+
8239+#ifdef CONFIG_SMP
8240+/* execve tries to rebalance the task in this scheduling domain.
8241+ * We don't care about the scheduling domain; this can get called from
8242+ * exec, fork, and wakeup.
8243+ */
8244+static int select_task_rq_litmus(struct task_struct *p, int sd_flag, int flags)
8245+{
8246+ /* preemption is already disabled.
8247+ * We don't want to change cpu here
8248+ */
8249+ return task_cpu(p);
8250+}
8251+#endif
8252+
8253+static const struct sched_class litmus_sched_class = {
8254+ .next = &rt_sched_class,
8255+ .enqueue_task = enqueue_task_litmus,
8256+ .dequeue_task = dequeue_task_litmus,
8257+ .yield_task = yield_task_litmus,
8258+
8259+ .check_preempt_curr = check_preempt_curr_litmus,
8260+
8261+ .pick_next_task = pick_next_task_litmus,
8262+ .put_prev_task = put_prev_task_litmus,
8263+
8264+#ifdef CONFIG_SMP
8265+ .select_task_rq = select_task_rq_litmus,
8266+
8267+ .pre_schedule = pre_schedule_litmus,
8268+#endif
8269+
8270+ .set_curr_task = set_curr_task_litmus,
8271+ .task_tick = task_tick_litmus,
8272+
8273+ .get_rr_interval = get_rr_interval_litmus,
8274+
8275+ .prio_changed = prio_changed_litmus,
8276+ .switched_to = switched_to_litmus,
8277+};
8278diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
8279new file mode 100644
8280index 0000000..ea77d32
8281--- /dev/null
8282+++ b/litmus/sched_pfair.c
8283@@ -0,0 +1,897 @@
8284+/*
8285+ * kernel/sched_pfair.c
8286+ *
8287+ * Implementation of the (global) Pfair scheduling algorithm.
8288+ *
8289+ */
8290+
8291+#include <asm/div64.h>
8292+#include <linux/delay.h>
8293+#include <linux/module.h>
8294+#include <linux/spinlock.h>
8295+#include <linux/percpu.h>
8296+#include <linux/sched.h>
8297+#include <linux/list.h>
8298+#include <linux/slab.h>
8299+
8300+#include <litmus/litmus.h>
8301+#include <litmus/jobs.h>
8302+#include <litmus/rt_domain.h>
8303+#include <litmus/sched_plugin.h>
8304+#include <litmus/sched_trace.h>
8305+
8306+#include <litmus/bheap.h>
8307+
8308+struct subtask {
8309+ /* measured in quanta relative to job release */
8310+ quanta_t release;
8311+ quanta_t deadline;
8312+ quanta_t overlap; /* called "b bit" by PD^2 */
8313+ quanta_t group_deadline;
8314+};
8315+
8316+struct pfair_param {
8317+ quanta_t quanta; /* number of subtasks */
8318+ quanta_t cur; /* index of current subtask */
8319+
8320+ quanta_t release; /* in quanta */
8321+ quanta_t period; /* in quanta */
8322+
8323+ quanta_t last_quantum; /* when scheduled last */
8324+ int last_cpu; /* where scheduled last */
8325+
8326+ unsigned int sporadic_release; /* On wakeup, new sporadic release? */
8327+
8328+ struct subtask subtasks[0]; /* allocate together with pfair_param */
8329+};
8330+
8331+#define tsk_pfair(tsk) ((tsk)->rt_param.pfair)
8332+
8333+struct pfair_state {
8334+ int cpu;
8335+ volatile quanta_t cur_tick; /* updated by the CPU that is advancing
8336+ * the time */
8337+ volatile quanta_t local_tick; /* What tick is the local CPU currently
8338+ * executing? Updated only by the local
8339+ * CPU. In QEMU, this may lag behind the
8340+ * current tick. In a real system, with
8341+ * proper timers and aligned quanta,
8342+ * that should only be the
8343+ * case for a very short time after the
8344+ * time advanced. With staggered quanta,
8345+ * it will lag for the duration of the
8346+ * offset.
8347+ */
8348+
8349+ struct task_struct* linked; /* the task that should be executing */
8350+ struct task_struct* local; /* the local copy of linked */
8351+ struct task_struct* scheduled; /* what is actually scheduled */
8352+
8353+ unsigned long missed_quanta;
8354+ lt_t offset; /* stagger offset */
8355+};
8356+
8357+/* Currently, we limit the maximum period of any task to 2000 quanta.
8358+ * The reason is that it makes the implementation easier since we do not
8359+ * need to reallocate the release wheel on task arrivals.
8360+ * This restriction could be revisited in the future.
8361+ */
8362+#define PFAIR_MAX_PERIOD 2000
8363+
8364+/* This is the release queue wheel. It is indexed by pfair_time %
8365+ * PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR priority, so that it can be
8366+ * merged with the ready queue.
8367+ */
8368+static struct bheap release_queue[PFAIR_MAX_PERIOD];
8369+
8370+DEFINE_PER_CPU(struct pfair_state, pfair_state);
8371+struct pfair_state* *pstate; /* short cut */
8372+
8373+static quanta_t pfair_time = 0; /* the "official" PFAIR clock */
8374+static quanta_t merge_time = 0; /* Updated after the release queue has been
8375+ * merged. Used by drop_all_references().
8376+ */
8377+
8378+static rt_domain_t pfair;
8379+
8380+/* The pfair_lock is used to serialize all scheduling events.
8381+ */
8382+#define pfair_lock pfair.ready_lock
8383+
8384+/* Enable for lots of trace info.
8385+ * #define PFAIR_DEBUG
8386+ */
8387+
8388+#ifdef PFAIR_DEBUG
8389+#define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args)
8390+#define PTRACE(f, args...) TRACE(f, ## args)
8391+#else
8392+#define PTRACE_TASK(t, f, args...)
8393+#define PTRACE(f, args...)
8394+#endif
8395+
8396+/* gcc will inline all of these accessor functions... */
8397+static struct subtask* cur_subtask(struct task_struct* t)
8398+{
8399+ return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur;
8400+}
8401+
8402+static quanta_t cur_deadline(struct task_struct* t)
8403+{
8404+ return cur_subtask(t)->deadline + tsk_pfair(t)->release;
8405+}
8406+
8407+
8408+static quanta_t cur_sub_release(struct task_struct* t)
8409+{
8410+ return cur_subtask(t)->release + tsk_pfair(t)->release;
8411+}
8412+
8413+static quanta_t cur_release(struct task_struct* t)
8414+{
8415+#ifdef EARLY_RELEASE
8416+ /* only the release of the first subtask counts when we early
8417+ * release */
8418+ return tsk_pfair(t)->release;
8419+#else
8420+ return cur_sub_release(t);
8421+#endif
8422+}
8423+
8424+static quanta_t cur_overlap(struct task_struct* t)
8425+{
8426+ return cur_subtask(t)->overlap;
8427+}
8428+
8429+static quanta_t cur_group_deadline(struct task_struct* t)
8430+{
8431+ quanta_t gdl = cur_subtask(t)->group_deadline;
8432+ if (gdl)
8433+ return gdl + tsk_pfair(t)->release;
8434+ else
8435+ return gdl;
8436+}
8437+
8438+
8439+static int pfair_higher_prio(struct task_struct* first,
8440+ struct task_struct* second)
8441+{
8442+ return /* first task must exist */
8443+ first && (
8444+ /* Does the second task exist and is it a real-time task? If
8445+ * not, the first task (which is a RT task) has higher
8446+ * priority.
8447+ */
8448+ !second || !is_realtime(second) ||
8449+
8450+ /* Is the (subtask) deadline of the first task earlier?
8451+ * Then it has higher priority.
8452+ */
8453+ time_before(cur_deadline(first), cur_deadline(second)) ||
8454+
8455+ /* Do we have a deadline tie?
8456+ * Then break by B-bit.
8457+ */
8458+ (cur_deadline(first) == cur_deadline(second) &&
8459+ (cur_overlap(first) > cur_overlap(second) ||
8460+
8461+ /* Do we have a B-bit tie?
8462+ * Then break by group deadline.
8463+ */
8464+ (cur_overlap(first) == cur_overlap(second) &&
8465+ (time_after(cur_group_deadline(first),
8466+ cur_group_deadline(second)) ||
8467+
8468+ /* Do we have a group deadline tie?
8469+ * Then break by PID, which are unique.
8470+ */
8471+ (cur_group_deadline(first) ==
8472+ cur_group_deadline(second) &&
8473+ first->pid < second->pid))))));
8474+}
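
pfair_higher_prio() implements the PD^2 priority order: earlier subtask deadline first, with ties broken by a larger b-bit, then by a later group deadline, then by PID. The nested expression above is equivalent to the flat comparison sketched below (a user-space approximation; struct pd2_prio is hypothetical, and the plain integer comparisons ignore the quanta-counter wraparound that time_before()/time_after() handle).

#include <stdio.h>

struct pd2_prio {
	unsigned long deadline;		/* subtask deadline (quanta) */
	unsigned long overlap;		/* "b bit" */
	unsigned long group_deadline;	/* group deadline (quanta) */
	int pid;
};

/* Returns nonzero if a has higher PD^2 priority than b. */
static int pd2_higher_prio(const struct pd2_prio *a, const struct pd2_prio *b)
{
	if (a->deadline != b->deadline)
		return a->deadline < b->deadline;	/* earlier deadline wins */
	if (a->overlap != b->overlap)
		return a->overlap > b->overlap;		/* larger b-bit wins */
	if (a->group_deadline != b->group_deadline)
		return a->group_deadline > b->group_deadline; /* later group deadline wins */
	return a->pid < b->pid;				/* PIDs are unique */
}

int main(void)
{
	struct pd2_prio x = { 10, 1, 12, 100 };
	struct pd2_prio y = { 10, 0, 15, 200 };

	printf("x has higher priority than y: %d\n", pd2_higher_prio(&x, &y));
	return 0;
}
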
8475+
8476+int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
8477+{
8478+ return pfair_higher_prio(bheap2task(a), bheap2task(b));
8479+}
8480+
8481+/* return the proper release queue for time t */
8482+static struct bheap* relq(quanta_t t)
8483+{
8484+ struct bheap* rq = &release_queue[t % PFAIR_MAX_PERIOD];
8485+ return rq;
8486+}
8487+
8488+static void prepare_release(struct task_struct* t, quanta_t at)
8489+{
8490+ tsk_pfair(t)->release = at;
8491+ tsk_pfair(t)->cur = 0;
8492+}
8493+
8494+static void __pfair_add_release(struct task_struct* t, struct bheap* queue)
8495+{
8496+ bheap_insert(pfair_ready_order, queue,
8497+ tsk_rt(t)->heap_node);
8498+}
8499+
8500+static void pfair_add_release(struct task_struct* t)
8501+{
8502+ BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
8503+ __pfair_add_release(t, relq(cur_release(t)));
8504+}
8505+
8506+/* pull released tasks from the release queue */
8507+static void poll_releases(quanta_t time)
8508+{
8509+ __merge_ready(&pfair, relq(time));
8510+ merge_time = time;
8511+}
8512+
8513+static void check_preempt(struct task_struct* t)
8514+{
8515+ int cpu = NO_CPU;
8516+ if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on &&
8517+ tsk_rt(t)->present) {
8518+ /* the task can be scheduled and
8519+ * is not scheduled where it ought to be scheduled
8520+ */
8521+ cpu = tsk_rt(t)->linked_on != NO_CPU ?
8522+ tsk_rt(t)->linked_on :
8523+ tsk_rt(t)->scheduled_on;
8524+ PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n",
8525+ tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on);
8526+ /* preempt */
8527+ if (cpu == smp_processor_id())
8528+ set_tsk_need_resched(current);
8529+ else {
8530+ smp_send_reschedule(cpu);
8531+ }
8532+ }
8533+}
8534+
8535+/* caller must hold pfair_lock */
8536+static void drop_all_references(struct task_struct *t)
8537+{
8538+ int cpu;
8539+ struct pfair_state* s;
8540+ struct bheap* q;
8541+ if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
8542+ /* figure out what queue the node is in */
8543+ if (time_before_eq(cur_release(t), merge_time))
8544+ q = &pfair.ready_queue;
8545+ else
8546+ q = relq(cur_release(t));
8547+ bheap_delete(pfair_ready_order, q,
8548+ tsk_rt(t)->heap_node);
8549+ }
8550+ for (cpu = 0; cpu < num_online_cpus(); cpu++) {
8551+ s = &per_cpu(pfair_state, cpu);
8552+ if (s->linked == t)
8553+ s->linked = NULL;
8554+ if (s->local == t)
8555+ s->local = NULL;
8556+ if (s->scheduled == t)
8557+ s->scheduled = NULL;
8558+ }
8559+}
8560+
8561+/* returns 1 if the task needs to go the release queue */
8562+static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
8563+{
8564+ struct pfair_param* p = tsk_pfair(t);
8565+ int to_relq;
8566+ p->cur = (p->cur + 1) % p->quanta;
8567+ if (!p->cur) {
8568+ sched_trace_task_completion(t, 1);
8569+ if (tsk_rt(t)->present) {
8570+ /* we start a new job */
8571+ prepare_for_next_period(t);
8572+ sched_trace_task_release(t);
8573+ get_rt_flags(t) = RT_F_RUNNING;
8574+ p->release += p->period;
8575+ } else {
8576+ /* remove task from system until it wakes */
8577+ drop_all_references(t);
8578+ tsk_pfair(t)->sporadic_release = 1;
8579+ TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
8580+ cpu, p->cur);
8581+ return 0;
8582+ }
8583+ }
8584+ to_relq = time_after(cur_release(t), time);
8585+ TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n",
8586+ cpu, p->cur, to_relq);
8587+ return to_relq;
8588+}
8589+
8590+static void advance_subtasks(quanta_t time)
8591+{
8592+ int cpu, missed;
8593+ struct task_struct* l;
8594+ struct pfair_param* p;
8595+
8596+ for_each_online_cpu(cpu) {
8597+ l = pstate[cpu]->linked;
8598+ missed = pstate[cpu]->linked != pstate[cpu]->local;
8599+ if (l) {
8600+ p = tsk_pfair(l);
8601+ p->last_quantum = time;
8602+ p->last_cpu = cpu;
8603+ if (advance_subtask(time, l, cpu)) {
8604+ pstate[cpu]->linked = NULL;
8605+ pfair_add_release(l);
8606+ }
8607+ }
8608+ }
8609+}
8610+
8611+static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu)
8612+{
8613+ int cpu;
8614+ if (tsk_rt(t)->scheduled_on != NO_CPU) {
8615+ /* always observe scheduled_on linkage */
8616+ default_cpu = tsk_rt(t)->scheduled_on;
8617+ } else if (tsk_pfair(t)->last_quantum == time - 1) {
8618+ /* back2back quanta */
8619+ /* Only observe last_quantum if no scheduled_on is in the way.
8620+ * This should only kick in if a CPU missed quanta, and that
8621+ * *should* only happen in QEMU.
8622+ */
8623+ cpu = tsk_pfair(t)->last_cpu;
8624+ if (!pstate[cpu]->linked ||
8625+ tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) {
8626+ default_cpu = cpu;
8627+ }
8628+ }
8629+ return default_cpu;
8630+}
8631+
8632+/* returns one if linking was redirected */
8633+static int pfair_link(quanta_t time, int cpu,
8634+ struct task_struct* t)
8635+{
8636+ int target = target_cpu(time, t, cpu);
8637+ struct task_struct* prev = pstate[cpu]->linked;
8638+ struct task_struct* other;
8639+
8640+ if (target != cpu) {
8641+ other = pstate[target]->linked;
8642+ pstate[target]->linked = t;
8643+ tsk_rt(t)->linked_on = target;
8644+ if (!other)
8645+ /* linked ok, but reschedule this CPU */
8646+ return 1;
8647+ if (target < cpu) {
8648+ /* link other to cpu instead */
8649+ tsk_rt(other)->linked_on = cpu;
8650+ pstate[cpu]->linked = other;
8651+ if (prev) {
8652+ /* prev got pushed back into the ready queue */
8653+ tsk_rt(prev)->linked_on = NO_CPU;
8654+ __add_ready(&pfair, prev);
8655+ }
8656+ /* we are done with this cpu */
8657+ return 0;
8658+ } else {
8659+			/* re-add other; its original CPU was not considered yet */
8660+ tsk_rt(other)->linked_on = NO_CPU;
8661+ __add_ready(&pfair, other);
8662+ /* reschedule this CPU */
8663+ return 1;
8664+ }
8665+ } else {
8666+ pstate[cpu]->linked = t;
8667+ tsk_rt(t)->linked_on = cpu;
8668+ if (prev) {
8669+ /* prev got pushed back into the ready queue */
8670+ tsk_rt(prev)->linked_on = NO_CPU;
8671+ __add_ready(&pfair, prev);
8672+ }
8673+ /* we are done with this CPU */
8674+ return 0;
8675+ }
8676+}
8677+
8678+static void schedule_subtasks(quanta_t time)
8679+{
8680+ int cpu, retry;
8681+
8682+ for_each_online_cpu(cpu) {
8683+ retry = 1;
8684+ while (retry) {
8685+ if (pfair_higher_prio(__peek_ready(&pfair),
8686+ pstate[cpu]->linked))
8687+ retry = pfair_link(time, cpu,
8688+ __take_ready(&pfair));
8689+ else
8690+ retry = 0;
8691+ }
8692+ }
8693+}
8694+
8695+static void schedule_next_quantum(quanta_t time)
8696+{
8697+ int cpu;
8698+
8699+ /* called with interrupts disabled */
8700+ PTRACE("--- Q %lu at %llu PRE-SPIN\n",
8701+ time, litmus_clock());
8702+ raw_spin_lock(&pfair_lock);
8703+ PTRACE("<<< Q %lu at %llu\n",
8704+ time, litmus_clock());
8705+
8706+ sched_trace_quantum_boundary();
8707+
8708+ advance_subtasks(time);
8709+ poll_releases(time);
8710+ schedule_subtasks(time);
8711+
8712+ for (cpu = 0; cpu < num_online_cpus(); cpu++)
8713+ if (pstate[cpu]->linked)
8714+ PTRACE_TASK(pstate[cpu]->linked,
8715+ " linked on %d.\n", cpu);
8716+ else
8717+ PTRACE("(null) linked on %d.\n", cpu);
8718+
8719+ /* We are done. Advance time. */
8720+ mb();
8721+ for (cpu = 0; cpu < num_online_cpus(); cpu++) {
8722+ if (pstate[cpu]->local_tick != pstate[cpu]->cur_tick) {
8723+ TRACE("BAD Quantum not acked on %d "
8724+ "(l:%lu c:%lu p:%lu)\n",
8725+ cpu,
8726+ pstate[cpu]->local_tick,
8727+ pstate[cpu]->cur_tick,
8728+ pfair_time);
8729+ pstate[cpu]->missed_quanta++;
8730+ }
8731+ pstate[cpu]->cur_tick = time;
8732+ }
8733+ PTRACE(">>> Q %lu at %llu\n",
8734+ time, litmus_clock());
8735+ raw_spin_unlock(&pfair_lock);
8736+}
8737+
8738+static noinline void wait_for_quantum(quanta_t q, struct pfair_state* state)
8739+{
8740+ quanta_t loc;
8741+
8742+ goto first; /* skip mb() on first iteration */
8743+ do {
8744+ cpu_relax();
8745+ mb();
8746+ first: loc = state->cur_tick;
8747+ /* FIXME: what if loc > cur? */
8748+ } while (time_before(loc, q));
8749+ PTRACE("observed cur_tick:%lu >= q:%lu\n",
8750+ loc, q);
8751+}
8752+
8753+static quanta_t current_quantum(struct pfair_state* state)
8754+{
8755+ lt_t t = litmus_clock() - state->offset;
8756+ return time2quanta(t, FLOOR);
8757+}
8758+
8759+static void catchup_quanta(quanta_t from, quanta_t target,
8760+ struct pfair_state* state)
8761+{
8762+ quanta_t cur = from, time;
8763+ TRACE("+++< BAD catching up quanta from %lu to %lu\n",
8764+ from, target);
8765+ while (time_before(cur, target)) {
8766+ wait_for_quantum(cur, state);
8767+ cur++;
8768+ time = cmpxchg(&pfair_time,
8769+ cur - 1, /* expected */
8770+ cur /* next */
8771+ );
8772+ if (time == cur - 1)
8773+ schedule_next_quantum(cur);
8774+ }
8775+ TRACE("+++> catching up done\n");
8776+}
8777+
8778+/* pfair_tick - this function is called for every local timer
8779+ * interrupt.
8780+ */
8781+static void pfair_tick(struct task_struct* t)
8782+{
8783+ struct pfair_state* state = &__get_cpu_var(pfair_state);
8784+ quanta_t time, cur;
8785+ int retry = 10;
8786+
8787+ do {
8788+ cur = current_quantum(state);
8789+ PTRACE("q %lu at %llu\n", cur, litmus_clock());
8790+
8791+ /* Attempt to advance time. First CPU to get here
8792+ * will prepare the next quantum.
8793+ */
8794+ time = cmpxchg(&pfair_time,
8795+ cur - 1, /* expected */
8796+ cur /* next */
8797+ );
8798+ if (time == cur - 1) {
8799+ /* exchange succeeded */
8800+ wait_for_quantum(cur - 1, state);
8801+ schedule_next_quantum(cur);
8802+ retry = 0;
8803+ } else if (time_before(time, cur - 1)) {
8804+ /* the whole system missed a tick !? */
8805+ catchup_quanta(time, cur, state);
8806+ retry--;
8807+ } else if (time_after(time, cur)) {
8808+ /* our timer lagging behind!? */
8809+ TRACE("BAD pfair_time:%lu > cur:%lu\n", time, cur);
8810+ retry--;
8811+ } else {
8812+ /* Some other CPU already started scheduling
8813+ * this quantum. Let it do its job and then update.
8814+ */
8815+ retry = 0;
8816+ }
8817+ } while (retry);
8818+
8819+ /* Spin locally until time advances. */
8820+ wait_for_quantum(cur, state);
8821+
8822+ /* copy assignment */
8823+ /* FIXME: what if we race with a future update? Corrupted state? */
8824+ state->local = state->linked;
8825+ /* signal that we are done */
8826+ mb();
8827+ state->local_tick = state->cur_tick;
8828+
8829+ if (state->local != current
8830+ && (is_realtime(current) || is_present(state->local)))
8831+ set_tsk_need_resched(current);
8832+}
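
The quantum hand-off in pfair_tick() relies on a compare-and-swap on pfair_time: only the CPU that successfully advances the counter from cur - 1 to cur prepares the next quantum, while all other CPUs wait for it. A user-space sketch of that pattern with C11 atomics follows (pfair_time_sketch and try_advance() are hypothetical names; the kernel uses cmpxchg()).

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long pfair_time_sketch = 41;

/* Returns 1 if the caller won the race and must schedule quantum `cur`. */
static int try_advance(unsigned long cur)
{
	unsigned long expected = cur - 1;
	return atomic_compare_exchange_strong(&pfair_time_sketch, &expected, cur);
}

int main(void)
{
	unsigned long cur = 42;

	if (try_advance(cur))
		printf("this CPU schedules quantum %lu\n", cur);
	else
		printf("another CPU already advanced to quantum %lu\n",
		       (unsigned long)atomic_load(&pfair_time_sketch));
	return 0;
}
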
8833+
8834+static int safe_to_schedule(struct task_struct* t, int cpu)
8835+{
8836+ int where = tsk_rt(t)->scheduled_on;
8837+ if (where != NO_CPU && where != cpu) {
8838+ TRACE_TASK(t, "BAD: can't be scheduled on %d, "
8839+ "scheduled already on %d.\n", cpu, where);
8840+ return 0;
8841+ } else
8842+ return tsk_rt(t)->present && get_rt_flags(t) == RT_F_RUNNING;
8843+}
8844+
8845+static struct task_struct* pfair_schedule(struct task_struct * prev)
8846+{
8847+ struct pfair_state* state = &__get_cpu_var(pfair_state);
8848+ int blocks;
8849+ struct task_struct* next = NULL;
8850+
8851+ raw_spin_lock(&pfair_lock);
8852+
8853+ blocks = is_realtime(prev) && !is_running(prev);
8854+
8855+ if (state->local && safe_to_schedule(state->local, state->cpu))
8856+ next = state->local;
8857+
8858+ if (prev != next) {
8859+ tsk_rt(prev)->scheduled_on = NO_CPU;
8860+ if (next)
8861+ tsk_rt(next)->scheduled_on = state->cpu;
8862+ }
8863+
8864+ raw_spin_unlock(&pfair_lock);
8865+
8866+ if (next)
8867+ TRACE_TASK(next, "scheduled rel=%lu at %lu (%llu)\n",
8868+ tsk_pfair(next)->release, pfair_time, litmus_clock());
8869+ else if (is_realtime(prev))
8870+ TRACE("Becomes idle at %lu (%llu)\n", pfair_time, litmus_clock());
8871+
8872+ return next;
8873+}
8874+
8875+static void pfair_task_new(struct task_struct * t, int on_rq, int running)
8876+{
8877+ unsigned long flags;
8878+
8879+ TRACE("pfair: task new %d state:%d\n", t->pid, t->state);
8880+
8881+ raw_spin_lock_irqsave(&pfair_lock, flags);
8882+ if (running)
8883+ t->rt_param.scheduled_on = task_cpu(t);
8884+ else
8885+ t->rt_param.scheduled_on = NO_CPU;
8886+
8887+ prepare_release(t, pfair_time + 1);
8888+ tsk_pfair(t)->sporadic_release = 0;
8889+ pfair_add_release(t);
8890+ check_preempt(t);
8891+
8892+ raw_spin_unlock_irqrestore(&pfair_lock, flags);
8893+}
8894+
8895+static void pfair_task_wake_up(struct task_struct *t)
8896+{
8897+ unsigned long flags;
8898+ lt_t now;
8899+
8900+ TRACE_TASK(t, "wakes at %llu, release=%lu, pfair_time:%lu\n",
8901+ litmus_clock(), cur_release(t), pfair_time);
8902+
8903+ raw_spin_lock_irqsave(&pfair_lock, flags);
8904+
8905+ /* It is a little unclear how to deal with Pfair
8906+ * tasks that block for a while and then wake. For now,
8907+ * if a task blocks and wakes before its next job release,
8908+ * then it may resume if it is currently linked somewhere
8909+ * (as if it never blocked at all). Otherwise, we have a
8910+ * new sporadic job release.
8911+ */
8912+ if (tsk_pfair(t)->sporadic_release) {
8913+ now = litmus_clock();
8914+ release_at(t, now);
8915+ prepare_release(t, time2quanta(now, CEIL));
8916+ sched_trace_task_release(t);
8917+ /* FIXME: race with pfair_time advancing */
8918+ pfair_add_release(t);
8919+ tsk_pfair(t)->sporadic_release = 0;
8920+ }
8921+
8922+ check_preempt(t);
8923+
8924+ raw_spin_unlock_irqrestore(&pfair_lock, flags);
8925+ TRACE_TASK(t, "wake up done at %llu\n", litmus_clock());
8926+}
8927+
8928+static void pfair_task_block(struct task_struct *t)
8929+{
8930+ BUG_ON(!is_realtime(t));
8931+ TRACE_TASK(t, "blocks at %llu, state:%d\n",
8932+ litmus_clock(), t->state);
8933+}
8934+
8935+static void pfair_task_exit(struct task_struct * t)
8936+{
8937+ unsigned long flags;
8938+
8939+ BUG_ON(!is_realtime(t));
8940+
8941+	/* Remove the task from the release or ready queue, and ensure
8942+	 * that it is not the scheduled task for ANY CPU. We
8943+	 * do this blanket check because occasionally when
8944+ * tasks exit while blocked, the task_cpu of the task
8945+ * might not be the same as the CPU that the PFAIR scheduler
8946+ * has chosen for it.
8947+ */
8948+ raw_spin_lock_irqsave(&pfair_lock, flags);
8949+
8950+ TRACE_TASK(t, "RIP, state:%d\n", t->state);
8951+ drop_all_references(t);
8952+
8953+ raw_spin_unlock_irqrestore(&pfair_lock, flags);
8954+
8955+ kfree(t->rt_param.pfair);
8956+ t->rt_param.pfair = NULL;
8957+}
8958+
8959+
8960+static void pfair_release_at(struct task_struct* task, lt_t start)
8961+{
8962+ unsigned long flags;
8963+ quanta_t release;
8964+
8965+ BUG_ON(!is_realtime(task));
8966+
8967+ raw_spin_lock_irqsave(&pfair_lock, flags);
8968+ release_at(task, start);
8969+ release = time2quanta(start, CEIL);
8970+
8971+ if (release - pfair_time >= PFAIR_MAX_PERIOD)
8972+ release = pfair_time + PFAIR_MAX_PERIOD;
8973+
8974+ TRACE_TASK(task, "sys release at %lu\n", release);
8975+
8976+ drop_all_references(task);
8977+ prepare_release(task, release);
8978+ pfair_add_release(task);
8979+
8980+ /* Clear sporadic release flag, since this release subsumes any
8981+ * sporadic release on wake.
8982+ */
8983+ tsk_pfair(task)->sporadic_release = 0;
8984+
8985+ raw_spin_unlock_irqrestore(&pfair_lock, flags);
8986+}
8987+
8988+static void init_subtask(struct subtask* sub, unsigned long i,
8989+ lt_t quanta, lt_t period)
8990+{
8991+ /* since i is zero-based, the formulas are shifted by one */
8992+ lt_t tmp;
8993+
8994+ /* release */
8995+ tmp = period * i;
8996+ do_div(tmp, quanta); /* floor */
8997+ sub->release = (quanta_t) tmp;
8998+
8999+ /* deadline */
9000+ tmp = period * (i + 1);
9001+ if (do_div(tmp, quanta)) /* ceil */
9002+ tmp++;
9003+ sub->deadline = (quanta_t) tmp;
9004+
9005+ /* next release */
9006+ tmp = period * (i + 1);
9007+ do_div(tmp, quanta); /* floor */
9008+ sub->overlap = sub->deadline - (quanta_t) tmp;
9009+
9010+ /* Group deadline.
9011+ * Based on the formula given in Uma's thesis.
9012+ */
9013+ if (2 * quanta >= period) {
9014+ /* heavy */
9015+ tmp = (sub->deadline - (i + 1)) * period;
9016+ if (period > quanta &&
9017+ do_div(tmp, (period - quanta))) /* ceil */
9018+ tmp++;
9019+ sub->group_deadline = (quanta_t) tmp;
9020+ } else
9021+ sub->group_deadline = 0;
9022+}
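
To make the window formulas above concrete, the following stand-alone sketch recomputes them with plain 64-bit arithmetic in place of do_div() (init_subtask_sketch() is a hypothetical user-space mirror of init_subtask()). For a task needing 3 quanta every 5 quanta -- a "heavy" task, since 2 * quanta >= period -- it prints release/deadline/b-bit/group-deadline values of the kind that dump_subtasks() traces.

#include <stdio.h>
#include <stdint.h>

struct subtask_sketch { uint64_t release, deadline, overlap, group_deadline; };

static void init_subtask_sketch(struct subtask_sketch *sub, uint64_t i,
				uint64_t quanta, uint64_t period)
{
	sub->release  = (period * i) / quanta;				/* floor */
	sub->deadline = (period * (i + 1) + quanta - 1) / quanta;	/* ceil  */
	sub->overlap  = sub->deadline - (period * (i + 1)) / quanta;	/* "b bit" */

	if (2 * quanta >= period) {					/* heavy task */
		uint64_t tmp = (sub->deadline - (i + 1)) * period;
		if (period > quanta)
			tmp = (tmp + (period - quanta) - 1) / (period - quanta); /* ceil */
		sub->group_deadline = tmp;
	} else {
		sub->group_deadline = 0;
	}
}

int main(void)
{
	uint64_t quanta = 3, period = 5, i;
	struct subtask_sketch sub;

	for (i = 0; i < quanta; i++) {
		init_subtask_sketch(&sub, i, quanta, period);
		printf("SUBTASK %llu: rel=%llu dl=%llu bbit:%llu gdl:%llu\n",
		       (unsigned long long)(i + 1),
		       (unsigned long long)sub.release,
		       (unsigned long long)sub.deadline,
		       (unsigned long long)sub.overlap,
		       (unsigned long long)sub.group_deadline);
	}
	return 0;
}
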
9023+
9024+static void dump_subtasks(struct task_struct* t)
9025+{
9026+ unsigned long i;
9027+ for (i = 0; i < t->rt_param.pfair->quanta; i++)
9028+ TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n",
9029+ i + 1,
9030+ t->rt_param.pfair->subtasks[i].release,
9031+ t->rt_param.pfair->subtasks[i].deadline,
9032+ t->rt_param.pfair->subtasks[i].overlap,
9033+ t->rt_param.pfair->subtasks[i].group_deadline);
9034+}
9035+
9036+static long pfair_admit_task(struct task_struct* t)
9037+{
9038+ lt_t quanta;
9039+ lt_t period;
9040+ s64 quantum_length = ktime_to_ns(tick_period);
9041+ struct pfair_param* param;
9042+ unsigned long i;
9043+
9044+ /* Pfair is a tick-based method, so the time
9045+ * of interest is jiffies. Calculate tick-based
9046+ * times for everything.
9047+ * (Ceiling of exec cost, floor of period.)
9048+ */
9049+
9050+ quanta = get_exec_cost(t);
9051+ period = get_rt_period(t);
9052+
9053+ quanta = time2quanta(get_exec_cost(t), CEIL);
9054+
9055+ if (do_div(period, quantum_length))
9056+ printk(KERN_WARNING
9057+ "The period of %s/%d is not a multiple of %llu.\n",
9058+ t->comm, t->pid, (unsigned long long) quantum_length);
9059+
9060+ if (period >= PFAIR_MAX_PERIOD) {
9061+ printk(KERN_WARNING
9062+ "PFAIR: Rejecting task %s/%d; its period is too long.\n",
9063+ t->comm, t->pid);
9064+ return -EINVAL;
9065+ }
9066+
9067+ if (quanta == period) {
9068+ /* special case: task has weight 1.0 */
9069+ printk(KERN_INFO
9070+ "Admitting weight 1.0 task. (%s/%d, %llu, %llu).\n",
9071+ t->comm, t->pid, quanta, period);
9072+ quanta = 1;
9073+ period = 1;
9074+ }
9075+
9076+ param = kmalloc(sizeof(*param) +
9077+ quanta * sizeof(struct subtask), GFP_ATOMIC);
9078+
9079+ if (!param)
9080+ return -ENOMEM;
9081+
9082+ param->quanta = quanta;
9083+ param->cur = 0;
9084+ param->release = 0;
9085+ param->period = period;
9086+
9087+ for (i = 0; i < quanta; i++)
9088+ init_subtask(param->subtasks + i, i, quanta, period);
9089+
9090+ if (t->rt_param.pfair)
9091+ /* get rid of stale allocation */
9092+ kfree(t->rt_param.pfair);
9093+
9094+ t->rt_param.pfair = param;
9095+
9096+ /* spew out some debug info */
9097+ dump_subtasks(t);
9098+
9099+ return 0;
9100+}
9101+
9102+static long pfair_activate_plugin(void)
9103+{
9104+ int cpu;
9105+ struct pfair_state* state;
9106+
9107+ state = &__get_cpu_var(pfair_state);
9108+ pfair_time = current_quantum(state);
9109+
9110+ TRACE("Activating PFAIR at q=%lu\n", pfair_time);
9111+
9112+ for (cpu = 0; cpu < num_online_cpus(); cpu++) {
9113+ state = &per_cpu(pfair_state, cpu);
9114+ state->cur_tick = pfair_time;
9115+ state->local_tick = pfair_time;
9116+ state->missed_quanta = 0;
9117+ state->offset = cpu_stagger_offset(cpu);
9118+ }
9119+
9120+ return 0;
9121+}
9122+
9123+/* Plugin object */
9124+static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = {
9125+ .plugin_name = "PFAIR",
9126+ .tick = pfair_tick,
9127+ .task_new = pfair_task_new,
9128+ .task_exit = pfair_task_exit,
9129+ .schedule = pfair_schedule,
9130+ .task_wake_up = pfair_task_wake_up,
9131+ .task_block = pfair_task_block,
9132+ .admit_task = pfair_admit_task,
9133+ .release_at = pfair_release_at,
9134+ .complete_job = complete_job,
9135+ .activate_plugin = pfair_activate_plugin,
9136+};
9137+
9138+static int __init init_pfair(void)
9139+{
9140+ int cpu, i;
9141+ struct pfair_state *state;
9142+
9143+
9144+ /*
9145+ * initialize short_cut for per-cpu pfair state;
9146+ * there may be a problem here if someone removes a cpu
9147+ * while we are doing this initialization... and if cpus
9148+ * are added / removed later... is it a _real_ problem?
9149+ */
9150+ pstate = kmalloc(sizeof(struct pfair_state*) * num_online_cpus(), GFP_KERNEL);
9151+
9152+ /* initialize release queue */
9153+ for (i = 0; i < PFAIR_MAX_PERIOD; i++)
9154+ bheap_init(&release_queue[i]);
9155+
9156+ /* initialize CPU state */
9157+ for (cpu = 0; cpu < num_online_cpus(); cpu++) {
9158+ state = &per_cpu(pfair_state, cpu);
9159+ state->cpu = cpu;
9160+ state->cur_tick = 0;
9161+ state->local_tick = 0;
9162+ state->linked = NULL;
9163+ state->local = NULL;
9164+ state->scheduled = NULL;
9165+ state->missed_quanta = 0;
9166+ state->offset = cpu_stagger_offset(cpu);
9167+ pstate[cpu] = state;
9168+ }
9169+
9170+ rt_domain_init(&pfair, pfair_ready_order, NULL, NULL);
9171+ return register_sched_plugin(&pfair_plugin);
9172+}
9173+
9174+static void __exit clean_pfair(void)
9175+{
9176+ kfree(pstate);
9177+}
9178+
9179+module_init(init_pfair);
9180+module_exit(clean_pfair);
9181diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
9182new file mode 100644
9183index 0000000..3543b7b
9184--- /dev/null
9185+++ b/litmus/sched_plugin.c
9186@@ -0,0 +1,265 @@
9187+/* sched_plugin.c -- core infrastructure for the scheduler plugin system
9188+ *
9189+ * This file includes the initialization of the plugin system, the no-op Linux
9190+ * scheduler plugin, some dummy functions, and some helper functions.
9191+ */
9192+
9193+#include <linux/list.h>
9194+#include <linux/spinlock.h>
9195+
9196+#include <litmus/litmus.h>
9197+#include <litmus/sched_plugin.h>
9198+
9199+#include <litmus/jobs.h>
9200+
9201+/*
9202+ * Generic function to trigger preemption on either local or remote cpu
9203+ * from scheduler plugins. The key feature is that this function is
9204+ * non-preemptive section aware and does not invoke the scheduler / send
9205+ * IPIs if the to-be-preempted task is actually non-preemptive.
9206+ */
9207+void preempt_if_preemptable(struct task_struct* t, int on_cpu)
9208+{
9209+	/* t is the real-time task executing on CPU on_cpu. If t is NULL, then
9210+ * on_cpu is currently scheduling background work.
9211+ */
9212+
9213+ int send_ipi;
9214+
9215+ if (smp_processor_id() == on_cpu) {
9216+ /* local CPU case */
9217+ if (t) {
9218+ /* check if we need to poke userspace */
9219+ if (is_user_np(t))
9220+ /* yes, poke it */
9221+ request_exit_np(t);
9222+ else
9223+ /* no, see if we are allowed to preempt the
9224+ * currently-executing task */
9225+ if (!is_kernel_np(t))
9226+ set_tsk_need_resched(t);
9227+ } else
9228+ /* move non-real-time task out of the way */
9229+ set_tsk_need_resched(current);
9230+ } else {
9231+ /* remote CPU case */
9232+ if (!t)
9233+ /* currently schedules non-real-time work */
9234+ send_ipi = 1;
9235+ else {
9236+ /* currently schedules real-time work */
9237+ if (is_user_np(t)) {
9238+ /* need to notify user space of delayed
9239+ * preemption */
9240+
9241+ /* to avoid a race, set the flag, then test
9242+ * again */
9243+ request_exit_np(t);
9244+ /* make sure it got written */
9245+ mb();
9246+ }
9247+			/* Only send an IPI if the remote task might have raced
9248+			 * with our request, i.e., if it may already have left
9249+			 * its critical section.
9250+ */
9251+ send_ipi = !is_np(t) && !is_kernel_np(t);
9252+ }
9253+ if (likely(send_ipi))
9254+ smp_send_reschedule(on_cpu);
9255+ }
9256+}
9257+
9258+
9259+/*************************************************************
9260+ * Dummy plugin functions *
9261+ *************************************************************/
9262+
9263+static void litmus_dummy_finish_switch(struct task_struct * prev)
9264+{
9265+}
9266+
9267+static struct task_struct* litmus_dummy_schedule(struct task_struct * prev)
9268+{
9269+ return NULL;
9270+}
9271+
9272+static void litmus_dummy_tick(struct task_struct* tsk)
9273+{
9274+}
9275+
9276+static long litmus_dummy_admit_task(struct task_struct* tsk)
9277+{
9278+ printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n",
9279+ tsk->comm, tsk->pid);
9280+ return -EINVAL;
9281+}
9282+
9283+static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running)
9284+{
9285+}
9286+
9287+static void litmus_dummy_task_wake_up(struct task_struct *task)
9288+{
9289+}
9290+
9291+static void litmus_dummy_task_block(struct task_struct *task)
9292+{
9293+}
9294+
9295+static void litmus_dummy_task_exit(struct task_struct *task)
9296+{
9297+}
9298+
9299+static long litmus_dummy_complete_job(void)
9300+{
9301+ return -ENOSYS;
9302+}
9303+
9304+static long litmus_dummy_activate_plugin(void)
9305+{
9306+ return 0;
9307+}
9308+
9309+static long litmus_dummy_deactivate_plugin(void)
9310+{
9311+ return 0;
9312+}
9313+
9314+#ifdef CONFIG_FMLP
9315+
9316+static long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
9317+ struct task_struct *new_owner)
9318+{
9319+ return -ENOSYS;
9320+}
9321+
9322+static long litmus_dummy_return_priority(struct pi_semaphore *sem)
9323+{
9324+ return -ENOSYS;
9325+}
9326+
9327+static long litmus_dummy_pi_block(struct pi_semaphore *sem,
9328+ struct task_struct *new_waiter)
9329+{
9330+ return -ENOSYS;
9331+}
9332+
9333+#endif
9334+
9335+
9336+/* The default scheduler plugin. It doesn't do anything and lets Linux do its
9337+ * job.
9338+ */
9339+struct sched_plugin linux_sched_plugin = {
9340+ .plugin_name = "Linux",
9341+ .tick = litmus_dummy_tick,
9342+ .task_new = litmus_dummy_task_new,
9343+ .task_exit = litmus_dummy_task_exit,
9344+ .task_wake_up = litmus_dummy_task_wake_up,
9345+ .task_block = litmus_dummy_task_block,
9346+ .complete_job = litmus_dummy_complete_job,
9347+ .schedule = litmus_dummy_schedule,
9348+ .finish_switch = litmus_dummy_finish_switch,
9349+ .activate_plugin = litmus_dummy_activate_plugin,
9350+ .deactivate_plugin = litmus_dummy_deactivate_plugin,
9351+#ifdef CONFIG_FMLP
9352+ .inherit_priority = litmus_dummy_inherit_priority,
9353+ .return_priority = litmus_dummy_return_priority,
9354+ .pi_block = litmus_dummy_pi_block,
9355+#endif
9356+ .admit_task = litmus_dummy_admit_task
9357+};
9358+
9359+/*
9360+ * The cluster size is needed in C-EDF: it makes sense only to cluster
9361+ * around the L2 or L3 cache: with cluster_cache_index = 2 (the default) we
9362+ * cluster all the CPUs that share an L2 cache, while with cluster_cache_index = 3
9363+ * we cluster all CPUs that share an L3 cache.
9364+ */
9365+int cluster_cache_index = 2;
9366+
9367+/*
9368+ * The reference to the current plugin that is used to schedule tasks within
9369+ * the system. It stores references to the actual function implementations
9370+ * and should be initialized by calling "init_***_plugin()".
9371+ */
9372+struct sched_plugin *litmus = &linux_sched_plugin;
9373+
9374+/* the list of registered scheduling plugins */
9375+static LIST_HEAD(sched_plugins);
9376+static DEFINE_RAW_SPINLOCK(sched_plugins_lock);
9377+
9378+#define CHECK(func) {\
9379+ if (!plugin->func) \
9380+ plugin->func = litmus_dummy_ ## func;}
9381+
9382+/* FIXME: get reference to module */
9383+int register_sched_plugin(struct sched_plugin* plugin)
9384+{
9385+ printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
9386+ plugin->plugin_name);
9387+
9388+ /* make sure we don't trip over null pointers later */
9389+ CHECK(finish_switch);
9390+ CHECK(schedule);
9391+ CHECK(tick);
9392+ CHECK(task_wake_up);
9393+ CHECK(task_exit);
9394+ CHECK(task_block);
9395+ CHECK(task_new);
9396+ CHECK(complete_job);
9397+ CHECK(activate_plugin);
9398+ CHECK(deactivate_plugin);
9399+#ifdef CONFIG_FMLP
9400+ CHECK(inherit_priority);
9401+ CHECK(return_priority);
9402+ CHECK(pi_block);
9403+#endif
9404+ CHECK(admit_task);
9405+
9406+ if (!plugin->release_at)
9407+ plugin->release_at = release_at;
9408+
9409+ raw_spin_lock(&sched_plugins_lock);
9410+ list_add(&plugin->list, &sched_plugins);
9411+ raw_spin_unlock(&sched_plugins_lock);
9412+
9413+ return 0;
9414+}
9415+
9416+
9417+/* FIXME: reference counting, etc. */
9418+struct sched_plugin* find_sched_plugin(const char* name)
9419+{
9420+ struct list_head *pos;
9421+ struct sched_plugin *plugin;
9422+
9423+ raw_spin_lock(&sched_plugins_lock);
9424+ list_for_each(pos, &sched_plugins) {
9425+ plugin = list_entry(pos, struct sched_plugin, list);
9426+ if (!strcmp(plugin->plugin_name, name))
9427+ goto out_unlock;
9428+ }
9429+ plugin = NULL;
9430+
9431+out_unlock:
9432+ raw_spin_unlock(&sched_plugins_lock);
9433+ return plugin;
9434+}
9435+
9436+int print_sched_plugins(char* buf, int max)
9437+{
9438+ int count = 0;
9439+ struct list_head *pos;
9440+ struct sched_plugin *plugin;
9441+
9442+ raw_spin_lock(&sched_plugins_lock);
9443+ list_for_each(pos, &sched_plugins) {
9444+ plugin = list_entry(pos, struct sched_plugin, list);
9445+ count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
9446+ if (max - count <= 0)
9447+ break;
9448+ }
9449+ raw_spin_unlock(&sched_plugins_lock);
9450+ return count;
9451+}
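
The CHECK() macro above installs a litmus_dummy_* fallback for every callback that a plugin leaves unset, and release_at is defaulted separately, so a new plugin only has to provide the operations it actually overrides. A minimal registration could look like the following sketch (the plugin name "DEMO" and the init function are illustrative only, not part of the patch):

#include <linux/module.h>
#include <litmus/sched_plugin.h>

/* Hypothetical sketch: a plugin that overrides nothing. All callbacks are
 * left NULL, so register_sched_plugin() substitutes the litmus_dummy_*
 * defaults via CHECK() and falls back to the stock release_at().
 */
static struct sched_plugin demo_plugin = {
	.plugin_name = "DEMO",
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);

The PSN-EDF plugin below follows the same pattern, overriding most callbacks and leaving the rest (e.g., finish_switch, activate_plugin) to the dummy defaults.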
9452diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
9453new file mode 100644
9454index 0000000..73f6473
9455--- /dev/null
9456+++ b/litmus/sched_psn_edf.c
9457@@ -0,0 +1,482 @@
9458+/*
9459+ * kernel/sched_psn_edf.c
9460+ *
9461+ * Implementation of the PSN-EDF scheduler plugin.
9462+ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
9463+ *
9464+ * Suspensions and non-preemptable sections are supported.
9465+ * Priority inheritance is not supported.
9466+ */
9467+
9468+#include <linux/percpu.h>
9469+#include <linux/sched.h>
9470+#include <linux/list.h>
9471+#include <linux/spinlock.h>
9472+
9473+#include <linux/module.h>
9474+
9475+#include <litmus/litmus.h>
9476+#include <litmus/jobs.h>
9477+#include <litmus/sched_plugin.h>
9478+#include <litmus/edf_common.h>
9479+
9480+
9481+typedef struct {
9482+ rt_domain_t domain;
9483+ int cpu;
9484+ struct task_struct* scheduled; /* only RT tasks */
9485+/*
9486+ * scheduling lock slock
9487+ * protects the domain and serializes scheduling decisions
9488+ */
9489+#define slock domain.ready_lock
9490+
9491+} psnedf_domain_t;
9492+
9493+DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
9494+
9495+#define local_edf (&__get_cpu_var(psnedf_domains).domain)
9496+#define local_pedf (&__get_cpu_var(psnedf_domains))
9497+#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
9498+#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
9499+#define task_edf(task) remote_edf(get_partition(task))
9500+#define task_pedf(task) remote_pedf(get_partition(task))
9501+
9502+
9503+static void psnedf_domain_init(psnedf_domain_t* pedf,
9504+ check_resched_needed_t check,
9505+ release_jobs_t release,
9506+ int cpu)
9507+{
9508+ edf_domain_init(&pedf->domain, check, release);
9509+ pedf->cpu = cpu;
9510+ pedf->scheduled = NULL;
9511+}
9512+
9513+static void requeue(struct task_struct* t, rt_domain_t *edf)
9514+{
9515+ if (t->state != TASK_RUNNING)
9516+ TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
9517+
9518+ set_rt_flags(t, RT_F_RUNNING);
9519+ if (is_released(t, litmus_clock()))
9520+ __add_ready(edf, t);
9521+ else
9522+ add_release(edf, t); /* it has got to wait */
9523+}
9524+
9525+/* we assume the lock is being held */
9526+static void preempt(psnedf_domain_t *pedf)
9527+{
9528+ preempt_if_preemptable(pedf->scheduled, pedf->cpu);
9529+}
9530+
9531+/* This check is trivial in partitioned systems as we only have to consider
9532+ * the CPU of the partition.
9533+ */
9534+static int psnedf_check_resched(rt_domain_t *edf)
9535+{
9536+ psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
9537+
9538+ /* because this is a callback from rt_domain_t we already hold
9539+ * the necessary lock for the ready queue
9540+ */
9541+ if (edf_preemption_needed(edf, pedf->scheduled)) {
9542+ preempt(pedf);
9543+ return 1;
9544+ } else
9545+ return 0;
9546+}
9547+
9548+static void job_completion(struct task_struct* t, int forced)
9549+{
9550+ sched_trace_task_completion(t,forced);
9551+ TRACE_TASK(t, "job_completion().\n");
9552+
9553+ set_rt_flags(t, RT_F_SLEEP);
9554+ prepare_for_next_period(t);
9555+}
9556+
9557+static void psnedf_tick(struct task_struct *t)
9558+{
9559+ psnedf_domain_t *pedf = local_pedf;
9560+
9561+ /* Check for inconsistency. We don't need the lock for this since
9562+ * ->scheduled is only changed in schedule, which obviously is not
9563+ * executing in parallel on this CPU
9564+ */
9565+ BUG_ON(is_realtime(t) && t != pedf->scheduled);
9566+
9567+ if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
9568+ if (!is_np(t)) {
9569+ set_tsk_need_resched(t);
9570+ TRACE("psnedf_scheduler_tick: "
9571+ "%d is preemptable "
9572+ " => FORCE_RESCHED\n", t->pid);
9573+ } else if (is_user_np(t)) {
9574+ TRACE("psnedf_scheduler_tick: "
9575+ "%d is non-preemptable, "
9576+ "preemption delayed.\n", t->pid);
9577+ request_exit_np(t);
9578+ }
9579+ }
9580+}
9581+
9582+static struct task_struct* psnedf_schedule(struct task_struct * prev)
9583+{
9584+ psnedf_domain_t* pedf = local_pedf;
9585+ rt_domain_t* edf = &pedf->domain;
9586+ struct task_struct* next;
9587+
9588+ int out_of_time, sleep, preempt,
9589+ np, exists, blocks, resched;
9590+
9591+ raw_spin_lock(&pedf->slock);
9592+
9593+	/* sanity checking
9594+	 * unlike under GSN-EDF, when a task exits (is dead),
9595+	 * pedf->scheduled may be NULL while prev _is_ a real-time task
9596+	 */
9597+ BUG_ON(pedf->scheduled && pedf->scheduled != prev);
9598+ BUG_ON(pedf->scheduled && !is_realtime(prev));
9599+
9600+ /* (0) Determine state */
9601+ exists = pedf->scheduled != NULL;
9602+ blocks = exists && !is_running(pedf->scheduled);
9603+ out_of_time = exists &&
9604+ budget_enforced(pedf->scheduled) &&
9605+ budget_exhausted(pedf->scheduled);
9606+ np = exists && is_np(pedf->scheduled);
9607+ sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
9608+ preempt = edf_preemption_needed(edf, prev);
9609+
9610+ /* If we need to preempt do so.
9611+ * The following checks set resched to 1 in case of special
9612+ * circumstances.
9613+ */
9614+ resched = preempt;
9615+
9616+ /* If a task blocks we have no choice but to reschedule.
9617+ */
9618+ if (blocks)
9619+ resched = 1;
9620+
9621+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
9622+ * Multiple calls to request_exit_np() don't hurt.
9623+ */
9624+ if (np && (out_of_time || preempt || sleep))
9625+ request_exit_np(pedf->scheduled);
9626+
9627+ /* Any task that is preemptable and either exhausts its execution
9628+ * budget or wants to sleep completes. We may have to reschedule after
9629+ * this.
9630+ */
9631+ if (!np && (out_of_time || sleep) && !blocks) {
9632+ job_completion(pedf->scheduled, !sleep);
9633+ resched = 1;
9634+ }
9635+
9636+ /* The final scheduling decision. Do we need to switch for some reason?
9637+ * Switch if we are in RT mode and have no task or if we need to
9638+ * resched.
9639+ */
9640+ next = NULL;
9641+ if ((!np || blocks) && (resched || !exists)) {
9642+		/* If we preempt a task that does not block, we re-insert
9643+		 * it into either the ready queue or the release queue
9644+		 * (if it completed). requeue() picks the appropriate
9645+		 * queue.
9646+		 */
9647+ if (pedf->scheduled && !blocks)
9648+ requeue(pedf->scheduled, edf);
9649+ next = __take_ready(edf);
9650+ } else
9651+ /* Only override Linux scheduler if we have a real-time task
9652+ * scheduled that needs to continue.
9653+ */
9654+ if (exists)
9655+ next = prev;
9656+
9657+ if (next) {
9658+ TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
9659+ set_rt_flags(next, RT_F_RUNNING);
9660+ } else {
9661+ TRACE("becoming idle at %llu\n", litmus_clock());
9662+ }
9663+
9664+ pedf->scheduled = next;
9665+ raw_spin_unlock(&pedf->slock);
9666+
9667+ return next;
9668+}
9669+
9670+
9671+/* Prepare a task for running in RT mode
9672+ */
9673+static void psnedf_task_new(struct task_struct * t, int on_rq, int running)
9674+{
9675+ rt_domain_t* edf = task_edf(t);
9676+ psnedf_domain_t* pedf = task_pedf(t);
9677+ unsigned long flags;
9678+
9679+ TRACE_TASK(t, "psn edf: task new, cpu = %d\n",
9680+ t->rt_param.task_params.cpu);
9681+
9682+ /* setup job parameters */
9683+ release_at(t, litmus_clock());
9684+
9685+ /* The task should be running in the queue, otherwise signal
9686+ * code will try to wake it up with fatal consequences.
9687+ */
9688+ raw_spin_lock_irqsave(&pedf->slock, flags);
9689+ if (running) {
9690+ /* there shouldn't be anything else running at the time */
9691+ BUG_ON(pedf->scheduled);
9692+ pedf->scheduled = t;
9693+ } else {
9694+ requeue(t, edf);
9695+ /* maybe we have to reschedule */
9696+ preempt(pedf);
9697+ }
9698+ raw_spin_unlock_irqrestore(&pedf->slock, flags);
9699+}
9700+
9701+static void psnedf_task_wake_up(struct task_struct *task)
9702+{
9703+ unsigned long flags;
9704+ psnedf_domain_t* pedf = task_pedf(task);
9705+ rt_domain_t* edf = task_edf(task);
9706+ lt_t now;
9707+
9708+ TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
9709+ raw_spin_lock_irqsave(&pedf->slock, flags);
9710+ BUG_ON(is_queued(task));
9711+ /* We need to take suspensions because of semaphores into
9712+ * account! If a job resumes after being suspended due to acquiring
9713+ * a semaphore, it should never be treated as a new job release.
9714+ *
9715+ * FIXME: This should be done in some more predictable and userspace-controlled way.
9716+ */
9717+ now = litmus_clock();
9718+ if (is_tardy(task, now) &&
9719+ get_rt_flags(task) != RT_F_EXIT_SEM) {
9720+ /* new sporadic release */
9721+ release_at(task, now);
9722+ sched_trace_task_release(task);
9723+ }
9724+
9725+ /* Only add to ready queue if it is not the currently-scheduled
9726+ * task. This could be the case if a task was woken up concurrently
9727+ * on a remote CPU before the executing CPU got around to actually
9728+ * de-scheduling the task, i.e., wake_up() raced with schedule()
9729+ * and won.
9730+ */
9731+ if (pedf->scheduled != task)
9732+ requeue(task, edf);
9733+
9734+ raw_spin_unlock_irqrestore(&pedf->slock, flags);
9735+ TRACE_TASK(task, "wake up done\n");
9736+}
9737+
9738+static void psnedf_task_block(struct task_struct *t)
9739+{
9740+ /* only running tasks can block, thus t is in no queue */
9741+ TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
9742+
9743+ BUG_ON(!is_realtime(t));
9744+ BUG_ON(is_queued(t));
9745+}
9746+
9747+static void psnedf_task_exit(struct task_struct * t)
9748+{
9749+ unsigned long flags;
9750+ psnedf_domain_t* pedf = task_pedf(t);
9751+ rt_domain_t* edf;
9752+
9753+ raw_spin_lock_irqsave(&pedf->slock, flags);
9754+ if (is_queued(t)) {
9755+ /* dequeue */
9756+ edf = task_edf(t);
9757+ remove(edf, t);
9758+ }
9759+ if (pedf->scheduled == t)
9760+ pedf->scheduled = NULL;
9761+
9762+ TRACE_TASK(t, "RIP, now reschedule\n");
9763+
9764+ preempt(pedf);
9765+ raw_spin_unlock_irqrestore(&pedf->slock, flags);
9766+}
9767+
9768+#ifdef CONFIG_FMLP
9769+static long psnedf_pi_block(struct pi_semaphore *sem,
9770+ struct task_struct *new_waiter)
9771+{
9772+ psnedf_domain_t* pedf;
9773+ rt_domain_t* edf;
9774+ struct task_struct* t;
9775+ int cpu = get_partition(new_waiter);
9776+
9777+ BUG_ON(!new_waiter);
9778+
9779+ if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
9780+ TRACE_TASK(new_waiter, " boosts priority\n");
9781+ pedf = task_pedf(new_waiter);
9782+ edf = task_edf(new_waiter);
9783+
9784+ /* interrupts already disabled */
9785+ raw_spin_lock(&pedf->slock);
9786+
9787+ /* store new highest-priority task */
9788+ sem->hp.cpu_task[cpu] = new_waiter;
9789+ if (sem->holder &&
9790+ get_partition(sem->holder) == get_partition(new_waiter)) {
9791+ /* let holder inherit */
9792+ sem->holder->rt_param.inh_task = new_waiter;
9793+ t = sem->holder;
9794+ if (is_queued(t)) {
9795+ /* queued in domain*/
9796+ remove(edf, t);
9797+ /* readd to make priority change take place */
9798+ /* FIXME: this looks outdated */
9799+ if (is_released(t, litmus_clock()))
9800+ __add_ready(edf, t);
9801+ else
9802+ add_release(edf, t);
9803+ }
9804+ }
9805+
9806+ /* check if we need to reschedule */
9807+ if (edf_preemption_needed(edf, current))
9808+ preempt(pedf);
9809+
9810+ raw_spin_unlock(&pedf->slock);
9811+ }
9812+
9813+ return 0;
9814+}
9815+
9816+static long psnedf_inherit_priority(struct pi_semaphore *sem,
9817+ struct task_struct *new_owner)
9818+{
9819+ int cpu = get_partition(new_owner);
9820+
9821+ new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
9822+ if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
9823+ TRACE_TASK(new_owner,
9824+ "inherited priority from %s/%d\n",
9825+ sem->hp.cpu_task[cpu]->comm,
9826+ sem->hp.cpu_task[cpu]->pid);
9827+ } else
9828+ TRACE_TASK(new_owner,
9829+ "cannot inherit priority: "
9830+ "no higher priority job waits on this CPU!\n");
9831+ /* make new owner non-preemptable as required by FMLP under
9832+ * PSN-EDF.
9833+ */
9834+ make_np(new_owner);
9835+ return 0;
9836+}
9837+
9838+
9839+/* This function is called on a semaphore release, and assumes that
9840+ * the current task is also the semaphore holder.
9841+ */
9842+static long psnedf_return_priority(struct pi_semaphore *sem)
9843+{
9844+ struct task_struct* t = current;
9845+ psnedf_domain_t* pedf = task_pedf(t);
9846+ rt_domain_t* edf = task_edf(t);
9847+ int ret = 0;
9848+ int cpu = get_partition(current);
9849+ int still_np;
9850+
9851+
9852+ /* Find new highest-priority semaphore task
9853+ * if holder task is the current hp.cpu_task[cpu].
9854+ *
9855+ * Calling function holds sem->wait.lock.
9856+ */
9857+ if (t == sem->hp.cpu_task[cpu])
9858+ edf_set_hp_cpu_task(sem, cpu);
9859+
9860+ still_np = take_np(current);
9861+
9862+ /* Since we don't nest resources, this
9863+ * should always be zero */
9864+ BUG_ON(still_np);
9865+
9866+ if (current->rt_param.inh_task) {
9867+ TRACE_CUR("return priority of %s/%d\n",
9868+ current->rt_param.inh_task->comm,
9869+ current->rt_param.inh_task->pid);
9870+ } else
9871+ TRACE_CUR(" no priority to return %p\n", sem);
9872+
9873+
9874+ /* Always check for delayed preemptions that might have become
9875+ * necessary due to non-preemptive execution.
9876+ */
9877+ raw_spin_lock(&pedf->slock);
9878+
9879+ /* Reset inh_task to NULL. */
9880+ current->rt_param.inh_task = NULL;
9881+
9882+ /* check if we need to reschedule */
9883+ if (edf_preemption_needed(edf, current))
9884+ preempt(pedf);
9885+
9886+ raw_spin_unlock(&pedf->slock);
9887+
9888+
9889+ return ret;
9890+}
9891+
9892+#endif
9893+
9894+static long psnedf_admit_task(struct task_struct* tsk)
9895+{
9896+ return task_cpu(tsk) == tsk->rt_param.task_params.cpu ? 0 : -EINVAL;
9897+}
9898+
9899+/* Plugin object */
9900+static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
9901+ .plugin_name = "PSN-EDF",
9902+#ifdef CONFIG_SRP
9903+ .srp_active = 1,
9904+#endif
9905+ .tick = psnedf_tick,
9906+ .task_new = psnedf_task_new,
9907+ .complete_job = complete_job,
9908+ .task_exit = psnedf_task_exit,
9909+ .schedule = psnedf_schedule,
9910+ .task_wake_up = psnedf_task_wake_up,
9911+ .task_block = psnedf_task_block,
9912+#ifdef CONFIG_FMLP
9913+ .fmlp_active = 1,
9914+ .pi_block = psnedf_pi_block,
9915+ .inherit_priority = psnedf_inherit_priority,
9916+ .return_priority = psnedf_return_priority,
9917+#endif
9918+ .admit_task = psnedf_admit_task
9919+};
9920+
9921+
9922+static int __init init_psn_edf(void)
9923+{
9924+ int i;
9925+
9926+	/* We do not really want to support CPU hotplug, do we? ;)
9927+	 * However, if we were crazy enough to do so,
9928+	 * we could not use num_online_cpus() here.
9929+	 */
9930+ for (i = 0; i < num_online_cpus(); i++) {
9931+ psnedf_domain_init(remote_pedf(i),
9932+ psnedf_check_resched,
9933+ NULL, i);
9934+ }
9935+ return register_sched_plugin(&psn_edf_plugin);
9936+}
9937+
9938+module_init(init_psn_edf);
9939+
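
psnedf_admit_task() accepts a task only if it is already executing on the CPU named in its task_params, so a partitioned task has to pin itself to its partition before entering real-time mode. A hypothetical userspace sketch using only the standard Linux affinity API (the LITMUS^RT task parameters themselves would be set through liblitmus, which is outside this patch):

#define _GNU_SOURCE
#include <sched.h>

/* Pin the calling thread to its designated partition so that the
 * admit_task() check above (task_cpu(tsk) == task_params.cpu) can succeed.
 * 'partition' must match the CPU stored in rt_param.task_params.cpu.
 */
static int pin_to_partition(int partition)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(partition, &set);
	return sched_setaffinity(0, sizeof(set), &set);
}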
9940diff --git a/litmus/sched_task_trace.c b/litmus/sched_task_trace.c
9941new file mode 100644
9942index 0000000..39a543e
9943--- /dev/null
9944+++ b/litmus/sched_task_trace.c
9945@@ -0,0 +1,204 @@
9946+/*
9947+ * sched_task_trace.c -- record scheduling events to a byte stream
9948+ */
9949+
9950+#define NO_TASK_TRACE_DECLS
9951+
9952+#include <linux/module.h>
9953+#include <linux/sched.h>
9954+#include <linux/percpu.h>
9955+
9956+#include <litmus/ftdev.h>
9957+#include <litmus/litmus.h>
9958+
9959+#include <litmus/sched_trace.h>
9960+#include <litmus/feather_trace.h>
9961+#include <litmus/ftdev.h>
9962+
9963+
9964+/* set MAJOR to 0 to have it dynamically assigned */
9965+#define FT_TASK_TRACE_MAJOR 253
9966+#define NO_EVENTS 4096 /* this is a buffer of 12 4k pages per CPU */
9967+
9968+#define now() litmus_clock()
9969+
9970+struct local_buffer {
9971+ struct st_event_record record[NO_EVENTS];
9972+ char flag[NO_EVENTS];
9973+ struct ft_buffer ftbuf;
9974+};
9975+
9976+DEFINE_PER_CPU(struct local_buffer, st_event_buffer);
9977+
9978+static struct ftdev st_dev;
9979+
9980+static int st_dev_can_open(struct ftdev *dev, unsigned int cpu)
9981+{
9982+ return cpu_online(cpu) ? 0 : -ENODEV;
9983+}
9984+
9985+static int __init init_sched_task_trace(void)
9986+{
9987+ struct local_buffer* buf;
9988+ int i, ok = 0;
9989+ ftdev_init(&st_dev, THIS_MODULE);
9990+ for (i = 0; i < NR_CPUS; i++) {
9991+ buf = &per_cpu(st_event_buffer, i);
9992+ ok += init_ft_buffer(&buf->ftbuf, NO_EVENTS,
9993+ sizeof(struct st_event_record),
9994+ buf->flag,
9995+ buf->record);
9996+ st_dev.minor[i].buf = &buf->ftbuf;
9997+ }
9998+ if (ok == NR_CPUS) {
9999+ st_dev.minor_cnt = NR_CPUS;
10000+ st_dev.can_open = st_dev_can_open;
10001+ return register_ftdev(&st_dev, "sched_trace", FT_TASK_TRACE_MAJOR);
10002+ } else {
10003+ return -EINVAL;
10004+ }
10005+}
10006+
10007+module_init(init_sched_task_trace);
10008+
10009+
10010+static inline struct st_event_record* get_record(u8 type, struct task_struct* t)
10011+{
10012+ struct st_event_record* rec = NULL;
10013+ struct local_buffer* buf;
10014+
10015+ buf = &get_cpu_var(st_event_buffer);
10016+ if (ft_buffer_start_write(&buf->ftbuf, (void**) &rec)) {
10017+ rec->hdr.type = type;
10018+ rec->hdr.cpu = smp_processor_id();
10019+ rec->hdr.pid = t ? t->pid : 0;
10020+ rec->hdr.job = t ? t->rt_param.job_params.job_no : 0;
10021+ } else {
10022+ put_cpu_var(st_event_buffer);
10023+ }
10024+ /* rec will be NULL if it failed */
10025+ return rec;
10026+}
10027+
10028+static inline void put_record(struct st_event_record* rec)
10029+{
10030+ struct local_buffer* buf;
10031+ buf = &__get_cpu_var(st_event_buffer);
10032+ ft_buffer_finish_write(&buf->ftbuf, rec);
10033+ put_cpu_var(st_event_buffer);
10034+}
10035+
10036+feather_callback void do_sched_trace_task_name(unsigned long id, unsigned long _task)
10037+{
10038+ struct task_struct *t = (struct task_struct*) _task;
10039+ struct st_event_record* rec = get_record(ST_NAME, t);
10040+ int i;
10041+ if (rec) {
10042+ for (i = 0; i < min(TASK_COMM_LEN, ST_NAME_LEN); i++)
10043+ rec->data.name.cmd[i] = t->comm[i];
10044+ put_record(rec);
10045+ }
10046+}
10047+
10048+feather_callback void do_sched_trace_task_param(unsigned long id, unsigned long _task)
10049+{
10050+ struct task_struct *t = (struct task_struct*) _task;
10051+ struct st_event_record* rec = get_record(ST_PARAM, t);
10052+ if (rec) {
10053+ rec->data.param.wcet = get_exec_cost(t);
10054+ rec->data.param.period = get_rt_period(t);
10055+ rec->data.param.phase = get_rt_phase(t);
10056+ rec->data.param.partition = get_partition(t);
10057+ put_record(rec);
10058+ }
10059+}
10060+
10061+feather_callback void do_sched_trace_task_release(unsigned long id, unsigned long _task)
10062+{
10063+ struct task_struct *t = (struct task_struct*) _task;
10064+ struct st_event_record* rec = get_record(ST_RELEASE, t);
10065+ if (rec) {
10066+ rec->data.release.release = get_release(t);
10067+ rec->data.release.deadline = get_deadline(t);
10068+ put_record(rec);
10069+ }
10070+}
10071+
10072+/* skipped: st_assigned_data, we don't use it atm */
10073+
10074+feather_callback void do_sched_trace_task_switch_to(unsigned long id,
10075+ unsigned long _task)
10076+{
10077+ struct task_struct *t = (struct task_struct*) _task;
10078+ struct st_event_record* rec;
10079+ if (is_realtime(t)) {
10080+ rec = get_record(ST_SWITCH_TO, t);
10081+ if (rec) {
10082+ rec->data.switch_to.when = now();
10083+ rec->data.switch_to.exec_time = get_exec_time(t);
10084+ put_record(rec);
10085+ }
10086+ }
10087+}
10088+
10089+feather_callback void do_sched_trace_task_switch_away(unsigned long id,
10090+ unsigned long _task)
10091+{
10092+ struct task_struct *t = (struct task_struct*) _task;
10093+ struct st_event_record* rec;
10094+ if (is_realtime(t)) {
10095+ rec = get_record(ST_SWITCH_AWAY, t);
10096+ if (rec) {
10097+ rec->data.switch_away.when = now();
10098+ rec->data.switch_away.exec_time = get_exec_time(t);
10099+ put_record(rec);
10100+ }
10101+ }
10102+}
10103+
10104+feather_callback void do_sched_trace_task_completion(unsigned long id,
10105+ unsigned long _task,
10106+ unsigned long forced)
10107+{
10108+ struct task_struct *t = (struct task_struct*) _task;
10109+ struct st_event_record* rec = get_record(ST_COMPLETION, t);
10110+ if (rec) {
10111+ rec->data.completion.when = now();
10112+ rec->data.completion.forced = forced;
10113+ put_record(rec);
10114+ }
10115+}
10116+
10117+feather_callback void do_sched_trace_task_block(unsigned long id,
10118+ unsigned long _task)
10119+{
10120+ struct task_struct *t = (struct task_struct*) _task;
10121+ struct st_event_record* rec = get_record(ST_BLOCK, t);
10122+ if (rec) {
10123+ rec->data.block.when = now();
10124+ put_record(rec);
10125+ }
10126+}
10127+
10128+feather_callback void do_sched_trace_task_resume(unsigned long id,
10129+ unsigned long _task)
10130+{
10131+ struct task_struct *t = (struct task_struct*) _task;
10132+ struct st_event_record* rec = get_record(ST_RESUME, t);
10133+ if (rec) {
10134+ rec->data.resume.when = now();
10135+ put_record(rec);
10136+ }
10137+}
10138+
10139+feather_callback void do_sched_trace_sys_release(unsigned long id,
10140+ unsigned long _start)
10141+{
10142+ lt_t *start = (lt_t*) _start;
10143+ struct st_event_record* rec = get_record(ST_SYS_RELEASE, NULL);
10144+ if (rec) {
10145+ rec->data.sys_release.when = now();
10146+ rec->data.sys_release.release = *start;
10147+ put_record(rec);
10148+ }
10149+}
10150diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
10151new file mode 100644
10152index 0000000..1fa2094
10153--- /dev/null
10154+++ b/litmus/sched_trace.c
10155@@ -0,0 +1,378 @@
10156+/*
10157+ * sched_trace.c -- record scheduling events to a byte stream.
10158+ */
10159+#include <linux/spinlock.h>
10160+#include <linux/semaphore.h>
10161+
10162+#include <linux/fs.h>
10163+#include <linux/slab.h>
10164+#include <linux/miscdevice.h>
10165+#include <asm/uaccess.h>
10166+#include <linux/module.h>
10167+#include <linux/sysrq.h>
10168+
10169+#include <linux/kfifo.h>
10170+
10171+#include <litmus/sched_trace.h>
10172+#include <litmus/litmus.h>
10173+
10174+#define SCHED_TRACE_NAME "litmus/log"
10175+
10176+/* Allocate a buffer of about 32k per CPU */
10177+#define LITMUS_TRACE_BUF_PAGES 8
10178+#define LITMUS_TRACE_BUF_SIZE (PAGE_SIZE * LITMUS_TRACE_BUF_PAGES * NR_CPUS)
10179+
10180+/* Max length of one read from the buffer */
10181+#define MAX_READ_LEN (64 * 1024)
10182+
10183+/* Max length for one write --- from kernel --- to the buffer */
10184+#define MSG_SIZE 255
10185+
10186+/* Inner ring buffer structure */
10187+typedef struct {
10188+ rwlock_t del_lock;
10189+
10190+ /* the buffer */
10191+ struct kfifo kfifo;
10192+} ring_buffer_t;
10193+
10194+/* Main buffer structure */
10195+typedef struct {
10196+ ring_buffer_t buf;
10197+ atomic_t reader_cnt;
10198+ struct semaphore reader_mutex;
10199+} trace_buffer_t;
10200+
10201+
10202+/*
10203+ * Inner buffer management functions
10204+ */
10205+void rb_init(ring_buffer_t* buf)
10206+{
10207+ rwlock_init(&buf->del_lock);
10208+}
10209+
10210+int rb_alloc_buf(ring_buffer_t* buf, unsigned int size)
10211+{
10212+ unsigned long flags;
10213+ int ret = 0;
10214+
10215+ write_lock_irqsave(&buf->del_lock, flags);
10216+
10217+	/* The kfifo size must be a power of 2;
10218+	 * at the moment, kfifo_alloc() automatically rounds the size to a power of two.
10219+	 */
10220+ ret = kfifo_alloc(&buf->kfifo, size, GFP_ATOMIC);
10221+
10222+ write_unlock_irqrestore(&buf->del_lock, flags);
10223+
10224+ if(ret < 0)
10225+ printk(KERN_ERR "kfifo_alloc failed\n");
10226+
10227+ return ret;
10228+}
10229+
10230+int rb_free_buf(ring_buffer_t* buf)
10231+{
10232+ unsigned long flags;
10233+
10234+ write_lock_irqsave(&buf->del_lock, flags);
10235+
10236+ BUG_ON(!kfifo_initialized(&buf->kfifo));
10237+ kfifo_free(&buf->kfifo);
10238+
10239+ write_unlock_irqrestore(&buf->del_lock, flags);
10240+
10241+ return 0;
10242+}
10243+
10244+/*
10245+ * Assumption: concurrent writes are serialized externally
10246+ *
10247+ * Will only succeed if there is enough space for all len bytes.
10248+ */
10249+int rb_put(ring_buffer_t* buf, char* mem, size_t len)
10250+{
10251+ unsigned long flags;
10252+ int error = 0;
10253+
10254+ read_lock_irqsave(&buf->del_lock, flags);
10255+
10256+ if (!kfifo_initialized(&buf->kfifo)) {
10257+ error = -ENODEV;
10258+ goto out;
10259+ }
10260+
10261+ if((kfifo_in(&buf->kfifo, mem, len)) < len) {
10262+ error = -ENOMEM;
10263+ goto out;
10264+ }
10265+
10266+ out:
10267+ read_unlock_irqrestore(&buf->del_lock, flags);
10268+ return error;
10269+}
10270+
10271+/* Assumption: concurrent reads are serialized externally */
10272+int rb_get(ring_buffer_t* buf, char* mem, size_t len)
10273+{
10274+ unsigned long flags;
10275+ int error = 0;
10276+
10277+ read_lock_irqsave(&buf->del_lock, flags);
10278+ if (!kfifo_initialized(&buf->kfifo)) {
10279+ error = -ENODEV;
10280+ goto out;
10281+ }
10282+
10283+ error = kfifo_out(&buf->kfifo, (unsigned char*)mem, len);
10284+
10285+ out:
10286+ read_unlock_irqrestore(&buf->del_lock, flags);
10287+ return error;
10288+}
10289+
10290+/*
10291+ * Device Driver management
10292+ */
10293+static DEFINE_RAW_SPINLOCK(log_buffer_lock);
10294+static trace_buffer_t log_buffer;
10295+
10296+static void init_log_buffer(void)
10297+{
10298+ rb_init(&log_buffer.buf);
10299+ atomic_set(&log_buffer.reader_cnt,0);
10300+ init_MUTEX(&log_buffer.reader_mutex);
10301+}
10302+
10303+static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
10304+
10305+/*
10306+ * sched_trace_log_message - Write to the trace buffer (log_buffer)
10307+ *
10308+ * This is the only function that writes to the log_buffer from inside
10309+ * the kernel.
10310+ * Concurrent access to sched_trace_log_message must be serialized using
10311+ * log_buffer_lock.
10312+ * The maximum length of a formatted message is MSG_SIZE (255 bytes).
10313+ */
10314+void sched_trace_log_message(const char* fmt, ...)
10315+{
10316+ unsigned long flags;
10317+ va_list args;
10318+ size_t len;
10319+ char* buf;
10320+
10321+ va_start(args, fmt);
10322+ local_irq_save(flags);
10323+
10324+ /* format message */
10325+ buf = __get_cpu_var(fmt_buffer);
10326+ len = vscnprintf(buf, MSG_SIZE, fmt, args);
10327+
10328+ raw_spin_lock(&log_buffer_lock);
10329+ /* Don't copy the trailing null byte, we don't want null bytes
10330+ * in a text file.
10331+ */
10332+ rb_put(&log_buffer.buf, buf, len);
10333+ raw_spin_unlock(&log_buffer_lock);
10334+
10335+ local_irq_restore(flags);
10336+ va_end(args);
10337+}
10338+
10339+/*
10340+ * log_read - Read the trace buffer
10341+ *
10342+ * This function is called as a file operation from userspace.
10343+ * Readers can sleep. Access is serialized through reader_mutex
10344+ */
10345+static ssize_t log_read(struct file *filp, char __user *to, size_t len,
10346+ loff_t *f_pos)
10347+{
10348+ /* we ignore f_pos, this is strictly sequential */
10349+
10350+ ssize_t error = -EINVAL;
10351+ char* mem;
10352+ trace_buffer_t *tbuf = filp->private_data;
10353+
10354+ if (down_interruptible(&tbuf->reader_mutex)) {
10355+ error = -ERESTARTSYS;
10356+ goto out;
10357+ }
10358+
10359+ if (len > MAX_READ_LEN)
10360+ len = MAX_READ_LEN;
10361+
10362+ mem = kmalloc(len, GFP_KERNEL);
10363+ if (!mem) {
10364+ error = -ENOMEM;
10365+ goto out_unlock;
10366+ }
10367+
10368+ error = rb_get(&tbuf->buf, mem, len);
10369+ while (!error) {
10370+ set_current_state(TASK_INTERRUPTIBLE);
10371+ schedule_timeout(110);
10372+ if (signal_pending(current))
10373+ error = -ERESTARTSYS;
10374+ else
10375+ error = rb_get(&tbuf->buf, mem, len);
10376+ }
10377+
10378+ if (error > 0 && copy_to_user(to, mem, error))
10379+ error = -EFAULT;
10380+
10381+ kfree(mem);
10382+ out_unlock:
10383+ up(&tbuf->reader_mutex);
10384+ out:
10385+ return error;
10386+}
10387+
10388+/*
10389+ * Enable redirection of printk() messages to the trace buffer.
10390+ * Defined in kernel/printk.c
10391+ */
10392+extern int trace_override;
10393+extern int trace_recurse;
10394+
10395+/*
10396+ * log_open - open the global log message ring buffer.
10397+ */
10398+static int log_open(struct inode *in, struct file *filp)
10399+{
10400+ int error = -EINVAL;
10401+ trace_buffer_t* tbuf;
10402+
10403+ tbuf = &log_buffer;
10404+
10405+ if (down_interruptible(&tbuf->reader_mutex)) {
10406+ error = -ERESTARTSYS;
10407+ goto out;
10408+ }
10409+
10410+ /* first open must allocate buffers */
10411+ if (atomic_inc_return(&tbuf->reader_cnt) == 1) {
10412+ if ((error = rb_alloc_buf(&tbuf->buf, LITMUS_TRACE_BUF_SIZE)))
10413+ {
10414+ atomic_dec(&tbuf->reader_cnt);
10415+ goto out_unlock;
10416+ }
10417+ }
10418+
10419+ error = 0;
10420+ filp->private_data = tbuf;
10421+
10422+ printk(KERN_DEBUG
10423+ "sched_trace kfifo with buffer starting at: 0x%p\n",
10424+ (tbuf->buf.kfifo).buffer);
10425+
10426+ /* override printk() */
10427+ trace_override++;
10428+
10429+ out_unlock:
10430+ up(&tbuf->reader_mutex);
10431+ out:
10432+ return error;
10433+}
10434+
10435+static int log_release(struct inode *in, struct file *filp)
10436+{
10437+ int error = -EINVAL;
10438+ trace_buffer_t* tbuf = filp->private_data;
10439+
10440+ BUG_ON(!filp->private_data);
10441+
10442+ if (down_interruptible(&tbuf->reader_mutex)) {
10443+ error = -ERESTARTSYS;
10444+ goto out;
10445+ }
10446+
10447+ /* last release must deallocate buffers */
10448+ if (atomic_dec_return(&tbuf->reader_cnt) == 0) {
10449+ error = rb_free_buf(&tbuf->buf);
10450+ }
10451+
10452+ /* release printk() overriding */
10453+ trace_override--;
10454+
10455+ printk(KERN_DEBUG "sched_trace kfifo released\n");
10456+
10457+ up(&tbuf->reader_mutex);
10458+ out:
10459+ return error;
10460+}
10461+
10462+/*
10463+ * log_fops - The file operations for accessing the global LITMUS log message
10464+ * buffer.
10465+ *
10466+ * Except for opening the device file it uses the same operations as trace_fops.
10467+ */
10468+static struct file_operations log_fops = {
10469+ .owner = THIS_MODULE,
10470+ .open = log_open,
10471+ .release = log_release,
10472+ .read = log_read,
10473+};
10474+
10475+static struct miscdevice litmus_log_dev = {
10476+ .name = SCHED_TRACE_NAME,
10477+ .minor = MISC_DYNAMIC_MINOR,
10478+ .fops = &log_fops,
10479+};
10480+
10481+#ifdef CONFIG_MAGIC_SYSRQ
10482+void dump_trace_buffer(int max)
10483+{
10484+ char line[80];
10485+ int len;
10486+ int count = 0;
10487+
10488+ /* potential, but very unlikely, race... */
10489+ trace_recurse = 1;
10490+ while ((max == 0 || count++ < max) &&
10491+ (len = rb_get(&log_buffer.buf, line, sizeof(line) - 1)) > 0) {
10492+ line[len] = '\0';
10493+ printk("%s", line);
10494+ }
10495+ trace_recurse = 0;
10496+}
10497+
10498+static void sysrq_dump_trace_buffer(int key, struct tty_struct *tty)
10499+{
10500+ dump_trace_buffer(100);
10501+}
10502+
10503+static struct sysrq_key_op sysrq_dump_trace_buffer_op = {
10504+ .handler = sysrq_dump_trace_buffer,
10505+ .help_msg = "dump-trace-buffer(Y)",
10506+ .action_msg = "writing content of TRACE() buffer",
10507+};
10508+#endif
10509+
10510+static int __init init_sched_trace(void)
10511+{
10512+ printk("Initializing TRACE() device\n");
10513+ init_log_buffer();
10514+
10515+#ifdef CONFIG_MAGIC_SYSRQ
10516+ /* offer some debugging help */
10517+ if (!register_sysrq_key('y', &sysrq_dump_trace_buffer_op))
10518+ printk("Registered dump-trace-buffer(Y) magic sysrq.\n");
10519+ else
10520+ printk("Could not register dump-trace-buffer(Y) magic sysrq.\n");
10521+#endif
10522+
10523+
10524+ return misc_register(&litmus_log_dev);
10525+}
10526+
10527+static void __exit exit_sched_trace(void)
10528+{
10529+ misc_deregister(&litmus_log_dev);
10530+}
10531+
10532+module_init(init_sched_trace);
10533+module_exit(exit_sched_trace);
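
sched_trace_log_message() is the sink behind the TRACE() and TRACE_TASK() statements used throughout this patch (the macros themselves live in the LITMUS^RT headers, not shown here): each call appends one formatted line of at most MSG_SIZE bytes to the kfifo, and readers drain it through the litmus/log misc device. A hypothetical kernel-side sketch of a direct call; the message text is illustrative only:

#include <linux/smp.h>
#include <litmus/litmus.h>
#include <litmus/sched_trace.h>	/* prototype location assumed */

/* Hypothetical helper: emit one debug line into the TRACE() log buffer. */
static void demo_trace_event(void)
{
	sched_trace_log_message("P%d: demo event at %llu\n",
				raw_smp_processor_id(), litmus_clock());
}

Once a reader has opened the device (which allocates the buffer and enables printk() redirection via trace_override), the accumulated text can simply be read from the litmus/log device node.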
10534diff --git a/litmus/srp.c b/litmus/srp.c
10535new file mode 100644
10536index 0000000..71639b9
10537--- /dev/null
10538+++ b/litmus/srp.c
10539@@ -0,0 +1,318 @@
10540+/* ************************************************************************** */
10541+/* STACK RESOURCE POLICY */
10542+/* ************************************************************************** */
10543+
10544+#include <asm/atomic.h>
10545+#include <linux/wait.h>
10546+#include <litmus/litmus.h>
10547+#include <litmus/sched_plugin.h>
10548+
10549+#include <litmus/fdso.h>
10550+
10551+#include <litmus/trace.h>
10552+
10553+
10554+#ifdef CONFIG_SRP
10555+
10556+struct srp_priority {
10557+ struct list_head list;
10558+ unsigned int period;
10559+ pid_t pid;
10560+};
10561+
10562+#define list2prio(l) list_entry(l, struct srp_priority, list)
10563+
10564+/* SRP task priority comparison function. Smaller periods have higher
10565+ * priority; ties are broken by PID. Special case: period == 0 <=> no priority.
10566+ */
10567+static int srp_higher_prio(struct srp_priority* first,
10568+ struct srp_priority* second)
10569+{
10570+ if (!first->period)
10571+ return 0;
10572+ else
10573+ return !second->period ||
10574+ first->period < second->period || (
10575+ first->period == second->period &&
10576+ first->pid < second->pid);
10577+}
10578+
10579+struct srp {
10580+ struct list_head ceiling;
10581+ wait_queue_head_t ceiling_blocked;
10582+};
10583+
10584+
10585+atomic_t srp_objects_in_use = ATOMIC_INIT(0);
10586+
10587+DEFINE_PER_CPU(struct srp, srp);
10588+
10589+
10590+/* Initialize SRP semaphores at boot time. */
10591+static int __init srp_init(void)
10592+{
10593+ int i;
10594+
10595+ printk("Initializing SRP per-CPU ceilings...");
10596+ for (i = 0; i < NR_CPUS; i++) {
10597+ init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
10598+ INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
10599+ }
10600+ printk(" done!\n");
10601+
10602+ return 0;
10603+}
10604+module_init(srp_init);
10605+
10606+
10607+#define system_ceiling(srp) list2prio(srp->ceiling.next)
10608+
10609+
10610+#define UNDEF_SEM -2
10611+
10612+
10613+/* struct for uniprocessor SRP "semaphore" */
10614+struct srp_semaphore {
10615+ struct srp_priority ceiling;
10616+ struct task_struct* owner;
10617+ int cpu; /* cpu associated with this "semaphore" and resource */
10618+};
10619+
10620+#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
10621+
10622+static int srp_exceeds_ceiling(struct task_struct* first,
10623+ struct srp* srp)
10624+{
10625+ return list_empty(&srp->ceiling) ||
10626+ get_rt_period(first) < system_ceiling(srp)->period ||
10627+ (get_rt_period(first) == system_ceiling(srp)->period &&
10628+ first->pid < system_ceiling(srp)->pid) ||
10629+ ceiling2sem(system_ceiling(srp))->owner == first;
10630+}
10631+
10632+static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
10633+{
10634+ struct list_head *pos;
10635+ if (in_list(&prio->list)) {
10636+ printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
10637+ "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
10638+ return;
10639+ }
10640+ list_for_each(pos, &srp->ceiling)
10641+ if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
10642+ __list_add(&prio->list, pos->prev, pos);
10643+ return;
10644+ }
10645+
10646+ list_add_tail(&prio->list, &srp->ceiling);
10647+}
10648+
10649+
10650+static void* create_srp_semaphore(void)
10651+{
10652+ struct srp_semaphore* sem;
10653+
10654+ sem = kmalloc(sizeof(*sem), GFP_KERNEL);
10655+ if (!sem)
10656+ return NULL;
10657+
10658+ INIT_LIST_HEAD(&sem->ceiling.list);
10659+ sem->ceiling.period = 0;
10660+ sem->cpu = UNDEF_SEM;
10661+ sem->owner = NULL;
10662+ atomic_inc(&srp_objects_in_use);
10663+ return sem;
10664+}
10665+
10666+static noinline int open_srp_semaphore(struct od_table_entry* entry, void* __user arg)
10667+{
10668+ struct srp_semaphore* sem = (struct srp_semaphore*) entry->obj->obj;
10669+ int ret = 0;
10670+ struct task_struct* t = current;
10671+ struct srp_priority t_prio;
10672+
10673+ TRACE("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
10674+ if (!srp_active())
10675+ return -EBUSY;
10676+
10677+ if (sem->cpu == UNDEF_SEM)
10678+ sem->cpu = get_partition(t);
10679+ else if (sem->cpu != get_partition(t))
10680+ ret = -EPERM;
10681+
10682+ if (ret == 0) {
10683+ t_prio.period = get_rt_period(t);
10684+ t_prio.pid = t->pid;
10685+ if (srp_higher_prio(&t_prio, &sem->ceiling)) {
10686+ sem->ceiling.period = t_prio.period;
10687+ sem->ceiling.pid = t_prio.pid;
10688+ }
10689+ }
10690+
10691+ return ret;
10692+}
10693+
10694+static void destroy_srp_semaphore(void* sem)
10695+{
10696+ /* XXX invariants */
10697+ atomic_dec(&srp_objects_in_use);
10698+ kfree(sem);
10699+}
10700+
10701+struct fdso_ops srp_sem_ops = {
10702+ .create = create_srp_semaphore,
10703+ .open = open_srp_semaphore,
10704+ .destroy = destroy_srp_semaphore
10705+};
10706+
10707+
10708+static void do_srp_down(struct srp_semaphore* sem)
10709+{
10710+ /* Update ceiling. */
10711+ srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
10712+ WARN_ON(sem->owner != NULL);
10713+ sem->owner = current;
10714+ TRACE_CUR("acquired srp 0x%p\n", sem);
10715+}
10716+
10717+static void do_srp_up(struct srp_semaphore* sem)
10718+{
10719+ /* Determine new system priority ceiling for this CPU. */
10720+ WARN_ON(!in_list(&sem->ceiling.list));
10721+ if (in_list(&sem->ceiling.list))
10722+ list_del(&sem->ceiling.list);
10723+
10724+ sem->owner = NULL;
10725+
10726+ /* Wake tasks on this CPU, if they exceed current ceiling. */
10727+ TRACE_CUR("released srp 0x%p\n", sem);
10728+ wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
10729+}
10730+
10731+/* Adjust the system-wide priority ceiling if resource is claimed. */
10732+asmlinkage long sys_srp_down(int sem_od)
10733+{
10734+ int cpu;
10735+ int ret = -EINVAL;
10736+ struct srp_semaphore* sem;
10737+
10738+	/* Disabling preemption is sufficient protection since
10739+	 * SRP is strictly per-CPU and we do not interfere with any
10740+	 * interrupt handlers.
10741+	 */
10742+ preempt_disable();
10743+ TS_SRP_DOWN_START;
10744+
10745+ cpu = smp_processor_id();
10746+ sem = lookup_srp_sem(sem_od);
10747+ if (sem && sem->cpu == cpu) {
10748+ do_srp_down(sem);
10749+ ret = 0;
10750+ }
10751+
10752+ TS_SRP_DOWN_END;
10753+ preempt_enable();
10754+ return ret;
10755+}
10756+
10757+/* Adjust the system-wide priority ceiling if resource is freed. */
10758+asmlinkage long sys_srp_up(int sem_od)
10759+{
10760+ int cpu;
10761+ int ret = -EINVAL;
10762+ struct srp_semaphore* sem;
10763+
10764+ preempt_disable();
10765+ TS_SRP_UP_START;
10766+
10767+ cpu = smp_processor_id();
10768+ sem = lookup_srp_sem(sem_od);
10769+
10770+ if (sem && sem->cpu == cpu) {
10771+ do_srp_up(sem);
10772+ ret = 0;
10773+ }
10774+
10775+ TS_SRP_UP_END;
10776+ preempt_enable();
10777+ return ret;
10778+}
10779+
10780+static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
10781+ void *key)
10782+{
10783+ int cpu = smp_processor_id();
10784+ struct task_struct *tsk = wait->private;
10785+ if (cpu != get_partition(tsk))
10786+		TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
10787+ get_partition(tsk));
10788+ else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
10789+ return default_wake_function(wait, mode, sync, key);
10790+ return 0;
10791+}
10792+
10793+
10794+
10795+static void do_ceiling_block(struct task_struct *tsk)
10796+{
10797+ wait_queue_t wait = {
10798+ .private = tsk,
10799+ .func = srp_wake_up,
10800+ .task_list = {NULL, NULL}
10801+ };
10802+
10803+ tsk->state = TASK_UNINTERRUPTIBLE;
10804+ add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
10805+ tsk->rt_param.srp_non_recurse = 1;
10806+ preempt_enable_no_resched();
10807+ schedule();
10808+ preempt_disable();
10809+ tsk->rt_param.srp_non_recurse = 0;
10810+ remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
10811+}
10812+
10813+/* Wait for current task priority to exceed system-wide priority ceiling.
10814+ */
10815+void srp_ceiling_block(void)
10816+{
10817+ struct task_struct *tsk = current;
10818+
10819+	/* This only applies to real-time tasks; the check is optimized for the RT case. */
10820+ if (unlikely(!is_realtime(tsk)))
10821+ return;
10822+
10823+ /* Avoid recursive ceiling blocking. */
10824+ if (unlikely(tsk->rt_param.srp_non_recurse))
10825+ return;
10826+
10827+ /* Bail out early if there aren't any SRP resources around. */
10828+ if (likely(!atomic_read(&srp_objects_in_use)))
10829+ return;
10830+
10831+ preempt_disable();
10832+ if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
10833+ TRACE_CUR("is priority ceiling blocked.\n");
10834+ while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
10835+ do_ceiling_block(tsk);
10836+ TRACE_CUR("finally exceeds system ceiling.\n");
10837+ } else
10838+ TRACE_CUR("is not priority ceiling blocked\n");
10839+ preempt_enable();
10840+}
10841+
10842+
10843+#else
10844+
10845+asmlinkage long sys_srp_down(int sem_od)
10846+{
10847+ return -ENOSYS;
10848+}
10849+
10850+asmlinkage long sys_srp_up(int sem_od)
10851+{
10852+ return -ENOSYS;
10853+}
10854+
10855+struct fdso_ops srp_sem_ops = {};
10856+
10857+#endif
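
sys_srp_down() and sys_srp_up() only raise and lower the local priority ceiling; actual blocking happens later in srp_ceiling_block(), when a job whose priority does not exceed the ceiling attempts to run. A hypothetical userspace bracket, assuming liblitmus wrappers named srp_down()/srp_up() around these system calls and an SRP object descriptor obtained through the FDSO layer (the wrapper names and header are assumptions, not defined in this patch):

#include <litmus.h>	/* assumed liblitmus header providing srp_down()/srp_up() */

/* Hypothetical sketch: bracket a non-nested, partition-local critical
 * section with the two system calls added above. 'od' is an SRP semaphore
 * object descriptor; how it is obtained is not shown.
 */
static void demo_srp_critical_section(int od)
{
	if (srp_down(od) != 0)	/* sys_srp_down: raises this CPU's ceiling */
		return;
	/* ... access the shared resource (no nesting of SRP resources) ... */
	srp_up(od);		/* sys_srp_up: lowers the ceiling, wakes blocked jobs */
}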
10858diff --git a/litmus/sync.c b/litmus/sync.c
10859new file mode 100644
10860index 0000000..bf75fde
10861--- /dev/null
10862+++ b/litmus/sync.c
10863@@ -0,0 +1,104 @@
10864+/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
10865+ *
10866+ *
10867+ */
10868+
10869+#include <asm/atomic.h>
10870+#include <asm/uaccess.h>
10871+#include <linux/spinlock.h>
10872+#include <linux/list.h>
10873+#include <linux/sched.h>
10874+#include <linux/completion.h>
10875+
10876+#include <litmus/litmus.h>
10877+#include <litmus/sched_plugin.h>
10878+#include <litmus/jobs.h>
10879+
10880+#include <litmus/sched_trace.h>
10881+
10882+static DECLARE_COMPLETION(ts_release);
10883+
10884+static long do_wait_for_ts_release(void)
10885+{
10886+ long ret = 0;
10887+
10888+ /* If the interruption races with a release, the completion object
10889+ * may have a non-zero counter. To avoid this problem, this should
10890+ * be replaced by wait_for_completion().
10891+ *
10892+ * For debugging purposes, this is interruptible for now.
10893+ */
10894+ ret = wait_for_completion_interruptible(&ts_release);
10895+
10896+ return ret;
10897+}
10898+
10899+int count_tasks_waiting_for_release(void)
10900+{
10901+ unsigned long flags;
10902+ int task_count = 0;
10903+ struct list_head *pos;
10904+
10905+ spin_lock_irqsave(&ts_release.wait.lock, flags);
10906+ list_for_each(pos, &ts_release.wait.task_list) {
10907+ task_count++;
10908+ }
10909+ spin_unlock_irqrestore(&ts_release.wait.lock, flags);
10910+
10911+ return task_count;
10912+}
10913+
10914+static long do_release_ts(lt_t start)
10915+{
10916+ int task_count = 0;
10917+ unsigned long flags;
10918+ struct list_head *pos;
10919+ struct task_struct *t;
10920+
10921+
10922+ spin_lock_irqsave(&ts_release.wait.lock, flags);
10923+ TRACE("<<<<<< synchronous task system release >>>>>>\n");
10924+
10925+ sched_trace_sys_release(&start);
10926+ list_for_each(pos, &ts_release.wait.task_list) {
10927+ t = (struct task_struct*) list_entry(pos,
10928+ struct __wait_queue,
10929+ task_list)->private;
10930+ task_count++;
10931+ litmus->release_at(t, start + t->rt_param.task_params.phase);
10932+ sched_trace_task_release(t);
10933+ }
10934+
10935+ spin_unlock_irqrestore(&ts_release.wait.lock, flags);
10936+
10937+ complete_n(&ts_release, task_count);
10938+
10939+ return task_count;
10940+}
10941+
10942+
10943+asmlinkage long sys_wait_for_ts_release(void)
10944+{
10945+ long ret = -EPERM;
10946+ struct task_struct *t = current;
10947+
10948+ if (is_realtime(t))
10949+ ret = do_wait_for_ts_release();
10950+
10951+ return ret;
10952+}
10953+
10954+
10955+asmlinkage long sys_release_ts(lt_t __user *__delay)
10956+{
10957+ long ret;
10958+ lt_t delay;
10959+
10960+ /* FIXME: check capabilities... */
10961+
10962+ ret = copy_from_user(&delay, __delay, sizeof(delay));
10963+ if (ret == 0)
10964+ ret = do_release_ts(litmus_clock() + delay);
10965+
10966+ return ret;
10967+}
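
The synchronous release protocol has two sides: every participating real-time task blocks in sys_wait_for_ts_release(), and a separate controller calls sys_release_ts() with a delay, which stamps each waiter with the common release time plus its individual phase. A hypothetical userspace sketch, assuming the liblitmus wrappers wait_for_ts_release() and release_ts() and the liblitmus header (all assumptions, not part of this patch):

#include <litmus.h>	/* assumed liblitmus header */

/* In every participating real-time task, once it is in real-time mode: */
static void demo_wait_for_release(void)
{
	wait_for_ts_release();		/* blocks in sys_wait_for_ts_release() */
}

/* In a separate controller process: */
static void demo_release_task_system(void)
{
	lt_t delay = 1000000000ULL;	/* release the task system 1 s from now (ns) */
	release_ts(&delay);		/* invokes sys_release_ts(&delay) */
}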
10968diff --git a/litmus/trace.c b/litmus/trace.c
10969new file mode 100644
10970index 0000000..4403769
10971--- /dev/null
10972+++ b/litmus/trace.c
10973@@ -0,0 +1,103 @@
10974+#include <linux/module.h>
10975+
10976+#include <litmus/ftdev.h>
10977+#include <litmus/litmus.h>
10978+#include <litmus/trace.h>
10979+
10980+/******************************************************************************/
10981+/* Allocation */
10982+/******************************************************************************/
10983+
10984+static struct ftdev overhead_dev;
10985+
10986+#define trace_ts_buf overhead_dev.minor[0].buf
10987+
10988+static unsigned int ts_seq_no = 0;
10989+
10990+static inline void __save_timestamp_cpu(unsigned long event,
10991+ uint8_t type, uint8_t cpu)
10992+{
10993+ unsigned int seq_no;
10994+ struct timestamp *ts;
10995+ seq_no = fetch_and_inc((int *) &ts_seq_no);
10996+ if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
10997+ ts->event = event;
10998+ ts->timestamp = ft_timestamp();
10999+ ts->seq_no = seq_no;
11000+ ts->cpu = cpu;
11001+ ts->task_type = type;
11002+ ft_buffer_finish_write(trace_ts_buf, ts);
11003+ }
11004+}
11005+
11006+static inline void __save_timestamp(unsigned long event,
11007+ uint8_t type)
11008+{
11009+ __save_timestamp_cpu(event, type, raw_smp_processor_id());
11010+}
11011+
11012+feather_callback void save_timestamp(unsigned long event)
11013+{
11014+ __save_timestamp(event, TSK_UNKNOWN);
11015+}
11016+
11017+feather_callback void save_timestamp_def(unsigned long event,
11018+ unsigned long type)
11019+{
11020+ __save_timestamp(event, (uint8_t) type);
11021+}
11022+
11023+feather_callback void save_timestamp_task(unsigned long event,
11024+ unsigned long t_ptr)
11025+{
11026+ int rt = is_realtime((struct task_struct *) t_ptr);
11027+ __save_timestamp(event, rt ? TSK_RT : TSK_BE);
11028+}
11029+
11030+feather_callback void save_timestamp_cpu(unsigned long event,
11031+ unsigned long cpu)
11032+{
11033+ __save_timestamp_cpu(event, TSK_UNKNOWN, cpu);
11034+}
11035+
11036+/******************************************************************************/
11037+/* DEVICE FILE DRIVER */
11038+/******************************************************************************/
11039+
11040+/*
11041+ * This should be 8M; it is the maximum we can ask of the buddy system
11042+ * allocator (MAX_ORDER), and we might not get that much.
11043+ */
11044+#define NO_TIMESTAMPS (2 << 11)
11045+
11046+/* set MAJOR to 0 to have it dynamically assigned */
11047+#define FT_TRACE_MAJOR 252
11048+
11049+static int alloc_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
11050+{
11051+ unsigned int count = NO_TIMESTAMPS;
11052+ while (count && !trace_ts_buf) {
11053+ printk("time stamp buffer: trying to allocate %u time stamps.\n", count);
11054+ ftdev->minor[idx].buf = alloc_ft_buffer(count, sizeof(struct timestamp));
11055+ count /= 2;
11056+ }
11057+ return ftdev->minor[idx].buf ? 0 : -ENOMEM;
11058+}
11059+
11060+static void free_timestamp_buffer(struct ftdev* ftdev, unsigned int idx)
11061+{
11062+ free_ft_buffer(ftdev->minor[idx].buf);
11063+ ftdev->minor[idx].buf = NULL;
11064+}
11065+
11066+static int __init init_ft_overhead_trace(void)
11067+{
11068+ printk("Initializing Feather-Trace overhead tracing device.\n");
11069+ ftdev_init(&overhead_dev, THIS_MODULE);
11070+ overhead_dev.minor_cnt = 1; /* only one buffer */
11071+ overhead_dev.alloc = alloc_timestamp_buffer;
11072+ overhead_dev.free = free_timestamp_buffer;
11073+ return register_ftdev(&overhead_dev, "ft_trace", FT_TRACE_MAJOR);
11074+}
11075+
11076+module_init(init_ft_overhead_trace);
diff --git a/index.html b/index.html
index ddb17b8..088c5f2 100644
--- a/index.html
+++ b/index.html
@@ -64,8 +64,8 @@ Have a look at our group's <a href="http://www.cs.unc.edu/%7Eanderson/papers.htm
 </p>
 <h3>Current Version</h3>
 <p class="notopmargin">
-The current version of LITMUS<sup>RT</sup> is <strong>2010.1</strong> and is based on Linux&nbsp;2.6.32.
-It was released on 05/19/2010 and includes plugins for the following
+The current version of LITMUS<sup>RT</sup> is <strong>2010.2</strong> and is based on Linux&nbsp;2.6.34.
+It was released on 10/21/2010 and includes plugins for the following
 scheduling policies:
 </p>
 <ul>
@@ -298,15 +298,51 @@ Technology and Applications Symposium</cite>, pp. 342-353, April 2008.
 it is also available as a <span class="src">git</span> repository (see <a href="#development">Development</a> below).
 </p>
 <p>
-The current release of LITMUS<sup>RT</sup> is 2010.1.
+The current release of LITMUS<sup>RT</sup> is 2010.2.
 It consists of our Linux kernel modifications in the form of
-a patch against Linux 2.6.32 and
+a patch against Linux 2.6.34 and
 
 <span class="src">liblitmus</span>, the user-space API for real-time
 tasks, as well as <span class="src">ft_tools</span>, a collection of tools
 used for tracing with <a href="http://www.cs.unc.edu/~bbb/feathertrace/">Feather-Trace</a> (which is part of the LITMUS<sup>RT</sup> patch).
 </p>
 
+
+<h3 class="relname">LITMUS<sup>RT</sup> 2010.2</h3>
+<div class="release">
+<p>
+Based on Linux 2.6.34. Released in October 2010.
+
+</p>
+<h4>Files:</h4>
+<ul class="notopmargin">
+<li>
+<a href="download/2010.2/litmus-rt-2010.2.patch">litmus-rt-2010.2.patch</a>
+</li>
+<li>
+<a href="download/2010.2/liblitmus-2010.2.tgz">liblitmus-2010.2.tgz</a>
+</li>
+<li>
+<a href="download/2010.2/ft_tools-2010.2.tgz">ft_tools-2010.2.tgz</a>
+</li>
+<li><a href="download/2010.2/SHA256SUMS">SHA256 check sums</a>
+</li>
+</ul>
+<h4>Major changes since LITMUS<sup>RT</sup> 2010.1:</h4>
+<ul class="notopmargin">
+<li>
+Rebased LITMUS<sup>RT</sup> from Linux 2.6.32 to Linux 2.6.34.
+</li>
+<li>
+Added support for configurable budget enforcement (no enforcement, coarse-grained enforcement on timer ticks, and precise enforcement using high-resolution timers).
+</li>
+<li>Added support for a single cluster (all CPUs) under C-EDF.</li>
+<li>Made some features optional (C-EDF, PFair, release-master mode).</li>
+<li>Fixed several link and compile errors.</li>
+</ul>
+</div>
+
+
 <h3 class="relname">LITMUS<sup>RT</sup> 2010.1</h3>
 <div class="release">
 <p>