author	Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu>	2008-08-22 22:46:36 -0400
committer	Bjoern Brandenburg <bbb@bbb1-cs.cs.unc.edu>	2008-08-22 22:46:36 -0400
commit	6074547124c3a9657469be06c47b882fc07abe11 (patch)
tree	388a9914c078b0f420ea1f6461502a6edde4c649
parent	3da32247ed85eb654fa3cc79c82bf941b9ef43e9 (diff)
parent	1acaf95fd9ff52512bfd377a87f0c28050e01bc5 (diff)
Merge branch 'master' of git+ssh://cvs/cvs/proj/litmus/repo/homepage
-rw-r--r--	.gitignore	2
-rw-r--r--	download/RTCSA08/SHA256SUMS	2
-rw-r--r--	download/RTCSA08/liblitmus-RTCSA08.tgz	bin 0 -> 10277 bytes
-rw-r--r--	download/RTCSA08/litmus-rt-RTCSA08.patch	7768
-rw-r--r--	index.html	17
5 files changed, 7789 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5ca0973
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
1.DS_Store
2
diff --git a/download/RTCSA08/SHA256SUMS b/download/RTCSA08/SHA256SUMS
new file mode 100644
index 0000000..4bc8472
--- /dev/null
+++ b/download/RTCSA08/SHA256SUMS
@@ -0,0 +1,2 @@
1f9176d0d1dfd7e1c4ab3ba5f4dc62efa3dd1ab8c50e2e63628fe2d2376cb344b liblitmus-RTCSA08.tgz
224c6b22ba13b096b3dc4356ed98f484548c68c77a59296952d72458154dd6bac litmus-rt-RTCSA08.patch
diff --git a/download/RTCSA08/liblitmus-RTCSA08.tgz b/download/RTCSA08/liblitmus-RTCSA08.tgz
new file mode 100644
index 0000000..9947121
--- /dev/null
+++ b/download/RTCSA08/liblitmus-RTCSA08.tgz
Binary files differ
diff --git a/download/RTCSA08/litmus-rt-RTCSA08.patch b/download/RTCSA08/litmus-rt-RTCSA08.patch
new file mode 100644
index 0000000..e4863a6
--- /dev/null
+++ b/download/RTCSA08/litmus-rt-RTCSA08.patch
@@ -0,0 +1,7768 @@
1 Makefile | 2 +-
2 arch/i386/Kconfig | 28 ++
3 arch/i386/kernel/apic.c | 92 +++++
4 arch/i386/kernel/i386_ksyms.c | 1 +
5 arch/i386/kernel/signal.c | 3 +-
6 arch/i386/kernel/smp.c | 1 +
7 arch/i386/kernel/syscall_table.S | 22 +
8 fs/exec.c | 5 +-
9 fs/inode.c | 2 +
10 include/asm-i386/unistd.h | 25 ++-
11 include/linux/completion.h | 2 +
12 include/linux/fs.h | 5 +
13 include/linux/sched.h | 14 +
14 include/linux/uaccess.h | 16 +
15 include/litmus/edf_common.h | 27 ++
16 include/litmus/fdso.h | 78 ++++
17 include/litmus/feather_buffer.h | 108 +++++
18 include/litmus/feather_trace.h | 93 +++++
19 include/litmus/jobs.h | 9 +
20 include/litmus/litmus.h | 200 +++++++++
21 include/litmus/rm_common.h | 44 ++
22 include/litmus/rt_domain.h | 94 +++++
23 include/litmus/rt_param.h | 177 ++++++++
24 include/litmus/sched_plugin.h | 120 ++++++
25 include/litmus/sched_trace.h | 31 ++
26 include/litmus/trace.h | 106 +++++
27 kernel/exit.c | 4 +
28 kernel/fork.c | 5 +
29 kernel/sched.c | 177 ++++++++-
30 lib/semaphore-sleepers.c | 2 +-
31 litmus/Makefile | 9 +
32 litmus/edf_common.c | 95 +++++
33 litmus/fdso.c | 289 +++++++++++++
34 litmus/ft_event.c | 104 +++++
35 litmus/jobs.c | 43 ++
36 litmus/litmus.c | 830 ++++++++++++++++++++++++++++++++++++++
37 litmus/litmus_sem.c | 551 +++++++++++++++++++++++++
38 litmus/pcp.c | 764 +++++++++++++++++++++++++++++++++++
39 litmus/rm_common.c | 76 ++++
40 litmus/rt_domain.c | 130 ++++++
41 litmus/sched_gsn_edf.c | 733 +++++++++++++++++++++++++++++++++
42 litmus/sched_plugin.c | 169 ++++++++
43 litmus/sched_psn_edf.c | 458 +++++++++++++++++++++
44 litmus/sched_rm.c | 397 ++++++++++++++++++
45 litmus/sched_trace.c | 541 +++++++++++++++++++++++++
46 litmus/sync.c | 84 ++++
47 litmus/trace.c | 302 ++++++++++++++
48 47 files changed, 7052 insertions(+), 16 deletions(-)
49
50diff --git a/Makefile b/Makefile
51index 7e2750f..79cf62b 100644
52--- a/Makefile
53+++ b/Makefile
54@@ -553,7 +553,7 @@ export mod_strip_cmd
55
56
57 ifeq ($(KBUILD_EXTMOD),)
58-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
59+core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
60
61 vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
62 $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
63diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
64index 0dfee81..da6f1e9 100644
65--- a/arch/i386/Kconfig
66+++ b/arch/i386/Kconfig
67@@ -1210,6 +1210,7 @@ config KPROBES
68 a probepoint and specifies the callback. Kprobes is useful
69 for kernel debugging, non-intrusive instrumentation and testing.
70 If in doubt, say "N".
71+
72 endmenu
73
74 source "arch/i386/Kconfig.debug"
75@@ -1259,3 +1260,30 @@ config X86_TRAMPOLINE
76 config KTIME_SCALAR
77 bool
78 default y
79+
80+
81+menu "LITMUS^RT"
82+
83+
84+config SCHED_TASK_TRACE
85+ bool "Trace real-time tasks"
86+ default y
87+ help
88+ Include support for the sched_trace_XXX() tracing functions. This
89+ allows the collection of real-time task events such as job
90+ completions, job releases, early completions, etc. This results in a
91+ small overhead in the scheduling code. Disable if the overhead is not
92+ acceptable (e.g., benchmarking).
93+
94+config SCHED_DEBUG_TRACE
95+ bool "TRACE() debugging"
96+ default y
97+ help
98+ Include support for sched_trace_log_message(), which is used to
99+ implement TRACE(). If disabled, no TRACE() messages will be included
100+ in the kernel, and no overheads due to debugging statements will be
101+ incurred by the scheduler. Disable if the overhead is not acceptable
102+ (e.g. benchmarking).
103+
104+
105+endmenu
106diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
107index 776d9be..36b0159 100644
108--- a/arch/i386/kernel/apic.c
109+++ b/arch/i386/kernel/apic.c
110@@ -26,6 +26,7 @@
111 #include <linux/sysdev.h>
112 #include <linux/cpu.h>
113 #include <linux/module.h>
114+#include <litmus/litmus.h>
115
116 #include <asm/atomic.h>
117 #include <asm/smp.h>
118@@ -43,6 +44,8 @@
119
120 #include "io_ports.h"
121
122+#include <litmus/trace.h>
123+
124 /*
125 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
126 * IPIs in place of local APIC timers
127@@ -54,6 +57,15 @@ static cpumask_t timer_bcast_ipi;
128 */
129 static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
130
131+/*
132+ * Definitions and variables related to quantum synchronization.
133+ */
134+#define WAIT_TO_SYNC 30000 /* ms after boot until quanta are synchronized */
135+static int stagger = 0; /* are we using staggered quanta? */
136+static atomic_t qsync_time = ATOMIC_INIT(INITIAL_JIFFIES);
137+static atomic_t quantum_sync_barrier = ATOMIC_INIT(0);
138+static atomic_t sync_done = ATOMIC_INIT(0);
139+
140 static inline void lapic_disable(void)
141 {
142 enable_local_apic = -1;
143@@ -786,6 +798,23 @@ static int __init apic_set_verbosity(char *str)
144
145 __setup("apic=", apic_set_verbosity);
146
147+/*
148+ * Determine whether to use aligned or staggered quanta.
149+ */
150+
151+static int __init apic_synch_type(char *str)
152+{
153+ if (strcmp("aligned", str) == 0)
154+ stagger = 0;
155+ else if (strcmp("staggered", str) == 0)
156+ stagger = 1;
157+ else
158+ stagger = 0; /* aligned quanta by default */
159+ return 1;
160+}
161+
162+__setup("quanta=", apic_synch_type);
163+
164 static int __init detect_init_APIC (void)
165 {
166 u32 h, l, features;
167@@ -1198,6 +1227,47 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
168 #undef APIC_DIVISOR
169
170 /*
171+ * This function is called to align all quanta, and to stagger quanta if
172+ * necessary. It relies on a barrier to synchronize all processors, so
173+ * that they all reset their APIC timers at the same time. If quanta
174+ * should be staggered, the appropriate stagger delay is then added at
175+ * each processor.
176+ */
177+
178+void synchronize_quanta(void)
179+{
180+ int cpu = smp_processor_id();
181+ int total_cpus = num_online_cpus();
182+ int stagger_interval = jiffies_to_usecs(1) / total_cpus;
183+
184+ /*
185+ * Disable APIC timer, wait for all other processors to reach barrier,
186+ * and re-enable all timers concurrently.
187+ */
188+ disable_APIC_timer();
189+ atomic_inc(&quantum_sync_barrier);
190+ while (atomic_read(&quantum_sync_barrier) < total_cpus) {
191+ /* Delay, otherwise atomic_inc's cannot occur. */
192+ udelay(1);
193+ }
194+
195+ /* Add necessary stagger for this CPU, if required. */
196+ if (stagger) {
197+ int stagger_us = cpu * stagger_interval;
198+ udelay(stagger_us);
199+ }
200+
201+ /* Re-enable all timers. */
202+ __setup_APIC_LVTT(calibration_result);
203+ enable_APIC_timer();
204+
205+ /* The first CPU signals that quantum sync is complete. */
206+ if (cpu == 0)
207+ atomic_inc(&sync_done);
208+}
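To make the stagger arithmetic above concrete, here is a small self-contained sketch (not part of the patch; HZ = 1000 and a 4-CPU machine are assumed purely for illustration) of the per-CPU delay that synchronize_quanta() computes:

/* Illustrative only: with HZ = 1000, jiffies_to_usecs(1) == 1000 us, so on a
 * 4-CPU system CPU k delays its re-enabled APIC timer by k * 250 us. */
#include <stdio.h>

int main(void)
{
	const int usecs_per_jiffy = 1000;	/* assumes HZ = 1000 */
	const int total_cpus = 4;		/* assumed CPU count */
	const int stagger_interval = usecs_per_jiffy / total_cpus;
	int cpu;

	for (cpu = 0; cpu < total_cpus; cpu++)
		printf("CPU %d stagger delay: %d us\n", cpu, cpu * stagger_interval);
	return 0;
}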
209+
210+
211+/*
212 * Local timer interrupt handler. It does both profiling and
213 * process statistics/rescheduling.
214 *
215@@ -1209,11 +1279,32 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
216
217 inline void smp_local_timer_interrupt(void)
218 {
219+/* s64 offset; */
220+
221+ TS_TICK_START;
222+
223 profile_tick(CPU_PROFILING);
224 #ifdef CONFIG_SMP
225 update_process_times(user_mode_vm(get_irq_regs()));
226 #endif
227
228+ /* Print out timing data - can be commented out if necessary. */
229+/* offset = get_nsec_offset(); */
230+/* TRACE("%d\n", offset); */
231+
232+ /*
233+ * Synchronize quanta if we have reached qsync_time plus wait
234+ * interval. The synchronization code itself is placed in its own
235+ * (non-inline) function, to avoid issues with creating an inline
236+ * function that is too large.
237+ */
238+ if (unlikely(!atomic_read(&sync_done) &&
239+ time_after(jiffies,
240+ (unsigned long)(atomic_read(&qsync_time) +
241+ msecs_to_jiffies(WAIT_TO_SYNC))))) {
242+ synchronize_quanta();
243+ }
244+
245 /*
246 * We take the 'long' return path, and there every subsystem
247 * grabs the apropriate locks (kernel lock/ irq lock).
248@@ -1224,6 +1315,7 @@ inline void smp_local_timer_interrupt(void)
249 * Currently this isn't too much of an issue (performance wise),
250 * we can take more than 100K local irqs per second on a 100 MHz P5.
251 */
252+ TS_TICK_END;
253 }
254
255 /*
256diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
257index e3d4b73..9670f77 100644
258--- a/arch/i386/kernel/i386_ksyms.c
259+++ b/arch/i386/kernel/i386_ksyms.c
260@@ -6,6 +6,7 @@ EXPORT_SYMBOL(__down_failed);
261 EXPORT_SYMBOL(__down_failed_interruptible);
262 EXPORT_SYMBOL(__down_failed_trylock);
263 EXPORT_SYMBOL(__up_wakeup);
264+
265 /* Networking helper routines. */
266 EXPORT_SYMBOL(csum_partial_copy_generic);
267
268diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
269index 65d7620..e95d732 100644
270--- a/arch/i386/kernel/signal.c
271+++ b/arch/i386/kernel/signal.c
272@@ -651,7 +651,6 @@ void do_notify_resume(struct pt_regs *regs, void *_unused,
273
274 /* deal with pending signal delivery */
275 if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
276- do_signal(regs);
277-
278+ do_signal(regs);
279 clear_thread_flag(TIF_IRET);
280 }
281diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
282index 5285aff..91921a3 100644
283--- a/arch/i386/kernel/smp.c
284+++ b/arch/i386/kernel/smp.c
285@@ -605,6 +605,7 @@ void smp_send_stop(void)
286 */
287 fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
288 {
289+ set_tsk_need_resched(current);
290 ack_APIC_irq();
291 }
292
293diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
294index 2697e92..48e5e8e 100644
295--- a/arch/i386/kernel/syscall_table.S
296+++ b/arch/i386/kernel/syscall_table.S
297@@ -319,3 +319,25 @@ ENTRY(sys_call_table)
298 .long sys_move_pages
299 .long sys_getcpu
300 .long sys_epoll_pwait
301+ /* LITMUS syscalls */
302+ .long sys_set_rt_task_param /* 320 */
303+ .long sys_get_rt_task_param
304+ .long sys_task_mode_transition
305+ .long sys_sleep_next_period
306+ .long sys_register_np_flag
307+ .long sys_exit_np /* 325 */
308+ .long sys_od_open
309+ .long sys_od_close
310+ .long sys_pi_down
311+ .long sys_pi_up
312+ .long sys_srp_down /* 330 */
313+ .long sys_srp_up
314+ .long sys_reg_task_srp_sem
315+ .long sys_query_job_no
316+ .long sys_wait_for_job_release
317+ .long sys_wait_for_ts_release /* 335 */
318+ .long sys_release_ts
319+ .long sys_pcp_down
320+ .long sys_pcp_up
321+ .long sys_dpcp_invoke
322+ .long sys_dpcp_agent /* 340 */
323diff --git a/fs/exec.c b/fs/exec.c
324index 11fe93f..353d6e3 100644
325--- a/fs/exec.c
326+++ b/fs/exec.c
327@@ -54,6 +54,8 @@
328 #include <asm/uaccess.h>
329 #include <asm/mmu_context.h>
330
331+#include <litmus/litmus.h>
332+
333 #ifdef CONFIG_KMOD
334 #include <linux/kmod.h>
335 #endif
336@@ -1140,7 +1142,8 @@ int do_execve(char * filename,
337 if (IS_ERR(file))
338 goto out_kfree;
339
340- sched_exec();
341+ sched_exec();
342+ litmus_exec();
343
344 bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
345
346diff --git a/fs/inode.c b/fs/inode.c
347index bf21dc6..fcf8ce3 100644
348--- a/fs/inode.c
349+++ b/fs/inode.c
350@@ -205,6 +205,8 @@ void inode_init_once(struct inode *inode)
351 INIT_LIST_HEAD(&inode->inotify_watches);
352 mutex_init(&inode->inotify_mutex);
353 #endif
354+ INIT_LIST_HEAD(&inode->i_obj_list);
355+ mutex_init(&inode->i_obj_mutex);
356 }
357
358 EXPORT_SYMBOL(inode_init_once);
359diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
360index 833fa17..d0ba5c3 100644
361--- a/include/asm-i386/unistd.h
362+++ b/include/asm-i386/unistd.h
363@@ -325,10 +325,33 @@
364 #define __NR_move_pages 317
365 #define __NR_getcpu 318
366 #define __NR_epoll_pwait 319
367+/* LITMUS */
368+#define __NR_set_rt_task_param 320
369+#define __NR_get_rt_task_param 321
370+#define __NR_task_mode 322
371+#define __NR_sleep_next_period 323
372+#define __NR_register_np_flag 324
373+#define __NR_exit_np 325
374+#define __NR_od_open 326
375+#define __NR_od_close 327
376+#define __NR_pi_down 328
377+#define __NR_pi_up 329
378+#define __NR_srp_down 330
379+#define __NR_srp_up 331
380+#define __NR_reg_task_srp_sem 332
381+#define __NR_query_job_no 333
382+#define __NR_wait_for_job_release 334
383+#define __NR_wait_for_ts_release 335
384+#define __NR_release_ts 336
385+#define __NR_pcp_down 337
386+#define __NR_pcp_up 338
387+#define __NR_dpcp_invoke 339
388+#define __NR_dpcp_agent 340
389+
390
391 #ifdef __KERNEL__
392
393-#define NR_syscalls 320
394+#define NR_syscalls 343
395
396 #define __ARCH_WANT_IPC_PARSE_VERSION
397 #define __ARCH_WANT_OLD_READDIR
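As an aside, a minimal userspace sketch (not part of the patch; the wrapper name is invented, and liblitmus-RTCSA08.tgz ships the real wrappers) of how the syscall numbers added above are reached from user space:

#include <unistd.h>
#include <sys/syscall.h>

#define __NR_sleep_next_period 323	/* from include/asm-i386/unistd.h above */

/* Hypothetical wrapper: signal job completion and sleep until the next release. */
static long sleep_next_period(void)
{
	return syscall(__NR_sleep_next_period);	/* 0 on success, -1/errno on error */
}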
398diff --git a/include/linux/completion.h b/include/linux/completion.h
399index 268c5a4..dc633ed 100644
400--- a/include/linux/completion.h
401+++ b/include/linux/completion.h
402@@ -51,6 +51,8 @@ extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout(
403
404 extern void FASTCALL(complete(struct completion *));
405 extern void FASTCALL(complete_all(struct completion *));
406+extern void FASTCALL(complete_n(struct completion *, int n));
407+
408
409 #define INIT_COMPLETION(x) ((x).done = 0)
410
411diff --git a/include/linux/fs.h b/include/linux/fs.h
412index 1410e53..4e1117c 100644
413--- a/include/linux/fs.h
414+++ b/include/linux/fs.h
415@@ -524,6 +524,8 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
416 #define i_size_ordered_init(inode) do { } while (0)
417 #endif
418
419+struct inode_obj_id_table;
420+
421 struct inode {
422 struct hlist_node i_hash;
423 struct list_head i_list;
424@@ -589,6 +591,9 @@ struct inode {
425 void *i_security;
426 #endif
427 void *i_private; /* fs or device private pointer */
428+
429+ struct list_head i_obj_list;
430+ struct mutex i_obj_mutex;
431 };
432
433 /*
434diff --git a/include/linux/sched.h b/include/linux/sched.h
435index 4463735..c7929d6 100644
436--- a/include/linux/sched.h
437+++ b/include/linux/sched.h
438@@ -3,6 +3,8 @@
439
440 #include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
441
442+#include <litmus/rt_param.h>
443+
444 /*
445 * cloning flags:
446 */
447@@ -796,6 +798,8 @@ enum sleep_type {
448 SLEEP_INTERRUPTED,
449 };
450
451+struct od_table_entry;
452+
453 struct prio_array;
454
455 struct task_struct {
456@@ -1051,6 +1055,16 @@ struct task_struct {
457 #ifdef CONFIG_FAULT_INJECTION
458 int make_it_fail;
459 #endif
460+ /* litmus parameters and state */
461+ struct rt_param rt_param;
462+
463+ /* allow scheduler plugins to queue in release lists, etc.
464+ * Cleanup: Move this into the rt_param struct.
465+ */
466+ struct list_head rt_list;
467+
468+ /* references to PI semaphores, etc. */
469+ struct od_table_entry* od_table;
470 };
471
472 static inline pid_t process_group(struct task_struct *tsk)
473diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
474index 975c963..6ae0ff9 100644
475--- a/include/linux/uaccess.h
476+++ b/include/linux/uaccess.h
477@@ -84,4 +84,20 @@ static inline unsigned long __copy_from_user_nocache(void *to,
478 ret; \
479 })
480
481+/* This is a naive attempt at a write version of the above native Linux macro.
482+ */
483+#define poke_kernel_address(val, addr) \
484+ ({ \
485+ long ret; \
486+ mm_segment_t old_fs = get_fs(); \
487+ \
488+ set_fs(KERNEL_DS); \
489+ pagefault_disable(); \
490+ ret = __put_user(val, (__force typeof(val) __user *)(addr)); \
491+ pagefault_enable(); \
492+ set_fs(old_fs); \
493+ ret; \
494+ })
495+
496+
497 #endif /* __LINUX_UACCESS_H__ */
498diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
499new file mode 100644
500index 0000000..df711f5
501--- /dev/null
502+++ b/include/litmus/edf_common.h
503@@ -0,0 +1,27 @@
504+/* EDF common data structures and utility functions shared by all EDF
505+ * based scheduler plugins
506+ */
507+
508+/* CLEANUP: Add comments and make it less messy.
509+ *
510+ */
511+
512+#ifndef __UNC_EDF_COMMON_H__
513+#define __UNC_EDF_COMMON_H__
514+
515+#include <litmus/rt_domain.h>
516+
517+
518+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched);
519+
520+int edf_higher_prio(struct task_struct* first,
521+ struct task_struct* second);
522+
523+int edf_ready_order(struct list_head* a, struct list_head* b);
524+
525+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
526+
527+#define job_completed(t) (!is_be(t) && \
528+ (t)->rt_param.job_params.exec_time == (t)->rt_param.task_params.exec_cost)
529+
530+#endif
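A minimal sketch of the priority test that edf_higher_prio() is expected to perform (an assumption: the real version in litmus/edf_common.c also handles inherited priorities and PID tie-breaks; earlier_deadline() is the helper defined in litmus/litmus.h):

/* Sketch only, not the actual implementation. */
static int edf_higher_prio_sketch(struct task_struct* first,
				  struct task_struct* second)
{
	/* a real-time task always beats "no task" */
	if (!second)
		return 1;
	/* otherwise the earlier absolute deadline wins */
	return earlier_deadline(first, second);
}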
531diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
532new file mode 100644
533index 0000000..5544c1b
534--- /dev/null
535+++ b/include/litmus/fdso.h
536@@ -0,0 +1,78 @@
537+/* fdso.h - file descriptor attached shared objects
538+ *
539+ * (c) 2007 B. Brandenburg, LITMUS^RT project
540+ */
541+
542+#ifndef _LINUX_FDSO_H_
543+#define _LINUX_FDSO_H_
544+
545+#include <linux/list.h>
546+#include <asm/atomic.h>
547+
548+#include <linux/fs.h>
549+
550+#define MAX_OBJECT_DESCRIPTORS 32
551+
552+typedef enum {
553+ MIN_OBJ_TYPE = 0,
554+
555+ PI_SEM = 0,
556+ SRP_SEM = 1,
557+ PCP_SEM = 2,
558+ MPCP_SEM = 3,
559+
560+ MAX_OBJ_TYPE = 3
561+} obj_type_t;
562+
563+struct inode_obj_id {
564+ struct list_head list;
565+ atomic_t count;
566+ struct inode* inode;
567+
568+ obj_type_t type;
569+ void* obj;
570+ unsigned int id;
571+};
572+
573+
574+struct od_table_entry {
575+ unsigned int used;
576+
577+ struct inode_obj_id* obj;
578+ void* extra;
579+};
580+
581+struct fdso_ops {
582+ void* (*create) (void);
583+ void (*destroy)(void*);
584+ int (*open) (struct od_table_entry*, void* __user);
585+ int (*close) (struct od_table_entry*);
586+};
587+
588+/* translate a userspace supplied od into the raw table entry
589+ * returns NULL if od is invalid
590+ */
591+struct od_table_entry* __od_lookup(int od);
592+
593+/* translate a userspace supplied od into the associated object
594+ * returns NULL if od is invalid
595+ */
596+static inline void* od_lookup(int od, obj_type_t type)
597+{
598+ struct od_table_entry* e = __od_lookup(od);
599+ return e && e->obj->type == type ? e->obj->obj : NULL;
600+}
601+
602+static inline void* od_lookup2(int od, obj_type_t type, obj_type_t type2)
603+{
604+ struct od_table_entry* e = __od_lookup(od);
605+ return e && (e->obj->type == type || e->obj->type == type2) ?
606+ e->obj->obj : NULL;
607+}
608+
609+#define lookup_pi_sem(od) ((struct pi_semaphore*) od_lookup(od, PI_SEM))
610+#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
611+#define lookup_pcp_sem(od) ((struct pcp_semaphore*) \
612+ od_lookup2(od, PCP_SEM, MPCP_SEM))
613+
614+#endif
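A hypothetical usage sketch (the syscall name is invented) of how a syscall backend turns a user-supplied object descriptor into a kernel object via the lookup macros above:

/* Sketch: translate an od into a pi_semaphore before operating on it. */
asmlinkage long sys_example_pi_down(int od)
{
	struct pi_semaphore* sem = lookup_pi_sem(od);
	if (!sem)
		return -EINVAL;	/* od invalid or not a PI semaphore */
	/* ... perform the actual down operation on sem ... */
	return 0;
}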
615diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
616new file mode 100644
617index 0000000..c788227
618--- /dev/null
619+++ b/include/litmus/feather_buffer.h
620@@ -0,0 +1,108 @@
621+#ifndef _FEATHER_BUFFER_H_
622+#define _FEATHER_BUFFER_H_
623+
624+/* requires UINT_MAX and memcpy */
625+
626+static inline int fetch_and_inc(int *val)
627+{
628+ int ret = 1;
629+ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
630+ return ret;
631+}
632+
633+static inline int fetch_and_dec(int *val)
634+{
635+ int ret = -1;
636+ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
637+ return ret;
638+}
639+
640+#define SLOT_FREE 0
641+#define SLOT_BUSY 1
642+#define SLOT_READY 2
643+
644+struct ft_buffer {
645+ unsigned int slot_count;
646+ unsigned int slot_size;
647+
648+ int free_count;
649+ unsigned int write_idx;
650+ unsigned int read_idx;
651+
652+ char* slots;
653+ void* buffer_mem;
654+ unsigned int failed_writes;
655+};
656+
657+static inline int init_ft_buffer(struct ft_buffer* buf,
658+ unsigned int slot_count,
659+ unsigned int slot_size,
660+ char* slots,
661+ void* buffer_mem)
662+{
663+ int i = 0;
664+ if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
665+ /* The slot count must divide UINT_MAX + 1 so that when it
666+ * wraps around the index correctly points to 0.
667+ */
668+ return 0;
669+ } else {
670+ buf->slot_count = slot_count;
671+ buf->slot_size = slot_size;
672+ buf->slots = slots;
673+ buf->buffer_mem = buffer_mem;
674+ buf->free_count = slot_count;
675+ buf->write_idx = 0;
676+ buf->read_idx = 0;
677+ buf->failed_writes = 0;
678+ for (i = 0; i < slot_count; i++)
679+ buf->slots[i] = SLOT_FREE;
680+ return 1;
681+ }
682+}
683+
684+static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
685+{
686+ int free = fetch_and_dec(&buf->free_count);
687+ unsigned int idx;
688+ if (free <= 0) {
689+ fetch_and_inc(&buf->free_count);
690+ *ptr = 0;
691+ fetch_and_inc(&buf->failed_writes);
692+ return 0;
693+ } else {
694+ idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
695+ buf->slots[idx] = SLOT_BUSY;
696+ *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
697+ return 1;
698+ }
699+}
700+
701+static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
702+{
703+ unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
704+ buf->slots[idx] = SLOT_READY;
705+}
706+
707+
708+/* exclusive reader access is assumed */
709+static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
710+{
711+ unsigned int idx;
712+ if (buf->free_count == buf->slot_count)
713+ /* nothing available */
714+ return 0;
715+ idx = buf->read_idx % buf->slot_count;
716+ if (buf->slots[idx] == SLOT_READY) {
717+ memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
718+ buf->slot_size);
719+ buf->slots[idx] = SLOT_FREE;
720+ buf->read_idx++;
721+ fetch_and_inc(&buf->free_count);
722+ return 1;
723+ } else
724+ return 0;
725+}
726+
727+
728+#endif
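A hedged usage sketch of the buffer API above (slot count and size are made up; the slot count must be a power of two so that it divides UINT_MAX + 1, as checked in init_ft_buffer()):

#define DEMO_SLOTS	128	/* power of two, passes the init_ft_buffer() check */
#define DEMO_SLOT_SIZE	16

static char demo_slot_states[DEMO_SLOTS];
static char demo_slot_mem[DEMO_SLOTS * DEMO_SLOT_SIZE];
static struct ft_buffer demo_buf;

static void demo_setup(void)
{
	init_ft_buffer(&demo_buf, DEMO_SLOTS, DEMO_SLOT_SIZE,
		       demo_slot_states, demo_slot_mem);
}

/* rec must point to at least DEMO_SLOT_SIZE bytes */
static void demo_record(const void* rec)
{
	void* slot;
	if (ft_buffer_start_write(&demo_buf, &slot)) {
		memcpy(slot, rec, DEMO_SLOT_SIZE);	/* fill the reserved slot */
		ft_buffer_finish_write(&demo_buf, slot);
	}
	/* on failure the write is counted in demo_buf.failed_writes */
}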
729diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
730new file mode 100644
731index 0000000..5c37ea7
732--- /dev/null
733+++ b/include/litmus/feather_trace.h
734@@ -0,0 +1,93 @@
735+#ifndef _FEATHER_TRACE_H_
736+#define _FEATHER_TRACE_H_
737+
738+#define feather_callback __attribute__((regparm(0)))
739+
740+/* make the compiler reload any register that is not saved in
741+ * a cdecl function call
742+ */
743+#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
744+
745+#define ft_event(id, callback) \
746+ __asm__ __volatile__( \
747+ "1: jmp 2f \n\t" \
748+ " call " #callback " \n\t" \
749+ ".section __event_table, \"aw\" \n\t" \
750+ ".long " #id ", 0, 1b, 2f \n\t" \
751+ ".previous \n\t" \
752+ "2: \n\t" \
753+ : : : CLOBBER_LIST)
754+
755+#define ft_event0(id, callback) \
756+ __asm__ __volatile__( \
757+ "1: jmp 2f \n\t" \
758+ " subl $4, %%esp \n\t" \
759+ " movl $" #id ", (%%esp) \n\t" \
760+ " call " #callback " \n\t" \
761+ " addl $4, %%esp \n\t" \
762+ ".section __event_table, \"aw\" \n\t" \
763+ ".long " #id ", 0, 1b, 2f \n\t" \
764+ ".previous \n\t" \
765+ "2: \n\t" \
766+ : : : CLOBBER_LIST)
767+
768+#define ft_event1(id, callback, param) \
769+ __asm__ __volatile__( \
770+ "1: jmp 2f \n\t" \
771+ " subl $8, %%esp \n\t" \
772+ " movl %0, 4(%%esp) \n\t" \
773+ " movl $" #id ", (%%esp) \n\t" \
774+ " call " #callback " \n\t" \
775+ " addl $8, %%esp \n\t" \
776+ ".section __event_table, \"aw\" \n\t" \
777+ ".long " #id ", 0, 1b, 2f \n\t" \
778+ ".previous \n\t" \
779+ "2: \n\t" \
780+ : : "r" (param) : CLOBBER_LIST)
781+
782+#define ft_event2(id, callback, param, param2) \
783+ __asm__ __volatile__( \
784+ "1: jmp 2f \n\t" \
785+ " subl $12, %%esp \n\t" \
786+ " movl %1, 8(%%esp) \n\t" \
787+ " movl %0, 4(%%esp) \n\t" \
788+ " movl $" #id ", (%%esp) \n\t" \
789+ " call " #callback " \n\t" \
790+ " addl $12, %%esp \n\t" \
791+ ".section __event_table, \"aw\" \n\t" \
792+ ".long " #id ", 0, 1b, 2f \n\t" \
793+ ".previous \n\t" \
794+ "2: \n\t" \
795+ : : "r" (param), "r" (param2) : CLOBBER_LIST)
796+
797+
798+#define ft_event3(id, callback, p, p2, p3) \
799+ __asm__ __volatile__( \
800+ "1: jmp 2f \n\t" \
801+ " subl $16, %%esp \n\t" \
802+ " movl %1, 12(%%esp) \n\t" \
803+ " movl %1, 8(%%esp) \n\t" \
804+ " movl %0, 4(%%esp) \n\t" \
805+ " movl $" #id ", (%%esp) \n\t" \
806+ " call " #callback " \n\t" \
807+ " addl $16, %%esp \n\t" \
808+ ".section __event_table, \"aw\" \n\t" \
809+ ".long " #id ", 0, 1b, 2f \n\t" \
810+ ".previous \n\t" \
811+ "2: \n\t" \
812+ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
813+
814+
815+static inline unsigned long long ft_read_tsc(void)
816+{
817+ unsigned long long ret;
818+ __asm__ __volatile__("rdtsc" : "=A" (ret));
819+ return ret;
820+}
821+
822+int ft_enable_event(unsigned long id);
823+int ft_disable_event(unsigned long id);
824+int ft_is_event_enabled(unsigned long id);
825+int ft_disable_all_events(void);
826+
827+#endif
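A hedged illustration of how these macros are meant to be used (the event id 999 and the demo_* names are invented): a feather_callback handler plus a trigger point whose jump is rewritten by ft_enable_event()/ft_disable_event().

/* Callback invoked only while event 999 is enabled. */
feather_callback void demo_event_handler(unsigned long id)
{
	/* e.g., record ft_read_tsc() somewhere */
}

static inline void demo_trigger(void)
{
	/* compiles to a jump that skips the call until the event is enabled */
	ft_event0(999, demo_event_handler);
}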
828diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
829new file mode 100644
830index 0000000..9bd361e
831--- /dev/null
832+++ b/include/litmus/jobs.h
833@@ -0,0 +1,9 @@
834+#ifndef __LITMUS_JOBS_H__
835+#define __LITMUS_JOBS_H__
836+
837+void prepare_for_next_period(struct task_struct *t);
838+void release_at(struct task_struct *t, lt_t start);
839+long complete_job(void);
840+
841+#endif
842+
843diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
844new file mode 100644
845index 0000000..5853ed5
846--- /dev/null
847+++ b/include/litmus/litmus.h
848@@ -0,0 +1,200 @@
849+/*
850+ * Constant definitions related to
851+ * scheduling policy.
852+ */
853+
854+#ifndef _LINUX_LITMUS_H_
855+#define _LINUX_LITMUS_H_
856+
857+#include <linux/jiffies.h>
858+#include <litmus/sched_trace.h>
859+
860+typedef enum {
861+ SCHED_LINUX = 0,
862+ SCHED_GSN_EDF = 10,
863+ SCHED_PSN_EDF = 11,
864+ /* Add your scheduling policy here */
865+
866+ SCHED_DEFAULT = 0,
867+ SCHED_INVALID = -1,
868+} spolicy;
869+
870+
871+typedef enum {
872+ LITMUS_RESERVED_RANGE = 1024,
873+
874+} sched_setup_cmd_t;
875+
876+/* per-task modes */
877+enum rt_task_mode_t {
878+ BACKGROUND_TASK = 0,
879+ LITMUS_RT_TASK = 1
880+};
881+
882+/* Plugin boot options, for convenience */
883+#define PLUGIN_LINUX "linux"
884+#define PLUGIN_GSN_EDF "gsn_edf"
885+#define PLUGIN_PSN_EDF "psn_edf"
886+
887+extern spolicy sched_policy;
888+
889+/* RT mode start time */
890+extern volatile unsigned long rt_start_time;
891+
892+#define TRACE(fmt, args...) \
893+ sched_trace_log_message("%d: " fmt, raw_smp_processor_id(), ## args)
894+
895+#define TRACE_TASK(t, fmt, args...) \
896+ TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args)
897+
898+#define TRACE_CUR(fmt, args...) \
899+ TRACE_TASK(current, fmt, ## args)
900+
901+#define TRACE_BUG_ON(cond) \
902+ do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \
903+ "called from %p current=%s/%d state=%d " \
904+ "flags=%x partition=%d cpu=%d rtflags=%d"\
905+ " job=%u knp=%d timeslice=%u\n", \
906+ #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \
907+ current->pid, current->state, current->flags, \
908+ get_partition(current), smp_processor_id(), get_rt_flags(current), \
909+ current->rt_param.job_params.job_no, current->rt_param.kernel_np, \
910+ current->time_slice\
911+ ); } while(0);
912+
913+
914+/* in_list - is a given list_head queued on some list?
915+ */
916+static inline int in_list(struct list_head* list)
917+{
918+ return !( /* case 1: deleted */
919+ (list->next == LIST_POISON1 &&
920+ list->prev == LIST_POISON2)
921+ ||
922+ /* case 2: initialized */
923+ (list->next == list &&
924+ list->prev == list)
925+ );
926+}
927+
928+typedef int (*prio_cmp_t)(struct task_struct* first,
929+ struct task_struct* second);
930+
931+typedef int (*list_cmp_t)(struct list_head*, struct list_head*);
932+
933+static inline unsigned int list_insert(struct list_head* new,
934+ struct list_head* head,
935+ list_cmp_t order_before)
936+{
937+ struct list_head *pos;
938+ unsigned int passed = 0;
939+
940+ BUG_ON(!new);
941+
942+ /* find a spot where the new entry is less than the next */
943+ list_for_each(pos, head) {
944+ if (unlikely(order_before(new, pos))) {
945+ /* pos is not less than new, thus insert here */
946+ __list_add(new, pos->prev, pos);
947+ goto out;
948+ }
949+ passed++;
950+ }
951+ /* if we get to this point either the list is empty or every
952+ * queued element is less than new.
953+ * Let's add new to the end. */
954+ list_add_tail(new, head);
955+ out:
956+ return passed;
957+}
958+
959+void list_qsort(struct list_head* list, list_cmp_t less_than);
960+
961+
962+#define RT_PREEMPTIVE 0x2050 /* = NP */
963+#define RT_NON_PREEMPTIVE 0x4e50 /* = P */
964+#define RT_EXIT_NP_REQUESTED 0x5251 /* = RQ */
965+
966+/* returns 1 if task t has registered np flag and set it to RT_NON_PREEMPTIVE
967+ */
968+int is_np(struct task_struct *t);
969+
970+/* request that the task should call sys_exit_np()
971+ */
972+void request_exit_np(struct task_struct *t);
973+
974+/* kill naughty tasks
975+ */
976+void scheduler_signal(struct task_struct *t, unsigned int signal);
977+void send_scheduler_signals(void);
978+void np_mem_kill(struct task_struct *t);
979+
980+void litmus_fork(struct task_struct *tsk);
981+void litmus_exec(void);
982+/* clean up real-time state of a task */
983+void exit_litmus(struct task_struct *dead_tsk);
984+
985+long transition_to_rt(struct task_struct* tsk);
986+long transition_to_be(struct task_struct* tsk);
987+
988+#define is_realtime(t) ((t)->rt_param.is_realtime)
989+#define rt_transition_pending(t) \
990+ ((t)->rt_param.transition_pending)
991+
992+/* Realtime utility macros */
993+#define get_rt_flags(t) ((t)->rt_param.flags)
994+#define set_rt_flags(t,f) (t)->rt_param.flags=(f)
995+#define get_exec_cost(t) ((t)->rt_param.task_params.exec_cost)
996+#define get_exec_time(t) ((t)->rt_param.job_params.exec_time)
997+#define get_rt_period(t) ((t)->rt_param.task_params.period)
998+#define get_partition(t) (t)->rt_param.task_params.cpu
999+#define get_deadline(t) ((t)->rt_param.job_params.deadline)
1000+#define get_class(t) ((t)->rt_param.task_params.cls)
1001+
1002+inline static int budget_exhausted(struct task_struct* t)
1003+{
1004+ return get_exec_time(t) >= get_exec_cost(t);
1005+}
1006+
1007+
1008+#define is_hrt(t) \
1009+ ((t)->rt_param.task_params.cls == RT_CLASS_HARD)
1010+#define is_srt(t) \
1011+ ((t)->rt_param.task_params.cls == RT_CLASS_SOFT)
1012+#define is_be(t) \
1013+ ((t)->rt_param.task_params.cls == RT_CLASS_BEST_EFFORT)
1014+
1015+#define get_release(t) ((t)->rt_param.job_params.release)
1016+
1017+/* Honor the flag in the preempt_count variable that is set
1018+ * when scheduling is in progress.
1019+ */
1020+#define is_running(t) \
1021+ ((t)->state == TASK_RUNNING || \
1022+ (t)->thread_info->preempt_count & PREEMPT_ACTIVE)
1023+
1024+#define is_blocked(t) \
1025+ (!is_running(t))
1026+#define is_released(t, now) \
1027+ (lt_before_eq(get_release(t), now))
1028+#define is_tardy(t, now) \
1029+ (lt_before_eq((t)->rt_param.job_params.deadline, now))
1030+
1031+/* real-time comparison macros */
1032+#define earlier_deadline(a, b) (lt_before(\
1033+ (a)->rt_param.job_params.deadline,\
1034+ (b)->rt_param.job_params.deadline))
1035+#define earlier_release(a, b) (lt_before(\
1036+ (a)->rt_param.job_params.release,\
1037+ (b)->rt_param.job_params.release))
1038+
1039+#define shorter_period(a, b) (lt_before(\
1040+ (a)->rt_param.task_params.period, \
1041+ (b)->rt_param.task_params.period))
1042+
1043+#define make_np(t) do {t->rt_param.kernel_np++;} while(0);
1044+#define take_np(t) do {t->rt_param.kernel_np--;} while(0);
1045+
1046+void srp_ceiling_block(void);
1047+
1048+#endif
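For illustration, a hypothetical list_cmp_t comparator (name invented) of the kind list_insert() and list_qsort() expect, ordering rt_list entries by earlier deadline:

/* Sketch: "a belongs before b" iff a's job has the earlier deadline. */
static int demo_order_by_deadline(struct list_head* a, struct list_head* b)
{
	struct task_struct* ta = list_entry(a, struct task_struct, rt_list);
	struct task_struct* tb = list_entry(b, struct task_struct, rt_list);
	return earlier_deadline(ta, tb);
}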
1049diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
1050new file mode 100644
1051index 0000000..11e8365
1052--- /dev/null
1053+++ b/include/litmus/rm_common.h
1054@@ -0,0 +1,44 @@
1055+/* rate monotonic helper functions.
1056+ */
1057+
1058+
1059+#ifndef __UNC_RM_COMMON_H__
1060+#define __UNC_RM_COMMON_H__
1061+
1062+#include <litmus/rt_domain.h>
1063+
1064+static inline int _rm_higher_prio(struct pcp_priority *p1,
1065+ struct pcp_priority *p2)
1066+{
1067+ /* does the second task exist and is it a real-time task? If
1068+ * not, the first task (which is a RT task) has higher
1069+ * priority.
1070+ */
1071+
1072+ if (unlikely(!p2))
1073+ return 1;
1074+
1075+ if (p1->in_global_cs == p2->in_global_cs) {
1076+ /* tie break by RM priority */
1077+ if (p1->prio == p2->prio)
1078+ /* tie break equal periods by PID */
1079+ return p1->pid < p2->pid;
1080+ else
1081+ /* shorter period or lower index has higher priority */
1082+ return p1->prio < p2->prio;
1083+ } else
1084+ /* gcs always have higher priority */
1085+ return p1->in_global_cs > p2->in_global_cs;
1086+}
1087+
1088+
1089+void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched);
1090+
1091+int rm_higher_prio(struct task_struct* first,
1092+ struct task_struct* second);
1093+
1094+int rm_ready_order(struct list_head* a, struct list_head* b);
1095+
1096+int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t);
1097+
1098+#endif
1099diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
1100new file mode 100644
1101index 0000000..79b6034
1102--- /dev/null
1103+++ b/include/litmus/rt_domain.h
1104@@ -0,0 +1,94 @@
1105+/* CLEANUP: Add comments and make it less messy.
1106+ *
1107+ */
1108+
1109+#ifndef __UNC_RT_DOMAIN_H__
1110+#define __UNC_RT_DOMAIN_H__
1111+
1112+struct _rt_domain;
1113+
1114+typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
1115+typedef void (*release_at_t)(struct task_struct *t, lt_t start);
1116+
1117+typedef struct _rt_domain {
1118+ /* runnable rt tasks are in here */
1119+ rwlock_t ready_lock;
1120+ struct list_head ready_queue;
1121+
1122+ /* real-time tasks waiting for release are in here */
1123+ spinlock_t release_lock;
1124+ struct list_head release_queue;
1125+
1126+ /* how do we check if we need to kick another CPU? */
1127+ check_resched_needed_t check_resched;
1128+
1129+ /* how are tasks ordered in the ready queue? */
1130+ list_cmp_t order;
1131+} rt_domain_t;
1132+
1133+#define next_ready(rt) \
1134+ (list_entry((rt)->ready_queue.next, struct task_struct, rt_list))
1135+
1136+#define ready_jobs_pending(rt) \
1137+ (!list_empty(&(rt)->ready_queue))
1138+
1139+void rt_domain_init(rt_domain_t *rt, check_resched_needed_t f,
1140+ list_cmp_t order);
1141+
1142+void __add_ready(rt_domain_t* rt, struct task_struct *new);
1143+void __add_release(rt_domain_t* rt, struct task_struct *task);
1144+
1145+struct task_struct* __take_ready(rt_domain_t* rt);
1146+struct task_struct* __peek_ready(rt_domain_t* rt);
1147+
1148+void try_release_pending(rt_domain_t* rt);
1149+void __release_pending(rt_domain_t* rt);
1150+
1151+static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
1152+{
1153+ unsigned long flags;
1154+ /* first we need the write lock for rt_ready_queue */
1155+ write_lock_irqsave(&rt->ready_lock, flags);
1156+ __add_ready(rt, new);
1157+ write_unlock_irqrestore(&rt->ready_lock, flags);
1158+}
1159+
1160+static inline struct task_struct* take_ready(rt_domain_t* rt)
1161+{
1162+ unsigned long flags;
1163+ struct task_struct* ret;
1164+ /* first we need the write lock for rt_ready_queue */
1165+ write_lock_irqsave(&rt->ready_lock, flags);
1166+ ret = __take_ready(rt);
1167+ write_unlock_irqrestore(&rt->ready_lock, flags);
1168+ return ret;
1169+}
1170+
1171+
1172+static inline void add_release(rt_domain_t* rt, struct task_struct *task)
1173+{
1174+ unsigned long flags;
1175+ /* first we need the write lock for rt_ready_queue */
1176+ spin_lock_irqsave(&rt->release_lock, flags);
1177+ __add_release(rt, task);
1178+ spin_unlock_irqrestore(&rt->release_lock, flags);
1179+}
1180+
1181+static inline int __jobs_pending(rt_domain_t* rt)
1182+{
1183+ return !list_empty(&rt->ready_queue);
1184+}
1185+
1186+static inline int jobs_pending(rt_domain_t* rt)
1187+{
1188+ unsigned long flags;
1189+ int ret;
1190+ /* first we need the write lock for rt_ready_queue */
1191+ read_lock_irqsave(&rt->ready_lock, flags);
1192+ ret = __jobs_pending(rt);
1193+ read_unlock_irqrestore(&rt->ready_lock, flags);
1194+ return ret;
1195+}
1196+
1197+
1198+#endif
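A sketch, under assumptions, of wiring up an rt_domain_t with an ordering function and a resched check, mirroring what edf_domain_init() in litmus/edf_common.c presumably does; all demo_* names are invented and litmus/edf_common.h is assumed to be included:

static int demo_check_resched(rt_domain_t* rt)
{
	/* a real plugin decides here whether some CPU must reschedule */
	set_tsk_need_resched(current);
	return 1;
}

static rt_domain_t demo_domain;

static void demo_domain_setup(void)
{
	rt_domain_init(&demo_domain, demo_check_resched, edf_ready_order);
}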
1199diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
1200new file mode 100644
1201index 0000000..37a4495
1202--- /dev/null
1203+++ b/include/litmus/rt_param.h
1204@@ -0,0 +1,177 @@
1205+/*
1206+ * Definition of the scheduler plugin interface.
1207+ *
1208+ */
1209+#ifndef _LINUX_RT_PARAM_H_
1210+#define _LINUX_RT_PARAM_H_
1211+
1212+/* Litmus time type. */
1213+typedef unsigned long long lt_t;
1214+
1215+static inline int lt_after(lt_t a, lt_t b)
1216+{
1217+ return ((long long) b) - ((long long) a) < 0;
1218+}
1219+#define lt_before(a, b) lt_after(b, a)
1220+
1221+static inline int lt_after_eq(lt_t a, lt_t b)
1222+{
1223+ return ((long long) a) - ((long long) b) >= 0;
1224+}
1225+#define lt_before_eq(a, b) lt_after_eq(b, a)
1226+
1227+/* different types of clients */
1228+typedef enum {
1229+ RT_CLASS_HARD,
1230+ RT_CLASS_SOFT,
1231+ RT_CLASS_BEST_EFFORT
1232+} task_class_t;
1233+
1234+struct rt_task {
1235+ lt_t exec_cost;
1236+ lt_t period;
1237+ lt_t phase;
1238+ lt_t prio;
1239+ unsigned int cpu;
1240+ task_class_t cls;
1241+};
1242+
1243+#define DPCP_WAIT 0x1
1244+#define DPCP_COMPLETE 0x2
1245+
1246+/* don't export internal data structures to user space (liblitmus) */
1247+#ifdef __KERNEL__
1248+
1249+#include <linux/list.h>
1250+
1251+struct rt_job {
1252+ /* Time instant the job was or will be released. */
1253+ lt_t release;
1254+ /* What is the current deadline? */
1255+ lt_t deadline;
1256+ /* How much service has this job received so far?
1257+ */
1258+ lt_t exec_time;
1259+
1260+ /* Which job is this. This is used to let user space
1261+ * specify which job to wait for, which is important if jobs
1262+ * overrun. If we just call sys_sleep_next_period() then we
1263+ * will unintentionally miss jobs after an overrun.
1264+ *
1265+ * Increase this sequence number when a job is released.
1266+ */
1267+ unsigned int job_no;
1268+
1269+ /* when did this job start executing? */
1270+ lt_t exec_start;
1271+};
1272+
1273+
1274+/* make priority inheritance cleaner for PCP */
1275+struct pcp_priority {
1276+ lt_t prio;
1277+ int in_global_cs;
1278+ int pid;
1279+};
1280+
1281+struct pcp_semaphore;
1282+
1283+/* RT task parameters for scheduling extensions
1284+ * These parameters are inherited during clone and therefore must
1285+ * be explicitly set up before the task set is launched.
1286+ */
1287+struct rt_param {
1288+ /* is the task sleeping? */
1289+ unsigned int flags:8;
1290+
1291+ /* Real-time marker: 1 iff it is a LITMUS real-time task.
1292+ */
1293+ unsigned int is_realtime:1;
1294+
1295+ /* is a BE->RT or RT->BE transition pending? */
1296+ unsigned int transition_pending:1;
1297+
1298+ /* is this task under control of litmus?
1299+ *
1300+ * this is necessary because otherwise signal delivery code
1301+ * may try to wake up a task that is already queued in plugin
1302+ * data structures.
1303+ *
1304+ * bbb: I believe this flag is fundamentally flawed and should be
1305+ * taken out in the redesign.
1306+ */
1307+ unsigned int litmus_controlled:1;
1308+
1309+ /* do we need to check for srp blocking? */
1310+ unsigned int srp_non_recurse:1;
1311+
1312+ /* if a BE->RT transition failed, then this field contains the error */
1313+ unsigned long transition_error;
1314+
1315+ /* user controlled parameters */
1316+ struct rt_task task_params;
1317+
1318+ /* timing parameters */
1319+ struct rt_job job_params;
1320+
1321+
1322+ /* task representing the current "inherited" task
1323+ * priority, assigned by inherit_priority and
1324+ * return priority in the scheduler plugins.
1325+ * could point to self if PI does not result in
1326+ * an increased task priority.
1327+ */
1328+ struct task_struct* inh_task;
1329+
1330+ /* Don't just dereference this pointer in kernel space!
1331+ * It might very well point to junk or nothing at all.
1332+ * NULL indicates that the task has not requested any non-preemptable
1333+ * section support.
1334+ * Not inherited upon fork.
1335+ */
1336+ short* np_flag;
1337+
1338+ /* For the FMLP under PSN-EDF, it is required to make the task
1339+ * non-preemptive from kernel space. In order not to interfere with
1340+ * user space, this counter indicates the kernel space np setting.
1341+ * kernel_np > 0 => task is non-preemptive
1342+ */
1343+ unsigned int kernel_np;
1344+
1345+ /* This field can be used by plugins to store where the task
1346+ * is currently scheduled. It is the responsibility of the
1347+ * plugin to avoid race conditions.
1348+ *
1349+ * Used by GSN-EDF.
1350+ */
1351+ int scheduled_on;
1352+
1353+ /* This field can be used by plugins to store where the task
1354+ * is currently linked. It is the responsibility of the plugin
1355+ * to avoid race conditions.
1356+ *
1357+ * Used by GSN-EDF.
1358+ */
1359+ int linked_on;
1360+
1361+ /* Used by RM
1362+ */
1363+ struct pcp_priority pcp_prio;
1364+ struct pcp_priority* cur_prio;
1365+ struct list_head owned_semaphores;
1366+ struct pcp_semaphore* blocked_on;
1367+
1368+ /* Fields saved before BE->RT transition.
1369+ */
1370+ int old_policy;
1371+ int old_prio;
1372+};
1373+
1374+/* Possible RT flags */
1375+#define RT_F_RUNNING 0x00000000
1376+#define RT_F_SLEEP 0x00000001
1377+#define RT_F_EXIT_SEM 0x00000008
1378+
1379+#endif
1380+
1381+#endif
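For orientation, a hedged example of the user-controlled parameters a task would set (via sys_set_rt_task_param()) before its BE->RT transition; the values are invented and the lt_t time base is assumed to be nanoseconds:

/* 10 ms budget every 100 ms on CPU 0, hard real-time class. */
struct rt_task demo_params = {
	.exec_cost = 10000000ULL,	/* 10 ms, assuming a nanosecond time base */
	.period    = 100000000ULL,	/* 100 ms */
	.phase     = 0,
	.prio      = 1,			/* only relevant under RM-style plugins */
	.cpu       = 0,			/* only relevant for partitioned plugins */
	.cls       = RT_CLASS_HARD,
};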
1382diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
1383new file mode 100644
1384index 0000000..337668f
1385--- /dev/null
1386+++ b/include/litmus/sched_plugin.h
1387@@ -0,0 +1,120 @@
1388+/*
1389+ * Definition of the scheduler plugin interface.
1390+ *
1391+ */
1392+#ifndef _LINUX_SCHED_PLUGIN_H_
1393+#define _LINUX_SCHED_PLUGIN_H_
1394+
1395+#include <linux/sched.h>
1396+#include <litmus/litmus.h>
1397+
1398+/* struct for semaphore with priority inheritance */
1399+struct pi_semaphore {
1400+ atomic_t count;
1401+ int sleepers;
1402+ wait_queue_head_t wait;
1403+ union {
1404+ /* highest-prio holder/waiter */
1405+ struct task_struct *task;
1406+ struct task_struct* cpu_task[NR_CPUS];
1407+ } hp;
1408+ /* current lock holder */
1409+ struct task_struct *holder;
1410+};
1411+
1412+int set_hp_task(struct pi_semaphore *sem, prio_cmp_t cmp);
1413+int set_hp_cpu_task(struct pi_semaphore *sem, int cpu, prio_cmp_t cmp);
1414+
1415+/********************* scheduler invocation ******************/
1416+
1417+/* Plugin-specific realtime tick handler */
1418+typedef void (*scheduler_tick_t) (void);
1419+/* Plugin-specific scheduling decision function */
1420+typedef int (*schedule_t) (struct task_struct * prev,
1421+ struct task_struct ** next);
1422+/* Clean up after the task switch has occurred.
1423+ * This function is called after every (even non-rt) task switch.
1424+ */
1425+typedef void (*finish_switch_t)(struct task_struct *prev);
1426+
1427+
1428+/********************* task state changes ********************/
1429+
1430+/* called to setup a new real-time task */
1431+typedef long (*prepare_task_t) (struct task_struct *task);
1432+/* called to re-introduce a task after blocking */
1433+typedef void (*wake_up_task_t) (struct task_struct *task);
1434+/* called to notify the plugin of a blocking real-time task
1435+ * it will only be called for real-time tasks and before schedule is called */
1436+typedef void (*task_blocks_t) (struct task_struct *task);
1437+/* called when a real-time task exits. Free any allocated resources */
1438+typedef long (*tear_down_t) (struct task_struct *);
1439+
1440+/* Called when the new_owner is released from the wait queue;
1441+ * it should now inherit the priority from sem, _before_ it gets re-added
1442+ * to any queue.
1443+ */
1444+typedef long (*inherit_priority_t) (struct pi_semaphore *sem,
1445+ struct task_struct *new_owner);
1446+
1447+/* Called when the current task releases a semaphore from which it might
1448+ * have inherited a priority.
1449+ */
1450+typedef long (*return_priority_t) (struct pi_semaphore *sem);
1451+
1452+/* Called when a task tries to acquire a semaphore and fails. Check if its
1453+ * priority is higher than that of the current holder.
1454+ */
1455+typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t);
1456+
1457+
1458+/********************* sys call backends ********************/
1459+/* This function causes the caller to sleep until the next release */
1460+typedef long (*sleep_next_period_t) (void);
1461+
1462+struct sched_plugin {
1463+ struct list_head list;
1464+ /* basic info */
1465+ char *plugin_name;
1466+ unsigned int srp_active:1;
1467+ unsigned int pcp_active:1;
1468+
1469+ /* scheduler invocation */
1470+ scheduler_tick_t scheduler_tick;
1471+ schedule_t schedule;
1472+ finish_switch_t finish_switch;
1473+
1474+ /* syscall backend */
1475+ sleep_next_period_t sleep_next_period;
1476+
1477+ /* task state changes */
1478+ prepare_task_t prepare_task;
1479+ wake_up_task_t wake_up_task;
1480+ task_blocks_t task_blocks;
1481+ tear_down_t tear_down;
1482+
1483+ /* priority inheritance */
1484+ inherit_priority_t inherit_priority;
1485+ return_priority_t return_priority;
1486+ pi_block_t pi_block;
1487+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
1488+
1489+
1490+extern struct sched_plugin *curr_sched_plugin;
1491+
1492+int register_sched_plugin(struct sched_plugin* plugin);
1493+struct sched_plugin* find_sched_plugin(const char* name);
1494+int print_sched_plugins(char* buf, int max);
1495+
1496+static inline int pcp_active(void)
1497+{
1498+ return curr_sched_plugin->pcp_active;
1499+}
1500+
1501+static inline int srp_active(void)
1502+{
1503+ return curr_sched_plugin->srp_active;
1504+}
1505+
1506+
1507+#endif
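A hypothetical skeleton (all demo_* names invented) of the registration pattern that plugins such as litmus/sched_psn_edf.c below presumably follow:

static void demo_scheduler_tick(void)
{
	/* budget enforcement / preemption checks would go here */
}

static int demo_schedule(struct task_struct* prev, struct task_struct** next)
{
	*next = NULL;	/* no real-time task to run; fall back to Linux */
	return 0;
}

static struct sched_plugin demo_plugin = {
	.plugin_name	= "demo",
	.scheduler_tick	= demo_scheduler_tick,
	.schedule	= demo_schedule,
};

static int __init demo_plugin_init(void)
{
	return register_sched_plugin(&demo_plugin);
}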
1508diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
1509new file mode 100644
1510index 0000000..f9938c2
1511--- /dev/null
1512+++ b/include/litmus/sched_trace.h
1513@@ -0,0 +1,31 @@
1514+/* sched_trace.h -- record scheduler events to a byte stream for offline analysis.
1515+ */
1516+#ifndef _LINUX_SCHED_TRACE_H_
1517+#define _LINUX_SCHED_TRACE_H_
1518+
1519+#include <linux/sched.h>
1520+
1521+/* dummies, need to be re-implemented */
1522+
1523+/* used in sched.c */
1524+#define sched_trace_task_arrival(t)
1525+#define sched_trace_task_departure(t)
1526+#define sched_trace_task_preemption(t, by)
1527+#define sched_trace_task_scheduled(t)
1528+
1529+/* used in scheduler plugins */
1530+#define sched_trace_job_release(t)
1531+#define sched_trace_job_completion(t)
1532+
1533+
1534+#ifdef CONFIG_SCHED_DEBUG_TRACE
1535+void sched_trace_log_message(const char* fmt, ...);
1536+
1537+#else
1538+
1539+#define sched_trace_log_message(fmt, ...)
1540+
1541+#endif
1542+
1543+
1544+#endif
1545diff --git a/include/litmus/trace.h b/include/litmus/trace.h
1546new file mode 100644
1547index 0000000..5c2c2c0
1548--- /dev/null
1549+++ b/include/litmus/trace.h
1550@@ -0,0 +1,106 @@
1551+
1552+#ifndef _SYS_TRACE_H_
1553+#define _SYS_TRACE_H_
1554+
1555+#include <litmus/feather_trace.h>
1556+#include <litmus/feather_buffer.h>
1557+
1558+
1559+/*********************** TIMESTAMPS ************************/
1560+
1561+struct timestamp {
1562+ unsigned long event;
1563+ unsigned long long timestamp;
1564+ unsigned int seq_no;
1565+ int cpu;
1566+};
1567+
1568+
1569+/* buffer holding time stamps - will be provided by driver */
1570+extern struct ft_buffer* trace_ts_buf;
1571+
1572+/* save_timestamp: stores current time as struct timestamp
1573+ * in trace_ts_buf
1574+ */
1575+asmlinkage void save_timestamp(unsigned long event);
1576+
1577+#define TIMESTAMP(id) ft_event0(id, save_timestamp)
1578+
1579+/* Convention for timestamps
1580+ * =========================
1581+ *
1582+ * In order to process the trace files with a common tool, we use the following
1583+ * convention to measure execution times: The end time id of a code segment is
1584+ * always the next number after the start time event id.
1585+ */
1586+
1587+#define TS_SCHED_START TIMESTAMP(100)
1588+#define TS_SCHED_END TIMESTAMP(101)
1589+#define TS_CXS_START TIMESTAMP(102)
1590+#define TS_CXS_END TIMESTAMP(103)
1591+
1592+#define TS_TICK_START TIMESTAMP(110)
1593+#define TS_TICK_END TIMESTAMP(111)
1594+
1595+#define TS_PLUGIN_SCHED_START TIMESTAMP(120)
1596+#define TS_PLUGIN_SCHED_END TIMESTAMP(121)
1597+
1598+#define TS_PLUGIN_TICK_START TIMESTAMP(130)
1599+#define TS_PLUGIN_TICK_END TIMESTAMP(131)
1600+
1601+#define TS_ENTER_NP_START TIMESTAMP(140)
1602+#define TS_ENTER_NP_END TIMESTAMP(141)
1603+
1604+#define TS_EXIT_NP_START TIMESTAMP(150)
1605+#define TS_EXIT_NP_END TIMESTAMP(151)
1606+
1607+#define TS_SRP_UP_START TIMESTAMP(160)
1608+#define TS_SRP_UP_END TIMESTAMP(161)
1609+#define TS_SRP_DOWN_START TIMESTAMP(162)
1610+#define TS_SRP_DOWN_END TIMESTAMP(163)
1611+
1612+#define TS_PI_UP_START TIMESTAMP(170)
1613+#define TS_PI_UP_END TIMESTAMP(171)
1614+#define TS_PI_DOWN_START TIMESTAMP(172)
1615+#define TS_PI_DOWN_END TIMESTAMP(173)
1616+
1617+#define TS_FIFO_UP_START TIMESTAMP(180)
1618+#define TS_FIFO_UP_END TIMESTAMP(181)
1619+#define TS_FIFO_DOWN_START TIMESTAMP(182)
1620+#define TS_FIFO_DOWN_END TIMESTAMP(183)
1621+
1622+#define PCP1 200
1623+#define PCP2 204
1624+
1625+#define DPCP 210
1626+#define MPCP 220
1627+#define FMLP 230
1628+#define SRPT 240
1629+
1630+#define TS_PCP_UP_START TIMESTAMP(PCP1)
1631+#define TS_PCP_UP_END TIMESTAMP(PCP1 + 1)
1632+#define TS_PCP1_DOWN_START TIMESTAMP(PCP1 + 2)
1633+#define TS_PCP1_DOWN_END TIMESTAMP(PCP1 + 3)
1634+#define TS_PCP2_DOWN_START TIMESTAMP(PCP2 + 2)
1635+#define TS_PCP2_DOWN_END TIMESTAMP(PCP2 + 3)
1636+
1637+
1638+#define TS_DPCP_INVOKE_START TIMESTAMP(DPCP)
1639+#define TS_DPCP_INVOKE_END TIMESTAMP(DPCP + 1)
1640+#define TS_DPCP_AGENT1_START TIMESTAMP(DPCP + 2)
1641+#define TS_DPCP_AGENT1_END TIMESTAMP(DPCP + 3)
1642+#define TS_DPCP_AGENT2_START TIMESTAMP(DPCP + 4)
1643+#define TS_DPCP_AGENT2_END TIMESTAMP(DPCP + 5)
1644+
1645+
1646+#define TS_MPCP_UP_START TIMESTAMP(MPCP)
1647+#define TS_MPCP_UP_END TIMESTAMP(MPCP + 1)
1648+#define TS_MPCP_DOWN_START TIMESTAMP(MPCP + 2)
1649+#define TS_MPCP_DOWN_END TIMESTAMP(MPCP + 3)
1650+
1651+
1652+#define TS_SRPT_START TIMESTAMP(SRPT)
1653+#define TS_SRPT_END TIMESTAMP(SRPT + 1)
1654+
1655+
1656+#endif /* !_SYS_TRACE_H_ */
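A short sketch of the start/end convention described above (the id 250 and the demo names are invented): the end event id is always the start id plus one, so the offline tool can pair the samples.

#define TS_DEMO_START	TIMESTAMP(250)
#define TS_DEMO_END	TIMESTAMP(251)

static void demo_measured_section(void)
{
	TS_DEMO_START;
	/* ... code whose overhead is being measured ... */
	TS_DEMO_END;
}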
1657diff --git a/kernel/exit.c b/kernel/exit.c
1658index fec12eb..8a0eb79 100644
1659--- a/kernel/exit.c
1660+++ b/kernel/exit.c
1661@@ -50,6 +50,8 @@
1662
1663 extern void sem_exit (void);
1664
1665+extern void exit_od_table(struct task_struct* t);
1666+
1667 static void exit_mm(struct task_struct * tsk);
1668
1669 static void __unhash_process(struct task_struct *p)
1670@@ -916,6 +918,8 @@ fastcall NORET_TYPE void do_exit(long code)
1671 if (unlikely(tsk->audit_context))
1672 audit_free(tsk);
1673
1674+ exit_od_table(tsk);
1675+
1676 taskstats_exit(tsk, group_dead);
1677
1678 exit_mm(tsk);
1679diff --git a/kernel/fork.c b/kernel/fork.c
1680index d57118d..6fa6e03 100644
1681--- a/kernel/fork.c
1682+++ b/kernel/fork.c
1683@@ -57,6 +57,9 @@
1684 #include <asm/cacheflush.h>
1685 #include <asm/tlbflush.h>
1686
1687+#include <litmus/litmus.h>
1688+#include <litmus/sched_plugin.h>
1689+
1690 /*
1691 * Protected counters by write_lock_irq(&tasklist_lock)
1692 */
1693@@ -118,6 +121,8 @@ void __put_task_struct(struct task_struct *tsk)
1694 WARN_ON(atomic_read(&tsk->usage));
1695 WARN_ON(tsk == current);
1696
1697+ exit_litmus(tsk);
1698+
1699 security_task_free(tsk);
1700 free_uid(tsk->user);
1701 put_group_info(tsk->group_info);
1702diff --git a/kernel/sched.c b/kernel/sched.c
1703index cca93cc..fb35f31 100644
1704--- a/kernel/sched.c
1705+++ b/kernel/sched.c
1706@@ -56,6 +56,12 @@
1707
1708 #include <asm/unistd.h>
1709
1710+#include <litmus/litmus.h>
1711+#include <litmus/sched_plugin.h>
1712+#include <litmus/sched_trace.h>
1713+#include <litmus/rt_param.h>
1714+#include <litmus/trace.h>
1715+
1716 /*
1717 * Convert user-nice values [ -20 ... 0 ... 19 ]
1718 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
1719@@ -836,7 +842,7 @@ static int effective_prio(struct task_struct *p)
1720 * keep the priority unchanged. Otherwise, update priority
1721 * to the normal priority:
1722 */
1723- if (!rt_prio(p->prio))
1724+ if (!rt_prio(p->prio) && !is_realtime(p))
1725 return p->normal_prio;
1726 return p->prio;
1727 }
1728@@ -844,7 +850,7 @@ static int effective_prio(struct task_struct *p)
1729 /*
1730 * __activate_task - move a task to the runqueue.
1731 */
1732-static void __activate_task(struct task_struct *p, struct rq *rq)
1733+void __activate_task(struct task_struct *p, struct rq *rq)
1734 {
1735 struct prio_array *target = rq->active;
1736
1737@@ -999,7 +1005,7 @@ out:
1738 /*
1739 * deactivate_task - remove a task from the runqueue.
1740 */
1741-static void deactivate_task(struct task_struct *p, struct rq *rq)
1742+void deactivate_task(struct task_struct *p, struct rq *rq)
1743 {
1744 dec_nr_running(p, rq);
1745 dequeue_task(p, p->array);
1746@@ -1408,6 +1414,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1747 #endif
1748
1749 rq = task_rq_lock(p, &flags);
1750+
1751+ if (is_realtime(p))
1752+ TRACE("try_to_wake_up(%s/%d)\n", p->comm, p->pid);
1753+
1754 old_state = p->state;
1755 if (!(old_state & state))
1756 goto out;
1757@@ -1415,6 +1425,12 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
1758 if (p->array)
1759 goto out_running;
1760
1761+ sched_trace_task_arrival(p);
1762+ if (is_realtime(p)) {
1763+ curr_sched_plugin->wake_up_task(p);
1764+ goto out_running;
1765+ }
1766+
1767 cpu = task_cpu(p);
1768 this_cpu = smp_processor_id();
1769
1770@@ -1576,6 +1592,8 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
1771 {
1772 int cpu = get_cpu();
1773
1774+ litmus_fork(p);
1775+
1776 #ifdef CONFIG_SMP
1777 cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
1778 #endif
1779@@ -1730,6 +1748,9 @@ void fastcall sched_exit(struct task_struct *p)
1780 unsigned long flags;
1781 struct rq *rq;
1782
1783+ if (is_realtime(p))
1784+ return;
1785+
1786 /*
1787 * If the child was a (relative-) CPU hog then decrease
1788 * the sleep_avg of the parent as well.
1789@@ -1765,6 +1786,31 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
1790 prepare_arch_switch(next);
1791 }
1792
1793+static void litmus_transition(struct task_struct *tsk, struct rq *rq)
1794+{
1795+ int wakeup = 0;
1796+ WARN_ON(tsk->state != TASK_STOPPED);
1797+
1798+ tsk->rt_param.transition_pending = 0;
1799+ if (is_realtime(tsk)) {
1800+ /* RT -> BE transition */
1801+ tsk->rt_param.transition_error = transition_to_be(tsk);
1802+ wakeup = tsk->rt_param.transition_error == 0;
1803+ } else {
1804+ /* BE -> RT transition */
1805+ tsk->rt_param.transition_error = transition_to_rt(tsk);
1806+ /* If it was rejected as a real-time task, then
1807+ * keep it running as a best-effort task.
1808+ */
1809+ wakeup = tsk->rt_param.transition_error != 0;
1810+ }
1811+ if (wakeup) {
1812+ /* we still hold the runqueue lock */
1813+ tsk->state = TASK_RUNNING;
1814+ __activate_task(tsk, rq);
1815+ }
1816+}
1817+
1818 /**
1819 * finish_task_switch - clean up after a task-switch
1820 * @rq: runqueue associated with task-switch
1821@@ -1801,6 +1847,15 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
1822 */
1823 prev_state = prev->state;
1824 finish_arch_switch(prev);
1825+ /* Requeue previous real-time task before we drop the rq lock, cause
1826+ * that may lead to a preemption.
1827+ */
1828+ curr_sched_plugin->finish_switch(prev);
1829+ sched_trace_task_scheduled(current);
1830+ if (rt_transition_pending(prev))
1831+ litmus_transition(prev, rq);
1832+ /* trace before IRQs are enabled */
1833+ TS_CXS_END;
1834 finish_lock_switch(rq, prev);
1835 if (mm)
1836 mmdrop(mm);
1837@@ -2095,6 +2150,10 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
1838 struct sched_domain *sd, enum idle_type idle,
1839 int *all_pinned)
1840 {
1841+ /* Don't migrate LITMUS^RT tasks. */
1842+ if (is_realtime(p))
1843+ return 0;
1844+
1845 /*
1846 * We do not migrate tasks that are:
1847 * 1) running (obviously), or
1848@@ -3220,11 +3279,30 @@ void scheduler_tick(void)
1849
1850 update_cpu_clock(p, rq, now);
1851
1852+	/* real-time accounting is done by the plugin;
1853+	 * call Linux functions only for background tasks
1854+ */
1855 if (p == rq->idle)
1856- /* Task on the idle queue */
1857- wake_priority_sleeper(rq);
1858- else
1859+ /* Task on the idle queue */
1860+ wake_priority_sleeper(rq);
1861+ else if (is_realtime(p)) {
1862+ /* time accounting for LITMUS^RT tasks */
1863+ p->rt_param.job_params.exec_time +=
1864+ now - p->rt_param.job_params.exec_start;
1865+ p->rt_param.job_params.exec_start = now;
1866+ } else
1867+ /* normal Linux tasks */
1868 task_running_tick(rq, p);
1869+
1870+ /* check whether the RT scheduler plugin requires a call to
1871+ * schedule
1872+ */
1873+ TS_PLUGIN_TICK_START;
1874+ curr_sched_plugin->scheduler_tick();
1875+ TS_PLUGIN_TICK_END;
1876+
1877+ send_scheduler_signals();
1878+
1879 #ifdef CONFIG_SMP
1880 update_load(rq);
1881 if (time_after_eq(jiffies, rq->next_balance))
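The hunks above charge LITMUS^RT tasks for execution time using a checkpoint pattern: at every tick (and again in schedule()) the job is billed for the time elapsed since exec_start, and exec_start is advanced. A minimal stand-alone model of that pattern follows; the struct and the time values are illustrative only and are not the kernel's rt_param definitions.

/* Illustrative sketch, not part of the original patch: models the
 * "charge since the last checkpoint" accounting used above.
 */
#include <stdio.h>

typedef unsigned long long lt_t;   /* stand-in for the kernel's lt_t */

struct job_model {
	lt_t exec_time;   /* accumulated execution time of the current job */
	lt_t exec_start;  /* time of the last accounting checkpoint */
};

/* charge the job for the time elapsed since the last checkpoint */
static void charge(struct job_model *j, lt_t now)
{
	j->exec_time += now - j->exec_start;
	j->exec_start = now;
}

int main(void)
{
	struct job_model j = { .exec_time = 0, .exec_start = 100 };

	charge(&j, 130);  /* e.g. at a scheduler tick  */
	charge(&j, 155);  /* e.g. when schedule() runs */
	printf("charged %llu time units\n", j.exec_time);  /* prints 55 */
	return 0;
}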
1882@@ -3406,6 +3484,7 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
1883 sleep_type == SLEEP_INTERRUPTED);
1884 }
1885
1886+
1887 /*
1888 * schedule() is the main scheduler function.
1889 */
1890@@ -3420,6 +3499,7 @@ asmlinkage void __sched schedule(void)
1891 long *switch_count;
1892 struct rq *rq;
1893
1894+
1895 /*
1896 * Test if we are atomic. Since do_exit() needs to call into
1897 * schedule() atomically, we ignore that path for now.
1898@@ -3427,8 +3507,9 @@ asmlinkage void __sched schedule(void)
1899 */
1900 if (unlikely(in_atomic() && !current->exit_state)) {
1901 printk(KERN_ERR "BUG: scheduling while atomic: "
1902- "%s/0x%08x/%d\n",
1903- current->comm, preempt_count(), current->pid);
1904+ "%s/0x%08x/%d %s\n",
1905+ current->comm, preempt_count(), current->pid,
1906+ is_realtime(current) ? "rt" : "non-rt");
1907 debug_show_held_locks(current);
1908 if (irqs_disabled())
1909 print_irqtrace_events(current);
1910@@ -3438,6 +3519,7 @@ asmlinkage void __sched schedule(void)
1911
1912 need_resched:
1913 preempt_disable();
1914+ TS_SCHED_START;
1915 prev = current;
1916 release_kernel_lock(prev);
1917 need_resched_nonpreemptible:
1918@@ -3470,6 +3552,7 @@ need_resched_nonpreemptible:
1919 spin_lock_irq(&rq->lock);
1920
1921 switch_count = &prev->nivcsw;
1922+ /* check for blocking tasks */
1923 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
1924 switch_count = &prev->nvcsw;
1925 if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
1926@@ -3478,11 +3561,60 @@ need_resched_nonpreemptible:
1927 else {
1928 if (prev->state == TASK_UNINTERRUPTIBLE)
1929 rq->nr_uninterruptible++;
1930+
1931+ if (is_realtime(prev)) {
1932+ TRACE_TASK(prev, "blocks, state = %d\n",
1933+ prev->state);
1934+ curr_sched_plugin->task_blocks(prev);
1935+ /* Enable this for all tasks to get _a lot_ of
1936+ * data. Can be helpful for debugging.
1937+ */
1938+ sched_trace_task_departure(prev);
1939+ }
1940+ /* only indirect switching is supported in the current
1941+ * version of LITMUS
1942+ */
1943 deactivate_task(prev, rq);
1944 }
1945 }
1946
1947+ next = NULL;
1948+
1949+ if (is_realtime(prev)) {
1950+ /* If we are invoked after scheduler_tick(), then
1951+ * prev is charged a tiny amount of overhead time.
1952+ * Since analysis has (or should have) accounted for
1953+ * overheads, this is ok.
1954+ */
1955+ prev->rt_param.job_params.exec_time +=
1956+ now - prev->rt_param.job_params.exec_start;
1957+ prev->rt_param.job_params.exec_start = now;
1958+ }
1959+
1960+ /* consult the real-time plugin */
1961+ TS_PLUGIN_SCHED_START;
1962+ curr_sched_plugin->schedule(prev, &next);
1963+ TS_PLUGIN_SCHED_END;
1964+
1965 cpu = smp_processor_id();
1966+
1967+ if (prev != next && is_realtime(prev) && is_running(prev))
1968+ deactivate_task(prev, rq);
1969+ if (next && prev != next) {
1970+ __activate_task(next, rq);
1971+ set_task_cpu(next, cpu);
1972+ }
1973+
1974+ /* If the real-time plugin wants to switch to a specific task
1975+ * it'll be on the rq and have the highest priority. There will
1976+	 * be exactly one such task, thus the selection of the next task
1977+ * is unambiguous and the following code can only get
1978+ * triggered if there are no RT tasks pending (on this CPU). Thus,
1979+ * we may as well skip it.
1980+ */
1981+ if (next)
1982+ goto switch_tasks;
1983+
1984 if (unlikely(!rq->nr_running)) {
1985 idle_balance(cpu, rq);
1986 if (!rq->nr_running) {
1987@@ -3546,12 +3678,17 @@ switch_tasks:
1988 prev->timestamp = prev->last_ran = now;
1989
1990 sched_info_switch(prev, next);
1991+ TS_SCHED_END;
1992 if (likely(prev != next)) {
1993+ TS_CXS_START;
1994+ if (is_running(prev))
1995+ sched_trace_task_preemption(prev, next);
1996 next->timestamp = now;
1997 rq->nr_switches++;
1998 rq->curr = next;
1999 ++*switch_count;
2000
2001+ next->rt_param.job_params.exec_start = now;
2002 prepare_task_switch(rq, next);
2003 prev = context_switch(rq, prev, next);
2004 barrier();
2005@@ -3561,8 +3698,11 @@ switch_tasks:
2006 * frame will be invalid.
2007 */
2008 finish_task_switch(this_rq(), prev);
2009- } else
2010+ } else {
2011 spin_unlock_irq(&rq->lock);
2012+ }
2013+
2014+ send_scheduler_signals();
2015
2016 prev = current;
2017 if (unlikely(reacquire_kernel_lock(prev) < 0))
2018@@ -3570,6 +3710,8 @@ switch_tasks:
2019 preempt_enable_no_resched();
2020 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
2021 goto need_resched;
2022+ if (srp_active())
2023+ srp_ceiling_block();
2024 }
2025 EXPORT_SYMBOL(schedule);
2026
2027@@ -3691,6 +3833,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
2028 }
2029 }
2030
2031+
2032 /**
2033 * __wake_up - wake up threads blocked on a waitqueue.
2034 * @q: the waitqueue
2035@@ -3709,6 +3852,7 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
2036 }
2037 EXPORT_SYMBOL(__wake_up);
2038
2039+
2040 /*
2041 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
2042 */
2043@@ -3717,6 +3861,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
2044 __wake_up_common(q, mode, 1, 0, NULL);
2045 }
2046
2047+
2048 /**
2049 * __wake_up_sync - wake up threads blocked on a waitqueue.
2050 * @q: the waitqueue
2051@@ -3772,6 +3917,18 @@ void fastcall complete_all(struct completion *x)
2052 }
2053 EXPORT_SYMBOL(complete_all);
2054
2055+void fastcall complete_n(struct completion *x, int n)
2056+{
2057+ unsigned long flags;
2058+
2059+ spin_lock_irqsave(&x->wait.lock, flags);
2060+ x->done += n;
2061+ __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2062+ n, 0, NULL);
2063+ spin_unlock_irqrestore(&x->wait.lock, flags);
2064+}
2065+EXPORT_SYMBOL(complete_n);
2066+
2067 void fastcall __sched wait_for_completion(struct completion *x)
2068 {
2069 might_sleep();
2070@@ -4175,7 +4332,7 @@ static inline struct task_struct *find_process_by_pid(pid_t pid)
2071 }
2072
2073 /* Actually do priority change: must hold rq lock. */
2074-static void __setscheduler(struct task_struct *p, int policy, int prio)
2075+void __setscheduler(struct task_struct *p, int policy, int prio)
2076 {
2077 BUG_ON(p->array);
2078
2079diff --git a/lib/semaphore-sleepers.c b/lib/semaphore-sleepers.c
2080index 1281805..3f4d543 100644
2081--- a/lib/semaphore-sleepers.c
2082+++ b/lib/semaphore-sleepers.c
2083@@ -108,7 +108,7 @@ fastcall int __sched __down_interruptible(struct semaphore * sem)
2084 /*
2085 * With signals pending, this turns into
2086 * the trylock failure case - we won't be
2087- * sleeping, and we* can't get the lock as
2088+ * sleeping, and we can't get the lock as
2089 * it has contention. Just correct the count
2090 * and exit.
2091 */
2092diff --git a/litmus/Makefile b/litmus/Makefile
2093new file mode 100644
2094index 0000000..db2518d
2095--- /dev/null
2096+++ b/litmus/Makefile
2097@@ -0,0 +1,9 @@
2098+#
2099+# Makefile for LITMUS^RT
2100+#
2101+
2102+obj-y = sched_plugin.o litmus.o sched_trace.o \
2103+ edf_common.o rm_common.o\
2104+ sched_gsn_edf.o sched_psn_edf.o litmus_sem.o \
2105+ trace.o ft_event.o rt_domain.o fdso.o \
2106+ sched_rm.o sync.o jobs.o pcp.o
2107diff --git a/litmus/edf_common.c b/litmus/edf_common.c
2108new file mode 100644
2109index 0000000..2a52835
2110--- /dev/null
2111+++ b/litmus/edf_common.c
2112@@ -0,0 +1,95 @@
2113+/*
2114+ * litmus/edf_common.c
2115+ *
2116+ * Common functions for EDF based scheduler.
2117+ */
2118+
2119+#include <linux/percpu.h>
2120+#include <linux/sched.h>
2121+#include <linux/list.h>
2122+
2123+#include <litmus/litmus.h>
2124+#include <litmus/sched_plugin.h>
2125+#include <litmus/sched_trace.h>
2126+
2127+
2128+#include <litmus/edf_common.h>
2129+
2130+/* edf_higher_prio - returns true if first has a higher EDF priority
2131+ * than second. Deadline ties are broken by PID.
2132+ *
2133+ * first must not be NULL and must be a real-time task.
2134+ * second may be NULL or a non-rt task.
2135+ */
2136+int edf_higher_prio(struct task_struct* first,
2137+ struct task_struct* second)
2138+{
2139+ struct task_struct *first_task = first;
2140+ struct task_struct *second_task = second;
2141+
2142+ /* Check for inherited priorities. Change task
2143+ * used for comparison in such a case.
2144+ */
2145+ if (first && first->rt_param.inh_task)
2146+ first_task = first->rt_param.inh_task;
2147+ if (second && second->rt_param.inh_task)
2148+ second_task = second->rt_param.inh_task;
2149+
2150+ return
2151+ /* does the second task exist and is it a real-time task? If
2152+ * not, the first task (which is a RT task) has higher
2153+ * priority.
2154+ */
2155+ !second_task || !is_realtime(second_task) ||
2156+
2157+ /* is the deadline of the first task earlier?
2158+ * Then it has higher priority.
2159+ */
2160+ earlier_deadline(first_task, second_task) ||
2161+
2162+ /* Do we have a deadline tie?
2163+ * Then break by PID.
2164+ */
2165+ (get_deadline(first_task) == get_deadline(second_task) &&
2166+ (first_task->pid < second_task->pid ||
2167+
2168+ /* If the PIDs are the same then the task with the inherited
2169+ * priority wins.
2170+ */
2171+ (first_task->pid == second_task->pid &&
2172+ !second->rt_param.inh_task)));
2173+}
2174+
2175+int edf_ready_order(struct list_head* a, struct list_head* b)
2176+{
2177+ return edf_higher_prio(
2178+ list_entry(a, struct task_struct, rt_list),
2179+ list_entry(b, struct task_struct, rt_list));
2180+}
2181+
2182+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched)
2183+{
2184+ rt_domain_init(rt, resched, edf_ready_order);
2185+}
2186+
2187+/* edf_preemption_needed - check whether the task t needs to be preempted
2188+ * call only with irqs disabled and with ready_lock acquired
2189+ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
2190+ */
2191+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
2192+{
2193+ /* we need the read lock for edf_ready_queue */
2194+ /* no need to preempt if there is nothing pending */
2195+ if (!ready_jobs_pending(rt))
2196+ return 0;
2197+ /* we need to reschedule if t doesn't exist */
2198+ if (!t)
2199+ return 1;
2200+
2201+ /* NOTE: We cannot check for non-preemptibility since we
2202+ * don't know what address space we're currently in.
2203+ */
2204+
2205+ /* make sure to get non-rt stuff out of the way */
2206+ return !is_realtime(t) || edf_higher_prio(next_ready(rt), t);
2207+}
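edf_higher_prio() orders tasks by deadline, breaks deadline ties by PID, and breaks PID ties in favour of a task without an inherited priority competitor. A stand-alone sketch of that ordering follows; it uses a simplified task record for illustration, not the kernel's task_struct, and omits the is_realtime() check.

/* Illustrative sketch, not part of the original patch: models the
 * ordering implemented by edf_higher_prio() above.
 */
#include <assert.h>
#include <stdio.h>

struct fake_task {
	int pid;
	unsigned long long deadline;
	int has_inherited_prio;
};

static int model_edf_higher_prio(const struct fake_task *a,
                                 const struct fake_task *b)
{
	if (!b)
		return 1;                          /* no competitor           */
	if (a->deadline != b->deadline)
		return a->deadline < b->deadline;  /* earlier deadline wins   */
	if (a->pid != b->pid)
		return a->pid < b->pid;            /* PID breaks deadline tie */
	return !b->has_inherited_prio;             /* inheritance breaks PID tie */
}

int main(void)
{
	struct fake_task t1 = { .pid = 10, .deadline = 100 };
	struct fake_task t2 = { .pid = 20, .deadline = 100 };

	assert(model_edf_higher_prio(&t1, &t2));   /* same deadline, lower PID wins */
	t2.deadline = 50;
	assert(!model_edf_higher_prio(&t1, &t2));  /* earlier deadline wins         */
	printf("tie-breaking behaves as described\n");
	return 0;
}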
2208diff --git a/litmus/fdso.c b/litmus/fdso.c
2209new file mode 100644
2210index 0000000..ded9918
2211--- /dev/null
2212+++ b/litmus/fdso.c
2213@@ -0,0 +1,289 @@
2214+/* fdso.c - file descriptor attached shared objects
2215+ *
2216+ * (c) 2007 B. Brandenburg, LITMUS^RT project
2217+ *
2218+ * Notes:
2219+ * - object descriptor (OD) tables are not cloned during a fork.
2220+ * - objects are created on-demand, and freed after the last reference
2221+ * is dropped.
2222+ * - for now, object types are hard coded.
2223+ * - As long as we have live objects, we keep a reference to the inode.
2224+ */
2225+
2226+#include <linux/errno.h>
2227+#include <linux/sched.h>
2228+#include <linux/mutex.h>
2229+#include <linux/file.h>
2230+#include <asm/uaccess.h>
2231+
2232+#include <litmus/fdso.h>
2233+
2234+extern struct fdso_ops pi_sem_ops;
2235+extern struct fdso_ops srp_sem_ops;
2236+extern struct fdso_ops pcp_sem_ops;
2237+extern struct fdso_ops mpcp_sem_ops;
2238+
2239+static const struct fdso_ops* fdso_ops[] = {
2240+ &pi_sem_ops,
2241+ &srp_sem_ops,
2242+ &pcp_sem_ops,
2243+ &mpcp_sem_ops,
2244+};
2245+
2246+static void* fdso_create(obj_type_t type)
2247+{
2248+ return fdso_ops[type]->create();
2249+}
2250+
2251+static void fdso_destroy(obj_type_t type, void* obj)
2252+{
2253+ fdso_ops[type]->destroy(obj);
2254+}
2255+
2256+static int fdso_open(struct od_table_entry* entry, void* __user config)
2257+{
2258+ if (fdso_ops[entry->obj->type]->open)
2259+ return fdso_ops[entry->obj->type]->open(entry, config);
2260+ else
2261+ return 0;
2262+}
2263+
2264+static int fdso_close(struct od_table_entry* entry)
2265+{
2266+ if (fdso_ops[entry->obj->type]->close)
2267+ return fdso_ops[entry->obj->type]->close(entry);
2268+ else
2269+ return 0;
2270+}
2271+
2272+/* inode must be locked already */
2273+static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
2274+ obj_type_t type,
2275+ unsigned int id)
2276+{
2277+ struct inode_obj_id* obj;
2278+ void* raw_obj;
2279+
2280+ raw_obj = fdso_create(type);
2281+ if (!raw_obj)
2282+ return NULL;
2283+
2284+ obj = kmalloc(sizeof(struct inode_obj_id), GFP_KERNEL);
2285+ if (!obj)
2286+ return NULL;
2287+ INIT_LIST_HEAD(&obj->list);
2288+ atomic_set(&obj->count, 1);
2289+ obj->type = type;
2290+ obj->id = id;
2291+ obj->obj = raw_obj;
2292+ obj->inode = inode;
2293+
2294+ list_add(&obj->list, &inode->i_obj_list);
2295+ atomic_inc(&inode->i_count);
2296+/*
2297+ printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n",
2298+ inode, type, id);
2299+*/
2300+ return obj;
2301+}
2302+
2303+/* inode must be locked already */
2304+static struct inode_obj_id* get_inode_obj(struct inode* inode,
2305+ obj_type_t type,
2306+ unsigned int id)
2307+{
2308+ struct list_head* pos;
2309+ struct inode_obj_id* obj = NULL;
2310+
2311+ list_for_each(pos, &inode->i_obj_list) {
2312+ obj = list_entry(pos, struct inode_obj_id, list);
2313+ if (obj->id == id && obj->type == type) {
2314+ atomic_inc(&obj->count);
2315+ return obj;
2316+ }
2317+ }
2318+/*
2319+ printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n",
2320+ inode, type, id);
2321+*/
2322+ return NULL;
2323+}
2324+
2325+
2326+static void put_inode_obj(struct inode_obj_id* obj)
2327+{
2328+ struct inode* inode;
2329+ int let_go = 0;
2330+
2331+ inode = obj->inode;
2332+ if (atomic_dec_and_test(&obj->count)) {
2333+
2334+ mutex_lock(&inode->i_obj_mutex);
2335+ /* no new references can be obtained */
2336+ if (!atomic_read(&obj->count)) {
2337+ list_del(&obj->list);
2338+ fdso_destroy(obj->type, obj->obj);
2339+ kfree(obj);
2340+ let_go = 1;
2341+ }
2342+ mutex_unlock(&inode->i_obj_mutex);
2343+ if (let_go)
2344+ iput(inode);
2345+ }
2346+}
2347+
2348+static struct od_table_entry* get_od_entry(struct task_struct* t)
2349+{
2350+ struct od_table_entry* table;
2351+ int i;
2352+
2353+
2354+ table = t->od_table;
2355+ if (!table) {
2356+ table = (struct od_table_entry*)
2357+ kzalloc(sizeof(struct od_table_entry) *
2358+ MAX_OBJECT_DESCRIPTORS, GFP_KERNEL);
2359+ t->od_table = table;
2360+ }
2361+
2362+ for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
2363+ if (!table[i].used) {
2364+ table[i].used = 1;
2365+ return table + i;
2366+ }
2367+ return NULL;
2368+}
2369+
2370+static int put_od_entry(struct od_table_entry* od)
2371+{
2372+ put_inode_obj(od->obj);
2373+ od->used = 0;
2374+ return 0;
2375+}
2376+
2377+void exit_od_table(struct task_struct* t)
2378+{
2379+ int i;
2380+
2381+ if (t->od_table) {
2382+ for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
2383+ if (t->od_table[i].used)
2384+ put_od_entry(t->od_table + i);
2385+ kfree(t->od_table);
2386+ t->od_table = NULL;
2387+ }
2388+}
2389+
2390+static int do_sys_od_open(struct file* file, obj_type_t type, int id,
2391+ void* __user config)
2392+{
2393+ int idx = 0, err;
2394+ struct inode* inode;
2395+ struct inode_obj_id* obj = NULL;
2396+ struct od_table_entry* entry;
2397+
2398+ inode = file->f_dentry->d_inode;
2399+
2400+ entry = get_od_entry(current);
2401+ if (!entry)
2402+ return -ENOMEM;
2403+
2404+ mutex_lock(&inode->i_obj_mutex);
2405+ obj = get_inode_obj(inode, type, id);
2406+ if (!obj)
2407+ obj = alloc_inode_obj(inode, type, id);
2408+ if (!obj) {
2409+ idx = -ENOMEM;
2410+ entry->used = 0;
2411+ } else {
2412+ entry->obj = obj;
2413+ entry->extra = NULL;
2414+ idx = entry - current->od_table;
2415+ }
2416+
2417+ mutex_unlock(&inode->i_obj_mutex);
2418+
2419+ /* FIXME: What if the allocation failed? */
2420+ err = fdso_open(entry, config);
2421+ if (err < 0) {
2422+ /* The class rejected the open call.
2423+ * We need to clean up and tell user space.
2424+ */
2425+ put_od_entry(entry);
2426+ idx = err;
2427+ }
2428+
2429+ return idx;
2430+}
2431+
2432+
2433+struct od_table_entry* __od_lookup(int od)
2434+{
2435+ struct task_struct *t = current;
2436+
2437+ if (!t->od_table)
2438+ return NULL;
2439+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
2440+ return NULL;
2441+ if (!t->od_table[od].used)
2442+ return NULL;
2443+ return t->od_table + od;
2444+}
2445+
2446+
2447+asmlinkage int sys_od_open(int fd, int type, int obj_id, void* __user config)
2448+{
2449+ int ret = 0;
2450+ struct file* file;
2451+
2452+ /*
2453+ 1) get file from fd, get inode from file
2454+ 2) lock inode
2455+ 3) try to lookup object
2456+ 4) if not present create and enqueue object, inc inode refcnt
2457+ 5) increment refcnt of object
2458+ 6) alloc od_table_entry, setup ptrs
2459+ 7) unlock inode
2460+ 8) return offset in od_table as OD
2461+ */
2462+
2463+ if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
2464+ ret = -EINVAL;
2465+ goto out;
2466+ }
2467+
2468+ file = fget(fd);
2469+ if (!file) {
2470+ ret = -EBADF;
2471+ goto out;
2472+ }
2473+
2474+ ret = do_sys_od_open(file, type, obj_id, config);
2475+
2476+ fput(file);
2477+
2478+out:
2479+ return ret;
2480+}
2481+
2482+
2483+asmlinkage int sys_od_close(int od)
2484+{
2485+ int ret = -EINVAL;
2486+ struct task_struct *t = current;
2487+
2488+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
2489+ return ret;
2490+
2491+ if (!t->od_table || !t->od_table[od].used)
2492+ return ret;
2493+
2494+
2495+ /* give the class a chance to reject the close
2496+ */
2497+ ret = fdso_close(t->od_table + od);
2498+ if (ret == 0)
2499+ ret = put_od_entry(t->od_table + od);
2500+
2501+ return ret;
2502+}
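sys_od_open() attaches a shared object to a file's inode and returns an object descriptor (OD) that indexes the caller's od_table; sys_od_close() drops the reference again. The following user-space sketch of that life cycle is an illustration only: the syscall numbers are placeholders (the real ones are assigned in include/asm-i386/unistd.h by this patch), and type 0 is assumed to map to a PI semaphore based on the fdso_ops table order.

/* Illustrative sketch, not part of the original patch: the OD life cycle
 * seen from user space. Placeholder syscall numbers; expect failures on a
 * kernel without this patch.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define __NR_od_open_placeholder   325  /* placeholder, see unistd.h in this patch */
#define __NR_od_close_placeholder  326  /* placeholder, see unistd.h in this patch */

int main(void)
{
	int fd = open("semaphores.ns", O_RDONLY | O_CREAT, 0666);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* attach object (type 0, id 0) to the file's inode; returns an OD */
	long od = syscall(__NR_od_open_placeholder, fd, 0, 0, NULL);
	if (od < 0)
		perror("od_open");
	else
		syscall(__NR_od_close_placeholder, od);

	close(fd);
	return 0;
}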
2503diff --git a/litmus/ft_event.c b/litmus/ft_event.c
2504new file mode 100644
2505index 0000000..db9f4ea
2506--- /dev/null
2507+++ b/litmus/ft_event.c
2508@@ -0,0 +1,104 @@
2509+#include <linux/types.h>
2510+
2511+#include <litmus/feather_trace.h>
2512+
2513+/* the feather trace management functions assume
2514+ * exclusive access to the event table
2515+ */
2516+
2517+
2518+#define BYTE_JUMP 0xeb
2519+#define BYTE_JUMP_LEN 0x02
2520+
2521+/* for each event, there is an entry in the event table */
2522+struct trace_event {
2523+ long id;
2524+ long count;
2525+ long start_addr;
2526+ long end_addr;
2527+};
2528+
2529+extern struct trace_event __start___event_table[];
2530+extern struct trace_event __stop___event_table[];
2531+
2532+int ft_enable_event(unsigned long id)
2533+{
2534+ struct trace_event* te = __start___event_table;
2535+ int count = 0;
2536+ char* delta;
2537+ unsigned char* instr;
2538+
2539+ while (te < __stop___event_table) {
2540+ if (te->id == id && ++te->count == 1) {
2541+ instr = (unsigned char*) te->start_addr;
2542+ /* make sure we don't clobber something wrong */
2543+ if (*instr == BYTE_JUMP) {
2544+ delta = (((unsigned char*) te->start_addr) + 1);
2545+ *delta = 0;
2546+ }
2547+ }
2548+ if (te->id == id)
2549+ count++;
2550+ te++;
2551+ }
2552+ return count;
2553+}
2554+
2555+int ft_disable_event(unsigned long id)
2556+{
2557+ struct trace_event* te = __start___event_table;
2558+ int count = 0;
2559+ char* delta;
2560+ unsigned char* instr;
2561+
2562+ while (te < __stop___event_table) {
2563+ if (te->id == id && --te->count == 0) {
2564+ instr = (unsigned char*) te->start_addr;
2565+ if (*instr == BYTE_JUMP) {
2566+ delta = (((unsigned char*) te->start_addr) + 1);
2567+ *delta = te->end_addr - te->start_addr -
2568+ BYTE_JUMP_LEN;
2569+ }
2570+ }
2571+ if (te->id == id)
2572+ count++;
2573+ te++;
2574+ }
2575+ return count;
2576+}
2577+
2578+int ft_disable_all_events(void)
2579+{
2580+ struct trace_event* te = __start___event_table;
2581+ int count = 0;
2582+ char* delta;
2583+ unsigned char* instr;
2584+
2585+ while (te < __stop___event_table) {
2586+ if (te->count) {
2587+ instr = (unsigned char*) te->start_addr;
2588+ if (*instr == BYTE_JUMP) {
2589+ delta = (((unsigned char*) te->start_addr)
2590+ + 1);
2591+ *delta = te->end_addr - te->start_addr -
2592+ BYTE_JUMP_LEN;
2593+ te->count = 0;
2594+ count++;
2595+ }
2596+ }
2597+ te++;
2598+ }
2599+ return count;
2600+}
2601+
2602+int ft_is_event_enabled(unsigned long id)
2603+{
2604+ struct trace_event* te = __start___event_table;
2605+
2606+ while (te < __stop___event_table) {
2607+ if (te->id == id)
2608+ return te->count;
2609+ te++;
2610+ }
2611+ return 0;
2612+}
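ft_enable_event() and ft_disable_event() reference-count the users of each tracing event: the jump is patched out when the first user enables an event and restored when the last user disables it. A small stand-alone model of that counting follows; the x86 jump patching itself is not modeled, and the struct is an illustration only.

/* Illustrative sketch, not part of the original patch: reference-counted
 * event enabling as done by the feather-trace functions above.
 */
#include <assert.h>
#include <stdio.h>

struct model_event {
	long id;
	long count;   /* number of users that enabled this event */
};

static void enable(struct model_event *e)        { e->count++; }
static void disable(struct model_event *e)       { if (e->count > 0) e->count--; }
static int  active(const struct model_event *e)  { return e->count > 0; }

int main(void)
{
	struct model_event ev = { .id = 42, .count = 0 };

	enable(&ev);
	enable(&ev);
	disable(&ev);
	assert(active(&ev));   /* still one user left              */
	disable(&ev);
	assert(!active(&ev));  /* last user gone, event is off again */
	printf("event %ld is off\n", ev.id);
	return 0;
}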
2613diff --git a/litmus/jobs.c b/litmus/jobs.c
2614new file mode 100644
2615index 0000000..e294bc5
2616--- /dev/null
2617+++ b/litmus/jobs.c
2618@@ -0,0 +1,43 @@
2619+/* litmus/jobs.c - common job control code
2620+ */
2621+
2622+#include <linux/sched.h>
2623+
2624+#include <litmus/litmus.h>
2625+#include <litmus/jobs.h>
2626+
2627+void prepare_for_next_period(struct task_struct *t)
2628+{
2629+ BUG_ON(!t);
2630+ /* prepare next release */
2631+ t->rt_param.job_params.release = t->rt_param.job_params.deadline;
2632+ t->rt_param.job_params.deadline += get_rt_period(t);
2633+ t->rt_param.job_params.exec_time = 0;
2634+ /* update job sequence number */
2635+ t->rt_param.job_params.job_no++;
2636+
2637+ /* don't confuse Linux */
2638+ t->time_slice = 1;
2639+}
2640+
2641+void release_at(struct task_struct *t, lt_t start)
2642+{
2643+ t->rt_param.job_params.deadline = start;
2644+ prepare_for_next_period(t);
2645+ set_rt_flags(t, RT_F_RUNNING);
2646+}
2647+
2648+
2649+/*
2650+ * Deactivate current task until the beginning of the next period.
2651+ */
2652+long complete_job(void)
2653+{
2654+	/* Mark that we do not execute anymore */
2655+ set_rt_flags(current, RT_F_SLEEP);
2656+	/* call schedule(); this will return when a new job arrives.
2657+	 * It also takes care of preparing for the next release.
2658+ */
2659+ schedule();
2660+ return 0;
2661+}
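complete_job() implements the canonical periodic pattern: mark the current job finished and call schedule(), which returns when the next job is released. A user-space sketch of the corresponding job loop follows; the syscall number is a placeholder, and the task is assumed to already be in real-time mode (see sys_task_mode_transition() in litmus/litmus.c).

/* Illustrative sketch, not part of the original patch: a periodic job
 * loop built on the sleep-until-next-release semantics above.
 */
#include <stdio.h>
#include <unistd.h>

#define __NR_sleep_next_period_placeholder 324  /* placeholder, see unistd.h */

static void do_one_job(int job)
{
	printf("job %d runs\n", job);
}

int main(void)
{
	for (int job = 0; job < 10; job++) {
		do_one_job(job);
		/* returns when the next job is released */
		if (syscall(__NR_sleep_next_period_placeholder) < 0) {
			perror("sleep_next_period");
			break;
		}
	}
	return 0;
}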
2662diff --git a/litmus/litmus.c b/litmus/litmus.c
2663new file mode 100644
2664index 0000000..77aad7d
2665--- /dev/null
2666+++ b/litmus/litmus.c
2667@@ -0,0 +1,830 @@
2668+/* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS initialization code,
2669+ * and the procfs interface.
2670+ */
2671+#include <asm/uaccess.h>
2672+#include <linux/uaccess.h>
2673+#include <linux/sysrq.h>
2674+
2675+#include <linux/module.h>
2676+#include <linux/proc_fs.h>
2677+
2678+
2679+#include <litmus/litmus.h>
2680+#include <linux/sched.h>
2681+#include <litmus/sched_plugin.h>
2682+
2683+#include <litmus/trace.h>
2684+
2685+/* Number of RT tasks that exist in the system */
2686+atomic_t rt_task_count = ATOMIC_INIT(0);
2687+static DEFINE_SPINLOCK(task_transition_lock);
2688+
2689+/* To send signals from the scheduler
2690+ * Must drop locks first.
2691+ */
2692+static LIST_HEAD(sched_sig_list);
2693+static DEFINE_SPINLOCK(sched_sig_list_lock);
2694+
2695+/*
2696+ * sys_set_task_rt_param
2697+ * sys_set_rt_task_param
2698+ * @pid: Pid of the task whose scheduling parameters are to be changed
2699+ * @param: New real-time extension parameters such as the execution cost and
2700+ * period
2701+ * Syscall for manipulating a task's RT extension params
2702+ * Returns EFAULT if param cannot be copied from user space.
2703+ * ESRCH if pid does not correspond
2704+ * to a valid task.
2705+ * EINVAL if pid is negative, param is NULL, or
2706+ * either the period or the execution cost is <= 0
2707+ * EBUSY if the task is already a real-time task
2708+ * 0 on success
2709+ *
2710+ * Only non-real-time tasks may be configured with this system call
2711+ * to avoid races with the scheduler. In practice, this means that a
2712+ * task's parameters must be set _before_ the task enters real-time mode. */
2712+asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
2713+{
2714+ struct rt_task tp;
2715+ struct task_struct *target;
2716+ int retval = -EINVAL;
2717+
2718+ printk("Setting up rt task parameters for process %d.\n", pid);
2719+
2720+ if (pid < 0 || param == 0) {
2721+ goto out;
2722+ }
2723+ if (copy_from_user(&tp, param, sizeof(tp))) {
2724+ retval = -EFAULT;
2725+ goto out;
2726+ }
2727+
2728+ /* Task search and manipulation must be protected */
2729+ read_lock_irq(&tasklist_lock);
2730+ if (!(target = find_task_by_pid(pid))) {
2731+ retval = -ESRCH;
2732+ goto out_unlock;
2733+ }
2734+
2735+ if (is_realtime(target)) {
2736+ /* The task is already a real-time task.
2737+		 * We cannot allow parameter changes at this point.
2738+ */
2739+ retval = -EBUSY;
2740+ goto out_unlock;
2741+ }
2742+
2743+ if (tp.exec_cost <= 0)
2744+ goto out_unlock;
2745+ if (tp.period <= 0)
2746+ goto out_unlock;
2747+ if (!cpu_online(tp.cpu))
2748+ goto out_unlock;
2749+ if (tp.period < tp.exec_cost)
2750+ {
2751+ printk(KERN_INFO "litmus: real-time task %d rejected "
2752+ "because wcet > period\n", pid);
2753+ goto out_unlock;
2754+ }
2755+
2756+ target->rt_param.task_params = tp;
2757+
2758+ retval = 0;
2759+ out_unlock:
2760+ read_unlock_irq(&tasklist_lock);
2761+ out:
2762+ return retval;
2763+}
2764+
2765+/* Getter of task's RT params
2766+ * returns EINVAL if param is NULL or pid is negative
2767+ * returns ESRCH if pid does not correspond to a valid task
2768+ * returns EFAULT if copying of parameters has failed.
2769+ */
2770+asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
2771+{
2772+ int retval = -EINVAL;
2773+ struct task_struct *source;
2774+ struct rt_task lp;
2775+ if (param == 0 || pid < 0)
2776+ goto out;
2777+ read_lock(&tasklist_lock);
2778+ if (!(source = find_task_by_pid(pid))) {
2779+ retval = -ESRCH;
2780+ goto out_unlock;
2781+ }
2782+ lp = source->rt_param.task_params;
2783+ read_unlock(&tasklist_lock);
2784+ /* Do copying outside the lock */
2785+ retval =
2786+ copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
2787+ return retval;
2788+ out_unlock:
2789+ read_unlock(&tasklist_lock);
2790+ out:
2791+ return retval;
2792+
2793+}
2794+
2795+/* sys_task_mode_transition
2796+ * @target_mode: The desired execution mode after the system call completes.
2797+ * Either BACKGROUND_TASK or LITMUS_RT_TASK.
2798+ * Allows a normal task to become a real-time task, and vice versa.
2799+ * Returns EINVAL if an illegal transition is requested.
2800+ * 0 if the task mode was changed successfully
2801+ * another value if the plugin failed.
2802+ */
2803+asmlinkage long sys_task_mode_transition(int target_mode)
2804+{
2805+ int retval = -EINVAL;
2806+ struct task_struct *t = current;
2807+
2808+ if (( is_realtime(t) && target_mode == BACKGROUND_TASK) ||
2809+ (!is_realtime(t) && target_mode == LITMUS_RT_TASK)) {
2810+ TRACE_TASK(t, "attempts mode transition to %s\n",
2811+ is_realtime(t) ? "best-effort" : "real-time");
2812+ preempt_disable();
2813+ t->rt_param.transition_pending = 1;
2814+ t->state = TASK_STOPPED;
2815+ preempt_enable_no_resched();
2816+
2817+ schedule();
2818+
2819+ retval = t->rt_param.transition_error;
2820+ }
2821+ return retval;
2822+}
2823+
2824+/* implemented in litmus/litmus_sem.c */
2825+void srp_ceiling_block(void);
2826+
2827+/*
2828+ * This is the crucial function for the periodic task implementation.
2829+ * It checks whether the task is periodic, whether this kind of sleep
2830+ * is permitted, and then calls the plugin-specific sleep routine, which
2831+ * puts the task into a wait queue.
2832+ * returns 0 on successful wakeup
2833+ * returns EPERM if current conditions do not permit such sleep
2834+ * returns EINVAL if current task is not able to go to sleep
2835+ */
2836+asmlinkage long sys_sleep_next_period(void)
2837+{
2838+ int retval = -EPERM;
2839+ if (!is_realtime(current)) {
2840+ retval = -EINVAL;
2841+ goto out;
2842+ }
2843+ /* Task with negative or zero period cannot sleep */
2844+ if (get_rt_period(current) <= 0) {
2845+ retval = -EINVAL;
2846+ goto out;
2847+ }
2848+ /* The plugin has to put the task into an
2849+ * appropriate queue and call schedule
2850+ */
2851+ retval = curr_sched_plugin->sleep_next_period();
2852+ out:
2853+ return retval;
2854+}
2855+
2856+/* This is an "improved" version of sys_sleep_next_period() that
2857+ * addresses the problem of unintentionally missing a job after
2858+ * an overrun.
2859+ *
2860+ * returns 0 on successful wakeup
2861+ * returns EPERM if current conditions do not permit such sleep
2862+ * returns EINVAL if current task is not able to go to sleep
2863+ */
2864+asmlinkage long sys_wait_for_job_release(unsigned int job)
2865+{
2866+ int retval = -EPERM;
2867+ if (!is_realtime(current)) {
2868+ retval = -EINVAL;
2869+ goto out;
2870+ }
2871+
2872+ /* Task with negative or zero period cannot sleep */
2873+ if (get_rt_period(current) <= 0) {
2874+ retval = -EINVAL;
2875+ goto out;
2876+ }
2877+
2878+ retval = 0;
2879+
2880+ /* first wait until we have "reached" the desired job
2881+ *
2882+ * This implementation has at least two problems:
2883+ *
2884+ * 1) It doesn't gracefully handle the wrap around of
2885+ * job_no. Since LITMUS is a prototype, this is not much
2886+ * of a problem right now.
2887+ *
2888+ * 2) It is theoretically racy if a job release occurs
2889+ * between checking job_no and calling sleep_next_period().
2890+	 * A proper solution would require adding another callback
2891+ * in the plugin structure and testing the condition with
2892+ * interrupts disabled.
2893+ *
2894+ * FIXME: At least problem 2 should be taken care of eventually.
2895+ */
2896+ while (!retval && job > current->rt_param.job_params.job_no)
2897+ /* If the last job overran then job <= job_no and we
2898+ * don't send the task to sleep.
2899+ */
2900+ retval = curr_sched_plugin->sleep_next_period();
2901+ out:
2902+ return retval;
2903+}
2904+
2905+/* This is a helper syscall to query the current job sequence number.
2906+ *
2907+ * returns 0 on successful query
2908+ * returns EPERM if task is not a real-time task.
2909+ * returns EFAULT if &job is not a valid pointer.
2910+ */
2911+asmlinkage long sys_query_job_no(unsigned int __user *job)
2912+{
2913+ int retval = -EPERM;
2914+ if (is_realtime(current))
2915+ retval = put_user(current->rt_param.job_params.job_no, job);
2916+
2917+ return retval;
2918+}
2919+
2920+struct sched_sig {
2921+ struct list_head list;
2922+ struct task_struct* task;
2923+ unsigned int signal:31;
2924+ int force:1;
2925+};
2926+
2927+static void __scheduler_signal(struct task_struct *t, unsigned int signo,
2928+ int force)
2929+{
2930+ struct sched_sig* sig;
2931+
2932+	sig = kmalloc(sizeof(struct sched_sig), GFP_ATOMIC);
2933+	if (!sig) {
2934+		TRACE_TASK(t, "dropping signal: %u\n", signo);
2935+ return;
2936+ }
2937+
2938+ spin_lock(&sched_sig_list_lock);
2939+
2940+ sig->signal = signo;
2941+ sig->force = force;
2942+ sig->task = t;
2943+ get_task_struct(t);
2944+ list_add(&sig->list, &sched_sig_list);
2945+
2946+ spin_unlock(&sched_sig_list_lock);
2947+}
2948+
2949+void scheduler_signal(struct task_struct *t, unsigned int signo)
2950+{
2951+ __scheduler_signal(t, signo, 0);
2952+}
2953+
2954+void force_scheduler_signal(struct task_struct *t, unsigned int signo)
2955+{
2956+ __scheduler_signal(t, signo, 1);
2957+}
2958+
2959+/* FIXME: get rid of the locking and do this on a per-processor basis */
2960+void send_scheduler_signals(void)
2961+{
2962+ unsigned long flags;
2963+ struct list_head *p, *extra;
2964+ struct siginfo info;
2965+ struct sched_sig* sig;
2966+ struct task_struct* t;
2967+ struct list_head claimed;
2968+
2969+ if (spin_trylock_irqsave(&sched_sig_list_lock, flags)) {
2970+ if (list_empty(&sched_sig_list))
2971+ p = NULL;
2972+ else {
2973+ p = sched_sig_list.next;
2974+ list_del(&sched_sig_list);
2975+ INIT_LIST_HEAD(&sched_sig_list);
2976+ }
2977+ spin_unlock_irqrestore(&sched_sig_list_lock, flags);
2978+
2979+ /* abort if there are no signals */
2980+ if (!p)
2981+ return;
2982+
2983+ /* take signal list we just obtained */
2984+ list_add(&claimed, p);
2985+
2986+ list_for_each_safe(p, extra, &claimed) {
2987+ list_del(p);
2988+ sig = list_entry(p, struct sched_sig, list);
2989+ t = sig->task;
2990+ info.si_signo = sig->signal;
2991+ info.si_errno = 0;
2992+ info.si_code = SI_KERNEL;
2993+ info.si_pid = 1;
2994+ info.si_uid = 0;
2995+ TRACE("sending signal %d to %d\n", info.si_signo,
2996+ t->pid);
2997+ if (sig->force)
2998+ force_sig_info(sig->signal, &info, t);
2999+ else
3000+ send_sig_info(sig->signal, &info, t);
3001+ put_task_struct(t);
3002+ kfree(sig);
3003+ }
3004+ }
3005+
3006+}
3007+
3008+static inline void np_mem_error(struct task_struct* t, const char* reason)
3009+{
3010+ if (t->state != TASK_DEAD && !(t->flags & PF_EXITING)) {
3011+ TRACE("np section: %s => %s/%d killed\n",
3012+ reason, t->comm, t->pid);
3013+ force_scheduler_signal(t, SIGKILL);
3014+ }
3015+}
3016+
3017+/* sys_register_np_flag() allows real-time tasks to register an
3018+ * np section indicator.
3019+ * returns 0 if the flag was successfully registered
3020+ * returns EINVAL if current task is not a real-time task
3021+ * returns EFAULT if *flag couldn't be written
3022+ */
3023+asmlinkage long sys_register_np_flag(short __user *flag)
3024+{
3025+ int retval = -EINVAL;
3026+ short test_val = RT_PREEMPTIVE;
3027+
3028+ /* avoid races with the scheduler */
3029+ preempt_disable();
3030+ TRACE("reg_np_flag(%p) for %s/%d\n", flag,
3031+ current->comm, current->pid);
3032+
3033+ /* Let's first try to write to the address.
3034+ * That way it is initialized and any bugs
3035+	 * involving dangling pointers will be caught
3036+ * early.
3037+ * NULL indicates disabling np section support
3038+ * and should not be tested.
3039+ */
3040+ if (flag)
3041+ retval = poke_kernel_address(test_val, flag);
3042+ else
3043+ retval = 0;
3044+ TRACE("reg_np_flag: retval=%d\n", retval);
3045+ if (unlikely(0 != retval))
3046+ np_mem_error(current, "np flag: not writable");
3047+ else
3048+ /* the pointer is ok */
3049+ current->rt_param.np_flag = flag;
3050+
3051+ preempt_enable();
3052+ return retval;
3053+}
3054+
3055+
3056+void request_exit_np(struct task_struct *t)
3057+{
3058+ int ret;
3059+ short flag;
3060+
3061+ /* We can only do this if t is actually currently scheduled on this CPU
3062+ * because otherwise we are in the wrong address space. Thus make sure
3063+ * to check.
3064+ */
3065+ BUG_ON(t != current);
3066+
3067+ if (unlikely(!is_realtime(t) || !t->rt_param.np_flag)) {
3068+ TRACE_TASK(t, "request_exit_np(): BAD TASK!\n");
3069+ return;
3070+ }
3071+
3072+ flag = RT_EXIT_NP_REQUESTED;
3073+ ret = poke_kernel_address(flag, t->rt_param.np_flag + 1);
3074+ TRACE("request_exit_np(%s/%d)\n", t->comm, t->pid);
3075+ if (unlikely(0 != ret))
3076+ np_mem_error(current, "request_exit_np(): flag not writable");
3077+
3078+}
3079+
3080+
3081+int is_np(struct task_struct* t)
3082+{
3083+ int ret;
3084+ unsigned short flag = 0x5858; /* = XX, looks nicer in debug*/
3085+
3086+ BUG_ON(t != current);
3087+
3088+ if (unlikely(t->rt_param.kernel_np))
3089+ return 1;
3090+ else if (unlikely(t->rt_param.np_flag == NULL) ||
3091+ t->flags & PF_EXITING ||
3092+ t->state == TASK_DEAD)
3093+ return 0;
3094+ else {
3095+ /* This is the tricky part. The process has registered a
3096+ * non-preemptive section marker. We now need to check whether
3097+	 * it is set to RT_NON_PREEMPTIVE. Along the way we could
3098+ * discover that the pointer points to an unmapped region (=>
3099+ * kill the task) or that the location contains some garbage
3100+ * value (=> also kill the task). Killing the task in any case
3101+ * forces userspace to play nicely. Any bugs will be discovered
3102+ * immediately.
3103+ */
3104+ ret = probe_kernel_address(t->rt_param.np_flag, flag);
3105+ if (0 == ret && (flag == RT_NON_PREEMPTIVE ||
3106+ flag == RT_PREEMPTIVE))
3107+ return flag != RT_PREEMPTIVE;
3108+ else {
3109+ /* either we could not read from the address or
3110+ * it contained garbage => kill the process
3111+ * FIXME: Should we cause a SEGFAULT instead?
3112+ */
3113+ TRACE("is_np: ret=%d flag=%c%c (%x)\n", ret,
3114+ flag & 0xff, (flag >> 8) & 0xff, flag);
3115+ np_mem_error(t, "is_np() could not read");
3116+ return 0;
3117+ }
3118+ }
3119+}
3120+
3121+/*
3122+ * sys_exit_np() allows a real-time task to signal that it has left a
3123+ * non-preemptable section. It will be called after the kernel requested a
3124+ * callback in the preemption indicator flag.
3125+ * returns 0 if the signal was valid and processed.
3126+ * returns EINVAL if current task is not a real-time task
3127+ */
3128+asmlinkage long sys_exit_np(void)
3129+{
3130+ int retval = -EINVAL;
3131+
3132+ TS_EXIT_NP_START;
3133+
3134+ if (!is_realtime(current))
3135+ goto out;
3136+
3137+ TRACE("sys_exit_np(%s/%d)\n", current->comm, current->pid);
3138+ /* force rescheduling so that we can be preempted */
3139+ set_tsk_need_resched(current);
3140+ retval = 0;
3141+ out:
3142+
3143+ TS_EXIT_NP_END;
3144+ return retval;
3145+}
3146+
3147+void __setscheduler(struct task_struct *, int, int);
3148+
3149+/* p is a real-time task. Re-init its state as a best-effort task. */
3150+static void reinit_litmus_state(struct task_struct* p, int restore)
3151+{
3152+ struct rt_task user_config = {};
3153+ __user short *np_flag = NULL;
3154+
3155+ if (restore) {
3156+		/* Save user-space provided configuration data.
3157+ * FIXME: This is missing service levels for adaptive tasks.
3158+ */
3159+ user_config = p->rt_param.task_params;
3160+ np_flag = p->rt_param.np_flag;
3161+ }
3162+
3163+ /* We probably should not be inheriting any task's priority
3164+ * at this point in time.
3165+ */
3166+ WARN_ON(p->rt_param.inh_task);
3167+
3168+ /* We need to restore the priority of the task. */
3169+ __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio);
3170+
3171+ /* Cleanup everything else. */
3172+	memset(&p->rt_param, 0, sizeof(p->rt_param));
3173+
3174+ /* Restore preserved fields. */
3175+ if (restore) {
3176+ p->rt_param.task_params = user_config;
3177+ p->rt_param.np_flag = np_flag;
3178+ }
3179+}
3180+
3181+long transition_to_rt(struct task_struct* tsk)
3182+{
3183+ long retval;
3184+	unsigned long flags;
3185+
3186+ BUG_ON(is_realtime(tsk));
3187+
3188+ if (get_rt_period(tsk) == 0 ||
3189+ get_exec_cost(tsk) > get_rt_period(tsk)) {
3190+ TRACE_TASK(tsk, "litmus prepare: invalid task parameters "
3191+ "(%lu, %lu)\n",
3192+ get_exec_cost(tsk), get_rt_period(tsk));
3193+ return -EINVAL;
3194+ }
3195+
3196+ if (!cpu_online(get_partition(tsk)))
3197+ {
3198+ TRACE_TASK(tsk, "litmus prepare: cpu %d is not online\n",
3199+ get_partition(tsk));
3200+ return -EINVAL;
3201+ }
3202+
3203+ tsk->rt_param.old_prio = tsk->rt_priority;
3204+ tsk->rt_param.old_policy = tsk->policy;
3205+ INIT_LIST_HEAD(&tsk->rt_list);
3206+
3207+ /* avoid scheduler plugin changing underneath us */
3208+ spin_lock_irqsave(&task_transition_lock, flags);
3209+ retval = curr_sched_plugin->prepare_task(tsk);
3210+
3211+ if (!retval) {
3212+ atomic_inc(&rt_task_count);
3213+ __setscheduler(tsk, SCHED_FIFO, MAX_RT_PRIO - 1);
3214+ tsk->rt_param.is_realtime = 1;
3215+ tsk->rt_param.litmus_controlled = 1;
3216+ }
3217+ spin_unlock_irqrestore(&task_transition_lock, flags);
3218+
3219+ return retval;
3220+}
3221+
3222+long transition_to_be(struct task_struct* tsk)
3223+{
3224+ BUG_ON(!is_realtime(tsk));
3225+
3226+ curr_sched_plugin->tear_down(tsk);
3227+ atomic_dec(&rt_task_count);
3228+ reinit_litmus_state(tsk, 1);
3229+ return 0;
3230+}
3231+
3232+
3233+/* Switching a plugin in use is tricky.
3234+ * We must make sure that no real-time tasks exist
3235+ * (and that none are created in parallel) and that the plugin is not
3236+ * currently in use on any processor (in theory).
3237+ *
3238+ * For now, we don't enforce the second part since it is unlikely to cause
3239+ * any trouble by itself as long as we don't unload modules.
3240+ */
3241+int switch_sched_plugin(struct sched_plugin* plugin)
3242+{
3243+	unsigned long flags;
3244+ int ret = 0;
3245+
3246+ BUG_ON(!plugin);
3247+
3248+ /* stop task transitions */
3249+ spin_lock_irqsave(&task_transition_lock, flags);
3250+
3251+ /* don't switch if there are active real-time tasks */
3252+ if (atomic_read(&rt_task_count) == 0) {
3253+ printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
3254+ curr_sched_plugin = plugin;
3255+ } else
3256+ ret = -EBUSY;
3257+
3258+ spin_unlock_irqrestore(&task_transition_lock, flags);
3259+ return ret;
3260+}
3261+
3262+/* Called upon fork.
3263+ * p is the newly forked task.
3264+ */
3265+void litmus_fork(struct task_struct* p)
3266+{
3267+ if (is_realtime(p))
3268+ /* clean out any litmus related state, don't preserve anything*/
3269+ reinit_litmus_state(p, 0);
3270+}
3271+
3272+/* Called upon execve().
3273+ * current is doing the exec.
3274+ * Don't let address space specific stuff leak.
3275+ */
3276+void litmus_exec(void)
3277+{
3278+ struct task_struct* p = current;
3279+
3280+ if (is_realtime(p)) {
3281+ WARN_ON(p->rt_param.inh_task);
3282+ p->rt_param.np_flag = NULL;
3283+ }
3284+}
3285+
3286+void exit_litmus(struct task_struct *dead_tsk)
3287+{
3288+ if (is_realtime(dead_tsk))
3289+ transition_to_be(dead_tsk);
3290+}
3291+
3292+
3293+void list_qsort(struct list_head* list, list_cmp_t less_than)
3294+{
3295+ struct list_head lt;
3296+ struct list_head geq;
3297+ struct list_head *pos, *extra, *pivot;
3298+ int n_lt = 0, n_geq = 0;
3299+ BUG_ON(!list);
3300+
3301+ if (list->next == list)
3302+ return;
3303+
3304+ INIT_LIST_HEAD(&lt);
3305+ INIT_LIST_HEAD(&geq);
3306+
3307+ pivot = list->next;
3308+ list_del(pivot);
3309+ list_for_each_safe(pos, extra, list) {
3310+ list_del(pos);
3311+ if (less_than(pos, pivot)) {
3312+ list_add(pos, &lt);
3313+ n_lt++;
3314+ } else {
3315+ list_add(pos, &geq);
3316+ n_geq++;
3317+ }
3318+ }
3319+ if (n_lt < n_geq) {
3320+ list_qsort(&lt, less_than);
3321+ list_qsort(&geq, less_than);
3322+ } else {
3323+ list_qsort(&geq, less_than);
3324+ list_qsort(&lt, less_than);
3325+ }
3326+ list_splice(&geq, list);
3327+ list_add(pivot, list);
3328+ list_splice(&lt, list);
3329+}
3330+
3331+#ifdef CONFIG_MAGIC_SYSRQ
3332+int sys_kill(int pid, int sig);
3333+
3334+static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty)
3335+{
3336+ struct task_struct *t;
3337+ read_lock(&tasklist_lock);
3338+ for_each_process(t) {
3339+ if (is_realtime(t)) {
3340+ sys_kill(t->pid, SIGKILL);
3341+ }
3342+ }
3343+ read_unlock(&tasklist_lock);
3344+}
3345+
3346+static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
3347+ .handler = sysrq_handle_kill_rt_tasks,
3348+ .help_msg = "Quit-rt-tasks",
3349+ .action_msg = "sent SIGKILL to all real-time tasks",
3350+};
3351+#endif
3352+
3353+static int proc_read_stats(char *page, char **start,
3354+ off_t off, int count,
3355+ int *eof, void *data)
3356+{
3357+ int len;
3358+
3359+ len = snprintf(page, PAGE_SIZE,
3360+ "real-time task count = %d\n",
3361+ atomic_read(&rt_task_count));
3362+ return len;
3363+}
3364+
3365+static int proc_read_plugins(char *page, char **start,
3366+ off_t off, int count,
3367+ int *eof, void *data)
3368+{
3369+ int len;
3370+
3371+ len = print_sched_plugins(page, PAGE_SIZE);
3372+ return len;
3373+}
3374+
3375+static int proc_read_curr(char *page, char **start,
3376+ off_t off, int count,
3377+ int *eof, void *data)
3378+{
3379+ int len;
3380+
3381+ len = snprintf(page, PAGE_SIZE, "%s\n", curr_sched_plugin->plugin_name);
3382+ return len;
3383+}
3384+
3385+static int proc_write_curr(struct file *file,
3386+ const char *buffer,
3387+ unsigned long count,
3388+ void *data)
3389+{
3390+ int len, ret;
3391+ char name[65];
3392+ struct sched_plugin* found;
3393+
3394+ if(count > 64)
3395+ len = 64;
3396+ else
3397+ len = count;
3398+
3399+ if(copy_from_user(name, buffer, len))
3400+ return -EFAULT;
3401+
3402+ name[len] = '\0';
3403+ /* chomp name */
3404+ if (len > 1 && name[len - 1] == '\n')
3405+ name[len - 1] = '\0';
3406+
3407+ found = find_sched_plugin(name);
3408+
3409+ if (found) {
3410+ ret = switch_sched_plugin(found);
3411+ if (ret != 0)
3412+ printk(KERN_INFO "Could not switch plugin: %d\n", ret);
3413+ } else
3414+ printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
3415+
3416+ return len;
3417+}
3418+
3419+
3420+static struct proc_dir_entry *litmus_dir = NULL,
3421+ *curr_file = NULL,
3422+ *stat_file = NULL,
3423+ *plugs_file = NULL;
3424+
3425+static int __init init_litmus_proc(void)
3426+{
3427+ litmus_dir = proc_mkdir("litmus", NULL);
3428+ if (!litmus_dir) {
3429+ printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
3430+ return -ENOMEM;
3431+ }
3432+ litmus_dir->owner = THIS_MODULE;
3433+
3434+ curr_file = create_proc_entry("active_plugin",
3435+ 0644, litmus_dir);
3436+ if (!curr_file) {
3437+ printk(KERN_ERR "Could not allocate active_plugin "
3438+ "procfs entry.\n");
3439+ return -ENOMEM;
3440+ }
3441+ curr_file->owner = THIS_MODULE;
3442+ curr_file->read_proc = proc_read_curr;
3443+ curr_file->write_proc = proc_write_curr;
3444+
3445+ stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
3446+ proc_read_stats, NULL);
3447+
3448+ plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir,
3449+ proc_read_plugins, NULL);
3450+
3451+ return 0;
3452+}
3453+
3454+static void exit_litmus_proc(void)
3455+{
3456+ if (plugs_file)
3457+ remove_proc_entry("plugins", litmus_dir);
3458+ if (stat_file)
3459+ remove_proc_entry("stats", litmus_dir);
3460+ if (curr_file)
3461+ remove_proc_entry("active_plugin", litmus_dir);
3462+ if (litmus_dir)
3463+ remove_proc_entry("litmus", NULL);
3464+}
3465+
3466+extern struct sched_plugin linux_sched_plugin;
3467+
3468+static int __init _init_litmus(void)
3469+{
3470+ /* Common initializers,
3471+ * mode change lock is used to enforce single mode change
3472+ * operation.
3473+ */
3474+ printk("Starting LITMUS^RT kernel\n");
3475+
3476+ register_sched_plugin(&linux_sched_plugin);
3477+
3478+#ifdef CONFIG_MAGIC_SYSRQ
3479+ /* offer some debugging help */
3480+ if (!register_sysrq_key('q', &sysrq_kill_rt_tasks_op))
3481+ printk("Registered kill rt tasks magic sysrq.\n");
3482+ else
3483+ printk("Could not register kill rt tasks magic sysrq.\n");
3484+#endif
3485+
3486+ init_litmus_proc();
3487+
3488+ return 0;
3489+}
3490+
3491+static void _exit_litmus(void)
3492+{
3493+ exit_litmus_proc();
3494+}
3495+
3496+module_init(_init_litmus);
3497+module_exit(_exit_litmus);
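The procfs interface registered above exposes /proc/litmus/active_plugin (read/write), /proc/litmus/plugins, and /proc/litmus/stats. The following user-space sketch switches plugins by writing a name to active_plugin and then reads it back; "PSN-EDF" is used only as an example name (the available names can be read from /proc/litmus/plugins), and the switch is rejected while real-time tasks exist.

/* Illustrative sketch, not part of the original patch: driving the
 * /proc/litmus interface set up above.
 */
#include <stdio.h>

int main(void)
{
	char line[64];
	FILE *f;

	/* request a plugin switch; fails with EBUSY while RT tasks exist */
	f = fopen("/proc/litmus/active_plugin", "w");
	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("PSN-EDF\n", f);   /* example name only */
	fclose(f);

	/* read back which plugin is now active */
	f = fopen("/proc/litmus/active_plugin", "r");
	if (f && fgets(line, sizeof(line), f))
		printf("active plugin: %s", line);
	if (f)
		fclose(f);
	return 0;
}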
3498diff --git a/litmus/litmus_sem.c b/litmus/litmus_sem.c
3499new file mode 100644
3500index 0000000..7179b43
3501--- /dev/null
3502+++ b/litmus/litmus_sem.c
3503@@ -0,0 +1,551 @@
3504+/*
3505+ * PI semaphores and SRP implementations.
3506+ * Much of the code here is borrowed from include/asm-i386/semaphore.h.
3507+ *
3508+ * NOTE: This implementation is very much a prototype and horribly insecure. It
3509+ * is intended to be a proof of concept, not a feature-complete solution.
3510+ */
3511+
3512+#include <asm/atomic.h>
3513+#include <asm/semaphore.h>
3514+#include <linux/sched.h>
3515+#include <linux/wait.h>
3516+#include <linux/spinlock.h>
3517+#include <litmus/litmus.h>
3518+#include <litmus/sched_plugin.h>
3519+#include <litmus/edf_common.h>
3520+
3521+#include <litmus/fdso.h>
3522+
3523+#include <litmus/trace.h>
3524+
3525+/* ************************************************************************** */
3526+/* PRIORITY INHERITANCE */
3527+/* ************************************************************************** */
3528+
3529+static void* create_pi_semaphore(void)
3530+{
3531+ struct pi_semaphore* sem;
3532+ int i;
3533+
3534+ sem = kmalloc(sizeof(struct pi_semaphore), GFP_KERNEL);
3535+ if (!sem)
3536+ return NULL;
3537+ atomic_set(&sem->count, 1);
3538+ sem->sleepers = 0;
3539+ init_waitqueue_head(&sem->wait);
3540+ sem->hp.task = NULL;
3541+ sem->holder = NULL;
3542+ for (i = 0; i < NR_CPUS; i++)
3543+ sem->hp.cpu_task[i] = NULL;
3544+ return sem;
3545+}
3546+
3547+static void destroy_pi_semaphore(void* sem)
3548+{
3549+ /* XXX assert invariants */
3550+ kfree(sem);
3551+}
3552+
3553+struct fdso_ops pi_sem_ops = {
3554+ .create = create_pi_semaphore,
3555+ .destroy = destroy_pi_semaphore
3556+};
3557+
3558+struct wq_pair {
3559+ struct task_struct* tsk;
3560+ struct pi_semaphore* sem;
3561+};
3562+
3563+static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync,
3564+ void *key)
3565+{
3566+ struct wq_pair* wqp = (struct wq_pair*) wait->private;
3567+ set_rt_flags(wqp->tsk, RT_F_EXIT_SEM);
3568+ curr_sched_plugin->inherit_priority(wqp->sem, wqp->tsk);
3569+ TRACE_TASK(wqp->tsk,
3570+ "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n");
3571+ /* point to task for default_wake_function() */
3572+ wait->private = wqp->tsk;
3573+ default_wake_function(wait, mode, sync, key);
3574+
3575+ /* Always return true since we know that if we encountered a task
3576+	 * that was already running the wake_up raced with the schedule() in
3577+	 * do_pi_down(). In that case the task in do_pi_down() will be scheduled
3578+ * immediately and own the lock. We must not wake up another task in
3579+ * any case.
3580+ */
3581+ return 1;
3582+}
3583+
3584+/* caller is responsible for locking */
3585+int set_hp_task(struct pi_semaphore *sem, prio_cmp_t higher_prio)
3586+{
3587+ struct list_head *tmp, *next;
3588+ struct task_struct *queued;
3589+ int ret = 0;
3590+
3591+ sem->hp.task = NULL;
3592+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
3593+ queued = ((struct wq_pair*)
3594+ list_entry(tmp, wait_queue_t,
3595+ task_list)->private)->tsk;
3596+
3597+ /* Compare task prios, find high prio task. */
3598+ if (higher_prio(queued, sem->hp.task)) {
3599+ sem->hp.task = queued;
3600+ ret = 1;
3601+ }
3602+ }
3603+ return ret;
3604+}
3605+
3606+/* caller is responsible for locking */
3607+int set_hp_cpu_task(struct pi_semaphore *sem, int cpu, prio_cmp_t higher_prio)
3608+{
3609+ struct list_head *tmp, *next;
3610+ struct task_struct *queued;
3611+ int ret = 0;
3612+
3613+ sem->hp.cpu_task[cpu] = NULL;
3614+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
3615+ queued = ((struct wq_pair*)
3616+ list_entry(tmp, wait_queue_t,
3617+ task_list)->private)->tsk;
3618+
3619+ /* Compare task prios, find high prio task. */
3620+ if (get_partition(queued) == cpu &&
3621+ higher_prio(queued, sem->hp.cpu_task[cpu])) {
3622+ sem->hp.cpu_task[cpu] = queued;
3623+ ret = 1;
3624+ }
3625+ }
3626+ return ret;
3627+}
3628+
3629+int do_pi_down(struct pi_semaphore* sem)
3630+{
3631+ unsigned long flags;
3632+ struct task_struct *tsk = current;
3633+ struct wq_pair pair;
3634+ int suspended = 1;
3635+ wait_queue_t wait = {
3636+ .private = &pair,
3637+ .func = rt_pi_wake_up,
3638+ .task_list = {NULL, NULL}
3639+ };
3640+
3641+ pair.tsk = tsk;
3642+ pair.sem = sem;
3643+ spin_lock_irqsave(&sem->wait.lock, flags);
3644+
3645+ if (atomic_dec_return(&sem->count) < 0 ||
3646+ waitqueue_active(&sem->wait)) {
3647+ /* we need to suspend */
3648+ tsk->state = TASK_UNINTERRUPTIBLE;
3649+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
3650+
3651+ TRACE_CUR("suspends on PI lock %p\n", sem);
3652+ curr_sched_plugin->pi_block(sem, tsk);
3653+
3654+ /* release lock before sleeping */
3655+ spin_unlock_irqrestore(&sem->wait.lock, flags);
3656+
3657+ TS_PI_DOWN_END;
3658+ preempt_enable_no_resched();
3659+
3660+
3661+		/* We depend on the FIFO order.
3662+		 * Thus, we don't need to recheck when we wake up; we
3663+		 * are guaranteed to have the lock since there is only one
3664+		 * wake-up per release.
3665+ */
3666+ schedule();
3667+
3668+ TRACE_CUR("woke up, now owns PI lock %p\n", sem);
3669+
3670+ /* try_to_wake_up() set our state to TASK_RUNNING,
3671+ * all we need to do is to remove our wait queue entry
3672+ */
3673+ remove_wait_queue(&sem->wait, &wait);
3674+ } else {
3675+ /* no priority inheritance necessary, since there are no queued
3676+ * tasks.
3677+ */
3678+ suspended = 0;
3679+ TRACE_CUR("acquired PI lock %p, no contention\n", sem);
3680+ sem->holder = tsk;
3681+ sem->hp.task = tsk;
3682+ curr_sched_plugin->inherit_priority(sem, tsk);
3683+ spin_unlock_irqrestore(&sem->wait.lock, flags);
3684+ }
3685+ return suspended;
3686+}
3687+
3688+void do_pi_up(struct pi_semaphore* sem)
3689+{
3690+ unsigned long flags;
3691+
3692+ spin_lock_irqsave(&sem->wait.lock, flags);
3693+
3694+ TRACE_CUR("releases PI lock %p\n", sem);
3695+ curr_sched_plugin->return_priority(sem);
3696+ sem->holder = NULL;
3697+ if (atomic_inc_return(&sem->count) < 1)
3698+ /* there is a task queued */
3699+ wake_up_locked(&sem->wait);
3700+
3701+ spin_unlock_irqrestore(&sem->wait.lock, flags);
3702+}
3703+
3704+asmlinkage long sys_pi_down(int sem_od)
3705+{
3706+ long ret = 0;
3707+ struct pi_semaphore * sem;
3708+ int suspended = 0;
3709+
3710+ preempt_disable();
3711+ TS_PI_DOWN_START;
3712+
3713+ sem = lookup_pi_sem(sem_od);
3714+ if (sem)
3715+ suspended = do_pi_down(sem);
3716+ else
3717+ ret = -EINVAL;
3718+
3719+ if (!suspended) {
3720+ TS_PI_DOWN_END;
3721+ preempt_enable();
3722+ }
3723+
3724+ return ret;
3725+}
3726+
3727+asmlinkage long sys_pi_up(int sem_od)
3728+{
3729+ long ret = 0;
3730+ struct pi_semaphore * sem;
3731+
3732+ preempt_disable();
3733+ TS_PI_UP_START;
3734+
3735+ sem = lookup_pi_sem(sem_od);
3736+ if (sem)
3737+ do_pi_up(sem);
3738+ else
3739+ ret = -EINVAL;
3740+
3741+
3742+ TS_PI_UP_END;
3743+ preempt_enable();
3744+
3745+ return ret;
3746+}
3747+
3748+
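sys_pi_down() and sys_pi_up() bracket a critical section protected by a priority-inheritance semaphore that is referenced through an OD. A user-space sketch of that usage follows; the syscall numbers are placeholders, and the OD is assumed to have been obtained via od_open() (see litmus/fdso.c).

/* Illustrative sketch, not part of the original patch: a PI-protected
 * critical section as seen from user space. Placeholder syscall numbers.
 */
#include <stdio.h>
#include <unistd.h>

#define __NR_pi_down_placeholder 320  /* placeholder, see unistd.h in this patch */
#define __NR_pi_up_placeholder   321  /* placeholder, see unistd.h in this patch */

static void critical_section(void)
{
	printf("holding the resource\n");
}

static int pi_protected(int sem_od)
{
	if (syscall(__NR_pi_down_placeholder, sem_od) < 0)
		return -1;                  /* bad OD or not a PI semaphore      */
	critical_section();                 /* runs with priority inheritance    */
	return syscall(__NR_pi_up_placeholder, sem_od);
}

int main(void)
{
	/* -1 is a deliberately invalid OD so the sketch is safe to run anywhere */
	if (pi_protected(-1) < 0)
		fprintf(stderr, "pi_down failed (expected without a valid OD)\n");
	return 0;
}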
3749+/* ************************************************************************** */
3750+/* STACK RESOURCE POLICY */
3751+/* ************************************************************************** */
3752+
3753+
3754+struct srp_priority {
3755+ struct list_head list;
3756+ unsigned int period;
3757+ pid_t pid;
3758+};
3759+
3760+#define list2prio(l) list_entry(l, struct srp_priority, list)
3761+
3762+/* SRP task priority comparison function. Smaller periods have higher
3763+ * priority, tie-break is PID. Special case: period == 0 <=> no priority
3764+ */
3765+static int srp_higher_prio(struct srp_priority* first,
3766+ struct srp_priority* second)
3767+{
3768+ if (!first->period)
3769+ return 0;
3770+ else
3771+ return !second->period ||
3772+ first->period < second->period || (
3773+ first->period == second->period &&
3774+ first->pid < second->pid);
3775+}
3776+
3777+struct srp {
3778+ struct list_head ceiling;
3779+ wait_queue_head_t ceiling_blocked;
3780+};
3781+
3782+
3783+atomic_t srp_objects_in_use = ATOMIC_INIT(0);
3784+
3785+DEFINE_PER_CPU(struct srp, srp);
3786+
3787+
3788+/* Initialize SRP semaphores at boot time. */
3789+static int __init srp_init(void)
3790+{
3791+ int i;
3792+
3793+ printk("Initializing SRP per-CPU ceilings...");
3794+ for (i = 0; i < NR_CPUS; i++) {
3795+ init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
3796+ INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
3797+ }
3798+ printk(" done!\n");
3799+
3800+ return 0;
3801+}
3802+module_init(srp_init);
3803+
3804+
3805+#define system_ceiling(srp) list2prio(srp->ceiling.next)
3806+
3807+
3808+#define UNDEF_SEM -2
3809+
3810+
3811+/* struct for uniprocessor SRP "semaphore" */
3812+struct srp_semaphore {
3813+ struct srp_priority ceiling;
3814+ struct task_struct* owner;
3815+ int cpu; /* cpu associated with this "semaphore" and resource */
3816+};
3817+
3818+#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
3819+
3820+static int srp_exceeds_ceiling(struct task_struct* first,
3821+ struct srp* srp)
3822+{
3823+ return list_empty(&srp->ceiling) ||
3824+ get_rt_period(first) < system_ceiling(srp)->period ||
3825+ (get_rt_period(first) == system_ceiling(srp)->period &&
3826+ first->pid < system_ceiling(srp)->pid) ||
3827+ ceiling2sem(system_ceiling(srp))->owner == first;
3828+}
3829+
3830+static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
3831+{
3832+ struct list_head *pos;
3833+ if (in_list(&prio->list)) {
3834+ printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
3835+ "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
3836+ return;
3837+ }
3838+ list_for_each(pos, &srp->ceiling)
3839+ if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
3840+ __list_add(&prio->list, pos->prev, pos);
3841+ return;
3842+ }
3843+
3844+ list_add_tail(&prio->list, &srp->ceiling);
3845+}
3846+
3847+
3848+static void* create_srp_semaphore(void)
3849+{
3850+ struct srp_semaphore* sem;
3851+
3852+ sem = kmalloc(sizeof(*sem), GFP_KERNEL);
3853+ if (!sem)
3854+ return NULL;
3855+
3856+ INIT_LIST_HEAD(&sem->ceiling.list);
3857+ sem->ceiling.period = 0;
3858+ sem->cpu = UNDEF_SEM;
3859+ sem->owner = NULL;
3860+ atomic_inc(&srp_objects_in_use);
3861+ return sem;
3862+}
3863+
3864+static noinline int open_srp_semaphore(struct od_table_entry* entry, void* __user arg)
3865+{
3866+ struct srp_semaphore* sem = (struct srp_semaphore*) entry->obj->obj;
3867+ int ret = 0;
3868+ struct task_struct* t = current;
3869+ struct srp_priority t_prio;
3870+
3871+ TRACE("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
3872+ if (!srp_active())
3873+ return -EBUSY;
3874+
3875+ if (sem->cpu == UNDEF_SEM)
3876+ sem->cpu = get_partition(t);
3877+ else if (sem->cpu != get_partition(t))
3878+ ret = -EPERM;
3879+
3880+ if (ret == 0) {
3881+ t_prio.period = get_rt_period(t);
3882+ t_prio.pid = t->pid;
3883+ if (srp_higher_prio(&t_prio, &sem->ceiling)) {
3884+ sem->ceiling.period = t_prio.period;
3885+ sem->ceiling.pid = t_prio.pid;
3886+ }
3887+ }
3888+
3889+ return ret;
3890+}
3891+
3892+static void destroy_srp_semaphore(void* sem)
3893+{
3894+ /* XXX invariants */
3895+ atomic_dec(&srp_objects_in_use);
3896+ kfree(sem);
3897+}
3898+
3899+struct fdso_ops srp_sem_ops = {
3900+ .create = create_srp_semaphore,
3901+ .open = open_srp_semaphore,
3902+ .destroy = destroy_srp_semaphore
3903+};
3904+
3905+
3906+void do_srp_down(struct srp_semaphore* sem)
3907+{
3908+ /* Update ceiling. */
3909+ srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
3910+ WARN_ON(sem->owner != NULL);
3911+ sem->owner = current;
3912+ TRACE_CUR("acquired srp 0x%p\n", sem);
3913+}
3914+
3915+void do_srp_up(struct srp_semaphore* sem)
3916+{
3917+ /* Determine new system priority ceiling for this CPU. */
3918+ WARN_ON(!in_list(&sem->ceiling.list));
3919+ if (in_list(&sem->ceiling.list))
3920+ list_del(&sem->ceiling.list);
3921+
3922+ sem->owner = NULL;
3923+
3924+ /* Wake tasks on this CPU, if they exceed current ceiling. */
3925+ TRACE_CUR("released srp 0x%p\n", sem);
3926+ wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
3927+}
3928+
3929+/* Adjust the system-wide priority ceiling if resource is claimed. */
3930+asmlinkage long sys_srp_down(int sem_od)
3931+{
3932+ int cpu;
3933+ int ret = -EINVAL;
3934+ struct srp_semaphore* sem;
3935+
3936+ /* disabling preemptions is sufficient protection since
3937+ * SRP is strictly per CPU and we don't interfere with any
3938+ * interrupt handlers
3939+ */
3940+ preempt_disable();
3941+ TS_SRP_DOWN_START;
3942+
3943+ cpu = smp_processor_id();
3944+ sem = lookup_srp_sem(sem_od);
3945+ if (sem && sem->cpu == cpu) {
3946+ do_srp_down(sem);
3947+ ret = 0;
3948+ }
3949+
3950+ TS_SRP_DOWN_END;
3951+ preempt_enable();
3952+ return ret;
3953+}
3954+
3955+/* Adjust the system-wide priority ceiling if resource is freed. */
3956+asmlinkage long sys_srp_up(int sem_od)
3957+{
3958+ int cpu;
3959+ int ret = -EINVAL;
3960+ struct srp_semaphore* sem;
3961+
3962+ preempt_disable();
3963+ TS_SRP_UP_START;
3964+
3965+ cpu = smp_processor_id();
3966+ sem = lookup_srp_sem(sem_od);
3967+
3968+ if (sem && sem->cpu == cpu) {
3969+ do_srp_up(sem);
3970+ ret = 0;
3971+ }
3972+
3973+ TS_SRP_UP_END;
3974+ preempt_enable();
3975+ return ret;
3976+}
3977+
3978+asmlinkage long sys_reg_task_srp_sem(int sem_od)
3979+{
3980+ /* unused */
3981+ return 0;
3982+}
3983+
3984+static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
3985+ void *key)
3986+{
3987+ int cpu = smp_processor_id();
3988+ struct task_struct *tsk = wait->private;
3989+ if (cpu != get_partition(tsk))
3990+ TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\n",
3991+ get_partition(tsk));
3992+ else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
3993+ return default_wake_function(wait, mode, sync, key);
3994+ return 0;
3995+}
3996+
3997+
3998+
3999+static void do_ceiling_block(struct task_struct *tsk)
4000+{
4001+ wait_queue_t wait = {
4002+ .private = tsk,
4003+ .func = srp_wake_up,
4004+ .task_list = {NULL, NULL}
4005+ };
4006+
4007+ tsk->state = TASK_UNINTERRUPTIBLE;
4008+ add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
4009+ tsk->rt_param.srp_non_recurse = 1;
4010+ preempt_enable_no_resched();
4011+ schedule();
4012+ preempt_disable();
4013+ tsk->rt_param.srp_non_recurse = 0;
4014+ remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
4015+}
4016+
4017+/* Wait for current task priority to exceed system-wide priority ceiling.
4018+ */
4019+void srp_ceiling_block(void)
4020+{
4021+ struct task_struct *tsk = current;
4022+
4023+ TS_SRPT_START;
4024+
4025+ /* Only applies to real-time tasks, but optimize for RT tasks. */
4026+ if (unlikely(!is_realtime(tsk)))
4027+ return;
4028+
4029+ /* Avoid recursive ceiling blocking. */
4030+ if (unlikely(tsk->rt_param.srp_non_recurse))
4031+ return;
4032+
4033+ /* Bail out early if there aren't any SRP resources around. */
4034+ if (likely(!atomic_read(&srp_objects_in_use)))
4035+ return;
4036+
4037+ preempt_disable();
4038+ if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
4039+ TRACE_CUR("is priority ceiling blocked.\n");
4040+ TS_SRPT_END;
4041+ while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
4042+ do_ceiling_block(tsk);
4043+ TRACE_CUR("finally exceeds system ceiling.\n");
4044+ } else {
4045+ TS_SRPT_END;
4046+ TRACE_CUR("is not priority ceiling blocked\n");
4047+ }
4048+ preempt_enable();
4049+}
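A brief usage sketch (user space, not part of the patch) of the protocol behind sys_srp_down()/sys_srp_up(). The od_open(), srp_down(), srp_up() and od_close() wrappers, as well as the SRP_SEM object-type constant, are hypothetical stand-ins for whatever userland bindings expose these calls; only the system calls themselves are defined by this patch.

void access_resource_under_srp(int fd, int resource_id)
{
	/* hypothetical wrapper: obtain an object descriptor for the SRP
	 * semaphore attached to (fd, resource_id); the caller must already
	 * be a real-time task on the partition the semaphore is bound to */
	int od = od_open(fd, SRP_SEM, resource_id);

	srp_down(od);	/* raises this CPU's system ceiling (sys_srp_down) */
	/* ... access the shared resource ... */
	srp_up(od);	/* lowers the ceiling, wakes ceiling-blocked tasks (sys_srp_up) */

	od_close(od);	/* hypothetical wrapper */
}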
4050+
4051+/* ************************************************************************** */
4052+
4053+
4054+
4055diff --git a/litmus/pcp.c b/litmus/pcp.c
4056new file mode 100644
4057index 0000000..06030d4
4058--- /dev/null
4059+++ b/litmus/pcp.c
4060@@ -0,0 +1,764 @@
4061+/* pcp.c -- Implementations of the PCP, D-PCP, and M-PCP.
4062+ *
4063+ */
4064+#include <asm/uaccess.h>
4065+#include <linux/wait.h>
4066+#include <linux/list.h>
4067+#include <linux/sched.h>
4068+#include <linux/spinlock.h>
4069+#include <linux/completion.h>
4070+
4071+#include <litmus/sched_plugin.h>
4072+#include <litmus/litmus.h>
4073+#include <litmus/rm_common.h>
4074+#include <litmus/fdso.h>
4075+#include <litmus/trace.h>
4076+
4077+/* from sched_rm.c */
4078+void rm_set_prio(struct task_struct *t, struct pcp_priority* new_prio);
4079+
4080+#define GLOBAL_SEM -1
4081+#define UNDEF_SEM -2
4082+
4083+#define get_prio(t) ((t)->rt_param.cur_prio)
4084+#define get_base_prio(t) (&((t)->rt_param.pcp_prio))
4085+
4086+
4087+struct dpcp_request {
4088+ struct list_head list;
4089+ struct completion done;
4090+ long arg;
4091+ lt_t prio;
4092+ int pid;
4093+};
4094+
4095+struct pcp_semaphore {
4096+ int cpu;
4097+
4098+ /* waiting tasks */
4099+ wait_queue_head_t blocked;
4100+ struct pcp_priority* blocked_prio;
4101+
4102+ /* system ceiling support */
4103+ struct list_head list;
4104+ struct pcp_priority ceiling;
4105+
4106+ /* task_struct owned_semaphore list */
4107+ struct list_head owned_list;
4108+
4109+ /* Current lock holder.
4110+ * NULL implies unlocked.
4111+ */
4112+ struct task_struct* holder;
4113+
4114+ /* D-PCP support */
4115+ spinlock_t dpcp_lock;
4116+ struct list_head dpcp_requests;
4117+ int dpcp_count;
4118+ struct dpcp_request* dpcp_current;
4119+ struct completion dpcp_job;
4120+ struct task_struct* dpcp_agent;
4121+};
4122+
4123+static DEFINE_PER_CPU(spinlock_t, pcp_lock);
4124+static DEFINE_PER_CPU(struct list_head, sys_ceiling);
4125+
4126+static noinline void init_pcp_sem(struct pcp_semaphore *sem, int cpu)
4127+{
4128+ sem->cpu = cpu;
4129+ init_waitqueue_head(&sem->blocked);
4130+ INIT_LIST_HEAD(&sem->list);
4131+ INIT_LIST_HEAD(&sem->owned_list);
4132+ INIT_LIST_HEAD(&sem->dpcp_requests);
4133+ sem->holder = NULL;
4134+ sem->dpcp_current = NULL;
4135+ sem->blocked_prio = NULL;
4136+ sem->ceiling = (struct pcp_priority) {ULLONG_MAX, 0, INT_MAX};
4137+ init_completion(&sem->dpcp_job);
4138+ spin_lock_init(&sem->dpcp_lock);
4139+ sem->dpcp_count = 0;
4140+ sem->dpcp_agent = NULL;
4141+}
4142+
4143+static noinline int tsk_pcp_higher_prio(struct task_struct* t,
4144+ struct pcp_priority* p2)
4145+{
4146+ return _rm_higher_prio(t->rt_param.cur_prio, p2);
4147+}
4148+
4149+static noinline struct pcp_semaphore* get_ceiling(int cpu)
4150+{
4151+ struct list_head *ceil_list = &per_cpu(sys_ceiling, cpu);
4152+ if (list_empty(ceil_list))
4153+ return NULL;
4154+ return list_entry(ceil_list->next, struct pcp_semaphore, list);
4155+}
4156+
4157+static noinline void raise_ceiling(struct pcp_semaphore* sem, int cpu)
4158+{
4159+ struct list_head *ceil_list = &per_cpu(sys_ceiling, cpu);
4160+ list_add(&sem->list, ceil_list);
4161+}
4162+
4163+static noinline int exceeds_ceiling(struct task_struct* t,
4164+ struct pcp_semaphore* ceil)
4165+{
4166+ return !ceil || ceil->holder == t ||
4167+ tsk_pcp_higher_prio(t, &ceil->ceiling);
4168+}
4169+
4170+static noinline void give_priority(struct task_struct* t, struct pcp_semaphore* sem)
4171+{
4172+ struct pcp_semaphore* next;
4173+ /* sem->blocked_prio can be NULL, but _rm_higher_prio() handles that */
4174+
4175+ /* only update if we actually exceed existing priorities */
4176+ if (_rm_higher_prio(get_prio(t), sem->blocked_prio) &&
4177+ _rm_higher_prio(get_prio(t), get_base_prio(sem->holder))) {
4178+ /* we need to register our priority */
4179+ sem->blocked_prio = get_prio(t);
4180+
4181+ /* only update task if it results in a priority increase */
4182+ if (_rm_higher_prio(get_prio(t), get_prio(sem->holder))) {
4183+ /* update prio */
4184+ TRACE("PCP: %s/%d inherits from %s/%d\n",
4185+ sem->holder->comm, sem->holder->pid,
4186+ t->comm, t->pid);
4187+ rm_set_prio(sem->holder, get_prio(t));
4188+ /* check if recipient is blocked, too */
4189+ next = sem->holder->rt_param.blocked_on;
4190+ if (next)
4191+ /* Transitive priority inheritance.
4192+ * Recurse.
4193+ */
4194+ give_priority(sem->holder, next);
4195+ }
4196+ }
4197+}
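To make the recursion concrete, here is a standalone user-space sketch (not part of the patch) that models only the transitive propagation step of give_priority(); priorities are plain integers (smaller = higher), and the comparison against the holder's base priority as well as all locking are omitted.

/* pi_chain_demo.c -- illustrative only */
#include <stdio.h>

struct sem;

struct task {
	const char *name;
	int prio;		/* effective priority, smaller = higher */
	struct sem *blocked_on;	/* semaphore this task waits for, if any */
};

struct sem {
	struct task *holder;
	int blocked_prio;	/* highest priority among blocked waiters */
};

/* propagate the waiter's priority through a chain of lock holders */
static void model_give_priority(struct task *waiter, struct sem *s)
{
	if (waiter->prio < s->blocked_prio)
		s->blocked_prio = waiter->prio;
	if (s->holder && waiter->prio < s->holder->prio) {
		s->holder->prio = waiter->prio;		/* inherit */
		if (s->holder->blocked_on)		/* holder is blocked, too */
			model_give_priority(s->holder, s->holder->blocked_on);
	}
}

int main(void)
{
	struct task a = {"A", 1, NULL}, b = {"B", 5, NULL}, c = {"C", 9, NULL};
	struct sem s1 = {&b, 1000};	/* held by B */
	struct sem s2 = {&c, 1000};	/* held by C */

	b.blocked_on = &s2;		/* B holds s1 but is blocked on s2 */
	model_give_priority(&a, &s1);	/* A blocks on s1 */
	printf("B=%d C=%d\n", b.prio, c.prio);	/* prints "B=1 C=1" */
	return 0;
}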
4198+
4199+static noinline long local_pcp_down(struct pcp_semaphore *sem)
4200+{
4201+ long ret = 0;
4202+ struct task_struct* t = current;
4203+ struct pcp_semaphore* ceiling;
4204+ int cpu;
4205+ int ceiling_passed = 0;
4206+
4207+ /* don't allow recursive locking */
4208+ if (sem->holder == t)
4209+ return -EINVAL;
4210+
4211+ cpu = smp_processor_id();
4212+ if (cpu != sem->cpu) {
4213+ /* preemption stays disabled; the caller will re-enable it */
4214+ return -EPERM;
4215+ }
4216+
4217+
4218+ /* first we need to pass the local system ceiling */
4219+ while (!ceiling_passed) {
4220+ ceiling = get_ceiling(cpu);
4221+ TRACE_TASK(t, "PCP: I want %p, ceiling is %p\n", sem, ceiling);
4222+ ceiling_passed = exceeds_ceiling(t, ceiling);
4223+ if (!ceiling_passed) {
4224+ /* block on sys_ceiling */
4225+ DECLARE_WAITQUEUE(waitq, t);
4226+ TRACE_TASK(t, "blocks on PCP system ceiling\n");
4227+ add_wait_queue(&ceiling->blocked, &waitq);
4228+ /* initiate priority inheritance */
4229+ give_priority(t, ceiling);
4230+ t->rt_param.blocked_on = ceiling;
4231+ t->state = TASK_UNINTERRUPTIBLE;
4232+ preempt_enable_no_resched();
4233+ TS_PCP1_DOWN_END;
4234+ schedule();
4235+ preempt_disable();
4236+ t->rt_param.blocked_on = NULL;
4237+ remove_wait_queue(&ceiling->blocked, &waitq);
4238+ } else {
4239+ if (ceiling)
4240+ TRACE_TASK(t,
4241+ "system ceiling passed: {%llu, %d, %d} < "
4242+ "{%llu, %d, %d}\n",
4243+ ceiling->ceiling.prio,
4244+ ceiling->ceiling.in_global_cs,
4245+ ceiling->ceiling.pid,
4246+ t->rt_param.cur_prio->prio,
4247+ t->rt_param.cur_prio->in_global_cs,
4248+ t->rt_param.cur_prio->pid
4249+ );
4250+ else
4251+ TRACE_TASK(t,
4252+ "system ceiling passed: NULL < "
4253+ "{%llu, %d, %d}\n",
4254+ t->rt_param.cur_prio->prio,
4255+ t->rt_param.cur_prio->in_global_cs,
4256+ t->rt_param.cur_prio->pid
4257+ );
4258+ TS_PCP1_DOWN_END;
4259+ }
4260+ }
4261+
4262+ TS_PCP2_DOWN_START;
4263+ /* Since we have passed the priority ceiling the semaphore cannot be
4264+ * in use. If it were in use then the ceiling would be at least as high
4265+ * as our priority.
4266+ */
4267+ WARN_ON(sem->holder);
4268+
4269+ TRACE_TASK(t, "taking PCP semaphore 0x%p, owner:%p\n", sem, sem->holder);
4270+
4271+ /* We can become the owner. */
4272+ sem->holder = t;
4273+ list_add(&sem->owned_list, &t->rt_param.owned_semaphores);
4274+
4275+ /* We need to update the system ceiling, but only
4276+ * if the new ceiling is higher than the old.
4277+ */
4278+ ceiling = get_ceiling(cpu);
4279+ /* if the priorities are equal then t already owns ceiling,
4280+ * otherwise it would not have gotten past the system ceiling
4281+ */
4282+ if (!ceiling || _rm_higher_prio(&sem->ceiling, &ceiling->ceiling)) {
4283+ raise_ceiling(sem, cpu);
4284+ TRACE_TASK(t, "raised ceiling on %d\n", cpu);
4285+ }
4286+
4287+ TS_PCP2_DOWN_END;
4288+ return ret;
4289+}
4290+
4291+static noinline struct pcp_priority* fetch_highest_prio(struct task_struct *t)
4292+{
4293+ struct pcp_priority *prio;
4294+ struct list_head* pos;
4295+ struct pcp_semaphore* sem;
4296+
4297+ /* base case is that the task uses its normal priority */
4298+ prio = get_base_prio(t);
4299+
4300+ /* now search the list of semaphores that we own for a higher priority
4301+ * to inherit
4302+ */
4303+ list_for_each(pos, &t->rt_param.owned_semaphores) {
4304+ sem = list_entry(pos, struct pcp_semaphore, owned_list);
4305+ /* sem->blocked_prio could be NULL */
4306+ if (!_rm_higher_prio(prio, sem->blocked_prio))
4307+ prio = sem->blocked_prio;
4308+ }
4309+ return prio;
4310+}
4311+
4312+static noinline long local_pcp_up(struct pcp_semaphore *sem)
4313+{
4314+ long ret = 0;
4315+ struct task_struct* t = current;
4316+ int cpu;
4317+
4318+ cpu = smp_processor_id();
4319+
4320+ if (cpu != sem->cpu)
4321+ return -EPERM;
4322+
4323+ if (sem->holder == t) {
4324+ TRACE_TASK(t, "giving up PCP semaphore 0x%p.\n", sem);
4325+
4326+ /* we need to unblock all tasks in the wait_queue */
4327+ wake_up_all(&sem->blocked);
4328+
4329+ /* unlock semaphore */
4330+ sem->holder = NULL;
4331+ list_del(&sem->owned_list);
4332+
4333+ /* remove from system ceiling list */
4334+ if (in_list(&sem->list))
4335+ list_del(&sem->list);
4336+
4337+ if (sem->blocked_prio == get_prio(t)) {
4338+ /* We are currently inheriting from this
4339+ * semaphore. We need to figure out which priority
4340+ * we should fall back to.
4341+ */
4342+ TRACE_TASK(t, "giving up inherited prio.\n");
4343+ rm_set_prio(t, fetch_highest_prio(t));
4344+ }
4345+ /* reset semaphore priority inheritance */
4346+ sem->blocked_prio = NULL;
4347+ } else {
4348+ TRACE_TASK(t, "local_pcp_up EINVAL 0x%p.\n", sem);
4349+ ret = -EINVAL;
4350+ }
4351+
4352+ TS_PCP_UP_END;
4353+ return ret;
4354+}
4355+
4356+static noinline struct task_struct* wqlist2task(struct list_head* l)
4357+{
4358+ return (struct task_struct*)
4359+ list_entry(l, wait_queue_t, task_list)->private;
4360+}
4361+
4362+static noinline int wait_order(struct list_head* la, struct list_head* lb)
4363+{
4364+ return rm_higher_prio(wqlist2task(la), wqlist2task(lb));
4365+}
4366+
4367+/* The default wake function may return 0 (e.g., task already running), but
4368+ * we only ever want to wake up (at most) one task here, so report success.
4369+ */
4370+int single_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
4371+{
4372+ int ret = default_wake_function(wait, mode, sync, key);
4373+ if (!ret)
4374+ TRACE("Overriding default_wake_function() return code.\n");
4375+ return 1;
4376+}
4377+
4378+static noinline long global_pcp_down(struct pcp_semaphore* sem)
4379+{
4380+ unsigned long flags;
4381+ long ret = 0;
4382+ struct task_struct* t = current;
4383+
4384+ /* don't allow recursive locking */
4385+ if (sem->holder == t)
4386+ return -EINVAL;
4387+
4388+ spin_lock_irqsave(&sem->blocked.lock, flags);
4389+
4390+ /* Get the global priority. Do this before
4391+ * we block, so that we wake up as a high-priority task.
4392+ */
4393+ t->rt_param.pcp_prio.in_global_cs = 1;
4394+ rm_set_prio(t, &t->rt_param.pcp_prio);
4395+
4396+ if (sem->holder) {
4397+ /* semaphore is not free. We need to block. */
4398+ DECLARE_WAITQUEUE(waitq, t);
4399+ TRACE_TASK(t, "blocks on MPCP semaphore %p.\n", sem);
4400+ waitq.flags = WQ_FLAG_EXCLUSIVE;
4401+ waitq.func = single_wake_function;
4402+ /* insert ordered by priority */
4403+ list_insert(&waitq.task_list, &sem->blocked.task_list,
4404+ wait_order);
4405+ t->state = TASK_UNINTERRUPTIBLE;
4406+ spin_unlock_irqrestore(&sem->blocked.lock, flags);
4407+ preempt_enable_no_resched();
4408+ TS_MPCP_DOWN_END;
4409+
4410+ schedule();
4411+
4412+ preempt_disable();
4413+ /* once we wake up we are the owner of the lock */
4414+ spin_lock_irqsave(&sem->blocked.lock, flags);
4415+ remove_wait_queue_locked(&sem->blocked, &waitq);
4416+ } else {
4417+ /* semaphore is free. We can proceed. */
4418+ TS_MPCP_DOWN_END;
4419+ sem->holder = t;
4420+ }
4421+ if (sem->holder != t) {
4422+ if (sem->holder)
4423+ TRACE("expected %s/%d, but I am %s/%d\n",
4424+ sem->holder->comm, sem->holder->pid, t->comm, t->pid);
4425+ else
4426+ TRACE("expected NULL, but I am %s/%d\n",
4427+ t->comm, t->pid);
4428+ }
4429+ TRACE_TASK(t, "acquired MPCP semaphore %p.\n", sem);
4430+
4431+
4432+ spin_unlock_irqrestore(&sem->blocked.lock, flags);
4433+ return ret;
4434+}
4435+
4436+static noinline long global_pcp_up(struct pcp_semaphore* sem)
4437+{
4438+ unsigned long flags;
4439+ long ret = 0;
4440+ struct task_struct* t = current;
4441+
4442+ if (sem->holder != t)
4443+ return -EINVAL;
4444+
4445+ TRACE_TASK(t, "releasing MPCP semaphore %p.\n", sem);
4446+
4447+ spin_lock_irqsave(&sem->blocked.lock, flags);
4448+ if (waitqueue_active(&sem->blocked)) {
4449+ /* pass ownership on */
4450+ sem->holder = wqlist2task(sem->blocked.task_list.next);
4451+ TRACE_TASK(t, "waking up next (=%s/%d) on MPCP semaphore %p.\n",
4452+ sem->holder->comm, sem->holder->pid, sem);
4453+ /* wake up first */
4454+ wake_up_locked(&sem->blocked);
4455+ } else
4456+ sem->holder = NULL;
4457+
4458+ /* restore our own priority */
4459+ t->rt_param.pcp_prio.in_global_cs = 0;
4460+ rm_set_prio(t, &t->rt_param.pcp_prio);
4461+
4462+ TS_MPCP_UP_END;
4463+ spin_unlock_irqrestore(&sem->blocked.lock, flags);
4464+ return ret;
4465+}
4466+
4467+static noinline int request_order(struct list_head* la, struct list_head* lb)
4468+{
4469+ struct dpcp_request *a, *b;
4470+ a = list_entry(la, struct dpcp_request, list);
4471+ b = list_entry(lb, struct dpcp_request, list);
4472+ return a->prio < b->prio;
4473+}
4474+
4475+static noinline long dpcp_invoke(struct pcp_semaphore* sem, long arg)
4476+{
4477+ unsigned long flags;
4478+ long ret = 0;
4479+ struct task_struct* t = current, *a;
4480+ struct dpcp_request req;
4481+
4482+ spin_lock_irqsave(&sem->dpcp_lock, flags);
4483+
4484+ init_completion(&req.done);
4485+ req.arg = arg;
4486+ req.prio = t->rt_param.pcp_prio.prio;
4487+ req.pid = t->rt_param.pcp_prio.pid;
4488+
4489+ list_insert(&req.list, &sem->dpcp_requests,
4490+ request_order);
4491+
4492+ if (!(sem->dpcp_count++)) {
4493+ /* agent needs to be awakened */
4494+ TRACE_TASK(t, "waking DPCP agent for %p.\n", sem);
4495+ if (sem->dpcp_agent) {
4496+ a = sem->dpcp_agent;
4497+ /* set agent priority */
4498+ a->rt_param.pcp_prio.in_global_cs = 1;
4499+ a->rt_param.pcp_prio.prio = req.prio;
4500+ rm_set_prio(a, &a->rt_param.pcp_prio);
4501+ }
4502+ complete(&sem->dpcp_job);
4503+ }
4504+
4505+ spin_unlock_irqrestore(&sem->dpcp_lock, flags);
4506+ TRACE_TASK(t, "blocking on DPCP sem %p.\n", sem);
4507+ preempt_enable_no_resched();
4508+ TS_DPCP_INVOKE_END;
4509+
4510+ wait_for_completion(&req.done);
4511+
4512+ preempt_disable();
4513+ /* we don't need to clean up, the remote agent did that for us */
4514+ return ret;
4515+}
4516+
4517+static noinline long dpcp_agent(struct pcp_semaphore* sem, long flags, long *arg)
4518+{
4519+ unsigned long spinflags;
4520+ long ret = 0;
4521+ struct task_struct* t = current;
4522+
4523+ spin_lock_irqsave(&sem->dpcp_lock, spinflags);
4524+
4525+ /* defend against multiple concurrent agents */
4526+ if (sem->dpcp_agent && sem->dpcp_agent != t) {
4527+ spin_unlock_irqrestore(&sem->dpcp_lock, spinflags);
4528+ return -EBUSY;
4529+ } else
4530+ sem->dpcp_agent = t;
4531+
4532+ if (sem->cpu != get_partition(t)) {
4533+ int cpu = smp_processor_id();
4534+ spin_unlock_irqrestore(&sem->dpcp_lock, spinflags);
4535+ printk(KERN_CRIT
4536+ "dpcp_agent: sem->cpu: %d, but agent "
4537+ "is on %d, and part=%d\n",
4538+ sem->cpu, cpu, get_partition(t));
4539+ return -EINVAL;
4540+ }
4541+
4542+ if ((flags & DPCP_COMPLETE) && sem->dpcp_current) {
4543+ TRACE_TASK(t, "completing DPCP sem %p.\n", sem);
4544+ /* we need to release the holder */
4545+ complete(&sem->dpcp_current->done);
4546+ sem->dpcp_count--;
4547+ sem->dpcp_current = NULL;
4548+ }
4549+
4550+ if (flags & DPCP_WAIT) {
4551+ do {
4552+ if (sem->dpcp_count) {
4553+ /* pass ownership on */
4554+ sem->dpcp_current = list_entry(
4555+ sem->dpcp_requests.next,
4556+ struct dpcp_request, list);
4557+ list_del(sem->dpcp_requests.next);
4558+ t->rt_param.pcp_prio.in_global_cs = 1;
4559+ t->rt_param.pcp_prio.prio =
4560+ sem->dpcp_current->prio;
4561+ t->rt_param.pcp_prio.pid = sem->dpcp_current->pid;
4562+ rm_set_prio(t, &t->rt_param.pcp_prio);
4563+ TS_DPCP_AGENT2_END;
4564+ } else {
4565+ /* need to wait */
4566+ spin_unlock_irqrestore(&sem->dpcp_lock,
4567+ spinflags);
4568+ TRACE_TASK(t, "agent waiting for "
4569+ "DPCP sem %p.\n", sem);
4570+
4571+ preempt_enable_no_resched();
4572+ TS_DPCP_AGENT2_END;
4573+ ret = wait_for_completion_interruptible(&sem->dpcp_job);
4574+ preempt_disable();
4575+ TRACE_TASK(t, "got DPCP job on sem %p, "
4576+ "ret=%d.\n", sem, ret);
4577+ spin_lock_irqsave(&sem->dpcp_lock, spinflags);
4578+ if (ret != 0) {
4579+ /* FIXME: set priority */
4580+ break;
4581+ }
4582+ }
4583+ } while (!sem->dpcp_current);
4584+ if (ret == 0)
4585+ *arg = sem->dpcp_current->arg;
4586+ } else {
4587+ /* restore our own priority */
4588+ t->rt_param.pcp_prio.in_global_cs = 0;
4589+ t->rt_param.pcp_prio.prio = ULLONG_MAX;
4590+ rm_set_prio(t, &t->rt_param.pcp_prio);
4591+ sem->dpcp_agent = NULL;
4592+ }
4593+
4594+ spin_unlock_irqrestore(&sem->dpcp_lock, spinflags);
4595+ return ret;
4596+}
4597+
4598+
4599+/* system calls */
4600+
4601+asmlinkage long sys_pcp_down(int sem_od)
4602+{
4603+ long ret = 0;
4604+ struct pcp_semaphore * sem;
4605+
4606+ preempt_disable();
4607+ TS_MPCP_DOWN_START;
4608+ TS_PCP1_DOWN_START;
4609+
4610+ if (!is_realtime(current)) {
4611+ ret = -EPERM;
4612+ goto out;
4613+ }
4614+
4615+ sem = lookup_pcp_sem(sem_od);
4616+ if (sem) {
4617+ if (sem->cpu != GLOBAL_SEM)
4618+ ret = local_pcp_down(sem);
4619+ else
4620+ ret = global_pcp_down(sem);
4621+ } else
4622+ ret = -EINVAL;
4623+
4624+out:
4625+ preempt_enable();
4626+ return ret;
4627+}
4628+
4629+asmlinkage long sys_pcp_up(int sem_od)
4630+{
4631+ long ret = 0;
4632+ struct pcp_semaphore * sem;
4633+
4634+ preempt_disable();
4635+ TS_PCP_UP_START;
4636+ TS_MPCP_UP_START;
4637+
4638+ if (!is_realtime(current)) {
4639+ ret = -EPERM;
4640+ goto out;
4641+ }
4642+
4643+ sem = lookup_pcp_sem(sem_od);
4644+ if (sem) {
4645+ if (sem->cpu != GLOBAL_SEM)
4646+ ret = local_pcp_up(sem);
4647+ else
4648+ ret = global_pcp_up(sem);
4649+ } else
4650+ ret = -EINVAL;
4651+
4652+out:
4653+ preempt_enable();
4654+ return ret;
4655+}
4656+
4657+
4658+asmlinkage long sys_dpcp_invoke(int sem_od, long arg)
4659+{
4660+ long ret = 0;
4661+ struct pcp_semaphore * sem;
4662+
4663+ preempt_disable();
4664+ TS_DPCP_INVOKE_START;
4665+
4666+ if (!is_realtime(current)) {
4667+ ret = -EPERM;
4668+ goto out;
4669+ }
4670+
4671+ sem = lookup_pcp_sem(sem_od);
4672+ if (sem) {
4673+ ret = dpcp_invoke(sem, arg);
4674+ } else
4675+ ret = -EINVAL;
4676+
4677+out:
4678+ preempt_enable();
4679+ return ret;
4680+}
4681+
4682+asmlinkage long sys_dpcp_agent(int sem_od, long flags, long __user *__arg)
4683+{
4684+ long ret = 0;
4685+ long arg;
4686+ struct pcp_semaphore * sem;
4687+
4688+ preempt_disable();
4689+ TS_DPCP_AGENT1_START;
4690+
4691+ if (!is_realtime(current)) {
4692+ ret = -EPERM;
4693+ goto out;
4694+ }
4695+
4696+ sem = lookup_pcp_sem(sem_od);
4697+ if (sem) {
4698+ TS_DPCP_AGENT1_END;
4699+ if (flags & DPCP_COMPLETE) {
4700+ TS_PCP_UP_START;
4701+ local_pcp_up(sem);
4702+ }
4703+ TS_DPCP_AGENT2_START;
4704+ ret = dpcp_agent(sem, flags, &arg);
4705+ if (ret == 0 && (flags & DPCP_WAIT)) {
4706+ ret = put_user(arg, __arg);
4707+ if (ret == 0) {
4708+ TS_PCP1_DOWN_START;
4709+ local_pcp_down(sem);
4710+ }
4711+ }
4712+ } else
4713+ ret = -EINVAL;
4714+
4715+out:
4716+ preempt_enable();
4717+ return ret;
4718+}
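A sketch (user space, not part of the patch) of how the two sides of the D-PCP appear to be meant to drive these system calls, inferred from the flag handling above. The dpcp_invoke()/dpcp_agent() wrappers and serve_request() are hypothetical stand-ins; only sys_dpcp_invoke(), sys_dpcp_agent() and the DPCP_WAIT/DPCP_COMPLETE flags are defined by the patch.

/* hypothetical thin wrappers around sys_dpcp_invoke()/sys_dpcp_agent() */
long dpcp_invoke(int sem_od, long arg);
long dpcp_agent(int sem_od, long flags, long *arg);
void serve_request(long arg);	/* hypothetical: the actual resource access */

/* client side: may run on any CPU; blocks until the remote agent served it */
void client(int sem_od, long request)
{
	dpcp_invoke(sem_od, request);
}

/* agent side: runs on the semaphore's CPU and executes critical sections
 * on behalf of remote clients */
void agent_loop(int sem_od)
{
	long arg;

	/* wait for the first request; on success the agent holds the local
	 * PCP semaphore and runs at the requesting client's priority */
	if (dpcp_agent(sem_od, DPCP_WAIT, &arg) != 0)
		return;
	for (;;) {
		serve_request(arg);
		/* release the served request (local_pcp_up + complete) and
		 * wait for the next one in a single call */
		if (dpcp_agent(sem_od, DPCP_WAIT | DPCP_COMPLETE, &arg) != 0)
			break;
	}
}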
4719+
4720+
4721+/* FDSO callbacks */
4722+
4723+static noinline void* create_pcp_semaphore(void)
4724+{
4725+ struct pcp_semaphore* sem;
4726+
4727+ sem = kmalloc(sizeof(struct pcp_semaphore), GFP_KERNEL);
4728+ if (!sem)
4729+ return NULL;
4730+ init_pcp_sem(sem, UNDEF_SEM);
4731+ TRACE("allocated PCP semaphore %p\n", sem);
4732+ return sem;
4733+}
4734+
4735+static noinline void destroy_pcp_semaphore(void* obj)
4736+{
4737+ struct pcp_semaphore* sem = (struct pcp_semaphore*) obj;
4738+ WARN_ON(sem->holder);
4739+ WARN_ON(in_list(&sem->list));
4740+ kfree(sem);
4741+}
4742+
4743+static noinline void update_pcp_ceiling(struct pcp_semaphore* sem, struct task_struct* t, int global)
4744+{
4745+ struct pcp_priority prio = {get_rt_period(t), 1, t->pid};
4746+ if (global && !sem->ceiling.in_global_cs)
4747+ sem->ceiling.in_global_cs = 1;
4748+ if (_rm_higher_prio(&prio, &sem->ceiling))
4749+ sem->ceiling = prio;
4750+}
4751+
4752+static noinline int open_pcp_semaphore(struct od_table_entry* entry, void __user *__arg)
4753+{
4754+ struct pcp_semaphore* sem = (struct pcp_semaphore*) entry->obj->obj;
4755+ int *arg = (int*) __arg;
4756+ struct task_struct* t = current;
4757+ int cpu = get_partition(t);
4758+
4759+ TRACE("opening PCP semaphore %p, cpu=%d\n", sem, sem->cpu);
4760+ if (!pcp_active())
4761+ return -EBUSY;
4762+
4763+ if (arg && get_user(cpu, arg) != 0)
4764+ return -EFAULT;
4765+
4766+ if (sem->cpu == UNDEF_SEM)
4767+ sem->cpu = cpu;
4768+
4769+ update_pcp_ceiling(sem, t, sem->cpu != get_partition(t));
4770+
4771+ return 0;
4772+}
4773+
4774+static noinline void update_mpcp_ceiling(struct pcp_semaphore* sem, struct task_struct* t)
4775+{
4776+ struct pcp_priority prio = {get_rt_period(t), 1, t->pid};
4777+ if (_rm_higher_prio(&prio, &sem->ceiling))
4778+ sem->ceiling = prio;
4779+}
4780+
4781+static noinline int open_mpcp_semaphore(struct od_table_entry* entry, void* __user arg)
4782+{
4783+ struct pcp_semaphore* sem = (struct pcp_semaphore*) entry->obj->obj;
4784+ int ret = 0;
4785+ struct task_struct* t = current;
4786+
4787+ if (!pcp_active())
4788+ return -EBUSY;
4789+
4790+ if (sem->cpu == UNDEF_SEM)
4791+ sem->cpu = GLOBAL_SEM;
4792+
4793+ update_mpcp_ceiling(sem, t);
4794+
4795+ return ret;
4796+}
4797+
4798+struct fdso_ops pcp_sem_ops = {
4799+ .create = create_pcp_semaphore,
4800+ .destroy = destroy_pcp_semaphore,
4801+ .open = open_pcp_semaphore
4802+};
4803+
4804+struct fdso_ops mpcp_sem_ops = {
4805+ .create = create_pcp_semaphore,
4806+ .destroy = destroy_pcp_semaphore,
4807+ .open = open_mpcp_semaphore
4808+};
4809+
4810+static noinline int __init pcp_boot_init(void)
4811+{
4812+ int i;
4813+
4814+ printk("Initializing PCP per-CPU ceilings...");
4815+ for (i = 0; i < NR_CPUS; i++) {
4816+ INIT_LIST_HEAD(&per_cpu(sys_ceiling, i));
4817+ per_cpu(pcp_lock, i) = __SPIN_LOCK_UNLOCKED(pcp_lock);
4818+ }
4819+ printk(" done!\n");
4820+
4821+ return 0;
4822+}
4823+
4824+module_init(pcp_boot_init);
4825diff --git a/litmus/rm_common.c b/litmus/rm_common.c
4826new file mode 100644
4827index 0000000..9bf21fd
4828--- /dev/null
4829+++ b/litmus/rm_common.c
4830@@ -0,0 +1,76 @@
4831+/*
4832+ * litmus/rm_common.c
4833+ *
4834+ * Common functions for RM based schedulers.
4835+ *
4836+ * FIXME: Too much code duplication with edf_common.c
4837+ */
4838+
4839+#include <linux/percpu.h>
4840+#include <linux/sched.h>
4841+#include <linux/list.h>
4842+
4843+#include <litmus/litmus.h>
4844+#include <litmus/sched_plugin.h>
4845+#include <litmus/sched_trace.h>
4846+
4847+
4848+#include <litmus/rm_common.h>
4849+
4850+/* rm_higher_prio - returns true if first has a higher RM priority
4851+ * than second. Period ties are broken by PID.
4852+ *
4853+ * first must not be NULL and must be a real-time task.
4854+ * second may be NULL or a non-rt task.
4855+ */
4856+int rm_higher_prio(struct task_struct* first,
4857+ struct task_struct* second)
4858+{
4859+ struct pcp_priority *p1, *p2;
4860+
4861+ /* verify assumptions in DEBUG build */
4862+ BUG_ON(!first);
4863+ BUG_ON(!is_realtime(first));
4864+ BUG_ON(second && !is_realtime(second) && second->rt_param.cur_prio);
4865+
4866+ p1 = first->rt_param.cur_prio;
4867+
4868+ /* if second is not a real-time task, then cur_prio is NULL */
4869+ p2 = second ? second->rt_param.cur_prio : NULL;
4870+ return _rm_higher_prio(p1, p2);
4871+}
4872+
4873+int rm_ready_order(struct list_head* a, struct list_head* b)
4874+{
4875+ return rm_higher_prio(
4876+ list_entry(a, struct task_struct, rt_list),
4877+ list_entry(b, struct task_struct, rt_list));
4878+}
4879+
4880+
4881+void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched)
4882+{
4883+ rt_domain_init(rt, resched, rm_ready_order);
4884+}
4885+
4886+/* need_to_preempt - check whether the task t needs to be preempted
4887+ * call only with irqs disabled and with ready_lock acquired
4888+ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
4889+ */
4890+int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
4891+{
4892+ /* we need the read lock for the ready queue */
4893+ /* no need to preempt if there is nothing pending */
4894+ if (!ready_jobs_pending(rt))
4895+ return 0;
4896+ /* we need to reschedule if t doesn't exist */
4897+ if (!t)
4898+ return 1;
4899+
4900+ /* NOTE: We cannot check for non-preemptibility since we
4901+ * don't know what address space we're currently in.
4902+ */
4903+
4904+ /* make sure to get non-rt stuff out of the way */
4905+ return !is_realtime(t) || rm_higher_prio(next_ready(rt), t);
4906+}
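As a usage sketch (not part of the patch, and not necessarily how sched_rm.c does it), a partitioned plugin's tick path could use rm_preemption_needed() roughly as follows; the helper name is illustrative, and the locking follows the rule stated in the comment above.

/* illustrative only: ask for a reschedule if a higher-priority job is ready */
static void rm_check_for_preemption(rt_domain_t *dom)
{
	unsigned long flags;

	/* rm_preemption_needed() requires IRQs off and the ready_lock held */
	read_lock_irqsave(&dom->ready_lock, flags);
	if (rm_preemption_needed(dom, current))
		set_tsk_need_resched(current);
	read_unlock_irqrestore(&dom->ready_lock, flags);
}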
4907diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
4908new file mode 100644
4909index 0000000..fe7bd29
4910--- /dev/null
4911+++ b/litmus/rt_domain.c
4912@@ -0,0 +1,130 @@
4913+/*
4914+ * kernel/rt_domain.c
4915+ *
4916+ * LITMUS real-time infrastructure. This file contains the
4917+ * functions that manipulate RT domains. RT domains are an abstraction
4918+ * of a ready queue and a release queue.
4919+ */
4920+
4921+#include <linux/percpu.h>
4922+#include <linux/sched.h>
4923+#include <linux/list.h>
4924+
4925+#include <litmus/litmus.h>
4926+#include <litmus/sched_plugin.h>
4927+#include <litmus/sched_trace.h>
4928+
4929+#include <litmus/rt_domain.h>
4930+
4931+
4932+static int dummy_resched(rt_domain_t *rt)
4933+{
4934+ return 0;
4935+}
4936+
4937+static int dummy_order(struct list_head* a, struct list_head* b)
4938+{
4939+ return 0;
4940+}
4941+
4942+int release_order(struct list_head* a, struct list_head* b)
4943+{
4944+ return earlier_release(
4945+ list_entry(a, struct task_struct, rt_list),
4946+ list_entry(b, struct task_struct, rt_list));
4947+}
4948+
4949+
4950+void rt_domain_init(rt_domain_t *rt,
4951+ check_resched_needed_t f,
4952+ list_cmp_t order)
4953+{
4954+ BUG_ON(!rt);
4955+ if (!f)
4956+ f = dummy_resched;
4957+ if (!order)
4958+ order = dummy_order;
4959+ INIT_LIST_HEAD(&rt->ready_queue);
4960+ INIT_LIST_HEAD(&rt->release_queue);
4961+ rt->ready_lock = RW_LOCK_UNLOCKED;
4962+ rt->release_lock = SPIN_LOCK_UNLOCKED;
4963+ rt->check_resched = f;
4964+ rt->order = order;
4965+}
4966+
4967+/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
4968+ * @new: the newly released task
4969+ */
4970+void __add_ready(rt_domain_t* rt, struct task_struct *new)
4971+{
4972+ TRACE("rt: adding %s/%d (%llu, %llu) to ready queue at %llu\n",
4973+ new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
4974+ sched_clock());
4975+
4976+ if (!list_insert(&new->rt_list, &rt->ready_queue, rt->order))
4977+ rt->check_resched(rt);
4978+}
4979+
4980+struct task_struct* __take_ready(rt_domain_t* rt)
4981+{
4982+ struct task_struct *t = __peek_ready(rt);
4983+
4984+ /* kick it out of the ready list */
4985+ if (t)
4986+ list_del(&t->rt_list);
4987+ return t;
4988+}
4989+
4990+struct task_struct* __peek_ready(rt_domain_t* rt)
4991+{
4992+ if (!list_empty(&rt->ready_queue))
4993+ return next_ready(rt);
4994+ else
4995+ return NULL;
4996+}
4997+
4998+/* add_release - add a real-time task to the rt release queue.
4999+ * @task: the sleeping task
5000+ */
5001+void __add_release(rt_domain_t* rt, struct task_struct *task)
5002+{
5003+ TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to release queue\n",
5004+ task->comm, task->pid, get_exec_cost(task), get_rt_period(task),
5005+ get_release(task));
5006+
5007+ list_insert(&task->rt_list, &rt->release_queue, release_order);
5008+}
5009+
5010+void __release_pending(rt_domain_t* rt)
5011+{
5012+ struct list_head *pos, *save;
5013+ struct task_struct *queued;
5014+ lt_t now = sched_clock();
5015+ list_for_each_safe(pos, save, &rt->release_queue) {
5016+ queued = list_entry(pos, struct task_struct, rt_list);
5017+ if (likely(is_released(queued, now))) {
5018+ /* this one is ready to go*/
5019+ list_del(pos);
5020+ set_rt_flags(queued, RT_F_RUNNING);
5021+
5022+ sched_trace_job_release(queued);
5023+
5024+ /* now it can be picked up */
5025+ barrier();
5026+ add_ready(rt, queued);
5027+ }
5028+ else
5029+ /* the release queue is ordered */
5030+ break;
5031+ }
5032+}
5033+
5034+void try_release_pending(rt_domain_t* rt)
5035+{
5036+ unsigned long flags;
5037+
5038+ if (spin_trylock_irqsave(&rt->release_lock, flags)) {
5039+ __release_pending(rt);
5040+ spin_unlock_irqrestore(&rt->release_lock, flags);
5041+ }
5042+}
5043diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
5044new file mode 100644
5045index 0000000..314f8a1
5046--- /dev/null
5047+++ b/litmus/sched_gsn_edf.c
5048@@ -0,0 +1,733 @@
5049+/*
5050+ * kernel/sched_gsn_edf.c
5051+ *
5052+ * Implementation of the GSN-EDF scheduling algorithm.
5053+ *
5054+ * This version uses the simple approach and serializes all scheduling
5055+ * decisions by the use of a queue lock. This is probably not the
5056+ * best way to do it, but it should suffice for now.
5057+ */
5058+
5059+#include <linux/spinlock.h>
5060+#include <linux/percpu.h>
5061+#include <linux/sched.h>
5062+#include <linux/list.h>
5063+
5064+#include <litmus/litmus.h>
5065+#include <litmus/jobs.h>
5066+#include <litmus/sched_plugin.h>
5067+#include <litmus/edf_common.h>
5068+#include <litmus/sched_trace.h>
5069+
5070+#include <linux/module.h>
5071+
5072+/* Overview of GSN-EDF operations.
5073+ *
5074+ * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
5075+ * description only covers how the individual operations are implemented in
5076+ * LITMUS.
5077+ *
5078+ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
5079+ * structure (NOT the actually scheduled
5080+ * task). If there is another linked task To
5081+ * already it will set To->linked_on = NO_CPU
5082+ * (thereby removing its association with this
5083+ * CPU). However, it will not requeue the
5084+ * previously linked task (if any). It will set
5085+ * T's state to RT_F_RUNNING and check whether
5086+ * it is already running somewhere else. If T
5087+ * is scheduled somewhere else it will link
5088+ * it to that CPU instead (and pull the linked
5089+ * task to cpu). T may be NULL.
5090+ *
5091+ * unlink(T) - Unlink removes T from all scheduler data
5092+ * structures. If it is linked to some CPU it
5093+ * will link NULL to that CPU. If it is
5094+ * currently queued in the gsnedf queue it will
5095+ * be removed from the T->rt_list. It is safe to
5096+ * call unlink(T) if T is not linked. T may not
5097+ * be NULL.
5098+ *
5099+ * requeue(T) - Requeue will insert T into the appropriate
5100+ * queue. If the system is in real-time mode and
5101+ * T has been released already, it will go into
5102+ * the ready queue. If the system is not in
5103+ * real-time mode, T will go into the release
5104+ * queue. If T's release time is in the
5105+ * future, it will also go into the release
5106+ * queue. That means that T's release time/job
5107+ * number/etc. has to be updated before requeue(T)
5108+ * is called. It is not safe to call requeue(T)
5109+ * when T is already queued. T may not be NULL.
5110+ *
5111+ * gsnedf_job_arrival(T) - This is the catch all function when T enters
5112+ * the system after either a suspension or at a
5113+ * job release. It will queue T (which means it
5114+ * is not safe to call gsnedf_job_arrival(T) if
5115+ * T is already queued) and then check whether a
5116+ * preemption is necessary. If a preemption is
5117+ * necessary it will update the linkage
5118+ * accordingly and cause scheduled to be called
5119+ * (either with an IPI or need_resched). It is
5120+ * safe to call gsnedf_job_arrival(T) if T's
5121+ * next job has not been actually released yet
5122+ * (release time in the future). T will be put
5123+ * on the release queue in that case.
5124+ *
5125+ * job_completion(T) - Take care of everything that needs to be done
5126+ * to prepare T for its next release and place
5127+ * it in the right queue with
5128+ * gsnedf_job_arrival().
5129+ *
5130+ *
5131+ * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
5132+ * equivalent to unlink(T). Note that if you unlink a task from a CPU, none of
5133+ * these functions will automatically pull a pending task from the ready queue
5134+ * to fill the now-empty link. This is the job of the calling function (by
5135+ * means of __take_ready).
5136+ */
5137+
5138+
5139+/* cpu_entry_t - maintain the linked and scheduled state
5140+ */
5141+typedef struct {
5142+ int cpu;
5143+ struct task_struct* linked; /* only RT tasks */
5144+ struct task_struct* scheduled; /* only RT tasks */
5145+ struct list_head list;
5146+ atomic_t will_schedule; /* prevent unneeded IPIs */
5147+} cpu_entry_t;
5148+DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
5149+
5150+#define set_will_schedule() \
5151+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
5152+#define clear_will_schedule() \
5153+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
5154+#define test_will_schedule(cpu) \
5155+ (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
5156+
5157+
5158+#define NO_CPU 0xffffffff
5159+
5160+/* The gsnedf_lock is used to serialize all scheduling events.
5161+ * It protects the per-CPU linkage state, the CPU queue, and the gsnedf domain.
5162+ */
5163+static DEFINE_SPINLOCK(gsnedf_lock);
5164+/* the cpus queue themselves according to priority in here */
5165+static LIST_HEAD(gsnedf_cpu_queue);
5166+
5167+static rt_domain_t gsnedf;
5168+
5169+
5170+/* update_cpu_position - Move the cpu entry to the correct place to maintain
5171+ * order in the cpu queue. Caller must hold gsnedf lock.
5172+ *
5173+ * This really should be a heap.
5174+ */
5175+static void update_cpu_position(cpu_entry_t *entry)
5176+{
5177+ cpu_entry_t *other;
5178+ struct list_head *pos;
5179+
5180+ if (likely(in_list(&entry->list)))
5181+ list_del(&entry->list);
5182+ /* if we do not execute real-time jobs we just move
5183+ * to the end of the queue
5184+ */
5185+ if (entry->linked) {
5186+ list_for_each(pos, &gsnedf_cpu_queue) {
5187+ other = list_entry(pos, cpu_entry_t, list);
5188+ if (edf_higher_prio(entry->linked, other->linked)) {
5189+ __list_add(&entry->list, pos->prev, pos);
5190+ return;
5191+ }
5192+ }
5193+ }
5194+ /* if we get this far we have the lowest priority job */
5195+ list_add_tail(&entry->list, &gsnedf_cpu_queue);
5196+}
5197+
5198+/* link_task_to_cpu - Update the link of a CPU.
5199+ * Handles the case where the to-be-linked task is already
5200+ * scheduled on a different CPU.
5201+ */
5202+static noinline void link_task_to_cpu(struct task_struct* linked,
5203+ cpu_entry_t *entry)
5204+{
5205+ cpu_entry_t *sched;
5206+ struct task_struct* tmp;
5207+ int on_cpu;
5208+
5209+ BUG_ON(linked && !is_realtime(linked));
5210+
5211+ /* Currently linked task is set to be unlinked. */
5212+ if (entry->linked) {
5213+ entry->linked->rt_param.linked_on = NO_CPU;
5214+ }
5215+
5216+ /* Link new task to CPU. */
5217+ if (linked) {
5218+ set_rt_flags(linked, RT_F_RUNNING);
5219+ /* handle task is already scheduled somewhere! */
5220+ on_cpu = linked->rt_param.scheduled_on;
5221+ if (on_cpu != NO_CPU) {
5222+ sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
5223+ /* this should only happen if not linked already */
5224+ BUG_ON(sched->linked == linked);
5225+
5226+ /* If we are already scheduled on the CPU to which we
5227+ * wanted to link, we don't need to do the swap --
5228+ * we just link ourselves to the CPU and depend on
5229+ * the caller to get things right.
5230+ */
5231+ if (entry != sched) {
5232+ tmp = sched->linked;
5233+ linked->rt_param.linked_on = sched->cpu;
5234+ sched->linked = linked;
5235+ update_cpu_position(sched);
5236+ linked = tmp;
5237+ }
5238+ }
5239+ if (linked) /* might be NULL due to swap */
5240+ linked->rt_param.linked_on = entry->cpu;
5241+ }
5242+ entry->linked = linked;
5243+ update_cpu_position(entry);
5244+}
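To make the swap case concrete, here is a standalone user-space sketch (not part of the patch) that models only the linked_on/scheduled_on bookkeeping of link_task_to_cpu(); priorities, update_cpu_position() and the RT_F_RUNNING flag are omitted.

/* link_swap_demo.c -- illustrative only */
#include <stdio.h>

#define NO_CPU (-1)

struct task {
	const char *name;
	int linked_on;		/* CPU this task is linked to, or NO_CPU */
	int scheduled_on;	/* CPU this task still runs on, or NO_CPU */
};

struct cpu_entry {
	int cpu;
	struct task *linked;
};

static void model_link_task_to_cpu(struct task *linked, struct cpu_entry *entry,
				   struct cpu_entry cpus[])
{
	if (entry->linked)
		entry->linked->linked_on = NO_CPU;	/* old task loses its link */

	if (linked && linked->scheduled_on != NO_CPU &&
	    linked->scheduled_on != entry->cpu) {
		/* task still runs elsewhere: link it there instead and take
		 * over that CPU's previously linked task (the "swap") */
		struct cpu_entry *sched = &cpus[linked->scheduled_on];
		struct task *tmp = sched->linked;

		linked->linked_on = sched->cpu;
		sched->linked = linked;
		linked = tmp;
	}
	if (linked)
		linked->linked_on = entry->cpu;
	entry->linked = linked;
}

int main(void)
{
	struct cpu_entry cpus[2] = { {0, NULL}, {1, NULL} };
	struct task t1 = {"T1", NO_CPU, 1};	/* T1 still runs on CPU 1 */
	struct task t2 = {"T2", 1, NO_CPU};	/* T2 is linked to CPU 1 */

	cpus[1].linked = &t2;
	model_link_task_to_cpu(&t1, &cpus[0], cpus);	/* link T1 to CPU 0 */
	printf("CPU0 -> %s, CPU1 -> %s\n",
	       cpus[0].linked ? cpus[0].linked->name : "-",
	       cpus[1].linked ? cpus[1].linked->name : "-");
	/* prints "CPU0 -> T2, CPU1 -> T1": T1 ends up linked where it runs */
	return 0;
}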
5245+
5246+/* unlink - Make sure a task is not linked any longer to an entry
5247+ * where it was linked before. Must hold gsnedf_lock.
5248+ */
5249+static noinline void unlink(struct task_struct* t)
5250+{
5251+ cpu_entry_t *entry;
5252+
5253+ if (unlikely(!t)) {
5254+ TRACE_BUG_ON(!t);
5255+ return;
5256+ }
5257+
5258+ if (t->rt_param.linked_on != NO_CPU) {
5259+ /* unlink */
5260+ entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
5261+ t->rt_param.linked_on = NO_CPU;
5262+ link_task_to_cpu(NULL, entry);
5263+ } else if (in_list(&t->rt_list)) {
5264+ /* This is an interesting situation: t is scheduled,
5265+ * but was just recently unlinked. It cannot be
5266+ * linked anywhere else (because then it would have
5267+ * been relinked to this CPU), thus it must be in some
5268+ * queue. We must remove it from the list in this
5269+ * case.
5270+ */
5271+ list_del(&t->rt_list);
5272+ }
5273+}
5274+
5275+
5276+/* preempt - force a CPU to reschedule
5277+ */
5278+static noinline void preempt(cpu_entry_t *entry)
5279+{
5280+ /* We cannot make the is_np() decision here if it is a remote CPU
5281+ * because requesting exit_np() requires that we currently use the
5282+ * address space of the task. Thus, in the remote case we just send
5283+ * the IPI and let schedule() handle the problem.
5284+ */
5285+
5286+ if (smp_processor_id() == entry->cpu) {
5287+ if (entry->scheduled && is_np(entry->scheduled))
5288+ request_exit_np(entry->scheduled);
5289+ else
5290+ set_tsk_need_resched(current);
5291+ } else
5292+ /* in case that it is a remote CPU we have to defer the
5293+ * decision to the remote CPU
5294+ * FIXME: We could save a few IPI's here if we leave the flag
5295+ * set when we are waiting for a np_exit().
5296+ */
5297+ if (!test_will_schedule(entry->cpu))
5298+ smp_send_reschedule(entry->cpu);
5299+}
5300+
5301+/* requeue - Put an unlinked task into gsn-edf domain.
5302+ * Caller must hold gsnedf_lock.
5303+ */
5304+static noinline void requeue(struct task_struct* task)
5305+{
5306+ BUG_ON(!task);
5307+ /* sanity check rt_list before insertion */
5308+ BUG_ON(in_list(&task->rt_list));
5309+
5310+ if (get_rt_flags(task) == RT_F_SLEEP) {
5311+ /* this task has expired;
5312+ * _schedule has already taken care of updating
5313+ * the release time and
5314+ * deadline. We just need to check if it has been released.
5315+ */
5316+ if (is_released(task, sched_clock()))
5317+ __add_ready(&gsnedf, task);
5318+ else {
5319+ /* it has got to wait */
5320+ __add_release(&gsnedf, task);
5321+ }
5322+
5323+ } else
5324+ /* this is a forced preemption
5325+ * thus the task stays in the ready_queue
5326+ * we only must make it available to others
5327+ */
5328+ __add_ready(&gsnedf, task);
5329+}
5330+
5331+/* gsnedf_job_arrival: task is either resumed or released */
5332+static noinline void gsnedf_job_arrival(struct task_struct* task)
5333+{
5334+ cpu_entry_t* last;
5335+
5336+ BUG_ON(list_empty(&gsnedf_cpu_queue));
5337+ BUG_ON(!task);
5338+
5339+ /* first queue arriving job */
5340+ requeue(task);
5341+
5342+ /* then check for any necessary preemptions */
5343+ last = list_entry(gsnedf_cpu_queue.prev, cpu_entry_t, list);
5344+ if (edf_preemption_needed(&gsnedf, last->linked)) {
5345+ /* preemption necessary */
5346+ task = __take_ready(&gsnedf);
5347+ TRACE("job_arrival: task %d linked to %d\n",
5348+ task->pid, last->cpu);
5349+ if (last->linked)
5350+ requeue(last->linked);
5351+
5352+ link_task_to_cpu(task, last);
5353+ preempt(last);
5354+ }
5355+}
5356+
5357+/* check for current job releases */
5358+static noinline void gsnedf_release_jobs(void)
5359+{
5360+ struct list_head *pos, *save;
5361+ struct task_struct *queued;
5362+ lt_t now = sched_clock();
5363+
5364+
5365+ list_for_each_safe(pos, save, &gsnedf.release_queue) {
5366+ queued = list_entry(pos, struct task_struct, rt_list);
5367+ if (likely(is_released(queued, now))) {
5368+ /* this one is ready to go*/
5369+ list_del(pos);
5370+ set_rt_flags(queued, RT_F_RUNNING);
5371+
5372+ sched_trace_job_release(queued);
5373+ gsnedf_job_arrival(queued);
5374+ }
5375+ else
5376+ /* the release queue is ordered */
5377+ break;
5378+ }
5379+}
5380+
5381+/* gsnedf_scheduler_tick - this function is called for every local timer
5382+ * interrupt.
5383+ *
5384+ * checks whether the current task has expired and checks
5385+ * whether we need to preempt it if it has not expired
5386+ */
5387+static void gsnedf_scheduler_tick(void)
5388+{
5389+ unsigned long flags;
5390+ struct task_struct* t = current;
5391+
5392+ if (is_realtime(t) && budget_exhausted(t)) {
5393+ if (!is_np(t)) {
5394+ /* np tasks will be preempted when they become
5395+ * preemptable again
5396+ */
5397+ set_tsk_need_resched(t);
5398+ set_will_schedule();
5399+ TRACE("gsnedf_scheduler_tick: "
5400+ "%d is preemptable "
5401+ " => FORCE_RESCHED\n", t->pid);
5402+ } else {
5403+ TRACE("gsnedf_scheduler_tick: "
5404+ "%d is non-preemptable, "
5405+ "preemption delayed.\n", t->pid);
5406+ request_exit_np(t);
5407+ }
5408+ }
5409+
5410+ /* only the first CPU needs to release jobs */
5411+ if (smp_processor_id() == 0) {
5412+ spin_lock_irqsave(&gsnedf_lock, flags);
5413+
5414+ /* Try to release pending jobs */
5415+ gsnedf_release_jobs();
5416+
5417+ /* We don't need to check linked != scheduled since
5418+ * set_tsk_need_resched has been set by preempt() if necessary.
5419+ */
5420+
5421+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5422+ }
5423+}
5424+
5425+/* caller holds gsnedf_lock */
5426+static noinline void job_completion(struct task_struct *t)
5427+{
5428+ BUG_ON(!t);
5429+
5430+ sched_trace_job_completion(t);
5431+
5432+ TRACE_TASK(t, "job_completion().\n");
5433+
5434+ /* set flags */
5435+ set_rt_flags(t, RT_F_SLEEP);
5436+ /* prepare for next period */
5437+ prepare_for_next_period(t);
5438+ /* unlink */
5439+ unlink(t);
5440+ /* requeue
5441+ * But don't requeue a blocking task. */
5442+ if (is_running(t))
5443+ gsnedf_job_arrival(t);
5444+}
5445+
5446+
5447+/* Getting schedule() right is a bit tricky. schedule() may not make any
5448+ * assumptions on the state of the current task since it may be called for a
5449+ * number of reasons. The reasons include a scheduler_tick() determined that it
5450+ * was necessary, because sys_exit_np() was called, because some Linux
5451+ * subsystem determined so, or even (in the worst case) because there is a bug
5452+ * hidden somewhere. Thus, we must take extreme care to determine what the
5453+ * current state is.
5454+ *
5455+ * The CPU could currently be scheduling a task (or not), be linked (or not).
5456+ *
5457+ * The following assertions for the scheduled task could hold:
5458+ *
5459+ * - !is_running(scheduled) // the job blocks
5460+ * - scheduled->timeslice == 0 // the job completed (forcefully)
5461+ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
5462+ * - linked != scheduled // we need to reschedule (for any reason)
5463+ * - is_np(scheduled) // rescheduling must be delayed,
5464+ * sys_exit_np must be requested
5465+ *
5466+ * Any of these can occur together.
5467+ */
5468+static int gsnedf_schedule(struct task_struct * prev,
5469+ struct task_struct ** next)
5470+{
5471+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
5472+ int out_of_time, sleep, preempt, np, exists, blocks;
5473+
5474+ /* Will be released in finish_switch. */
5475+ spin_lock(&gsnedf_lock);
5476+ clear_will_schedule();
5477+
5478+ /* sanity checking */
5479+ BUG_ON(entry->scheduled && entry->scheduled != prev);
5480+ BUG_ON(entry->scheduled && !is_realtime(prev));
5481+ BUG_ON(is_realtime(prev) && !entry->scheduled);
5482+
5483+ /* (0) Determine state */
5484+ exists = entry->scheduled != NULL;
5485+ blocks = exists && !is_running(entry->scheduled);
5486+ out_of_time = exists && budget_exhausted(entry->scheduled);
5487+ np = exists && is_np(entry->scheduled);
5488+ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
5489+ preempt = entry->scheduled != entry->linked;
5490+
5491+ /* If a task blocks we have no choice but to reschedule.
5492+ */
5493+ if (blocks)
5494+ unlink(entry->scheduled);
5495+
5496+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
5497+ * We need to make sure to update the link structure anyway in case
5498+ * that we are still linked. Multiple calls to request_exit_np() don't
5499+ * hurt.
5500+ */
5501+ if (np && (out_of_time || preempt || sleep)) {
5502+ unlink(entry->scheduled);
5503+ request_exit_np(entry->scheduled);
5504+ }
5505+
5506+ /* Any task that is preemptable and either exhausts its execution
5507+ * budget or wants to sleep completes. We may have to reschedule after
5508+ * this.
5509+ */
5510+ if (!np && (out_of_time || sleep))
5511+ job_completion(entry->scheduled);
5512+
5513+ /* Link pending task if we became unlinked.
5514+ */
5515+ if (!entry->linked)
5516+ link_task_to_cpu(__take_ready(&gsnedf), entry);
5517+
5518+ /* The final scheduling decision. Do we need to switch for some reason?
5519+ * If linked different from scheduled select linked as next.
5520+ */
5521+ if ((!np || blocks) &&
5522+ entry->linked != entry->scheduled) {
5523+ /* Schedule a linked job? */
5524+ if (entry->linked)
5525+ *next = entry->linked;
5526+ } else
5527+ /* Only override Linux scheduler if we have real-time task
5528+ * scheduled that needs to continue.
5529+ */
5530+ if (exists)
5531+ *next = prev;
5532+
5533+ spin_unlock(&gsnedf_lock);
5534+
5535+ /* don't race with a concurrent switch */
5536+ if (*next && prev != *next)
5537+ while ((*next)->rt_param.scheduled_on != NO_CPU)
5538+ cpu_relax();
5539+ return 0;
5540+}
5541+
5542+
5543+/* _finish_switch - we just finished the switch away from prev
5544+ */
5545+static void gsnedf_finish_switch(struct task_struct *prev)
5546+{
5547+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
5548+
5549+ entry->scheduled = is_realtime(current) ? current : NULL;
5550+
5551+ prev->rt_param.scheduled_on = NO_CPU;
5552+ current->rt_param.scheduled_on = smp_processor_id();
5553+}
5554+
5555+
5556+/* Prepare a task for running in RT mode
5557+ * Enqueues the task into master queue data structure
5558+ * returns
5559+ * -EPERM if task is not TASK_STOPPED
5560+ */
5561+static long gsnedf_prepare_task(struct task_struct * t)
5562+{
5563+ unsigned long flags;
5564+ TRACE("gsn edf: prepare task %d\n", t->pid);
5565+
5566+ if (t->state == TASK_STOPPED) {
5567+ t->rt_param.scheduled_on = NO_CPU;
5568+ t->rt_param.linked_on = NO_CPU;
5569+
5570+ /* delay by 1ms */
5571+ release_at(t, sched_clock() + 1000000);
5572+
5573+ /* The task should be in the TASK_RUNNING state while queued, otherwise
5574+ * the signal code will try to wake it up with fatal consequences.
5575+ */
5576+ t->state = TASK_RUNNING;
5577+ spin_lock_irqsave(&gsnedf_lock, flags);
5578+ t->rt_param.litmus_controlled = 1;
5579+ requeue(t);
5580+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5581+ return 0;
5582+ }
5583+ else
5584+ return -EPERM;
5585+}
5586+
5587+static void gsnedf_wake_up_task(struct task_struct *task)
5588+{
5589+ unsigned long flags;
5590+ lt_t now;
5591+ /* We must determine whether the task should go into the release
5592+ * queue or into the ready queue. It may enter the ready queue
5593+ * if it has credit left in its time slice and has not yet reached
5594+ * its deadline. If it is now past its deadline, we assume this is the
5595+ * arrival of a new sporadic job and thus put it in the ready queue
5596+ * anyway. If it has zero budget and the next release is in the future,
5597+ * it has to go to the release queue.
5598+ */
5599+ TRACE("gsnedf: %d unsuspends with budget=%d\n",
5600+ task->pid, task->time_slice);
5601+
5602+ spin_lock_irqsave(&gsnedf_lock, flags);
5603+ if (!task->rt_param.litmus_controlled) {
5604+ task->rt_param.litmus_controlled = 1;
5605+ /* We need to take suspensions because of semaphores into
5606+ * account! If a job resumes after being suspended due to acquiring
5607+ * a semaphore, it should never be treated as a new job release.
5608+ */
5609+ if (get_rt_flags(task) == RT_F_EXIT_SEM) {
5610+ set_rt_flags(task, RT_F_RUNNING);
5611+ } else {
5612+ now = sched_clock();
5613+ if (is_tardy(task, now)) {
5614+ /* new sporadic release */
5615+ release_at(task, now);
5616+ sched_trace_job_release(task);
5617+ }
5618+ else if (task->time_slice)
5619+ /* came back in time before deadline
5620+ */
5621+ set_rt_flags(task, RT_F_RUNNING);
5622+ }
5623+ task->state = TASK_RUNNING;
5624+ gsnedf_job_arrival(task);
5625+ }
5626+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5627+}
5628+
5629+static void gsnedf_task_blocks(struct task_struct *t)
5630+{
5631+ unsigned long flags;
5632+
5633+ /* unlink if necessary */
5634+ spin_lock_irqsave(&gsnedf_lock, flags);
5635+ unlink(t);
5636+ t->rt_param.litmus_controlled = 0;
5637+ spin_unlock_irqrestore(&gsnedf_lock, flags);
5638+
5639+ BUG_ON(!is_realtime(t));
5640+ TRACE("task %d suspends with budget=%d\n", t->pid, t->time_slice);
5641+ BUG_ON(t->rt_list.next != LIST_POISON1);
5642+ BUG_ON(t->rt_list.prev != LIST_POISON2);
5643+}
5644+
5645+
5646+/* When _tear_down is called, the task should not be in any queue any more
5647+ * as it must have blocked first. We don't have any internal state for the task,
5648+ * it is all in the task_struct.
5649+ */
5650+static long gsnedf_tear_down(struct task_struct * t)
5651+{
5652+ BUG_ON(!is_realtime(t));
5653+ TRACE_TASK(t, "RIP\n");
5654+ BUG_ON(t->array);
5655+ BUG_ON(t->rt_list.next != LIST_POISON1);
5656+ BUG_ON(t->rt_list.prev != LIST_POISON2);
5657+ return 0;
5658+}
5659+
5660+static long gsnedf_pi_block(struct pi_semaphore *sem,
5661+ struct task_struct *new_waiter)
5662+{
5663+ /* This callback has to handle the situation where a new waiter is
5664+ * added to the wait queue of the semaphore.
5665+ *
5666+ * We must check if the new waiter has a higher priority than the currently
5667+ * highest-priority task, and then potentially reschedule.
5668+ */
5669+
5670+ BUG_ON(!new_waiter);
5671+
5672+ if (edf_higher_prio(new_waiter, sem->hp.task)) {
5673+ TRACE_TASK(new_waiter, " boosts priority\n");
5674+ /* called with IRQs disabled */
5675+ spin_lock(&gsnedf_lock);
5676+ /* store new highest-priority task */
5677+ sem->hp.task = new_waiter;
5678+ if (sem->holder) {
5679+ /* let holder inherit */
5680+ sem->holder->rt_param.inh_task = new_waiter;
5681+ unlink(sem->holder);
5682+ gsnedf_job_arrival(sem->holder);
5683+ }
5684+ spin_unlock(&gsnedf_lock);
5685+ }
5686+
5687+ return 0;
5688+}
5689+
5690+static long gsnedf_inherit_priority(struct pi_semaphore *sem,
5691+ struct task_struct *new_owner)
5692+{
5693+ /* We don't need to acquire the gsnedf_lock since at the time of this
5694+ * call new_owner isn't actually scheduled yet (it's still sleeping)
5695+ * and since the calling function already holds sem->wait.lock, which
5696+ * prevents concurrent sem->hp.task changes.
5697+ */
5698+
5699+ if (sem->hp.task && sem->hp.task != new_owner) {
5700+ new_owner->rt_param.inh_task = sem->hp.task;
5701+ TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
5702+ sem->hp.task->comm, sem->hp.task->pid);
5703+ } else
5704+ TRACE_TASK(new_owner,
5705+ "cannot inherit priority, "
5706+ "no higher priority job waits.\n");
5707+ return 0;
5708+}
5709+
5710+/* This function is called on a semaphore release, and assumes that
5711+ * the current task is also the semaphore holder.
5712+ */
5713+static long gsnedf_return_priority(struct pi_semaphore *sem)
5714+{
5715+ struct task_struct* t = current;
5716+ int ret = 0;
5717+
5718+ /* Find new highest-priority semaphore task
5719+ * if holder task is the current hp.task.
5720+ *
5721+ * Calling function holds sem->wait.lock.
5722+ */
5723+ if (t == sem->hp.task)
5724+ set_hp_task(sem, edf_higher_prio);
5725+
5726+ TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
5727+
5728+ if (t->rt_param.inh_task) {
5729+ /* interrupts already disabled by PI code */
5730+ spin_lock(&gsnedf_lock);
5731+
5732+ /* Reset inh_task to NULL. */
5733+ t->rt_param.inh_task = NULL;
5734+
5735+ /* Check if rescheduling is necessary */
5736+ unlink(t);
5737+ gsnedf_job_arrival(t);
5738+ spin_unlock(&gsnedf_lock);
5739+ }
5740+
5741+ return ret;
5742+}
5743+
5744+/* Plugin object */
5745+static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
5746+ .plugin_name = "GSN-EDF",
5747+ .scheduler_tick = gsnedf_scheduler_tick,
5748+ .prepare_task = gsnedf_prepare_task,
5749+ .sleep_next_period = complete_job,
5750+ .tear_down = gsnedf_tear_down,
5751+ .schedule = gsnedf_schedule,
5752+ .finish_switch = gsnedf_finish_switch,
5753+ .wake_up_task = gsnedf_wake_up_task,
5754+ .task_blocks = gsnedf_task_blocks,
5755+ .inherit_priority = gsnedf_inherit_priority,
5756+ .return_priority = gsnedf_return_priority,
5757+ .pi_block = gsnedf_pi_block
5758+};
5759+
5760+
5761+static int __init init_gsn_edf(void)
5762+{
5763+ int cpu;
5764+ cpu_entry_t *entry;
5765+
5766+ /* initialize CPU state */
5767+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
5768+ entry = &per_cpu(gsnedf_cpu_entries, cpu);
5769+ atomic_set(&entry->will_schedule, 0);
5770+ entry->linked = NULL;
5771+ entry->scheduled = NULL;
5772+ entry->cpu = cpu;
5773+ INIT_LIST_HEAD(&entry->list);
5774+ }
5775+
5776+ edf_domain_init(&gsnedf, NULL);
5777+ return register_sched_plugin(&gsn_edf_plugin);
5778+}
5779+
5780+
5781+module_init(init_gsn_edf);
5782diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
5783new file mode 100644
5784index 0000000..f05fc56
5785--- /dev/null
5786+++ b/litmus/sched_plugin.c
5787@@ -0,0 +1,169 @@
5788+/* sched_plugin.c -- core infrastructure for the scheduler plugin system
5789+ *
5790+ * This file includes the initialization of the plugin system, the no-op Linux
5791+ * scheduler plugin and some dummy functions.
5792+ */
5793+
5794+#include <linux/list.h>
5795+#include <linux/spinlock.h>
5796+
5797+#include <litmus/litmus.h>
5798+#include <litmus/sched_plugin.h>
5799+
5800+
5801+/*************************************************************
5802+ * Dummy plugin functions *
5803+ *************************************************************/
5804+
5805+static void litmus_dummy_finish_switch(struct task_struct * prev)
5806+{
5807+}
5808+
5809+static int litmus_dummy_schedule(struct task_struct * prev,
5810+ struct task_struct** next)
5811+{
5812+ return 0;
5813+}
5814+
5815+static void litmus_dummy_scheduler_tick(void)
5816+{
5817+}
5818+
5819+static long litmus_dummy_prepare_task(struct task_struct *t)
5820+{
5821+ return -ENOSYS;
5822+}
5823+
5824+static void litmus_dummy_wake_up_task(struct task_struct *task)
5825+{
5826+ printk(KERN_WARNING "task %d: unhandled real-time wake up!\n",
5827+ task->pid);
5828+}
5829+
5830+static void litmus_dummy_task_blocks(struct task_struct *task)
5831+{
5832+}
5833+
5834+static long litmus_dummy_tear_down(struct task_struct *task)
5835+{
5836+ return 0;
5837+}
5838+
5839+static long litmus_dummy_sleep_next_period(void)
5840+{
5841+ return -ENOSYS;
5842+}
5843+
5844+static long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
5845+ struct task_struct *new_owner)
5846+{
5847+ return -ENOSYS;
5848+}
5849+
5850+static long litmus_dummy_return_priority(struct pi_semaphore *sem)
5851+{
5852+ return -ENOSYS;
5853+}
5854+
5855+static long litmus_dummy_pi_block(struct pi_semaphore *sem,
5856+ struct task_struct *new_waiter)
5857+{
5858+ return -ENOSYS;
5859+}
5860+
5861+
5862+/* The default scheduler plugin. It doesn't do anything and lets Linux do its
5863+ * job.
5864+ */
5865+struct sched_plugin linux_sched_plugin = {
5866+ .plugin_name = "Linux",
5867+ .scheduler_tick = litmus_dummy_scheduler_tick,
5868+ .prepare_task = litmus_dummy_prepare_task,
5869+ .tear_down = litmus_dummy_tear_down,
5870+ .wake_up_task = litmus_dummy_wake_up_task,
5871+ .task_blocks = litmus_dummy_task_blocks,
5872+ .sleep_next_period = litmus_dummy_sleep_next_period,
5873+ .schedule = litmus_dummy_schedule,
5874+ .finish_switch = litmus_dummy_finish_switch,
5875+ .inherit_priority = litmus_dummy_inherit_priority,
5876+ .return_priority = litmus_dummy_return_priority,
5877+ .pi_block = litmus_dummy_pi_block
5878+};
5879+
5880+/*
5881+ * The reference to current plugin that is used to schedule tasks within
5882+ * the system. It stores references to actual function implementations
5883+ * Should be initialized by calling "init_***_plugin()"
5884+ */
5885+struct sched_plugin *curr_sched_plugin = &linux_sched_plugin;
5886+
5887+/* the list of registered scheduling plugins */
5888+static LIST_HEAD(sched_plugins);
5889+static DEFINE_SPINLOCK(sched_plugins_lock);
5890+
5891+#define CHECK(func) {\
5892+ if (!plugin->func) \
5893+ plugin->func = litmus_dummy_ ## func;}
5894+
5895+/* FIXME: get reference to module */
5896+int register_sched_plugin(struct sched_plugin* plugin)
5897+{
5898+ printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
5899+ plugin->plugin_name);
5900+
5901+ /* make sure we don't trip over null pointers later */
5902+ CHECK(finish_switch);
5903+ CHECK(schedule);
5904+ CHECK(scheduler_tick);
5905+ CHECK(wake_up_task);
5906+ CHECK(tear_down);
5907+ CHECK(task_blocks);
5908+ CHECK(prepare_task);
5909+ CHECK(sleep_next_period);
5910+ CHECK(inherit_priority);
5911+ CHECK(return_priority);
5912+ CHECK(pi_block);
5913+
5914+ spin_lock(&sched_plugins_lock);
5915+ list_add(&plugin->list, &sched_plugins);
5916+ spin_unlock(&sched_plugins_lock);
5917+
5918+ return 0;
5919+}
5920+
5921+
5922+/* FIXME: reference counting, etc. */
5923+struct sched_plugin* find_sched_plugin(const char* name)
5924+{
5925+ struct list_head *pos;
5926+ struct sched_plugin *plugin;
5927+
5928+ spin_lock(&sched_plugins_lock);
5929+ list_for_each(pos, &sched_plugins) {
5930+ plugin = list_entry(pos, struct sched_plugin, list);
5931+ if (!strcmp(plugin->plugin_name, name))
5932+ goto out_unlock;
5933+ }
5934+ plugin = NULL;
5935+
5936+out_unlock:
5937+ spin_unlock(&sched_plugins_lock);
5938+ return plugin;
5939+}
5940+
5941+int print_sched_plugins(char* buf, int max)
5942+{
5943+ int count = 0;
5944+ struct list_head *pos;
5945+ struct sched_plugin *plugin;
5946+
5947+ spin_lock(&sched_plugins_lock);
5948+ list_for_each(pos, &sched_plugins) {
5949+ plugin = list_entry(pos, struct sched_plugin, list);
5950+ count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
5951+ if (max - count <= 0)
5952+ break;
5953+ }
5954+ spin_unlock(&sched_plugins_lock);
5955+ return count;
5956+}
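
For reference, here is a minimal sketch (not part of the patch above) of how a plugin is expected to use register_sched_plugin(): only the callbacks the plugin cares about are filled in, and the CHECK() macro substitutes the litmus_dummy_* implementations for the rest. The "DEMO" name and init_demo() are made up for illustration; the dummy prepare_task() returns -ENOSYS, so this sketch could not actually admit real-time tasks.

#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>

static void demo_scheduler_tick(void)
{
	/* a real plugin would check budgets and trigger preemptions here */
}

static struct sched_plugin demo_plugin = {
	.plugin_name    = "DEMO",
	.scheduler_tick = demo_scheduler_tick,
	/* .schedule, .prepare_task, .wake_up_task, etc. are deliberately left
	 * NULL; register_sched_plugin() replaces them with the dummy callbacks,
	 * so no NULL function pointer is ever dereferenced.
	 */
};

static int __init init_demo(void)
{
	return register_sched_plugin(&demo_plugin);
}

module_init(init_demo);
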
5957diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
5958new file mode 100644
5959index 0000000..27f4b5c
5960--- /dev/null
5961+++ b/litmus/sched_psn_edf.c
5962@@ -0,0 +1,458 @@
5963+
5964+/*
5965+ * kernel/sched_psn_edf.c
5966+ *
5967+ * Implementation of the PSN-EDF scheduler plugin.
5968+ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
5969+ *
5970+ * Suspensions and non-preemptable sections are supported.
5971+ * Priority inheritance is not supported.
5972+ */
5973+
5974+#include <linux/percpu.h>
5975+#include <linux/sched.h>
5976+#include <linux/list.h>
5977+#include <linux/spinlock.h>
5978+
5979+#include <linux/module.h>
5980+
5981+#include <litmus/litmus.h>
5982+#include <litmus/jobs.h>
5983+#include <litmus/sched_plugin.h>
5984+#include <litmus/edf_common.h>
5985+
5986+
5987+typedef struct {
5988+ rt_domain_t domain;
5989+ int cpu;
5990+ struct task_struct* scheduled; /* only RT tasks */
5991+ spinlock_t lock; /* protects the domain and
5992+ * serializes scheduling decisions
5993+ */
5994+} psnedf_domain_t;
5995+
5996+DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
5997+
5998+#define local_edf (&__get_cpu_var(psnedf_domains).domain)
5999+#define local_pedf (&__get_cpu_var(psnedf_domains))
6000+#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
6001+#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
6002+#define task_edf(task) remote_edf(get_partition(task))
6003+#define task_pedf(task) remote_pedf(get_partition(task))
6004+
6005+
6006+static void psnedf_domain_init(psnedf_domain_t* pedf,
6007+ check_resched_needed_t check,
6008+ int cpu)
6009+{
6010+ edf_domain_init(&pedf->domain, check);
6011+ pedf->cpu = cpu;
6012+ pedf->lock = SPIN_LOCK_UNLOCKED;
6013+ pedf->scheduled = NULL;
6014+}
6015+
6016+static void requeue(struct task_struct* t, rt_domain_t *edf)
6017+{
6018+ /* only requeue if t is actually running */
6019+ BUG_ON(!is_running(t));
6020+
6021+ if (t->state != TASK_RUNNING)
6022+ TRACE_TASK(t, "requeue: !TASK_RUNNING");
6023+
6024+ set_rt_flags(t, RT_F_RUNNING);
6025+ if (is_released(t, sched_clock()))
6026+ __add_ready(edf, t);
6027+ else
6028+ __add_release(edf, t); /* it has got to wait */
6029+}
6030+
6031+/* we assume the lock is being held */
6032+static void preempt(psnedf_domain_t *pedf)
6033+{
6034+ if (smp_processor_id() == pedf->cpu) {
6035+ if (pedf->scheduled && is_np(pedf->scheduled))
6036+ request_exit_np(pedf->scheduled);
6037+ else
6038+ set_tsk_need_resched(current);
6039+ } else
6040+		/* if it is a remote CPU we have to defer the
6041+		 * decision to the remote CPU
6042+		 */
6043+ smp_send_reschedule(pedf->cpu);
6044+}
6045+
6046+/* This check is trivial in partitioned systems as we only have to consider
6047+ * the CPU of the partition.
6048+ */
6049+static int psnedf_check_resched(rt_domain_t *edf)
6050+{
6051+ psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
6052+ int ret = 0;
6053+
6054+ /* because this is a callback from rt_domain_t we already hold
6055+ * the necessary lock for the ready queue
6056+ */
6057+ if (edf_preemption_needed(edf, pedf->scheduled)) {
6058+ preempt(pedf);
6059+ ret = 1;
6060+ }
6061+ return ret;
6062+}
6063+
6064+
6065+static void psnedf_scheduler_tick(void)
6066+{
6067+ unsigned long flags;
6068+ struct task_struct *t = current;
6069+ rt_domain_t *edf = local_edf;
6070+ psnedf_domain_t *pedf = local_pedf;
6071+
6072+ /* Check for inconsistency. We don't need the lock for this since
6073+ * ->scheduled is only changed in schedule, which obviously is not
6074+ * executing in parallel on this CPU
6075+ */
6076+ BUG_ON(is_realtime(t) && t != pedf->scheduled);
6077+
6078+ if (is_realtime(t) && budget_exhausted(t)) {
6079+ if (!is_np(t))
6080+ set_tsk_need_resched(t);
6081+ else {
6082+ TRACE("psnedf_scheduler_tick: "
6083+ "%d is non-preemptable, "
6084+ "preemption delayed.\n", t->pid);
6085+ request_exit_np(t);
6086+ }
6087+ }
6088+
6089+ spin_lock_irqsave(&pedf->lock, flags);
6090+ __release_pending(edf);
6091+ if (edf_preemption_needed(edf, t))
6092+ set_tsk_need_resched(t);
6093+ spin_unlock_irqrestore(&pedf->lock, flags);
6094+}
6095+
6096+static void job_completion(struct task_struct* t)
6097+{
6098+ TRACE_TASK(t, "job_completion().\n");
6099+ set_rt_flags(t, RT_F_SLEEP);
6100+ prepare_for_next_period(t);
6101+}
6102+
6103+static int psnedf_schedule(struct task_struct * prev,
6104+ struct task_struct ** next)
6105+{
6106+ psnedf_domain_t* pedf = local_pedf;
6107+ rt_domain_t* edf = &pedf->domain;
6108+
6109+ int out_of_time, sleep, preempt,
6110+ np, exists, blocks, resched;
6111+
6112+ spin_lock(&pedf->lock);
6113+
6114+ /* sanity checking */
6115+ BUG_ON(pedf->scheduled && pedf->scheduled != prev);
6116+ BUG_ON(pedf->scheduled && !is_realtime(prev));
6117+
6118+ /* (0) Determine state */
6119+ exists = pedf->scheduled != NULL;
6120+ blocks = exists && !is_running(pedf->scheduled);
6121+ out_of_time = exists && budget_exhausted(pedf->scheduled);
6122+ np = exists && is_np(pedf->scheduled);
6123+ sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
6124+ preempt = edf_preemption_needed(edf, prev);
6125+
6126+ /* If we need to preempt do so.
6127+ * The following checks set resched to 1 in case of special
6128+ * circumstances.
6129+ */
6130+ resched = preempt;
6131+
6132+ /* If a task blocks we have no choice but to reschedule.
6133+ */
6134+ if (blocks)
6135+ resched = 1;
6136+
6137+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
6138+ * Multiple calls to request_exit_np() don't hurt.
6139+ */
6140+ if (np && (out_of_time || preempt || sleep))
6141+ request_exit_np(pedf->scheduled);
6142+
6143+ /* Any task that is preemptable and either exhausts its execution
6144+ * budget or wants to sleep completes. We may have to reschedule after
6145+ * this.
6146+ */
6147+ if (!np && (out_of_time || sleep)) {
6148+ job_completion(pedf->scheduled);
6149+ resched = 1;
6150+ }
6151+
6152+ /* The final scheduling decision. Do we need to switch for some reason?
6153+ * Switch if we are in RT mode and have no task or if we need to
6154+ * resched.
6155+ */
6156+ *next = NULL;
6157+ if ((!np || blocks) && (resched || !exists)) {
6158+ /* Take care of a previously scheduled
6159+ * job by taking it out of the Linux runqueue.
6160+ */
6161+ if (pedf->scheduled) {
6162+			/* As opposed to global schedulers, which switch without
6163+			 * holding a lock, we can already requeue here since
6164+			 * no other CPU will schedule from this domain.
6165+			 */
6166+ if (!blocks)
6167+ requeue(pedf->scheduled, edf);
6168+ }
6169+ *next = __take_ready(edf);
6170+ } else
6171+ /* Only override Linux scheduler if we have a real-time task
6172+ * scheduled that needs to continue.
6173+ */
6174+ if (exists)
6175+ *next = prev;
6176+
6177+ if (*next)
6178+ set_rt_flags(*next, RT_F_RUNNING);
6179+
6180+ pedf->scheduled = *next;
6181+ spin_unlock(&pedf->lock);
6182+ return 0;
6183+}
6184+
6185+
6186+/* Prepare a task for running in RT mode.
6187+ * Enqueues the task into the master queue data structure.
6188+ * Returns
6189+ *	-EPERM if the task is not TASK_STOPPED.
6190+ */
6191+static long psnedf_prepare_task(struct task_struct * t)
6192+{
6193+ rt_domain_t* edf = task_edf(t);
6194+ psnedf_domain_t* pedf = task_pedf(t);
6195+ unsigned long flags;
6196+
6197+ TRACE("[%d] psn edf: prepare task %d on CPU %d\n",
6198+ smp_processor_id(), t->pid, get_partition(t));
6199+ if (t->state == TASK_STOPPED) {
6200+
6201+ /* 1ms delay */
6202+ release_at(t, sched_clock() + 1000000);
6203+
6204+		/* The task must be marked TASK_RUNNING while it waits in the queue,
6205+		 * otherwise the signal code will try to wake it up with fatal consequences.
6206+		 */
6207+ t->state = TASK_RUNNING;
6208+ spin_lock_irqsave(&pedf->lock, flags);
6209+ t->rt_param.litmus_controlled = 1;
6210+ __add_release(edf, t);
6211+ spin_unlock_irqrestore(&pedf->lock, flags);
6212+ return 0;
6213+ } else
6214+ return -EPERM;
6215+}
6216+
6217+static void psnedf_wake_up_task(struct task_struct *task)
6218+{
6219+ unsigned long flags;
6220+ psnedf_domain_t* pedf = task_pedf(task);
6221+ rt_domain_t* edf = task_edf(task);
6222+ lt_t now;
6223+
6224+ TRACE("psnedf: %d unsuspends with budget=%d\n",
6225+ task->pid, task->time_slice);
6226+
6227+ spin_lock_irqsave(&pedf->lock, flags);
6228+ if (!task->rt_param.litmus_controlled) {
6229+ BUG_ON(in_list(&task->rt_list));
6230+ task->rt_param.litmus_controlled = 1;
6231+ /* We need to take suspensions because of semaphores into
6232+ * account! If a job resumes after being suspended due to acquiring
6233+ * a semaphore, it should never be treated as a new job release.
6234+ */
6235+ now = sched_clock();
6236+ if (is_tardy(task, now) &&
6237+ get_rt_flags(task) != RT_F_EXIT_SEM) {
6238+ /* new sporadic release */
6239+ release_at(task, now);
6240+ sched_trace_job_release(task);
6241+ }
6242+ task->state = TASK_RUNNING;
6243+ requeue(task, edf);
6244+ }
6245+ spin_unlock_irqrestore(&pedf->lock, flags);
6246+}
6247+
6248+static void psnedf_task_blocks(struct task_struct *t)
6249+{
6250+ BUG_ON(!is_realtime(t));
6251+ /* not really anything to do since it can only block if
6252+ * it is running, and when it is not running it is not in any
6253+ * queue anyway.
6254+ */
6255+ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
6256+ BUG_ON(in_list(&t->rt_list));
6257+ t->rt_param.litmus_controlled = 0;
6258+}
6259+
6260+
6261+/* When _tear_down is called, the task should not be in any queue any more
6262+ * as it must have blocked first. We don't have any internal state for the task,
6263+ * it is all in the task_struct.
6264+ */
6265+static long psnedf_tear_down(struct task_struct * t)
6266+{
6267+ BUG_ON(!is_realtime(t));
6268+ TRACE_TASK(t, "tear down called");
6269+ BUG_ON(t->array);
6270+ BUG_ON(in_list(&t->rt_list));
6271+ return 0;
6272+}
6273+
6274+static long psnedf_pi_block(struct pi_semaphore *sem,
6275+ struct task_struct *new_waiter)
6276+{
6277+ psnedf_domain_t* pedf;
6278+ rt_domain_t* edf;
6279+ struct task_struct* t;
6280+ int cpu = get_partition(new_waiter);
6281+
6282+ BUG_ON(!new_waiter);
6283+
6284+ if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
6285+ TRACE_TASK(new_waiter, " boosts priority\n");
6286+ pedf = task_pedf(new_waiter);
6287+ edf = task_edf(new_waiter);
6288+
6289+ /* interrupts already disabled */
6290+ spin_lock(&pedf->lock);
6291+
6292+ /* store new highest-priority task */
6293+ sem->hp.cpu_task[cpu] = new_waiter;
6294+ if (sem->holder &&
6295+ get_partition(sem->holder) == get_partition(new_waiter)) {
6296+ /* let holder inherit */
6297+ sem->holder->rt_param.inh_task = new_waiter;
6298+ t = sem->holder;
6299+ if (in_list(&t->rt_list)) {
6300+ /* queued in domain*/
6301+ list_del(&t->rt_list);
6302+ /* readd to make priority change take place */
6303+ if (is_released(t, sched_clock()))
6304+ __add_ready(edf, t);
6305+ else
6306+ __add_release(edf, t);
6307+ }
6308+ }
6309+
6310+ /* check if we need to reschedule */
6311+ if (edf_preemption_needed(edf, current))
6312+ preempt(pedf);
6313+
6314+ spin_unlock(&pedf->lock);
6315+ }
6316+
6317+ return 0;
6318+}
6319+
6320+static long psnedf_inherit_priority(struct pi_semaphore *sem,
6321+ struct task_struct *new_owner)
6322+{
6323+ int cpu = get_partition(new_owner);
6324+
6325+ /* FIXME: This doesn't look correct at all!
6326+ * Why do we inherit in any case???
6327+ */
6328+ new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
6329+ if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
6330+ TRACE_TASK(new_owner,
6331+ "inherited priority from %s/%d\n",
6332+ sem->hp.cpu_task[cpu]->comm,
6333+ sem->hp.cpu_task[cpu]->pid);
6334+ } else
6335+ TRACE_TASK(new_owner,
6336+ "cannot inherit priority: "
6337+ "no higher priority job waits on this CPU!\n");
6338+ /* make new owner non-preemptable as required by FMLP under
6339+ * PSN-EDF.
6340+ */
6341+ make_np(new_owner);
6342+ return 0;
6343+}
6344+
6345+
6346+/* This function is called on a semaphore release, and assumes that
6347+ * the current task is also the semaphore holder.
6348+ */
6349+static long psnedf_return_priority(struct pi_semaphore *sem)
6350+{
6351+ struct task_struct* t = current;
6352+ psnedf_domain_t* pedf = task_pedf(t);
6353+ rt_domain_t* edf = task_edf(t);
6354+ int ret = 0;
6355+ int cpu = get_partition(current);
6356+
6357+
6358+ /* Find new highest-priority semaphore task
6359+ * if holder task is the current hp.cpu_task[cpu].
6360+ *
6361+ * Calling function holds sem->wait.lock.
6362+ */
6363+ if (t == sem->hp.cpu_task[cpu])
6364+ set_hp_cpu_task(sem, cpu, edf_higher_prio);
6365+
6366+ take_np(t);
6367+ if (current->rt_param.inh_task) {
6368+ TRACE_CUR("return priority of %s/%d\n",
6369+ current->rt_param.inh_task->comm,
6370+ current->rt_param.inh_task->pid);
6371+ spin_lock(&pedf->lock);
6372+
6373+ /* Reset inh_task to NULL. */
6374+ current->rt_param.inh_task = NULL;
6375+
6376+ /* check if we need to reschedule */
6377+ if (edf_preemption_needed(edf, current))
6378+ preempt(pedf);
6379+
6380+ spin_unlock(&pedf->lock);
6381+ } else
6382+ TRACE_CUR(" no priority to return %p\n", sem);
6383+
6384+ return ret;
6385+}
6386+
6387+
6388+/* Plugin object */
6389+static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
6390+ .plugin_name = "PSN-EDF",
6391+ .srp_active = 1,
6392+ .scheduler_tick = psnedf_scheduler_tick,
6393+ .prepare_task = psnedf_prepare_task,
6394+ .sleep_next_period = complete_job,
6395+ .tear_down = psnedf_tear_down,
6396+ .schedule = psnedf_schedule,
6397+ .wake_up_task = psnedf_wake_up_task,
6398+ .task_blocks = psnedf_task_blocks,
6399+ .pi_block = psnedf_pi_block,
6400+ .inherit_priority = psnedf_inherit_priority,
6401+ .return_priority = psnedf_return_priority
6402+};
6403+
6404+
6405+static int __init init_psn_edf(void)
6406+{
6407+ int i;
6408+
6409+ for (i = 0; i < NR_CPUS; i++)
6410+ {
6411+ psnedf_domain_init(remote_pedf(i),
6412+ psnedf_check_resched, i);
6413+ printk("PSN-EDF: CPU partition %d initialized.\n", i);
6414+ }
6415+ return register_sched_plugin(&psn_edf_plugin);
6416+}
6417+
6418+
6419+
6420+module_init(init_psn_edf);
6421diff --git a/litmus/sched_rm.c b/litmus/sched_rm.c
6422new file mode 100644
6423index 0000000..57acde4
6424--- /dev/null
6425+++ b/litmus/sched_rm.c
6426@@ -0,0 +1,397 @@
6427+
6428+/* RM implementation.
6429+ * Will support the M-PCP eventually.
6430+ */
6431+
6432+#include <linux/percpu.h>
6433+#include <linux/sched.h>
6434+#include <linux/list.h>
6435+#include <linux/spinlock.h>
6436+
6437+#include <linux/module.h>
6438+
6439+#include <litmus/litmus.h>
6440+#include <litmus/jobs.h>
6441+#include <litmus/sched_plugin.h>
6442+#include <litmus/rm_common.h>
6443+
6444+
6445+typedef struct {
6446+ rt_domain_t domain;
6447+ int cpu;
6448+ struct task_struct* scheduled; /* only RT tasks */
6449+ spinlock_t lock; /* protects the domain and
6450+ * serializes scheduling decisions
6451+ */
6452+} rm_domain_t;
6453+
6454+DEFINE_PER_CPU(rm_domain_t, rm_domains);
6455+
6456+#define local_dom (&__get_cpu_var(rm_domains).domain)
6457+#define local_part (&__get_cpu_var(rm_domains))
6458+#define remote_dom(cpu) (&per_cpu(rm_domains, cpu).domain)
6459+#define remote_part(cpu) (&per_cpu(rm_domains, cpu))
6460+#define task_dom(task) remote_dom(get_partition(task))
6461+#define task_part(task) remote_part(get_partition(task))
6462+
6463+
6464+static void prm_domain_init(rm_domain_t* part,
6465+ check_resched_needed_t check,
6466+ int cpu)
6467+{
6468+ rm_domain_init(&part->domain, check);
6469+ part->cpu = cpu;
6470+ part->lock = SPIN_LOCK_UNLOCKED;
6471+ part->scheduled = NULL;
6472+}
6473+
6474+static void requeue(struct task_struct* t, rt_domain_t *dom)
6475+{
6476+ /* only requeue if t is actually running */
6477+ BUG_ON(!is_running(t));
6478+
6479+ if (t->state != TASK_RUNNING)
6480+ TRACE_TASK(t, "requeue: !TASK_RUNNING");
6481+
6482+ set_rt_flags(t, RT_F_RUNNING);
6483+ if (is_released(t, sched_clock()))
6484+ __add_ready(dom, t);
6485+ else
6486+ __add_release(dom, t); /* it has got to wait */
6487+}
6488+
6489+/* we assume the lock is being held */
6490+static void preempt(rm_domain_t *part)
6491+{
6492+ if (smp_processor_id() == part->cpu) {
6493+ if (part->scheduled && is_np(part->scheduled))
6494+ request_exit_np(part->scheduled);
6495+ else
6496+ set_tsk_need_resched(current);
6497+ } else
6498+		/* if it is a remote CPU we have to defer the
6499+		 * decision to the remote CPU
6500+		 */
6501+ smp_send_reschedule(part->cpu);
6502+}
6503+
6504+/* This check is trivial in partitioned systems as we only have to consider
6505+ * the CPU of the partition.
6506+ */
6507+static int rm_check_resched(rt_domain_t *dom)
6508+{
6509+ rm_domain_t *part = container_of(dom, rm_domain_t, domain);
6510+ int ret = 0;
6511+
6512+ /* because this is a callback from rt_domain_t we already hold
6513+ * the necessary lock for the ready queue
6514+ */
6515+ if (rm_preemption_needed(dom, part->scheduled)) {
6516+ preempt(part);
6517+ ret = 1;
6518+ }
6519+ return ret;
6520+}
6521+
6522+static void __rm_set_prio(struct task_struct *t, struct pcp_priority* new_prio,
6523+ rm_domain_t* part)
6524+{
6525+ t->rt_param.cur_prio = new_prio;
6526+ if (in_list(&t->rt_list)) {
6527+ list_del(&t->rt_list);
6528+ requeue(t, &part->domain);
6529+ } else
6530+ rm_check_resched(&part->domain);
6531+}
6532+
6533+/* call only with IRQs disabled */
6534+void rm_set_prio(struct task_struct *t, struct pcp_priority* new_prio)
6535+{
6536+ unsigned long flags;
6537+ rm_domain_t *part = task_part(t);
6538+
6539+ BUG_ON(!is_realtime(t));
6540+ spin_lock_irqsave(&part->lock, flags);
6541+ __rm_set_prio(t, new_prio, part);
6542+ spin_unlock_irqrestore(&part->lock, flags);
6543+}
6544+
6545+static void rm_scheduler_tick(void)
6546+{
6547+ unsigned long flags;
6548+ struct task_struct *t = current;
6549+ rt_domain_t *dom = local_dom;
6550+ rm_domain_t *part = local_part;
6551+
6552+ /* Check for inconsistency. We don't need the lock for this since
6553+ * ->scheduled is only changed in schedule, which obviously is not
6554+ * executing in parallel on this CPU
6555+ */
6556+ BUG_ON(is_realtime(t) && t != part->scheduled);
6557+
6558+/* if (is_realtime(t) && budget_exhausted(t)) {
6559+ if (!is_np(t))
6560+ set_tsk_need_resched(t);
6561+ else {
6562+ TRACE("rm_scheduler_tick: "
6563+ "%d is non-preemptable, "
6564+ "preemption delayed.\n", t->pid);
6565+ request_exit_np(t);
6566+ }
6567+ }
6568+*/
6569+ spin_lock_irqsave(&part->lock, flags);
6570+ __release_pending(dom);
6571+ if (rm_preemption_needed(dom, t))
6572+ set_tsk_need_resched(t);
6573+ spin_unlock_irqrestore(&part->lock, flags);
6574+}
6575+
6576+static void job_completion(struct task_struct* t)
6577+{
6578+ TRACE_TASK(t, "job_completion().\n");
6579+ set_rt_flags(t, RT_F_SLEEP);
6580+ prepare_for_next_period(t);
6581+}
6582+
6583+static int rm_schedule(struct task_struct * prev,
6584+ struct task_struct ** next)
6585+{
6586+ rm_domain_t* part = local_part;
6587+ rt_domain_t* dom = &part->domain;
6588+
6589+ int sleep, preempt,
6590+ np, exists, blocks, resched;
6591+// int out_of_time;
6592+
6593+ spin_lock(&part->lock);
6594+
6595+ /* sanity checking */
6596+ BUG_ON(part->scheduled && part->scheduled != prev);
6597+ BUG_ON(part->scheduled && !is_realtime(prev));
6598+
6599+ /* (0) Determine state */
6600+ exists = part->scheduled != NULL;
6601+ blocks = exists && !is_running(part->scheduled);
6602+// out_of_time = exists && budget_exhausted(part->scheduled);
6603+#define out_of_time 0
6604+ np = exists && is_np(part->scheduled);
6605+ sleep = exists && get_rt_flags(part->scheduled) == RT_F_SLEEP;
6606+ preempt = rm_preemption_needed(dom, prev);
6607+
6608+ /* If we need to preempt do so.
6609+ * The following checks set resched to 1 in case of special
6610+ * circumstances.
6611+ */
6612+ resched = preempt;
6613+
6614+ /* If a task blocks we have no choice but to reschedule.
6615+ */
6616+ if (blocks)
6617+ resched = 1;
6618+
6619+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
6620+ * Multiple calls to request_exit_np() don't hurt.
6621+ */
6622+ if (np && (out_of_time || preempt || sleep))
6623+ request_exit_np(part->scheduled);
6624+
6625+ /* Any task that is preemptable and either exhausts its execution
6626+ * budget or wants to sleep completes. We may have to reschedule after
6627+ * this.
6628+ */
6629+ if (!np && (out_of_time || sleep)) {
6630+ job_completion(part->scheduled);
6631+ resched = 1;
6632+ }
6633+
6634+ /* The final scheduling decision. Do we need to switch for some reason?
6635+ * Switch if we are in RT mode and have no task or if we need to
6636+ * resched.
6637+ */
6638+ *next = NULL;
6639+ if ((!np || blocks) && (resched || !exists)) {
6640+ /* Take care of a previously scheduled
6641+ * job by taking it out of the Linux runqueue.
6642+ */
6643+ if (part->scheduled) {
6644+			/* As opposed to global schedulers, which switch without
6645+			 * holding a lock, we can already requeue here since
6646+			 * no other CPU will schedule from this domain.
6647+			 */
6648+ if (!blocks)
6649+ requeue(part->scheduled, dom);
6650+ }
6651+ *next = __take_ready(dom);
6652+ } else
6653+ /* Only override Linux scheduler if we have a real-time task
6654+ * scheduled that needs to continue.
6655+ */
6656+ if (exists)
6657+ *next = prev;
6658+
6659+ if (*next)
6660+ set_rt_flags(*next, RT_F_RUNNING);
6661+
6662+ part->scheduled = *next;
6663+ spin_unlock(&part->lock);
6664+ return 0;
6665+}
6666+
6667+
6668+/* Prepare a task for running in RT mode.
6669+ * Enqueues the task into the master queue data structure.
6670+ * Returns
6671+ *	-EPERM if the task is not TASK_STOPPED.
6672+ */
6673+static long rm_prepare_task(struct task_struct * t)
6674+{
6675+ rt_domain_t* dom = task_dom(t);
6676+ rm_domain_t* part = task_part(t);
6677+ unsigned long flags;
6678+
6679+ TRACE("[%d] P-RM: prepare task %d on CPU %d\n",
6680+ smp_processor_id(), t->pid, get_partition(t));
6681+ if (t->state == TASK_STOPPED) {
6682+//FIXME if (!t->rt_param.task_params.prio) {
6683+ TRACE_TASK(t, "using rate-monotonic prio assignment\n");
6684+ t->rt_param.pcp_prio.prio = get_rt_period(t);
6685+// } else {
6686+// TRACE_TASK(t, "using user-defined static prio assignment\n");
6687+// t->rt_param.pcp_prio.prio = t->rt_param.task_params.prio;
6688+// }
6689+ t->rt_param.pcp_prio.in_global_cs = 0;
6690+ t->rt_param.pcp_prio.pid = t->pid;
6691+ t->rt_param.cur_prio = &t->rt_param.pcp_prio;
6692+ INIT_LIST_HEAD(&t->rt_param.owned_semaphores);
6693+ /* 1ms delay */
6694+ release_at(t, sched_clock() + 1000000);
6695+
6696+		/* The task must be marked TASK_RUNNING while it waits in the queue,
6697+		 * otherwise the signal code will try to wake it up with fatal consequences.
6698+		 */
6699+ t->state = TASK_RUNNING;
6700+
6701+ spin_lock_irqsave(&part->lock, flags);
6702+ t->rt_param.litmus_controlled = 1;
6703+ __add_release(dom, t);
6704+ spin_unlock_irqrestore(&part->lock, flags);
6705+ return 0;
6706+ } else
6707+ return -EPERM;
6708+}
6709+
6710+static void rm_wake_up_task(struct task_struct *task)
6711+{
6712+ unsigned long flags;
6713+ rm_domain_t* part = task_part(task);
6714+ rt_domain_t* dom = task_dom(task);
6715+
6716+	TRACE_TASK(task, "P-RM: unsuspends.\n");
6717+
6718+ spin_lock_irqsave(&part->lock, flags);
6719+ if (!task->rt_param.litmus_controlled) {
6720+ BUG_ON(in_list(&task->rt_list));
6721+ task->rt_param.litmus_controlled = 1;
6722+ task->state = TASK_RUNNING;
6723+ requeue(task, dom);
6724+ }
6725+ spin_unlock_irqrestore(&part->lock, flags);
6726+}
6727+
6728+static void rm_task_blocks(struct task_struct *t)
6729+{
6730+ BUG_ON(!is_realtime(t));
6731+ /* not really anything to do since it can only block if
6732+ * it is running, and when it is not running it is not in any
6733+ * queue anyway.
6734+ */
6735+ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
6736+ BUG_ON(in_list(&t->rt_list));
6737+ t->rt_param.litmus_controlled = 0;
6738+}
6739+
6740+
6741+/* When _tear_down is called, the task should not be in any queue any more
6742+ * as it must have blocked first. We don't have any internal state for the task,
6743+ * it is all in the task_struct.
6744+ */
6745+static long rm_tear_down(struct task_struct * t)
6746+{
6747+ BUG_ON(!is_realtime(t));
6748+ TRACE_TASK(t, "tear down called");
6749+ BUG_ON(t->array);
6750+ BUG_ON(in_list(&t->rt_list));
6751+ return 0;
6752+}
6753+
6754+static struct pcp_priority boosted = {0, 1, INT_MAX};
6755+
6756+static long rm_pi_block(struct pi_semaphore *sem,
6757+ struct task_struct *new_waiter)
6758+{
6759+ return 0;
6760+}
6761+
6762+static long rm_inherit_priority(struct pi_semaphore *sem,
6763+ struct task_struct *new_owner)
6764+{
6765+ rm_set_prio(new_owner, &boosted);
6766+ TRACE_TASK(new_owner, "priority boosted");
6767+ make_np(new_owner);
6768+ return 0;
6769+}
6770+
6771+
6772+/* This function is called on a semaphore release, and assumes that
6773+ * the current task is also the semaphore holder.
6774+ */
6775+static long rm_return_priority(struct pi_semaphore *sem)
6776+{
6777+ struct task_struct* t = current;
6778+
6779+ take_np(t);
6780+ /* reset prio to trigger resched if required */
6781+ rm_set_prio(t, &t->rt_param.pcp_prio);
6782+ TRACE_TASK(t, "prio boost ended");
6783+ return 0;
6784+}
6785+
6786+/* Plugin object */
6787+static struct sched_plugin p_rm_plugin __cacheline_aligned_in_smp = {
6788+ .plugin_name = "P-RM",
6789+ /* PCP and SRP don't really work together, but this is something the
6790+ * user has to get right for the moment.
6791+ * System will not crash and burn, but timing correctness is not ensured.
6792+ * Just don't use both APIs at the same time for now.
6793+ */
6794+ .pcp_active = 1,
6795+ .srp_active = 1,
6796+ .scheduler_tick = rm_scheduler_tick,
6797+ .prepare_task = rm_prepare_task,
6798+ .sleep_next_period = complete_job,
6799+ .tear_down = rm_tear_down,
6800+ .schedule = rm_schedule,
6801+ .wake_up_task = rm_wake_up_task,
6802+ .task_blocks = rm_task_blocks,
6803+ .pi_block = rm_pi_block,
6804+ .inherit_priority = rm_inherit_priority,
6805+ .return_priority = rm_return_priority
6806+};
6807+
6808+static int __init init_rm(void)
6809+{
6810+ int i;
6811+
6812+ for (i = 0; i < NR_CPUS; i++)
6813+ {
6814+ prm_domain_init(remote_part(i),
6815+ rm_check_resched, i);
6816+ printk("P-RM: CPU partition %d initialized.\n", i);
6817+ }
6818+ return register_sched_plugin(&p_rm_plugin);
6819+}
6820+
6821+
6822+
6823+module_init(init_rm);
6824diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
6825new file mode 100644
6826index 0000000..0976e83
6827--- /dev/null
6828+++ b/litmus/sched_trace.c
6829@@ -0,0 +1,541 @@
6830+/* sched_trace.c -- record scheduling events to a byte stream.
6831+ *
6832+ * TODO: Move ring buffer to a lockfree implementation.
6833+ */
6834+
6835+#include <linux/spinlock.h>
6836+#include <linux/fs.h>
6837+#include <linux/cdev.h>
6838+#include <asm/semaphore.h>
6839+#include <asm/uaccess.h>
6840+#include <linux/module.h>
6841+
6842+#include <litmus/sched_trace.h>
6843+#include <litmus/litmus.h>
6844+
6845+
6846+typedef struct {
6847+ /* guard read and write pointers */
6848+ spinlock_t lock;
6849+ /* guard against concurrent freeing of buffer */
6850+ rwlock_t del_lock;
6851+
6852+ /* memory allocated for ring buffer */
6853+ unsigned long order;
6854+ char* buf;
6855+ char* end;
6856+
6857+ /* Read/write pointer. May not cross.
6858+ * They point to the position of next write and
6859+ * last read.
6860+ */
6861+ char* writep;
6862+ char* readp;
6863+
6864+} ring_buffer_t;
6865+
6866+#define EMPTY_RING_BUFFER { \
6867+ .lock = SPIN_LOCK_UNLOCKED, \
6868+ .del_lock = RW_LOCK_UNLOCKED, \
6869+ .buf = NULL, \
6870+ .end = NULL, \
6871+ .writep = NULL, \
6872+ .readp = NULL \
6873+}
6874+
6875+void rb_init(ring_buffer_t* buf)
6876+{
6877+ *buf = (ring_buffer_t) EMPTY_RING_BUFFER;
6878+}
6879+
6880+int rb_alloc_buf(ring_buffer_t* buf, unsigned long order)
6881+{
6882+ unsigned long flags;
6883+ int error = 0;
6884+ char *mem;
6885+
6886+ /* do memory allocation while not atomic */
6887+ mem = (char *) __get_free_pages(GFP_KERNEL, order);
6888+ if (!mem)
6889+ return -ENOMEM;
6890+ write_lock_irqsave(&buf->del_lock, flags);
6891+ BUG_ON(buf->buf);
6892+ buf->buf = mem;
6893+ buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1;
6894+ memset(buf->buf, 0xff, buf->end - buf->buf);
6895+ buf->order = order;
6896+ buf->writep = buf->buf + 1;
6897+ buf->readp = buf->buf;
6898+ write_unlock_irqrestore(&buf->del_lock, flags);
6899+ return error;
6900+}
6901+
6902+int rb_free_buf(ring_buffer_t* buf)
6903+{
6904+ unsigned long flags;
6905+ int error = 0;
6906+ write_lock_irqsave(&buf->del_lock, flags);
6907+ BUG_ON(!buf->buf);
6908+ free_pages((unsigned long) buf->buf, buf->order);
6909+ buf->buf = NULL;
6910+ buf->end = NULL;
6911+ buf->writep = NULL;
6912+ buf->readp = NULL;
6913+ write_unlock_irqrestore(&buf->del_lock, flags);
6914+ return error;
6915+}
6916+
6917+/* Assumption: concurrent writes are serialized externally
6918+ *
6919+ * Will only succeed if there is enough space for all len bytes.
6920+ */
6921+int rb_put(ring_buffer_t* buf, char* mem, size_t len)
6922+{
6923+ unsigned long flags;
6924+ char* r , *w;
6925+ int error = 0;
6926+ read_lock_irqsave(&buf->del_lock, flags);
6927+ if (!buf->buf) {
6928+ error = -ENODEV;
6929+ goto out;
6930+ }
6931+ spin_lock(&buf->lock);
6932+ r = buf->readp;
6933+ w = buf->writep;
6934+ spin_unlock(&buf->lock);
6935+ if (r < w && buf->end - w >= len - 1) {
6936+ /* easy case: there is enough space in the buffer
6937+		 * to write it in one continuous chunk */
6938+ memcpy(w, mem, len);
6939+ w += len;
6940+ if (w > buf->end)
6941+ /* special case: fit exactly into buffer
6942+ * w is now buf->end + 1
6943+ */
6944+ w = buf->buf;
6945+ } else if (w < r && r - w >= len) { /* >= len because may not cross */
6946+		/* we are constrained by the read pointer but there
6947+ * is enough space
6948+ */
6949+ memcpy(w, mem, len);
6950+ w += len;
6951+ } else if (r <= w && buf->end - w < len - 1) {
6952+ /* the wrap around case: there may or may not be space */
6953+ if ((buf->end - w) + (r - buf->buf) >= len - 1) {
6954+ /* copy chunk that fits at the end */
6955+ memcpy(w, mem, buf->end - w + 1);
6956+ mem += buf->end - w + 1;
6957+ len -= (buf->end - w + 1);
6958+ w = buf->buf;
6959+ /* copy the rest */
6960+ memcpy(w, mem, len);
6961+ w += len;
6962+ }
6963+ else
6964+ error = -ENOMEM;
6965+ } else {
6966+ error = -ENOMEM;
6967+ }
6968+ if (!error) {
6969+ spin_lock(&buf->lock);
6970+ buf->writep = w;
6971+ spin_unlock(&buf->lock);
6972+ }
6973+ out:
6974+ read_unlock_irqrestore(&buf->del_lock, flags);
6975+ return error;
6976+}
6977+
6978+/* Assumption: concurrent reads are serialized externally */
6979+int rb_get(ring_buffer_t* buf, char* mem, size_t len)
6980+{
6981+ unsigned long flags;
6982+ char* r , *w;
6983+ int error = 0;
6984+ read_lock_irqsave(&buf->del_lock, flags);
6985+ if (!buf->buf) {
6986+ error = -ENODEV;
6987+ goto out;
6988+ }
6989+ spin_lock(&buf->lock);
6990+ r = buf->readp;
6991+ w = buf->writep;
6992+ spin_unlock(&buf->lock);
6993+
6994+ if (w <= r && buf->end - r >= len) {
6995+ /* easy case: there is enough data in the buffer
6996+ * to get it in one chunk*/
6997+ memcpy(mem, r + 1, len);
6998+ r += len;
6999+ error = len;
7000+
7001+ } else if (r + 1 < w && w - r - 1 >= len) {
7002+ /* we are constrained by the write pointer but
7003+ * there is enough data
7004+ */
7005+ memcpy(mem, r + 1, len);
7006+ r += len;
7007+ error = len;
7008+
7009+ } else if (r + 1 < w && w - r - 1 < len) {
7010+		/* we are constrained by the write pointer and
7011+		 * there is not enough data
7012+ */
7013+ memcpy(mem, r + 1, w - r - 1);
7014+ error = w - r - 1;
7015+ r += w - r - 1;
7016+
7017+ } else if (w <= r && buf->end - r < len) {
7018+ /* the wrap around case: there may or may not be enough data
7019+ * first let's get what is available
7020+ */
7021+ memcpy(mem, r + 1, buf->end - r);
7022+ error += (buf->end - r);
7023+ mem += (buf->end - r);
7024+ len -= (buf->end - r);
7025+ r += (buf->end - r);
7026+
7027+ if (w > buf->buf) {
7028+ /* there is more to get */
7029+ r = buf->buf - 1;
7030+ if (w - r >= len) {
7031+ /* plenty */
7032+ memcpy(mem, r + 1, len);
7033+ error += len;
7034+ r += len;
7035+ } else {
7036+ memcpy(mem, r + 1, w - r - 1);
7037+ error += w - r - 1;
7038+ r += w - r - 1;
7039+ }
7040+ }
7041+ } /* nothing available */
7042+
7043+ if (error > 0) {
7044+ spin_lock(&buf->lock);
7045+ buf->readp = r;
7046+ spin_unlock(&buf->lock);
7047+ }
7048+ out:
7049+ read_unlock_irqrestore(&buf->del_lock, flags);
7050+ return error;
7051+}
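
A sketch (not part of the patch) of how the ring-buffer primitives above fit together; demo_rb and demo_rb_usage() are made-up names and error handling is abbreviated.

static ring_buffer_t demo_rb;

static int demo_rb_usage(void)
{
	char in[4]  = "abc";
	char out[4] = { 0 };
	int err;

	rb_init(&demo_rb);
	err = rb_alloc_buf(&demo_rb, 2);	/* order 2 => four pages */
	if (err)
		return err;

	/* rb_put() only succeeds if all bytes fit at once, else -ENOMEM */
	err = rb_put(&demo_rb, in, sizeof(in));
	if (!err)
		/* rb_get() returns the number of bytes copied out (here 4) */
		err = rb_get(&demo_rb, out, sizeof(out));

	rb_free_buf(&demo_rb);
	return err < 0 ? err : 0;
}
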
7052+
7053+
7054+
7055+/******************************************************************************/
7056+/* DEVICE FILE DRIVER */
7057+/******************************************************************************/
7058+
7059+
7060+
7061+/* Allocate a buffer of about 1 MB per CPU.
7062+ *
7063+ */
7064+#define BUFFER_ORDER 8
7065+
7066+typedef struct {
7067+ ring_buffer_t buf;
7068+ atomic_t reader_cnt;
7069+ struct semaphore reader_mutex;
7070+} trace_buffer_t;
7071+
7072+
7073+/* This does not initialize the semaphore!! */
7074+
7075+#define EMPTY_TRACE_BUFFER \
7076+ { .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0)}
7077+
7078+static DEFINE_PER_CPU(trace_buffer_t, trace_buffer);
7079+
7080+#ifdef CONFIG_SCHED_DEBUG_TRACE
7081+static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
7082+#endif
7083+static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER;
7084+
7085+static void init_buffers(void)
7086+{
7087+ int i;
7088+
7089+ for (i = 0; i < NR_CPUS; i++) {
7090+ rb_init(&per_cpu(trace_buffer, i).buf);
7091+ init_MUTEX(&per_cpu(trace_buffer, i).reader_mutex);
7092+ atomic_set(&per_cpu(trace_buffer, i).reader_cnt, 0);
7093+ }
7094+ /* only initialize the mutex, the rest was initialized as part
7095+ * of the static initialization macro
7096+ */
7097+ init_MUTEX(&log_buffer.reader_mutex);
7098+}
7099+
7100+static int trace_release(struct inode *in, struct file *filp)
7101+{
7102+	int error = 0;
7103+ trace_buffer_t* buf = filp->private_data;
7104+
7105+ BUG_ON(!filp->private_data);
7106+
7107+ if (down_interruptible(&buf->reader_mutex)) {
7108+ error = -ERESTARTSYS;
7109+ goto out;
7110+ }
7111+
7112+ /* last release must deallocate buffers */
7113+ if (atomic_dec_return(&buf->reader_cnt) == 0) {
7114+ error = rb_free_buf(&buf->buf);
7115+ }
7116+
7117+ up(&buf->reader_mutex);
7118+ out:
7119+ return error;
7120+}
7121+
7122+static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
7123+ loff_t *f_pos)
7124+{
7125+ /* we ignore f_pos, this is strictly sequential */
7126+
7127+ ssize_t error = -EINVAL;
7128+ char* mem;
7129+ trace_buffer_t *buf = filp->private_data;
7130+
7131+ if (down_interruptible(&buf->reader_mutex)) {
7132+ error = -ERESTARTSYS;
7133+ goto out;
7134+ }
7135+
7136+ if (len > 64 * 1024)
7137+ len = 64 * 1024;
7138+ mem = kmalloc(len, GFP_KERNEL);
7139+ if (!mem) {
7140+ error = -ENOMEM;
7141+ goto out_unlock;
7142+ }
7143+
7144+ error = rb_get(&buf->buf, mem, len);
7145+ while (!error) {
7146+ set_current_state(TASK_INTERRUPTIBLE);
7147+ schedule_timeout(110);
7148+ if (signal_pending(current))
7149+ error = -ERESTARTSYS;
7150+ else
7151+ error = rb_get(&buf->buf, mem, len);
7152+ }
7153+
7154+ if (error > 0 && copy_to_user(to, mem, error))
7155+ error = -EFAULT;
7156+
7157+ kfree(mem);
7158+ out_unlock:
7159+ up(&buf->reader_mutex);
7160+ out:
7161+ return error;
7162+}
7163+
7164+
7165+/* trace_open - Open one of the per-CPU sched_trace buffers.
7166+ */
7167+static int trace_open(struct inode *in, struct file *filp)
7168+{
7169+ int error = -EINVAL;
7170+ int cpu = MINOR(in->i_rdev);
7171+ trace_buffer_t* buf;
7172+
7173+ if (!cpu_online(cpu)) {
7174+ printk(KERN_WARNING "sched trace: "
7175+ "CPU #%d is not online. (open failed)\n", cpu);
7176+ error = -ENODEV;
7177+ goto out;
7178+ }
7179+
7180+ buf = &per_cpu(trace_buffer, cpu);
7181+
7182+ if (down_interruptible(&buf->reader_mutex)) {
7183+ error = -ERESTARTSYS;
7184+ goto out;
7185+ }
7186+
7187+ /* first open must allocate buffers */
7188+ if (atomic_inc_return(&buf->reader_cnt) == 1) {
7189+ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
7190+ {
7191+ atomic_dec(&buf->reader_cnt);
7192+ goto out_unlock;
7193+ }
7194+ }
7195+
7196+ error = 0;
7197+ filp->private_data = buf;
7198+
7199+ out_unlock:
7200+ up(&buf->reader_mutex);
7201+ out:
7202+ return error;
7203+}
7204+
7205+/* log_open - open the global log message ring buffer.
7206+ */
7207+static int log_open(struct inode *in, struct file *filp)
7208+{
7209+ int error = -EINVAL;
7210+ trace_buffer_t* buf;
7211+
7212+ buf = &log_buffer;
7213+
7214+ if (down_interruptible(&buf->reader_mutex)) {
7215+ error = -ERESTARTSYS;
7216+ goto out;
7217+ }
7218+
7219+ /* first open must allocate buffers */
7220+ if (atomic_inc_return(&buf->reader_cnt) == 1) {
7221+ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
7222+ {
7223+ atomic_dec(&buf->reader_cnt);
7224+ goto out_unlock;
7225+ }
7226+ }
7227+
7228+ error = 0;
7229+ filp->private_data = buf;
7230+
7231+ out_unlock:
7232+ up(&buf->reader_mutex);
7233+ out:
7234+ return error;
7235+}
7236+
7237+/******************************************************************************/
7238+/* Device Registration */
7239+/******************************************************************************/
7240+
7241+/* the major numbers are from the unassigned/local use block
7242+ *
7243+ * This should be converted to dynamic allocation at some point...
7244+ */
7245+#define TRACE_MAJOR 250
7246+#define LOG_MAJOR 251
7247+
7248+/* trace_fops - The file operations for accessing the per-CPU scheduling event
7249+ * trace buffers.
7250+ */
7251+struct file_operations trace_fops = {
7252+ .owner = THIS_MODULE,
7253+ .open = trace_open,
7254+ .release = trace_release,
7255+ .read = trace_read,
7256+};
7257+
7258+/* log_fops - The file operations for accessing the global LITMUS log message
7259+ * buffer.
7260+ *
7261+ * Except for opening the device file it uses the same operations as trace_fops.
7262+ */
7263+struct file_operations log_fops = {
7264+ .owner = THIS_MODULE,
7265+ .open = log_open,
7266+ .release = trace_release,
7267+ .read = trace_read,
7268+};
7269+
7270+static int __init register_buffer_dev(const char* name,
7271+ struct file_operations* fops,
7272+ int major, int count)
7273+{
7274+ dev_t trace_dev;
7275+ struct cdev *cdev;
7276+ int error = 0;
7277+
7278+ trace_dev = MKDEV(major, 0);
7279+ error = register_chrdev_region(trace_dev, count, name);
7280+ if (error)
7281+ {
7282+ printk(KERN_WARNING "sched trace: "
7283+ "Could not register major/minor number %d\n", major);
7284+ return error;
7285+ }
7286+ cdev = cdev_alloc();
7287+ if (!cdev) {
7288+ printk(KERN_WARNING "sched trace: "
7289+ "Could not get a cdev for %s.\n", name);
7290+ return -ENOMEM;
7291+ }
7292+ cdev->owner = THIS_MODULE;
7293+ cdev->ops = fops;
7294+ error = cdev_add(cdev, trace_dev, count);
7295+ if (error) {
7296+ printk(KERN_WARNING "sched trace: "
7297+ "add_cdev failed for %s.\n", name);
7298+ return -ENOMEM;
7299+ }
7300+ return error;
7301+
7302+}
7303+
7304+static int __init init_sched_trace(void)
7305+{
7306+ int error1 = 0, error2 = 0;
7307+
7308+ printk("Initializing scheduler trace device\n");
7309+ init_buffers();
7310+
7311+ error1 = register_buffer_dev("schedtrace", &trace_fops,
7312+ TRACE_MAJOR, NR_CPUS);
7313+
7314+ error2 = register_buffer_dev("litmus_log", &log_fops,
7315+ LOG_MAJOR, 1);
7316+ if (error1 || error2)
7317+ return min(error1, error2);
7318+ else
7319+ return 0;
7320+}
7321+
7322+module_init(init_sched_trace);
7323+
7324+/******************************************************************************/
7325+/* KERNEL API */
7326+/******************************************************************************/
7327+
7328+/* The per-CPU buffer used to format LITMUS log messages. Don't put it on the stack,
7329+ * it is too big for that and the kernel gets very picky with nested interrupts and small stacks.
7330+ */
7331+
7332+#ifdef CONFIG_SCHED_DEBUG_TRACE
7333+
7334+#define MSG_SIZE 255
7335+static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
7336+
7337+/* sched_trace_log_message - This is the only function that accesses the
7338+ * log buffer inside the kernel for writing.
7339+ * Concurrent access to it is serialized via the
7340+ * log_buffer_lock.
7341+ *
7342+ * The maximum length of a formatted message is 255.
7343+ */
7344+void sched_trace_log_message(const char* fmt, ...)
7345+{
7346+ unsigned long flags;
7347+ va_list args;
7348+ size_t len;
7349+ char* buf;
7350+
7351+ va_start(args, fmt);
7352+ local_irq_save(flags);
7353+
7354+ /* format message */
7355+ buf = __get_cpu_var(fmt_buffer);
7356+ len = vscnprintf(buf, MSG_SIZE, fmt, args);
7357+
7358+ spin_lock(&log_buffer_lock);
7359+ /* Don't copy the trailing null byte, we don't want null bytes
7360+ * in a text file.
7361+ */
7362+ rb_put(&log_buffer.buf, buf, len);
7363+ spin_unlock(&log_buffer_lock);
7364+
7365+ local_irq_restore(flags);
7366+ va_end(args);
7367+}
7368+
7369+#endif
7370+
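
A user-space sketch (not part of the patch) of draining the global LITMUS log device registered above. The /dev/litmus_log path is an assumption; the driver only reserves major number 251 (LOG_MAJOR), so the node would have to be created with mknod first.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	/* assumed node, e.g. created with: mknod /dev/litmus_log c 251 0 */
	int fd = open("/dev/litmus_log", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* trace_read() blocks and polls until log data becomes available */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
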
7371diff --git a/litmus/sync.c b/litmus/sync.c
7372new file mode 100644
7373index 0000000..4405228
7374--- /dev/null
7375+++ b/litmus/sync.c
7376@@ -0,0 +1,84 @@
7377+/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
7378+ *
7379+ *
7380+ */
7381+
7382+#include <asm/atomic.h>
7383+#include <asm/uaccess.h>
7384+#include <linux/spinlock.h>
7385+#include <linux/list.h>
7386+#include <linux/sched.h>
7387+#include <linux/completion.h>
7388+
7389+#include <litmus/litmus.h>
7390+#include <litmus/jobs.h>
7391+
7392+static DECLARE_COMPLETION(ts_release);
7393+
7394+static long do_wait_for_ts_release(void)
7395+{
7396+ long ret = 0;
7397+
7398+ /* If the interruption races with a release, the completion object
7399+ * may have a non-zero counter. To avoid this problem, this should
7400+ * be replaced by wait_for_completion().
7401+ *
7402+ * For debugging purposes, this is interruptible for now.
7403+ */
7404+ ret = wait_for_completion_interruptible(&ts_release);
7405+
7406+ return ret;
7407+}
7408+
7409+
7410+static long do_release_ts(lt_t start)
7411+{
7412+ int task_count = 0;
7413+	unsigned long flags;
7414+ struct list_head *pos;
7415+ struct task_struct *t;
7416+
7417+
7418+ spin_lock_irqsave(&ts_release.wait.lock, flags);
7419+
7420+ list_for_each(pos, &ts_release.wait.task_list) {
7421+ t = (struct task_struct*) list_entry(pos,
7422+ struct __wait_queue,
7423+ task_list)->private;
7424+ task_count++;
7425+ release_at(t, start + t->rt_param.task_params.phase);
7426+ }
7427+
7428+ spin_unlock_irqrestore(&ts_release.wait.lock, flags);
7429+
7430+ complete_n(&ts_release, task_count);
7431+
7432+ return task_count;
7433+}
7434+
7435+
7436+asmlinkage long sys_wait_for_ts_release(void)
7437+{
7438+ long ret = -EPERM;
7439+ struct task_struct *t = current;
7440+
7441+ if (is_realtime(t))
7442+ ret = do_wait_for_ts_release();
7443+
7444+ return ret;
7445+}
7446+
7447+
7448+asmlinkage long sys_release_ts(lt_t __user *__delay)
7449+{
7450+ long ret;
7451+ lt_t delay;
7452+
7453+ /* FIXME: check capabilities... */
7454+
7455+ ret = copy_from_user(&delay, __delay, sizeof(lt_t));
7456+ if (ret == 0)
7457+ ret = do_release_ts(sched_clock() + delay);
7458+
7459+ return ret;
7460+}
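
A user-space sketch (not part of the patch) of the synchronous task-system release protocol implemented above: admitted real-time tasks block in sys_wait_for_ts_release() while a controlling task calls sys_release_ts() with a relative delay in nanoseconds. The syscall numbers below are placeholders; the real numbers are assigned by the unistd.h/syscall_table.S changes of this patch, and liblitmus would normally wrap these calls.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* placeholder syscall numbers -- see the unistd.h changes in this patch */
#define __NR_wait_for_ts_release	400
#define __NR_release_ts			401

typedef unsigned long long lt_t;	/* nanoseconds, as in the kernel */

/* each admitted real-time task calls this and blocks until the release */
long wait_for_release(void)
{
	return syscall(__NR_wait_for_ts_release);
}

/* the controlling task releases all waiters 'delay' ns from now,
 * each shifted by its own phase
 */
long release_all(lt_t delay)
{
	return syscall(__NR_release_ts, &delay);
}

int main(void)
{
	long released = release_all(1000000ULL);	/* 1 ms from now */
	printf("released %ld waiting tasks\n", released);
	return 0;
}
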
7461diff --git a/litmus/trace.c b/litmus/trace.c
7462new file mode 100644
7463index 0000000..bcdf103
7464--- /dev/null
7465+++ b/litmus/trace.c
7466@@ -0,0 +1,302 @@
7467+#include <linux/fs.h>
7468+#include <linux/cdev.h>
7469+#include <asm/semaphore.h>
7470+#include <asm/uaccess.h>
7471+#include <linux/module.h>
7472+
7473+#include <litmus/trace.h>
7474+
7475+/******************************************************************************/
7476+/* Allocation */
7477+/******************************************************************************/
7478+
7479+struct ft_buffer* trace_ts_buf = NULL;
7480+
7481+static unsigned int ts_seq_no = 0;
7482+
7483+feather_callback void save_timestamp(unsigned long event)
7484+{
7485+ unsigned int seq_no = fetch_and_inc((int *) &ts_seq_no);
7486+ struct timestamp *ts;
7487+ if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
7488+ ts->event = event;
7489+ ts->timestamp = ft_read_tsc();
7490+ ts->seq_no = seq_no;
7491+ ts->cpu = raw_smp_processor_id();
7492+ ft_buffer_finish_write(trace_ts_buf, ts);
7493+ }
7494+}
7495+
7496+static struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
7497+{
7498+ struct ft_buffer* buf;
7499+ size_t total = (size + 1) * count;
7500+ char* mem;
7501+ int order = 0, pages = 1;
7502+
7503+ buf = kmalloc(sizeof(struct ft_buffer), GFP_KERNEL);
7504+ if (!buf)
7505+ return NULL;
7506+
7507+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
7508+ while (pages < total) {
7509+ order++;
7510+ pages *= 2;
7511+ }
7512+
7513+ mem = (char*) __get_free_pages(GFP_KERNEL, order);
7514+ if (!mem) {
7515+ kfree(buf);
7516+ return NULL;
7517+ }
7518+
7519+ if (!init_ft_buffer(buf, count, size,
7520+ mem + (count * size), /* markers at the end */
7521+ mem)) { /* buffer objects */
7522+ free_pages((unsigned long) mem, order);
7523+ kfree(buf);
7524+ return NULL;
7525+ }
7526+ return buf;
7527+}
7528+
7529+static void free_ft_buffer(struct ft_buffer* buf)
7530+{
7531+ int order = 0, pages = 1;
7532+ size_t total;
7533+
7534+ if (buf) {
7535+ total = (buf->slot_size + 1) * buf->slot_count;
7536+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
7537+ while (pages < total) {
7538+ order++;
7539+ pages *= 2;
7540+ }
7541+ free_pages((unsigned long) buf->buffer_mem, order);
7542+ kfree(buf);
7543+ }
7544+}
7545+
7546+
7547+/******************************************************************************/
7548+/* DEVICE FILE DRIVER */
7549+/******************************************************************************/
7550+
7551+#define NO_TIMESTAMPS 262144
7552+
7553+static DECLARE_MUTEX(feather_lock);
7554+static int use_count = 0;
7555+
7556+static int trace_release(struct inode *in, struct file *filp)
7557+{
7558+	int err = 0;
7559+
7560+ if (down_interruptible(&feather_lock)) {
7561+ err = -ERESTARTSYS;
7562+ goto out;
7563+ }
7564+
7565+ printk(KERN_ALERT "%s/%d disconnects from feather trace device. "
7566+ "use_count=%d\n",
7567+ current->comm, current->pid, use_count);
7568+
7569+ if (use_count == 1) {
7570+ /* disable events */
7571+ ft_disable_all_events();
7572+
7573+ /* wait for any pending events to complete */
7574+ set_current_state(TASK_UNINTERRUPTIBLE);
7575+ schedule_timeout(HZ);
7576+
7577+ printk(KERN_ALERT "Failed trace writes: %u\n",
7578+ trace_ts_buf->failed_writes);
7579+
7580+ free_ft_buffer(trace_ts_buf);
7581+ trace_ts_buf = NULL;
7582+ }
7583+
7584+ use_count--;
7585+ up(&feather_lock);
7586+out:
7587+ return err;
7588+}
7589+
7590+
7591+static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
7592+ loff_t *f_pos)
7593+{
7594+ /* we ignore f_pos, this is strictly sequential */
7595+ ssize_t error = 0;
7596+ struct timestamp ts;
7597+
7598+ if (down_interruptible(&feather_lock)) {
7599+ error = -ERESTARTSYS;
7600+ goto out;
7601+ }
7602+
7603+
7604+ while (len >= sizeof(struct timestamp)) {
7605+ if (ft_buffer_read(trace_ts_buf, &ts)) {
7606+ if (copy_to_user(to, &ts, sizeof(struct timestamp))) {
7607+ error = -EFAULT;
7608+ break;
7609+ } else {
7610+ len -= sizeof(struct timestamp);
7611+ to += sizeof(struct timestamp);
7612+ error += sizeof(struct timestamp);
7613+ }
7614+ } else {
7615+ set_current_state(TASK_INTERRUPTIBLE);
7616+ schedule_timeout(50);
7617+ if (signal_pending(current)) {
7618+ error = -ERESTARTSYS;
7619+ break;
7620+ }
7621+ }
7622+ }
7623+ up(&feather_lock);
7624+out:
7625+ return error;
7626+}
7627+
7628+#define ENABLE_CMD 0
7629+#define DISABLE_CMD 1
7630+
7631+static ssize_t trace_write(struct file *filp, const char __user *from,
7632+ size_t len, loff_t *f_pos)
7633+{
7634+ ssize_t error = -EINVAL;
7635+ unsigned long cmd;
7636+ unsigned long id;
7637+
7638+ if (len % sizeof(long) || len < 2 * sizeof(long))
7639+ goto out;
7640+
7641+ if (copy_from_user(&cmd, from, sizeof(long))) {
7642+ error = -EFAULT;
7643+ goto out;
7644+ }
7645+ len -= sizeof(long);
7646+ from += sizeof(long);
7647+
7648+ if (cmd != ENABLE_CMD && cmd != DISABLE_CMD)
7649+ goto out;
7650+
7651+ if (down_interruptible(&feather_lock)) {
7652+ error = -ERESTARTSYS;
7653+ goto out;
7654+ }
7655+
7656+ error = sizeof(long);
7657+ while (len) {
7658+ if (copy_from_user(&id, from, sizeof(long))) {
7659+ error = -EFAULT;
7660+			break; /* exit the loop; feather_lock is released below */
7661+ }
7662+ len -= sizeof(long);
7663+ from += sizeof(long);
7664+ if (cmd) {
7665+ printk(KERN_INFO
7666+ "Disabling feather-trace event %lu.\n", id);
7667+ ft_disable_event(id);
7668+ } else {
7669+ printk(KERN_INFO
7670+ "Enabling feather-trace event %lu.\n", id);
7671+ ft_enable_event(id);
7672+ }
7673+ error += sizeof(long);
7674+ }
7675+
7676+ up(&feather_lock);
7677+ out:
7678+ return error;
7679+}
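
A user-space sketch (not part of the patch) of the command protocol accepted by trace_write() above: the first long selects the command (0 = ENABLE_CMD, 1 = DISABLE_CMD) and every following long is a Feather-Trace event ID. The device path and event IDs are assumptions; the driver only reserves major number 252.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* enable two (made-up) Feather-Trace event IDs */
	long cmd[3] = { 0 /* ENABLE_CMD */, 100, 101 };
	/* assumed node, e.g. created with: mknod /dev/ft_trace c 252 0 */
	int fd = open("/dev/ft_trace", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* trace_write() reports the number of longs it consumed, in bytes */
	if (write(fd, cmd, sizeof(cmd)) != sizeof(cmd))
		perror("write");
	/* timestamps can now be read back from the same descriptor as a
	 * stream of struct timestamp records
	 */
	close(fd);
	return 0;
}
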
7680+
7681+static int trace_open(struct inode *in, struct file *filp)
7682+{
7683+ int err = 0;
7684+ unsigned int count = NO_TIMESTAMPS;
7685+
7686+ if (down_interruptible(&feather_lock)) {
7687+ err = -ERESTARTSYS;
7688+ goto out;
7689+ }
7690+
7691+ while (count && !trace_ts_buf) {
7692+ printk("trace: trying to allocate %u time stamps.\n", count);
7693+ trace_ts_buf = alloc_ft_buffer(count, sizeof(struct timestamp));
7694+ count /= 2;
7695+ }
7696+ if (!trace_ts_buf)
7697+ err = -ENOMEM;
7698+ else
7699+ use_count++;
7700+
7701+ up(&feather_lock);
7702+out:
7703+ return err;
7704+}
7705+
7706+/******************************************************************************/
7707+/* Device Registration */
7708+/******************************************************************************/
7709+
7710+#define FT_TRACE_MAJOR 252
7711+
7712+struct file_operations ft_trace_fops = {
7713+ .owner = THIS_MODULE,
7714+ .open = trace_open,
7715+ .release = trace_release,
7716+ .write = trace_write,
7717+ .read = trace_read,
7718+};
7719+
7720+
7721+static int __init register_buffer_dev(const char* name,
7722+ struct file_operations* fops,
7723+ int major, int count)
7724+{
7725+ dev_t trace_dev;
7726+ struct cdev *cdev;
7727+ int error = 0;
7728+
7729+ trace_dev = MKDEV(major, 0);
7730+ error = register_chrdev_region(trace_dev, count, name);
7731+ if (error)
7732+ {
7733+ printk(KERN_WARNING "trace: "
7734+ "Could not register major/minor number %d\n", major);
7735+ return error;
7736+ }
7737+ cdev = cdev_alloc();
7738+ if (!cdev) {
7739+ printk(KERN_WARNING "trace: "
7740+ "Could not get a cdev for %s.\n", name);
7741+ return -ENOMEM;
7742+ }
7743+ cdev->owner = THIS_MODULE;
7744+ cdev->ops = fops;
7745+ error = cdev_add(cdev, trace_dev, count);
7746+ if (error) {
7747+ printk(KERN_WARNING "trace: "
7748+ "add_cdev failed for %s.\n", name);
7749+ return -ENOMEM;
7750+ }
7751+ return error;
7752+
7753+}
7754+
7755+static int __init init_sched_trace(void)
7756+{
7757+ int error = 0;
7758+
7759+ printk("Initializing Feather-Trace device\n");
7760+ /* dummy entry to make linker happy */
7761+ ft_event0(666, save_timestamp);
7762+
7763+ error = register_buffer_dev("ft_trace", &ft_trace_fops,
7764+ FT_TRACE_MAJOR, 1);
7765+ return error;
7766+}
7767+
7768+module_init(init_sched_trace);
diff --git a/index.html b/index.html
index 8ec23fb..3b75c94 100644
--- a/index.html
+++ b/index.html
@@ -125,6 +125,23 @@
125 125 <cite>Proceedings of the 14th IEEE International Conference on Embedded and Real-Time Computing Systems and Applications</cite>, to appear, August 2008.
126 126 <a href="http://www.cs.unc.edu/~anderson/papers/rtcsa08.ps">Postscript</a>. <a href="http://www.cs.unc.edu/~anderson/papers/rtcsa08.pdf">PDF</a>.
127 127 </p>
128 <p><strong>Note:</strong> The work described in this paper took place in a branch that is currently not part of
129 the main distribution. For reference, we provide the branch as a separate download:
130 </p>
131 <ul>
132 <li>
133 <a href="download/RTCSA08/litmus-rt-RTCSA08.patch">litmus-rt-RTCSA08.patch</a>
134 </li>
135 <li>
136 <a href="download/RTCSA08/liblitmus-RTCSA08.tgz">liblitmus-RTCSA08.tgz</a>
137 </li>
138 <li><a href="download/RTCSA08/SHA256SUMS">SHA256 check sums</a>
139 </li>
140 </ul>
141 <p>Please don't use this version for active development. If you are interested in this work, it would be best
142 to first port the desired features to LITMUS<sup>RT</sup> 2008 and merge them into the main distribution.
143 </p>
144
128 145 </li>
129 146
130 147 <li>