Makefile | 2 +-
arch/i386/Kconfig | 28 ++
arch/i386/kernel/apic.c | 92 +++++
arch/i386/kernel/i386_ksyms.c | 1 +
arch/i386/kernel/signal.c | 3 +-
arch/i386/kernel/smp.c | 1 +
arch/i386/kernel/syscall_table.S | 22 +
fs/exec.c | 5 +-
fs/inode.c | 2 +
include/asm-i386/unistd.h | 25 ++-
include/linux/completion.h | 2 +
include/linux/fs.h | 5 +
include/linux/sched.h | 14 +
include/linux/uaccess.h | 16 +
include/litmus/edf_common.h | 27 ++
include/litmus/fdso.h | 78 ++++
include/litmus/feather_buffer.h | 108 +++++
include/litmus/feather_trace.h | 93 +++++
include/litmus/jobs.h | 9 +
include/litmus/litmus.h | 200 +++++++++
include/litmus/rm_common.h | 44 ++
include/litmus/rt_domain.h | 94 +++++
include/litmus/rt_param.h | 177 ++++++++
include/litmus/sched_plugin.h | 120 ++++++
include/litmus/sched_trace.h | 31 ++
include/litmus/trace.h | 106 +++++
kernel/exit.c | 4 +
kernel/fork.c | 5 +
kernel/sched.c | 177 ++++++++-
lib/semaphore-sleepers.c | 2 +-
litmus/Makefile | 9 +
litmus/edf_common.c | 95 +++++
litmus/fdso.c | 289 +++++++++++++
litmus/ft_event.c | 104 +++++
litmus/jobs.c | 43 ++
litmus/litmus.c | 830 ++++++++++++++++++++++++++++++++++++++
litmus/litmus_sem.c | 551 +++++++++++++++++++++++++
litmus/pcp.c | 764 +++++++++++++++++++++++++++++++++++
litmus/rm_common.c | 76 ++++
litmus/rt_domain.c | 130 ++++++
litmus/sched_gsn_edf.c | 733 +++++++++++++++++++++++++++++++++
litmus/sched_plugin.c | 169 ++++++++
litmus/sched_psn_edf.c | 458 +++++++++++++++++++++
litmus/sched_rm.c | 397 ++++++++++++++++++
litmus/sched_trace.c | 541 +++++++++++++++++++++++++
litmus/sync.c | 84 ++++
litmus/trace.c | 302 ++++++++++++++
47 files changed, 7052 insertions(+), 16 deletions(-)
diff --git a/Makefile b/Makefile
index 7e2750f..79cf62b 100644
--- a/Makefile
+++ b/Makefile
@@ -553,7 +553,7 @@ export mod_strip_cmd
ifeq ($(KBUILD_EXTMOD),)
-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ litmus/
vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
$(core-y) $(core-m) $(drivers-y) $(drivers-m) \
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 0dfee81..da6f1e9 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1210,6 +1210,7 @@ config KPROBES
a probepoint and specifies the callback. Kprobes is useful
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".
+
endmenu
source "arch/i386/Kconfig.debug"
@@ -1259,3 +1260,30 @@ config X86_TRAMPOLINE
config KTIME_SCALAR
bool
default y
+
+
+menu "LITMUS^RT"
+
+
+config SCHED_TASK_TRACE
+ bool "Trace real-time tasks"
+ default y
+ help
+ Include support for the sched_trace_XXX() tracing functions. This
+ allows the collection of real-time task events such as job
+ completions, job releases, early completions, etc. This results in a
+ small overhead in the scheduling code. Disable if the overhead is not
+ acceptable (e.g., benchmarking).
+
+config SCHED_DEBUG_TRACE
+ bool "TRACE() debugging"
+ default y
+ help
+	  Include support for sched_trace_log_message(), which is used to
+ implement TRACE(). If disabled, no TRACE() messages will be included
+ in the kernel, and no overheads due to debugging statements will be
+ incurred by the scheduler. Disable if the overhead is not acceptable
+ (e.g. benchmarking).
+
+
+endmenu
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 776d9be..36b0159 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
#include <linux/sysdev.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <litmus/litmus.h>
#include <asm/atomic.h>
#include <asm/smp.h>
@@ -43,6 +44,8 @@
#include "io_ports.h"
+#include <litmus/trace.h>
+
/*
* cpu_mask that denotes the CPUs that needs timer interrupt coming in as
* IPIs in place of local APIC timers
@@ -54,6 +57,15 @@ static cpumask_t timer_bcast_ipi;
*/
static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+/*
+ * Definitions and variables related to quantum synchronization.
+ */
+#define WAIT_TO_SYNC 30000 /* time after boot until sync (in ms) */
+static int stagger = 0; /* are we using staggered quanta? */
+static atomic_t qsync_time = ATOMIC_INIT(INITIAL_JIFFIES);
+static atomic_t quantum_sync_barrier = ATOMIC_INIT(0);
+static atomic_t sync_done = ATOMIC_INIT(0);
+
static inline void lapic_disable(void)
{
enable_local_apic = -1;
@@ -786,6 +798,23 @@ static int __init apic_set_verbosity(char *str)
__setup("apic=", apic_set_verbosity);
+/*
+ * Determine whether to use aligned or staggered quanta.
+ */
+
+static int __init apic_synch_type(char *str)
+{
+ if (strcmp("aligned", str) == 0)
+ stagger = 0;
+ else if (strcmp("staggered", str) == 0)
+ stagger = 1;
+ else
+ stagger = 0; /* aligned quanta by default */
+ return 1;
+}
+
+__setup("quanta=", apic_synch_type);
+
static int __init detect_init_APIC (void)
{
u32 h, l, features;
@@ -1198,6 +1227,47 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
#undef APIC_DIVISOR
/*
+ * This function is called to align all quanta, and to stagger quanta if
+ * necessary. It relies on a barrier to synchronize all processors, so
+ * that they all reset their APIC timers at the same time. If quanta
+ * should be staggered, the appropriate stagger delay is then added at
+ * each processor.
+ */
+
+void synchronize_quanta(void)
+{
+ int cpu = smp_processor_id();
+ int total_cpus = num_online_cpus();
+ int stagger_interval = jiffies_to_usecs(1) / total_cpus;
+
+ /*
+ * Disable APIC timer, wait for all other processors to reach barrier,
+ * and re-enable all timers concurrently.
+ */
+ disable_APIC_timer();
+ atomic_inc(&quantum_sync_barrier);
+ while (atomic_read(&quantum_sync_barrier) < total_cpus) {
+ /* Delay, otherwise atomic_inc's cannot occur. */
+ udelay(1);
+ }
+
+ /* Add necessary stagger for this CPU, if required. */
+ if (stagger) {
+ int stagger_us = cpu * stagger_interval;
+ udelay(stagger_us);
+ }
+
+ /* Re-enable all timers. */
+ __setup_APIC_LVTT(calibration_result);
+ enable_APIC_timer();
+
+ /* The first CPU signals that quantum sync is complete. */
+ if (cpu == 0)
+ atomic_inc(&sync_done);
+}
+
+
+/*
* Local timer interrupt handler. It does both profiling and
* process statistics/rescheduling.
*
@@ -1209,11 +1279,32 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
inline void smp_local_timer_interrupt(void)
{
+/* s64 offset; */
+
+ TS_TICK_START;
+
profile_tick(CPU_PROFILING);
#ifdef CONFIG_SMP
update_process_times(user_mode_vm(get_irq_regs()));
#endif
+ /* Print out timing data - can be commented out if necessary. */
+/* offset = get_nsec_offset(); */
+/* TRACE("%d\n", offset); */
+
+ /*
+ * Synchronize quanta if we have reached qsync_time plus wait
+ * interval. The synchronization code itself is placed in its own
+ * (non-inline) function, to avoid issues with creating an inline
+ * function that is too large.
+ */
+ if (unlikely(!atomic_read(&sync_done) &&
+ time_after(jiffies,
+ (unsigned long)(atomic_read(&qsync_time) +
+ msecs_to_jiffies(WAIT_TO_SYNC))))) {
+ synchronize_quanta();
+ }
+
/*
* We take the 'long' return path, and there every subsystem
* grabs the apropriate locks (kernel lock/ irq lock).
@@ -1224,6 +1315,7 @@ inline void smp_local_timer_interrupt(void)
* Currently this isn't too much of an issue (performance wise),
* we can take more than 100K local irqs per second on a 100 MHz P5.
*/
+ TS_TICK_END;
}
/*
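
For reference, the per-CPU stagger delay computed in synchronize_quanta() can be
reproduced off-line. The following stand-alone sketch is not part of the patch;
it assumes HZ=1000 (so jiffies_to_usecs(1) is 1000 us) and a 4-CPU machine.

#include <stdio.h>

/* Mirrors the stagger computation above: each CPU delays its quantum by
 * cpu * (quantum length / number of online CPUs).
 */
int main(void)
{
	const int quantum_us = 1000;	/* one jiffy at HZ=1000 */
	const int total_cpus = 4;	/* example machine */
	const int stagger_interval = quantum_us / total_cpus;
	int cpu;

	for (cpu = 0; cpu < total_cpus; cpu++)
		printf("cpu %d: stagger delay %d us\n",
		       cpu, cpu * stagger_interval);
	return 0;
}

With quanta=aligned (the default), all delays are zero and every CPU re-enables
its APIC timer at the same instant.
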
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
index e3d4b73..9670f77 100644
--- a/arch/i386/kernel/i386_ksyms.c
+++ b/arch/i386/kernel/i386_ksyms.c
@@ -6,6 +6,7 @@ EXPORT_SYMBOL(__down_failed);
EXPORT_SYMBOL(__down_failed_interruptible);
EXPORT_SYMBOL(__down_failed_trylock);
EXPORT_SYMBOL(__up_wakeup);
+
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 65d7620..e95d732 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -651,7 +651,6 @@ void do_notify_resume(struct pt_regs *regs, void *_unused,
/* deal with pending signal delivery */
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
- do_signal(regs);
-
+ do_signal(regs);
clear_thread_flag(TIF_IRET);
}
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 5285aff..91921a3 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -605,6 +605,7 @@ void smp_send_stop(void)
*/
fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
{
+ set_tsk_need_resched(current);
ack_APIC_irq();
}
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 2697e92..48e5e8e 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -319,3 +319,25 @@ ENTRY(sys_call_table)
.long sys_move_pages
.long sys_getcpu
.long sys_epoll_pwait
+ /* LITMUS syscalls */
+ .long sys_set_rt_task_param /* 320 */
+ .long sys_get_rt_task_param
+ .long sys_task_mode_transition
+ .long sys_sleep_next_period
+ .long sys_register_np_flag
+ .long sys_exit_np /* 325 */
+ .long sys_od_open
+ .long sys_od_close
+ .long sys_pi_down
+ .long sys_pi_up
+ .long sys_srp_down /* 330 */
+ .long sys_srp_up
+ .long sys_reg_task_srp_sem
+ .long sys_query_job_no
+ .long sys_wait_for_job_release
+ .long sys_wait_for_ts_release /* 335 */
+ .long sys_release_ts
+ .long sys_pcp_down
+ .long sys_pcp_up
+ .long sys_dpcp_invoke
+ .long sys_dpcp_agent /* 340 */
diff --git a/fs/exec.c b/fs/exec.c
index 11fe93f..353d6e3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -54,6 +54,8 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
+#include <litmus/litmus.h>
+
#ifdef CONFIG_KMOD
#include <linux/kmod.h>
#endif
@@ -1140,7 +1142,8 @@ int do_execve(char * filename,
if (IS_ERR(file))
goto out_kfree;
- sched_exec();
+ sched_exec();
+ litmus_exec();
bprm->p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
diff --git a/fs/inode.c b/fs/inode.c
index bf21dc6..fcf8ce3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -205,6 +205,8 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->inotify_watches);
mutex_init(&inode->inotify_mutex);
#endif
+ INIT_LIST_HEAD(&inode->i_obj_list);
+ mutex_init(&inode->i_obj_mutex);
}
EXPORT_SYMBOL(inode_init_once);
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 833fa17..d0ba5c3 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -325,10 +325,33 @@
#define __NR_move_pages 317
#define __NR_getcpu 318
#define __NR_epoll_pwait 319
+/* LITMUS */
+#define __NR_set_rt_task_param 320
+#define __NR_get_rt_task_param 321
+#define __NR_task_mode 322
+#define __NR_sleep_next_period 323
+#define __NR_register_np_flag 324
+#define __NR_exit_np 325
+#define __NR_od_open 326
+#define __NR_od_close 327
+#define __NR_pi_down 328
+#define __NR_pi_up 329
+#define __NR_srp_down 330
+#define __NR_srp_up 331
+#define __NR_reg_task_srp_sem 332
+#define __NR_query_job_no 333
+#define __NR_wait_for_job_release 334
+#define __NR_wait_for_ts_release 335
+#define __NR_release_ts 336
+#define __NR_pcp_down 337
+#define __NR_pcp_up 338
+#define __NR_dpcp_invoke 339
+#define __NR_dpcp_agent 340
+
#ifdef __KERNEL__
-#define NR_syscalls 320
+#define NR_syscalls 343
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
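
Until a user-space library provides proper wrappers, these entries can be
exercised with raw syscall(2) calls. A hedged sketch follows; the wrapper name
is invented, and the syscall number only exists on a kernel carrying this patch.

#include <unistd.h>
#include <sys/syscall.h>

#define __NR_sleep_next_period	323	/* from the table above */

/* Hypothetical wrapper: suspend the calling real-time task until its
 * next job release.
 */
static inline long litmus_sleep_next_period(void)
{
	return syscall(__NR_sleep_next_period);
}
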
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 268c5a4..dc633ed 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -51,6 +51,8 @@ extern unsigned long FASTCALL(wait_for_completion_interruptible_timeout(
extern void FASTCALL(complete(struct completion *));
extern void FASTCALL(complete_all(struct completion *));
+extern void FASTCALL(complete_n(struct completion *, int n));
+
#define INIT_COMPLETION(x) ((x).done = 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1410e53..4e1117c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -524,6 +524,8 @@ static inline int mapping_writably_mapped(struct address_space *mapping)
#define i_size_ordered_init(inode) do { } while (0)
#endif
+struct inode_obj_id_table;
+
struct inode {
struct hlist_node i_hash;
struct list_head i_list;
@@ -589,6 +591,9 @@ struct inode {
void *i_security;
#endif
void *i_private; /* fs or device private pointer */
+
+ struct list_head i_obj_list;
+ struct mutex i_obj_mutex;
};
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4463735..c7929d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3,6 +3,8 @@
#include <linux/auxvec.h> /* For AT_VECTOR_SIZE */
+#include <litmus/rt_param.h>
+
/*
* cloning flags:
*/
@@ -796,6 +798,8 @@ enum sleep_type {
SLEEP_INTERRUPTED,
};
+struct od_table_entry;
+
struct prio_array;
struct task_struct {
@@ -1051,6 +1055,16 @@ struct task_struct {
#ifdef CONFIG_FAULT_INJECTION
int make_it_fail;
#endif
+ /* litmus parameters and state */
+ struct rt_param rt_param;
+
+ /* allow scheduler plugins to queue in release lists, etc.
+ * Cleanup: Move this into the rt_param struct.
+ */
+ struct list_head rt_list;
+
+ /* references to PI semaphores, etc. */
+ struct od_table_entry* od_table;
};
static inline pid_t process_group(struct task_struct *tsk)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 975c963..6ae0ff9 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -84,4 +84,20 @@ static inline unsigned long __copy_from_user_nocache(void *to,
ret; \
})
+/* This is a naive attempt at a write version of the above native Linux macro.
+ */
+#define poke_kernel_address(val, addr) \
+ ({ \
+ long ret; \
+ mm_segment_t old_fs = get_fs(); \
+ \
+ set_fs(KERNEL_DS); \
+ pagefault_disable(); \
+ ret = __put_user(val, (__force typeof(val) __user *)(addr)); \
+ pagefault_enable(); \
+ set_fs(old_fs); \
+ ret; \
+ })
+
+
#endif /* __LINUX_UACCESS_H__ */
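
A hedged kernel-context sketch of how poke_kernel_address() might be used; the
helper below is made up and not part of the patch.

#include <linux/errno.h>
#include <linux/uaccess.h>

/* Write a word to a possibly unmapped kernel address without oopsing;
 * __put_user() returns 0 on success and -EFAULT on a fault.
 */
int poke_word(unsigned long *addr, unsigned long val)
{
	return poke_kernel_address(val, addr) ? -EFAULT : 0;
}
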
diff --git a/include/litmus/edf_common.h b/include/litmus/edf_common.h
new file mode 100644
index 0000000..df711f5
--- /dev/null
+++ b/include/litmus/edf_common.h
@@ -0,0 +1,27 @@
+/* EDF common data structures and utility functions shared by all EDF
+ * based scheduler plugins
+ */
+
+/* CLEANUP: Add comments and make it less messy.
+ *
+ */
+
+#ifndef __UNC_EDF_COMMON_H__
+#define __UNC_EDF_COMMON_H__
+
+#include <litmus/rt_domain.h>
+
+
+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched);
+
+int edf_higher_prio(struct task_struct* first,
+ struct task_struct* second);
+
+int edf_ready_order(struct list_head* a, struct list_head* b);
+
+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
+
+#define job_completed(t) (!is_be(t) && \
+	(t)->rt_param.job_params.exec_time == (t)->rt_param.task_params.exec_cost)
+
+#endif
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
new file mode 100644
index 0000000..5544c1b
--- /dev/null
+++ b/include/litmus/fdso.h
@@ -0,0 +1,78 @@
+/* fdso.h - file descriptor attached shared objects
+ *
+ * (c) 2007 B. Brandenburg, LITMUS^RT project
+ */
+
+#ifndef _LINUX_FDSO_H_
+#define _LINUX_FDSO_H_
+
+#include <linux/list.h>
+#include <asm/atomic.h>
+
+#include <linux/fs.h>
+
+#define MAX_OBJECT_DESCRIPTORS 32
+
+typedef enum {
+ MIN_OBJ_TYPE = 0,
+
+ PI_SEM = 0,
+ SRP_SEM = 1,
+ PCP_SEM = 2,
+ MPCP_SEM = 3,
+
+ MAX_OBJ_TYPE = 3
+} obj_type_t;
+
+struct inode_obj_id {
+ struct list_head list;
+ atomic_t count;
+ struct inode* inode;
+
+ obj_type_t type;
+ void* obj;
+ unsigned int id;
+};
+
+
+struct od_table_entry {
+ unsigned int used;
+
+ struct inode_obj_id* obj;
+ void* extra;
+};
+
+struct fdso_ops {
+ void* (*create) (void);
+ void (*destroy)(void*);
+ int (*open) (struct od_table_entry*, void* __user);
+ int (*close) (struct od_table_entry*);
+};
+
+/* translate a userspace supplied od into the raw table entry
+ * returns NULL if od is invalid
+ */
+struct od_table_entry* __od_lookup(int od);
+
+/* translate a userspace supplied od into the associated object
+ * returns NULL if od is invalid
+ */
+static inline void* od_lookup(int od, obj_type_t type)
+{
+ struct od_table_entry* e = __od_lookup(od);
+ return e && e->obj->type == type ? e->obj->obj : NULL;
+}
+
+static inline void* od_lookup2(int od, obj_type_t type, obj_type_t type2)
+{
+ struct od_table_entry* e = __od_lookup(od);
+ return e && (e->obj->type == type || e->obj->type == type2) ?
+ e->obj->obj : NULL;
+}
+
+#define lookup_pi_sem(od) ((struct pi_semaphore*) od_lookup(od, PI_SEM))
+#define lookup_srp_sem(od) ((struct srp_semaphore*) od_lookup(od, SRP_SEM))
+#define lookup_pcp_sem(od) ((struct pcp_semaphore*) \
+ od_lookup2(od, PCP_SEM, MPCP_SEM))
+
+#endif
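
To sketch how a syscall backend might consume these helpers, consider the
following hypothetical function (not part of the patch).

#include <linux/errno.h>
#include <litmus/fdso.h>
#include <litmus/sched_plugin.h>	/* struct pi_semaphore */

/* Resolve a user-supplied object descriptor into the attached PI
 * semaphore, rejecting invalid or mistyped descriptors.
 */
long example_pi_op(int od)
{
	struct pi_semaphore *sem = lookup_pi_sem(od);

	if (!sem)
		return -EINVAL;
	/* ... operate on sem ... */
	return 0;
}
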
diff --git a/include/litmus/feather_buffer.h b/include/litmus/feather_buffer.h
new file mode 100644
index 0000000..c788227
--- /dev/null
+++ b/include/litmus/feather_buffer.h
@@ -0,0 +1,108 @@
+#ifndef _FEATHER_BUFFER_H_
+#define _FEATHER_BUFFER_H_
+
+/* requires UINT_MAX and memcpy */
+
+static inline int fetch_and_inc(int *val)
+{
+ int ret = 1;
+ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
+ return ret;
+}
+
+static inline int fetch_and_dec(int *val)
+{
+ int ret = -1;
+ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
+ return ret;
+}
+
+#define SLOT_FREE 0
+#define SLOT_BUSY 1
+#define SLOT_READY 2
+
+struct ft_buffer {
+ unsigned int slot_count;
+ unsigned int slot_size;
+
+ int free_count;
+ unsigned int write_idx;
+ unsigned int read_idx;
+
+ char* slots;
+ void* buffer_mem;
+ unsigned int failed_writes;
+};
+
+static inline int init_ft_buffer(struct ft_buffer* buf,
+ unsigned int slot_count,
+ unsigned int slot_size,
+ char* slots,
+ void* buffer_mem)
+{
+ int i = 0;
+ if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
+		/* The slot count must divide UINT_MAX + 1 so that when it
+ * wraps around the index correctly points to 0.
+ */
+ return 0;
+ } else {
+ buf->slot_count = slot_count;
+ buf->slot_size = slot_size;
+ buf->slots = slots;
+ buf->buffer_mem = buffer_mem;
+ buf->free_count = slot_count;
+ buf->write_idx = 0;
+ buf->read_idx = 0;
+ buf->failed_writes = 0;
+ for (i = 0; i < slot_count; i++)
+ buf->slots[i] = SLOT_FREE;
+ return 1;
+ }
+}
+
+static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
+{
+ int free = fetch_and_dec(&buf->free_count);
+ unsigned int idx;
+ if (free <= 0) {
+ fetch_and_inc(&buf->free_count);
+ *ptr = 0;
+ fetch_and_inc(&buf->failed_writes);
+ return 0;
+ } else {
+ idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
+ buf->slots[idx] = SLOT_BUSY;
+ *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
+ return 1;
+ }
+}
+
+static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
+{
+ unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
+ buf->slots[idx] = SLOT_READY;
+}
+
+
+/* exclusive reader access is assumed */
+static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
+{
+ unsigned int idx;
+ if (buf->free_count == buf->slot_count)
+ /* nothing available */
+ return 0;
+ idx = buf->read_idx % buf->slot_count;
+ if (buf->slots[idx] == SLOT_READY) {
+ memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
+ buf->slot_size);
+ buf->slots[idx] = SLOT_FREE;
+ buf->read_idx++;
+ fetch_and_inc(&buf->free_count);
+ return 1;
+ } else
+ return 0;
+}
+
+
+#endif
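
The buffer implements a wait-free, multi-writer/single-reader ring. The sketch
below shows the write/read protocol from user space; it assumes the header is
made available outside the kernel tree and an x86 target, since the helpers rely
on lock; xaddl.

#include <limits.h>	/* UINT_MAX, required by the header */
#include <string.h>	/* memcpy, required by the header */
#include <stdio.h>
#include <litmus/feather_buffer.h>

#define SLOTS 16	/* must divide UINT_MAX + 1, i.e. be a power of two */

static char slot_states[SLOTS];
static int  slot_data[SLOTS];
static struct ft_buffer buf;

int main(void)
{
	int *in, out;

	init_ft_buffer(&buf, SLOTS, sizeof(int), slot_states, slot_data);

	if (ft_buffer_start_write(&buf, (void **) &in)) {
		*in = 42;				/* fill the slot ...   */
		ft_buffer_finish_write(&buf, in);	/* ... then publish it */
	}
	if (ft_buffer_read(&buf, &out))
		printf("read %d\n", out);
	return 0;
}
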
diff --git a/include/litmus/feather_trace.h b/include/litmus/feather_trace.h
new file mode 100644
index 0000000..5c37ea7
--- /dev/null
+++ b/include/litmus/feather_trace.h
@@ -0,0 +1,93 @@
+#ifndef _FEATHER_TRACE_H_
+#define _FEATHER_TRACE_H_
+
+#define feather_callback __attribute__((regparm(0)))
+
+/* make the compiler reload any register that is not saved in
+ * a cdecl function call
+ */
+#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
+
+#define ft_event(id, callback) \
+ __asm__ __volatile__( \
+ "1: jmp 2f \n\t" \
+ " call " #callback " \n\t" \
+ ".section __event_table, \"aw\" \n\t" \
+ ".long " #id ", 0, 1b, 2f \n\t" \
+ ".previous \n\t" \
+ "2: \n\t" \
+ : : : CLOBBER_LIST)
+
+#define ft_event0(id, callback) \
+ __asm__ __volatile__( \
+ "1: jmp 2f \n\t" \
+ " subl $4, %%esp \n\t" \
+ " movl $" #id ", (%%esp) \n\t" \
+ " call " #callback " \n\t" \
+ " addl $4, %%esp \n\t" \
+ ".section __event_table, \"aw\" \n\t" \
+ ".long " #id ", 0, 1b, 2f \n\t" \
+ ".previous \n\t" \
+ "2: \n\t" \
+ : : : CLOBBER_LIST)
+
+#define ft_event1(id, callback, param) \
+ __asm__ __volatile__( \
+ "1: jmp 2f \n\t" \
+ " subl $8, %%esp \n\t" \
+ " movl %0, 4(%%esp) \n\t" \
+ " movl $" #id ", (%%esp) \n\t" \
+ " call " #callback " \n\t" \
+ " addl $8, %%esp \n\t" \
+ ".section __event_table, \"aw\" \n\t" \
+ ".long " #id ", 0, 1b, 2f \n\t" \
+ ".previous \n\t" \
+ "2: \n\t" \
+ : : "r" (param) : CLOBBER_LIST)
+
+#define ft_event2(id, callback, param, param2) \
+ __asm__ __volatile__( \
+ "1: jmp 2f \n\t" \
+ " subl $12, %%esp \n\t" \
+ " movl %1, 8(%%esp) \n\t" \
+ " movl %0, 4(%%esp) \n\t" \
+ " movl $" #id ", (%%esp) \n\t" \
+ " call " #callback " \n\t" \
+ " addl $12, %%esp \n\t" \
+ ".section __event_table, \"aw\" \n\t" \
+ ".long " #id ", 0, 1b, 2f \n\t" \
+ ".previous \n\t" \
+ "2: \n\t" \
+ : : "r" (param), "r" (param2) : CLOBBER_LIST)
+
+
+#define ft_event3(id, callback, p, p2, p3) \
+ __asm__ __volatile__( \
+ "1: jmp 2f \n\t" \
+ " subl $16, %%esp \n\t" \
+	        " movl %2, 12(%%esp)                                \n\t" \
+ " movl %1, 8(%%esp) \n\t" \
+ " movl %0, 4(%%esp) \n\t" \
+ " movl $" #id ", (%%esp) \n\t" \
+ " call " #callback " \n\t" \
+ " addl $16, %%esp \n\t" \
+ ".section __event_table, \"aw\" \n\t" \
+ ".long " #id ", 0, 1b, 2f \n\t" \
+ ".previous \n\t" \
+ "2: \n\t" \
+ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
+
+
+static inline unsigned long long ft_read_tsc(void)
+{
+ unsigned long long ret;
+ __asm__ __volatile__("rdtsc" : "=A" (ret));
+ return ret;
+}
+
+int ft_enable_event(unsigned long id);
+int ft_disable_event(unsigned long id);
+int ft_is_event_enabled(unsigned long id);
+int ft_disable_all_events(void);
+
+#endif
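
The ft_event*() macros compile to a jump that skips the callback until
ft_enable_event() patches the recorded location; the callback then receives the
event id (and any parameters) cdecl-style on the stack. A hedged sketch of
wiring up a one-parameter event; the id 55 and both function names are made up.

#include <litmus/feather_trace.h>

/* Callback for a one-parameter event: arguments arrive on the stack in
 * the order pushed by ft_event1(), i.e. the event id first.
 */
feather_callback void count_event(unsigned long id, unsigned long cpu)
{
	/* ... record (id, cpu) somewhere, e.g. in an ft_buffer ... */
}

void example_trigger(unsigned long cpu)
{
	/* A no-op unless ft_enable_event(55) has been called. */
	ft_event1(55, count_event, cpu);
}
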
diff --git a/include/litmus/jobs.h b/include/litmus/jobs.h
new file mode 100644
index 0000000..9bd361e
--- /dev/null
+++ b/include/litmus/jobs.h
@@ -0,0 +1,9 @@
+#ifndef __LITMUS_JOBS_H__
+#define __LITMUS_JOBS_H__
+
+void prepare_for_next_period(struct task_struct *t);
+void release_at(struct task_struct *t, lt_t start);
+long complete_job(void);
+
+#endif
+
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
new file mode 100644
index 0000000..5853ed5
--- /dev/null
+++ b/include/litmus/litmus.h
@@ -0,0 +1,200 @@
+/*
+ * Constant definitions related to
+ * scheduling policy.
+ */
+
+#ifndef _LINUX_LITMUS_H_
+#define _LINUX_LITMUS_H_
+
+#include <linux/jiffies.h>
+#include <litmus/sched_trace.h>
+
+typedef enum {
+ SCHED_LINUX = 0,
+ SCHED_GSN_EDF = 10,
+ SCHED_PSN_EDF = 11,
+ /* Add your scheduling policy here */
+
+ SCHED_DEFAULT = 0,
+ SCHED_INVALID = -1,
+} spolicy;
+
+
+typedef enum {
+ LITMUS_RESERVED_RANGE = 1024,
+
+} sched_setup_cmd_t;
+
+/* per-task modes */
+enum rt_task_mode_t {
+ BACKGROUND_TASK = 0,
+ LITMUS_RT_TASK = 1
+};
+
+/* Plugin boot options, for convenience */
+#define PLUGIN_LINUX "linux"
+#define PLUGIN_GSN_EDF "gsn_edf"
+#define PLUGIN_PSN_EDF "psn_edf"
+
+extern spolicy sched_policy;
+
+/* RT mode start time */
+extern volatile unsigned long rt_start_time;
+
+#define TRACE(fmt, args...) \
+ sched_trace_log_message("%d: " fmt, raw_smp_processor_id(), ## args)
+
+#define TRACE_TASK(t, fmt, args...) \
+ TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args)
+
+#define TRACE_CUR(fmt, args...) \
+ TRACE_TASK(current, fmt, ## args)
+
+#define TRACE_BUG_ON(cond) \
+ do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \
+ "called from %p current=%s/%d state=%d " \
+ "flags=%x partition=%d cpu=%d rtflags=%d"\
+ " job=%u knp=%d timeslice=%u\n", \
+ #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \
+ current->pid, current->state, current->flags, \
+ get_partition(current), smp_processor_id(), get_rt_flags(current), \
+ current->rt_param.job_params.job_no, current->rt_param.kernel_np, \
+ current->time_slice\
+ ); } while(0);
+
+
+/* in_list - is a given list_head queued on some list?
+ */
+static inline int in_list(struct list_head* list)
+{
+ return !( /* case 1: deleted */
+ (list->next == LIST_POISON1 &&
+ list->prev == LIST_POISON2)
+ ||
+ /* case 2: initialized */
+ (list->next == list &&
+ list->prev == list)
+ );
+}
+
+typedef int (*prio_cmp_t)(struct task_struct* first,
+ struct task_struct* second);
+
+typedef int (*list_cmp_t)(struct list_head*, struct list_head*);
+
+static inline unsigned int list_insert(struct list_head* new,
+ struct list_head* head,
+ list_cmp_t order_before)
+{
+ struct list_head *pos;
+ unsigned int passed = 0;
+
+ BUG_ON(!new);
+
+ /* find a spot where the new entry is less than the next */
+ list_for_each(pos, head) {
+ if (unlikely(order_before(new, pos))) {
+ /* pos is not less than new, thus insert here */
+ __list_add(new, pos->prev, pos);
+ goto out;
+ }
+ passed++;
+ }
+	/* If we get to this point, either the list is empty or every
+	 * queued element is less than new.
+	 * Let's add new to the end. */
+ list_add_tail(new, head);
+ out:
+ return passed;
+}
+
+void list_qsort(struct list_head* list, list_cmp_t less_than);
+
+
+#define RT_PREEMPTIVE 0x2050 /* = NP */
+#define RT_NON_PREEMPTIVE 0x4e50 /* = P */
+#define RT_EXIT_NP_REQUESTED 0x5251 /* = RQ */
+
+/* returns 1 if task t has registered an np flag and set it to RT_NON_PREEMPTIVE
+ */
+int is_np(struct task_struct *t);
+
+/* request that the task should call sys_exit_np()
+ */
+void request_exit_np(struct task_struct *t);
+
+/* kill naughty tasks
+ */
+void scheduler_signal(struct task_struct *t, unsigned int signal);
+void send_scheduler_signals(void);
+void np_mem_kill(struct task_struct *t);
+
+void litmus_fork(struct task_struct *tsk);
+void litmus_exec(void);
+/* clean up real-time state of a task */
+void exit_litmus(struct task_struct *dead_tsk);
+
+long transition_to_rt(struct task_struct* tsk);
+long transition_to_be(struct task_struct* tsk);
+
+#define is_realtime(t) ((t)->rt_param.is_realtime)
+#define rt_transition_pending(t) \
+ ((t)->rt_param.transition_pending)
+
+/* Realtime utility macros */
+#define get_rt_flags(t) ((t)->rt_param.flags)
+#define set_rt_flags(t,f) (t)->rt_param.flags=(f)
+#define get_exec_cost(t) ((t)->rt_param.task_params.exec_cost)
+#define get_exec_time(t) ((t)->rt_param.job_params.exec_time)
+#define get_rt_period(t) ((t)->rt_param.task_params.period)
+#define get_partition(t) (t)->rt_param.task_params.cpu
+#define get_deadline(t) ((t)->rt_param.job_params.deadline)
+#define get_class(t) ((t)->rt_param.task_params.cls)
+
+inline static int budget_exhausted(struct task_struct* t)
+{
+ return get_exec_time(t) >= get_exec_cost(t);
+}
+
+
+#define is_hrt(t)     \
+	((t)->rt_param.task_params.cls == RT_CLASS_HARD)
+#define is_srt(t)     \
+	((t)->rt_param.task_params.cls == RT_CLASS_SOFT)
+#define is_be(t)      \
+	((t)->rt_param.task_params.cls == RT_CLASS_BEST_EFFORT)
+
+#define get_release(t) ((t)->rt_param.job_params.release)
+
+/* Honor the flag in the preempt_count variable that is set
+ * when scheduling is in progress.
+ */
+#define is_running(t) \
+ ((t)->state == TASK_RUNNING || \
+ (t)->thread_info->preempt_count & PREEMPT_ACTIVE)
+
+#define is_blocked(t) \
+ (!is_running(t))
+#define is_released(t, now) \
+ (lt_before_eq(get_release(t), now))
+#define is_tardy(t, now) \
+ (lt_before_eq((t)->rt_param.job_params.deadline, now))
+
+/* real-time comparison macros */
+#define earlier_deadline(a, b) (lt_before(\
+ (a)->rt_param.job_params.deadline,\
+ (b)->rt_param.job_params.deadline))
+#define earlier_release(a, b) (lt_before(\
+ (a)->rt_param.job_params.release,\
+ (b)->rt_param.job_params.release))
+
+#define shorter_period(a, b) (lt_before(\
+ (a)->rt_param.task_params.period, \
+ (b)->rt_param.task_params.period))
+
+#define make_np(t) do {t->rt_param.kernel_np++;} while(0);
+#define take_np(t) do {t->rt_param.kernel_np--;} while(0);
+
+void srp_ceiling_block(void);
+
+#endif
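
list_insert() is the ordered-queue primitive the plugins build on: it walks head
until order_before(new, pos) holds, links new there, and returns how many
elements were passed. A hedged sketch using the EDF comparator declared in
litmus/edf_common.h; the queue and function below are made up and locking is
omitted.

#include <linux/list.h>
#include <litmus/litmus.h>
#include <litmus/edf_common.h>

static LIST_HEAD(demo_queue);

/* Enqueue a real-time task in non-decreasing deadline order. */
void enqueue_by_deadline(struct task_struct *t)
{
	list_insert(&t->rt_list, &demo_queue, edf_ready_order);
}
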
diff --git a/include/litmus/rm_common.h b/include/litmus/rm_common.h
new file mode 100644
index 0000000..11e8365
--- /dev/null
+++ b/include/litmus/rm_common.h
@@ -0,0 +1,44 @@
+/* rate monotonic helper functions.
+ */
+
+
+#ifndef __UNC_RM_COMMON_H__
+#define __UNC_RM_COMMON_H__
+
+#include <litmus/rt_domain.h>
+
+static inline int _rm_higher_prio(struct pcp_priority *p1,
+ struct pcp_priority *p2)
+{
+	/* Does the second priority exist?  If not, the first
+	 * priority (which belongs to a real-time task) is
+	 * higher.
+	 */
+
+ if (unlikely(!p2))
+ return 1;
+
+ if (p1->in_global_cs == p2->in_global_cs) {
+ /* tie break by RM priority */
+ if (p1->prio == p2->prio)
+ /* tie break equal periods by PID */
+ return p1->pid < p2->pid;
+ else
+ /* shorter period or lower index has higher priority */
+ return p1->prio < p2->prio;
+ } else
+		/* global critical sections always have higher priority */
+ return p1->in_global_cs > p2->in_global_cs;
+}
+
+
+void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched);
+
+int rm_higher_prio(struct task_struct* first,
+ struct task_struct* second);
+
+int rm_ready_order(struct list_head* a, struct list_head* b);
+
+int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t);
+
+#endif
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
new file mode 100644
index 0000000..79b6034
--- /dev/null
+++ b/include/litmus/rt_domain.h
@@ -0,0 +1,94 @@
+/* CLEANUP: Add comments and make it less messy.
+ *
+ */
+
+#ifndef __UNC_RT_DOMAIN_H__
+#define __UNC_RT_DOMAIN_H__
+
+struct _rt_domain;
+
+typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
+typedef void (*release_at_t)(struct task_struct *t, lt_t start);
+
+typedef struct _rt_domain {
+ /* runnable rt tasks are in here */
+ rwlock_t ready_lock;
+ struct list_head ready_queue;
+
+ /* real-time tasks waiting for release are in here */
+ spinlock_t release_lock;
+ struct list_head release_queue;
+
+ /* how do we check if we need to kick another CPU? */
+ check_resched_needed_t check_resched;
+
+ /* how are tasks ordered in the ready queue? */
+ list_cmp_t order;
+} rt_domain_t;
+
+#define next_ready(rt) \
+ (list_entry((rt)->ready_queue.next, struct task_struct, rt_list))
+
+#define ready_jobs_pending(rt) \
+ (!list_empty(&(rt)->ready_queue))
+
+void rt_domain_init(rt_domain_t *rt, check_resched_needed_t f,
+ list_cmp_t order);
+
+void __add_ready(rt_domain_t* rt, struct task_struct *new);
+void __add_release(rt_domain_t* rt, struct task_struct *task);
+
+struct task_struct* __take_ready(rt_domain_t* rt);
+struct task_struct* __peek_ready(rt_domain_t* rt);
+
+void try_release_pending(rt_domain_t* rt);
+void __release_pending(rt_domain_t* rt);
+
+static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
+{
+ unsigned long flags;
+ /* first we need the write lock for rt_ready_queue */
+ write_lock_irqsave(&rt->ready_lock, flags);
+ __add_ready(rt, new);
+ write_unlock_irqrestore(&rt->ready_lock, flags);
+}
+
+static inline struct task_struct* take_ready(rt_domain_t* rt)
+{
+ unsigned long flags;
+ struct task_struct* ret;
+ /* first we need the write lock for rt_ready_queue */
+ write_lock_irqsave(&rt->ready_lock, flags);
+ ret = __take_ready(rt);
+ write_unlock_irqrestore(&rt->ready_lock, flags);
+ return ret;
+}
+
+
+static inline void add_release(rt_domain_t* rt, struct task_struct *task)
+{
+ unsigned long flags;
+	/* the release queue is protected by its own spinlock */
+ spin_lock_irqsave(&rt->release_lock, flags);
+ __add_release(rt, task);
+ spin_unlock_irqrestore(&rt->release_lock, flags);
+}
+
+static inline int __jobs_pending(rt_domain_t* rt)
+{
+ return !list_empty(&rt->ready_queue);
+}
+
+static inline int jobs_pending(rt_domain_t* rt)
+{
+ unsigned long flags;
+ int ret;
+	/* the read lock suffices to check the ready queue */
+ read_lock_irqsave(&rt->ready_lock, flags);
+ ret = __jobs_pending(rt);
+ read_unlock_irqrestore(&rt->ready_lock, flags);
+ return ret;
+}
+
+
+#endif
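
A hedged sketch of the intended rt_domain life cycle, using the EDF ordering
from litmus/edf_common.h; the domain, the resched hook, and the caller are made
up for illustration.

#include <litmus/litmus.h>
#include <litmus/rt_domain.h>
#include <litmus/edf_common.h>

static rt_domain_t demo_domain;

/* A real plugin would check here whether the newly ready task preempts
 * whatever is currently scheduled and kick the corresponding CPU.
 */
static int demo_check_resched(rt_domain_t *rt)
{
	return 0;
}

void demo_usage(struct task_struct *t)
{
	edf_domain_init(&demo_domain, demo_check_resched);

	add_ready(&demo_domain, t);		/* takes the ready_lock */

	if (jobs_pending(&demo_domain)) {
		struct task_struct *next = take_ready(&demo_domain);
		TRACE_TASK(next, "selected\n");
		/* ... hand next to the scheduler ... */
	}
}
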
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
new file mode 100644
index 0000000..37a4495
--- /dev/null
+++ b/include/litmus/rt_param.h
@@ -0,0 +1,177 @@
+/*
+ * Definition of the scheduler plugin interface.
+ *
+ */
+#ifndef _LINUX_RT_PARAM_H_
+#define _LINUX_RT_PARAM_H_
+
+/* Litmus time type. */
+typedef unsigned long long lt_t;
+
+static inline int lt_after(lt_t a, lt_t b)
+{
+ return ((long long) b) - ((long long) a) < 0;
+}
+#define lt_before(a, b) lt_after(b, a)
+
+static inline int lt_after_eq(lt_t a, lt_t b)
+{
+ return ((long long) a) - ((long long) b) >= 0;
+}
+#define lt_before_eq(a, b) lt_after_eq(b, a)
+
+/* different types of clients */
+typedef enum {
+ RT_CLASS_HARD,
+ RT_CLASS_SOFT,
+ RT_CLASS_BEST_EFFORT
+} task_class_t;
+
+struct rt_task {
+ lt_t exec_cost;
+ lt_t period;
+ lt_t phase;
+ lt_t prio;
+ unsigned int cpu;
+ task_class_t cls;
+};
+
+#define DPCP_WAIT 0x1
+#define DPCP_COMPLETE 0x2
+
+/* don't export internal data structures to user space (liblitmus) */
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+
+struct rt_job {
+ /* Time instant the the job was or will be released. */
+	/* Time instant the job was or will be released.  */
+ /* What is the current deadline? */
+ lt_t deadline;
+ /* How much service has this job received so far?
+ */
+ lt_t exec_time;
+
+ /* Which job is this. This is used to let user space
+ * specify which job to wait for, which is important if jobs
+ * overrun. If we just call sys_sleep_next_period() then we
+ * will unintentionally miss jobs after an overrun.
+ *
+ * Increase this sequence number when a job is released.
+ */
+ unsigned int job_no;
+
+ /* when did this job start executing? */
+ lt_t exec_start;
+};
+
+
+/* make priority inheritance cleaner for PCP */
+struct pcp_priority {
+ lt_t prio;
+ int in_global_cs;
+ int pid;
+};
+
+struct pcp_semaphore;
+
+/* RT task parameters for scheduling extensions
+ * These parameters are inherited during clone and therefore must
+ * be explicitly set up before the task set is launched.
+ */
+struct rt_param {
+ /* is the task sleeping? */
+ unsigned int flags:8;
+
+ /* Real-time marker: 1 iff it is a LITMUS real-time task.
+ */
+ unsigned int is_realtime:1;
+
+ /* is a BE->RT or RT->BE transition pending? */
+ unsigned int transition_pending:1;
+
+ /* is this task under control of litmus?
+ *
+ * this is necessary because otherwise signal delivery code
+ * may try to wake up a task that is already queued in plugin
+ * data structures.
+ *
+ * bbb: I believe this flag is fundamentally flawed and should be
+ * taken out in the redesign.
+ */
+ unsigned int litmus_controlled:1;
+
+ /* do we need to check for srp blocking? */
+ unsigned int srp_non_recurse:1;
+
+ /* if a BE->RT transition failed, then this field contains the error */
+ unsigned long transition_error;
+
+ /* user controlled parameters */
+ struct rt_task task_params;
+
+ /* timing parameters */
+ struct rt_job job_params;
+
+
+	/* Task representing the current "inherited" task
+	 * priority, assigned by the inherit_priority and
+	 * return_priority callbacks in the scheduler plugins.
+	 * Could point to self if PI does not result in
+	 * an increased task priority.
+	 */
+ struct task_struct* inh_task;
+
+ /* Don't just dereference this pointer in kernel space!
+ * It might very well point to junk or nothing at all.
+ * NULL indicates that the task has not requested any non-preemptable
+ * section support.
+ * Not inherited upon fork.
+ */
+ short* np_flag;
+
+ /* For the FMLP under PSN-EDF, it is required to make the task
+ * non-preemptive from kernel space. In order not to interfere with
+ * user space, this counter indicates the kernel space np setting.
+ * kernel_np > 0 => task is non-preemptive
+ */
+ unsigned int kernel_np;
+
+ /* This field can be used by plugins to store where the task
+ * is currently scheduled. It is the responsibility of the
+ * plugin to avoid race conditions.
+ *
+ * Used by GSN-EDF.
+ */
+ int scheduled_on;
+
+ /* This field can be used by plugins to store where the task
+ * is currently linked. It is the responsibility of the plugin
+ * to avoid race conditions.
+ *
+ * Used by GSN-EDF.
+ */
+ int linked_on;
+
+ /* Used by RM
+ */
+ struct pcp_priority pcp_prio;
+ struct pcp_priority* cur_prio;
+ struct list_head owned_semaphores;
+ struct pcp_semaphore* blocked_on;
+
+ /* Fields saved before BE->RT transition.
+ */
+ int old_policy;
+ int old_prio;
+};
+
+/* Possible RT flags */
+#define RT_F_RUNNING 0x00000000
+#define RT_F_SLEEP 0x00000001
+#define RT_F_EXIT_SEM 0x00000008
+
+#endif
+
+#endif
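
For illustration, a hedged sketch of how user space (e.g. a future liblitmus)
might fill in struct rt_task before handing it to sys_set_rt_task_param(); the
numeric values and the time base are arbitrary, and the header is assumed to be
exported to user space.

#include <litmus/rt_param.h>

/* Parameters for a periodic hard real-time task pinned to CPU 0.
 * 'prio' is presumably only relevant to fixed-priority plugins
 * (see rm_common.h).
 */
static struct rt_task demo_params = {
	.exec_cost = 10,
	.period    = 100,
	.phase     = 0,
	.prio      = 0,
	.cpu       = 0,
	.cls       = RT_CLASS_HARD,
};
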
diff --git a/include/litmus/sched_plugin.h b/include/litmus/sched_plugin.h
new file mode 100644
index 0000000..337668f
--- /dev/null
+++ b/include/litmus/sched_plugin.h
@@ -0,0 +1,120 @@
+/*
+ * Definition of the scheduler plugin interface.
+ *
+ */
+#ifndef _LINUX_SCHED_PLUGIN_H_
+#define _LINUX_SCHED_PLUGIN_H_
+
+#include <linux/sched.h>
+#include <litmus/litmus.h>
+
+/* struct for semaphore with priority inheritance */
+struct pi_semaphore {
+ atomic_t count;
+ int sleepers;
+ wait_queue_head_t wait;
+ union {
+ /* highest-prio holder/waiter */
+ struct task_struct *task;
+ struct task_struct* cpu_task[NR_CPUS];
+ } hp;
+ /* current lock holder */
+ struct task_struct *holder;
+};
+
+int set_hp_task(struct pi_semaphore *sem, prio_cmp_t cmp);
+int set_hp_cpu_task(struct pi_semaphore *sem, int cpu, prio_cmp_t cmp);
+
+/********************* scheduler invocation ******************/
+
+/* Plugin-specific realtime tick handler */
+typedef void (*scheduler_tick_t) (void);
+/* Plugin-specific scheduling decision function */
+typedef int (*schedule_t) (struct task_struct * prev,
+ struct task_struct ** next);
+/* Clean up after the task switch has occurred.
+ * This function is called after every (even non-rt) task switch.
+ */
+typedef void (*finish_switch_t)(struct task_struct *prev);
+
+
+/********************* task state changes ********************/
+
+/* called to setup a new real-time task */
+typedef long (*prepare_task_t) (struct task_struct *task);
+/* called to re-introduce a task after blocking */
+typedef void (*wake_up_task_t) (struct task_struct *task);
+/* called to notify the plugin of a blocking real-time task
+ * it will only be called for real-time tasks and before schedule is called */
+typedef void (*task_blocks_t) (struct task_struct *task);
+/* called when a real-time task exits. Free any allocated resources */
+typedef long (*tear_down_t) (struct task_struct *);
+
+/* Called when new_owner is released from the wait queue.
+ * It should now inherit the priority from sem _before_ it gets
+ * re-added to any queue.
+ */
+typedef long (*inherit_priority_t) (struct pi_semaphore *sem,
+ struct task_struct *new_owner);
+
+/* Called when the current task releases a semaphore from which
+ * it might have inherited a priority.
+ */
+typedef long (*return_priority_t) (struct pi_semaphore *sem);
+
+/* Called when a task tries to acquire a semaphore and fails. Check if its
+ * priority is higher than that of the current holder.
+ */
+typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t);
+
+
+/********************* sys call backends ********************/
+/* This function causes the caller to sleep until the next release */
+typedef long (*sleep_next_period_t) (void);
+
+struct sched_plugin {
+ struct list_head list;
+ /* basic info */
+ char *plugin_name;
+ unsigned int srp_active:1;
+ unsigned int pcp_active:1;
+
+ /* scheduler invocation */
+ scheduler_tick_t scheduler_tick;
+ schedule_t schedule;
+ finish_switch_t finish_switch;
+
+ /* syscall backend */
+ sleep_next_period_t sleep_next_period;
+
+ /* task state changes */
+ prepare_task_t prepare_task;
+ wake_up_task_t wake_up_task;
+ task_blocks_t task_blocks;
+ tear_down_t tear_down;
+
+ /* priority inheritance */
+ inherit_priority_t inherit_priority;
+ return_priority_t return_priority;
+ pi_block_t pi_block;
+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
+
+
+extern struct sched_plugin *curr_sched_plugin;
+
+int register_sched_plugin(struct sched_plugin* plugin);
+struct sched_plugin* find_sched_plugin(const char* name);
+int print_sched_plugins(char* buf, int max);
+
+static inline int pcp_active(void)
+{
+ return curr_sched_plugin->pcp_active;
+}
+
+static inline int srp_active(void)
+{
+ return curr_sched_plugin->srp_active;
+}
+
+
+#endif
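
A hedged skeleton of what a new plugin would provide. Everything below is made
up; callbacks that are left out (e.g. sleep_next_period and the PI hooks) would
need real implementations before such a plugin could actually be selected.

#include <linux/init.h>
#include <litmus/litmus.h>
#include <litmus/sched_plugin.h>

/* Stub callbacks for a hypothetical "demo" plugin. */
static void demo_scheduler_tick(void) { }

static int demo_schedule(struct task_struct *prev, struct task_struct **next)
{
	*next = NULL;	/* nothing to run from this plugin */
	return 0;
}

static void demo_finish_switch(struct task_struct *prev) { }
static long demo_prepare_task(struct task_struct *t)	 { return 0; }
static void demo_wake_up_task(struct task_struct *t)	 { }
static void demo_task_blocks(struct task_struct *t)	 { }
static long demo_tear_down(struct task_struct *t)	 { return 0; }

static struct sched_plugin demo_plugin = {
	.plugin_name	= "demo",
	.scheduler_tick	= demo_scheduler_tick,
	.schedule	= demo_schedule,
	.finish_switch	= demo_finish_switch,
	.prepare_task	= demo_prepare_task,
	.wake_up_task	= demo_wake_up_task,
	.task_blocks	= demo_task_blocks,
	.tear_down	= demo_tear_down,
};

static int __init init_demo_plugin(void)
{
	return register_sched_plugin(&demo_plugin);
}
module_init(init_demo_plugin);
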
diff --git a/include/litmus/sched_trace.h b/include/litmus/sched_trace.h
new file mode 100644
index 0000000..f9938c2
--- /dev/null
+++ b/include/litmus/sched_trace.h
@@ -0,0 +1,31 @@
+/* sched_trace.h -- record scheduler events to a byte stream for offline analysis.
+ */
+#ifndef _LINUX_SCHED_TRACE_H_
+#define _LINUX_SCHED_TRACE_H_
+
+#include <linux/sched.h>
+
+/* dummies, need to be re-implemented */
+
+/* used in sched.c */
+#define sched_trace_task_arrival(t)
+#define sched_trace_task_departure(t)
+#define sched_trace_task_preemption(t, by)
+#define sched_trace_task_scheduled(t)
+
+/* used in scheduler plugins */
+#define sched_trace_job_release(t)
+#define sched_trace_job_completion(t)
+
+
+#ifdef CONFIG_SCHED_DEBUG_TRACE
+void sched_trace_log_message(const char* fmt, ...);
+
+#else
+
+#define sched_trace_log_message(fmt, ...)
+
+#endif
+
+
+#endif
diff --git a/include/litmus/trace.h b/include/litmus/trace.h
new file mode 100644
index 0000000..5c2c2c0
--- /dev/null
+++ b/include/litmus/trace.h
@@ -0,0 +1,106 @@
+
+#ifndef _SYS_TRACE_H_
+#define _SYS_TRACE_H_
+
+#include <litmus/feather_trace.h>
+#include <litmus/feather_buffer.h>
+
+
+/*********************** TIMESTAMPS ************************/
+
+struct timestamp {
+ unsigned long event;
+ unsigned long long timestamp;
+ unsigned int seq_no;
+ int cpu;
+};
+
+
+/* buffer holding time stamps - will be provided by driver */
+extern struct ft_buffer* trace_ts_buf;
+
+/* save_timestamp: stores current time as struct timestamp
+ * in trace_ts_buf
+ */
+asmlinkage void save_timestamp(unsigned long event);
+
+#define TIMESTAMP(id) ft_event0(id, save_timestamp)
+
+/* Convention for timestamps
+ * =========================
+ *
+ * In order to process the trace files with a common tool, we use the following
+ * convention to measure execution times: The end time id of a code segment is
+ * always the next number after the start time event id.
+ */
+
+#define TS_SCHED_START TIMESTAMP(100)
+#define TS_SCHED_END TIMESTAMP(101)
+#define TS_CXS_START TIMESTAMP(102)
+#define TS_CXS_END TIMESTAMP(103)
+
+#define TS_TICK_START TIMESTAMP(110)
+#define TS_TICK_END TIMESTAMP(111)
+
+#define TS_PLUGIN_SCHED_START TIMESTAMP(120)
+#define TS_PLUGIN_SCHED_END TIMESTAMP(121)
+
+#define TS_PLUGIN_TICK_START TIMESTAMP(130)
+#define TS_PLUGIN_TICK_END TIMESTAMP(131)
+
+#define TS_ENTER_NP_START TIMESTAMP(140)
+#define TS_ENTER_NP_END TIMESTAMP(141)
+
+#define TS_EXIT_NP_START TIMESTAMP(150)
+#define TS_EXIT_NP_END TIMESTAMP(151)
+
+#define TS_SRP_UP_START TIMESTAMP(160)
+#define TS_SRP_UP_END TIMESTAMP(161)
+#define TS_SRP_DOWN_START TIMESTAMP(162)
+#define TS_SRP_DOWN_END TIMESTAMP(163)
+
+#define TS_PI_UP_START TIMESTAMP(170)
+#define TS_PI_UP_END TIMESTAMP(171)
+#define TS_PI_DOWN_START TIMESTAMP(172)
+#define TS_PI_DOWN_END TIMESTAMP(173)
+
+#define TS_FIFO_UP_START TIMESTAMP(180)
+#define TS_FIFO_UP_END TIMESTAMP(181)
+#define TS_FIFO_DOWN_START TIMESTAMP(182)
+#define TS_FIFO_DOWN_END TIMESTAMP(183)
+
+#define PCP1 200
+#define PCP2 204
+
+#define DPCP 210
+#define MPCP 220
+#define FMLP 230
+#define SRPT 240
+
+#define TS_PCP_UP_START TIMESTAMP(PCP1)
+#define TS_PCP_UP_END TIMESTAMP(PCP1 + 1)
+#define TS_PCP1_DOWN_START TIMESTAMP(PCP1 + 2)
+#define TS_PCP1_DOWN_END TIMESTAMP(PCP1 + 3)
+#define TS_PCP2_DOWN_START TIMESTAMP(PCP2 + 2)
+#define TS_PCP2_DOWN_END TIMESTAMP(PCP2 + 3)
+
+
+#define TS_DPCP_INVOKE_START TIMESTAMP(DPCP)
+#define TS_DPCP_INVOKE_END TIMESTAMP(DPCP + 1)
+#define TS_DPCP_AGENT1_START TIMESTAMP(DPCP + 2)
+#define TS_DPCP_AGENT1_END TIMESTAMP(DPCP + 3)
+#define TS_DPCP_AGENT2_START TIMESTAMP(DPCP + 4)
+#define TS_DPCP_AGENT2_END TIMESTAMP(DPCP + 5)
+
+
+#define TS_MPCP_UP_START TIMESTAMP(MPCP)
+#define TS_MPCP_UP_END TIMESTAMP(MPCP + 1)
+#define TS_MPCP_DOWN_START TIMESTAMP(MPCP + 2)
+#define TS_MPCP_DOWN_END TIMESTAMP(MPCP + 3)
+
+
+#define TS_SRPT_START TIMESTAMP(SRPT)
+#define TS_SRPT_END TIMESTAMP(SRPT + 1)
+
+
+#endif /* !_SYS_TRACE_H_ */
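
A hedged sketch of applying the start/end convention when instrumenting a new
code section; the ids 300/301 and the function are made up, and existing users
such as TS_SCHED_START/TS_SCHED_END in kernel/sched.c follow the same pattern.

#include <litmus/trace.h>

/* Per the convention above, the end id is the start id plus one. */
#define TS_DEMO_START	TIMESTAMP(300)
#define TS_DEMO_END	TIMESTAMP(301)

void demo_measured_section(void)
{
	TS_DEMO_START;
	/* ... code whose overhead is being measured ... */
	TS_DEMO_END;
}
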
diff --git a/kernel/exit.c b/kernel/exit.c
index fec12eb..8a0eb79 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -50,6 +50,8 @@
extern void sem_exit (void);
+extern void exit_od_table(struct task_struct* t);
+
static void exit_mm(struct task_struct * tsk);
static void __unhash_process(struct task_struct *p)
@@ -916,6 +918,8 @@ fastcall NORET_TYPE void do_exit(long code)
if (unlikely(tsk->audit_context))
audit_free(tsk);
+ exit_od_table(tsk);
+
taskstats_exit(tsk, group_dead);
exit_mm(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index d57118d..6fa6e03 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -57,6 +57,9 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+
/*
* Protected counters by write_lock_irq(&tasklist_lock)
*/
@@ -118,6 +121,8 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
+ exit_litmus(tsk);
+
security_task_free(tsk);
free_uid(tsk->user);
put_group_info(tsk->group_info);
diff --git a/kernel/sched.c b/kernel/sched.c
index cca93cc..fb35f31 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -56,6 +56,12 @@
#include <asm/unistd.h>
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/sched_trace.h>
+#include <litmus/rt_param.h>
+#include <litmus/trace.h>
+
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -836,7 +842,7 @@ static int effective_prio(struct task_struct *p)
* keep the priority unchanged. Otherwise, update priority
* to the normal priority:
*/
- if (!rt_prio(p->prio))
+ if (!rt_prio(p->prio) && !is_realtime(p))
return p->normal_prio;
return p->prio;
}
@@ -844,7 +850,7 @@ static int effective_prio(struct task_struct *p)
/*
* __activate_task - move a task to the runqueue.
*/
-static void __activate_task(struct task_struct *p, struct rq *rq)
+void __activate_task(struct task_struct *p, struct rq *rq)
{
struct prio_array *target = rq->active;
@@ -999,7 +1005,7 @@ out:
/*
* deactivate_task - remove a task from the runqueue.
*/
-static void deactivate_task(struct task_struct *p, struct rq *rq)
+void deactivate_task(struct task_struct *p, struct rq *rq)
{
dec_nr_running(p, rq);
dequeue_task(p, p->array);
@@ -1408,6 +1414,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
#endif
rq = task_rq_lock(p, &flags);
+
+ if (is_realtime(p))
+ TRACE("try_to_wake_up(%s/%d)\n", p->comm, p->pid);
+
old_state = p->state;
if (!(old_state & state))
goto out;
@@ -1415,6 +1425,12 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (p->array)
goto out_running;
+ sched_trace_task_arrival(p);
+ if (is_realtime(p)) {
+ curr_sched_plugin->wake_up_task(p);
+ goto out_running;
+ }
+
cpu = task_cpu(p);
this_cpu = smp_processor_id();
@@ -1576,6 +1592,8 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
{
int cpu = get_cpu();
+ litmus_fork(p);
+
#ifdef CONFIG_SMP
cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
#endif
@@ -1730,6 +1748,9 @@ void fastcall sched_exit(struct task_struct *p)
unsigned long flags;
struct rq *rq;
+ if (is_realtime(p))
+ return;
+
/*
* If the child was a (relative-) CPU hog then decrease
* the sleep_avg of the parent as well.
@@ -1765,6 +1786,31 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
prepare_arch_switch(next);
}
+static void litmus_transition(struct task_struct *tsk, struct rq *rq)
+{
+ int wakeup = 0;
+ WARN_ON(tsk->state != TASK_STOPPED);
+
+ tsk->rt_param.transition_pending = 0;
+ if (is_realtime(tsk)) {
+ /* RT -> BE transition */
+ tsk->rt_param.transition_error = transition_to_be(tsk);
+ wakeup = tsk->rt_param.transition_error == 0;
+ } else {
+ /* BE -> RT transition */
+ tsk->rt_param.transition_error = transition_to_rt(tsk);
+ /* If it was rejected as a real-time task, then
+ * keep it running as a best-effort task.
+ */
+ wakeup = tsk->rt_param.transition_error != 0;
+ }
+ if (wakeup) {
+ /* we still hold the runqueue lock */
+ tsk->state = TASK_RUNNING;
+ __activate_task(tsk, rq);
+ }
+}
+
/**
* finish_task_switch - clean up after a task-switch
* @rq: runqueue associated with task-switch
@@ -1801,6 +1847,15 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
+	/* Requeue the previous real-time task before we drop the rq lock,
+	 * because dropping it may lead to a preemption.
+	 */
+ curr_sched_plugin->finish_switch(prev);
+ sched_trace_task_scheduled(current);
+ if (rt_transition_pending(prev))
+ litmus_transition(prev, rq);
+ /* trace before IRQs are enabled */
+ TS_CXS_END;
finish_lock_switch(rq, prev);
if (mm)
mmdrop(mm);
@@ -2095,6 +2150,10 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
struct sched_domain *sd, enum idle_type idle,
int *all_pinned)
{
+ /* Don't migrate LITMUS^RT tasks. */
+ if (is_realtime(p))
+ return 0;
+
/*
* We do not migrate tasks that are:
* 1) running (obviously), or
@@ -3220,11 +3279,30 @@ void scheduler_tick(void)
update_cpu_clock(p, rq, now);
+	/* Real-time accounting is done by the plugin;
+	 * call the Linux functions only for background tasks.
+	 */
if (p == rq->idle)
- /* Task on the idle queue */
- wake_priority_sleeper(rq);
- else
+ /* Task on the idle queue */
+ wake_priority_sleeper(rq);
+ else if (is_realtime(p)) {
+ /* time accounting for LITMUS^RT tasks */
+ p->rt_param.job_params.exec_time +=
+ now - p->rt_param.job_params.exec_start;
+ p->rt_param.job_params.exec_start = now;
+ } else
+ /* normal Linux tasks */
task_running_tick(rq, p);
+
+ /* check whether the RT scheduler plugin requires a call to
+ * schedule
+ */
+ TS_PLUGIN_TICK_START;
+ curr_sched_plugin->scheduler_tick();
+ TS_PLUGIN_TICK_END;
+
+ send_scheduler_signals();
+
#ifdef CONFIG_SMP
update_load(rq);
if (time_after_eq(jiffies, rq->next_balance))
@@ -3406,6 +3484,7 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
sleep_type == SLEEP_INTERRUPTED);
}
+
/*
* schedule() is the main scheduler function.
*/
@@ -3420,6 +3499,7 @@ asmlinkage void __sched schedule(void)
long *switch_count;
struct rq *rq;
+
/*
* Test if we are atomic. Since do_exit() needs to call into
* schedule() atomically, we ignore that path for now.
@@ -3427,8 +3507,9 @@ asmlinkage void __sched schedule(void)
*/
if (unlikely(in_atomic() && !current->exit_state)) {
printk(KERN_ERR "BUG: scheduling while atomic: "
- "%s/0x%08x/%d\n",
- current->comm, preempt_count(), current->pid);
+ "%s/0x%08x/%d %s\n",
+ current->comm, preempt_count(), current->pid,
+ is_realtime(current) ? "rt" : "non-rt");
debug_show_held_locks(current);
if (irqs_disabled())
print_irqtrace_events(current);
@@ -3438,6 +3519,7 @@ asmlinkage void __sched schedule(void)
need_resched:
preempt_disable();
+ TS_SCHED_START;
prev = current;
release_kernel_lock(prev);
need_resched_nonpreemptible:
@@ -3470,6 +3552,7 @@ need_resched_nonpreemptible:
spin_lock_irq(&rq->lock);
switch_count = &prev->nivcsw;
+ /* check for blocking tasks */
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
switch_count = &prev->nvcsw;
if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
@@ -3478,11 +3561,60 @@ need_resched_nonpreemptible:
else {
if (prev->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible++;
+
+ if (is_realtime(prev)) {
+ TRACE_TASK(prev, "blocks, state = %d\n",
+ prev->state);
+ curr_sched_plugin->task_blocks(prev);
+ /* Enable this for all tasks to get _a lot_ of
+ * data. Can be helpful for debugging.
+ */
+ sched_trace_task_departure(prev);
+ }
+ /* only indirect switching is supported in the current
+ * version of LITMUS
+ */
deactivate_task(prev, rq);
}
}
+ next = NULL;
+
+ if (is_realtime(prev)) {
+ /* If we are invoked after scheduler_tick(), then
+ * prev is charged a tiny amount of overhead time.
+ * Since analysis has (or should have) accounted for
+ * overheads, this is ok.
+ */
+ prev->rt_param.job_params.exec_time +=
+ now - prev->rt_param.job_params.exec_start;
+ prev->rt_param.job_params.exec_start = now;
+ }
+
+ /* consult the real-time plugin */
+ TS_PLUGIN_SCHED_START;
+ curr_sched_plugin->schedule(prev, &next);
+ TS_PLUGIN_SCHED_END;
+
cpu = smp_processor_id();
+
+ if (prev != next && is_realtime(prev) && is_running(prev))
+ deactivate_task(prev, rq);
+ if (next && prev != next) {
+ __activate_task(next, rq);
+ set_task_cpu(next, cpu);
+ }
+
+ /* If the real-time plugin wants to switch to a specific task
+ * it'll be on the rq and have the highest priority. There will
+	 * be exactly one such task, thus the selection of the next task
+ * is unambiguous and the following code can only get
+ * triggered if there are no RT tasks pending (on this CPU). Thus,
+ * we may as well skip it.
+ */
+ if (next)
+ goto switch_tasks;
+
if (unlikely(!rq->nr_running)) {
idle_balance(cpu, rq);
if (!rq->nr_running) {
@@ -3546,12 +3678,17 @@ switch_tasks:
prev->timestamp = prev->last_ran = now;
sched_info_switch(prev, next);
+ TS_SCHED_END;
if (likely(prev != next)) {
+ TS_CXS_START;
+ if (is_running(prev))
+ sched_trace_task_preemption(prev, next);
next->timestamp = now;
rq->nr_switches++;
rq->curr = next;
++*switch_count;
+ next->rt_param.job_params.exec_start = now;
prepare_task_switch(rq, next);
prev = context_switch(rq, prev, next);
barrier();
@@ -3561,8 +3698,11 @@ switch_tasks:
* frame will be invalid.
*/
finish_task_switch(this_rq(), prev);
- } else
+ } else {
spin_unlock_irq(&rq->lock);
+ }
+
+ send_scheduler_signals();
prev = current;
if (unlikely(reacquire_kernel_lock(prev) < 0))
@@ -3570,6 +3710,8 @@ switch_tasks:
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
+ if (srp_active())
+ srp_ceiling_block();
}
EXPORT_SYMBOL(schedule);
@@ -3691,6 +3833,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
}
}
+
/**
* __wake_up - wake up threads blocked on a waitqueue.
* @q: the waitqueue
@@ -3709,6 +3852,7 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
}
EXPORT_SYMBOL(__wake_up);
+
/*
* Same as __wake_up but called with the spinlock in wait_queue_head_t held.
*/
@@ -3717,6 +3861,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
__wake_up_common(q, mode, 1, 0, NULL);
}
+
/**
* __wake_up_sync - wake up threads blocked on a waitqueue.
* @q: the waitqueue
@@ -3772,6 +3917,18 @@ void fastcall complete_all(struct completion *x)
}
EXPORT_SYMBOL(complete_all);
+void fastcall complete_n(struct completion *x, int n)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&x->wait.lock, flags);
+ x->done += n;
+ __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
+ n, 0, NULL);
+ spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+EXPORT_SYMBOL(complete_n);
+
void fastcall __sched wait_for_completion(struct completion *x)
{
might_sleep();
@@ -4175,7 +4332,7 @@ static inline struct task_struct *find_process_by_pid(pid_t pid)
}
/* Actually do priority change: must hold rq lock. */
-static void __setscheduler(struct task_struct *p, int policy, int prio)
+void __setscheduler(struct task_struct *p, int policy, int prio)
{
BUG_ON(p->array);
diff --git a/lib/semaphore-sleepers.c b/lib/semaphore-sleepers.c
index 1281805..3f4d543 100644
--- a/lib/semaphore-sleepers.c
+++ b/lib/semaphore-sleepers.c
@@ -108,7 +108,7 @@ fastcall int __sched __down_interruptible(struct semaphore * sem)
/*
* With signals pending, this turns into
* the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
+ * sleeping, and we can't get the lock as
* it has contention. Just correct the count
* and exit.
*/
diff --git a/litmus/Makefile b/litmus/Makefile
new file mode 100644
index 0000000..db2518d
--- /dev/null
+++ b/litmus/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for LITMUS^RT
+#
+
+obj-y = sched_plugin.o litmus.o sched_trace.o \
+ edf_common.o rm_common.o\
+ sched_gsn_edf.o sched_psn_edf.o litmus_sem.o \
+ trace.o ft_event.o rt_domain.o fdso.o \
+ sched_rm.o sync.o jobs.o pcp.o
diff --git a/litmus/edf_common.c b/litmus/edf_common.c
new file mode 100644
index 0000000..2a52835
--- /dev/null
+++ b/litmus/edf_common.c
@@ -0,0 +1,95 @@
+/*
+ * kernel/edf_common.c
+ *
+ * Common functions for EDF based scheduler.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/sched_trace.h>
+
+
+#include <litmus/edf_common.h>
+
+/* edf_higher_prio - returns true if first has a higher EDF priority
+ * than second. Deadline ties are broken by PID.
+ *
+ * first must not be NULL and must be a real-time task.
+ * second may be NULL or a non-rt task.
+ */
+int edf_higher_prio(struct task_struct* first,
+ struct task_struct* second)
+{
+ struct task_struct *first_task = first;
+ struct task_struct *second_task = second;
+
+ /* Check for inherited priorities. Change task
+ * used for comparison in such a case.
+ */
+ if (first && first->rt_param.inh_task)
+ first_task = first->rt_param.inh_task;
+ if (second && second->rt_param.inh_task)
+ second_task = second->rt_param.inh_task;
+
+ return
+ /* does the second task exist and is it a real-time task? If
+ * not, the first task (which is a RT task) has higher
+ * priority.
+ */
+ !second_task || !is_realtime(second_task) ||
+
+ /* is the deadline of the first task earlier?
+ * Then it has higher priority.
+ */
+ earlier_deadline(first_task, second_task) ||
+
+ /* Do we have a deadline tie?
+ * Then break by PID.
+ */
+ (get_deadline(first_task) == get_deadline(second_task) &&
+ (first_task->pid < second_task->pid ||
+
+ /* If the PIDs are the same then the task with the inherited
+ * priority wins.
+ */
+ (first_task->pid == second_task->pid &&
+ !second->rt_param.inh_task)));
+}
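+/* Example: for two tasks A and B with equal deadlines and distinct PIDs,
+ * A has higher priority iff A->pid < B->pid. If B runs with A's inherited
+ * priority (B->rt_param.inh_task == A), then both sides of the comparison
+ * resolve to A's parameters and B wins the resulting PID tie against A,
+ * i.e., the inheriting task is favored, as noted above.
+ */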
+
+int edf_ready_order(struct list_head* a, struct list_head* b)
+{
+ return edf_higher_prio(
+ list_entry(a, struct task_struct, rt_list),
+ list_entry(b, struct task_struct, rt_list));
+}
+
+void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched)
+{
+ rt_domain_init(rt, resched, edf_ready_order);
+}
+
+/* edf_preemption_needed - check whether the task t needs to be preempted
+ * call only with irqs disabled and with ready_lock acquired
+ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
+ */
+int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
+{
+ /* we need the read lock for edf_ready_queue */
+ /* no need to preempt if there is nothing pending */
+ if (!ready_jobs_pending(rt))
+ return 0;
+ /* we need to reschedule if t doesn't exist */
+ if (!t)
+ return 1;
+
+ /* NOTE: We cannot check for non-preemptibility since we
+ * don't know what address space we're currently in.
+ */
+
+ /* make sure to get non-rt stuff out of the way */
+ return !is_realtime(t) || edf_higher_prio(next_ready(rt), t);
+}
diff --git a/litmus/fdso.c b/litmus/fdso.c
new file mode 100644
index 0000000..ded9918
--- /dev/null
+++ b/litmus/fdso.c
@@ -0,0 +1,289 @@
+/* fdso.c - file descriptor attached shared objects
+ *
+ * (c) 2007 B. Brandenburg, LITMUS^RT project
+ *
+ * Notes:
+ * - object descriptor (OD) tables are not cloned during a fork.
+ * - objects are created on-demand, and freed after the last reference
+ * is dropped.
+ * - for now, object types are hard coded.
+ * - As long as we have live objects, we keep a reference to the inode.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <asm/uaccess.h>
+
+#include <litmus/fdso.h>
+
+extern struct fdso_ops pi_sem_ops;
+extern struct fdso_ops srp_sem_ops;
+extern struct fdso_ops pcp_sem_ops;
+extern struct fdso_ops mpcp_sem_ops;
+
+static const struct fdso_ops* fdso_ops[] = {
+ &pi_sem_ops,
+ &srp_sem_ops,
+ &pcp_sem_ops,
+ &mpcp_sem_ops,
+};
+
+static void* fdso_create(obj_type_t type)
+{
+ return fdso_ops[type]->create();
+}
+
+static void fdso_destroy(obj_type_t type, void* obj)
+{
+ fdso_ops[type]->destroy(obj);
+}
+
+static int fdso_open(struct od_table_entry* entry, void* __user config)
+{
+ if (fdso_ops[entry->obj->type]->open)
+ return fdso_ops[entry->obj->type]->open(entry, config);
+ else
+ return 0;
+}
+
+static int fdso_close(struct od_table_entry* entry)
+{
+ if (fdso_ops[entry->obj->type]->close)
+ return fdso_ops[entry->obj->type]->close(entry);
+ else
+ return 0;
+}
+
+/* inode must be locked already */
+static struct inode_obj_id* alloc_inode_obj(struct inode* inode,
+ obj_type_t type,
+ unsigned int id)
+{
+ struct inode_obj_id* obj;
+ void* raw_obj;
+
+ raw_obj = fdso_create(type);
+ if (!raw_obj)
+ return NULL;
+
+ obj = kmalloc(sizeof(struct inode_obj_id), GFP_KERNEL);
+ if (!obj) {
+ fdso_destroy(type, raw_obj);
+ return NULL;
+ }
+ INIT_LIST_HEAD(&obj->list);
+ atomic_set(&obj->count, 1);
+ obj->type = type;
+ obj->id = id;
+ obj->obj = raw_obj;
+ obj->inode = inode;
+
+ list_add(&obj->list, &inode->i_obj_list);
+ atomic_inc(&inode->i_count);
+/*
+ printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n",
+ inode, type, id);
+*/
+ return obj;
+}
+
+/* inode must be locked already */
+static struct inode_obj_id* get_inode_obj(struct inode* inode,
+ obj_type_t type,
+ unsigned int id)
+{
+ struct list_head* pos;
+ struct inode_obj_id* obj = NULL;
+
+ list_for_each(pos, &inode->i_obj_list) {
+ obj = list_entry(pos, struct inode_obj_id, list);
+ if (obj->id == id && obj->type == type) {
+ atomic_inc(&obj->count);
+ return obj;
+ }
+ }
+/*
+ printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n",
+ inode, type, id);
+*/
+ return NULL;
+}
+
+
+static void put_inode_obj(struct inode_obj_id* obj)
+{
+ struct inode* inode;
+ int let_go = 0;
+
+ inode = obj->inode;
+ if (atomic_dec_and_test(&obj->count)) {
+
+ mutex_lock(&inode->i_obj_mutex);
+ /* no new references can be obtained */
+ if (!atomic_read(&obj->count)) {
+ list_del(&obj->list);
+ fdso_destroy(obj->type, obj->obj);
+ kfree(obj);
+ let_go = 1;
+ }
+ mutex_unlock(&inode->i_obj_mutex);
+ if (let_go)
+ iput(inode);
+ }
+}
+
+static struct od_table_entry* get_od_entry(struct task_struct* t)
+{
+ struct od_table_entry* table;
+ int i;
+
+
+ table = t->od_table;
+ if (!table) {
+ table = (struct od_table_entry*)
+ kzalloc(sizeof(struct od_table_entry) *
+ MAX_OBJECT_DESCRIPTORS, GFP_KERNEL);
+ t->od_table = table;
+ }
+
+ for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++)
+ if (!table[i].used) {
+ table[i].used = 1;
+ return table + i;
+ }
+ return NULL;
+}
+
+static int put_od_entry(struct od_table_entry* od)
+{
+ put_inode_obj(od->obj);
+ od->used = 0;
+ return 0;
+}
+
+void exit_od_table(struct task_struct* t)
+{
+ int i;
+
+ if (t->od_table) {
+ for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++)
+ if (t->od_table[i].used)
+ put_od_entry(t->od_table + i);
+ kfree(t->od_table);
+ t->od_table = NULL;
+ }
+}
+
+static int do_sys_od_open(struct file* file, obj_type_t type, int id,
+ void* __user config)
+{
+ int idx = 0, err;
+ struct inode* inode;
+ struct inode_obj_id* obj = NULL;
+ struct od_table_entry* entry;
+
+ inode = file->f_dentry->d_inode;
+
+ entry = get_od_entry(current);
+ if (!entry)
+ return -ENOMEM;
+
+ mutex_lock(&inode->i_obj_mutex);
+ obj = get_inode_obj(inode, type, id);
+ if (!obj)
+ obj = alloc_inode_obj(inode, type, id);
+ if (!obj) {
+ idx = -ENOMEM;
+ entry->used = 0;
+ } else {
+ entry->obj = obj;
+ entry->extra = NULL;
+ idx = entry - current->od_table;
+ }
+
+ mutex_unlock(&inode->i_obj_mutex);
+
+ /* Give the class a chance to reject the open call, but only if the
+ * object was actually allocated.
+ */
+ if (idx >= 0) {
+ err = fdso_open(entry, config);
+ if (err < 0) {
+ /* The class rejected the open call.
+ * We need to clean up and tell user space.
+ */
+ put_od_entry(entry);
+ idx = err;
+ }
+ }
+
+ return idx;
+}
+
+
+struct od_table_entry* __od_lookup(int od)
+{
+ struct task_struct *t = current;
+
+ if (!t->od_table)
+ return NULL;
+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
+ return NULL;
+ if (!t->od_table[od].used)
+ return NULL;
+ return t->od_table + od;
+}
+
+
+asmlinkage int sys_od_open(int fd, int type, int obj_id, void* __user config)
+{
+ int ret = 0;
+ struct file* file;
+
+ /*
+ 1) get file from fd, get inode from file
+ 2) lock inode
+ 3) try to lookup object
+ 4) if not present create and enqueue object, inc inode refcnt
+ 5) increment refcnt of object
+ 6) alloc od_table_entry, setup ptrs
+ 7) unlock inode
+ 8) return offset in od_table as OD
+ */
+
+ if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ file = fget(fd);
+ if (!file) {
+ ret = -EBADF;
+ goto out;
+ }
+
+ ret = do_sys_od_open(file, type, obj_id, config);
+
+ fput(file);
+
+out:
+ return ret;
+}
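+/* Sketch of the intended user-space usage (the wrapper and object-type
+ * names below are illustrative; the real constants live in litmus/fdso.h
+ * and the user-space library):
+ *
+ * int fd = open("shared_file", O_RDONLY);
+ * int od = syscall(__NR_od_open, fd, PI_SEM, 0, NULL);
+ * ... use od with sys_pi_down()/sys_pi_up() ...
+ * syscall(__NR_od_close, od);
+ * close(fd);
+ *
+ * Tasks that open the same (inode, type, id) triple share one object.
+ */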
+
+
+asmlinkage int sys_od_close(int od)
+{
+ int ret = -EINVAL;
+ struct task_struct *t = current;
+
+ if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS)
+ return ret;
+
+ if (!t->od_table || !t->od_table[od].used)
+ return ret;
+
+
+ /* give the class a chance to reject the close
+ */
+ ret = fdso_close(t->od_table + od);
+ if (ret == 0)
+ ret = put_od_entry(t->od_table + od);
+
+ return ret;
+}
diff --git a/litmus/ft_event.c b/litmus/ft_event.c
new file mode 100644
index 0000000..db9f4ea
--- /dev/null
+++ b/litmus/ft_event.c
@@ -0,0 +1,104 @@
+#include <linux/types.h>
+
+#include <litmus/feather_trace.h>
+
+/* the feather trace management functions assume
+ * exclusive access to the event table
+ */
+
+
+#define BYTE_JUMP 0xeb
+#define BYTE_JUMP_LEN 0x02
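+/* 0xeb is the opcode of the x86 short JMP instruction (JMP rel8). Each
+ * event trigger site is assumed to start with such a two-byte jump that
+ * skips over the tracing code. Enabling an event overwrites the 8-bit
+ * displacement with zero so that execution falls through into the tracing
+ * code; disabling restores a displacement that jumps to end_addr again.
+ */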
+
+/* for each event, there is an entry in the event table */
+struct trace_event {
+ long id;
+ long count;
+ long start_addr;
+ long end_addr;
+};
+
+extern struct trace_event __start___event_table[];
+extern struct trace_event __stop___event_table[];
+
+int ft_enable_event(unsigned long id)
+{
+ struct trace_event* te = __start___event_table;
+ int count = 0;
+ char* delta;
+ unsigned char* instr;
+
+ while (te < __stop___event_table) {
+ if (te->id == id && ++te->count == 1) {
+ instr = (unsigned char*) te->start_addr;
+ /* make sure we don't clobber something wrong */
+ if (*instr == BYTE_JUMP) {
+ delta = (((unsigned char*) te->start_addr) + 1);
+ *delta = 0;
+ }
+ }
+ if (te->id == id)
+ count++;
+ te++;
+ }
+ return count;
+}
+
+int ft_disable_event(unsigned long id)
+{
+ struct trace_event* te = __start___event_table;
+ int count = 0;
+ char* delta;
+ unsigned char* instr;
+
+ while (te < __stop___event_table) {
+ if (te->id == id && --te->count == 0) {
+ instr = (unsigned char*) te->start_addr;
+ if (*instr == BYTE_JUMP) {
+ delta = (((unsigned char*) te->start_addr) + 1);
+ *delta = te->end_addr - te->start_addr -
+ BYTE_JUMP_LEN;
+ }
+ }
+ if (te->id == id)
+ count++;
+ te++;
+ }
+ return count;
+}
+
+int ft_disable_all_events(void)
+{
+ struct trace_event* te = __start___event_table;
+ int count = 0;
+ char* delta;
+ unsigned char* instr;
+
+ while (te < __stop___event_table) {
+ if (te->count) {
+ instr = (unsigned char*) te->start_addr;
+ if (*instr == BYTE_JUMP) {
+ delta = (((unsigned char*) te->start_addr)
+ + 1);
+ *delta = te->end_addr - te->start_addr -
+ BYTE_JUMP_LEN;
+ te->count = 0;
+ count++;
+ }
+ }
+ te++;
+ }
+ return count;
+}
+
+int ft_is_event_enabled(unsigned long id)
+{
+ struct trace_event* te = __start___event_table;
+
+ while (te < __stop___event_table) {
+ if (te->id == id)
+ return te->count;
+ te++;
+ }
+ return 0;
+}
diff --git a/litmus/jobs.c b/litmus/jobs.c
new file mode 100644
index 0000000..e294bc5
--- /dev/null
+++ b/litmus/jobs.c
@@ -0,0 +1,43 @@
+/* litmus/jobs.c - common job control code
+ */
+
+#include <linux/sched.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+
+void prepare_for_next_period(struct task_struct *t)
+{
+ BUG_ON(!t);
+ /* prepare next release */
+ t->rt_param.job_params.release = t->rt_param.job_params.deadline;
+ t->rt_param.job_params.deadline += get_rt_period(t);
+ t->rt_param.job_params.exec_time = 0;
+ /* update job sequence number */
+ t->rt_param.job_params.job_no++;
+
+ /* don't confuse Linux */
+ t->time_slice = 1;
+}
+
+void release_at(struct task_struct *t, lt_t start)
+{
+ t->rt_param.job_params.deadline = start;
+ prepare_for_next_period(t);
+ set_rt_flags(t, RT_F_RUNNING);
+}
+
+
+/*
+ * Deactivate current task until the beginning of the next period.
+ */
+long complete_job(void)
+{
+ /* Mark that we do not execute anymore */
+ set_rt_flags(current, RT_F_SLEEP);
+ /* Call schedule(); it will return when a new job arrives.
+ * It also takes care of preparing for the next release.
+ */
+ schedule();
+ return 0;
+}
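+/* Example: for a task with a period of 100ms whose current job has a
+ * deadline at t = 300ms, prepare_for_next_period() yields release = 300ms
+ * and deadline = 400ms. release_at(t, start) bootstraps this by treating
+ * the requested start time as the "previous" deadline.
+ */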
diff --git a/litmus/litmus.c b/litmus/litmus.c
new file mode 100644
index 0000000..77aad7d
--- /dev/null
+++ b/litmus/litmus.c
@@ -0,0 +1,830 @@
+/* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS initialization
+ * code, and the procfs interface.
+ */
+#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/sysrq.h>
+
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+
+
+#include <litmus/litmus.h>
+#include <linux/sched.h>
+#include <litmus/sched_plugin.h>
+
+#include <litmus/trace.h>
+
+/* Number of RT tasks that exist in the system */
+atomic_t rt_task_count = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(task_transition_lock);
+
+/* To send signals from the scheduler
+ * Must drop locks first.
+ */
+static LIST_HEAD(sched_sig_list);
+static DEFINE_SPINLOCK(sched_sig_list_lock);
+
+/*
+ * sys_set_rt_task_param
+ * @pid: PID of the task whose scheduling parameters are to be changed
+ * @param: New real-time extension parameters such as the execution cost and
+ * period
+ * Syscall for manipulating a task's real-time extension parameters.
+ * Returns EFAULT if param could not be copied from user space.
+ * ESRCH if pid does not correspond to a valid task.
+ * EINVAL if either period or execution cost is <= 0.
+ * EBUSY if pid refers to a task that already is a real-time task.
+ * 0 on success.
+ *
+ * Only non-real-time tasks may be configured with this system call
+ * to avoid races with the scheduler. In practice, this means that a
+ * task's parameters must be set _before_ it transitions to real-time
+ * mode (cf. sys_task_mode_transition()).
+ */
+asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
+{
+ struct rt_task tp;
+ struct task_struct *target;
+ int retval = -EINVAL;
+
+ printk("Setting up rt task parameters for process %d.\n", pid);
+
+ if (pid < 0 || param == 0) {
+ goto out;
+ }
+ if (copy_from_user(&tp, param, sizeof(tp))) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ /* Task search and manipulation must be protected */
+ read_lock_irq(&tasklist_lock);
+ if (!(target = find_task_by_pid(pid))) {
+ retval = -ESRCH;
+ goto out_unlock;
+ }
+
+ if (is_realtime(target)) {
+ /* The task is already a real-time task.
+ * We cannot allow parameter changes at this point.
+ */
+ retval = -EBUSY;
+ goto out_unlock;
+ }
+
+ if (tp.exec_cost <= 0)
+ goto out_unlock;
+ if (tp.period <= 0)
+ goto out_unlock;
+ if (!cpu_online(tp.cpu))
+ goto out_unlock;
+ if (tp.period < tp.exec_cost)
+ {
+ printk(KERN_INFO "litmus: real-time task %d rejected "
+ "because wcet > period\n", pid);
+ goto out_unlock;
+ }
+
+ target->rt_param.task_params = tp;
+
+ retval = 0;
+ out_unlock:
+ read_unlock_irq(&tasklist_lock);
+ out:
+ return retval;
+}
+
+/* Getter of task's RT params
+ * returns EINVAL if param is NULL or pid is negative
+ * returns ESRCH if pid does not correspond to a valid task
+ * returns EFAULT if copying of parameters has failed.
+ */
+asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
+{
+ int retval = -EINVAL;
+ struct task_struct *source;
+ struct rt_task lp;
+ if (param == 0 || pid < 0)
+ goto out;
+ read_lock(&tasklist_lock);
+ if (!(source = find_task_by_pid(pid))) {
+ retval = -ESRCH;
+ goto out_unlock;
+ }
+ lp = source->rt_param.task_params;
+ read_unlock(&tasklist_lock);
+ /* Do copying outside the lock */
+ retval =
+ copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
+ return retval;
+ out_unlock:
+ read_unlock(&tasklist_lock);
+ out:
+ return retval;
+
+}
+
+/* sys_task_mode_transition
+ * @target_mode: The desired execution mode after the system call completes.
+ * Either BACKGROUND_TASK or LITMUS_RT_TASK.
+ * Allows a normal task to become a real-time task, and vice versa.
+ * Returns EINVAL if an illegal transition was requested.
+ * 0 if the task mode was changed successfully.
+ * other if the plugin failed.
+ */
+asmlinkage long sys_task_mode_transition(int target_mode)
+{
+ int retval = -EINVAL;
+ struct task_struct *t = current;
+
+ if (( is_realtime(t) && target_mode == BACKGROUND_TASK) ||
+ (!is_realtime(t) && target_mode == LITMUS_RT_TASK)) {
+ TRACE_TASK(t, "attempts mode transition to %s\n",
+ is_realtime(t) ? "best-effort" : "real-time");
+ preempt_disable();
+ t->rt_param.transition_pending = 1;
+ t->state = TASK_STOPPED;
+ preempt_enable_no_resched();
+
+ schedule();
+
+ retval = t->rt_param.transition_error;
+ }
+ return retval;
+}
+
+/* implemented in kernel/litmus_sem.c */
+void srp_ceiling_block(void);
+
+/*
+ * This is the crucial function for the periodic task implementation.
+ * It checks whether the task is periodic, whether this kind of sleep
+ * is permitted, and calls the plugin-specific sleep function, which
+ * puts the task into a wait queue.
+ * returns 0 on successful wakeup
+ * returns EPERM if current conditions do not permit such sleep
+ * returns EINVAL if current task is not able to go to sleep
+ */
+asmlinkage long sys_sleep_next_period(void)
+{
+ int retval = -EPERM;
+ if (!is_realtime(current)) {
+ retval = -EINVAL;
+ goto out;
+ }
+ /* Task with negative or zero period cannot sleep */
+ if (get_rt_period(current) <= 0) {
+ retval = -EINVAL;
+ goto out;
+ }
+ /* The plugin has to put the task into an
+ * appropriate queue and call schedule
+ */
+ retval = curr_sched_plugin->sleep_next_period();
+ out:
+ return retval;
+}
+
+/* This is an "improved" version of sys_sleep_next_period() that
+ * addresses the problem of unintentionally missing a job after
+ * an overrun.
+ *
+ * returns 0 on successful wakeup
+ * returns EPERM if current conditions do not permit such sleep
+ * returns EINVAL if current task is not able to go to sleep
+ */
+asmlinkage long sys_wait_for_job_release(unsigned int job)
+{
+ int retval = -EPERM;
+ if (!is_realtime(current)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ /* Task with negative or zero period cannot sleep */
+ if (get_rt_period(current) <= 0) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ retval = 0;
+
+ /* first wait until we have "reached" the desired job
+ *
+ * This implementation has at least two problems:
+ *
+ * 1) It doesn't gracefully handle the wrap around of
+ * job_no. Since LITMUS is a prototype, this is not much
+ * of a problem right now.
+ *
+ * 2) It is theoretically racy if a job release occurs
+ * between checking job_no and calling sleep_next_period().
+ * A proper solution would require adding another callback
+ * in the plugin structure and testing the condition with
+ * interrupts disabled.
+ *
+ * FIXME: At least problem 2 should be taken care of eventually.
+ */
+ while (!retval && job > current->rt_param.job_params.job_no)
+ /* If the last job overran then job <= job_no and we
+ * don't send the task to sleep.
+ */
+ retval = curr_sched_plugin->sleep_next_period();
+ out:
+ return retval;
+}
+
+/* This is a helper syscall to query the current job sequence number.
+ *
+ * returns 0 on successful query
+ * returns EPERM if task is not a real-time task.
+ * returns EFAULT if &job is not a valid pointer.
+ */
+asmlinkage long sys_query_job_no(unsigned int __user *job)
+{
+ int retval = -EPERM;
+ if (is_realtime(current))
+ retval = put_user(current->rt_param.job_params.job_no, job);
+
+ return retval;
+}
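+/* Sketch of the intended use from a periodic task's main loop (the
+ * syscall wrapper names are illustrative):
+ *
+ * unsigned int job;
+ * for (;;) {
+ * syscall(__NR_query_job_no, &job);
+ * do_work();
+ * syscall(__NR_wait_for_job_release, job + 1);
+ * }
+ *
+ * Unlike a plain sys_sleep_next_period() loop, a job that overran (so
+ * that job_no already exceeds the requested value) does not sleep and
+ * immediately starts working on its backlog.
+ */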
+
+struct sched_sig {
+ struct list_head list;
+ struct task_struct* task;
+ unsigned int signal:31;
+ int force:1;
+};
+
+static void __scheduler_signal(struct task_struct *t, unsigned int signo,
+ int force)
+{
+ struct sched_sig* sig;
+
+ sig = kmalloc(sizeof(struct sched_sig), GFP_ATOMIC);
+ if (!sig) {
+ TRACE_TASK(t, "dropping signal: %u\n", signo);
+ return;
+ }
+
+ spin_lock(&sched_sig_list_lock);
+
+ sig->signal = signo;
+ sig->force = force;
+ sig->task = t;
+ get_task_struct(t);
+ list_add(&sig->list, &sched_sig_list);
+
+ spin_unlock(&sched_sig_list_lock);
+}
+
+void scheduler_signal(struct task_struct *t, unsigned int signo)
+{
+ __scheduler_signal(t, signo, 0);
+}
+
+void force_scheduler_signal(struct task_struct *t, unsigned int signo)
+{
+ __scheduler_signal(t, signo, 1);
+}
+
+/* FIXME: get rid of the locking and do this on a per-processor basis */
+void send_scheduler_signals(void)
+{
+ unsigned long flags;
+ struct list_head *p, *extra;
+ struct siginfo info;
+ struct sched_sig* sig;
+ struct task_struct* t;
+ struct list_head claimed;
+
+ if (spin_trylock_irqsave(&sched_sig_list_lock, flags)) {
+ if (list_empty(&sched_sig_list))
+ p = NULL;
+ else {
+ p = sched_sig_list.next;
+ list_del(&sched_sig_list);
+ INIT_LIST_HEAD(&sched_sig_list);
+ }
+ spin_unlock_irqrestore(&sched_sig_list_lock, flags);
+
+ /* abort if there are no signals */
+ if (!p)
+ return;
+
+ /* take signal list we just obtained */
+ list_add(&claimed, p);
+
+ list_for_each_safe(p, extra, &claimed) {
+ list_del(p);
+ sig = list_entry(p, struct sched_sig, list);
+ t = sig->task;
+ info.si_signo = sig->signal;
+ info.si_errno = 0;
+ info.si_code = SI_KERNEL;
+ info.si_pid = 1;
+ info.si_uid = 0;
+ TRACE("sending signal %d to %d\n", info.si_signo,
+ t->pid);
+ if (sig->force)
+ force_sig_info(sig->signal, &info, t);
+ else
+ send_sig_info(sig->signal, &info, t);
+ put_task_struct(t);
+ kfree(sig);
+ }
+ }
+
+}
+
+static inline void np_mem_error(struct task_struct* t, const char* reason)
+{
+ if (t->state != TASK_DEAD && !(t->flags & PF_EXITING)) {
+ TRACE("np section: %s => %s/%d killed\n",
+ reason, t->comm, t->pid);
+ force_scheduler_signal(t, SIGKILL);
+ }
+}
+
+/* sys_register_np_flag() allows real-time tasks to register an
+ * np section indicator.
+ * returns 0 if the flag was successfully registered
+ * returns EINVAL if current task is not a real-time task
+ * returns EFAULT if *flag couldn't be written
+ */
+asmlinkage long sys_register_np_flag(short __user *flag)
+{
+ int retval = -EINVAL;
+ short test_val = RT_PREEMPTIVE;
+
+ /* avoid races with the scheduler */
+ preempt_disable();
+ TRACE("reg_np_flag(%p) for %s/%d\n", flag,
+ current->comm, current->pid);
+
+ /* Let's first try to write to the address.
+ * That way it is initialized and any bugs
+ * involving dangling pointers will be caught
+ * early.
+ * NULL indicates disabling np section support
+ * and should not be tested.
+ */
+ if (flag)
+ retval = poke_kernel_address(test_val, flag);
+ else
+ retval = 0;
+ TRACE("reg_np_flag: retval=%d\n", retval);
+ if (unlikely(0 != retval))
+ np_mem_error(current, "np flag: not writable");
+ else
+ /* the pointer is ok */
+ current->rt_param.np_flag = flag;
+
+ preempt_enable();
+ return retval;
+}
+
+
+void request_exit_np(struct task_struct *t)
+{
+ int ret;
+ short flag;
+
+ /* We can only do this if t is actually currently scheduled on this CPU
+ * because otherwise we are in the wrong address space. Thus make sure
+ * to check.
+ */
+ BUG_ON(t != current);
+
+ if (unlikely(!is_realtime(t) || !t->rt_param.np_flag)) {
+ TRACE_TASK(t, "request_exit_np(): BAD TASK!\n");
+ return;
+ }
+
+ flag = RT_EXIT_NP_REQUESTED;
+ ret = poke_kernel_address(flag, t->rt_param.np_flag + 1);
+ TRACE("request_exit_np(%s/%d)\n", t->comm, t->pid);
+ if (unlikely(0 != ret))
+ np_mem_error(current, "request_exit_np(): flag not writable");
+
+}
+
+
+int is_np(struct task_struct* t)
+{
+ int ret;
+ unsigned short flag = 0x5858; /* = XX, looks nicer in debug*/
+
+ BUG_ON(t != current);
+
+ if (unlikely(t->rt_param.kernel_np))
+ return 1;
+ else if (unlikely(t->rt_param.np_flag == NULL) ||
+ t->flags & PF_EXITING ||
+ t->state == TASK_DEAD)
+ return 0;
+ else {
+ /* This is the tricky part. The process has registered a
+ * non-preemptive section marker. We now need to check whether
+ * it is set to RT_NON_PREEMPTIVE. Along the way we could
+ * discover that the pointer points to an unmapped region (=>
+ * kill the task) or that the location contains some garbage
+ * value (=> also kill the task). Killing the task in any case
+ * forces userspace to play nicely. Any bugs will be discovered
+ * immediately.
+ */
+ ret = probe_kernel_address(t->rt_param.np_flag, flag);
+ if (0 == ret && (flag == RT_NON_PREEMPTIVE ||
+ flag == RT_PREEMPTIVE))
+ return flag != RT_PREEMPTIVE;
+ else {
+ /* either we could not read from the address or
+ * it contained garbage => kill the process
+ * FIXME: Should we cause a SEGFAULT instead?
+ */
+ TRACE("is_np: ret=%d flag=%c%c (%x)\n", ret,
+ flag & 0xff, (flag >> 8) & 0xff, flag);
+ np_mem_error(t, "is_np() could not read");
+ return 0;
+ }
+ }
+}
+
+/*
+ * sys_exit_np() allows a real-time task to signal that it has left a
+ * non-preemptable section. It will be called after the kernel requested a
+ * callback in the preemption indicator flag.
+ * returns 0 if the signal was valid and processed.
+ * returns EINVAL if current task is not a real-time task
+ */
+asmlinkage long sys_exit_np(void)
+{
+ int retval = -EINVAL;
+
+ TS_EXIT_NP_START;
+
+ if (!is_realtime(current))
+ goto out;
+
+ TRACE("sys_exit_np(%s/%d)\n", current->comm, current->pid);
+ /* force rescheduling so that we can be preempted */
+ set_tsk_need_resched(current);
+ retval = 0;
+ out:
+
+ TS_EXIT_NP_END;
+ return retval;
+}
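+/* Sketch of the user-space side of the protocol implied by
+ * sys_register_np_flag(), is_np(), and request_exit_np(); the exact
+ * user-space representation is up to the library:
+ *
+ * short np_flag[2]; registered via sys_register_np_flag(np_flag)
+ * enter NP section: np_flag[0] = RT_NON_PREEMPTIVE;
+ * leave NP section: np_flag[0] = RT_PREEMPTIVE;
+ * if (np_flag[1] == RT_EXIT_NP_REQUESTED) {
+ * np_flag[1] = 0;
+ * sys_exit_np();
+ * }
+ *
+ * is_np() reads np_flag[0] and request_exit_np() writes to np_flag + 1,
+ * so both words must remain mapped and writable for the task's lifetime.
+ */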
+
+void __setscheduler(struct task_struct *, int, int);
+
+/* p is a real-time task. Re-init its state as a best-effort task. */
+static void reinit_litmus_state(struct task_struct* p, int restore)
+{
+ struct rt_task user_config = {};
+ __user short *np_flag = NULL;
+
+ if (restore) {
+ /* Save user-space provided configuration data.
+ * FIXME: This is missing service levels for adaptive tasks.
+ */
+ user_config = p->rt_param.task_params;
+ np_flag = p->rt_param.np_flag;
+ }
+
+ /* We probably should not be inheriting any task's priority
+ * at this point in time.
+ */
+ WARN_ON(p->rt_param.inh_task);
+
+ /* We need to restore the priority of the task. */
+ __setscheduler(p, p->rt_param.old_policy, p->rt_param.old_prio);
+
+ /* Cleanup everything else. */
+ memset(&p->rt_param, 0, sizeof(p->rt_param));
+
+ /* Restore preserved fields. */
+ if (restore) {
+ p->rt_param.task_params = user_config;
+ p->rt_param.np_flag = np_flag;
+ }
+}
+
+long transition_to_rt(struct task_struct* tsk)
+{
+ long retval;
+ long flags;
+
+ BUG_ON(is_realtime(tsk));
+
+ if (get_rt_period(tsk) == 0 ||
+ get_exec_cost(tsk) > get_rt_period(tsk)) {
+ TRACE_TASK(tsk, "litmus prepare: invalid task parameters "
+ "(%lu, %lu)\n",
+ get_exec_cost(tsk), get_rt_period(tsk));
+ return -EINVAL;
+ }
+
+ if (!cpu_online(get_partition(tsk)))
+ {
+ TRACE_TASK(tsk, "litmus prepare: cpu %d is not online\n",
+ get_partition(tsk));
+ return -EINVAL;
+ }
+
+ tsk->rt_param.old_prio = tsk->rt_priority;
+ tsk->rt_param.old_policy = tsk->policy;
+ INIT_LIST_HEAD(&tsk->rt_list);
+
+ /* avoid scheduler plugin changing underneath us */
+ spin_lock_irqsave(&task_transition_lock, flags);
+ retval = curr_sched_plugin->prepare_task(tsk);
+
+ if (!retval) {
+ atomic_inc(&rt_task_count);
+ __setscheduler(tsk, SCHED_FIFO, MAX_RT_PRIO - 1);
+ tsk->rt_param.is_realtime = 1;
+ tsk->rt_param.litmus_controlled = 1;
+ }
+ spin_unlock_irqrestore(&task_transition_lock, flags);
+
+ return retval;
+}
+
+long transition_to_be(struct task_struct* tsk)
+{
+ BUG_ON(!is_realtime(tsk));
+
+ curr_sched_plugin->tear_down(tsk);
+ atomic_dec(&rt_task_count);
+ reinit_litmus_state(tsk, 1);
+ return 0;
+}
+
+
+/* Switching a plugin in use is tricky.
+ * We must watch out that no real-time tasks exist
+ * (and that none are created in parallel) and that the plugin is not
+ * currently in use on any processor (in theory).
+ *
+ * For now, we don't enforce the second part since it is unlikely to cause
+ * any trouble by itself as long as we don't unload modules.
+ */
+int switch_sched_plugin(struct sched_plugin* plugin)
+{
+ long flags;
+ int ret = 0;
+
+ BUG_ON(!plugin);
+
+ /* stop task transitions */
+ spin_lock_irqsave(&task_transition_lock, flags);
+
+ /* don't switch if there are active real-time tasks */
+ if (atomic_read(&rt_task_count) == 0) {
+ printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
+ curr_sched_plugin = plugin;
+ } else
+ ret = -EBUSY;
+
+ spin_unlock_irqrestore(&task_transition_lock, flags);
+ return ret;
+}
+
+/* Called upon fork.
+ * p is the newly forked task.
+ */
+void litmus_fork(struct task_struct* p)
+{
+ if (is_realtime(p))
+ /* clean out any litmus related state, don't preserve anything*/
+ reinit_litmus_state(p, 0);
+}
+
+/* Called upon execve().
+ * current is doing the exec.
+ * Don't let address space specific stuff leak.
+ */
+void litmus_exec(void)
+{
+ struct task_struct* p = current;
+
+ if (is_realtime(p)) {
+ WARN_ON(p->rt_param.inh_task);
+ p->rt_param.np_flag = NULL;
+ }
+}
+
+void exit_litmus(struct task_struct *dead_tsk)
+{
+ if (is_realtime(dead_tsk))
+ transition_to_be(dead_tsk);
+}
+
+
+void list_qsort(struct list_head* list, list_cmp_t less_than)
+{
+ struct list_head lt;
+ struct list_head geq;
+ struct list_head *pos, *extra, *pivot;
+ int n_lt = 0, n_geq = 0;
+ BUG_ON(!list);
+
+ if (list->next == list)
+ return;
+
+ INIT_LIST_HEAD(<);
+ INIT_LIST_HEAD(&geq);
+
+ pivot = list->next;
+ list_del(pivot);
+ list_for_each_safe(pos, extra, list) {
+ list_del(pos);
+ if (less_than(pos, pivot)) {
+ list_add(pos, <);
+ n_lt++;
+ } else {
+ list_add(pos, &geq);
+ n_geq++;
+ }
+ }
+ if (n_lt < n_geq) {
+ list_qsort(<, less_than);
+ list_qsort(&geq, less_than);
+ } else {
+ list_qsort(&geq, less_than);
+ list_qsort(<, less_than);
+ }
+ list_splice(&geq, list);
+ list_add(pivot, list);
+ list_splice(<, list);
+}
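+/* list_qsort() is a recursive quicksort over a list_head chain: the first
+ * element serves as the pivot and less_than() defines the order (e.g.,
+ * edf_ready_order() yields deadline order). Note that with the first
+ * element as pivot the recursion depth is linear for already-sorted input.
+ */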
+
+#ifdef CONFIG_MAGIC_SYSRQ
+int sys_kill(int pid, int sig);
+
+static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty)
+{
+ struct task_struct *t;
+ read_lock(&tasklist_lock);
+ for_each_process(t) {
+ if (is_realtime(t)) {
+ sys_kill(t->pid, SIGKILL);
+ }
+ }
+ read_unlock(&tasklist_lock);
+}
+
+static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
+ .handler = sysrq_handle_kill_rt_tasks,
+ .help_msg = "Quit-rt-tasks",
+ .action_msg = "sent SIGKILL to all real-time tasks",
+};
+#endif
+
+static int proc_read_stats(char *page, char **start,
+ off_t off, int count,
+ int *eof, void *data)
+{
+ int len;
+
+ len = snprintf(page, PAGE_SIZE,
+ "real-time task count = %d\n",
+ atomic_read(&rt_task_count));
+ return len;
+}
+
+static int proc_read_plugins(char *page, char **start,
+ off_t off, int count,
+ int *eof, void *data)
+{
+ int len;
+
+ len = print_sched_plugins(page, PAGE_SIZE);
+ return len;
+}
+
+static int proc_read_curr(char *page, char **start,
+ off_t off, int count,
+ int *eof, void *data)
+{
+ int len;
+
+ len = snprintf(page, PAGE_SIZE, "%s\n", curr_sched_plugin->plugin_name);
+ return len;
+}
+
+static int proc_write_curr(struct file *file,
+ const char *buffer,
+ unsigned long count,
+ void *data)
+{
+ int len, ret;
+ char name[65];
+ struct sched_plugin* found;
+
+ if (count > 64)
+ len = 64;
+ else
+ len = count;
+
+ if (copy_from_user(name, buffer, len))
+ return -EFAULT;
+
+ name[len] = '\0';
+ /* chomp name */
+ if (len > 1 && name[len - 1] == '\n')
+ name[len - 1] = '\0';
+
+ found = find_sched_plugin(name);
+
+ if (found) {
+ ret = switch_sched_plugin(found);
+ if (ret != 0)
+ printk(KERN_INFO "Could not switch plugin: %d\n", ret);
+ } else
+ printk(KERN_INFO "Plugin '%s' is unknown.\n", name);
+
+ return len;
+}
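+/* The active plugin can be inspected and switched from user space, e.g.:
+ *
+ * cat /proc/litmus/active_plugin
+ * echo PSN-EDF > /proc/litmus/active_plugin
+ *
+ * where the name must match a plugin registered via
+ * register_sched_plugin() ("PSN-EDF" is only an illustration). Switching
+ * is rejected while real-time tasks exist (see switch_sched_plugin()).
+ */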
+
+
+static struct proc_dir_entry *litmus_dir = NULL,
+ *curr_file = NULL,
+ *stat_file = NULL,
+ *plugs_file = NULL;
+
+static int __init init_litmus_proc(void)
+{
+ litmus_dir = proc_mkdir("litmus", NULL);
+ if (!litmus_dir) {
+ printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n");
+ return -ENOMEM;
+ }
+ litmus_dir->owner = THIS_MODULE;
+
+ curr_file = create_proc_entry("active_plugin",
+ 0644, litmus_dir);
+ if (!curr_file) {
+ printk(KERN_ERR "Could not allocate active_plugin "
+ "procfs entry.\n");
+ return -ENOMEM;
+ }
+ curr_file->owner = THIS_MODULE;
+ curr_file->read_proc = proc_read_curr;
+ curr_file->write_proc = proc_write_curr;
+
+ stat_file = create_proc_read_entry("stats", 0444, litmus_dir,
+ proc_read_stats, NULL);
+
+ plugs_file = create_proc_read_entry("plugins", 0444, litmus_dir,
+ proc_read_plugins, NULL);
+
+ return 0;
+}
+
+static void exit_litmus_proc(void)
+{
+ if (plugs_file)
+ remove_proc_entry("plugins", litmus_dir);
+ if (stat_file)
+ remove_proc_entry("stats", litmus_dir);
+ if (curr_file)
+ remove_proc_entry("active_plugin", litmus_dir);
+ if (litmus_dir)
+ remove_proc_entry("litmus", NULL);
+}
+
+extern struct sched_plugin linux_sched_plugin;
+
+static int __init _init_litmus(void)
+{
+ /* Common initializers,
+ * mode change lock is used to enforce single mode change
+ * operation.
+ */
+ printk("Starting LITMUS^RT kernel\n");
+
+ register_sched_plugin(&linux_sched_plugin);
+
+#ifdef CONFIG_MAGIC_SYSRQ
+ /* offer some debugging help */
+ if (!register_sysrq_key('q', &sysrq_kill_rt_tasks_op))
+ printk("Registered kill rt tasks magic sysrq.\n");
+ else
+ printk("Could not register kill rt tasks magic sysrq.\n");
+#endif
+
+ init_litmus_proc();
+
+ return 0;
+}
+
+static void _exit_litmus(void)
+{
+ exit_litmus_proc();
+}
+
+module_init(_init_litmus);
+module_exit(_exit_litmus);
diff --git a/litmus/litmus_sem.c b/litmus/litmus_sem.c
new file mode 100644
index 0000000..7179b43
--- /dev/null
+++ b/litmus/litmus_sem.c
@@ -0,0 +1,551 @@
+/*
+ * PI semaphores and SRP implementations.
+ * Much of the code here is borrowed from include/asm-i386/semaphore.h.
+ *
+ * NOTE: This implementation is very much a prototype and horribly insecure. It
+ * is intended to be a proof of concept, not a feature-complete solution.
+ */
+
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+
+#include <litmus/fdso.h>
+
+#include <litmus/trace.h>
+
+/* ************************************************************************** */
+/* PRIORITY INHERITANCE */
+/* ************************************************************************** */
+
+static void* create_pi_semaphore(void)
+{
+ struct pi_semaphore* sem;
+ int i;
+
+ sem = kmalloc(sizeof(struct pi_semaphore), GFP_KERNEL);
+ if (!sem)
+ return NULL;
+ atomic_set(&sem->count, 1);
+ sem->sleepers = 0;
+ init_waitqueue_head(&sem->wait);
+ sem->hp.task = NULL;
+ sem->holder = NULL;
+ for (i = 0; i < NR_CPUS; i++)
+ sem->hp.cpu_task[i] = NULL;
+ return sem;
+}
+
+static void destroy_pi_semaphore(void* sem)
+{
+ /* XXX assert invariants */
+ kfree(sem);
+}
+
+struct fdso_ops pi_sem_ops = {
+ .create = create_pi_semaphore,
+ .destroy = destroy_pi_semaphore
+};
+
+struct wq_pair {
+ struct task_struct* tsk;
+ struct pi_semaphore* sem;
+};
+
+static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct wq_pair* wqp = (struct wq_pair*) wait->private;
+ set_rt_flags(wqp->tsk, RT_F_EXIT_SEM);
+ curr_sched_plugin->inherit_priority(wqp->sem, wqp->tsk);
+ TRACE_TASK(wqp->tsk,
+ "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n");
+ /* point to task for default_wake_function() */
+ wait->private = wqp->tsk;
+ default_wake_function(wait, mode, sync, key);
+
+ /* Always return true: if we encountered a task that was already
+ * running, then the wake-up raced with the schedule() in
+ * rt_pi_down(). In that case the task in rt_pi_down() will be scheduled
+ * immediately and own the lock. We must not wake up another task in
+ * any case.
+ */
+ return 1;
+}
+
+/* caller is responsible for locking */
+int set_hp_task(struct pi_semaphore *sem, prio_cmp_t higher_prio)
+{
+ struct list_head *tmp, *next;
+ struct task_struct *queued;
+ int ret = 0;
+
+ sem->hp.task = NULL;
+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
+ queued = ((struct wq_pair*)
+ list_entry(tmp, wait_queue_t,
+ task_list)->private)->tsk;
+
+ /* Compare task prios, find high prio task. */
+ if (higher_prio(queued, sem->hp.task)) {
+ sem->hp.task = queued;
+ ret = 1;
+ }
+ }
+ return ret;
+}
+
+/* caller is responsible for locking */
+int set_hp_cpu_task(struct pi_semaphore *sem, int cpu, prio_cmp_t higher_prio)
+{
+ struct list_head *tmp, *next;
+ struct task_struct *queued;
+ int ret = 0;
+
+ sem->hp.cpu_task[cpu] = NULL;
+ list_for_each_safe(tmp, next, &sem->wait.task_list) {
+ queued = ((struct wq_pair*)
+ list_entry(tmp, wait_queue_t,
+ task_list)->private)->tsk;
+
+ /* Compare task prios, find high prio task. */
+ if (get_partition(queued) == cpu &&
+ higher_prio(queued, sem->hp.cpu_task[cpu])) {
+ sem->hp.cpu_task[cpu] = queued;
+ ret = 1;
+ }
+ }
+ return ret;
+}
+
+int do_pi_down(struct pi_semaphore* sem)
+{
+ unsigned long flags;
+ struct task_struct *tsk = current;
+ struct wq_pair pair;
+ int suspended = 1;
+ wait_queue_t wait = {
+ .private = &pair,
+ .func = rt_pi_wake_up,
+ .task_list = {NULL, NULL}
+ };
+
+ pair.tsk = tsk;
+ pair.sem = sem;
+ spin_lock_irqsave(&sem->wait.lock, flags);
+
+ if (atomic_dec_return(&sem->count) < 0 ||
+ waitqueue_active(&sem->wait)) {
+ /* we need to suspend */
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+ TRACE_CUR("suspends on PI lock %p\n", sem);
+ curr_sched_plugin->pi_block(sem, tsk);
+
+ /* release lock before sleeping */
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+ TS_PI_DOWN_END;
+ preempt_enable_no_resched();
+
+
+ /* We depend on the FIFO order of the wait queue.
+ * Thus, we don't need to recheck when we wake up; we
+ * are guaranteed to have the lock since there is only one
+ * wake-up per release.
+ */
+ schedule();
+
+ TRACE_CUR("woke up, now owns PI lock %p\n", sem);
+
+ /* try_to_wake_up() set our state to TASK_RUNNING,
+ * all we need to do is to remove our wait queue entry
+ */
+ remove_wait_queue(&sem->wait, &wait);
+ } else {
+ /* no priority inheritance necessary, since there are no queued
+ * tasks.
+ */
+ suspended = 0;
+ TRACE_CUR("acquired PI lock %p, no contention\n", sem);
+ sem->holder = tsk;
+ sem->hp.task = tsk;
+ curr_sched_plugin->inherit_priority(sem, tsk);
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+ }
+ return suspended;
+}
+
+void do_pi_up(struct pi_semaphore* sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait.lock, flags);
+
+ TRACE_CUR("releases PI lock %p\n", sem);
+ curr_sched_plugin->return_priority(sem);
+ sem->holder = NULL;
+ if (atomic_inc_return(&sem->count) < 1)
+ /* there is a task queued */
+ wake_up_locked(&sem->wait);
+
+ spin_unlock_irqrestore(&sem->wait.lock, flags);
+}
+
+asmlinkage long sys_pi_down(int sem_od)
+{
+ long ret = 0;
+ struct pi_semaphore * sem;
+ int suspended = 0;
+
+ preempt_disable();
+ TS_PI_DOWN_START;
+
+ sem = lookup_pi_sem(sem_od);
+ if (sem)
+ suspended = do_pi_down(sem);
+ else
+ ret = -EINVAL;
+
+ if (!suspended) {
+ TS_PI_DOWN_END;
+ preempt_enable();
+ }
+
+ return ret;
+}
+
+asmlinkage long sys_pi_up(int sem_od)
+{
+ long ret = 0;
+ struct pi_semaphore * sem;
+
+ preempt_disable();
+ TS_PI_UP_START;
+
+ sem = lookup_pi_sem(sem_od);
+ if (sem)
+ do_pi_up(sem);
+ else
+ ret = -EINVAL;
+
+
+ TS_PI_UP_END;
+ preempt_enable();
+
+ return ret;
+}
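+/* Sketch of the user-space usage (wrapper names are illustrative): after
+ * obtaining an object descriptor od for a PI semaphore via sys_od_open(),
+ * a critical section is protected by
+ *
+ * syscall(__NR_pi_down, od);
+ * ... critical section ...
+ * syscall(__NR_pi_up, od);
+ *
+ * While a task blocks in sys_pi_down(), the plugin's pi_block() and
+ * inherit_priority() callbacks may raise the holder's effective priority.
+ */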
+
+
+/* ************************************************************************** */
+/* STACK RESOURCE POLICY */
+/* ************************************************************************** */
+
+
+struct srp_priority {
+ struct list_head list;
+ unsigned int period;
+ pid_t pid;
+};
+
+#define list2prio(l) list_entry(l, struct srp_priority, list)
+
+/* SRP task priority comparison function. Smaller periods have higher
+ * priority; ties are broken by PID. Special case: period == 0 <=> no priority.
+ */
+static int srp_higher_prio(struct srp_priority* first,
+ struct srp_priority* second)
+{
+ if (!first->period)
+ return 0;
+ else
+ return !second->period ||
+ first->period < second->period || (
+ first->period == second->period &&
+ first->pid < second->pid);
+}
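+/* Example: a priority with period 10 exceeds one with period 20; for two
+ * priorities with period 10, the one with the smaller PID wins; and a
+ * priority with period == 0 (an unclaimed ceiling) never exceeds anything.
+ */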
+
+struct srp {
+ struct list_head ceiling;
+ wait_queue_head_t ceiling_blocked;
+};
+
+
+atomic_t srp_objects_in_use = ATOMIC_INIT(0);
+
+DEFINE_PER_CPU(struct srp, srp);
+
+
+/* Initialize SRP semaphores at boot time. */
+static int __init srp_init(void)
+{
+ int i;
+
+ printk("Initializing SRP per-CPU ceilings...");
+ for (i = 0; i < NR_CPUS; i++) {
+ init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
+ INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
+ }
+ printk(" done!\n");
+
+ return 0;
+}
+module_init(srp_init);
+
+
+#define system_ceiling(srp) list2prio(srp->ceiling.next)
+
+
+#define UNDEF_SEM -2
+
+
+/* struct for uniprocessor SRP "semaphore" */
+struct srp_semaphore {
+ struct srp_priority ceiling;
+ struct task_struct* owner;
+ int cpu; /* cpu associated with this "semaphore" and resource */
+};
+
+#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling)
+
+static int srp_exceeds_ceiling(struct task_struct* first,
+ struct srp* srp)
+{
+ return list_empty(&srp->ceiling) ||
+ get_rt_period(first) < system_ceiling(srp)->period ||
+ (get_rt_period(first) == system_ceiling(srp)->period &&
+ first->pid < system_ceiling(srp)->pid) ||
+ ceiling2sem(system_ceiling(srp))->owner == first;
+}
+
+static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
+{
+ struct list_head *pos;
+ if (in_list(&prio->list)) {
+ printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in "
+ "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio));
+ return;
+ }
+ list_for_each(pos, &srp->ceiling)
+ if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
+ __list_add(&prio->list, pos->prev, pos);
+ return;
+ }
+
+ list_add_tail(&prio->list, &srp->ceiling);
+}
+
+
+static void* create_srp_semaphore(void)
+{
+ struct srp_semaphore* sem;
+
+ sem = kmalloc(sizeof(*sem), GFP_KERNEL);
+ if (!sem)
+ return NULL;
+
+ INIT_LIST_HEAD(&sem->ceiling.list);
+ sem->ceiling.period = 0;
+ sem->cpu = UNDEF_SEM;
+ sem->owner = NULL;
+ atomic_inc(&srp_objects_in_use);
+ return sem;
+}
+
+static noinline int open_srp_semaphore(struct od_table_entry* entry, void* __user arg)
+{
+ struct srp_semaphore* sem = (struct srp_semaphore*) entry->obj->obj;
+ int ret = 0;
+ struct task_struct* t = current;
+ struct srp_priority t_prio;
+
+ TRACE("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu);
+ if (!srp_active())
+ return -EBUSY;
+
+ if (sem->cpu == UNDEF_SEM)
+ sem->cpu = get_partition(t);
+ else if (sem->cpu != get_partition(t))
+ ret = -EPERM;
+
+ if (ret == 0) {
+ t_prio.period = get_rt_period(t);
+ t_prio.pid = t->pid;
+ if (srp_higher_prio(&t_prio, &sem->ceiling)) {
+ sem->ceiling.period = t_prio.period;
+ sem->ceiling.pid = t_prio.pid;
+ }
+ }
+
+ return ret;
+}
+
+static void destroy_srp_semaphore(void* sem)
+{
+ /* XXX invariants */
+ atomic_dec(&srp_objects_in_use);
+ kfree(sem);
+}
+
+struct fdso_ops srp_sem_ops = {
+ .create = create_srp_semaphore,
+ .open = open_srp_semaphore,
+ .destroy = destroy_srp_semaphore
+};
+
+
+void do_srp_down(struct srp_semaphore* sem)
+{
+ /* Update ceiling. */
+ srp_add_prio(&__get_cpu_var(srp), &sem->ceiling);
+ WARN_ON(sem->owner != NULL);
+ sem->owner = current;
+ TRACE_CUR("acquired srp 0x%p\n", sem);
+}
+
+void do_srp_up(struct srp_semaphore* sem)
+{
+ /* Determine new system priority ceiling for this CPU. */
+ WARN_ON(!in_list(&sem->ceiling.list));
+ if (in_list(&sem->ceiling.list))
+ list_del(&sem->ceiling.list);
+
+ sem->owner = NULL;
+
+ /* Wake tasks on this CPU, if they exceed current ceiling. */
+ TRACE_CUR("released srp 0x%p\n", sem);
+ wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
+}
+
+/* Adjust the system-wide priority ceiling if resource is claimed. */
+asmlinkage long sys_srp_down(int sem_od)
+{
+ int cpu;
+ int ret = -EINVAL;
+ struct srp_semaphore* sem;
+
+ /* disabling preemptions is sufficient protection since
+ * SRP is strictly per CPU and we don't interfere with any
+ * interrupt handlers
+ */
+ preempt_disable();
+ TS_SRP_DOWN_START;
+
+ cpu = smp_processor_id();
+ sem = lookup_srp_sem(sem_od);
+ if (sem && sem->cpu == cpu) {
+ do_srp_down(sem);
+ ret = 0;
+ }
+
+ TS_SRP_DOWN_END;
+ preempt_enable();
+ return ret;
+}
+
+/* Adjust the system-wide priority ceiling if resource is freed. */
+asmlinkage long sys_srp_up(int sem_od)
+{
+ int cpu;
+ int ret = -EINVAL;
+ struct srp_semaphore* sem;
+
+ preempt_disable();
+ TS_SRP_UP_START;
+
+ cpu = smp_processor_id();
+ sem = lookup_srp_sem(sem_od);
+
+ if (sem && sem->cpu == cpu) {
+ do_srp_up(sem);
+ ret = 0;
+ }
+
+ TS_SRP_UP_END;
+ preempt_enable();
+ return ret;
+}
+
+asmlinkage long sys_reg_task_srp_sem(int sem_od)
+{
+ /* unused */
+ return 0;
+}
+
+static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
+ void *key)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *tsk = wait->private;
+ if (cpu != get_partition(tsk))
+ TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b",
+ get_partition(tsk));
+ else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
+ return default_wake_function(wait, mode, sync, key);
+ return 0;
+}
+
+
+
+static void do_ceiling_block(struct task_struct *tsk)
+{
+ wait_queue_t wait = {
+ .private = tsk,
+ .func = srp_wake_up,
+ .task_list = {NULL, NULL}
+ };
+
+ tsk->state = TASK_UNINTERRUPTIBLE;
+ add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
+ tsk->rt_param.srp_non_recurse = 1;
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ tsk->rt_param.srp_non_recurse = 0;
+ remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
+}
+
+/* Wait for current task priority to exceed system-wide priority ceiling.
+ */
+void srp_ceiling_block(void)
+{
+ struct task_struct *tsk = current;
+
+ TS_SRPT_START;
+
+ /* Only applies to real-time tasks; the branch hint optimizes for the RT case. */
+ if (unlikely(!is_realtime(tsk)))
+ return;
+
+ /* Avoid recursive ceiling blocking. */
+ if (unlikely(tsk->rt_param.srp_non_recurse))
+ return;
+
+ /* Bail out early if there aren't any SRP resources around. */
+ if (likely(!atomic_read(&srp_objects_in_use)))
+ return;
+
+ preempt_disable();
+ if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
+ TRACE_CUR("is priority ceiling blocked.\n");
+ TS_SRPT_END;
+ while (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
+ do_ceiling_block(tsk);
+ TRACE_CUR("finally exceeds system ceiling.\n");
+ } else {
+ TS_SRPT_END;
+ TRACE_CUR("is not priority ceiling blocked\n");
+ }
+ preempt_enable();
+}
+
+/* ************************************************************************** */
+
+
+
diff --git a/litmus/pcp.c b/litmus/pcp.c
new file mode 100644
index 0000000..06030d4
--- /dev/null
+++ b/litmus/pcp.c
@@ -0,0 +1,764 @@
+/* pcp.c -- Implementations of the PCP, D-PCP, and M-PCP.
+ *
+ */
+#include <asm/uaccess.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+
+#include <litmus/sched_plugin.h>
+#include <litmus/litmus.h>
+#include <litmus/rm_common.h>
+#include <litmus/fdso.h>
+#include <litmus/trace.h>
+
+/* from sched_rm.c */
+void rm_set_prio(struct task_struct *t, struct pcp_priority* new_prio);
+
+#define GLOBAL_SEM -1
+#define UNDEF_SEM -2
+
+#define get_prio(t) ((t)->rt_param.cur_prio)
+#define get_base_prio(t) (&((t)->rt_param.pcp_prio))
+
+
+struct dpcp_request {
+ struct list_head list;
+ struct completion done;
+ long arg;
+ lt_t prio;
+ int pid;
+};
+
+struct pcp_semaphore {
+ int cpu;
+
+ /* waiting tasks */
+ wait_queue_head_t blocked;
+ struct pcp_priority* blocked_prio;
+
+ /* system ceiling support */
+ struct list_head list;
+ struct pcp_priority ceiling;
+
+ /* task_struct owned_semaphore list */
+ struct list_head owned_list;
+
+ /* Current lock holder.
+ * NULL implies unlocked.
+ */
+ struct task_struct* holder;
+
+ /* D-PCP support */
+ spinlock_t dpcp_lock;
+ struct list_head dpcp_requests;
+ int dpcp_count;
+ struct dpcp_request* dpcp_current;
+ struct completion dpcp_job;
+ struct task_struct* dpcp_agent;
+};
+
+static DEFINE_PER_CPU(spinlock_t, pcp_lock);
+static DEFINE_PER_CPU(struct list_head, sys_ceiling);
+
+static noinline void init_pcp_sem(struct pcp_semaphore *sem, int cpu)
+{
+ sem->cpu = cpu;
+ init_waitqueue_head(&sem->blocked);
+ INIT_LIST_HEAD(&sem->list);
+ INIT_LIST_HEAD(&sem->owned_list);
+ INIT_LIST_HEAD(&sem->dpcp_requests);
+ sem->holder = NULL;
+ sem->dpcp_current = NULL;
+ sem->blocked_prio = NULL;
+ sem->ceiling = (struct pcp_priority) {ULLONG_MAX, 0, INT_MAX};
+ init_completion(&sem->dpcp_job);
+ spin_lock_init(&sem->dpcp_lock);
+ sem->dpcp_count = 0;
+ sem->dpcp_agent = NULL;
+}
+
+static noinline int tsk_pcp_higher_prio(struct task_struct* t,
+ struct pcp_priority* p2)
+{
+ return _rm_higher_prio(t->rt_param.cur_prio, p2);
+}
+
+static noinline struct pcp_semaphore* get_ceiling(int cpu)
+{
+ struct list_head *ceil_list = &per_cpu(sys_ceiling, cpu);
+ if (list_empty(ceil_list))
+ return NULL;
+ return list_entry(ceil_list->next, struct pcp_semaphore, list);
+}
+
+static noinline void raise_ceiling(struct pcp_semaphore* sem, int cpu)
+{
+ struct list_head *ceil_list = &per_cpu(sys_ceiling, cpu);
+ list_add(&sem->list, ceil_list);
+}
+
+static noinline int exceeds_ceiling(struct task_struct* t,
+ struct pcp_semaphore* ceil)
+{
+ return !ceil || ceil->holder == t ||
+ tsk_pcp_higher_prio(t, &ceil->ceiling);
+}
+
+static noinline void give_priority(struct task_struct* t, struct pcp_semaphore* sem)
+{
+ struct pcp_semaphore* next;
+ /* sem->blocked_prio can be NULL, but _rm_higher_prio() handles that */
+
+ /* only update if we actually exceed existing priorities */
+ if (_rm_higher_prio(get_prio(t), sem->blocked_prio) &&
+ _rm_higher_prio(get_prio(t), get_base_prio(sem->holder))) {
+ /* we need to register our priority */
+ sem->blocked_prio = get_prio(t);
+
+ /* only update task if it results in a priority increase */
+ if (_rm_higher_prio(get_prio(t), get_prio(sem->holder))) {
+ /* update prio */
+ TRACE("PCP: %s/%d inherits from %s/%d\n",
+ sem->holder->comm, sem->holder->pid,
+ t->comm, t->pid);
+ rm_set_prio(sem->holder, get_prio(t));
+ /* check if recipient is blocked, too */
+ next = sem->holder->rt_param.blocked_on;
+ if (next)
+ /* Transitive priority inheritance.
+ * Recurse.
+ */
+ give_priority(sem->holder, next);
+ }
+ }
+}
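+/* Example of the transitive case: if T1 blocks on S1 held by T2, and T2
+ * is itself blocked on S2 held by T3 (T2->rt_param.blocked_on == S2),
+ * then a priority boost of T2 caused by T1 is propagated to T3 by the
+ * recursive give_priority(T2, S2) call, so T3 also ends up running with
+ * T1's priority.
+ */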
+
+static noinline long local_pcp_down(struct pcp_semaphore *sem)
+{
+ long ret = 0;
+ struct task_struct* t = current;
+ struct pcp_semaphore* ceiling;
+ int cpu;
+ int ceiling_passed = 0;
+
+ /* don't allow recursive locking */
+ if (sem->holder == t)
+ return -EINVAL;
+
+ cpu = smp_processor_id();
+ if (cpu != sem->cpu) {
+ preempt_enable();
+ return -EPERM;
+ }
+
+
+ /* first we need to pass the local system ceiling */
+ while (!ceiling_passed) {
+ ceiling = get_ceiling(cpu);
+ TRACE_TASK(t, "PCP: I want %p, ceiling is %p\n", sem, ceiling);
+ ceiling_passed = exceeds_ceiling(t, ceiling);
+ if (!ceiling_passed) {
+ /* block on sys_ceiling */
+ DECLARE_WAITQUEUE(waitq, t);
+ TRACE_TASK(t, "blocks on PCP system ceiling\n");
+ add_wait_queue(&ceiling->blocked, &waitq);
+ /* initiate priority inheritance */
+ give_priority(t, ceiling);
+ t->rt_param.blocked_on = ceiling;
+ t->state = TASK_UNINTERRUPTIBLE;
+ preempt_enable_no_resched();
+ TS_PCP1_DOWN_END;
+ schedule();
+ preempt_disable();
+ t->rt_param.blocked_on = NULL;
+ remove_wait_queue(&ceiling->blocked, &waitq);
+ } else {
+ if (ceiling)
+ TRACE_TASK(t,
+ "system ceiling passed: {%llu, %d, %d} < "
+ "{%llu, %d, %d}\n",
+ ceiling->ceiling.prio,
+ ceiling->ceiling.in_global_cs,
+ ceiling->ceiling.pid,
+ t->rt_param.cur_prio->prio,
+ t->rt_param.cur_prio->in_global_cs,
+ t->rt_param.cur_prio->pid
+ );
+ else
+ TRACE_TASK(t,
+ "system ceiling passed: NULL < "
+ "{%llu, %d, %d}\n",
+ t->rt_param.cur_prio->prio,
+ t->rt_param.cur_prio->in_global_cs,
+ t->rt_param.cur_prio->pid
+ );
+ TS_PCP1_DOWN_END;
+ }
+ }
+
+ TS_PCP2_DOWN_START;
+ /* Since we have passed the priority ceiling the semaphore cannot be
+ * in use. If it were in use then the ceiling would be at least as high
+ * as our priority.
+ */
+ WARN_ON(sem->holder);
+
+ TRACE_TASK(t, "taking PCP semaphore 0x%p, owner:%p\n", sem, sem->holder);
+
+ /* We can become the owner. */
+ sem->holder = t;
+ list_add(&sem->owned_list, &t->rt_param.owned_semaphores);
+
+ /* We need to update the system ceiling, but only
+ * if the new ceiling is higher than the old.
+ */
+ ceiling = get_ceiling(cpu);
+ /* if the priorities are equal then t already owns ceiling,
+ * otherwise it would not have gotten past the system ceiling
+ */
+ if (!ceiling || _rm_higher_prio(&sem->ceiling, &ceiling->ceiling)) {
+ raise_ceiling(sem, cpu);
+ TRACE_TASK(t, "raised ceiling on %d\n", cpu);
+ }
+
+ TS_PCP2_DOWN_END;
+ return ret;
+}
+
+static noinline struct pcp_priority* fetch_highest_prio(struct task_struct *t)
+{
+ struct pcp_priority *prio;
+ struct list_head* pos;
+ struct pcp_semaphore* sem;
+
+ /* base case is that the task uses its normal priority */
+ prio = get_base_prio(t);
+
+ /* now search the list of semaphores that we own for a higher priority
+ * to inherit
+ */
+ list_for_each(pos, &t->rt_param.owned_semaphores) {
+ sem = list_entry(pos, struct pcp_semaphore, owned_list);
+ /* sem->blocked_prio could be NULL */
+ if (!_rm_higher_prio(prio, sem->blocked_prio))
+ prio = sem->blocked_prio;
+ }
+ return prio;
+}
+
+static noinline long local_pcp_up(struct pcp_semaphore *sem)
+{
+ long ret = 0;
+ struct task_struct* t = current;
+ int cpu;
+
+ cpu = smp_processor_id();
+
+ if (cpu != sem->cpu)
+ return -EPERM;
+
+ if (sem->holder == t) {
+ TRACE_TASK(t, "giving up PCP semaphore 0x%p.\n", sem);
+
+ /* we need to unblock all tasks in the wait_queue */
+ wake_up_all(&sem->blocked);
+
+ /* unlock semaphore */
+ sem->holder = NULL;
+ list_del(&sem->owned_list);
+
+ /* remove from system ceiling list */
+ if (in_list(&sem->list))
+ list_del(&sem->list);
+
+ if (sem->blocked_prio == get_prio(t)) {
+ /* We are currently inheriting from this
+ * semaphore. We need to figure out which priority
+ * we should fall back to.
+ */
+ TRACE_TASK(t, "giving up inherited prio.\n");
+ rm_set_prio(t, fetch_highest_prio(t));
+ }
+ /* reset semaphore priority inheritance */
+ sem->blocked_prio = NULL;
+ } else {
+ TRACE_TASK(t, "local_pcp_up EINVAL 0x%p.\n", sem);
+ ret = -EINVAL;
+ }
+
+ TS_PCP_UP_END;
+ return ret;
+}
+
+static noinline struct task_struct* wqlist2task(struct list_head* l)
+{
+ return (struct task_struct*)
+ list_entry(l, wait_queue_t, task_list)->private;
+}
+
+static noinline int wait_order(struct list_head* la, struct list_head* lb)
+{
+ return rm_higher_prio(wqlist2task(la), wqlist2task(lb));
+}
+
+/* The default function is too picky.
+ * We really only want to wake up one task.
+ */
+int single_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+ int ret = default_wake_function(wait, mode, sync, key);
+ if (!ret)
+ TRACE("Overriding default_wake_function() return code.\n");
+ return 1;
+}
+
+static noinline long global_pcp_down(struct pcp_semaphore* sem)
+{
+ unsigned long flags;
+ long ret = 0;
+ struct task_struct* t = current;
+
+ /* don't allow recursive locking */
+ if (sem->holder == t)
+ return -EINVAL;
+
+ spin_lock_irqsave(&sem->blocked.lock, flags);
+
+ /* Get the global priority. Do this before
+ * we block, so that we wake up as a high-priority task.
+ */
+ t->rt_param.pcp_prio.in_global_cs = 1;
+ rm_set_prio(t, &t->rt_param.pcp_prio);
+
+ if (sem->holder) {
+ /* semaphore is not free. We need to block. */
+ DECLARE_WAITQUEUE(waitq, t);
+ TRACE_TASK(t, "blocks on MPCP semaphore %p.\n", sem);
+ waitq.flags = WQ_FLAG_EXCLUSIVE;
+ waitq.func = single_wake_function;
+ /* insert ordered by priority */
+ list_insert(&waitq.task_list, &sem->blocked.task_list,
+ wait_order);
+ t->state = TASK_UNINTERRUPTIBLE;
+ spin_unlock_irqrestore(&sem->blocked.lock, flags);
+ preempt_enable_no_resched();
+ TS_MPCP_DOWN_END;
+
+ schedule();
+
+ preempt_disable();
+ /* once we wake up we are the owner of the lock */
+ spin_lock_irqsave(&sem->blocked.lock, flags);
+ remove_wait_queue_locked(&sem->blocked, &waitq);
+ } else {
+ /* semaphore is free. We can proceed. */
+ TS_MPCP_DOWN_END;
+ sem->holder = t;
+ }
+ if (sem->holder != t) {
+ if (sem->holder)
+ TRACE("expected %s/%d, but I am %s/%d\n",
+ sem->holder->comm, sem->holder->pid, t->comm, t->pid);
+ else
+ TRACE("expected NULL, but I am %s/%d\n",
+ t->comm, t->pid);
+ }
+ TRACE_TASK(t, "acquired MPCP semaphore %p.\n", sem);
+
+
+ spin_unlock_irqrestore(&sem->blocked.lock, flags);
+ return ret;
+}
+
+static noinline long global_pcp_up(struct pcp_semaphore* sem)
+{
+ unsigned long flags;
+ long ret = 0;
+ struct task_struct* t = current;
+
+ if (sem->holder != t)
+ return -EINVAL;
+
+ TRACE_TASK(t, "releasing MPCP semaphore %p.\n", sem);
+
+ spin_lock_irqsave(&sem->blocked.lock, flags);
+ if (waitqueue_active(&sem->blocked)) {
+ /* pass ownership on */
+ sem->holder = wqlist2task(sem->blocked.task_list.next);
+ TRACE_TASK(t, "waking up next (=%s/%d) on MPCP semaphore %p.\n",
+ sem->holder->comm, sem->holder->pid, sem);
+ /* wake up first */
+ wake_up_locked(&sem->blocked);
+ } else
+ sem->holder = NULL;
+
+ /* restore our own priority */
+ t->rt_param.pcp_prio.in_global_cs = 0;
+ rm_set_prio(t, &t->rt_param.pcp_prio);
+
+ TS_MPCP_UP_END;
+ spin_unlock_irqrestore(&sem->blocked.lock, flags);
+ return ret;
+}
+
+static noinline int request_order(struct list_head* la, struct list_head* lb)
+{
+ struct dpcp_request *a, *b;
+ a = list_entry(la, struct dpcp_request, list);
+ b = list_entry(lb, struct dpcp_request, list);
+ return a->prio < b->prio;
+}
+
+static noinline long dpcp_invoke(struct pcp_semaphore* sem, long arg)
+{
+ unsigned long flags;
+ long ret = 0;
+ struct task_struct* t = current, *a;
+ struct dpcp_request req;
+
+ spin_lock_irqsave(&sem->dpcp_lock, flags);
+
+ init_completion(&req.done);
+ req.arg = arg;
+ req.prio = t->rt_param.pcp_prio.prio;
+ req.pid = t->rt_param.pcp_prio.pid;
+
+ list_insert(&req.list, &sem->dpcp_requests,
+ request_order);
+
+ if (!(sem->dpcp_count++)) {
+ /* agent needs to be awakened */
+ TRACE_TASK(t, "waking DPCP agent for %p.\n", sem);
+ if (sem->dpcp_agent) {
+ a = sem->dpcp_agent;
+ /* set agent priority */
+ a->rt_param.pcp_prio.in_global_cs = 1;
+ a->rt_param.pcp_prio.prio = req.prio;
+ rm_set_prio(a, &a->rt_param.pcp_prio);
+ }
+ complete(&sem->dpcp_job);
+ }
+
+ spin_unlock_irqrestore(&sem->dpcp_lock, flags);
+ TRACE_TASK(t, "blocking on DPCP sem %p.\n", sem);
+ preempt_enable_no_resched();
+ TS_DPCP_INVOKE_END;
+
+ wait_for_completion(&req.done);
+
+ preempt_disable();
+ /* we don't need to clean up, the remote agent did that for us */
+ return ret;
+}
+
+static noinline long dpcp_agent(struct pcp_semaphore* sem, long flags, long *arg)
+{
+ unsigned long spinflags;
+ long ret = 0;
+ struct task_struct* t = current;
+
+ spin_lock_irqsave(&sem->dpcp_lock, spinflags);
+
+ /* defend against multiple concurrent agents */
+ if (sem->dpcp_agent && sem->dpcp_agent != t) {
+ spin_unlock_irqrestore(&sem->dpcp_lock, spinflags);
+ return -EBUSY;
+ } else
+ sem->dpcp_agent = t;
+
+ if (sem->cpu != get_partition(t)) {
+ int cpu = smp_processor_id();
+ spin_unlock_irqrestore(&sem->dpcp_lock, spinflags);
+ printk(KERN_CRIT
+ "dpcp_agent: sem->cpu: %d, but agent "
+ "is on %d, and part=%d\n",
+ sem->cpu, cpu, get_partition(t));
+ return -EINVAL;
+ }
+
+ if ((flags & DPCP_COMPLETE) && sem->dpcp_current) {
+ TRACE_TASK(t, "completing DPCP sem %p.\n", sem);
+ /* we need to release the holder */
+ complete(&sem->dpcp_current->done);
+ sem->dpcp_count--;
+ sem->dpcp_current = NULL;
+ }
+
+ if (flags & DPCP_WAIT) {
+ do {
+ if (sem->dpcp_count) {
+ /* pass ownership on */
+ sem->dpcp_current = list_entry(
+ sem->dpcp_requests.next,
+ struct dpcp_request, list);
+ list_del(sem->dpcp_requests.next);
+ t->rt_param.pcp_prio.in_global_cs = 1;
+ t->rt_param.pcp_prio.prio =
+ sem->dpcp_current->prio;
+ t->rt_param.pcp_prio.pid = sem->dpcp_current->pid;
+ rm_set_prio(t, &t->rt_param.pcp_prio);
+ TS_DPCP_AGENT2_END;
+ } else {
+ /* need to wait */
+ spin_unlock_irqrestore(&sem->dpcp_lock,
+ spinflags);
+ TRACE_TASK(t, "agent waiting for "
+ "DPCP sem %p.\n", sem);
+
+ preempt_enable_no_resched();
+ TS_DPCP_AGENT2_END;
+ ret = wait_for_completion_interruptible(&sem->dpcp_job);
+ preempt_disable();
+ TRACE_TASK(t, "got DPCP job on sem %p, "
+ "ret=%d.\n", sem, ret);
+ spin_lock_irqsave(&sem->dpcp_lock, spinflags);
+ if (ret != 0) {
+ /* FIXME: set priority */
+ break;
+ }
+ }
+ } while (!sem->dpcp_current);
+ if (ret == 0)
+ *arg = sem->dpcp_current->arg;
+ } else {
+ /* restore our own priority */
+ t->rt_param.pcp_prio.in_global_cs = 0;
+ t->rt_param.pcp_prio.prio = ULLONG_MAX;
+ rm_set_prio(t, &t->rt_param.pcp_prio);
+ sem->dpcp_agent = NULL;
+ }
+
+ spin_unlock_irqrestore(&sem->dpcp_lock, spinflags);
+ return ret;
+}
+
+
+/* system calls */
+
+asmlinkage long sys_pcp_down(int sem_od)
+{
+ long ret = 0;
+ struct pcp_semaphore * sem;
+
+ preempt_disable();
+ TS_MPCP_DOWN_START;
+ TS_PCP1_DOWN_START;
+
+ if (!is_realtime(current)) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ sem = lookup_pcp_sem(sem_od);
+ if (sem) {
+ if (sem->cpu != GLOBAL_SEM)
+ ret = local_pcp_down(sem);
+ else
+ ret = global_pcp_down(sem);
+ } else
+ ret = -EINVAL;
+
+out:
+ preempt_enable();
+ return ret;
+}
+
+asmlinkage long sys_pcp_up(int sem_od)
+{
+ long ret = 0;
+ struct pcp_semaphore * sem;
+
+ preempt_disable();
+ TS_PCP_UP_START;
+ TS_MPCP_UP_START;
+
+ if (!is_realtime(current)) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ sem = lookup_pcp_sem(sem_od);
+ if (sem) {
+ if (sem->cpu != GLOBAL_SEM)
+ ret = local_pcp_up(sem);
+ else
+ ret = global_pcp_up(sem);
+ } else
+ ret = -EINVAL;
+
+out:
+ preempt_enable();
+ return ret;
+}
+
+
+asmlinkage long sys_dpcp_invoke(int sem_od, long arg)
+{
+ long ret = 0;
+ struct pcp_semaphore * sem;
+
+ preempt_disable();
+ TS_DPCP_INVOKE_START;
+
+ if (!is_realtime(current)) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ sem = lookup_pcp_sem(sem_od);
+ if (sem) {
+ ret = dpcp_invoke(sem, arg);
+ } else
+ ret = -EINVAL;
+
+out:
+ preempt_enable();
+ return ret;
+}
+
+asmlinkage long sys_dpcp_agent(int sem_od, long flags, long __user *__arg)
+{
+ long ret = 0;
+ long arg;
+ struct pcp_semaphore * sem;
+
+ preempt_disable();
+ TS_DPCP_AGENT1_START;
+
+ if (!is_realtime(current)) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ sem = lookup_pcp_sem(sem_od);
+ if (sem) {
+ TS_DPCP_AGENT1_END;
+ if (flags & DPCP_COMPLETE) {
+ TS_PCP_UP_START;
+ local_pcp_up(sem);
+ }
+ TS_DPCP_AGENT2_START;
+ ret = dpcp_agent(sem, flags, &arg);
+ if (ret == 0 && (flags & DPCP_WAIT)) {
+ ret = put_user(arg, __arg);
+ if (ret == 0) {
+ TS_PCP1_DOWN_START;
+ local_pcp_down(sem);
+ }
+ }
+ } else
+ ret = -EINVAL;
+
+out:
+ preempt_enable();
+ return ret;
+}
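+
+/* Informal usage sketch (not part of the original sources): based on the flag
+ * handling in sys_dpcp_agent() above, a user-space agent is expected to loop
+ * roughly as follows; the wrapper and helper names are hypothetical.
+ *
+ *	err = dpcp_agent(sem_od, DPCP_WAIT, &arg);            // first request
+ *	while (!err) {
+ *		serve_request(arg);        // runs while holding the local
+ *		                           // PCP lock (local_pcp_down())
+ *		err = dpcp_agent(sem_od, DPCP_COMPLETE | DPCP_WAIT, &arg);
+ *	}
+ *	dpcp_agent(sem_od, DPCP_COMPLETE, NULL);      // release final request
+ *
+ * DPCP_COMPLETE releases the local PCP lock and completes the invoker's
+ * request; DPCP_WAIT blocks until the next request arrives and acquires the
+ * local PCP lock before returning to the agent.
+ */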
+
+
+/* FDSO callbacks */
+
+static noinline void* create_pcp_semaphore(void)
+{
+ struct pcp_semaphore* sem;
+
+ sem = kmalloc(sizeof(struct pcp_semaphore), GFP_KERNEL);
+ if (!sem)
+ return NULL;
+ init_pcp_sem(sem, UNDEF_SEM);
+ TRACE("allocated PCP semaphore %p\n", sem);
+ return sem;
+}
+
+static noinline void destroy_pcp_semaphore(void* obj)
+{
+ struct pcp_semaphore* sem = (struct pcp_semaphore*) obj;
+ WARN_ON(sem->holder);
+ WARN_ON(in_list(&sem->list));
+ kfree(sem);
+}
+
+static noinline void update_pcp_ceiling(struct pcp_semaphore* sem, struct task_struct* t, int global)
+{
+ struct pcp_priority prio = {get_rt_period(t), 1, t->pid};
+ if (global && !sem->ceiling.in_global_cs)
+ sem->ceiling.in_global_cs = 1;
+ if (_rm_higher_prio(&prio, &sem->ceiling))
+ sem->ceiling = prio;
+}
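+
+/* Example (informal): if a task with a 10ms period opens a semaphore whose
+ * ceiling currently corresponds to a 25ms-period task, the ceiling is raised
+ * to the 10ms task's {period, in_global_cs, pid} priority, since under RM a
+ * shorter period means a higher priority (ties broken by PID).
+ */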
+
+static noinline int open_pcp_semaphore(struct od_table_entry* entry, void __user *__arg)
+{
+ struct pcp_semaphore* sem = (struct pcp_semaphore*) entry->obj->obj;
+ int *arg = (int*) __arg;
+ struct task_struct* t = current;
+	int cpu = get_partition(t);
+
+ TRACE("opening PCP semaphore %p, cpu=%d\n", sem, sem->cpu);
+ if (!pcp_active())
+ return -EBUSY;
+
+ if (arg && get_user(cpu, arg) != 0)
+ return -EFAULT;
+
+ if (sem->cpu == UNDEF_SEM)
+ sem->cpu = cpu;
+
+ update_pcp_ceiling(sem, t, sem->cpu != get_partition(t));
+
+ return 0;
+}
+
+static noinline void update_mpcp_ceiling(struct pcp_semaphore* sem, struct task_struct* t)
+{
+ struct pcp_priority prio = {get_rt_period(t), 1, t->pid};
+ if (_rm_higher_prio(&prio, &sem->ceiling))
+ sem->ceiling = prio;
+}
+
+static noinline int open_mpcp_semaphore(struct od_table_entry* entry, void* __user arg)
+{
+ struct pcp_semaphore* sem = (struct pcp_semaphore*) entry->obj->obj;
+ int ret = 0;
+ struct task_struct* t = current;
+
+ if (!pcp_active())
+ return -EBUSY;
+
+ if (sem->cpu == UNDEF_SEM)
+ sem->cpu = GLOBAL_SEM;
+
+ update_mpcp_ceiling(sem, t);
+
+ return ret;
+}
+
+struct fdso_ops pcp_sem_ops = {
+ .create = create_pcp_semaphore,
+ .destroy = destroy_pcp_semaphore,
+ .open = open_pcp_semaphore
+};
+
+struct fdso_ops mpcp_sem_ops = {
+ .create = create_pcp_semaphore,
+ .destroy = destroy_pcp_semaphore,
+ .open = open_mpcp_semaphore
+};
+
+static noinline int __init pcp_boot_init(void)
+{
+ int i;
+
+ printk("Initializing PCP per-CPU ceilings...");
+ for (i = 0; i < NR_CPUS; i++) {
+ INIT_LIST_HEAD(&per_cpu(sys_ceiling, i));
+ per_cpu(pcp_lock, i) = __SPIN_LOCK_UNLOCKED(pcp_lock);
+ }
+ printk(" done!\n");
+
+ return 0;
+}
+
+module_init(pcp_boot_init);
diff --git a/litmus/rm_common.c b/litmus/rm_common.c
new file mode 100644
index 0000000..9bf21fd
--- /dev/null
+++ b/litmus/rm_common.c
@@ -0,0 +1,76 @@
+/*
+ * litmus/rm_common.c
+ *
+ * Common functions for RM based schedulers.
+ *
+ * FIXME: Too much code duplication with edf_common.c
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/sched_trace.h>
+
+
+#include <litmus/rm_common.h>
+
+/* rm_higher_prio - returns true if first has a higher RM priority
+ * than second. Period ties are broken by PID.
+ *
+ * first must not be NULL and must be a real-time task.
+ * second may be NULL or a non-rt task.
+ */
+int rm_higher_prio(struct task_struct* first,
+ struct task_struct* second)
+{
+ struct pcp_priority *p1, *p2;
+
+ /* verify assumptions in DEBUG build */
+ BUG_ON(!first);
+ BUG_ON(!is_realtime(first));
+ BUG_ON(second && !is_realtime(second) && second->rt_param.cur_prio);
+
+ p1 = first->rt_param.cur_prio;
+
+ /* if second is not a real-time task, then cur_prio is NULL */
+ p2 = second ? second->rt_param.cur_prio : NULL;
+ return _rm_higher_prio(p1, p2);
+}
+
+int rm_ready_order(struct list_head* a, struct list_head* b)
+{
+ return rm_higher_prio(
+ list_entry(a, struct task_struct, rt_list),
+ list_entry(b, struct task_struct, rt_list));
+}
+
+
+void rm_domain_init(rt_domain_t* rt, check_resched_needed_t resched)
+{
+ rt_domain_init(rt, resched, rm_ready_order);
+}
+
+/* rm_preemption_needed - check whether the task t needs to be preempted
+ * call only with irqs disabled and with ready_lock acquired
+ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
+ */
+int rm_preemption_needed(rt_domain_t* rt, struct task_struct *t)
+{
+	/* The caller must already hold the ready lock (see above). */
+	/* no need to preempt if there is nothing pending */
+ if (!ready_jobs_pending(rt))
+ return 0;
+ /* we need to reschedule if t doesn't exist */
+ if (!t)
+ return 1;
+
+ /* NOTE: We cannot check for non-preemptibility since we
+ * don't know what address space we're currently in.
+ */
+
+ /* make sure to get non-rt stuff out of the way */
+ return !is_realtime(t) || rm_higher_prio(next_ready(rt), t);
+}
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
new file mode 100644
index 0000000..fe7bd29
--- /dev/null
+++ b/litmus/rt_domain.c
@@ -0,0 +1,130 @@
+/*
+ * litmus/rt_domain.c
+ *
+ * LITMUS real-time infrastructure. This file contains the
+ * functions that manipulate RT domains. RT domains are an abstraction
+ * of a ready queue and a release queue.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/sched_trace.h>
+
+#include <litmus/rt_domain.h>
+
+
+static int dummy_resched(rt_domain_t *rt)
+{
+ return 0;
+}
+
+static int dummy_order(struct list_head* a, struct list_head* b)
+{
+ return 0;
+}
+
+int release_order(struct list_head* a, struct list_head* b)
+{
+ return earlier_release(
+ list_entry(a, struct task_struct, rt_list),
+ list_entry(b, struct task_struct, rt_list));
+}
+
+
+void rt_domain_init(rt_domain_t *rt,
+ check_resched_needed_t f,
+ list_cmp_t order)
+{
+ BUG_ON(!rt);
+ if (!f)
+ f = dummy_resched;
+ if (!order)
+ order = dummy_order;
+ INIT_LIST_HEAD(&rt->ready_queue);
+ INIT_LIST_HEAD(&rt->release_queue);
+ rt->ready_lock = RW_LOCK_UNLOCKED;
+ rt->release_lock = SPIN_LOCK_UNLOCKED;
+ rt->check_resched = f;
+ rt->order = order;
+}
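+
+/* Typical plugin usage (informal sketch; see the partitioned plugins later in
+ * this patch for the real code): a plugin calls rt_domain_init() once per
+ * domain, inserts jobs with __add_ready()/__add_release() under its own lock,
+ * moves released jobs to the ready queue with __release_pending() from its
+ * tick handler, and picks the next job with __take_ready() in schedule().
+ */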
+
+/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
+ * @new: the newly released task
+ */
+void __add_ready(rt_domain_t* rt, struct task_struct *new)
+{
+ TRACE("rt: adding %s/%d (%llu, %llu) to ready queue at %llu\n",
+ new->comm, new->pid, get_exec_cost(new), get_rt_period(new),
+ sched_clock());
+
+ if (!list_insert(&new->rt_list, &rt->ready_queue, rt->order))
+ rt->check_resched(rt);
+}
+
+struct task_struct* __take_ready(rt_domain_t* rt)
+{
+ struct task_struct *t = __peek_ready(rt);
+
+ /* kick it out of the ready list */
+ if (t)
+ list_del(&t->rt_list);
+ return t;
+}
+
+struct task_struct* __peek_ready(rt_domain_t* rt)
+{
+ if (!list_empty(&rt->ready_queue))
+ return next_ready(rt);
+ else
+ return NULL;
+}
+
+/* add_release - add a real-time task to the rt release queue.
+ * @task: the sleeping task
+ */
+void __add_release(rt_domain_t* rt, struct task_struct *task)
+{
+ TRACE("rt: adding %s/%d (%llu, %llu) rel=%llu to release queue\n",
+ task->comm, task->pid, get_exec_cost(task), get_rt_period(task),
+ get_release(task));
+
+ list_insert(&task->rt_list, &rt->release_queue, release_order);
+}
+
+void __release_pending(rt_domain_t* rt)
+{
+ struct list_head *pos, *save;
+ struct task_struct *queued;
+ lt_t now = sched_clock();
+ list_for_each_safe(pos, save, &rt->release_queue) {
+ queued = list_entry(pos, struct task_struct, rt_list);
+ if (likely(is_released(queued, now))) {
+ /* this one is ready to go*/
+ list_del(pos);
+ set_rt_flags(queued, RT_F_RUNNING);
+
+ sched_trace_job_release(queued);
+
+ /* now it can be picked up */
+ barrier();
+ add_ready(rt, queued);
+ }
+ else
+ /* the release queue is ordered */
+ break;
+ }
+}
+
+void try_release_pending(rt_domain_t* rt)
+{
+ unsigned long flags;
+
+ if (spin_trylock_irqsave(&rt->release_lock, flags)) {
+ __release_pending(rt);
+ spin_unlock_irqrestore(&rt->release_lock, flags);
+ }
+}
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
new file mode 100644
index 0000000..314f8a1
--- /dev/null
+++ b/litmus/sched_gsn_edf.c
@@ -0,0 +1,733 @@
+/*
+ * litmus/sched_gsn_edf.c
+ *
+ * Implementation of the GSN-EDF scheduling algorithm.
+ *
+ * This version uses the simple approach and serializes all scheduling
+ * decisions by the use of a queue lock. This is probably not the
+ * best way to do it, but it should suffice for now.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+#include <litmus/sched_trace.h>
+
+#include <linux/module.h>
+
+/* Overview of GSN-EDF operations.
+ *
+ * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
+ * description only covers how the individual operations are implemented in
+ * LITMUS.
+ *
+ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
+ * structure (NOT the actually scheduled
+ * task). If there is another linked task To
+ * already it will set To->linked_on = NO_CPU
+ * (thereby removing its association with this
+ * CPU). However, it will not requeue the
+ * previously linked task (if any). It will set
+ * T's state to RT_F_RUNNING and check whether
+ * it is already running somewhere else. If T
+ * is scheduled somewhere else it will link
+ * it to that CPU instead (and pull the linked
+ * task to cpu). T may be NULL.
+ *
+ * unlink(T) - Unlink removes T from all scheduler data
+ * structures. If it is linked to some CPU it
+ * will link NULL to that CPU. If it is
+ * currently queued in the gsnedf queue it will
+ * be removed from the T->rt_list. It is safe to
+ * call unlink(T) if T is not linked. T may not
+ * be NULL.
+ *
+ * requeue(T)		- Requeue will insert T into the appropriate
+ *                                queue. If T has already been released, it
+ *                                will go into the ready queue. If T's release
+ *                                time is still in the future, it will go into
+ *                                the release queue. That means that T's
+ *                                release time/job no/etc. has to be updated
+ *                                before requeue(T) is called. It is not safe
+ *                                to call requeue(T) when T is already queued.
+ *                                T may not be NULL.
+ *
+ * gsnedf_job_arrival(T) - This is the catch all function when T enters
+ * the system after either a suspension or at a
+ * job release. It will queue T (which means it
+ * is not safe to call gsnedf_job_arrival(T) if
+ * T is already queued) and then check whether a
+ * preemption is necessary. If a preemption is
+ * necessary it will update the linkage
+ * accordingly and cause scheduled to be called
+ * (either with an IPI or need_resched). It is
+ * safe to call gsnedf_job_arrival(T) if T's
+ * next job has not been actually released yet
+ *                                (release time in the future). T will be put
+ * on the release queue in that case.
+ *
+ * job_completion(T) - Take care of everything that needs to be done
+ * to prepare T for its next release and place
+ * it in the right queue with
+ * gsnedf_job_arrival().
+ *
+ *
+ * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
+ * equivalent to unlink(T). Note that if you unlink a task from a CPU, none of
+ * the functions will automatically propagate a pending task from the ready
+ * queue to the now-idle CPU. This is the job of the calling function (by
+ * means of __take_ready()).
+ */
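+
+/* Informal example of how these operations compose on a job release (a sketch
+ * only; the code below is authoritative):
+ *
+ *	gsnedf_release_jobs()                (called from the tick on CPU 0)
+ *	  -> gsnedf_job_arrival(T)
+ *	       requeue(T)                    (T enters the ready queue)
+ *	       if edf_preemption_needed() on the lowest-priority CPU entry:
+ *	           link_task_to_cpu(__take_ready(), last)
+ *	           preempt(last)             (need_resched locally or an IPI)
+ */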
+
+
+/* cpu_entry_t - maintain the linked and scheduled state
+ */
+typedef struct {
+ int cpu;
+ struct task_struct* linked; /* only RT tasks */
+ struct task_struct* scheduled; /* only RT tasks */
+ struct list_head list;
+ atomic_t will_schedule; /* prevent unneeded IPIs */
+} cpu_entry_t;
+DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
+
+#define set_will_schedule() \
+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
+#define clear_will_schedule() \
+ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
+#define test_will_schedule(cpu) \
+ (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
+
+
+#define NO_CPU 0xffffffff
+
+/* The gsnedf_lock is used to serialize all scheduling events. It protects the
+ * gsnedf rt_domain as well as the per-CPU linkage state and the CPU queue
+ * below.
+ */
+static DEFINE_SPINLOCK(gsnedf_lock);
+/* the cpus queue themselves according to priority in here */
+static LIST_HEAD(gsnedf_cpu_queue);
+
+static rt_domain_t gsnedf;
+
+
+/* update_cpu_position - Move the cpu entry to the correct place to maintain
+ * order in the cpu queue. Caller must hold gsnedf lock.
+ *
+ * This really should be a heap.
+ */
+static void update_cpu_position(cpu_entry_t *entry)
+{
+ cpu_entry_t *other;
+ struct list_head *pos;
+
+ if (likely(in_list(&entry->list)))
+ list_del(&entry->list);
+ /* if we do not execute real-time jobs we just move
+ * to the end of the queue
+ */
+ if (entry->linked) {
+ list_for_each(pos, &gsnedf_cpu_queue) {
+ other = list_entry(pos, cpu_entry_t, list);
+ if (edf_higher_prio(entry->linked, other->linked)) {
+ __list_add(&entry->list, pos->prev, pos);
+ return;
+ }
+ }
+ }
+ /* if we get this far we have the lowest priority job */
+ list_add_tail(&entry->list, &gsnedf_cpu_queue);
+}
+
+/* link_task_to_cpu - Update the link of a CPU.
+ * Handles the case where the to-be-linked task is already
+ * scheduled on a different CPU.
+ */
+static noinline void link_task_to_cpu(struct task_struct* linked,
+ cpu_entry_t *entry)
+{
+ cpu_entry_t *sched;
+ struct task_struct* tmp;
+ int on_cpu;
+
+ BUG_ON(linked && !is_realtime(linked));
+
+ /* Currently linked task is set to be unlinked. */
+ if (entry->linked) {
+ entry->linked->rt_param.linked_on = NO_CPU;
+ }
+
+ /* Link new task to CPU. */
+ if (linked) {
+ set_rt_flags(linked, RT_F_RUNNING);
+		/* handle the case where the task is already scheduled somewhere else! */
+ on_cpu = linked->rt_param.scheduled_on;
+ if (on_cpu != NO_CPU) {
+ sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
+ /* this should only happen if not linked already */
+ BUG_ON(sched->linked == linked);
+
+ /* If we are already scheduled on the CPU to which we
+ * wanted to link, we don't need to do the swap --
+ * we just link ourselves to the CPU and depend on
+ * the caller to get things right.
+ */
+ if (entry != sched) {
+ tmp = sched->linked;
+ linked->rt_param.linked_on = sched->cpu;
+ sched->linked = linked;
+ update_cpu_position(sched);
+ linked = tmp;
+ }
+ }
+ if (linked) /* might be NULL due to swap */
+ linked->rt_param.linked_on = entry->cpu;
+ }
+ entry->linked = linked;
+ update_cpu_position(entry);
+}
+
+/* unlink - Make sure a task is not linked any longer to an entry
+ * where it was linked before. Must hold gsnedf_lock.
+ */
+static noinline void unlink(struct task_struct* t)
+{
+ cpu_entry_t *entry;
+
+ if (unlikely(!t)) {
+ TRACE_BUG_ON(!t);
+ return;
+ }
+
+ if (t->rt_param.linked_on != NO_CPU) {
+ /* unlink */
+ entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
+ t->rt_param.linked_on = NO_CPU;
+ link_task_to_cpu(NULL, entry);
+ } else if (in_list(&t->rt_list)) {
+ /* This is an interesting situation: t is scheduled,
+ * but was just recently unlinked. It cannot be
+ * linked anywhere else (because then it would have
+ * been relinked to this CPU), thus it must be in some
+ * queue. We must remove it from the list in this
+ * case.
+ */
+ list_del(&t->rt_list);
+ }
+}
+
+
+/* preempt - force a CPU to reschedule
+ */
+static noinline void preempt(cpu_entry_t *entry)
+{
+ /* We cannot make the is_np() decision here if it is a remote CPU
+ * because requesting exit_np() requires that we currently use the
+ * address space of the task. Thus, in the remote case we just send
+ * the IPI and let schedule() handle the problem.
+ */
+
+ if (smp_processor_id() == entry->cpu) {
+ if (entry->scheduled && is_np(entry->scheduled))
+ request_exit_np(entry->scheduled);
+ else
+ set_tsk_need_resched(current);
+ } else
+		/* in case that it is a remote CPU we have to defer the
+		 * decision to the remote CPU
+		 * FIXME: We could save a few IPIs here if we leave the flag
+		 * set while we are waiting for an np_exit().
+ */
+ if (!test_will_schedule(entry->cpu))
+ smp_send_reschedule(entry->cpu);
+}
+
+/* requeue - Put an unlinked task into gsn-edf domain.
+ * Caller must hold gsnedf_lock.
+ */
+static noinline void requeue(struct task_struct* task)
+{
+ BUG_ON(!task);
+ /* sanity check rt_list before insertion */
+ BUG_ON(in_list(&task->rt_list));
+
+ if (get_rt_flags(task) == RT_F_SLEEP) {
+		/* this job has completed;
+		 * _schedule has already taken care of updating the release
+		 * time and deadline. We just have to check whether it has
+		 * been released yet.
+ */
+ if (is_released(task, sched_clock()))
+ __add_ready(&gsnedf, task);
+ else {
+ /* it has got to wait */
+ __add_release(&gsnedf, task);
+ }
+
+ } else
+		/* this is a forced preemption,
+		 * thus the task stays in the ready_queue;
+		 * we only have to make it available to other CPUs
+ */
+ __add_ready(&gsnedf, task);
+}
+
+/* gsnedf_job_arrival: task is either resumed or released */
+static noinline void gsnedf_job_arrival(struct task_struct* task)
+{
+ cpu_entry_t* last;
+
+ BUG_ON(list_empty(&gsnedf_cpu_queue));
+ BUG_ON(!task);
+
+ /* first queue arriving job */
+ requeue(task);
+
+ /* then check for any necessary preemptions */
+ last = list_entry(gsnedf_cpu_queue.prev, cpu_entry_t, list);
+ if (edf_preemption_needed(&gsnedf, last->linked)) {
+ /* preemption necessary */
+ task = __take_ready(&gsnedf);
+ TRACE("job_arrival: task %d linked to %d\n",
+ task->pid, last->cpu);
+ if (last->linked)
+ requeue(last->linked);
+
+ link_task_to_cpu(task, last);
+ preempt(last);
+ }
+}
+
+/* check for current job releases */
+static noinline void gsnedf_release_jobs(void)
+{
+ struct list_head *pos, *save;
+ struct task_struct *queued;
+ lt_t now = sched_clock();
+
+
+ list_for_each_safe(pos, save, &gsnedf.release_queue) {
+ queued = list_entry(pos, struct task_struct, rt_list);
+ if (likely(is_released(queued, now))) {
+ /* this one is ready to go*/
+ list_del(pos);
+ set_rt_flags(queued, RT_F_RUNNING);
+
+ sched_trace_job_release(queued);
+ gsnedf_job_arrival(queued);
+ }
+ else
+ /* the release queue is ordered */
+ break;
+ }
+}
+
+/* gsnedf_scheduler_tick - this function is called for every local timer
+ * interrupt.
+ *
+ * checks whether the current task has expired and checks
+ * whether we need to preempt it if it has not expired
+ */
+static void gsnedf_scheduler_tick(void)
+{
+ unsigned long flags;
+ struct task_struct* t = current;
+
+ if (is_realtime(t) && budget_exhausted(t)) {
+ if (!is_np(t)) {
+ /* np tasks will be preempted when they become
+ * preemptable again
+ */
+ set_tsk_need_resched(t);
+ set_will_schedule();
+ TRACE("gsnedf_scheduler_tick: "
+ "%d is preemptable "
+ " => FORCE_RESCHED\n", t->pid);
+ } else {
+ TRACE("gsnedf_scheduler_tick: "
+ "%d is non-preemptable, "
+ "preemption delayed.\n", t->pid);
+ request_exit_np(t);
+ }
+ }
+
+ /* only the first CPU needs to release jobs */
+ if (smp_processor_id() == 0) {
+ spin_lock_irqsave(&gsnedf_lock, flags);
+
+ /* Try to release pending jobs */
+ gsnedf_release_jobs();
+
+ /* We don't need to check linked != scheduled since
+ * set_tsk_need_resched has been set by preempt() if necessary.
+ */
+
+ spin_unlock_irqrestore(&gsnedf_lock, flags);
+ }
+}
+
+/* caller holds gsnedf_lock */
+static noinline void job_completion(struct task_struct *t)
+{
+ BUG_ON(!t);
+
+ sched_trace_job_completion(t);
+
+ TRACE_TASK(t, "job_completion().\n");
+
+ /* set flags */
+ set_rt_flags(t, RT_F_SLEEP);
+ /* prepare for next period */
+ prepare_for_next_period(t);
+ /* unlink */
+ unlink(t);
+ /* requeue
+ * But don't requeue a blocking task. */
+ if (is_running(t))
+ gsnedf_job_arrival(t);
+}
+
+
+/* Getting schedule() right is a bit tricky. schedule() may not make any
+ * assumptions on the state of the current task since it may be called for a
+ * number of reasons: because scheduler_tick() determined that it was
+ * necessary, because sys_exit_np() was called, because some Linux
+ * subsystem determined so, or even (in the worst case) because there is a bug
+ * hidden somewhere. Thus, we must take extreme care to determine what the
+ * current state is.
+ *
+ * The CPU could currently be scheduling a task (or not), be linked (or not).
+ *
+ * The following assertions for the scheduled task could hold:
+ *
+ * - !is_running(scheduled) // the job blocks
+ * - scheduled->timeslice == 0 // the job completed (forcefully)
+ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
+ * - linked != scheduled // we need to reschedule (for any reason)
+ * - is_np(scheduled) // rescheduling must be delayed,
+ * sys_exit_np must be requested
+ *
+ * Any of these can occur together.
+ */
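+
+/* Example (informal): a preemptable job that exhausts its budget shows up
+ * below as exists && out_of_time && !np, so job_completion() prepares its
+ * next release and requeues it; if this leaves the CPU unlinked,
+ * __take_ready() links the next pending job instead.
+ */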
+static int gsnedf_schedule(struct task_struct * prev,
+ struct task_struct ** next)
+{
+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
+ int out_of_time, sleep, preempt, np, exists, blocks;
+
+ /* Will be released in finish_switch. */
+ spin_lock(&gsnedf_lock);
+ clear_will_schedule();
+
+ /* sanity checking */
+ BUG_ON(entry->scheduled && entry->scheduled != prev);
+ BUG_ON(entry->scheduled && !is_realtime(prev));
+ BUG_ON(is_realtime(prev) && !entry->scheduled);
+
+ /* (0) Determine state */
+ exists = entry->scheduled != NULL;
+ blocks = exists && !is_running(entry->scheduled);
+ out_of_time = exists && budget_exhausted(entry->scheduled);
+ np = exists && is_np(entry->scheduled);
+ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
+ preempt = entry->scheduled != entry->linked;
+
+ /* If a task blocks we have no choice but to reschedule.
+ */
+ if (blocks)
+ unlink(entry->scheduled);
+
+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
+ * We need to make sure to update the link structure anyway in case
+ * that we are still linked. Multiple calls to request_exit_np() don't
+ * hurt.
+ */
+ if (np && (out_of_time || preempt || sleep)) {
+ unlink(entry->scheduled);
+ request_exit_np(entry->scheduled);
+ }
+
+ /* Any task that is preemptable and either exhausts its execution
+ * budget or wants to sleep completes. We may have to reschedule after
+ * this.
+ */
+ if (!np && (out_of_time || sleep))
+ job_completion(entry->scheduled);
+
+ /* Link pending task if we became unlinked.
+ */
+ if (!entry->linked)
+ link_task_to_cpu(__take_ready(&gsnedf), entry);
+
+ /* The final scheduling decision. Do we need to switch for some reason?
+ * If linked different from scheduled select linked as next.
+ */
+ if ((!np || blocks) &&
+ entry->linked != entry->scheduled) {
+ /* Schedule a linked job? */
+ if (entry->linked)
+ *next = entry->linked;
+ } else
+		/* Only override the Linux scheduler if we have a real-time
+		 * task scheduled that needs to continue.
+ */
+ if (exists)
+ *next = prev;
+
+ spin_unlock(&gsnedf_lock);
+
+ /* don't race with a concurrent switch */
+ if (*next && prev != *next)
+ while ((*next)->rt_param.scheduled_on != NO_CPU)
+ cpu_relax();
+ return 0;
+}
+
+
+/* _finish_switch - we just finished the switch away from prev
+ */
+static void gsnedf_finish_switch(struct task_struct *prev)
+{
+ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
+
+ entry->scheduled = is_realtime(current) ? current : NULL;
+
+ prev->rt_param.scheduled_on = NO_CPU;
+ current->rt_param.scheduled_on = smp_processor_id();
+}
+
+
+/* Prepare a task for running in RT mode
+ * Enqueues the task into master queue data structure
+ * returns
+ * -EPERM if task is not TASK_STOPPED
+ */
+static long gsnedf_prepare_task(struct task_struct * t)
+{
+ unsigned long flags;
+ TRACE("gsn edf: prepare task %d\n", t->pid);
+
+ if (t->state == TASK_STOPPED) {
+ t->rt_param.scheduled_on = NO_CPU;
+ t->rt_param.linked_on = NO_CPU;
+
+ /* delay by 1ms */
+ release_at(t, sched_clock() + 1000000);
+
+ /* The task should be running in the queue, otherwise signal
+ * code will try to wake it up with fatal consequences.
+ */
+ t->state = TASK_RUNNING;
+ spin_lock_irqsave(&gsnedf_lock, flags);
+ t->rt_param.litmus_controlled = 1;
+ requeue(t);
+ spin_unlock_irqrestore(&gsnedf_lock, flags);
+ return 0;
+ }
+ else
+ return -EPERM;
+}
+
+static void gsnedf_wake_up_task(struct task_struct *task)
+{
+ unsigned long flags;
+ lt_t now;
+ /* We must determine whether task should go into the release
+ * queue or into the ready queue. It may enter the ready queue
+ * if it has credit left in its time slice and has not yet reached
+	 * its deadline. If it is now past its deadline, we assume this is the
+	 * arrival of a new sporadic job and thus put it in the ready queue
+	 * anyway. If it has zero budget and the next release is in the future,
+ * it has to go to the release queue.
+ */
+ TRACE("gsnedf: %d unsuspends with budget=%d\n",
+ task->pid, task->time_slice);
+
+ spin_lock_irqsave(&gsnedf_lock, flags);
+ if (!task->rt_param.litmus_controlled) {
+ task->rt_param.litmus_controlled = 1;
+ /* We need to take suspensions because of semaphores into
+ * account! If a job resumes after being suspended due to acquiring
+ * a semaphore, it should never be treated as a new job release.
+ */
+ if (get_rt_flags(task) == RT_F_EXIT_SEM) {
+ set_rt_flags(task, RT_F_RUNNING);
+ } else {
+ now = sched_clock();
+ if (is_tardy(task, now)) {
+ /* new sporadic release */
+ release_at(task, now);
+ sched_trace_job_release(task);
+ }
+ else if (task->time_slice)
+ /* came back in time before deadline
+ */
+ set_rt_flags(task, RT_F_RUNNING);
+ }
+ task->state = TASK_RUNNING;
+ gsnedf_job_arrival(task);
+ }
+ spin_unlock_irqrestore(&gsnedf_lock, flags);
+}
+
+static void gsnedf_task_blocks(struct task_struct *t)
+{
+ unsigned long flags;
+
+ /* unlink if necessary */
+ spin_lock_irqsave(&gsnedf_lock, flags);
+ unlink(t);
+ t->rt_param.litmus_controlled = 0;
+ spin_unlock_irqrestore(&gsnedf_lock, flags);
+
+ BUG_ON(!is_realtime(t));
+ TRACE("task %d suspends with budget=%d\n", t->pid, t->time_slice);
+ BUG_ON(t->rt_list.next != LIST_POISON1);
+ BUG_ON(t->rt_list.prev != LIST_POISON2);
+}
+
+
+/* When _tear_down is called, the task should not be in any queue any more
+ * as it must have blocked first. We don't have any internal state for the task,
+ * it is all in the task_struct.
+ */
+static long gsnedf_tear_down(struct task_struct * t)
+{
+ BUG_ON(!is_realtime(t));
+ TRACE_TASK(t, "RIP\n");
+ BUG_ON(t->array);
+ BUG_ON(t->rt_list.next != LIST_POISON1);
+ BUG_ON(t->rt_list.prev != LIST_POISON2);
+ return 0;
+}
+
+static long gsnedf_pi_block(struct pi_semaphore *sem,
+ struct task_struct *new_waiter)
+{
+ /* This callback has to handle the situation where a new waiter is
+ * added to the wait queue of the semaphore.
+ *
+	 * We must check if it has a higher priority than the currently
+ * highest-priority task, and then potentially reschedule.
+ */
+
+ BUG_ON(!new_waiter);
+
+ if (edf_higher_prio(new_waiter, sem->hp.task)) {
+ TRACE_TASK(new_waiter, " boosts priority\n");
+ /* called with IRQs disabled */
+ spin_lock(&gsnedf_lock);
+ /* store new highest-priority task */
+ sem->hp.task = new_waiter;
+ if (sem->holder) {
+ /* let holder inherit */
+ sem->holder->rt_param.inh_task = new_waiter;
+ unlink(sem->holder);
+ gsnedf_job_arrival(sem->holder);
+ }
+ spin_unlock(&gsnedf_lock);
+ }
+
+ return 0;
+}
+
+static long gsnedf_inherit_priority(struct pi_semaphore *sem,
+ struct task_struct *new_owner)
+{
+ /* We don't need to acquire the gsnedf_lock since at the time of this
+ * call new_owner isn't actually scheduled yet (it's still sleeping)
+ * and since the calling function already holds sem->wait.lock, which
+ * prevents concurrent sem->hp.task changes.
+ */
+
+ if (sem->hp.task && sem->hp.task != new_owner) {
+ new_owner->rt_param.inh_task = sem->hp.task;
+ TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
+ sem->hp.task->comm, sem->hp.task->pid);
+ } else
+ TRACE_TASK(new_owner,
+ "cannot inherit priority, "
+ "no higher priority job waits.\n");
+ return 0;
+}
+
+/* This function is called on a semaphore release, and assumes that
+ * the current task is also the semaphore holder.
+ */
+static long gsnedf_return_priority(struct pi_semaphore *sem)
+{
+ struct task_struct* t = current;
+ int ret = 0;
+
+ /* Find new highest-priority semaphore task
+ * if holder task is the current hp.task.
+ *
+ * Calling function holds sem->wait.lock.
+ */
+ if (t == sem->hp.task)
+ set_hp_task(sem, edf_higher_prio);
+
+ TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
+
+ if (t->rt_param.inh_task) {
+ /* interrupts already disabled by PI code */
+ spin_lock(&gsnedf_lock);
+
+ /* Reset inh_task to NULL. */
+ t->rt_param.inh_task = NULL;
+
+ /* Check if rescheduling is necessary */
+ unlink(t);
+ gsnedf_job_arrival(t);
+ spin_unlock(&gsnedf_lock);
+ }
+
+ return ret;
+}
+
+/* Plugin object */
+static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
+ .plugin_name = "GSN-EDF",
+ .scheduler_tick = gsnedf_scheduler_tick,
+ .prepare_task = gsnedf_prepare_task,
+ .sleep_next_period = complete_job,
+ .tear_down = gsnedf_tear_down,
+ .schedule = gsnedf_schedule,
+ .finish_switch = gsnedf_finish_switch,
+ .wake_up_task = gsnedf_wake_up_task,
+ .task_blocks = gsnedf_task_blocks,
+ .inherit_priority = gsnedf_inherit_priority,
+ .return_priority = gsnedf_return_priority,
+ .pi_block = gsnedf_pi_block
+};
+
+
+static int __init init_gsn_edf(void)
+{
+ int cpu;
+ cpu_entry_t *entry;
+
+ /* initialize CPU state */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ entry = &per_cpu(gsnedf_cpu_entries, cpu);
+ atomic_set(&entry->will_schedule, 0);
+ entry->linked = NULL;
+ entry->scheduled = NULL;
+ entry->cpu = cpu;
+ INIT_LIST_HEAD(&entry->list);
+ }
+
+ edf_domain_init(&gsnedf, NULL);
+ return register_sched_plugin(&gsn_edf_plugin);
+}
+
+
+module_init(init_gsn_edf);
diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c
new file mode 100644
index 0000000..f05fc56
--- /dev/null
+++ b/litmus/sched_plugin.c
@@ -0,0 +1,169 @@
+/* sched_plugin.c -- core infrastructure for the scheduler plugin system
+ *
+ * This file includes the initialization of the plugin system, the no-op Linux
+ * scheduler plugin and some dummy functions.
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+
+
+/*************************************************************
+ * Dummy plugin functions *
+ *************************************************************/
+
+static void litmus_dummy_finish_switch(struct task_struct * prev)
+{
+}
+
+static int litmus_dummy_schedule(struct task_struct * prev,
+ struct task_struct** next)
+{
+ return 0;
+}
+
+static void litmus_dummy_scheduler_tick(void)
+{
+}
+
+static long litmus_dummy_prepare_task(struct task_struct *t)
+{
+ return -ENOSYS;
+}
+
+static void litmus_dummy_wake_up_task(struct task_struct *task)
+{
+ printk(KERN_WARNING "task %d: unhandled real-time wake up!\n",
+ task->pid);
+}
+
+static void litmus_dummy_task_blocks(struct task_struct *task)
+{
+}
+
+static long litmus_dummy_tear_down(struct task_struct *task)
+{
+ return 0;
+}
+
+static long litmus_dummy_sleep_next_period(void)
+{
+ return -ENOSYS;
+}
+
+static long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
+ struct task_struct *new_owner)
+{
+ return -ENOSYS;
+}
+
+static long litmus_dummy_return_priority(struct pi_semaphore *sem)
+{
+ return -ENOSYS;
+}
+
+static long litmus_dummy_pi_block(struct pi_semaphore *sem,
+ struct task_struct *new_waiter)
+{
+ return -ENOSYS;
+}
+
+
+/* The default scheduler plugin. It doesn't do anything and lets Linux do its
+ * job.
+ */
+struct sched_plugin linux_sched_plugin = {
+ .plugin_name = "Linux",
+ .scheduler_tick = litmus_dummy_scheduler_tick,
+ .prepare_task = litmus_dummy_prepare_task,
+ .tear_down = litmus_dummy_tear_down,
+ .wake_up_task = litmus_dummy_wake_up_task,
+ .task_blocks = litmus_dummy_task_blocks,
+ .sleep_next_period = litmus_dummy_sleep_next_period,
+ .schedule = litmus_dummy_schedule,
+ .finish_switch = litmus_dummy_finish_switch,
+ .inherit_priority = litmus_dummy_inherit_priority,
+ .return_priority = litmus_dummy_return_priority,
+ .pi_block = litmus_dummy_pi_block
+};
+
+/*
+ * The reference to the current plugin that is used to schedule tasks within
+ * the system. It stores references to the actual function implementations.
+ * It should be initialized by calling "init_***_plugin()".
+ */
+struct sched_plugin *curr_sched_plugin = &linux_sched_plugin;
+
+/* the list of registered scheduling plugins */
+static LIST_HEAD(sched_plugins);
+static DEFINE_SPINLOCK(sched_plugins_lock);
+
+#define CHECK(func) {\
+ if (!plugin->func) \
+ plugin->func = litmus_dummy_ ## func;}
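+
+/* For example, CHECK(schedule) expands to:
+ *	{ if (!plugin->schedule) plugin->schedule = litmus_dummy_schedule; }
+ * so every callback left NULL by a plugin falls back to its dummy.
+ */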
+
+/* FIXME: get reference to module */
+int register_sched_plugin(struct sched_plugin* plugin)
+{
+ printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n",
+ plugin->plugin_name);
+
+ /* make sure we don't trip over null pointers later */
+ CHECK(finish_switch);
+ CHECK(schedule);
+ CHECK(scheduler_tick);
+ CHECK(wake_up_task);
+ CHECK(tear_down);
+ CHECK(task_blocks);
+ CHECK(prepare_task);
+ CHECK(sleep_next_period);
+ CHECK(inherit_priority);
+ CHECK(return_priority);
+ CHECK(pi_block);
+
+ spin_lock(&sched_plugins_lock);
+ list_add(&plugin->list, &sched_plugins);
+ spin_unlock(&sched_plugins_lock);
+
+ return 0;
+}
+
+
+/* FIXME: reference counting, etc. */
+struct sched_plugin* find_sched_plugin(const char* name)
+{
+ struct list_head *pos;
+ struct sched_plugin *plugin;
+
+ spin_lock(&sched_plugins_lock);
+ list_for_each(pos, &sched_plugins) {
+ plugin = list_entry(pos, struct sched_plugin, list);
+ if (!strcmp(plugin->plugin_name, name))
+ goto out_unlock;
+ }
+ plugin = NULL;
+
+out_unlock:
+ spin_unlock(&sched_plugins_lock);
+ return plugin;
+}
+
+int print_sched_plugins(char* buf, int max)
+{
+ int count = 0;
+ struct list_head *pos;
+ struct sched_plugin *plugin;
+
+ spin_lock(&sched_plugins_lock);
+ list_for_each(pos, &sched_plugins) {
+ plugin = list_entry(pos, struct sched_plugin, list);
+ count += snprintf(buf + count, max - count, "%s\n", plugin->plugin_name);
+ if (max - count <= 0)
+ break;
+ }
+ spin_unlock(&sched_plugins_lock);
+ return count;
+}
diff --git a/litmus/sched_psn_edf.c b/litmus/sched_psn_edf.c
new file mode 100644
index 0000000..27f4b5c
--- /dev/null
+++ b/litmus/sched_psn_edf.c
@@ -0,0 +1,458 @@
+
+/*
+ * litmus/sched_psn_edf.c
+ *
+ * Implementation of the PSN-EDF scheduler plugin.
+ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
+ *
+ * Suspensions and non-preemptable sections are supported.
+ * Priority inheritance is not supported.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+
+
+typedef struct {
+ rt_domain_t domain;
+ int cpu;
+ struct task_struct* scheduled; /* only RT tasks */
+ spinlock_t lock; /* protects the domain and
+ * serializes scheduling decisions
+ */
+} psnedf_domain_t;
+
+DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
+
+#define local_edf (&__get_cpu_var(psnedf_domains).domain)
+#define local_pedf (&__get_cpu_var(psnedf_domains))
+#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
+#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
+#define task_edf(task) remote_edf(get_partition(task))
+#define task_pedf(task) remote_pedf(get_partition(task))
+
+
+static void psnedf_domain_init(psnedf_domain_t* pedf,
+ check_resched_needed_t check,
+ int cpu)
+{
+ edf_domain_init(&pedf->domain, check);
+ pedf->cpu = cpu;
+ pedf->lock = SPIN_LOCK_UNLOCKED;
+ pedf->scheduled = NULL;
+}
+
+static void requeue(struct task_struct* t, rt_domain_t *edf)
+{
+ /* only requeue if t is actually running */
+ BUG_ON(!is_running(t));
+
+ if (t->state != TASK_RUNNING)
+		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+
+ set_rt_flags(t, RT_F_RUNNING);
+ if (is_released(t, sched_clock()))
+ __add_ready(edf, t);
+ else
+ __add_release(edf, t); /* it has got to wait */
+}
+
+/* we assume the lock is being held */
+static void preempt(psnedf_domain_t *pedf)
+{
+ if (smp_processor_id() == pedf->cpu) {
+ if (pedf->scheduled && is_np(pedf->scheduled))
+ request_exit_np(pedf->scheduled);
+ else
+ set_tsk_need_resched(current);
+ } else
+		/* in case that it is a remote CPU we have to defer the
+		 * decision to the remote CPU
+ */
+ smp_send_reschedule(pedf->cpu);
+}
+
+/* This check is trivial in partitioned systems as we only have to consider
+ * the CPU of the partition.
+ */
+static int psnedf_check_resched(rt_domain_t *edf)
+{
+ psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
+ int ret = 0;
+
+ /* because this is a callback from rt_domain_t we already hold
+ * the necessary lock for the ready queue
+ */
+ if (edf_preemption_needed(edf, pedf->scheduled)) {
+ preempt(pedf);
+ ret = 1;
+ }
+ return ret;
+}
+
+
+static void psnedf_scheduler_tick(void)
+{
+ unsigned long flags;
+ struct task_struct *t = current;
+ rt_domain_t *edf = local_edf;
+ psnedf_domain_t *pedf = local_pedf;
+
+ /* Check for inconsistency. We don't need the lock for this since
+ * ->scheduled is only changed in schedule, which obviously is not
+ * executing in parallel on this CPU
+ */
+ BUG_ON(is_realtime(t) && t != pedf->scheduled);
+
+ if (is_realtime(t) && budget_exhausted(t)) {
+ if (!is_np(t))
+ set_tsk_need_resched(t);
+ else {
+ TRACE("psnedf_scheduler_tick: "
+ "%d is non-preemptable, "
+ "preemption delayed.\n", t->pid);
+ request_exit_np(t);
+ }
+ }
+
+ spin_lock_irqsave(&pedf->lock, flags);
+ __release_pending(edf);
+ if (edf_preemption_needed(edf, t))
+ set_tsk_need_resched(t);
+ spin_unlock_irqrestore(&pedf->lock, flags);
+}
+
+static void job_completion(struct task_struct* t)
+{
+ TRACE_TASK(t, "job_completion().\n");
+ set_rt_flags(t, RT_F_SLEEP);
+ prepare_for_next_period(t);
+}
+
+static int psnedf_schedule(struct task_struct * prev,
+ struct task_struct ** next)
+{
+ psnedf_domain_t* pedf = local_pedf;
+ rt_domain_t* edf = &pedf->domain;
+
+ int out_of_time, sleep, preempt,
+ np, exists, blocks, resched;
+
+ spin_lock(&pedf->lock);
+
+ /* sanity checking */
+ BUG_ON(pedf->scheduled && pedf->scheduled != prev);
+ BUG_ON(pedf->scheduled && !is_realtime(prev));
+
+ /* (0) Determine state */
+ exists = pedf->scheduled != NULL;
+ blocks = exists && !is_running(pedf->scheduled);
+ out_of_time = exists && budget_exhausted(pedf->scheduled);
+ np = exists && is_np(pedf->scheduled);
+ sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
+ preempt = edf_preemption_needed(edf, prev);
+
+ /* If we need to preempt do so.
+ * The following checks set resched to 1 in case of special
+ * circumstances.
+ */
+ resched = preempt;
+
+ /* If a task blocks we have no choice but to reschedule.
+ */
+ if (blocks)
+ resched = 1;
+
+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
+ * Multiple calls to request_exit_np() don't hurt.
+ */
+ if (np && (out_of_time || preempt || sleep))
+ request_exit_np(pedf->scheduled);
+
+ /* Any task that is preemptable and either exhausts its execution
+ * budget or wants to sleep completes. We may have to reschedule after
+ * this.
+ */
+ if (!np && (out_of_time || sleep)) {
+ job_completion(pedf->scheduled);
+ resched = 1;
+ }
+
+ /* The final scheduling decision. Do we need to switch for some reason?
+ * Switch if we are in RT mode and have no task or if we need to
+ * resched.
+ */
+ *next = NULL;
+ if ((!np || blocks) && (resched || !exists)) {
+ /* Take care of a previously scheduled
+ * job by taking it out of the Linux runqueue.
+ */
+ if (pedf->scheduled) {
+ /* as opposed to global schedulers that switch without
+ * a lock being held we can requeue already here since
+ * no other CPU will schedule from this domain.
+ */
+ if (!blocks)
+ requeue(pedf->scheduled, edf);
+ }
+ *next = __take_ready(edf);
+ } else
+ /* Only override Linux scheduler if we have a real-time task
+ * scheduled that needs to continue.
+ */
+ if (exists)
+ *next = prev;
+
+ if (*next)
+ set_rt_flags(*next, RT_F_RUNNING);
+
+ pedf->scheduled = *next;
+ spin_unlock(&pedf->lock);
+ return 0;
+}
+
+
+/* Prepare a task for running in RT mode
+ * Enqueues the task into master queue data structure
+ * returns
+ * -EPERM if task is not TASK_STOPPED
+ */
+static long psnedf_prepare_task(struct task_struct * t)
+{
+ rt_domain_t* edf = task_edf(t);
+ psnedf_domain_t* pedf = task_pedf(t);
+ unsigned long flags;
+
+ TRACE("[%d] psn edf: prepare task %d on CPU %d\n",
+ smp_processor_id(), t->pid, get_partition(t));
+ if (t->state == TASK_STOPPED) {
+
+ /* 1ms delay */
+ release_at(t, sched_clock() + 1000000);
+
+ /* The task should be running in the queue, otherwise signal
+ * code will try to wake it up with fatal consequences.
+ */
+ t->state = TASK_RUNNING;
+ spin_lock_irqsave(&pedf->lock, flags);
+ t->rt_param.litmus_controlled = 1;
+ __add_release(edf, t);
+ spin_unlock_irqrestore(&pedf->lock, flags);
+ return 0;
+ } else
+ return -EPERM;
+}
+
+static void psnedf_wake_up_task(struct task_struct *task)
+{
+ unsigned long flags;
+ psnedf_domain_t* pedf = task_pedf(task);
+ rt_domain_t* edf = task_edf(task);
+ lt_t now;
+
+ TRACE("psnedf: %d unsuspends with budget=%d\n",
+ task->pid, task->time_slice);
+
+ spin_lock_irqsave(&pedf->lock, flags);
+ if (!task->rt_param.litmus_controlled) {
+ BUG_ON(in_list(&task->rt_list));
+ task->rt_param.litmus_controlled = 1;
+ /* We need to take suspensions because of semaphores into
+ * account! If a job resumes after being suspended due to acquiring
+ * a semaphore, it should never be treated as a new job release.
+ */
+ now = sched_clock();
+ if (is_tardy(task, now) &&
+ get_rt_flags(task) != RT_F_EXIT_SEM) {
+ /* new sporadic release */
+ release_at(task, now);
+ sched_trace_job_release(task);
+ }
+ task->state = TASK_RUNNING;
+ requeue(task, edf);
+ }
+ spin_unlock_irqrestore(&pedf->lock, flags);
+}
+
+static void psnedf_task_blocks(struct task_struct *t)
+{
+ BUG_ON(!is_realtime(t));
+ /* not really anything to do since it can only block if
+ * it is running, and when it is not running it is not in any
+ * queue anyway.
+ */
+ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
+ BUG_ON(in_list(&t->rt_list));
+ t->rt_param.litmus_controlled = 0;
+}
+
+
+/* When _tear_down is called, the task should not be in any queue any more
+ * as it must have blocked first. We don't have any internal state for the task,
+ * it is all in the task_struct.
+ */
+static long psnedf_tear_down(struct task_struct * t)
+{
+ BUG_ON(!is_realtime(t));
+	TRACE_TASK(t, "tear down called\n");
+ BUG_ON(t->array);
+ BUG_ON(in_list(&t->rt_list));
+ return 0;
+}
+
+static long psnedf_pi_block(struct pi_semaphore *sem,
+ struct task_struct *new_waiter)
+{
+ psnedf_domain_t* pedf;
+ rt_domain_t* edf;
+ struct task_struct* t;
+ int cpu = get_partition(new_waiter);
+
+ BUG_ON(!new_waiter);
+
+ if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
+ TRACE_TASK(new_waiter, " boosts priority\n");
+ pedf = task_pedf(new_waiter);
+ edf = task_edf(new_waiter);
+
+ /* interrupts already disabled */
+ spin_lock(&pedf->lock);
+
+ /* store new highest-priority task */
+ sem->hp.cpu_task[cpu] = new_waiter;
+ if (sem->holder &&
+ get_partition(sem->holder) == get_partition(new_waiter)) {
+ /* let holder inherit */
+ sem->holder->rt_param.inh_task = new_waiter;
+ t = sem->holder;
+ if (in_list(&t->rt_list)) {
+ /* queued in domain*/
+ list_del(&t->rt_list);
+ /* readd to make priority change take place */
+ if (is_released(t, sched_clock()))
+ __add_ready(edf, t);
+ else
+ __add_release(edf, t);
+ }
+ }
+
+ /* check if we need to reschedule */
+ if (edf_preemption_needed(edf, current))
+ preempt(pedf);
+
+ spin_unlock(&pedf->lock);
+ }
+
+ return 0;
+}
+
+static long psnedf_inherit_priority(struct pi_semaphore *sem,
+ struct task_struct *new_owner)
+{
+ int cpu = get_partition(new_owner);
+
+ /* FIXME: This doesn't look correct at all!
+ * Why do we inherit in any case???
+ */
+ new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
+ if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
+ TRACE_TASK(new_owner,
+ "inherited priority from %s/%d\n",
+ sem->hp.cpu_task[cpu]->comm,
+ sem->hp.cpu_task[cpu]->pid);
+ } else
+ TRACE_TASK(new_owner,
+ "cannot inherit priority: "
+ "no higher priority job waits on this CPU!\n");
+ /* make new owner non-preemptable as required by FMLP under
+ * PSN-EDF.
+ */
+ make_np(new_owner);
+ return 0;
+}
+
+
+/* This function is called on a semaphore release, and assumes that
+ * the current task is also the semaphore holder.
+ */
+static long psnedf_return_priority(struct pi_semaphore *sem)
+{
+ struct task_struct* t = current;
+ psnedf_domain_t* pedf = task_pedf(t);
+ rt_domain_t* edf = task_edf(t);
+ int ret = 0;
+ int cpu = get_partition(current);
+
+
+ /* Find new highest-priority semaphore task
+ * if holder task is the current hp.cpu_task[cpu].
+ *
+ * Calling function holds sem->wait.lock.
+ */
+ if (t == sem->hp.cpu_task[cpu])
+ set_hp_cpu_task(sem, cpu, edf_higher_prio);
+
+ take_np(t);
+ if (current->rt_param.inh_task) {
+ TRACE_CUR("return priority of %s/%d\n",
+ current->rt_param.inh_task->comm,
+ current->rt_param.inh_task->pid);
+ spin_lock(&pedf->lock);
+
+ /* Reset inh_task to NULL. */
+ current->rt_param.inh_task = NULL;
+
+ /* check if we need to reschedule */
+ if (edf_preemption_needed(edf, current))
+ preempt(pedf);
+
+ spin_unlock(&pedf->lock);
+ } else
+ TRACE_CUR(" no priority to return %p\n", sem);
+
+ return ret;
+}
+
+
+/* Plugin object */
+static struct sched_plugin psn_edf_plugin __cacheline_aligned_in_smp = {
+ .plugin_name = "PSN-EDF",
+ .srp_active = 1,
+ .scheduler_tick = psnedf_scheduler_tick,
+ .prepare_task = psnedf_prepare_task,
+ .sleep_next_period = complete_job,
+ .tear_down = psnedf_tear_down,
+ .schedule = psnedf_schedule,
+ .wake_up_task = psnedf_wake_up_task,
+ .task_blocks = psnedf_task_blocks,
+ .pi_block = psnedf_pi_block,
+ .inherit_priority = psnedf_inherit_priority,
+ .return_priority = psnedf_return_priority
+};
+
+
+static int __init init_psn_edf(void)
+{
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++)
+ {
+ psnedf_domain_init(remote_pedf(i),
+ psnedf_check_resched, i);
+ printk("PSN-EDF: CPU partition %d initialized.\n", i);
+ }
+ return register_sched_plugin(&psn_edf_plugin);
+}
+
+
+
+module_init(init_psn_edf);
diff --git a/litmus/sched_rm.c b/litmus/sched_rm.c
new file mode 100644
index 0000000..57acde4
--- /dev/null
+++ b/litmus/sched_rm.c
@@ -0,0 +1,397 @@
+
+/* RM implementation.
+ * Will support the M-PCP eventually.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/rm_common.h>
+
+
+typedef struct {
+ rt_domain_t domain;
+ int cpu;
+ struct task_struct* scheduled; /* only RT tasks */
+ spinlock_t lock; /* protects the domain and
+ * serializes scheduling decisions
+ */
+} rm_domain_t;
+
+DEFINE_PER_CPU(rm_domain_t, rm_domains);
+
+#define local_dom (&__get_cpu_var(rm_domains).domain)
+#define local_part (&__get_cpu_var(rm_domains))
+#define remote_dom(cpu) (&per_cpu(rm_domains, cpu).domain)
+#define remote_part(cpu) (&per_cpu(rm_domains, cpu))
+#define task_dom(task) remote_dom(get_partition(task))
+#define task_part(task) remote_part(get_partition(task))
+
+
+static void prm_domain_init(rm_domain_t* part,
+ check_resched_needed_t check,
+ int cpu)
+{
+ rm_domain_init(&part->domain, check);
+ part->cpu = cpu;
+ part->lock = SPIN_LOCK_UNLOCKED;
+ part->scheduled = NULL;
+}
+
+static void requeue(struct task_struct* t, rt_domain_t *dom)
+{
+ /* only requeue if t is actually running */
+ BUG_ON(!is_running(t));
+
+ if (t->state != TASK_RUNNING)
+		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+
+ set_rt_flags(t, RT_F_RUNNING);
+ if (is_released(t, sched_clock()))
+ __add_ready(dom, t);
+ else
+ __add_release(dom, t); /* it has got to wait */
+}
+
+/* we assume the lock is being held */
+static void preempt(rm_domain_t *part)
+{
+ if (smp_processor_id() == part->cpu) {
+ if (part->scheduled && is_np(part->scheduled))
+ request_exit_np(part->scheduled);
+ else
+ set_tsk_need_resched(current);
+ } else
+		/* in case that it is a remote CPU we have to defer the
+		 * decision to the remote CPU
+ */
+ smp_send_reschedule(part->cpu);
+}
+
+/* This check is trivial in partitioned systems as we only have to consider
+ * the CPU of the partition.
+ */
+static int rm_check_resched(rt_domain_t *dom)
+{
+ rm_domain_t *part = container_of(dom, rm_domain_t, domain);
+ int ret = 0;
+
+ /* because this is a callback from rt_domain_t we already hold
+ * the necessary lock for the ready queue
+ */
+ if (rm_preemption_needed(dom, part->scheduled)) {
+ preempt(part);
+ ret = 1;
+ }
+ return ret;
+}
+
+static void __rm_set_prio(struct task_struct *t, struct pcp_priority* new_prio,
+ rm_domain_t* part)
+{
+ t->rt_param.cur_prio = new_prio;
+ if (in_list(&t->rt_list)) {
+ list_del(&t->rt_list);
+ requeue(t, &part->domain);
+ } else
+ rm_check_resched(&part->domain);
+}
+
+/* call only with IRQs disabled */
+void rm_set_prio(struct task_struct *t, struct pcp_priority* new_prio)
+{
+ unsigned long flags;
+ rm_domain_t *part = task_part(t);
+
+ BUG_ON(!is_realtime(t));
+ spin_lock_irqsave(&part->lock, flags);
+ __rm_set_prio(t, new_prio, part);
+ spin_unlock_irqrestore(&part->lock, flags);
+}
+
+static void rm_scheduler_tick(void)
+{
+ unsigned long flags;
+ struct task_struct *t = current;
+ rt_domain_t *dom = local_dom;
+ rm_domain_t *part = local_part;
+
+ /* Check for inconsistency. We don't need the lock for this since
+ * ->scheduled is only changed in schedule, which obviously is not
+ * executing in parallel on this CPU
+ */
+ BUG_ON(is_realtime(t) && t != part->scheduled);
+
+/* if (is_realtime(t) && budget_exhausted(t)) {
+ if (!is_np(t))
+ set_tsk_need_resched(t);
+ else {
+ TRACE("rm_scheduler_tick: "
+ "%d is non-preemptable, "
+ "preemption delayed.\n", t->pid);
+ request_exit_np(t);
+ }
+ }
+*/
+ spin_lock_irqsave(&part->lock, flags);
+ __release_pending(dom);
+ if (rm_preemption_needed(dom, t))
+ set_tsk_need_resched(t);
+ spin_unlock_irqrestore(&part->lock, flags);
+}
+
+static void job_completion(struct task_struct* t)
+{
+ TRACE_TASK(t, "job_completion().\n");
+ set_rt_flags(t, RT_F_SLEEP);
+ prepare_for_next_period(t);
+}
+
+static int rm_schedule(struct task_struct * prev,
+ struct task_struct ** next)
+{
+ rm_domain_t* part = local_part;
+ rt_domain_t* dom = &part->domain;
+
+ int sleep, preempt,
+ np, exists, blocks, resched;
+// int out_of_time;
+
+ spin_lock(&part->lock);
+
+ /* sanity checking */
+ BUG_ON(part->scheduled && part->scheduled != prev);
+ BUG_ON(part->scheduled && !is_realtime(prev));
+
+ /* (0) Determine state */
+ exists = part->scheduled != NULL;
+ blocks = exists && !is_running(part->scheduled);
+// out_of_time = exists && budget_exhausted(part->scheduled);
+#define out_of_time 0
+ np = exists && is_np(part->scheduled);
+ sleep = exists && get_rt_flags(part->scheduled) == RT_F_SLEEP;
+ preempt = rm_preemption_needed(dom, prev);
+
+ /* If we need to preempt do so.
+ * The following checks set resched to 1 in case of special
+ * circumstances.
+ */
+ resched = preempt;
+
+ /* If a task blocks we have no choice but to reschedule.
+ */
+ if (blocks)
+ resched = 1;
+
+ /* Request a sys_exit_np() call if we would like to preempt but cannot.
+ * Multiple calls to request_exit_np() don't hurt.
+ */
+ if (np && (out_of_time || preempt || sleep))
+ request_exit_np(part->scheduled);
+
+ /* Any task that is preemptable and either exhausts its execution
+ * budget or wants to sleep completes. We may have to reschedule after
+ * this.
+ */
+ if (!np && (out_of_time || sleep)) {
+ job_completion(part->scheduled);
+ resched = 1;
+ }
+
+ /* The final scheduling decision. Do we need to switch for some reason?
+ * Switch if we are in RT mode and have no task or if we need to
+ * resched.
+ */
+ *next = NULL;
+ if ((!np || blocks) && (resched || !exists)) {
+ /* Take care of a previously scheduled
+ * job by taking it out of the Linux runqueue.
+ */
+ if (part->scheduled) {
+			/* Unlike global schedulers, which switch without
+			 * holding a lock, we can requeue the task right
+			 * here, since no other CPU schedules from this
+			 * domain.
+			 */
+ if (!blocks)
+ requeue(part->scheduled, dom);
+ }
+ *next = __take_ready(dom);
+ } else
+ /* Only override Linux scheduler if we have a real-time task
+ * scheduled that needs to continue.
+ */
+ if (exists)
+ *next = prev;
+
+ if (*next)
+ set_rt_flags(*next, RT_F_RUNNING);
+
+ part->scheduled = *next;
+ spin_unlock(&part->lock);
+ return 0;
+}
+
+
+/* Prepare a task for running in RT mode.
+ * Enqueues the task into the master queue data structure.
+ * Returns -EPERM if the task is not TASK_STOPPED.
+ */
+static long rm_prepare_task(struct task_struct * t)
+{
+ rt_domain_t* dom = task_dom(t);
+ rm_domain_t* part = task_part(t);
+ unsigned long flags;
+
+ TRACE("[%d] P-RM: prepare task %d on CPU %d\n",
+ smp_processor_id(), t->pid, get_partition(t));
+ if (t->state == TASK_STOPPED) {
+//FIXME if (!t->rt_param.task_params.prio) {
+ TRACE_TASK(t, "using rate-monotonic prio assignment\n");
+ t->rt_param.pcp_prio.prio = get_rt_period(t);
+// } else {
+// TRACE_TASK(t, "using user-defined static prio assignment\n");
+// t->rt_param.pcp_prio.prio = t->rt_param.task_params.prio;
+// }
+ t->rt_param.pcp_prio.in_global_cs = 0;
+ t->rt_param.pcp_prio.pid = t->pid;
+ t->rt_param.cur_prio = &t->rt_param.pcp_prio;
+ INIT_LIST_HEAD(&t->rt_param.owned_semaphores);
+ /* 1ms delay */
+ release_at(t, sched_clock() + 1000000);
+
+		/* The task state must be TASK_RUNNING while it sits in the
+		 * release queue; otherwise the signal code will try to wake
+		 * it up, with fatal consequences.
+		 */
+ t->state = TASK_RUNNING;
+
+ spin_lock_irqsave(&part->lock, flags);
+ t->rt_param.litmus_controlled = 1;
+ __add_release(dom, t);
+ spin_unlock_irqrestore(&part->lock, flags);
+ return 0;
+ } else
+ return -EPERM;
+}
+
+static void rm_wake_up_task(struct task_struct *task)
+{
+ unsigned long flags;
+ rm_domain_t* part = task_part(task);
+ rt_domain_t* dom = task_dom(task);
+
+	TRACE_TASK(task, "P-RM: unsuspends.\n");
+
+ spin_lock_irqsave(&part->lock, flags);
+ if (!task->rt_param.litmus_controlled) {
+ BUG_ON(in_list(&task->rt_list));
+ task->rt_param.litmus_controlled = 1;
+ task->state = TASK_RUNNING;
+ requeue(task, dom);
+ }
+ spin_unlock_irqrestore(&part->lock, flags);
+}
+
+static void rm_task_blocks(struct task_struct *t)
+{
+ BUG_ON(!is_realtime(t));
+	/* Not much to do here: a task can only block while it is
+	 * running, and while it is not running it is not in any
+	 * queue anyway.
+	 */
+ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
+ BUG_ON(in_list(&t->rt_list));
+ t->rt_param.litmus_controlled = 0;
+}
+
+
+/* When _tear_down is called, the task should not be in any queue any more
+ * as it must have blocked first. We don't have any internal state for the task,
+ * it is all in the task_struct.
+ */
+static long rm_tear_down(struct task_struct * t)
+{
+ BUG_ON(!is_realtime(t));
+ TRACE_TASK(t, "tear down called");
+ BUG_ON(t->array);
+ BUG_ON(in_list(&t->rt_list));
+ return 0;
+}
+
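+/* A priority that beats every regular RM priority. Assuming the field order
+ * matches the assignments in rm_prepare_task() above, this is
+ * { .prio = 0, .in_global_cs = 1, .pid = INT_MAX }: a period of 0 is the
+ * numerically smallest and hence highest RM priority, and the task is marked
+ * as being inside a global critical section.
+ */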
+static struct pcp_priority boosted = {0, 1, INT_MAX};
+
+static long rm_pi_block(struct pi_semaphore *sem,
+ struct task_struct *new_waiter)
+{
+ return 0;
+}
+
+static long rm_inherit_priority(struct pi_semaphore *sem,
+ struct task_struct *new_owner)
+{
+ rm_set_prio(new_owner, &boosted);
+ TRACE_TASK(new_owner, "priority boosted");
+ make_np(new_owner);
+ return 0;
+}
+
+
+/* This function is called on a semaphore release, and assumes that
+ * the current task is also the semaphore holder.
+ */
+static long rm_return_priority(struct pi_semaphore *sem)
+{
+ struct task_struct* t = current;
+
+ take_np(t);
+ /* reset prio to trigger resched if required */
+ rm_set_prio(t, &t->rt_param.pcp_prio);
+ TRACE_TASK(t, "prio boost ended");
+ return 0;
+}
+
+/* Plugin object */
+static struct sched_plugin p_rm_plugin __cacheline_aligned_in_smp = {
+ .plugin_name = "P-RM",
+ /* PCP and SRP don't really work together, but this is something the
+ * user has to get right for the moment.
+ * System will not crash and burn, but timing correctness is not ensured.
+ * Just don't use both APIs at the same time for now.
+ */
+ .pcp_active = 1,
+ .srp_active = 1,
+ .scheduler_tick = rm_scheduler_tick,
+ .prepare_task = rm_prepare_task,
+ .sleep_next_period = complete_job,
+ .tear_down = rm_tear_down,
+ .schedule = rm_schedule,
+ .wake_up_task = rm_wake_up_task,
+ .task_blocks = rm_task_blocks,
+ .pi_block = rm_pi_block,
+ .inherit_priority = rm_inherit_priority,
+ .return_priority = rm_return_priority
+};
+
+static int __init init_rm(void)
+{
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++)
+ {
+ prm_domain_init(remote_part(i),
+ rm_check_resched, i);
+ printk("P-RM: CPU partition %d initialized.\n", i);
+ }
+ return register_sched_plugin(&p_rm_plugin);
+}
+
+
+
+module_init(init_rm);
diff --git a/litmus/sched_trace.c b/litmus/sched_trace.c
new file mode 100644
index 0000000..0976e83
--- /dev/null
+++ b/litmus/sched_trace.c
@@ -0,0 +1,541 @@
+/* sched_trace.c -- record scheduling events to a byte stream.
+ *
+ * TODO: Move ring buffer to a lockfree implementation.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+
+#include <litmus/sched_trace.h>
+#include <litmus/litmus.h>
+
+
+typedef struct {
+ /* guard read and write pointers */
+ spinlock_t lock;
+ /* guard against concurrent freeing of buffer */
+ rwlock_t del_lock;
+
+ /* memory allocated for ring buffer */
+ unsigned long order;
+ char* buf;
+ char* end;
+
+	/* Read and write pointers; they may not cross.
+	 * writep points to the position of the next write,
+	 * readp to the position of the last read.
+	 */
+ char* writep;
+ char* readp;
+
+} ring_buffer_t;
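+
+/* Pointer convention (informal sketch): rb_alloc_buf() below initializes the
+ * buffer to readp == buf and writep == buf + 1, which denotes an empty
+ * buffer; data always lives between readp + 1 and writep - 1 (circularly).
+ * Since the pointers may not cross, one byte of the allocation is effectively
+ * sacrificed, so a buffer of 2^order pages holds at most
+ * PAGE_SIZE * 2^order - 1 bytes at a time.
+ */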
+
+#define EMPTY_RING_BUFFER { \
+ .lock = SPIN_LOCK_UNLOCKED, \
+ .del_lock = RW_LOCK_UNLOCKED, \
+ .buf = NULL, \
+ .end = NULL, \
+ .writep = NULL, \
+ .readp = NULL \
+}
+
+void rb_init(ring_buffer_t* buf)
+{
+ *buf = (ring_buffer_t) EMPTY_RING_BUFFER;
+}
+
+int rb_alloc_buf(ring_buffer_t* buf, unsigned long order)
+{
+ unsigned long flags;
+ int error = 0;
+ char *mem;
+
+ /* do memory allocation while not atomic */
+ mem = (char *) __get_free_pages(GFP_KERNEL, order);
+ if (!mem)
+ return -ENOMEM;
+ write_lock_irqsave(&buf->del_lock, flags);
+ BUG_ON(buf->buf);
+ buf->buf = mem;
+ buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1;
+ memset(buf->buf, 0xff, buf->end - buf->buf);
+ buf->order = order;
+ buf->writep = buf->buf + 1;
+ buf->readp = buf->buf;
+ write_unlock_irqrestore(&buf->del_lock, flags);
+ return error;
+}
+
+int rb_free_buf(ring_buffer_t* buf)
+{
+ unsigned long flags;
+ int error = 0;
+ write_lock_irqsave(&buf->del_lock, flags);
+ BUG_ON(!buf->buf);
+ free_pages((unsigned long) buf->buf, buf->order);
+ buf->buf = NULL;
+ buf->end = NULL;
+ buf->writep = NULL;
+ buf->readp = NULL;
+ write_unlock_irqrestore(&buf->del_lock, flags);
+ return error;
+}
+
+/* Assumption: concurrent writes are serialized externally
+ *
+ * Will only succeed if there is enough space for all len bytes.
+ */
+int rb_put(ring_buffer_t* buf, char* mem, size_t len)
+{
+ unsigned long flags;
+	char *r, *w;
+ int error = 0;
+ read_lock_irqsave(&buf->del_lock, flags);
+ if (!buf->buf) {
+ error = -ENODEV;
+ goto out;
+ }
+ spin_lock(&buf->lock);
+ r = buf->readp;
+ w = buf->writep;
+ spin_unlock(&buf->lock);
+ if (r < w && buf->end - w >= len - 1) {
+		/* easy case: there is enough space in the buffer
+		 * to write it in one continuous chunk */
+ memcpy(w, mem, len);
+ w += len;
+ if (w > buf->end)
+ /* special case: fit exactly into buffer
+ * w is now buf->end + 1
+ */
+ w = buf->buf;
+ } else if (w < r && r - w >= len) { /* >= len because may not cross */
+		/* we are constrained by the read pointer, but there
+		 * is enough space
+		 */
+ memcpy(w, mem, len);
+ w += len;
+ } else if (r <= w && buf->end - w < len - 1) {
+ /* the wrap around case: there may or may not be space */
+ if ((buf->end - w) + (r - buf->buf) >= len - 1) {
+ /* copy chunk that fits at the end */
+ memcpy(w, mem, buf->end - w + 1);
+ mem += buf->end - w + 1;
+ len -= (buf->end - w + 1);
+ w = buf->buf;
+ /* copy the rest */
+ memcpy(w, mem, len);
+ w += len;
+ }
+ else
+ error = -ENOMEM;
+ } else {
+ error = -ENOMEM;
+ }
+ if (!error) {
+ spin_lock(&buf->lock);
+ buf->writep = w;
+ spin_unlock(&buf->lock);
+ }
+ out:
+ read_unlock_irqrestore(&buf->del_lock, flags);
+ return error;
+}
+
+/* Assumption: concurrent reads are serialized externally */
+int rb_get(ring_buffer_t* buf, char* mem, size_t len)
+{
+ unsigned long flags;
+	char *r, *w;
+ int error = 0;
+ read_lock_irqsave(&buf->del_lock, flags);
+ if (!buf->buf) {
+ error = -ENODEV;
+ goto out;
+ }
+ spin_lock(&buf->lock);
+ r = buf->readp;
+ w = buf->writep;
+ spin_unlock(&buf->lock);
+
+ if (w <= r && buf->end - r >= len) {
+		/* easy case: there is enough data in the buffer
+		 * to get it in one chunk */
+ memcpy(mem, r + 1, len);
+ r += len;
+ error = len;
+
+ } else if (r + 1 < w && w - r - 1 >= len) {
+ /* we are constrained by the write pointer but
+ * there is enough data
+ */
+ memcpy(mem, r + 1, len);
+ r += len;
+ error = len;
+
+ } else if (r + 1 < w && w - r - 1 < len) {
+		/* we are constrained by the write pointer and there
+		 * is not enough data
+		 */
+ memcpy(mem, r + 1, w - r - 1);
+ error = w - r - 1;
+ r += w - r - 1;
+
+ } else if (w <= r && buf->end - r < len) {
+ /* the wrap around case: there may or may not be enough data
+ * first let's get what is available
+ */
+ memcpy(mem, r + 1, buf->end - r);
+ error += (buf->end - r);
+ mem += (buf->end - r);
+ len -= (buf->end - r);
+ r += (buf->end - r);
+
+ if (w > buf->buf) {
+ /* there is more to get */
+ r = buf->buf - 1;
+ if (w - r >= len) {
+ /* plenty */
+ memcpy(mem, r + 1, len);
+ error += len;
+ r += len;
+ } else {
+ memcpy(mem, r + 1, w - r - 1);
+ error += w - r - 1;
+ r += w - r - 1;
+ }
+ }
+ } /* nothing available */
+
+ if (error > 0) {
+ spin_lock(&buf->lock);
+ buf->readp = r;
+ spin_unlock(&buf->lock);
+ }
+ out:
+ read_unlock_irqrestore(&buf->del_lock, flags);
+ return error;
+}
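+
+/* Usage sketch (illustrative only): per the assumptions above, concurrent
+ * writers and concurrent readers must each be serialized externally; the
+ * internal locks only guard the read/write pointers and protect against
+ * concurrent freeing.
+ *
+ *	ring_buffer_t rb;
+ *	char rec[16];
+ *
+ *	rb_init(&rb);
+ *	if (rb_alloc_buf(&rb, 4) == 0) {
+ *		rb_put(&rb, rec, sizeof(rec));	// producer side
+ *		rb_get(&rb, rec, sizeof(rec));	// consumer side
+ *		rb_free_buf(&rb);
+ *	}
+ */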
+
+
+
+/******************************************************************************/
+/* DEVICE FILE DRIVER */
+/******************************************************************************/
+
+
+
+/* Allocate a buffer of about 1 MB per CPU: 2^BUFFER_ORDER pages of 4 KB
+ * each on i386.
+ */
+#define BUFFER_ORDER 8
+
+typedef struct {
+ ring_buffer_t buf;
+ atomic_t reader_cnt;
+ struct semaphore reader_mutex;
+} trace_buffer_t;
+
+
+/* This does not initialize the semaphore!! */
+
+#define EMPTY_TRACE_BUFFER \
+ { .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0)}
+
+static DEFINE_PER_CPU(trace_buffer_t, trace_buffer);
+
+#ifdef CONFIG_SCHED_DEBUG_TRACE
+static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
+#endif
+static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER;
+
+static void init_buffers(void)
+{
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ rb_init(&per_cpu(trace_buffer, i).buf);
+ init_MUTEX(&per_cpu(trace_buffer, i).reader_mutex);
+ atomic_set(&per_cpu(trace_buffer, i).reader_cnt, 0);
+ }
+ /* only initialize the mutex, the rest was initialized as part
+ * of the static initialization macro
+ */
+ init_MUTEX(&log_buffer.reader_mutex);
+}
+
+static int trace_release(struct inode *in, struct file *filp)
+{
+	int error = 0;
+ trace_buffer_t* buf = filp->private_data;
+
+ BUG_ON(!filp->private_data);
+
+ if (down_interruptible(&buf->reader_mutex)) {
+ error = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* last release must deallocate buffers */
+ if (atomic_dec_return(&buf->reader_cnt) == 0) {
+ error = rb_free_buf(&buf->buf);
+ }
+
+ up(&buf->reader_mutex);
+ out:
+ return error;
+}
+
+static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
+ loff_t *f_pos)
+{
+	/* We ignore f_pos; access is strictly sequential. */
+
+ ssize_t error = -EINVAL;
+ char* mem;
+ trace_buffer_t *buf = filp->private_data;
+
+ if (down_interruptible(&buf->reader_mutex)) {
+ error = -ERESTARTSYS;
+ goto out;
+ }
+
+ if (len > 64 * 1024)
+ len = 64 * 1024;
+ mem = kmalloc(len, GFP_KERNEL);
+ if (!mem) {
+ error = -ENOMEM;
+ goto out_unlock;
+ }
+
+ error = rb_get(&buf->buf, mem, len);
+ while (!error) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(110);
+ if (signal_pending(current))
+ error = -ERESTARTSYS;
+ else
+ error = rb_get(&buf->buf, mem, len);
+ }
+
+ if (error > 0 && copy_to_user(to, mem, error))
+ error = -EFAULT;
+
+ kfree(mem);
+ out_unlock:
+ up(&buf->reader_mutex);
+ out:
+ return error;
+}
+
+
+/* trace_open - Open one of the per-CPU sched_trace buffers.
+ */
+static int trace_open(struct inode *in, struct file *filp)
+{
+ int error = -EINVAL;
+ int cpu = MINOR(in->i_rdev);
+ trace_buffer_t* buf;
+
+ if (!cpu_online(cpu)) {
+ printk(KERN_WARNING "sched trace: "
+ "CPU #%d is not online. (open failed)\n", cpu);
+ error = -ENODEV;
+ goto out;
+ }
+
+ buf = &per_cpu(trace_buffer, cpu);
+
+ if (down_interruptible(&buf->reader_mutex)) {
+ error = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* first open must allocate buffers */
+ if (atomic_inc_return(&buf->reader_cnt) == 1) {
+ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
+ {
+ atomic_dec(&buf->reader_cnt);
+ goto out_unlock;
+ }
+ }
+
+ error = 0;
+ filp->private_data = buf;
+
+ out_unlock:
+ up(&buf->reader_mutex);
+ out:
+ return error;
+}
+
+/* log_open - open the global log message ring buffer.
+ */
+static int log_open(struct inode *in, struct file *filp)
+{
+ int error = -EINVAL;
+ trace_buffer_t* buf;
+
+ buf = &log_buffer;
+
+ if (down_interruptible(&buf->reader_mutex)) {
+ error = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* first open must allocate buffers */
+ if (atomic_inc_return(&buf->reader_cnt) == 1) {
+ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
+ {
+ atomic_dec(&buf->reader_cnt);
+ goto out_unlock;
+ }
+ }
+
+ error = 0;
+ filp->private_data = buf;
+
+ out_unlock:
+ up(&buf->reader_mutex);
+ out:
+ return error;
+}
+
+/******************************************************************************/
+/* Device Registration */
+/******************************************************************************/
+
+/* the major numbes are from the unassigned/local use block
+ *
+ * This should be converted to dynamic allocation at some point...
+ */
+#define TRACE_MAJOR 250
+#define LOG_MAJOR 251
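+
+/* With statically assigned majors, the device nodes presumably have to be
+ * created by hand in user space, e.g. (assumed paths, not created by this
+ * patch):
+ *
+ *	mknod /dev/schedtrace0 c 250 0	(one node per CPU, minor = CPU id)
+ *	mknod /dev/litmus_log  c 251 0
+ */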
+
+/* trace_fops - The file operations for accessing the per-CPU scheduling event
+ * trace buffers.
+ */
+struct file_operations trace_fops = {
+ .owner = THIS_MODULE,
+ .open = trace_open,
+ .release = trace_release,
+ .read = trace_read,
+};
+
+/* log_fops - The file operations for accessing the global LITMUS log message
+ * buffer.
+ *
+ * Except for opening the device file it uses the same operations as trace_fops.
+ */
+struct file_operations log_fops = {
+ .owner = THIS_MODULE,
+ .open = log_open,
+ .release = trace_release,
+ .read = trace_read,
+};
+
+static int __init register_buffer_dev(const char* name,
+ struct file_operations* fops,
+ int major, int count)
+{
+ dev_t trace_dev;
+ struct cdev *cdev;
+ int error = 0;
+
+ trace_dev = MKDEV(major, 0);
+ error = register_chrdev_region(trace_dev, count, name);
+ if (error)
+ {
+ printk(KERN_WARNING "sched trace: "
+ "Could not register major/minor number %d\n", major);
+ return error;
+ }
+ cdev = cdev_alloc();
+ if (!cdev) {
+ printk(KERN_WARNING "sched trace: "
+ "Could not get a cdev for %s.\n", name);
+ return -ENOMEM;
+ }
+ cdev->owner = THIS_MODULE;
+ cdev->ops = fops;
+ error = cdev_add(cdev, trace_dev, count);
+ if (error) {
+ printk(KERN_WARNING "sched trace: "
+ "add_cdev failed for %s.\n", name);
+ return -ENOMEM;
+ }
+ return error;
+
+}
+
+static int __init init_sched_trace(void)
+{
+ int error1 = 0, error2 = 0;
+
+ printk("Initializing scheduler trace device\n");
+ init_buffers();
+
+ error1 = register_buffer_dev("schedtrace", &trace_fops,
+ TRACE_MAJOR, NR_CPUS);
+
+ error2 = register_buffer_dev("litmus_log", &log_fops,
+ LOG_MAJOR, 1);
+ if (error1 || error2)
+ return min(error1, error2);
+ else
+ return 0;
+}
+
+module_init(init_sched_trace);
+
+/******************************************************************************/
+/* KERNEL API */
+/******************************************************************************/
+
+/* The per-CPU buffer used to format LITMUS log messages. Don't put it on the
+ * stack: it is too big for that, and the kernel gets very picky about nested
+ * interrupts and small stacks.
+ */
+
+#ifdef CONFIG_SCHED_DEBUG_TRACE
+
+#define MSG_SIZE 255
+static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
+
+/* sched_trace_log_message - This is the only function that accesses the
+ * log buffer inside the kernel for writing.
+ * Concurrent access to it is serialized via the
+ * log_buffer_lock.
+ *
+ * The maximum length of a formatted message is 255.
+ */
+void sched_trace_log_message(const char* fmt, ...)
+{
+ unsigned long flags;
+ va_list args;
+ size_t len;
+ char* buf;
+
+ va_start(args, fmt);
+ local_irq_save(flags);
+
+ /* format message */
+ buf = __get_cpu_var(fmt_buffer);
+ len = vscnprintf(buf, MSG_SIZE, fmt, args);
+
+ spin_lock(&log_buffer_lock);
+	/* Don't copy the trailing null byte; we don't want null bytes
+	 * in a text file.
+	 */
+ rb_put(&log_buffer.buf, buf, len);
+ spin_unlock(&log_buffer_lock);
+
+ local_irq_restore(flags);
+ va_end(args);
+}
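+
+/* Example (sketch): the TRACE() and TRACE_TASK() debugging macros used by the
+ * scheduler plugins are expected to funnel into this function, roughly as in
+ *
+ *	sched_trace_log_message("[P%d] %s/%d: job released\n",
+ *				smp_processor_id(),
+ *				current->comm, current->pid);
+ *
+ * Messages longer than MSG_SIZE - 1 characters are truncated by vscnprintf().
+ */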
+
+#endif
+
diff --git a/litmus/sync.c b/litmus/sync.c
new file mode 100644
index 0000000..4405228
--- /dev/null
+++ b/litmus/sync.c
@@ -0,0 +1,84 @@
+/* litmus/sync.c - Support for synchronous and asynchronous task system releases.
+ */
+
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/completion.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+
+static DECLARE_COMPLETION(ts_release);
+
+static long do_wait_for_ts_release(void)
+{
+ long ret = 0;
+
+ /* If the interruption races with a release, the completion object
+ * may have a non-zero counter. To avoid this problem, this should
+ * be replaced by wait_for_completion().
+ *
+ * For debugging purposes, this is interruptible for now.
+ */
+ ret = wait_for_completion_interruptible(&ts_release);
+
+ return ret;
+}
+
+
+static long do_release_ts(lt_t start)
+{
+ int task_count = 0;
+	unsigned long flags;
+ struct list_head *pos;
+ struct task_struct *t;
+
+
+ spin_lock_irqsave(&ts_release.wait.lock, flags);
+
+ list_for_each(pos, &ts_release.wait.task_list) {
+ t = (struct task_struct*) list_entry(pos,
+ struct __wait_queue,
+ task_list)->private;
+ task_count++;
+ release_at(t, start + t->rt_param.task_params.phase);
+ }
+
+ spin_unlock_irqrestore(&ts_release.wait.lock, flags);
+
+ complete_n(&ts_release, task_count);
+
+ return task_count;
+}
+
+
+asmlinkage long sys_wait_for_ts_release(void)
+{
+ long ret = -EPERM;
+ struct task_struct *t = current;
+
+ if (is_realtime(t))
+ ret = do_wait_for_ts_release();
+
+ return ret;
+}
+
+
+asmlinkage long sys_release_ts(lt_t __user *__delay)
+{
+ long ret;
+ lt_t delay;
+
+ /* FIXME: check capabilities... */
+
+ ret = copy_from_user(&delay, __delay, sizeof(lt_t));
+ if (ret == 0)
+ ret = do_release_ts(sched_clock() + delay);
+
+ return ret;
+}
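+
+/* Typical usage (sketch; the user-space wrappers are assumed to live in a
+ * library and are not part of this file):
+ *
+ *  - each real-time task calls the wait_for_ts_release() wrapper once it is
+ *    set up and blocks on the ts_release completion;
+ *  - a control program then calls the release_ts() wrapper with a delay,
+ *    which assigns every waiting task a release time of
+ *    "now + delay + task phase" and wakes them all up via complete_n().
+ */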
diff --git a/litmus/trace.c b/litmus/trace.c
new file mode 100644
index 0000000..bcdf103
--- /dev/null
+++ b/litmus/trace.c
@@ -0,0 +1,302 @@
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <asm/semaphore.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+
+#include <litmus/trace.h>
+
+/******************************************************************************/
+/* Allocation */
+/******************************************************************************/
+
+struct ft_buffer* trace_ts_buf = NULL;
+
+static unsigned int ts_seq_no = 0;
+
+feather_callback void save_timestamp(unsigned long event)
+{
+ unsigned int seq_no = fetch_and_inc((int *) &ts_seq_no);
+ struct timestamp *ts;
+ if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
+ ts->event = event;
+ ts->timestamp = ft_read_tsc();
+ ts->seq_no = seq_no;
+ ts->cpu = raw_smp_processor_id();
+ ft_buffer_finish_write(trace_ts_buf, ts);
+ }
+}
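+
+/* save_timestamp() is not called directly; it is registered as a
+ * Feather-Trace callback, as in the dummy registration in init_sched_trace()
+ * below:
+ *
+ *	ft_event0(666, save_timestamp);
+ *
+ * Real instrumentation points are expected to use the event IDs defined in
+ * litmus/trace.h rather than a magic number.
+ */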
+
+static struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
+{
+ struct ft_buffer* buf;
+ size_t total = (size + 1) * count;
+ char* mem;
+ int order = 0, pages = 1;
+
+ buf = kmalloc(sizeof(struct ft_buffer), GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
+ while (pages < total) {
+ order++;
+ pages *= 2;
+ }
+
+ mem = (char*) __get_free_pages(GFP_KERNEL, order);
+ if (!mem) {
+ kfree(buf);
+ return NULL;
+ }
+
+ if (!init_ft_buffer(buf, count, size,
+ mem + (count * size), /* markers at the end */
+ mem)) { /* buffer objects */
+ free_pages((unsigned long) mem, order);
+ kfree(buf);
+ return NULL;
+ }
+ return buf;
+}
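+
+/* Sizing example (illustrative): each slot takes size bytes of payload plus
+ * one marker byte at the end of the region, so count slots need
+ * (size + 1) * count bytes, rounded up to whole pages and then to the next
+ * power-of-two page count for __get_free_pages(). For a hypothetical 16-byte
+ * slot and count = 262144, that is 17 * 262144 bytes (about 4.25 MB), i.e.
+ * 1088 pages of 4 KB, allocated as an order-11 (2048-page) block.
+ */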
+
+static void free_ft_buffer(struct ft_buffer* buf)
+{
+ int order = 0, pages = 1;
+ size_t total;
+
+ if (buf) {
+ total = (buf->slot_size + 1) * buf->slot_count;
+ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
+ while (pages < total) {
+ order++;
+ pages *= 2;
+ }
+ free_pages((unsigned long) buf->buffer_mem, order);
+ kfree(buf);
+ }
+}
+
+
+/******************************************************************************/
+/* DEVICE FILE DRIVER */
+/******************************************************************************/
+
+#define NO_TIMESTAMPS 262144
+
+static DECLARE_MUTEX(feather_lock);
+static int use_count = 0;
+
+static int trace_release(struct inode *in, struct file *filp)
+{
+ int err = -EINVAL;
+
+ if (down_interruptible(&feather_lock)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ printk(KERN_ALERT "%s/%d disconnects from feather trace device. "
+ "use_count=%d\n",
+ current->comm, current->pid, use_count);
+
+ if (use_count == 1) {
+ /* disable events */
+ ft_disable_all_events();
+
+ /* wait for any pending events to complete */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ);
+
+ printk(KERN_ALERT "Failed trace writes: %u\n",
+ trace_ts_buf->failed_writes);
+
+ free_ft_buffer(trace_ts_buf);
+ trace_ts_buf = NULL;
+ }
+
+	use_count--;
+	err = 0;
+ up(&feather_lock);
+out:
+ return err;
+}
+
+
+static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
+ loff_t *f_pos)
+{
+	/* We ignore f_pos; access is strictly sequential. */
+ ssize_t error = 0;
+ struct timestamp ts;
+
+ if (down_interruptible(&feather_lock)) {
+ error = -ERESTARTSYS;
+ goto out;
+ }
+
+
+ while (len >= sizeof(struct timestamp)) {
+ if (ft_buffer_read(trace_ts_buf, &ts)) {
+ if (copy_to_user(to, &ts, sizeof(struct timestamp))) {
+ error = -EFAULT;
+ break;
+ } else {
+ len -= sizeof(struct timestamp);
+ to += sizeof(struct timestamp);
+ error += sizeof(struct timestamp);
+ }
+ } else {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(50);
+ if (signal_pending(current)) {
+ error = -ERESTARTSYS;
+ break;
+ }
+ }
+ }
+ up(&feather_lock);
+out:
+ return error;
+}
+
+#define ENABLE_CMD 0
+#define DISABLE_CMD 1
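+
+/* Control protocol (as implemented in trace_write() below): user space writes
+ * an array of longs, the first being the command (ENABLE_CMD or DISABLE_CMD)
+ * and the remaining ones being Feather-Trace event IDs. For example (sketch;
+ * the event IDs are made up for illustration):
+ *
+ *	long req[3] = {ENABLE_CMD, 100, 101};
+ *	write(fd, req, sizeof(req));	// enables events 100 and 101
+ */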
+
+static ssize_t trace_write(struct file *filp, const char __user *from,
+ size_t len, loff_t *f_pos)
+{
+ ssize_t error = -EINVAL;
+ unsigned long cmd;
+ unsigned long id;
+
+ if (len % sizeof(long) || len < 2 * sizeof(long))
+ goto out;
+
+ if (copy_from_user(&cmd, from, sizeof(long))) {
+ error = -EFAULT;
+ goto out;
+ }
+ len -= sizeof(long);
+ from += sizeof(long);
+
+ if (cmd != ENABLE_CMD && cmd != DISABLE_CMD)
+ goto out;
+
+ if (down_interruptible(&feather_lock)) {
+ error = -ERESTARTSYS;
+ goto out;
+ }
+
+ error = sizeof(long);
+ while (len) {
+		if (copy_from_user(&id, from, sizeof(long))) {
+			error = -EFAULT;
+			up(&feather_lock);
+			goto out;
+		}
+ len -= sizeof(long);
+ from += sizeof(long);
+ if (cmd) {
+ printk(KERN_INFO
+ "Disabling feather-trace event %lu.\n", id);
+ ft_disable_event(id);
+ } else {
+ printk(KERN_INFO
+ "Enabling feather-trace event %lu.\n", id);
+ ft_enable_event(id);
+ }
+ error += sizeof(long);
+ }
+
+ up(&feather_lock);
+ out:
+ return error;
+}
+
+static int trace_open(struct inode *in, struct file *filp)
+{
+ int err = 0;
+ unsigned int count = NO_TIMESTAMPS;
+
+ if (down_interruptible(&feather_lock)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ while (count && !trace_ts_buf) {
+ printk("trace: trying to allocate %u time stamps.\n", count);
+ trace_ts_buf = alloc_ft_buffer(count, sizeof(struct timestamp));
+ count /= 2;
+ }
+ if (!trace_ts_buf)
+ err = -ENOMEM;
+ else
+ use_count++;
+
+ up(&feather_lock);
+out:
+ return err;
+}
+
+/******************************************************************************/
+/* Device Registration */
+/******************************************************************************/
+
+#define FT_TRACE_MAJOR 252
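+
+/* As with the sched_trace devices, the node presumably has to be created by
+ * hand, e.g. mknod /dev/ft_trace c 252 0 (assumed path, not created by this
+ * patch).
+ */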
+
+struct file_operations ft_trace_fops = {
+ .owner = THIS_MODULE,
+ .open = trace_open,
+ .release = trace_release,
+ .write = trace_write,
+ .read = trace_read,
+};
+
+
+static int __init register_buffer_dev(const char* name,
+ struct file_operations* fops,
+ int major, int count)
+{
+ dev_t trace_dev;
+ struct cdev *cdev;
+ int error = 0;
+
+ trace_dev = MKDEV(major, 0);
+ error = register_chrdev_region(trace_dev, count, name);
+ if (error)
+ {
+ printk(KERN_WARNING "trace: "
+ "Could not register major/minor number %d\n", major);
+ return error;
+ }
+ cdev = cdev_alloc();
+ if (!cdev) {
+ printk(KERN_WARNING "trace: "
+ "Could not get a cdev for %s.\n", name);
+ return -ENOMEM;
+ }
+ cdev->owner = THIS_MODULE;
+ cdev->ops = fops;
+ error = cdev_add(cdev, trace_dev, count);
+ if (error) {
+ printk(KERN_WARNING "trace: "
+ "add_cdev failed for %s.\n", name);
+ return -ENOMEM;
+ }
+ return error;
+
+}
+
+static int __init init_sched_trace(void)
+{
+ int error = 0;
+
+ printk("Initializing Feather-Trace device\n");
+ /* dummy entry to make linker happy */
+ ft_event0(666, save_timestamp);
+
+ error = register_buffer_dev("ft_trace", &ft_trace_fops,
+ FT_TRACE_MAJOR, 1);
+ return error;
+}
+
+module_init(init_sched_trace);