From 9aaa23e28a41fb579ef33ecb845e22cd717195c9 Mon Sep 17 00:00:00 2001
From: "Bjoern B. Brandenburg"
Date: Mon, 29 Oct 2007 04:30:48 -0400
Subject: Added LITMUS release 2007.2.
Also some text changes.
---
download/MD5SUM | 3 +
download/liblitmus-2007.2.tgz | Bin 0 -> 10825 bytes
download/libso-2007.2.tgz | Bin 0 -> 15836 bytes
download/litmus-rt-2007.2.patch | 12100 ++++++++++++++++++++++++++++++++++++++
index.html | 83 +-
5 files changed, 12166 insertions(+), 20 deletions(-)
create mode 100644 download/liblitmus-2007.2.tgz
create mode 100644 download/libso-2007.2.tgz
create mode 100644 download/litmus-rt-2007.2.patch
diff --git a/download/MD5SUM b/download/MD5SUM
index 4d34aa9..4876c6d 100644
--- a/download/MD5SUM
+++ b/download/MD5SUM
@@ -1,3 +1,6 @@
991469b3a8c9b6a0caa4cedfb663e9be liblitmus-2007.1.tgz
+eddf0c80b0942f792ad8323cb62c9234 liblitmus-2007.2.tgz
6a80c8bb52af8f38dc1bbd874fa2e44f libso-2007.1.tgz
+f3cb1e78f38dd22c4cca84a03fab3bbd libso-2007.2.tgz
c6ef29d2e198c2fbc08e47d6f2f404bb litmus-rt-2007.1.patch
+f4a1888b942a82ccce9daa55fce98202 litmus-rt-2007.2.patch
diff --git a/download/liblitmus-2007.2.tgz b/download/liblitmus-2007.2.tgz
new file mode 100644
index 0000000..616f345
Binary files /dev/null and b/download/liblitmus-2007.2.tgz differ
diff --git a/download/libso-2007.2.tgz b/download/libso-2007.2.tgz
new file mode 100644
index 0000000..394665f
Binary files /dev/null and b/download/libso-2007.2.tgz differ
diff --git a/download/litmus-rt-2007.2.patch b/download/litmus-rt-2007.2.patch
new file mode 100644
index 0000000..deea27d
--- /dev/null
+++ b/download/litmus-rt-2007.2.patch
@@ -0,0 +1,12100 @@
+diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
+index 0dfee81..da6f1e9 100644
+--- a/arch/i386/Kconfig
++++ b/arch/i386/Kconfig
+@@ -1210,6 +1210,7 @@ config KPROBES
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
++
+ endmenu
+
+ source "arch/i386/Kconfig.debug"
+@@ -1259,3 +1260,30 @@ config X86_TRAMPOLINE
+ config KTIME_SCALAR
+ bool
+ default y
++
++
++menu "LITMUS^RT"
++
++
++config SCHED_TASK_TRACE
++ bool "Trace real-time tasks"
++ default y
++ help
++ Include support for the sched_trace_XXX() tracing functions. This
++ allows the collection of real-time task events such as job
++ completions, job releases, early completions, etc. This results in a
++ small overhead in the scheduling code. Disable if the overhead is not
++ acceptable (e.g., benchmarking).
++
++config SCHED_DEBUG_TRACE
++ bool "TRACE() debugging"
++ default y
++ help
++ Include support for sched_trace_log_message(), which is used to
++ implement TRACE(). If disabled, no TRACE() messages will be included
++ in the kernel, and no overheads due to debugging statements will be
++ incurred by the scheduler. Disable if the overhead is not acceptable
++ (e.g. benchmarking).
++
++
++endmenu
+diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
+index 776d9be..2e8909f 100644
+--- a/arch/i386/kernel/apic.c
++++ b/arch/i386/kernel/apic.c
+@@ -26,6 +26,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -43,6 +44,8 @@
+
+ #include "io_ports.h"
+
++#include
++
+ /*
+ * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
+ * IPIs in place of local APIC timers
+@@ -54,6 +57,15 @@ static cpumask_t timer_bcast_ipi;
+ */
+ static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
++/*
++ * Definitions and variables related to quantum synchronization.
++ */
++#define WAIT_TO_SYNC 30000 /* time (in ms) after boot until sync */
++static int stagger = 0; /* are we using staggered quanta? */
++static atomic_t qsync_time = ATOMIC_INIT(INITIAL_JIFFIES);
++static atomic_t quantum_sync_barrier = ATOMIC_INIT(0);
++static atomic_t sync_done = ATOMIC_INIT(0);
++
+ static inline void lapic_disable(void)
+ {
+ enable_local_apic = -1;
+@@ -786,6 +798,23 @@ static int __init apic_set_verbosity(char *str)
+
+ __setup("apic=", apic_set_verbosity);
+
++/*
++ * Determine whether to use aligned or staggered quanta.
++ */
++
++static int __init apic_synch_type(char *str)
++{
++ if (strcmp("aligned", str) == 0)
++ stagger = 0;
++ else if (strcmp("staggered", str) == 0)
++ stagger = 1;
++ else
++ stagger = 0; /* aligned quanta by default */
++ return 1;
++}
++
++__setup("quanta=", apic_synch_type);
++
+ static int __init detect_init_APIC (void)
+ {
+ u32 h, l, features;
+@@ -1198,6 +1227,47 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
+ #undef APIC_DIVISOR
+
+ /*
++ * This function is called to align all quanta, and to stagger quanta if
++ * necessary. It relies on a barrier to synchronize all processors, so
++ * that they all reset their APIC timers at the same time. If quanta
++ * should be staggered, the appropriate stagger delay is then added at
++ * each processor.
++ */
++
++void synchronize_quanta(void)
++{
++ int cpu = smp_processor_id();
++ int total_cpus = num_online_cpus();
++ int stagger_interval = jiffies_to_usecs(1) / total_cpus;
++
++ /*
++ * Disable APIC timer, wait for all other processors to reach barrier,
++ * and re-enable all timers concurrently.
++ */
++ disable_APIC_timer();
++ atomic_inc(&quantum_sync_barrier);
++ while (atomic_read(&quantum_sync_barrier) < total_cpus) {
++ /* Delay, otherwise atomic_inc's cannot occur. */
++ udelay(1);
++ }
++
++ /* Add necessary stagger for this CPU, if required. */
++ if (stagger) {
++ int stagger_us = cpu * stagger_interval;
++ udelay(stagger_us);
++ }
++
++ /* Re-enable all timers. */
++ __setup_APIC_LVTT(calibration_result);
++ enable_APIC_timer();
++
++ /* The first CPU signals that quantum sync is complete. */
++ if (cpu == 0)
++ atomic_inc(&sync_done);
++}
++
++
++/*
+ * Local timer interrupt handler. It does both profiling and
+ * process statistics/rescheduling.
+ *
+@@ -1209,11 +1279,32 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
+
+ inline void smp_local_timer_interrupt(void)
+ {
++/* s64 offset; */
++
++ TS_TICK_START;
++
+ profile_tick(CPU_PROFILING);
+ #ifdef CONFIG_SMP
+ update_process_times(user_mode_vm(get_irq_regs()));
+ #endif
+
++ /* Print out timing data - can be commented out if necessary. */
++/* offset = get_nsec_offset(); */
++/* TRACE("%d\n", offset); */
++
++ /*
++ * Synchronize quanta if we have reached qsync_time plus wait
++ * interval. The synchronization code itself is placed in its own
++ * (non-inline) function, to avoid issues with creating an inline
++ * function that is too large.
++ */
++ if (unlikely(!atomic_read(&sync_done) &&
++ time_after(jiffies,
++ (unsigned long)(atomic_read(&qsync_time) +
++ msecs_to_jiffies(WAIT_TO_SYNC))))) {
++ synchronize_quanta();
++ }
++
+ /*
+ * We take the 'long' return path, and there every subsystem
+ * grabs the apropriate locks (kernel lock/ irq lock).
+@@ -1224,6 +1315,7 @@ inline void smp_local_timer_interrupt(void)
+ * Currently this isn't too much of an issue (performance wise),
+ * we can take more than 100K local irqs per second on a 100 MHz P5.
+ */
++ TS_TICK_END;
+ }
+
+ /*
+diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
+index e3d4b73..9670f77 100644
+--- a/arch/i386/kernel/i386_ksyms.c
++++ b/arch/i386/kernel/i386_ksyms.c
+@@ -6,6 +6,7 @@ EXPORT_SYMBOL(__down_failed);
+ EXPORT_SYMBOL(__down_failed_interruptible);
+ EXPORT_SYMBOL(__down_failed_trylock);
+ EXPORT_SYMBOL(__up_wakeup);
++
+ /* Networking helper routines. */
+ EXPORT_SYMBOL(csum_partial_copy_generic);
+
+diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
+index 2697e92..9a633ea 100644
+--- a/arch/i386/kernel/syscall_table.S
++++ b/arch/i386/kernel/syscall_table.S
+@@ -319,3 +319,32 @@ ENTRY(sys_call_table)
+ .long sys_move_pages
+ .long sys_getcpu
+ .long sys_epoll_pwait
++ /* LITMUS syscalls */
++ .long sys_sched_setpolicy /* 320 */
++ .long sys_sched_getpolicy
++ .long sys_set_rt_mode
++ .long sys_set_rt_task_param
++ .long sys_get_rt_task_param
++ .long sys_prepare_rt_task /* 325 */
++ .long sys_ni_syscall /* CLEANUP: sys_reset_stat */
++ .long sys_sleep_next_period
++ .long sys_scheduler_setup
++ .long sys_register_np_flag
++ .long sys_exit_np /* 330 */
++ .long sys_pi_sema_init
++ .long sys_pi_down
++ .long sys_pi_up
++ .long sys_pi_sema_free
++ .long sys_sema_init /* 335 */
++ .long sys_down
++ .long sys_up
++ .long sys_sema_free
++ .long sys_srp_sema_init
++ .long sys_srp_down /* 340 */
++ .long sys_srp_up
++ .long sys_reg_task_srp_sem
++ .long sys_srp_sema_free
++ .long sys_query_job_no
++ .long sys_wait_for_job_release /* 345 */
++ .long sys_set_service_levels
++ .long sys_get_cur_service_level
+\ No newline at end of file
+diff --git a/include/asm-i386/semaphore.h b/include/asm-i386/semaphore.h
+index 4e34a46..7212f4b 100644
+--- a/include/asm-i386/semaphore.h
++++ b/include/asm-i386/semaphore.h
+@@ -45,6 +45,7 @@ struct semaphore {
+ atomic_t count;
+ int sleepers;
+ wait_queue_head_t wait;
++ int used; /* allows semaphores to be allocated to user space processes */
+ };
+
+
+diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
+index 833fa17..8a5d47c 100644
+--- a/include/asm-i386/unistd.h
++++ b/include/asm-i386/unistd.h
+@@ -325,10 +325,40 @@
+ #define __NR_move_pages 317
+ #define __NR_getcpu 318
+ #define __NR_epoll_pwait 319
++/* LITMUS */
++#define __NR_sched_setpolicy 320
++#define __NR_sched_getpolicy 321
++/* Syscall definitions for mode change and task creation-manipulation */
++#define __NR_set_rt_mode 322
++#define __NR_set_rt_task_param 323
++#define __NR_get_rt_task_param 324
++#define __NR_prepare_rt_task 325
++#define __NR_reset_stat 326
++#define __NR_sleep_next_period 327
++#define __NR_scheduler_setup 328
++#define __NR_enter_np 329
++#define __NR_exit_np 330
++#define __NR_pi_sema_init 331
++#define __NR_pi_down 332
++#define __NR_pi_up 333
++#define __NR_pi_sema_free 334
++#define __NR_sema_init 335
++#define __NR_down 336
++#define __NR_up 337
++#define __NR_sema_free 338
++#define __NR_srp_sema_init 339
++#define __NR_srp_down 340
++#define __NR_srp_up 341
++#define __NR_reg_task_srp_sem 342
++#define __NR_srp_sema_free 343
++#define __NR_query_job_no 344
++#define __NR_wait_for_job_release 345
++#define __NR_set_service_levels 346
++#define __NR_get_cur_service_level 347
+
+ #ifdef __KERNEL__
+
+-#define NR_syscalls 320
++#define NR_syscalls 343
+
+ #define __ARCH_WANT_IPC_PARSE_VERSION
+ #define __ARCH_WANT_OLD_READDIR
+diff --git a/include/linux/edf_common.h b/include/linux/edf_common.h
+new file mode 100644
+index 0000000..f940308
+--- /dev/null
++++ b/include/linux/edf_common.h
+@@ -0,0 +1,36 @@
++/* EDF common data structures and utility functions shared by all EDF
++ * based scheduler plugins
++ */
++
++/* CLEANUP: Add comments and make it less messy.
++ *
++ */
++
++#ifndef __UNC_EDF_COMMON_H__
++#define __UNC_EDF_COMMON_H__
++
++#include
++
++
++void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched);
++
++int edf_higher_prio(struct task_struct* first,
++ struct task_struct* second);
++
++int edf_ready_order(struct list_head* a, struct list_head* b);
++
++void edf_release_at(struct task_struct *t, jiffie_t start);
++#define edf_release_now(t) edf_release_at(t, jiffies)
++
++int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t);
++long edf_sleep_next_period(void);
++
++void edf_prepare_for_next_period(struct task_struct *t);
++
++#define job_completed(t) (!is_be(t) && \
++ (t)->rt_param.times.exec_time == (t)->rt_param.basic_params.exec_cost)
++
++int edf_set_hp_task(struct pi_semaphore *sem);
++int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu);
++
++#endif
+diff --git a/include/linux/feather_buffer.h b/include/linux/feather_buffer.h
+new file mode 100644
+index 0000000..c477772
+--- /dev/null
++++ b/include/linux/feather_buffer.h
+@@ -0,0 +1,108 @@
++#ifndef _FEATHER_BUFFER_H_
++#define _FEATHER_BUFFER_H_
++
++/* requires UINT_MAX and memcpy */
++
++static inline int fetch_and_inc(int *val)
++{
++ int ret = 1;
++ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
++ return ret;
++}
++
++static inline int fetch_and_dec(int *val)
++{
++ int ret = -1;
++ __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (ret), "+m" (*val) : : "memory" );
++ return ret;
++}
++
++#define SLOT_FREE 0
++#define SLOT_BUSY 1
++#define SLOT_READY 2
++
++struct ft_buffer {
++ unsigned int slot_count;
++ unsigned int slot_size;
++
++ int free_count;
++ unsigned int write_idx;
++ unsigned int read_idx;
++
++ char* slots;
++ void* buffer_mem;
++ unsigned int failed_writes;
++};
++
++static inline int init_ft_buffer(struct ft_buffer* buf,
++ unsigned int slot_count,
++ unsigned int slot_size,
++ char* slots,
++ void* buffer_mem)
++{
++ int i = 0;
++ if (!slot_count || UINT_MAX % slot_count != slot_count - 1) {
++ /* The slot count must divide UINT_MAX + 1 so that when it
++ * wraps around the index correctly points to 0.
++ */
++ return 0;
++ } else {
++ buf->slot_count = slot_count;
++ buf->slot_size = slot_size;
++ buf->slots = slots;
++ buf->buffer_mem = buffer_mem;
++ buf->free_count = slot_count;
++ buf->write_idx = 0;
++ buf->read_idx = 0;
++ buf->failed_writes = 0;
++ for (i = 0; i < slot_count; i++)
++ buf->slots[i] = SLOT_FREE;
++ return 1;
++ }
++}
++
++static inline int ft_buffer_start_write(struct ft_buffer* buf, void **ptr)
++{
++ int free = fetch_and_dec(&buf->free_count);
++ unsigned int idx;
++ if (free <= 0) {
++ fetch_and_inc(&buf->free_count);
++ *ptr = 0;
++ fetch_and_inc(&buf->failed_writes);
++ return 0;
++ } else {
++ idx = fetch_and_inc((int*) &buf->write_idx) % buf->slot_count;
++ buf->slots[idx] = SLOT_BUSY;
++ *ptr = ((char*) buf->buffer_mem) + idx * buf->slot_size;
++ return 1;
++ }
++}
++
++static inline void ft_buffer_finish_write(struct ft_buffer* buf, void *ptr)
++{
++ unsigned int idx = ((char*) ptr - (char*) buf->buffer_mem) / buf->slot_size;
++ buf->slots[idx] = SLOT_READY;
++}
++
++
++/* exclusive reader access is assumed */
++static inline int ft_buffer_read(struct ft_buffer* buf, void* dest)
++{
++ unsigned int idx;
++ if (buf->free_count == buf->slot_count)
++ /* nothing available */
++ return 0;
++ idx = buf->read_idx % buf->slot_count;
++ if (buf->slots[idx] == SLOT_READY) {
++ memcpy(dest, ((char*) buf->buffer_mem) + idx * buf->slot_size,
++ buf->slot_size);
++ buf->slots[idx] = SLOT_FREE;
++ buf->read_idx++;
++ fetch_and_inc(&buf->free_count);
++ return 1;
++ } else
++ return 0;
++}
++
++
++#endif
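/* Illustrative usage sketch: a hypothetical 16-slot ring of fixed-size
 * records. The slot count must be a power of two so that it divides
 * UINT_MAX + 1, as required by init_ft_buffer(); the demo_* names are
 * made up for illustration only.
 */
#include <linux/kernel.h>            /* UINT_MAX, printk() */
#include <linux/string.h>            /* memcpy() */
#include <linux/feather_buffer.h>

struct demo_rec {
        unsigned long event;
        unsigned long long when;
};

#define DEMO_SLOTS 16

static struct ft_buffer demo_buf;
static char             demo_state[DEMO_SLOTS];
static struct demo_rec  demo_mem[DEMO_SLOTS];

static void demo_ft_buffer(void)
{
        struct demo_rec *slot, out;

        init_ft_buffer(&demo_buf, DEMO_SLOTS, sizeof(struct demo_rec),
                       demo_state, demo_mem);

        /* writer side: safe against concurrent writers on other CPUs */
        if (ft_buffer_start_write(&demo_buf, (void **) &slot)) {
                slot->event = 1;
                slot->when  = 0;
                ft_buffer_finish_write(&demo_buf, slot);
        }

        /* reader side: exactly one reader may drain the buffer */
        while (ft_buffer_read(&demo_buf, &out))
                printk(KERN_INFO "event %lu\n", out.event);
}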
+diff --git a/include/linux/feather_trace.h b/include/linux/feather_trace.h
+new file mode 100644
+index 0000000..57a21a5
+--- /dev/null
++++ b/include/linux/feather_trace.h
+@@ -0,0 +1,93 @@
++#ifndef _FEATHER_TRACE_H_
++#define _FEATHER_TRACE_H_
++
++#define feather_callback __attribute__((regparm(0)))
++
++/* make the compiler reload any register that is not saved in
++ * a cdecl function call
++ */
++#define CLOBBER_LIST "memory", "cc", "eax", "ecx", "edx"
++
++#define ft_event(id, callback) \
++ __asm__ __volatile__( \
++ "1: jmp 2f \n\t" \
++ " call " #callback " \n\t" \
++ ".section __event_table, \"aw\" \n\t" \
++ ".long " #id ", 0, 1b, 2f \n\t" \
++ ".previous \n\t" \
++ "2: \n\t" \
++ : : : CLOBBER_LIST)
++
++#define ft_event0(id, callback) \
++ __asm__ __volatile__( \
++ "1: jmp 2f \n\t" \
++ " subl $4, %%esp \n\t" \
++ " movl $" #id ", (%%esp) \n\t" \
++ " call " #callback " \n\t" \
++ " addl $4, %%esp \n\t" \
++ ".section __event_table, \"aw\" \n\t" \
++ ".long " #id ", 0, 1b, 2f \n\t" \
++ ".previous \n\t" \
++ "2: \n\t" \
++ : : : CLOBBER_LIST)
++
++#define ft_event1(id, callback, param) \
++ __asm__ __volatile__( \
++ "1: jmp 2f \n\t" \
++ " subl $8, %%esp \n\t" \
++ " movl %0, 4(%%esp) \n\t" \
++ " movl $" #id ", (%%esp) \n\t" \
++ " call " #callback " \n\t" \
++ " addl $8, %%esp \n\t" \
++ ".section __event_table, \"aw\" \n\t" \
++ ".long " #id ", 0, 1b, 2f \n\t" \
++ ".previous \n\t" \
++ "2: \n\t" \
++ : : "r" (param) : CLOBBER_LIST)
++
++#define ft_event2(id, callback, param, param2) \
++ __asm__ __volatile__( \
++ "1: jmp 2f \n\t" \
++ " subl $12, %%esp \n\t" \
++ " movl %1, 8(%%esp) \n\t" \
++ " movl %0, 4(%%esp) \n\t" \
++ " movl $" #id ", (%%esp) \n\t" \
++ " call " #callback " \n\t" \
++ " addl $12, %%esp \n\t" \
++ ".section __event_table, \"aw\" \n\t" \
++ ".long " #id ", 0, 1b, 2f \n\t" \
++ ".previous \n\t" \
++ "2: \n\t" \
++ : : "r" (param), "r" (param2) : CLOBBER_LIST)
++
++
++#define ft_event3(id, callback, p, p2, p3) \
++ __asm__ __volatile__( \
++ "1: jmp 2f \n\t" \
++ " subl $16, %%esp \n\t" \
++ " movl %2, 12(%%esp) \n\t" \
++ " movl %1, 8(%%esp) \n\t" \
++ " movl %0, 4(%%esp) \n\t" \
++ " movl $" #id ", (%%esp) \n\t" \
++ " call " #callback " \n\t" \
++ " addl $16, %%esp \n\t" \
++ ".section __event_table, \"aw\" \n\t" \
++ ".long " #id ", 0, 1b, 2f \n\t" \
++ ".previous \n\t" \
++ "2: \n\t" \
++ : : "r" (p), "r" (p2), "r" (p3) : CLOBBER_LIST)
++
++
++static inline unsigned long long ft_read_tsc(void)
++{
++ unsigned long long ret;
++ __asm__ __volatile__("rdtsc" : "=A" (ret));
++ return ret;
++}
++
++int ft_enable_event(unsigned long id);
++int ft_disable_event(unsigned long id);
++int ft_is_event_enabled(unsigned long id);
++int ft_disable_all_events(void);
++
++#endif
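/* Illustrative usage sketch: declaring a callback and arming a static
 * trace point. The event id 42 and the demo_* names are made up; the
 * callback uses the stack-based (regparm(0)) calling convention that
 * ft_event0() expects.
 */
#include <linux/kernel.h>
#include <linux/feather_trace.h>

feather_callback void demo_callback(unsigned long id)
{
        printk(KERN_INFO "event %lu fired at %llu\n", id, ft_read_tsc());
}

static void demo_trace_point(void)
{
        ft_enable_event(42);            /* make the call at this trace point live */
        ft_event0(42, demo_callback);   /* skipped while the event is disabled   */
        ft_disable_event(42);
}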
+diff --git a/include/linux/fifo_common.h b/include/linux/fifo_common.h
+new file mode 100644
+index 0000000..0883226
+--- /dev/null
++++ b/include/linux/fifo_common.h
+@@ -0,0 +1,18 @@
++/* FIFO common definitions and utility functions.
++ */
++#ifndef __UNC_SCHED_FIFO_H__
++#define __UNC_SCHED_FIFO_H__
++
++#include
++
++
++int fifo_higher_prio(struct task_struct* first,
++ struct task_struct* second);
++
++int fifo_ready_order(struct list_head* a, struct list_head* b);
++
++
++void fifo_domain_init(rt_domain_t* fifo, check_resched_needed_t resched);
++
++
++#endif
+diff --git a/include/linux/fpmath.h b/include/linux/fpmath.h
+new file mode 100644
+index 0000000..a15c239
+--- /dev/null
++++ b/include/linux/fpmath.h
+@@ -0,0 +1,111 @@
++#ifndef __FP_MATH_H__
++#define __FP_MATH_H__
++
++#define FP_SHIFT 10
++#define ROUND_BIT (FP_SHIFT - 1)
++#define ONE FP(1)
++
++#define _fp(x) ((fp_t) {x})
++
++static inline long _point(fp_t x)
++{
++ return (x.val % (1 << FP_SHIFT));
++
++}
++
++#define fp2str(x) x.val
++/*(x.val >> FP_SHIFT), (x.val % (1 << FP_SHIFT)) */
++#define _FP_ "%ld/1024"
++
++
++static inline fp_t FP(long x)
++{
++ return _fp(((long) x) << FP_SHIFT);
++}
++
++static inline long _floor(fp_t x)
++{
++ return x.val >> FP_SHIFT;
++}
++
++/* FIXME: negative rounding */
++static inline long _round(fp_t x)
++{
++ return _floor(x) + ((x.val >> ROUND_BIT) & 1);
++}
++
++/* divide two integers to obtain a fixed point value */
++static inline fp_t _frac(long a, long b)
++{
++ return _fp(FP(a).val / (b));
++}
++
++/* multiply two fixed point values */
++static inline fp_t _mul(fp_t a, fp_t b)
++{
++ return _fp((a.val * b.val) >> FP_SHIFT);
++}
++
++static inline fp_t _div(fp_t a, fp_t b)
++{
++ /* try not to overflow */
++ if (unlikely(a.val > 2 << (BITS_PER_LONG - FP_SHIFT)))
++ return _fp((a.val / b.val) << FP_SHIFT);
++ else
++ return _fp((a.val << FP_SHIFT) / b.val);
++}
++
++static inline fp_t _add(fp_t a, fp_t b)
++{
++ return _fp(a.val + b.val);
++}
++
++static inline fp_t _sub(fp_t a, fp_t b)
++{
++ return _fp(a.val - b.val);
++}
++
++static inline fp_t _neg(fp_t x)
++{
++ return _fp(-x.val);
++}
++
++static inline fp_t _abs(fp_t x)
++{
++ return _fp(abs(x.val));
++}
++
++static inline int _leq(fp_t a, fp_t b)
++{
++ return a.val <= b.val;
++}
++
++static inline int _geq(fp_t a, fp_t b)
++{
++ return a.val >= b.val;
++}
++
++static inline int _lt(fp_t a, fp_t b)
++{
++ return a.val < b.val;
++}
++
++static inline int _gt(fp_t a, fp_t b)
++{
++ return a.val > b.val;
++}
++
++static inline int _eq(fp_t a, fp_t b)
++{
++ return a.val == b.val;
++}
++
++static inline fp_t _max(fp_t a, fp_t b)
++{
++ if (a.val < b.val)
++ return b;
++ else
++ return a;
++}
++
++#endif
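/* Worked example: with FP_SHIFT == 10 the fixed-point unit is 1/1024, so
 * FP(1).val == 1024. fp_t itself is defined in rt_param.h, which must be
 * included first; the demo function is purely illustrative.
 */
#include <linux/rt_param.h>     /* fp_t */
#include <linux/fpmath.h>

static long fpmath_demo(void)
{
        fp_t three_quarters = _frac(3, 4);                 /* .val == 768  (0.75) */
        fp_t one_and_a_half = _mul(three_quarters, FP(2)); /* .val == 1536 (1.5)  */

        /* _floor() drops the fractional bits, _round() also looks at the
         * bit just below the binary point: here _floor() == 1, _round() == 2.
         */
        return _round(one_and_a_half) - _floor(one_and_a_half);   /* == 1 */
}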
+diff --git a/include/linux/list.h b/include/linux/list.h
+index 611059d..319c5ed 100644
+--- a/include/linux/list.h
++++ b/include/linux/list.h
+@@ -898,6 +898,36 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+ pos = pos->next)
+
++
++typedef int (*list_cmp_t)(struct list_head*, struct list_head*);
++
++static inline unsigned int list_insert(struct list_head* new,
++ struct list_head* head,
++ list_cmp_t order_before)
++{
++ struct list_head *pos;
++ unsigned int passed = 0;
++
++ BUG_ON(!new);
++
++ /* find a spot where the new entry is less than the next */
++ list_for_each(pos, head) {
++ if (unlikely(order_before(new, pos))) {
++ /* pos is not less than new, thus insert here */
++ __list_add(new, pos->prev, pos);
++ goto out;
++ }
++ passed++;
++ }
++ /* if we get to this point either the list is empty or every
++ * queued element is less than new.
++ * Let's add new to the end. */
++ list_add_tail(new, head);
++ out:
++ return passed;
++}
++
++
+ #else
+ #warning "don't include kernel headers in userspace"
+ #endif /* __KERNEL__ */
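/* Illustrative usage sketch: keeping a list sorted by an integer key with
 * list_insert(). The demo_* struct and comparison function are made up;
 * the scheduler plugins use the same pattern with rt_list and ready-queue
 * orderings such as edf_ready_order().
 */
#include <linux/list.h>

struct demo_item {
        int key;
        struct list_head link;
};

static int demo_key_order(struct list_head *a, struct list_head *b)
{
        return list_entry(a, struct demo_item, link)->key <
               list_entry(b, struct demo_item, link)->key;
}

static unsigned int demo_sorted_insert(struct list_head *head,
                                       struct demo_item *item)
{
        /* returns how many queued entries precede the newly inserted one */
        return list_insert(&item->link, head, demo_key_order);
}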
+diff --git a/include/linux/litmus.h b/include/linux/litmus.h
+new file mode 100644
+index 0000000..259594e
+--- /dev/null
++++ b/include/linux/litmus.h
+@@ -0,0 +1,128 @@
++/*
++ * Constant definitions related to
++ * scheduling policy.
++ */
++
++#ifndef _LINUX_LITMUS_H_
++#define _LINUX_LITMUS_H_
++
++#include
++#include
++
++typedef enum {
++ SCHED_BEG = 0,
++ SCHED_LINUX = 0,
++ SCHED_PFAIR = 1,
++ SCHED_PFAIR_STAGGER = 2,
++ SCHED_PART_EDF = 3,
++ SCHED_PART_EEVDF = 4,
++ SCHED_GLOBAL_EDF = 5,
++ SCHED_PFAIR_DESYNC = 6,
++ SCHED_GLOBAL_EDF_NP = 7,
++ SCHED_CUSTOM = 8,
++ SCHED_EDF_HSB = 9,
++ SCHED_GSN_EDF = 10,
++ SCHED_PSN_EDF = 11,
++ SCHED_ADAPTIVE = 12,
++ /* Add your scheduling policy here */
++
++ SCHED_END = 12,
++ SCHED_DEFAULT = 0,
++ SCHED_INVALID = -1,
++} spolicy;
++
++
++typedef enum {
++ LITMUS_RESERVED_RANGE = 1024,
++
++} sched_setup_cmd_t;
++
++/* Runtime modes */
++enum rt_mode_t {
++ MODE_NON_RT = 0,
++ MODE_RT_RUN = 1
++};
++
++/* Plugin boot options, for convenience */
++#define PLUGIN_LINUX "linux"
++#define PLUGIN_PFAIR "pfair"
++#define PLUGIN_PART_EDF "part_edf"
++#define PLUGIN_GLOBAL_EDF "global_edf"
++#define PLUGIN_GLOBAL_EDF_NP "global_edf_np"
++#define PLUGIN_EDF_HSB "edf_hsb"
++#define PLUGIN_GSN_EDF "gsn_edf"
++#define PLUGIN_PSN_EDF "psn_edf"
++#define PLUGIN_ADAPTIVE "adaptive"
++
++extern spolicy sched_policy;
++
++/* RT mode start time */
++extern volatile unsigned long rt_start_time;
++
++/* Here we store the current mode of the system */
++extern atomic_t rt_mode;
++
++#define get_rt_mode() (atomic_read(&rt_mode))
++#define set_rt_mode(a) atomic_set(&rt_mode,(a))
++
++#define TRACE(fmt, args...) \
++ sched_trace_log_message("%d: " fmt, raw_smp_processor_id(), ## args)
++
++#define TRACE_TASK(t, fmt, args...) \
++ TRACE("(%s/%d) " fmt, (t)->comm, (t)->pid, ##args)
++
++#define TRACE_CUR(fmt, args...) \
++ TRACE_TASK(current, fmt, ## args)
++
++#define TRACE_BUG_ON(cond) \
++ do { if (cond) TRACE("BUG_ON(%s) at %s:%d " \
++ "called from %p current=%s/%d state=%d " \
++ "flags=%x mode=%d partition=%d cpu=%d rtflags=%d"\
++ " job=%u knp=%d timeslice=%u\n", \
++ #cond, __FILE__, __LINE__, __builtin_return_address(0), current->comm, \
++ current->pid, current->state, current->flags, get_rt_mode(), \
++ get_partition(current), smp_processor_id(), get_rt_flags(current), \
++ current->rt_param.times.job_no, current->rt_param.kernel_np, \
++ current->time_slice\
++ ); } while(0);
++
++
++/* in_list - is a given list_head queued on some list?
++ */
++static inline int in_list(struct list_head* list)
++{
++ return !( /* case 1: deleted */
++ (list->next == LIST_POISON1 &&
++ list->prev == LIST_POISON2)
++ ||
++ /* case 2: initialized */
++ (list->next == list &&
++ list->prev == list)
++ );
++}
++
++void list_qsort(struct list_head* list, list_cmp_t less_than);
++
++
++#define RT_PREEMPTIVE 0x2050 /* = NP */
++#define RT_NON_PREEMPTIVE 0x4e50 /* = P */
++#define RT_EXIT_NP_REQUESTED 0x5251 /* = RQ */
++
++/* returns 1 if task t has registered an np flag and set it to RT_NON_PREEMPTIVE
++ */
++int is_np(struct task_struct *t);
++
++/* request that the task should call sys_exit_np()
++ */
++void request_exit_np(struct task_struct *t);
++
++/* kill naughty tasks
++ */
++void scheduler_signal(struct task_struct *t, unsigned int signal);
++void send_scheduler_signals(void);
++void np_mem_kill(struct task_struct *t);
++
++/* clean up real-time state of a task */
++void exit_litmus(struct task_struct *dead_tsk);
++
++#endif
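/* Illustrative usage sketch: the TRACE() family expands to
 * sched_trace_log_message() calls when CONFIG_SCHED_DEBUG_TRACE is set.
 * TRACE() prefixes the CPU, TRACE_TASK() additionally prefixes comm/pid;
 * the demo function and its messages are made up.
 */
#include <linux/sched.h>
#include <linux/litmus.h>

static void demo_trace_usage(struct task_struct *t)
{
        TRACE("rt mode is %d\n", get_rt_mode());
        TRACE_TASK(t, "job %u, np=%d\n", t->rt_param.times.job_no, is_np(t));
        TRACE_CUR("about to reschedule\n");
}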
+diff --git a/include/linux/pfair_common.h b/include/linux/pfair_common.h
+new file mode 100644
+index 0000000..67e18c6
+--- /dev/null
++++ b/include/linux/pfair_common.h
+@@ -0,0 +1,40 @@
++/* PFAIR common data structures and utility functions shared by all PFAIR
++ * based scheduler plugins
++ */
++
++#ifndef __UNC_PFAIR_COMMON_H__
++#define __UNC_PFAIR_COMMON_H__
++
++#include
++#include
++
++typedef struct _pfair_domain {
++ /* Global lock to protect the data structures */
++ queuelock_t pfair_lock;
++ /* runnable rt tasks are in here */
++ struct list_head ready_queue;
++
++ /* real-time tasks waiting for release are in here */
++ struct list_head release_queue;
++
++ /* CPU's in the domain */
++ cpumask_t domain_cpus;
++
++} pfair_domain_t;
++
++#define next_ready(pfair) \
++ (list_entry((pfair)->ready_queue.next, struct task_struct, rt_list))
++void pfair_domain_init(pfair_domain_t *pfair);
++void pfair_add_ready(pfair_domain_t* pfair, struct task_struct *new);
++struct task_struct* __pfair_take_ready(pfair_domain_t* pfair);
++void pfair_add_release(pfair_domain_t* pfair, struct task_struct *task);
++void pfair_try_release_pending(pfair_domain_t* pfair);
++void __pfair_prepare_new_release(struct task_struct *t, jiffie_t start);
++
++void pfair_prepare_next_job(struct task_struct *t);
++void pfair_prepare_next_subtask(struct task_struct *t);
++
++void pfair_prepare_new_releases(pfair_domain_t *pfair, jiffie_t start);
++
++#endif
++
+diff --git a/include/linux/pfair_math.h b/include/linux/pfair_math.h
+new file mode 100644
+index 0000000..b2a14e4
+--- /dev/null
++++ b/include/linux/pfair_math.h
+@@ -0,0 +1,80 @@
++/* PFAIR Mathematical functions */
++#ifndef __UNC_PFAIR_MATH_H__
++#define __UNC_PFAIR_MATH_H__
++
++#include
++#include
++#include
++#include
++
++/* Type definition for our quantums */
++typedef unsigned long long quantum_t;
++
++/*
++* This file defines mathematical functions "ceiling", "floor",
++* and PFAIR specific functions for computing the release and
++* the deadline of a subtask, as well as tie breakers:
++* b-bit and group deadline.
++*/
++static inline quantum_t FLOOR(quantum_t a, unsigned long b)
++{
++ BUG_ON( b == 0);
++ do_div(a, b);
++ return a;
++}
++static inline quantum_t CEIL(quantum_t a, unsigned long b)
++{
++ quantum_t t = FLOOR(a, b);
++ return (quantum_t)((t * b == a) ? t : (t + 1));
++}
++
++
++/*
++* invariant - i-1=get_passed_quanta(t)
++*
++* release time of i-th subtask of j-th job is
++* r_{ij}+\lfloor i-1/wt(T) \rfloor
++* This operation should be robust to wrap-around
++* so we can compare the result with jiffies safely
++*/
++static inline quantum_t release_time(struct task_struct * t)
++{
++ quantum_t e = get_exec_cost(t);
++ quantum_t p = get_rt_period(t);
++ return FLOOR((get_passed_quanta(t)) * p, e);
++}
++/*
++* deadline time of i-th subtask of j-th job is
++* r_{ij}+\lceil i/wt(T) \rceil
++* This operation should be robust to wrap-around
++* so we can compare the result with jiffies safely
++*/
++static inline quantum_t pfair_deadline(struct task_struct * t)
++{
++ quantum_t e = get_exec_cost(t);
++ quantum_t p = get_rt_period(t);
++ return CEIL((get_passed_quanta(t) + 1) * p, e);
++}
++/* In PFAIR b-bit is defined as
++* \lceil i/wt(T) \rceil-\lfloor i/wt(T) \rfloor
++*/
++static inline int b_bit(struct task_struct *t)
++{
++ quantum_t e = get_exec_cost(t);
++ quantum_t p = get_rt_period(t);
++ return CEIL((get_passed_quanta(t) + 1) * p, e)-
++ FLOOR((get_passed_quanta(t) + 1) * p, e);
++}
++/*
++* Group deadline
++*/
++static inline quantum_t group_deadline(struct task_struct * t)
++{
++ quantum_t p = get_rt_period(t);
++ quantum_t e = get_exec_cost(t);
++ quantum_t stage1 = CEIL((get_passed_quanta(t) + 1) * p, e);
++ quantum_t stage2 = CEIL(stage1 * (p - e), p);
++ return CEIL(stage2 * p, p - e);
++}
++
++#endif /* __UNC_PFAIR_MATH_H__ */
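/* Worked example: a task with execution cost e = 2 and period p = 5
 * quanta (weight 2/5). Relative to the job release, the formulas above
 * yield for the first subtask (get_passed_quanta() == 0):
 *
 *     release  = FLOOR(0 * 5, 2)                  = 0
 *     deadline = CEIL (1 * 5, 2)                  = 3
 *     b-bit    = CEIL(1 * 5, 2) - FLOOR(1 * 5, 2) = 3 - 2 = 1
 *
 * and for the second subtask (get_passed_quanta() == 1):
 *
 *     release  = FLOOR(1 * 5, 2)                  = 2
 *     deadline = CEIL (2 * 5, 2)                  = 5
 *     b-bit    = CEIL(2 * 5, 2) - FLOOR(2 * 5, 2) = 5 - 5 = 0
 */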
+diff --git a/include/linux/queuelock.h b/include/linux/queuelock.h
+new file mode 100644
+index 0000000..454ff81
+--- /dev/null
++++ b/include/linux/queuelock.h
+@@ -0,0 +1,98 @@
++#ifndef _UNC_QUEUELOCK_H_
++#define _UNC_QUEUELOCK_H_
++/**
++* Queue lock
++*
++* This is an implementation of T. Anderson's queue lock.
++* It strives to follow the normal Linux locking conventions
++* as much as possible. The rules for acquiring a lock are:
++*
++* 1) The caller must ensure interrupts and preemptions are disabled.
++*
++* 2) The caller _cannot_ recursively acquire the lock.
++*
++* 3) The caller may not sleep while holding the lock. This is currently
++* not enforced, but sleeping while holding the lock will not work.
++*/
++
++#include
++#include
++#include
++
++typedef struct {
++ /* pad the values being spun on to make sure
++ that they are cache local
++ */
++ union {
++ volatile enum {
++ MUST_WAIT,
++ HAS_LOCK
++ } val;
++ char padding[SMP_CACHE_BYTES];
++ } slots[NR_CPUS];
++
++ /* since spin_slot is not being spun on it can be
++ * in a shared cache line. next_slot will be evicted
++ * anyway on every attempt to acquire the lock.
++ */
++ int spin_slot[NR_CPUS];
++
++ /* The next slot that will be available.
++ */
++ atomic_t next_slot;
++} queuelock_t;
++
++
++static inline void queue_lock_init(queuelock_t *lock)
++{
++ int i;
++ for (i = 0; i < NR_CPUS; i++) {
++ lock->slots[i].val = MUST_WAIT;
++ lock->spin_slot[i] = i;
++ }
++ lock->slots[0].val = HAS_LOCK;
++ atomic_set(&lock->next_slot, 0);
++}
++
++
++static inline void queue_lock(queuelock_t *lock)
++{
++ int me = smp_processor_id();
++ volatile int* spin_var;
++ /* Get slot to spin on. atomic_inc_return() returns the incremented
++ * value, so take one off again
++ */
++ lock->spin_slot[me] = atomic_inc_return(&lock->next_slot) - 1;
++ /* check for wrap-around
++ * This could probably be optimized away if we ensure that NR_CPUS divides
++ * INT_MAX...
++ */
++ if (unlikely(lock->spin_slot[me] == NR_CPUS - 1))
++ atomic_add(-NR_CPUS, &lock->next_slot);
++ /* range limit*/
++ lock->spin_slot[me] %= NR_CPUS;
++ /* spin until you acquire the lock */
++ spin_var = (int*) &lock->slots[lock->spin_slot[me]].val;
++ while (*spin_var == MUST_WAIT)
++ cpu_relax();
++
++ /* reset the lock */
++ lock->slots[lock->spin_slot[me]].val = MUST_WAIT;
++ barrier();
++}
++
++
++static inline void queue_unlock(queuelock_t *lock)
++{
++ int me = smp_processor_id();
++ barrier();
++ lock->slots[(lock->spin_slot[me] + 1) % NR_CPUS].val = HAS_LOCK;
++}
++
++#define queue_lock_irqsave(lock, flags) \
++ do { local_irq_save(flags); queue_lock(lock); } while (0);
++
++#define queue_unlock_irqrestore(lock, flags) \
++ do { queue_unlock(lock); local_irq_restore(flags); } while (0);
++
++#endif /* _UNC_QUEUELOCK_H_ */
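/* Illustrative usage sketch: demo_lock is hypothetical and must be set up
 * once with queue_lock_init() before first use. The _irqsave variant
 * disables interrupts around the critical section; the caller must also
 * make sure the section never sleeps.
 */
#include <linux/queuelock.h>

static queuelock_t demo_lock;

static void demo_update_shared_state(void)
{
        unsigned long flags;

        queue_lock_irqsave(&demo_lock, flags);
        /* ... manipulate shared scheduler state, without sleeping ... */
        queue_unlock_irqrestore(&demo_lock, flags);
}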
+diff --git a/include/linux/rt_domain.h b/include/linux/rt_domain.h
+new file mode 100644
+index 0000000..237eac7
+--- /dev/null
++++ b/include/linux/rt_domain.h
+@@ -0,0 +1,98 @@
++/* CLEANUP: Add comments and make it less messy.
++ *
++ */
++
++#ifndef __UNC_RT_DOMAIN_H__
++#define __UNC_RT_DOMAIN_H__
++
++struct _rt_domain;
++
++typedef int (*check_resched_needed_t)(struct _rt_domain *rt);
++typedef void (*release_at_t)(struct task_struct *t, jiffie_t start);
++
++typedef struct _rt_domain {
++ /* runnable rt tasks are in here */
++ rwlock_t ready_lock;
++ struct list_head ready_queue;
++
++ /* real-time tasks waiting for release are in here */
++ spinlock_t release_lock;
++ struct list_head release_queue;
++
++ /* how do we check if we need to kick another CPU? */
++ check_resched_needed_t check_resched;
++
++ /* how are tasks ordered in the ready queue? */
++ list_cmp_t order;
++} rt_domain_t;
++
++#define next_ready(rt) \
++ (list_entry((rt)->ready_queue.next, struct task_struct, rt_list))
++
++#define ready_jobs_pending(rt) \
++ (!list_empty(&(rt)->ready_queue))
++
++void rt_domain_init(rt_domain_t *rt, check_resched_needed_t f,
++ list_cmp_t order);
++
++void __add_ready(rt_domain_t* rt, struct task_struct *new);
++void __add_release(rt_domain_t* rt, struct task_struct *task);
++
++struct task_struct* __take_ready_rq(rt_domain_t* rt, runqueue_t* rq, int cpu);
++struct task_struct* __take_ready(rt_domain_t* rt);
++struct task_struct* __peek_ready(rt_domain_t* rt);
++
++void try_release_pending(rt_domain_t* rt);
++void __release_pending(rt_domain_t* rt);
++
++void rerelease_all(rt_domain_t *rt, release_at_t release);
++void __rerelease_all(rt_domain_t *rt, release_at_t release);
++
++static inline void add_ready(rt_domain_t* rt, struct task_struct *new)
++{
++ unsigned long flags;
++ /* first we need the write lock for rt_ready_queue */
++ write_lock_irqsave(&rt->ready_lock, flags);
++ __add_ready(rt, new);
++ write_unlock_irqrestore(&rt->ready_lock, flags);
++}
++
++static inline struct task_struct* take_ready(rt_domain_t* rt)
++{
++ unsigned long flags;
++ struct task_struct* ret;
++ /* first we need the write lock for rt_ready_queue */
++ write_lock_irqsave(&rt->ready_lock, flags);
++ ret = __take_ready(rt);
++ write_unlock_irqrestore(&rt->ready_lock, flags);
++ return ret;
++}
++
++
++static inline void add_release(rt_domain_t* rt, struct task_struct *task)
++{
++ unsigned long flags;
++ /* first we need the release_lock to protect the release queue */
++ spin_lock_irqsave(&rt->release_lock, flags);
++ __add_release(rt, task);
++ spin_unlock_irqrestore(&rt->release_lock, flags);
++}
++
++static inline int __jobs_pending(rt_domain_t* rt)
++{
++ return !list_empty(&rt->ready_queue);
++}
++
++static inline int jobs_pending(rt_domain_t* rt)
++{
++ unsigned long flags;
++ int ret;
++ /* first we need the read lock for rt_ready_queue */
++ read_lock_irqsave(&rt->ready_lock, flags);
++ ret = __jobs_pending(rt);
++ read_unlock_irqrestore(&rt->ready_lock, flags);
++ return ret;
++}
++
++
++#endif
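/* Illustrative usage sketch: a plugin-local EDF domain. The demo_* names
 * are made up; edf_ready_order() is declared in edf_common.h and orders
 * the ready queue by deadline, and the check_resched callback is where a
 * plugin would kick a remote CPU.
 */
#include <linux/sched.h>
#include <linux/rt_domain.h>
#include <linux/edf_common.h>

static rt_domain_t demo_domain;

static int demo_check_resched(rt_domain_t *rt)
{
        /* a real plugin would trigger a reschedule here if needed */
        return 0;
}

static void demo_domain_setup(void)
{
        rt_domain_init(&demo_domain, demo_check_resched, edf_ready_order);
}

static void demo_on_release(struct task_struct *t)
{
        add_ready(&demo_domain, t);     /* takes ready_lock internally */
}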
+diff --git a/include/linux/rt_param.h b/include/linux/rt_param.h
+new file mode 100644
+index 0000000..426a929
+--- /dev/null
++++ b/include/linux/rt_param.h
+@@ -0,0 +1,264 @@
++/*
++ * Definition of the scheduler plugin interface.
++ *
++ */
++#ifndef _LINUX_RT_PARAM_H_
++#define _LINUX_RT_PARAM_H_
++
++#include
++
++typedef unsigned long jiffie_t;
++
++/* different types of clients */
++typedef enum {
++ RT_CLASS_HARD,
++ RT_CLASS_SOFT,
++ RT_CLASS_BEST_EFFORT
++} task_class_t;
++
++typedef struct rt_param {
++ unsigned long exec_cost;
++ unsigned long period;
++ unsigned int cpu;
++ task_class_t class;
++} rt_param_t;
++
++/* fixed point wrapper to force compiler
++ * errors in case of misuse of a fixed point value
++ */
++typedef struct
++{
++ long val;
++} fp_t;
++
++typedef struct {
++ fp_t weight;
++ unsigned long period;
++ fp_t value;
++} service_level_t;
++
++typedef struct {
++ fp_t estimate;
++ fp_t accumulated;
++} predictor_state_t;
++
++typedef struct {
++ /* when will this task be released the next time? */
++ jiffie_t release;
++ /* time instant the last job was released */
++ jiffie_t last_release;
++ /* what is the current deadline? */
++ jiffie_t deadline;
++ /* b-bit tie breaker for PFAIR, it is ignored in EDF */
++ int b_bit;
++ /* group deadline tie breaker, it is ignored in EDF */
++ jiffie_t group_deadline;
++ /* how long has this task executed so far?
++ * In case of capacity sharing a job completion cannot be
++ * detected by checking time_slice == 0 as the job may have
++ * executed while using another capacity. Use this counter
++ * to keep track of the time spent on a CPU by a job.
++ *
++ * In other words: The number of consumed quanta since the
++ * last job release.
++ */
++ unsigned int exec_time;
++
++ /* Which job is this. This is used to let user space
++ * specify which job to wait for, which is important if jobs
++ * overrun. If we just call sys_sleep_next_period() then we
++ * will unintentionally miss jobs after an overrun.
++ *
++ * Increase this sequence number when a job is released.
++ */
++ unsigned int job_no;
++} rt_times_t;
++
++
++/* RT task parameters for scheduling extensions
++ * These parameters are inherited during clone and therefore must
++ * be explicitly set up before the task set is launched.
++ */
++typedef struct task_rt_param {
++ /* is the task sleeping? */
++ unsigned int flags:8;
++
++ /* Real-time marker: 1 iff it is a LITMUS real-time task.
++ */
++ unsigned int is_realtime:1;
++
++ /* is this task under control of litmus?
++ *
++ * this is necessary because otherwise signal delivery code
++ * may try to wake up a task that is already queued in plugin
++ * data structures.
++ */
++ unsigned int litmus_controlled:1;
++
++ /* Did this task register any SRP controlled resource accesses?
++ * This, of course, should only ever be true under partitioning.
++ * However, this limitation is not currently enforced.
++ */
++ unsigned int subject_to_srp:1;
++
++ /* user controlled parameters */
++ rt_param_t basic_params;
++
++ /* task representing the current "inherited" task
++ * priority, assigned by inherit_priority and
++ * return priority in the scheduler plugins.
++ * could point to self if PI does not result in
++ * an increased task priority.
++ */
++ struct task_struct* inh_task;
++
++ /* Don't just dereference this pointer in kernel space!
++ * It might very well point to junk or nothing at all.
++ * NULL indicates that the task has not requested any non-preemptable
++ * section support.
++ * TODO: What happens on fork?
++ */
++ __user short* np_flag;
++
++ /* For the FMLP under PSN-EDF, it is required to make the task
++ * non-preemptive from kernel space. In order not to interfere with
++ * user space, this counter indicates the kernel space np setting.
++ * kernel_np > 0 => task is non-preemptive
++ */
++ unsigned int kernel_np;
++
++ /* timing parameters */
++ rt_times_t times;
++
++ /* This is currently only used by the PFAIR code
++ * and is a prime candidate for cleanup.
++ */
++ rt_times_t backup;
++
++ /* This field can be used by plugins to store where the task
++ * is currently scheduled. It is the responsibility of the
++ * plugin to avoid race conditions.
++ *
++ * Used by GSN-EDF.
++ */
++ int scheduled_on;
++
++ /* This field can be used by plugins to store where the task
++ * is currently linked. It is the responsibility of the plugin
++ * to avoid race conditions.
++ *
++ * Used by GSN-EDF.
++ */
++ int linked_on;
++
++ /* Adaptive support. Adaptive tasks will store service levels
++ * in this (dynamically allocated) structure.
++ */
++ service_level_t* service_level;
++ unsigned int no_service_levels;
++ unsigned int cur_service_level;
++
++ /* Adaptive support. Store state for weight estimation.
++ */
++ predictor_state_t predictor_state;
++
++ /* Adaptive support. Optimizer fields.
++ */
++ struct list_head opt_list;
++ fp_t opt_order;
++ fp_t opt_dw;
++ fp_t opt_nw;
++ unsigned int opt_level;
++ jiffie_t opt_change;
++} task_rt_param_t;
++
++/* Possible RT flags */
++#define RT_F_RUNNING 0x00000000
++#define RT_F_SLEEP 0x00000001
++#define RT_F_EXP_QUANTA 0x00000002
++#define RT_F_NON_PREEMTABLE 0x00000004
++#define RT_F_EXIT_SEM 0x00000008
++
++#define is_realtime(t) ((t)->rt_param.is_realtime)
++
++/* Realtime utility macros */
++#define get_passed_quanta(t) ((t)->rt_param.times.exec_time)
++#define inc_passed_quanta(t) ((t)->rt_param.times.exec_time += 1)
++#define get_rt_flags(t) ((t)->rt_param.flags)
++#define set_rt_flags(t,f) (t)->rt_param.flags=(f)
++#define get_exec_cost(t) ((t)->rt_param.basic_params.exec_cost)
++#define get_rt_period(t) ((t)->rt_param.basic_params.period)
++#define set_rt_period(t,p) (t)->rt_param.basic_params.period=(p)
++#define set_exec_cost(t,e) (t)->rt_param.basic_params.exec_cost=(e)
++#define get_partition(t) (t)->rt_param.basic_params.cpu
++#define get_deadline(t) ((t)->rt_param.times.deadline)
++#define get_last_release(t) ((t)->rt_param.times.last_release)
++#define get_class(t) ((t)->rt_param.basic_params.class)
++
++#define has_active_job(t) \
++ (time_before(get_last_release(t), jiffies) \
++ && time_before_eq(jiffies, get_deadline(t)))
++
++#define get_est_weight(t) ((t)->rt_param.predictor_state.estimate)
++#define get_sl(t, l) \
++ ((t)->rt_param.service_level[l])
++#define get_cur_sl(t) ((t)->rt_param.cur_service_level)
++#define get_max_sl(t) ((t)->rt_param.no_service_levels - 1)
++#define get_opt_sl(t) ((t)->rt_param.opt_level)
++
++
++#define is_subject_to_srp(t) ((t)->rt_param.subject_to_srp)
++#define is_hrt(t) \
++ ((t)->rt_param.basic_params.class == RT_CLASS_HARD)
++#define is_srt(t) \
++ ((t)->rt_param.basic_params.class == RT_CLASS_SOFT)
++#define is_be(t) \
++ ((t)->rt_param.basic_params.class == RT_CLASS_BEST_EFFORT)
++
++#define clear_rt_params(t) \
++memset(&(t)->rt_param,0, sizeof(struct task_rt_param))
++
++#define get_release(t) ((t)->rt_param.times.release)
++#define set_release(t,r) ((t)->rt_param.times.release=(r))
++
++/* honor the flag that is set when scheduling is in progress
++ * This is some dirty hack in Linux that creates race conditions in our code
++ * if we don't pay attention to it.
++ */
++#define is_running(t) \
++ ((t)->state == TASK_RUNNING || \
++ (t)->thread_info->preempt_count & PREEMPT_ACTIVE)
++
++#define is_blocked(t) (!is_running(t))
++#define is_released(t) (time_before_eq((t)->rt_param.times.release, jiffies))
++#define is_tardy(t) (time_before_eq((t)->rt_param.times.deadline, jiffies))
++#define task_slack(t) ( (int) (t)->rt_param.times.deadline - (int) jiffies - \
++ (int) ((t)->rt_param.basic_params.exec_cost - \
++ (t)->rt_param.times.exec_time))
++
++
++/* real-time comparison macros */
++#define earlier_deadline(a, b) (time_before(\
++ (a)->rt_param.times.deadline,\
++ (b)->rt_param.times.deadline))
++#define earlier_release(a, b) (time_before(\
++ (a)->rt_param.times.release,\
++ (b)->rt_param.times.release))
++
++#define earlier_last_release(a, b) (time_before(\
++ (a)->rt_param.times.last_release,\
++ (b)->rt_param.times.last_release))
++
++
++#define make_np(t) do {t->rt_param.kernel_np++;} while(0);
++#define take_np(t) do {t->rt_param.kernel_np--;} while(0);
++
++#define backup_times(t) do { (t)->rt_param.backup=(t)->rt_param.times; \
++ } while(0);
++#define restore_times(t) do { (t)->rt_param.times=(t)->rt_param.backup; \
++ } while(0);
++
++
++#define rt_list2task(p) list_entry(p, struct task_struct, rt_list)
++
++#endif
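/* Illustrative usage sketch: filling in the user-controlled parameters of
 * a task before a plugin prepares it. The values (cost 1, period 10,
 * CPU 0) are arbitrary and given in quanta; demo_fill_params() is made up.
 */
#include <linux/sched.h>
#include <linux/rt_param.h>

static void demo_fill_params(struct task_struct *t)
{
        set_exec_cost(t, 1);
        set_rt_period(t, 10);
        t->rt_param.basic_params.cpu   = 0;             /* partitioned plugins only */
        t->rt_param.basic_params.class = RT_CLASS_SOFT;
        set_rt_flags(t, RT_F_RUNNING);
}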
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 4463735..f533ae3 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -3,6 +3,8 @@
+
+ #include /* For AT_VECTOR_SIZE */
+
++#include
++
+ /*
+ * cloning flags:
+ */
+@@ -26,6 +28,8 @@
+ #define CLONE_STOPPED 0x02000000 /* Start in stopped state */
+ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */
+ #define CLONE_NEWIPC 0x08000000 /* New ipcs */
++#define CLONE_REALTIME 0x10000000 /* LITMUS real-time task creation */
++
+
+ /*
+ * Scheduling policies
+@@ -1051,6 +1055,12 @@ struct task_struct {
+ #ifdef CONFIG_FAULT_INJECTION
+ int make_it_fail;
+ #endif
++ /* litmus parameters and state */
++ task_rt_param_t rt_param;
++
++ /* allow scheduler plugins to queue in release lists, etc. */
++ struct list_head rt_list;
++
+ };
+
+ static inline pid_t process_group(struct task_struct *tsk)
+diff --git a/include/linux/sched_plugin.h b/include/linux/sched_plugin.h
+new file mode 100644
+index 0000000..1ea8178
+--- /dev/null
++++ b/include/linux/sched_plugin.h
+@@ -0,0 +1,149 @@
++/*
++ * Definition of the scheduler plugin interface.
++ *
++ */
++#ifndef _LINUX_SCHED_PLUGIN_H_
++#define _LINUX_SCHED_PLUGIN_H_
++
++#include
++
++/* struct for semaphore with priority inheritance */
++struct pi_semaphore {
++ atomic_t count;
++ int sleepers;
++ wait_queue_head_t wait;
++ union {
++ /* highest-prio holder/waiter */
++ struct task_struct *task;
++ struct task_struct* cpu_task[NR_CPUS];
++ } hp;
++ /* current lock holder */
++ struct task_struct *holder;
++ /* is the semaphore being used? */
++ int used;
++};
++
++
++/* Enforce runqueues to be opaque objects.
++ *
++ * This allows us to pass around pointers to runqueues,
++ * without actually having to rip it out of sched.c. It
++ * also discourages plugins from trying to be
++ * overly clever.
++ */
++typedef void runqueue_t;
++
++
++/********************* scheduler invocation ******************/
++
++typedef enum {
++ NO_RESCHED = 0,
++ FORCE_RESCHED = 1
++} reschedule_check_t;
++
++
++/* Plugin-specific realtime tick handler */
++typedef reschedule_check_t (*scheduler_tick_t) (void);
++/* Novel make sched decision function */
++typedef int (*schedule_t) (struct task_struct * prev,
++ struct task_struct ** next,
++ runqueue_t * rq);
++/* Clean up after the task switch has occurred.
++ * This function is called after every (even non-rt) task switch.
++ */
++typedef void (*finish_switch_t)(struct task_struct *prev);
++
++
++/********************* task state changes ********************/
++
++/* called to setup a new real-time task */
++typedef long (*prepare_task_t) (struct task_struct *task);
++/* called to re-introduce a task after blocking */
++typedef void (*wake_up_task_t) (struct task_struct *task);
++/* called to notify the plugin of a blocking real-time task;
++ * it will only be called for real-time tasks and before schedule is called */
++typedef void (*task_blocks_t) (struct task_struct *task);
++/* called when a real-time task exits. Free any allocated resources */
++typedef long (*tear_down_t) (struct task_struct *);
++
++/* Called when the new_owner is released from the wait queue
++ * it should now inherit the priority from sem, _before_ it gets re-added
++ * to any queue
++ */
++typedef long (*inherit_priority_t) (struct pi_semaphore *sem,
++ struct task_struct *new_owner);
++
++/* Called when the current task releases a semaphore from which it might
++ * have inherited a priority
++ */
++typedef long (*return_priority_t) (struct pi_semaphore *sem);
++
++/* Called when a task tries to acquire a semaphore and fails. Check if its
++ * priority is higher than that of the current holder.
++ */
++typedef long (*pi_block_t) (struct pi_semaphore *sem, struct task_struct *t);
++
++
++/********************* sys call backends ********************/
++/* This function causes the caller to sleep until the next release */
++typedef long (*sleep_next_period_t) (void);
++
++typedef int (*scheduler_setup_t) (int cmd, void __user *parameter);
++
++typedef int (*mode_change_t) (int);
++
++struct sched_plugin {
++ /* basic info */
++ char *plugin_name;
++ int ready_to_use;
++
++ /* management interface */
++ mode_change_t mode_change;
++
++ /* scheduler invocation */
++ scheduler_tick_t scheduler_tick;
++ schedule_t schedule;
++ finish_switch_t finish_switch;
++
++ /* syscall backend */
++ sleep_next_period_t sleep_next_period;
++ scheduler_setup_t scheduler_setup;
++
++ /* task state changes */
++ prepare_task_t prepare_task;
++ wake_up_task_t wake_up_task;
++ task_blocks_t task_blocks;
++ tear_down_t tear_down;
++
++ /* priority inheritance */
++ inherit_priority_t inherit_priority;
++ return_priority_t return_priority;
++ pi_block_t pi_block;
++} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
++
++typedef struct sched_plugin sched_plugin_t;
++
++extern sched_plugin_t *curr_sched_plugin;
++
++
++/* common scheduler tick */
++reschedule_check_t rt_scheduler_tick(void);
++
++
++/* Don't pull in our definitions on top of the real ones
++ * in sched.c!
++ */
++#ifndef __SCHED_C__
++
++/* External linux scheduler facilities */
++void deactivate_task(struct task_struct *, runqueue_t *);
++/* This function is defined in sched.c. We need access to it for
++ * indirect switching.
++ */
++void __activate_task(struct task_struct *, runqueue_t *);
++void __setscheduler(struct task_struct *, int, int);
++
++#endif
++
++extern int get_sched_options(void);
++#endif
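/* Illustrative sketch of the overall shape of a plugin declaration. The
 * "demo" plugin and its stub hooks are made up; the real plugins
 * (sched_gsn_edf.c, sched_psn_edf.c, ...) implement all of the callbacks
 * listed above.
 */
#include <linux/sched.h>
#include <linux/sched_plugin.h>

static reschedule_check_t demo_scheduler_tick(void)
{
        return NO_RESCHED;
}

static int demo_schedule(struct task_struct *prev, struct task_struct **next,
                         runqueue_t *rq)
{
        *next = NULL;   /* nothing to contribute; let Linux pick */
        return 0;
}

static sched_plugin_t demo_plugin = {
        .plugin_name    = "demo",
        .ready_to_use   = 1,
        .scheduler_tick = demo_scheduler_tick,
        .schedule       = demo_schedule,
};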
+diff --git a/include/linux/sched_trace.h b/include/linux/sched_trace.h
+new file mode 100644
+index 0000000..308cc7d
+--- /dev/null
++++ b/include/linux/sched_trace.h
+@@ -0,0 +1,182 @@
++/* sched_trace.h -- record scheduler events to a byte stream for offline analysis.
++ */
++#ifndef _LINUX_SCHED_TRACE_H_
++#define _LINUX_SCHED_TRACE_H_
++
++#include
++
++typedef enum {
++ ST_INVOCATION = 0,
++ ST_ARRIVAL = 1,
++ ST_DEPARTURE = 2,
++ ST_PREEMPTION = 3,
++ ST_SCHEDULED = 4,
++ ST_JOB_RELEASE = 5,
++ ST_JOB_COMPLETION = 6,
++ ST_CAPACITY_RELEASE = 7,
++ ST_CAPACITY_ALLOCATION = 8,
++ ST_SERVICE_LEVEL_CHANGE = 9,
++ ST_WEIGHT_ERROR = 10,
++} trace_type_t;
++
++typedef struct {
++ trace_type_t trace:8;
++ unsigned int size:24;
++ unsigned long long timestamp;
++} trace_header_t;
++
++
++typedef struct {
++ unsigned int is_rt:1;
++ unsigned int is_server:1;
++ task_class_t class:4;
++ unsigned int budget:24;
++ u32 deadline;
++
++ pid_t pid;
++} task_info_t;
++
++typedef struct {
++ trace_header_t header;
++ unsigned long flags;
++} invocation_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++} arrival_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++} departure_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++ task_info_t by;
++} preemption_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++} scheduled_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++ u16 period;
++ u16 wcet;
++} release_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++ u16 period;
++ u16 wcet;
++ int tardiness;
++ unsigned int job_no;
++} completion_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++} cap_release_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++ u16 budget;
++ u32 deadline;
++ pid_t donor;
++} cap_allocation_record_t;
++
++typedef struct {
++ trace_header_t header;
++ task_info_t task;
++ unsigned int from:16;
++ unsigned int to:16;
++ service_level_t new_level;
++ service_level_t old_level;
++} service_level_change_record_t;
++
++typedef struct {
++ trace_header_t header;
++ pid_t task;
++ fp_t estimate;
++ fp_t actual;
++} weight_error_record_t;
++
++#ifdef CONFIG_SCHED_TASK_TRACE
++void sched_trace_scheduler_invocation(void);
++
++void sched_trace_task_arrival(struct task_struct *t);
++void sched_trace_task_departure(struct task_struct *t);
++void sched_trace_task_preemption(struct task_struct *t,
++ struct task_struct* by);
++void sched_trace_task_scheduled(struct task_struct *);
++
++void sched_trace_job_release(struct task_struct *t);
++void sched_trace_job_completion(struct task_struct *t);
++
++void sched_trace_capacity_release(struct task_struct *t);
++void sched_trace_capacity_allocation(struct task_struct *t,
++ u16 budget, u32 deadline, pid_t donor);
++
++void sched_trace_capacity_alloc_srv(pid_t srv, u32 srv_dl, task_class_t cls,
++ u16 srv_budget,
++ u16 budget, u32 deadline, pid_t donor);
++
++void sched_trace_server_release(int id, unsigned int wcet,
++ unsigned int period,
++ task_class_t class);
++
++void sched_trace_server_completion(int id, unsigned int budget,
++ jiffie_t deadline,
++ task_class_t class);
++
++void sched_trace_server_scheduled(int id, task_class_t class,
++ unsigned int budget, jiffie_t deadline);
++
++void sched_trace_service_level_change(struct task_struct* t,
++ unsigned int from,
++ unsigned int to);
++
++void sched_trace_weight_error(struct task_struct* t, fp_t actual);
++
++#else
++#define sched_trace_scheduler_invocation(x)
++
++#define sched_trace_task_arrival(t)
++#define sched_trace_task_departure(t)
++#define sched_trace_task_preemption(t, by)
++#define sched_trace_task_scheduled(t)
++#define sched_trace_job_release(t)
++#define sched_trace_job_completion(t)
++#define sched_trace_capacity_release(t)
++#define sched_trace_capacity_allocation(t, budget, deadline, donor)
++#define sched_trace_capacity_alloc_srv(srv, srv_dl, cls, srv_budget,\
++ budget, deadline, donor)
++#define sched_trace_server_release(id, wcet, period, class)
++#define sched_trace_server_completion(id, budget, deadline, class)
++#define sched_trace_server_scheduled(id, class, budget, deadline)
++
++#define sched_trace_service_level_change(t, a, b)
++
++#define sched_trace_weight_error(x, y)
++
++
++#endif
++
++
++#ifdef CONFIG_SCHED_DEBUG_TRACE
++void sched_trace_log_message(const char* fmt, ...);
++
++#else
++
++#define sched_trace_log_message(fmt, ...)
++
++#endif
++
++
++#endif
+diff --git a/include/linux/trace.h b/include/linux/trace.h
+new file mode 100644
+index 0000000..9e457aa
+--- /dev/null
++++ b/include/linux/trace.h
+@@ -0,0 +1,74 @@
++
++#ifndef _SYS_TRACE_H_
++#define _SYS_TRACE_H_
++
++#include
++#include
++
++
++/*********************** TIMESTAMPS ************************/
++
++struct timestamp {
++ unsigned long event;
++ unsigned long long timestamp;
++ unsigned int seq_no;
++ int cpu;
++};
++
++
++/* buffer holding time stamps - will be provided by driver */
++extern struct ft_buffer* trace_ts_buf;
++
++/* save_timestamp: stores current time as struct timestamp
++ * in trace_ts_buf
++ */
++asmlinkage void save_timestamp(unsigned long event);
++
++#define TIMESTAMP(id) ft_event0(id, save_timestamp)
++
++/* Convention for timestamps
++ * =========================
++ *
++ * In order to process the trace files with a common tool, we use the following
++ * convention to measure execution times: The end time id of a code segment is
++ * always the next number after the start time event id.
++ */
++
++#define TS_SCHED_START TIMESTAMP(100)
++#define TS_SCHED_END TIMESTAMP(101)
++#define TS_CXS_START TIMESTAMP(102)
++#define TS_CXS_END TIMESTAMP(103)
++
++#define TS_TICK_START TIMESTAMP(110)
++#define TS_TICK_END TIMESTAMP(111)
++
++#define TS_PLUGIN_SCHED_START TIMESTAMP(120)
++#define TS_PLUGIN_SCHED_END TIMESTAMP(121)
++
++#define TS_PLUGIN_TICK_START TIMESTAMP(130)
++#define TS_PLUGIN_TICK_END TIMESTAMP(131)
++
++#define TS_ENTER_NP_START TIMESTAMP(140)
++#define TS_ENTER_NP_END TIMESTAMP(141)
++
++#define TS_EXIT_NP_START TIMESTAMP(150)
++#define TS_EXIT_NP_END TIMESTAMP(151)
++
++#define TS_SRP_UP_START TIMESTAMP(160)
++#define TS_SRP_UP_END TIMESTAMP(161)
++#define TS_SRP_DOWN_START TIMESTAMP(162)
++#define TS_SRP_DOWN_END TIMESTAMP(163)
++
++#define TS_PI_UP_START TIMESTAMP(170)
++#define TS_PI_UP_END TIMESTAMP(171)
++#define TS_PI_DOWN_START TIMESTAMP(172)
++#define TS_PI_DOWN_END TIMESTAMP(173)
++
++#define TS_FIFO_UP_START TIMESTAMP(180)
++#define TS_FIFO_UP_END TIMESTAMP(181)
++#define TS_FIFO_DOWN_START TIMESTAMP(182)
++#define TS_FIFO_DOWN_END TIMESTAMP(183)
++
++
++
++#endif /* !_SYS_TRACE_H_ */
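/* Illustrative usage sketch: measuring a code path with a start/end pair.
 * Per the convention above, the offline tool pairs event id 100
 * (TS_SCHED_START) with id 101 (TS_SCHED_END) and reports the cycles
 * spent in between; the demo function is made up.
 */
#include <linux/trace.h>

static void demo_measured_section(void)
{
        TS_SCHED_START;
        /* ... the code whose overhead is being measured ... */
        TS_SCHED_END;
}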
+diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
+index 975c963..6ae0ff9 100644
+--- a/include/linux/uaccess.h
++++ b/include/linux/uaccess.h
+@@ -84,4 +84,20 @@ static inline unsigned long __copy_from_user_nocache(void *to,
+ ret; \
+ })
+
++/* This is a naive attempt at a write version of the above native Linux macro.
++ */
++#define poke_kernel_address(val, addr) \
++ ({ \
++ long ret; \
++ mm_segment_t old_fs = get_fs(); \
++ \
++ set_fs(KERNEL_DS); \
++ pagefault_disable(); \
++ ret = __put_user(val, (__force typeof(val) __user *)(addr)); \
++ pagefault_enable(); \
++ set_fs(old_fs); \
++ ret; \
++ })
++
++
+ #endif /* __LINUX_UACCESS_H__ */
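/* Illustrative usage sketch: pairing the existing probe_kernel_address()
 * read macro with the poke_kernel_address() write variant added above.
 * The target pointer and demo function are made up; both macros return
 * the __get_user()/__put_user() result (0 on success, -EFAULT on fault).
 */
#include <linux/uaccess.h>

static long demo_increment_word(int *kernel_target)
{
        int old;
        long err;

        err = probe_kernel_address(kernel_target, old);
        if (!err)
                err = poke_kernel_address(old + 1, kernel_target);
        return err;
}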
+diff --git a/include/linux/wait.h b/include/linux/wait.h
+index e820d00..c7e96b6 100644
+--- a/include/linux/wait.h
++++ b/include/linux/wait.h
+@@ -161,6 +161,8 @@ wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int));
+ #define wake_up_locked(x) __wake_up_locked((x), TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE)
+ #define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
+
++#define pi_wake_up(x) __pi_wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
++
+ #define __wait_event(wq, condition) \
+ do { \
+ DEFINE_WAIT(__wait); \
+diff --git a/kernel/Makefile b/kernel/Makefile
+index 14f4d45..55acc93 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -8,7 +8,12 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+ signal.o sys.o kmod.o workqueue.o pid.o \
+ rcupdate.o extable.o params.o posix-timers.o \
+ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
+- hrtimer.o rwsem.o latency.o nsproxy.o srcu.o
++ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
++ sched_plugin.o litmus.o sched_trace.o \
++ edf_common.o fifo_common.o pfair_common.o\
++ sched_global_edf.o sched_part_edf.o sched_edf_hsb.o sched_pfair.o \
++ sched_gsn_edf.o sched_psn_edf.o litmus_sem.o \
++ trace.o ft_event.o rt_domain.o sched_adaptive.o
+
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+ obj-y += time/
+diff --git a/kernel/edf_common.c b/kernel/edf_common.c
+new file mode 100644
+index 0000000..4746c66
+--- /dev/null
++++ b/kernel/edf_common.c
+@@ -0,0 +1,135 @@
++/*
++ * kernel/edf_common.c
++ *
++ * Common functions for EDF-based schedulers.
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++
++#include
++
++/* edf_higher_prio - returns true if first has a higher EDF priority
++ * than second. Deadline ties are broken by PID.
++ *
++ * first must not be NULL and must be a real-time task.
++ * second may be NULL or a non-rt task.
++ */
++int edf_higher_prio(struct task_struct* first,
++ struct task_struct* second)
++{
++ struct task_struct *first_task = first;
++ struct task_struct *second_task = second;
++
++ /* Check for inherited priorities. Change task
++ * used for comparison in such a case.
++ */
++ if (first && first->rt_param.inh_task)
++ first_task = first->rt_param.inh_task;
++ if (second && second->rt_param.inh_task)
++ second_task = second->rt_param.inh_task;
++
++ return
++ /* does the second task exist and is it a real-time task? If
++ * not, the first task (which is a RT task) has higher
++ * priority.
++ */
++ !second_task || !is_realtime(second_task) ||
++
++ /* is the deadline of the first task earlier?
++ * Then it has higher priority.
++ */
++ earlier_deadline(first_task, second_task) ||
++
++ /* Do we have a deadline tie?
++ * Then break by PID.
++ */
++ (get_deadline(first_task) == get_deadline(second_task) &&
++ (first_task->pid < second_task->pid ||
++
++ /* If the PIDs are the same then the task with the inherited
++ * priority wins.
++ */
++ (first_task->pid == second_task->pid &&
++ !second->rt_param.inh_task)));
++}
++
++int edf_ready_order(struct list_head* a, struct list_head* b)
++{
++ return edf_higher_prio(
++ list_entry(a, struct task_struct, rt_list),
++ list_entry(b, struct task_struct, rt_list));
++}
++
++void edf_release_at(struct task_struct *t, jiffie_t start)
++{
++ t->rt_param.times.deadline = start;
++ edf_prepare_for_next_period(t);
++ t->rt_param.times.last_release = start;
++ set_rt_flags(t, RT_F_RUNNING);
++}
++
++void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched)
++{
++ rt_domain_init(rt, resched, edf_ready_order);
++}
++
++void edf_prepare_for_next_period(struct task_struct *t)
++{
++ BUG_ON(!t);
++ /* prepare next release */
++ t->rt_param.times.release = t->rt_param.times.deadline;
++ t->rt_param.times.deadline += get_rt_period(t);
++ t->rt_param.times.exec_time = 0;
++ /* update job sequence number */
++ t->rt_param.times.job_no++;
++
++ t->time_slice = get_exec_cost(t);
++
++ /* who uses this? statistics? */
++ t->first_time_slice = 0;
++}
++
++/* need_to_preempt - check whether the task t needs to be preempted
++ * call only with irqs disabled and with ready_lock acquired
++ * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT!
++ */
++int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t)
++{
++ /* we need the read lock for edf_ready_queue */
++ /* no need to preempt if there is nothing pending */
++ if (!ready_jobs_pending(rt))
++ return 0;
++ /* we need to reschedule if t doesn't exist */
++ if (!t)
++ return 1;
++
++ /* NOTE: We cannot check for non-preemptibility since we
++ * don't know what address space we're currently in.
++ */
++
++ /* make sure to get non-rt stuff out of the way */
++ return !is_realtime(t) || edf_higher_prio(next_ready(rt), t);
++}
++
++
++/*
++ * Deactivate current task until the beginning of the next period.
++ */
++long edf_sleep_next_period(void)
++{
++ /* Mark that we do not execute anymore */
++ set_rt_flags(current, RT_F_SLEEP);
++ /* call schedule, this will return when a new job arrives
++ * it also takes care of preparing for the next release
++ */
++ schedule();
++ return 0;
++}
++
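
The comparison above is a chain of three ordered criteria: real-time
status, deadline, and PID, with the inherited task substituted first if
priority inheritance is in effect. A stand-alone model of that ordering
(simplified record instead of the kernel's task_struct, inheritance step
omitted):

#include <stdio.h>

struct fake_task {
	int realtime;
	unsigned long deadline;
	int pid;
};

static int model_edf_higher_prio(const struct fake_task *a,
				 const struct fake_task *b)
{
	return !b || !b->realtime ||
		a->deadline < b->deadline ||
		(a->deadline == b->deadline && a->pid < b->pid);
}

int main(void)
{
	struct fake_task t1 = { 1, 1000, 42 };
	struct fake_task t2 = { 1, 1000, 17 };

	/* equal deadlines: the lower PID (t2) wins the tie-break */
	printf("%d %d\n", model_edf_higher_prio(&t1, &t2),	/* prints 0 */
	       model_edf_higher_prio(&t2, &t1));		/* prints 1 */
	return 0;
}
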
+diff --git a/kernel/fifo_common.c b/kernel/fifo_common.c
+new file mode 100644
+index 0000000..c1641a1
+--- /dev/null
++++ b/kernel/fifo_common.c
+@@ -0,0 +1,86 @@
++/*
++ * kernel/fifo_common.c
++ *
++ * Fifo helper functions. Could one day be a FIFO plugin if someone
++ * is interested.
++ *
++ * The current FIFO implementation automatically chops Linux tasks into
++ * smaller jobs by assigning a fixed time slice. Once that time slice expires,
++ * it is treated as a new job release (that is queued in the back).
++ *
++ * The result is that it provides FIFO properties on a job level and round-robin
++ * on a task level if the tasks execute continuously.
++ */
++
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++
++/* This function is defined in sched.c. We need to access it for
++ * indirect switching.
++ */
++void __activate_task(struct task_struct *p, runqueue_t *rq);
++
++/* fifo_higher_prio - returns true if first has a higher FIFO priority
++ * than second. Release time ties are broken by PID.
++ *
++ * first must not be NULL and must be a real-time task.
++ * second may be NULL or a non-rt task.
++ */
++int fifo_higher_prio(struct task_struct* first,
++ struct task_struct* second)
++{
++ struct task_struct *first_task = first;
++ struct task_struct *second_task = second;
++
++ /* Check for inherited priorities. Change task
++ * used for comparison in such a case.
++ */
++ if (first && first->rt_param.inh_task)
++ first_task = first->rt_param.inh_task;
++ if (second && second->rt_param.inh_task)
++ second_task = second->rt_param.inh_task;
++
++ return
++ /* does the second task exist and is it a real-time task? If
++ * not, the first task (which is a RT task) has higher
++ * priority.
++ */
++ !second_task || !is_realtime(second_task) ||
++
++ /* is the release of the first task earlier?
++ * Then it has higher priority.
++ */
++ earlier_last_release(first_task, second_task) ||
++
++ /* Do we have a release time tie?
++ * Then break by PID.
++ */
++ (get_last_release(first_task) ==
++ get_last_release(second_task) &&
++ (first_task->pid < second_task->pid ||
++
++ /* If the PIDs are the same then the task with the inherited
++ * priority wins.
++ */
++ (first_task->pid == second_task->pid &&
++ !second->rt_param.inh_task)));
++}
++
++int fifo_ready_order(struct list_head* a, struct list_head* b)
++{
++ return fifo_higher_prio(
++ list_entry(a, struct task_struct, rt_list),
++ list_entry(b, struct task_struct, rt_list));
++}
++
++void fifo_domain_init(rt_domain_t* rt, check_resched_needed_t resched)
++{
++ rt_domain_init(rt, resched, fifo_ready_order);
++}
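
The round-robin effect described in the header comment can be seen in a
short timeline; the two-tick slice below is arbitrary and only serves as
an illustration.

/*
 *   t=0  T1 and T2 become ready with the same release time;
 *        T1 (lower PID) wins the tie-break and runs.
 *   t=2  T1's slice expires and is treated as a new job release,
 *        which re-queues T1 behind T2; T2 runs.
 *   t=4  T2's slice expires and re-queues behind T1; T1 runs again.
 *
 * FIFO order on the job level thus degenerates into round-robin on the
 * task level for continuously executing tasks.
 */
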
+diff --git a/kernel/fork.c b/kernel/fork.c
+index d57118d..d786dcf 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -57,6 +57,9 @@
+ #include
+ #include
+
++#include
++#include
++
+ /*
+ * Protected counters by write_lock_irq(&tasklist_lock)
+ */
+@@ -118,6 +121,9 @@ void __put_task_struct(struct task_struct *tsk)
+ WARN_ON(atomic_read(&tsk->usage));
+ WARN_ON(tsk == current);
+
++ if (is_realtime(tsk))
++ exit_litmus(tsk);
++
+ security_task_free(tsk);
+ free_uid(tsk->user);
+ put_group_info(tsk->group_info);
+diff --git a/kernel/ft_event.c b/kernel/ft_event.c
+new file mode 100644
+index 0000000..10318ee
+--- /dev/null
++++ b/kernel/ft_event.c
+@@ -0,0 +1,104 @@
++#include
++
++#include
++
++/* the feather trace management functions assume
++ * exclusive access to the event table
++ */
++
++
++#define BYTE_JUMP 0xeb
++#define BYTE_JUMP_LEN 0x02
++
++/* for each event, there is an entry in the event table */
++struct trace_event {
++ long id;
++ long count;
++ long start_addr;
++ long end_addr;
++};
++
++extern struct trace_event __start___event_table[];
++extern struct trace_event __stop___event_table[];
++
++int ft_enable_event(unsigned long id)
++{
++ struct trace_event* te = __start___event_table;
++ int count = 0;
++ char* delta;
++ unsigned char* instr;
++
++ while (te < __stop___event_table) {
++ if (te->id == id && ++te->count == 1) {
++ instr = (unsigned char*) te->start_addr;
++ /* make sure we don't clobber something wrong */
++ if (*instr == BYTE_JUMP) {
++ delta = (((unsigned char*) te->start_addr) + 1);
++ *delta = 0;
++ }
++ }
++ if (te->id == id)
++ count++;
++ te++;
++ }
++ return count;
++}
++
++int ft_disable_event(unsigned long id)
++{
++ struct trace_event* te = __start___event_table;
++ int count = 0;
++ char* delta;
++ unsigned char* instr;
++
++ while (te < __stop___event_table) {
++ if (te->id == id && --te->count == 0) {
++ instr = (unsigned char*) te->start_addr;
++ if (*instr == BYTE_JUMP) {
++ delta = (((unsigned char*) te->start_addr) + 1);
++ *delta = te->end_addr - te->start_addr -
++ BYTE_JUMP_LEN;
++ }
++ }
++ if (te->id == id)
++ count++;
++ te++;
++ }
++ return count;
++}
++
++int ft_disable_all_events(void)
++{
++ struct trace_event* te = __start___event_table;
++ int count = 0;
++ char* delta;
++ unsigned char* instr;
++
++ while (te < __stop___event_table) {
++ if (te->count) {
++ instr = (unsigned char*) te->start_addr;
++ if (*instr == BYTE_JUMP) {
++ delta = (((unsigned char*) te->start_addr)
++ + 1);
++ *delta = te->end_addr - te->start_addr -
++ BYTE_JUMP_LEN;
++ te->count = 0;
++ count++;
++ }
++ }
++ te++;
++ }
++ return count;
++}
++
++int ft_is_event_enabled(unsigned long id)
++{
++ struct trace_event* te = __start___event_table;
++
++ while (te < __stop___event_table) {
++ if (te->id == id)
++ return te->count;
++ te++;
++ }
++ return 0;
++}
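
Both ft_enable_event() and ft_disable_event() work by rewriting the 8-bit
displacement of the short jump recorded in an event-table entry: a
displacement of 0 falls through into the instrumentation code, while
end_addr - start_addr - BYTE_JUMP_LEN jumps over it. A small stand-alone
model of that arithmetic, operating on a byte buffer instead of live
kernel text:

#include <stdio.h>

#define BYTE_JUMP	0xeb	/* opcode of a short relative jump */
#define BYTE_JUMP_LEN	0x02	/* opcode plus 8-bit displacement */

int main(void)
{
	/* fake "text": a short jump followed by 6 bytes of instrumentation */
	unsigned char text[8] = { BYTE_JUMP, 0,
				  0x90, 0x90, 0x90, 0x90, 0x90, 0x90 };
	unsigned long start = 0, end = 8;

	/* disabled: the displacement skips the instrumentation entirely */
	text[start + 1] = end - start - BYTE_JUMP_LEN;
	printf("disabled: jmp +%u\n", (unsigned) text[start + 1]);	/* +6 */

	/* enabled: a displacement of 0 falls through into the payload */
	text[start + 1] = 0;
	printf("enabled:  jmp +%u\n", (unsigned) text[start + 1]);	/* +0 */
	return 0;
}
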
+diff --git a/kernel/litmus.c b/kernel/litmus.c
+new file mode 100644
+index 0000000..8f238ba
+--- /dev/null
++++ b/kernel/litmus.c
+@@ -0,0 +1,953 @@
++/* litmus.c -- Implementation of the LITMUS syscalls, the LITMUS initialization,
++ * and the common tick function.
++ */
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++#include
++
++#include
++
++#define MAX_SERVICE_LEVELS 10
++
++/* Variables that govern the scheduling process */
++spolicy sched_policy = SCHED_DEFAULT;
++int sched_options = 0;
++
++
++/* This is a flag for switching the system into RT mode when it is booted up
++ * In RT-mode non-realtime tasks are scheduled as background tasks.
++ */
++
++/* The system is booting in non-realtime mode */
++atomic_t rt_mode = ATOMIC_INIT(MODE_NON_RT);
++/* Here we specify a mode change to be made */
++atomic_t new_mode = ATOMIC_INIT(MODE_NON_RT);
++/* Number of RT tasks that exist in the system */
++atomic_t n_rt_tasks = ATOMIC_INIT(0);
++
++/* Only one CPU may perform a mode change. */
++static queuelock_t mode_change_lock;
++
++/* The time instant when we switched to RT mode */
++volatile jiffie_t rt_start_time = 0;
++
++/* To send signals from the scheduler
++ * Must drop locks first.
++ */
++static LIST_HEAD(sched_sig_list);
++static DEFINE_SPINLOCK(sched_sig_list_lock);
++
++/**
++ * sys_set_rt_mode
++ * @newmode: new mode the scheduler must be switched to
++ * External syscall for setting the RT mode flag
++ * Returns EINVAL if mode is not recognized or mode transition is
++ * not permitted
++ * On success 0 is returned
++ *
++ * FIXME: In a "real" OS we cannot just let any user switch the mode...
++ */
++asmlinkage long sys_set_rt_mode(int newmode)
++{
++ if ((newmode == MODE_NON_RT) || (newmode == MODE_RT_RUN)) {
++ printk(KERN_INFO "real-time mode switch to %s\n",
++ (newmode == MODE_RT_RUN ? "rt" : "non-rt"));
++ atomic_set(&new_mode, newmode);
++ return 0;
++ }
++ return -EINVAL;
++}
++
++/*
++ * sys_set_rt_task_param
++ * @pid: Pid of the task whose scheduling parameters are to be changed
++ * @param: New real-time extension parameters such as the execution cost and
++ * period
++ * Syscall for manipulating a task's rt extension params
++ * Returns EFAULT if param is NULL.
++ * ESRCH if pid does not correspond
++ * to a valid task.
++ * EINVAL if either period or execution cost is <=0
++ * EPERM if pid refers to a task that is already real-time
++ * 0 if success
++ *
++ * Only non-real-time tasks may be configured with this system call
++ * to avoid races with the scheduler. In practice, this means that a
++ * task's parameters must be set _before_ calling sys_prepare_rt_task()
++ */
++asmlinkage long sys_set_rt_task_param(pid_t pid, rt_param_t __user * param)
++{
++ rt_param_t tp;
++ struct task_struct *target;
++ int retval = -EINVAL;
++
++ printk("Setting up rt task parameters for process %d.\n", pid);
++
++ if (pid < 0 || param == 0) {
++ goto out;
++ }
++ if (copy_from_user(&tp, param, sizeof(tp))) {
++ retval = -EFAULT;
++ goto out;
++ }
++
++ /* Task search and manipulation must be protected */
++ read_lock_irq(&tasklist_lock);
++ if (!(target = find_task_by_pid(pid))) {
++ retval = -ESRCH;
++ goto out_unlock;
++ }
++
++ if (is_realtime(target)) {
++ /* The task is already a real-time task.
++ * We do not allow parameter changes at this point.
++ */
++ retval = -EPERM;
++ goto out_unlock;
++ }
++
++ if (tp.exec_cost <= 0)
++ goto out_unlock;
++ if (tp.period <= 0)
++ goto out_unlock;
++ if (!cpu_online(tp.cpu))
++ goto out_unlock;
++ if (tp.period < tp.exec_cost)
++ {
++ printk(KERN_INFO "litmus: real-time task %d rejected "
++ "because wcet > period\n", pid);
++ goto out_unlock;
++ }
++
++ /* Assign params */
++ target->rt_param.basic_params = tp;
++
++ retval = 0;
++ out_unlock:
++ read_unlock_irq(&tasklist_lock);
++ out:
++ return retval;
++}
++
++/* Getter of task's RT params
++ * returns EINVAL if param is NULL or pid is negative
++ * returns ESRCH if pid does not correspond to a valid task
++ * returns EFAULT if copying of parameters has failed.
++ */
++asmlinkage long sys_get_rt_task_param(pid_t pid, rt_param_t __user * param)
++{
++ int retval = -EINVAL;
++ struct task_struct *source;
++ rt_param_t lp;
++ if (param == 0 || pid < 0)
++ goto out;
++ read_lock(&tasklist_lock);
++ if (!(source = find_task_by_pid(pid))) {
++ retval = -ESRCH;
++ goto out_unlock;
++ }
++ lp = source->rt_param.basic_params;
++ read_unlock(&tasklist_lock);
++ /* Do copying outside the lock */
++ retval =
++ copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0;
++ return retval;
++ out_unlock:
++ read_unlock(&tasklist_lock);
++ out:
++ return retval;
++
++}
++
++/*
++ * sys_set_service_levels
++ * @pid: Pid of the task that is to be configured
++ * @count: The number of service levels
++ * @levels: The new service levels.
++ *
++ * Returns EFAULT if levels is not a valid address.
++ * ESRCH if pid does not correspond
++ * to a valid task.
++ * EINVAL if either period or execution cost is <=0 for any level,
++ * or if utility is not increasing.
++ * EPERM if pid is a real-time task
++ * ENOMEM if there is insufficient memory available
++ * 0 if success
++ *
++ * May not be used on RT tasks to avoid races.
++ */
++asmlinkage long sys_set_service_levels(pid_t pid,
++ unsigned int count,
++ service_level_t __user *levels)
++{
++ struct task_struct *target;
++ service_level_t level, *klevels;
++ int retval = -EINVAL, i;
++ fp_t last_value = FP(0);
++ fp_t last_weight = FP(0);
++
++ TRACE("Setting up service levels for process %d.\n", pid);
++
++ if (pid < 0 || count > MAX_SERVICE_LEVELS) {
++ goto out;
++ }
++
++ /* Task search and manipulation must be protected */
++ read_lock_irq(&tasklist_lock);
++ if (!(target = find_task_by_pid(pid))) {
++ retval = -ESRCH;
++ read_unlock_irq(&tasklist_lock);
++ goto out;
++ }
++ read_unlock_irq(&tasklist_lock);
++
++ if (is_realtime(target)) {
++ /* The task is already a real-time task.
++ * We do not allow parameter changes at this point.
++ */
++ retval = -EPERM;
++ goto out;
++ }
++
++ /* get rid of old service levels, if any */
++ kfree(target->rt_param.service_level);
++ target->rt_param.service_level = NULL;
++ target->rt_param.no_service_levels = 0;
++
++ /* count == 0 means tear down service levels*/
++ if (count == 0) {
++ retval = 0;
++ goto out;
++ }
++
++ klevels = kmalloc(sizeof(service_level_t) * count, GFP_KERNEL);
++ if (!klevels) {
++ retval = -ENOMEM;
++ goto out;
++ }
++
++ for (i = 0; i < count; i++) {
++ if (copy_from_user(&level, levels + i, sizeof(level))) {
++ retval = -EFAULT;
++ kfree(klevels);
++ goto out;
++ }
++ if (level.period <= 0) {
++ TRACE("service level %d period <= 0\n", i);
++ /* free the partially copied array to avoid a leak */
++ kfree(klevels);
++ goto out;
++ }
++ if (_leq(level.weight, last_weight)) {
++ TRACE("service level %d weight non-increase\n", i);
++ kfree(klevels);
++ goto out;
++ }
++ if (_leq(level.value, last_value)) {
++ TRACE("service level %d value non-increase\n", i);
++ kfree(klevels);
++ goto out;
++ }
++ last_value = level.value;
++ last_weight = level.weight;
++ klevels[i] = level;
++ }
++ target->rt_param.basic_params.exec_cost = _round(_mul(klevels[0].weight,
++ FP(klevels[0].period)));
++ target->rt_param.basic_params.period = klevels[0].period;
++ target->rt_param.service_level = klevels;
++ target->rt_param.no_service_levels = count;
++ retval = 0;
++
++ out:
++ return retval;
++}
++
++asmlinkage long sys_get_cur_service_level(void)
++{
++ long level;
++
++ if (!is_realtime(current))
++ return -EINVAL;
++
++ /* block scheduler that might cause reweighting to happen */
++ local_irq_disable();
++ level = current->rt_param.cur_service_level;
++ local_irq_enable();
++ return level;
++}
++
++
++/*
++ * sys_prepare_rt_task
++ * @pid: Pid of the task we want to prepare for RT mode
++ * Syscall for adding a task to RT queue, plugin dependent.
++ * Must be called before RT tasks are going to start up.
++ * Returns EPERM if current plugin does not define prepare operation
++ * or scheduling policy does not allow the operation.
++ * ESRCH if pid does not correspond to a valid task.
++ * EINVAL if the task is not real-time or is in an invalid state,
++ * as reported by the underlying plugin function
++ * EAGAIN if a task is not in the right state
++ * ENOMEM if there is no memory space to handle this task
++ * 0 if success
++ */
++asmlinkage long sys_prepare_rt_task(pid_t pid)
++{
++ int retval = -EINVAL;
++ struct task_struct *target = 0;
++ /* If a plugin does not define preparation mode then nothing to do */
++ if (curr_sched_plugin->prepare_task == 0
++ || sched_policy == SCHED_DEFAULT) {
++ retval = -EPERM;
++ goto out_prepare;
++ }
++ read_lock_irq(&tasklist_lock);
++ if (!(target = find_task_by_pid(pid))) {
++ retval = -ESRCH;
++ goto out_prepare_unlock;
++ }
++ if (!cpu_online(get_partition(target)))
++ {
++ printk(KERN_WARNING "litmus prepare: cpu %d is not online\n",
++ get_partition(target));
++ goto out_prepare_unlock;
++ }
++ retval = curr_sched_plugin->prepare_task(target);
++ if (!retval) {
++ atomic_inc(&n_rt_tasks);
++ target->rt_param.is_realtime = 1;
++ target->rt_param.litmus_controlled = 1;
++ }
++ out_prepare_unlock:
++ read_unlock_irq(&tasklist_lock);
++ out_prepare:
++ return retval;
++}
++
++
++/* implemented in kernel/litmus_sem.c */
++void srp_ceiling_block(void);
++
++/*
++ * This is the crucial function for the periodic task implementation.
++ * It checks whether the task is periodic and whether this kind of sleep
++ * is permitted, and then calls the plugin-specific sleep, which puts the
++ * task into a wait queue.
++ * returns 0 on successful wakeup
++ * returns EPERM if current conditions do not permit such sleep
++ * returns EINVAL if current task is not able to go to sleep
++ */
++asmlinkage long sys_sleep_next_period(void)
++{
++ int retval = -EPERM;
++ if (!is_realtime(current)) {
++ retval = -EINVAL;
++ goto out;
++ }
++ /* Task with negative or zero period cannot sleep */
++ if (get_rt_period(current) <= 0) {
++ retval = -EINVAL;
++ goto out;
++ }
++ /* The plugin has to put the task into an
++ * appropriate queue and call schedule
++ */
++ retval = curr_sched_plugin->sleep_next_period();
++ if (!retval && is_subject_to_srp(current))
++ srp_ceiling_block();
++ out:
++ return retval;
++}
++
++/* This is an "improved" version of sys_sleep_next_period() that
++ * addresses the problem of unintentionally missing a job after
++ * an overrun.
++ *
++ * returns 0 on successful wakeup
++ * returns EPERM if current conditions do not permit such sleep
++ * returns EINVAL if current task is not able to go to sleep
++ */
++asmlinkage long sys_wait_for_job_release(unsigned int job)
++{
++ int retval = -EPERM;
++ if (!is_realtime(current)) {
++ retval = -EINVAL;
++ goto out;
++ }
++
++ /* Task with negative or zero period cannot sleep */
++ if (get_rt_period(current) <= 0) {
++ retval = -EINVAL;
++ goto out;
++ }
++
++ retval = 0;
++
++ /* first wait until we have "reached" the desired job
++ *
++ * This implementation has at least two problems:
++ *
++ * 1) It doesn't gracefully handle the wrap around of
++ * job_no. Since LITMUS is a prototype, this is not much
++ * of a problem right now.
++ *
++ * 2) It is theoretically racy if a job release occurs
++ * between checking job_no and calling sleep_next_period().
++ * A proper solution would require adding another callback
++ * in the plugin structure and testing the condition with
++ * interrupts disabled.
++ *
++ * FIXME: At least problem 2 should be taken care of eventually.
++ */
++ while (!retval && job > current->rt_param.times.job_no)
++ /* If the last job overran then job <= job_no and we
++ * don't send the task to sleep.
++ */
++ retval = curr_sched_plugin->sleep_next_period();
++
++ /* We still have to honor the SRP after the actual release.
++ */
++ if (!retval && is_subject_to_srp(current))
++ srp_ceiling_block();
++ out:
++ return retval;
++}
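
From user space the intended pattern is a loop that tracks its own job
count and always waits for the next job, so that an overrun results in an
immediate return rather than an extra sleep. A hypothetical sketch; the
wrappers below stand for user-space library functions around
sys_query_job_no() and sys_wait_for_job_release() and are not part of
this patch.

extern int query_job_no(unsigned int *job);		/* placeholder */
extern int wait_for_job_release(unsigned int job);	/* placeholder */
extern void do_one_job(void);				/* application work */

void periodic_main(void)
{
	unsigned int job;

	if (query_job_no(&job) != 0)
		return;			/* not a real-time task */

	for (;;) {
		do_one_job();
		job++;
		/* If the previous job overran, job <= job_no already holds
		 * and this call returns immediately instead of sleeping.
		 */
		wait_for_job_release(job);
	}
}
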
++
++/* This is a helper syscall to query the current job sequence number.
++ *
++ * returns 0 on successful query
++ * returns EPERM if task is not a real-time task.
++ * returns EFAULT if &job is not a valid pointer.
++ */
++asmlinkage long sys_query_job_no(unsigned int __user *job)
++{
++ int retval = -EPERM;
++ if (is_realtime(current))
++ retval = put_user(current->rt_param.times.job_no, job);
++
++ return retval;
++}
++
++
++/* The LITMUS tick function. It manages the change to and from real-time mode
++ * and then calls the plugin's tick function.
++ */
++reschedule_check_t __sched rt_scheduler_tick(void)
++{
++ /* Check for mode change */
++ if ((get_rt_mode() != atomic_read(&new_mode))) {
++ queue_lock(&mode_change_lock);
++ // If the mode is already changed, proceed
++ if (get_rt_mode() == atomic_read(&new_mode)) {
++ queue_unlock(&mode_change_lock);
++ goto proceed;
++ }
++ // change the mode
++ if ((atomic_read(&new_mode) == MODE_RT_RUN)) {
++ /* The deferral of entering real-time mode should be
++ * handled by deferring task releases in the plugin.
++ * The plugin interface does not really need to know
++ * about quanta; that is the plugin's job.
++ */
++
++ /* update rt start time */
++ rt_start_time = jiffies;
++ printk(KERN_INFO "Real-Time mode enabled at %ld "
++ "on %d\n",
++ jiffies, smp_processor_id());
++ } else
++ printk(KERN_INFO "Real-Time mode disabled at %ld "
++ "on %d\n",
++ jiffies, smp_processor_id());
++ if (curr_sched_plugin->mode_change)
++ curr_sched_plugin->
++ mode_change(atomic_read(&new_mode));
++ printk(KERN_INFO "Plugin mode change done at %ld\n",
++ jiffies);
++ set_rt_mode(atomic_read(&new_mode));
++ queue_unlock(&mode_change_lock);
++ }
++
++ proceed:
++ /* Call plugin-defined tick handler
++ *
++ * It is the plugin's tick handler's job to detect quantum
++ * boundaries in pfair.
++ */
++ return curr_sched_plugin->scheduler_tick();
++}
++
++asmlinkage spolicy sys_sched_setpolicy(spolicy newpolicy)
++{
++ /* Dynamic policy change is disabled at the moment */
++ return SCHED_INVALID;
++}
++
++asmlinkage spolicy sys_sched_getpolicy(void)
++{
++ return sched_policy;
++}
++
++
++asmlinkage int sys_scheduler_setup(int cmd, void __user *parameter)
++{
++ int ret = -EINVAL;
++
++ ret = curr_sched_plugin->scheduler_setup(cmd, parameter);
++
++ return ret;
++}
++
++struct sched_sig {
++ struct list_head list;
++ struct task_struct* task;
++ unsigned int signal:31;
++ int force:1;
++};
++
++static void __scheduler_signal(struct task_struct *t, unsigned int signo,
++ int force)
++{
++ struct sched_sig* sig;
++
++ sig = kmalloc(GFP_ATOMIC, sizeof(struct sched_sig));
++ if (!sig) {
++ TRACE_TASK(t, "dropping signal: %u\n", t);
++ return;
++ }
++
++ spin_lock(&sched_sig_list_lock);
++
++ sig->signal = signo;
++ sig->force = force;
++ sig->task = t;
++ get_task_struct(t);
++ list_add(&sig->list, &sched_sig_list);
++
++ spin_unlock(&sched_sig_list_lock);
++}
++
++void scheduler_signal(struct task_struct *t, unsigned int signo)
++{
++ __scheduler_signal(t, signo, 0);
++}
++
++void force_scheduler_signal(struct task_struct *t, unsigned int signo)
++{
++ __scheduler_signal(t, signo, 1);
++}
++
++void send_scheduler_signals(void)
++{
++ unsigned long flags;
++ struct list_head *p, *extra;
++ struct siginfo info;
++ struct sched_sig* sig;
++ struct task_struct* t;
++ struct list_head claimed;
++
++ if (spin_trylock_irqsave(&sched_sig_list_lock, flags)) {
++ if (list_empty(&sched_sig_list))
++ p = NULL;
++ else {
++ p = sched_sig_list.next;
++ list_del(&sched_sig_list);
++ INIT_LIST_HEAD(&sched_sig_list);
++ }
++ spin_unlock_irqrestore(&sched_sig_list_lock, flags);
++
++ /* abort if there are no signals */
++ if (!p)
++ return;
++
++ /* take signal list we just obtained */
++ list_add(&claimed, p);
++
++ list_for_each_safe(p, extra, &claimed) {
++ list_del(p);
++ sig = list_entry(p, struct sched_sig, list);
++ t = sig->task;
++ info.si_signo = sig->signal;
++ info.si_errno = 0;
++ info.si_code = SI_KERNEL;
++ info.si_pid = 1;
++ info.si_uid = 0;
++ TRACE("sending signal %d to %d\n", info.si_signo,
++ t->pid);
++ if (sig->force)
++ force_sig_info(sig->signal, &info, t);
++ else
++ send_sig_info(sig->signal, &info, t);
++ put_task_struct(t);
++ kfree(sig);
++ }
++ }
++
++}
++
++static inline void np_mem_error(struct task_struct* t, const char* reason)
++{
++ if (t->state != TASK_DEAD && !(t->flags & PF_EXITING)) {
++ TRACE("np section: %s => %s/%d killed\n",
++ reason, t->comm, t->pid);
++ force_scheduler_signal(t, SIGKILL);
++ }
++}
++
++/* sys_register_np_flag() allows real-time tasks to register an
++ * np section indicator.
++ * returns 0 if the flag was successfully registered
++ * returns EINVAL if current task is not a real-time task
++ * returns EFAULT if *flag couldn't be written
++ */
++asmlinkage long sys_register_np_flag(short __user *flag)
++{
++ int retval = -EINVAL;
++ short test_val = RT_PREEMPTIVE;
++
++ /* avoid races with the scheduler */
++ preempt_disable();
++ TRACE("reg_np_flag(%p) for %s/%d\n", flag,
++ current->comm, current->pid);
++ if (!is_realtime(current))
++ goto out;
++
++ /* Let's first try to write to the address.
++ * That way it is initialized and any bugs
++ * involving dangling pointers will be caught
++ * early.
++ * NULL indicates disabling np section support
++ * and should not be tested.
++ */
++ if (flag)
++ retval = poke_kernel_address(test_val, flag);
++ else
++ retval = 0;
++ TRACE("reg_np_flag: retval=%d\n", retval);
++ if (unlikely(0 != retval))
++ np_mem_error(current, "np flag: not writable");
++ else
++ /* the pointer is ok */
++ current->rt_param.np_flag = flag;
++
++ out:
++ preempt_enable();
++ /* force rescheduling so that we can be preempted */
++ return retval;
++}
++
++
++void request_exit_np(struct task_struct *t)
++{
++ int ret;
++ short flag;
++
++ /* We can only do this if t is actually currently scheduled on this CPU
++ * because otherwise we are in the wrong address space. Thus make sure
++ * to check.
++ */
++ BUG_ON(t != current);
++
++ if (unlikely(!is_realtime(t) || !t->rt_param.np_flag)) {
++ TRACE_TASK(t, "request_exit_np(): BAD TASK!\n");
++ return;
++ }
++
++ flag = RT_EXIT_NP_REQUESTED;
++ ret = poke_kernel_address(flag, t->rt_param.np_flag + 1);
++ TRACE("request_exit_np(%s/%d)\n", t->comm, t->pid);
++ if (unlikely(0 != ret))
++ np_mem_error(current, "request_exit_np(): flag not writable");
++
++}
++
++
++int is_np(struct task_struct* t)
++{
++ int ret;
++ unsigned short flag = 0x5858; /* = XX, looks nicer in debug*/
++
++ BUG_ON(t != current);
++
++ if (unlikely(t->rt_param.kernel_np))
++ return 1;
++ else if (unlikely(t->rt_param.np_flag == NULL) ||
++ t->flags & PF_EXITING ||
++ t->state == TASK_DEAD)
++ return 0;
++ else {
++ /* This is the tricky part. The process has registered a
++ * non-preemptive section marker. We now need to check whether
++ * it is set to RT_NON_PREEMPTIVE. Along the way we could
++ * discover that the pointer points to an unmapped region (=>
++ * kill the task) or that the location contains some garbage
++ * value (=> also kill the task). Killing the task in any case
++ * forces userspace to play nicely. Any bugs will be discovered
++ * immediately.
++ */
++ ret = probe_kernel_address(t->rt_param.np_flag, flag);
++ if (0 == ret && (flag == RT_NON_PREEMPTIVE ||
++ flag == RT_PREEMPTIVE))
++ return flag != RT_PREEMPTIVE;
++ else {
++ /* either we could not read from the address or
++ * it contained garbage => kill the process
++ * FIXME: Should we cause a SEGFAULT instead?
++ */
++ TRACE("is_np: ret=%d flag=%c%c (%x)\n", ret,
++ flag & 0xff, (flag >> 8) & 0xff, flag);
++ np_mem_error(t, "is_np() could not read");
++ return 0;
++ }
++ }
++}
++
++/*
++ * sys_exit_np() allows a real-time task to signal that it has left a
++ * non-preemptive section. It will be called after the kernel requested a
++ * callback in the preemption indicator flag.
++ * returns 0 if the signal was valid and processed.
++ * returns EINVAL if current task is not a real-time task
++ */
++asmlinkage long sys_exit_np(void)
++{
++ int retval = -EINVAL;
++
++ TS_EXIT_NP_START;
++
++ if (!is_realtime(current))
++ goto out;
++
++ TRACE("sys_exit_np(%s/%d)\n", current->comm, current->pid);
++ /* force rescheduling so that we can be preempted */
++ set_tsk_need_resched(current);
++ retval = 0;
++ out:
++
++ TS_EXIT_NP_END;
++ return retval;
++}
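
A hypothetical user-space side of this protocol, inferred from
sys_register_np_flag(), is_np() and request_exit_np() above: the task owns
two adjacent shorts, writes its preemption state into the first, and the
kernel writes the exit request into the second. The RT_* constants come
from the LITMUS headers; the two wrappers are placeholders and not part of
this patch.

extern int register_np_flag(volatile short *flag);	/* placeholder */
extern int exit_np_syscall(void);			/* placeholder */

static volatile short np_flag[2];

void np_init(void)
{
	register_np_flag(np_flag);	/* -> sys_register_np_flag() */
}

void np_enter(void)
{
	np_flag[0] = RT_NON_PREEMPTIVE;
}

void np_exit(void)
{
	np_flag[0] = RT_PREEMPTIVE;
	/* If the scheduler asked us to yield while we were non-preemptive,
	 * tell it that the section is over so it can preempt us now.
	 */
	if (np_flag[1] == RT_EXIT_NP_REQUESTED) {
		np_flag[1] = 0;
		exit_np_syscall();	/* -> sys_exit_np() */
	}
}
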
++
++void exit_litmus(struct task_struct *dead_tsk)
++{
++ kfree(dead_tsk->rt_param.service_level);
++ curr_sched_plugin->tear_down(dead_tsk);
++}
++
++
++void list_qsort(struct list_head* list, list_cmp_t less_than)
++{
++ struct list_head lt;
++ struct list_head geq;
++ struct list_head *pos, *extra, *pivot;
++ int n_lt = 0, n_geq = 0;
++ BUG_ON(!list);
++
++ if (list->next == list)
++ return;
++
++ INIT_LIST_HEAD(<);
++ INIT_LIST_HEAD(&geq);
++
++ pivot = list->next;
++ list_del(pivot);
++ list_for_each_safe(pos, extra, list) {
++ list_del(pos);
++ if (less_than(pos, pivot)) {
++ list_add(pos, <);
++ n_lt++;
++ } else {
++ list_add(pos, &geq);
++ n_geq++;
++ }
++ }
++ if (n_lt < n_geq) {
++ list_qsort(<, less_than);
++ list_qsort(&geq, less_than);
++ } else {
++ list_qsort(&geq, less_than);
++ list_qsort(<, less_than);
++ }
++ list_splice(&geq, list);
++ list_add(pivot, list);
++ list_splice(<, list);
++}
++
++#ifdef CONFIG_MAGIC_SYSRQ
++/* We offer the possibility to change the real-time mode of the system
++ * with a magic sys request. This helps in debugging in case the system fails
++ * to perform its planned switch back to normal mode. This may happen if we have
++ * total system utilization and the task that is supposed to do the switch is
++ * always preempted (if it is not a real-time task).
++ */
++int sys_kill(int pid, int sig);
++
++
++static void sysrq_handle_toGgle_rt_mode(int key, struct tty_struct *tty)
++{
++ sys_set_rt_mode(get_rt_mode() == MODE_NON_RT);
++}
++
++static struct sysrq_key_op sysrq_toGgle_rt_mode_op = {
++ .handler = sysrq_handle_toGgle_rt_mode,
++ .help_msg = "toGgle-rt-mode",
++ .action_msg = "real-time mode changed",
++};
++
++static void sysrq_handle_kill_rt_tasks(int key, struct tty_struct *tty)
++{
++ struct task_struct *t;
++ read_lock(&tasklist_lock);
++ for_each_process(t) {
++ if (is_realtime(t)) {
++ sys_kill(t->pid, SIGKILL);
++ }
++ }
++ read_unlock(&tasklist_lock);
++}
++
++static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
++ .handler = sysrq_handle_kill_rt_tasks,
++ .help_msg = "Quit-rt-tasks",
++ .action_msg = "sent SIGKILL to all real-time tasks",
++};
++#endif
++
++/*
++ * Scheduler initialization so that a customized scheduler can be
++ * enabled at boot time
++ * by setting the boot option "rtsched=plugin_name", e.g. "rtsched=pfair"
++ */
++
++/* All we need to know about other plugins is their initialization
++ * functions. These functions initialize internal data structures of a
++ * scheduler and return a pointer to initialized sched_plugin data
++ * structure with pointers to scheduling function implementations.
++ * If called repeatedly these init functions just return an existing
++ * plugin pointer.
++ */
++sched_plugin_t *init_global_edf_plugin(void);
++sched_plugin_t *init_global_edf_np_plugin(void);
++sched_plugin_t *init_part_edf_plugin(void);
++sched_plugin_t *init_edf_hsb_plugin(void);
++sched_plugin_t *init_pfair_plugin(void);
++sched_plugin_t *init_gsn_edf_plugin(void);
++sched_plugin_t *init_psn_edf_plugin(void);
++sched_plugin_t *init_adaptive_plugin(void);
++
++/* keep everything needed to setup plugins in one place */
++
++/* we are lazy, so we use a convention for function naming to fill
++ * a table
++ */
++#define PLUGIN(caps, small) \
++ {PLUGIN_ ## caps, SCHED_ ## caps, init_ ## small ## _plugin}
++
++#define init_nosetup_plugin 0
++
++static struct {
++ const char *name;
++ const spolicy policy_id;
++ sched_plugin_t *(*init) (void);
++} available_plugins[] = {
++ PLUGIN(LINUX, nosetup),
++ PLUGIN(GLOBAL_EDF_NP, global_edf_np),
++ PLUGIN(GLOBAL_EDF, global_edf),
++ PLUGIN(PART_EDF, part_edf),
++ PLUGIN(EDF_HSB, edf_hsb),
++ PLUGIN(PFAIR, pfair),
++ PLUGIN(GSN_EDF, gsn_edf),
++ PLUGIN(PSN_EDF, psn_edf),
++ PLUGIN(ADAPTIVE, adaptive),
++ /*********************************************
++ * Add your custom plugin here
++ **********************************************/
++};
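
Spelled out, one table entry expands as follows; PLUGIN_GSN_EDF is
presumably a string constant defined in the LITMUS headers, since the
first struct field is a const char *.

/* PLUGIN(GSN_EDF, gsn_edf) expands to the initializer
 *
 *	{ PLUGIN_GSN_EDF, SCHED_GSN_EDF, init_gsn_edf_plugin }
 *
 * i.e. the plugin's name constant, its scheduling-policy id, and its init
 * function, all derived from a single pair of tokens.
 */
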
++
++/* Some plugins may leave important functions unused. We define dummies
++ * so that we don't have to check for null pointers all over the place.
++ */
++void litmus_dummy_finish_switch(struct task_struct * prev);
++int litmus_dummy_schedule(struct task_struct * prev, struct task_struct** next,
++ runqueue_t* q);
++reschedule_check_t litmus_dummy_scheduler_tick(void);
++long litmus_dummy_prepare_task(struct task_struct *t);
++void litmus_dummy_wake_up_task(struct task_struct *task);
++void litmus_dummy_task_blocks(struct task_struct *task);
++long litmus_dummy_tear_down(struct task_struct *task);
++int litmus_dummy_scheduler_setup(int cmd, void __user *parameter);
++long litmus_dummy_sleep_next_period(void);
++long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
++ struct task_struct *new_owner);
++long litmus_dummy_return_priority(struct pi_semaphore *sem);
++long litmus_dummy_pi_block(struct pi_semaphore *sem,
++ struct task_struct *t);
++
++#define CHECK(func) {\
++ if (!curr_sched_plugin->func) \
++ curr_sched_plugin->func = litmus_dummy_ ## func;}
++
++static int boot_sched_setup(char *plugin_name)
++{
++ int i = 0;
++
++ /* Common initializers,
++ * mode change lock is used to enforce single mode change
++ * operation.
++ */
++ queue_lock_init(&mode_change_lock);
++
++ printk("Starting LITMUS^RT kernel\n");
++
++ /* Look for a matching plugin.
++ */
++ for (i = 0; i < ARRAY_SIZE(available_plugins); i++) {
++ if (!strcmp(plugin_name, available_plugins[i].name)) {
++ printk("Using %s scheduler plugin\n", plugin_name);
++ sched_policy = available_plugins[i].policy_id;
++ if (available_plugins[i].init)
++ curr_sched_plugin = available_plugins[i].init();
++ goto out;
++ }
++ }
++
++
++ /* Otherwise we have default linux scheduler */
++ printk("Plugin name %s is unknown, using default %s\n", plugin_name,
++ curr_sched_plugin->plugin_name);
++
++out:
++ /* make sure we don't trip over null pointers later */
++ CHECK(finish_switch);
++ CHECK(schedule);
++ CHECK(scheduler_tick);
++ CHECK(wake_up_task);
++ CHECK(tear_down);
++ CHECK(task_blocks);
++ CHECK(prepare_task);
++ CHECK(scheduler_setup);
++ CHECK(sleep_next_period);
++ CHECK(inherit_priority);
++ CHECK(return_priority);
++ CHECK(pi_block);
++
++#ifdef CONFIG_MAGIC_SYSRQ
++ /* offer some debugging help */
++ if (!register_sysrq_key('g', &sysrq_toGgle_rt_mode_op))
++ printk("Registered eXit real-time mode magic sysrq.\n");
++ else
++ printk("Could not register eXit real-time mode magic sysrq.\n");
++ if (!register_sysrq_key('q', &sysrq_kill_rt_tasks_op))
++ printk("Registered kill rt tasks magic sysrq.\n");
++ else
++ printk("Could not register kill rt tasks magic sysrq.\n");
++#endif
++ printk("Litmus setup complete.");
++ return 1;
++}
++
++/* Register for boot option */
++__setup("rtsched=", boot_sched_setup);
+diff --git a/kernel/litmus_sem.c b/kernel/litmus_sem.c
+new file mode 100644
+index 0000000..12a6ab1
+--- /dev/null
++++ b/kernel/litmus_sem.c
+@@ -0,0 +1,765 @@
++
++/*
++ * SMP- and interrupt-safe semaphores. Also PI and SRP implementations.
++ * Much of the code here is borrowed from include/asm-i386/semaphore.h.
++ *
++ * NOTE: This implementation is very much a prototype and horribly insecure. It
++ * is intended to be a proof of concept, not a feature-complete solution.
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++
++#include
++/* ************************************************************************** */
++/* STANDARD FIFO SEMAPHORES */
++/* ************************************************************************** */
++
++#define MAX_SEMAPHORES 16000
++#define MAX_PI_SEMAPHORES 16000
++#define MAX_SRP_SEMAPHORES 16000
++
++
++struct semaphore sems[MAX_SEMAPHORES]; /* all sems */
++typedef int sema_id; /* Userspace ID of a semaphore */
++
++static int rt_fifo_wake_up(wait_queue_t *wait, unsigned mode, int sync,
++ void *key)
++{
++ struct task_struct* t = (struct task_struct*) wait->private;
++ set_rt_flags(t, RT_F_EXIT_SEM);
++ TRACE_TASK(t, "woken up by rt_fifo_wake_up(), set RT_F_EXIT_SEM\n");
++ default_wake_function(wait, mode, sync, key);
++ /* for reason why we always return 1 see rt_pi_wake_up() below */
++ return 1;
++}
++
++static fastcall void rt_fifo_up(struct semaphore * sem)
++{
++ TRACE_CUR("releases lock %p\n");
++ preempt_disable();
++ TS_FIFO_UP_START;
++ if (atomic_inc_return(&sem->count) < 1)
++ /* there is a task queued */
++ wake_up(&sem->wait);
++ TS_FIFO_UP_END;
++ preempt_enable();
++}
++
++/* not optimized like the Linux down() implementation, but then
++ * again we incur the cost of a syscall anyway, so this hardly matters
++ */
++static fastcall void rt_fifo_down(struct semaphore * sem)
++{
++ struct task_struct *tsk = current;
++ wait_queue_t wait = {
++ .private = tsk,
++ .func = rt_fifo_wake_up,
++ .task_list = {NULL, NULL}
++ };
++
++ preempt_disable();
++ TS_FIFO_DOWN_START;
++
++ spin_lock(&sem->wait.lock);
++ if (atomic_dec_return(&sem->count) < 0 ||
++ waitqueue_active(&sem->wait)) {
++ /* we need to suspend */
++ tsk->state = TASK_UNINTERRUPTIBLE;
++ add_wait_queue_exclusive_locked(&sem->wait, &wait);
++
++ TRACE_CUR("suspends on lock %p\n", sem);
++
++ /* release lock before sleeping */
++ spin_unlock(&sem->wait.lock);
++
++ TS_FIFO_DOWN_END;
++ preempt_enable_no_resched();
++
++ /* We depend on the FIFO order.
++ * Thus, we don't need to recheck when we wake up; we
++ * are guaranteed to have the lock since there is only one
++ * wake-up per release.
++ */
++ schedule();
++
++ TRACE_CUR("woke up, now owns lock %p\n", sem);
++
++ /* try_to_wake_up() set our state to TASK_RUNNING,
++ * all we need to do is to remove our wait queue entry
++ */
++ spin_lock(&sem->wait.lock);
++ remove_wait_queue_locked(&sem->wait, &wait);
++ spin_unlock(&sem->wait.lock);
++ } else {
++ TRACE_CUR("acquired lock %p, no contention\n", sem);
++ spin_unlock(&sem->wait.lock);
++ TS_FIFO_DOWN_END;
++ preempt_enable();
++ }
++}
++
++
++
++/* Initialize semaphores at boot time. */
++static int __init sema_boot_init(void)
++{
++ sema_id sem_id;
++
++ printk("Initializing semaphores...");
++ for (sem_id = 0; sem_id < MAX_SEMAPHORES; sem_id++)
++ sems[sem_id].used = 0;
++ printk(" done!\n");
++
++ return 0;
++}
++__initcall(sema_boot_init);
++
++/* Find a free semaphore and return. */
++asmlinkage long sys_sema_init (void)
++{
++ sema_id sem_id;
++
++ for (sem_id = 0; sem_id < MAX_SEMAPHORES; sem_id++) {
++ if (!cmpxchg(&sems[sem_id].used, 0, 1)) {
++ sema_init(&sems[sem_id], 1);
++ return sem_id;
++ }
++ }
++ return -ENOMEM;
++}
++
++asmlinkage long sys_down(sema_id sem_id)
++{
++ if (sem_id < 0 || sem_id >= MAX_SEMAPHORES)
++ return -EINVAL;
++
++ if (!sems[sem_id].used)
++ return -EINVAL;
++ /* This allows for FIFO sems and gives others a chance... */
++ rt_fifo_down(sems + sem_id);
++ return 0;
++}
++
++asmlinkage long sys_up(sema_id sem_id)
++{
++ if (sem_id < 0 || sem_id >= MAX_SEMAPHORES)
++ return -EINVAL;
++
++ if (!sems[sem_id].used)
++ return -EINVAL;
++ rt_fifo_up(sems + sem_id);
++ return 0;
++}
++
++asmlinkage long sys_sema_free(sema_id sem_id)
++{
++ struct list_head *tmp, *next;
++ unsigned long flags;
++
++ if (sem_id < 0 || sem_id >= MAX_SEMAPHORES)
++ return -EINVAL;
++
++ if (!sems[sem_id].used)
++ return -EINVAL;
++
++ spin_lock_irqsave(&sems[sem_id].wait.lock, flags);
++ if (waitqueue_active(&sems[sem_id].wait)) {
++ list_for_each_safe(tmp, next, &sems[sem_id].wait.task_list) {
++ wait_queue_t *curr = list_entry(tmp, wait_queue_t,
++ task_list);
++ list_del(tmp);
++ set_rt_flags((struct task_struct*)curr->private,
++ RT_F_EXIT_SEM);
++ curr->func(curr,
++ TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
++ 0, NULL);
++ }
++ }
++
++ spin_unlock_irqrestore(&sems[sem_id].wait.lock, flags);
++ sems[sem_id].used = 0;
++
++ return 0;
++}
++
++
++
++
++/* ************************************************************************** */
++/* PRIORITY INHERITANCE */
++/* ************************************************************************** */
++
++
++
++struct pi_semaphore pi_sems[MAX_PI_SEMAPHORES]; /* all PI sems */
++typedef int pi_sema_id; /* Userspace ID of a pi_semaphore */
++
++struct wq_pair {
++ struct task_struct* tsk;
++ struct pi_semaphore* sem;
++};
++
++static int rt_pi_wake_up(wait_queue_t *wait, unsigned mode, int sync,
++ void *key)
++{
++ struct wq_pair* wqp = (struct wq_pair*) wait->private;
++ set_rt_flags(wqp->tsk, RT_F_EXIT_SEM);
++ curr_sched_plugin->inherit_priority(wqp->sem, wqp->tsk);
++ TRACE_TASK(wqp->tsk,
++ "woken up by rt_pi_wake_up() (RT_F_SEM_EXIT, PI)\n");
++ /* point to task for default_wake_function() */
++ wait->private = wqp->tsk;
++ default_wake_function(wait, mode, sync, key);
++
++ /* Always return true since we know that if we encountered a task
++ * that was already running the wake_up raced with the schedule in
++ * rt_pi_down(). In that case the task in rt_pi_down() will be scheduled
++ * immediately and own the lock. We must not wake up another task in
++ * any case.
++ */
++ return 1;
++}
++
++
++/* caller is responsible for locking */
++int edf_set_hp_task(struct pi_semaphore *sem)
++{
++ struct list_head *tmp, *next;
++ struct task_struct *queued;
++ int ret = 0;
++
++ sem->hp.task = NULL;
++ list_for_each_safe(tmp, next, &sem->wait.task_list) {
++ queued = ((struct wq_pair*)
++ list_entry(tmp, wait_queue_t,
++ task_list)->private)->tsk;
++
++ /* Compare task prios, find high prio task. */
++ if (edf_higher_prio(queued, sem->hp.task)) {
++ sem->hp.task = queued;
++ ret = 1;
++ }
++ }
++ return ret;
++}
++
++
++/* caller is responsible for locking */
++int edf_set_hp_cpu_task(struct pi_semaphore *sem, int cpu)
++{
++ struct list_head *tmp, *next;
++ struct task_struct *queued;
++ int ret = 0;
++
++ sem->hp.cpu_task[cpu] = NULL;
++ list_for_each_safe(tmp, next, &sem->wait.task_list) {
++ queued = ((struct wq_pair*)
++ list_entry(tmp, wait_queue_t,
++ task_list)->private)->tsk;
++
++ /* Compare task prios, find high prio task. */
++ if (get_partition(queued) == cpu &&
++ edf_higher_prio(queued, sem->hp.cpu_task[cpu])) {
++ sem->hp.cpu_task[cpu] = queued;
++ ret = 1;
++ }
++ }
++ return ret;
++}
++
++
++/* Initialize PI semaphores at boot time. */
++static int __init pi_sema_boot_init(void)
++{
++ pi_sema_id sem_id;
++
++ printk("Initializing PI semaphores...");
++ for (sem_id = 0; sem_id < MAX_PI_SEMAPHORES; sem_id++)
++ pi_sems[sem_id].used = 0;
++ printk(" done!\n");
++
++ return 0;
++}
++__initcall(pi_sema_boot_init);
++
++/* Find a free semaphore and return. */
++asmlinkage long sys_pi_sema_init (void)
++{
++ pi_sema_id sem_id;
++ int i = 0;
++
++ for (sem_id = 0; sem_id < MAX_PI_SEMAPHORES; sem_id++) {
++ if (!cmpxchg(&pi_sems[sem_id].used, 0, 1)) {
++ atomic_set(&pi_sems[sem_id].count, 1);
++ pi_sems[sem_id].sleepers = 0;
++ init_waitqueue_head(&pi_sems[sem_id].wait);
++ pi_sems[sem_id].hp.task = NULL;
++ pi_sems[sem_id].holder = NULL;
++ for (i = 0; i < NR_CPUS; i++)
++ pi_sems[sem_id].hp.cpu_task[i] = NULL;
++ return sem_id;
++ }
++ }
++ return -ENOMEM;
++}
++
++asmlinkage long sys_pi_down(pi_sema_id sem_id)
++{
++ struct pi_semaphore * sem;
++ unsigned long flags;
++ struct task_struct *tsk = current;
++ struct wq_pair pair;
++ long ret = -EINVAL;
++ wait_queue_t wait = {
++ .private = &pair,
++ .func = rt_pi_wake_up,
++ .task_list = {NULL, NULL}
++ };
++
++ preempt_disable();
++ TS_PI_DOWN_START;
++
++ if (sem_id < 0 || sem_id >= MAX_PI_SEMAPHORES)
++ goto out;
++
++ if (!pi_sems[sem_id].used)
++ goto out;
++
++ sem = pi_sems + sem_id;
++ pair.tsk = tsk;
++ pair.sem = sem;
++ spin_lock_irqsave(&sem->wait.lock, flags);
++
++ if (atomic_dec_return(&sem->count) < 0 ||
++ waitqueue_active(&sem->wait)) {
++ /* we need to suspend */
++ tsk->state = TASK_UNINTERRUPTIBLE;
++ add_wait_queue_exclusive_locked(&sem->wait, &wait);
++
++ TRACE_CUR("suspends on PI lock %p\n", sem);
++ curr_sched_plugin->pi_block(sem, tsk);
++
++ /* release lock before sleeping */
++ spin_unlock_irqrestore(&sem->wait.lock, flags);
++
++ TS_PI_DOWN_END;
++ preempt_enable_no_resched();
++
++
++ /* We depend on the FIFO order.
++ * Thus, we don't need to recheck when we wake up; we
++ * are guaranteed to have the lock since there is only one
++ * wake-up per release.
++ */
++ schedule();
++
++ TRACE_CUR("woke up, now owns PI lock %p\n", sem);
++
++ /* try_to_wake_up() set our state to TASK_RUNNING,
++ * all we need to do is to remove our wait queue entry
++ */
++ remove_wait_queue(&sem->wait, &wait);
++ } else {
++ /* no priority inheritance necessary, since there are no queued
++ * tasks.
++ */
++ TRACE_CUR("acquired PI lock %p, no contention\n", sem);
++ sem->holder = tsk;
++ sem->hp.task = tsk;
++ curr_sched_plugin->inherit_priority(sem, tsk);
++ spin_unlock_irqrestore(&sem->wait.lock, flags);
++ out:
++ TS_PI_DOWN_END;
++ preempt_enable();
++ }
++ ret = 0;
++ return ret;
++}
++
++asmlinkage long sys_pi_up(pi_sema_id sem_id)
++{
++ unsigned long flags;
++ long ret = -EINVAL;
++ struct pi_semaphore * sem;
++
++ preempt_disable();
++ TS_PI_UP_START;
++
++ if (sem_id < 0 || sem_id >= MAX_PI_SEMAPHORES)
++ goto out;
++
++ if (!pi_sems[sem_id].used)
++ goto out;
++
++ sem = pi_sems + sem_id;
++ spin_lock_irqsave(&sem->wait.lock, flags);
++
++ TRACE_CUR("releases PI lock %p\n", sem);
++ curr_sched_plugin->return_priority(sem);
++ sem->holder = NULL;
++ if (atomic_inc_return(&sem->count) < 1)
++ /* there is a task queued */
++ wake_up_locked(&sem->wait);
++
++ spin_unlock_irqrestore(&sem->wait.lock, flags);
++
++ ret = 0;
++ out:
++ TS_PI_UP_END;
++ preempt_enable();
++ return ret;
++}
++
++/* Clear wait queue and wakeup waiting tasks, and free semaphore. */
++asmlinkage long sys_pi_sema_free(pi_sema_id sem_id)
++{
++ struct list_head *tmp, *next;
++ unsigned long flags;
++
++ if (sem_id < 0 || sem_id >= MAX_PI_SEMAPHORES)
++ return -EINVAL;
++
++ if (!pi_sems[sem_id].used)
++ return -EINVAL;
++
++ spin_lock_irqsave(&pi_sems[sem_id].wait.lock, flags);
++ if (waitqueue_active(&pi_sems[sem_id].wait)) {
++ list_for_each_safe(tmp, next,
++ &pi_sems[sem_id].wait.task_list) {
++ wait_queue_t *curr = list_entry(tmp, wait_queue_t,
++ task_list);
++ list_del(tmp);
++ set_rt_flags((struct task_struct*)curr->private,
++ RT_F_EXIT_SEM);
++ curr->func(curr,
++ TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
++ 0, NULL);
++ }
++ }
++
++ spin_unlock_irqrestore(&pi_sems[sem_id].wait.lock, flags);
++ pi_sems[sem_id].used = 0;
++
++ return 0;
++}
++
++
++
++
++/* ************************************************************************** */
++/* STACK RESOURCE POLICY */
++/* ************************************************************************** */
++
++
++struct srp_priority {
++ struct list_head list;
++ unsigned int period;
++ pid_t pid;
++};
++
++#define list2prio(l) list_entry(l, struct srp_priority, list)
++
++static int srp_higher_prio(struct srp_priority* first,
++ struct srp_priority* second)
++{
++ if (!first->period)
++ return 0;
++ else
++ return !second->period ||
++ first->period < second->period || (
++ first->period == second->period &&
++ first->pid < second->pid);
++}
++
++struct srp {
++ struct list_head ceiling;
++ wait_queue_head_t ceiling_blocked;
++};
++
++#define system_ceiling(srp) list2prio(srp->ceiling.next)
++
++static int srp_exceeds_ceiling(struct task_struct* first,
++ struct srp* srp)
++{
++ return list_empty(&srp->ceiling) ||
++ get_rt_period(first) < system_ceiling(srp)->period ||
++ (get_rt_period(first) == system_ceiling(srp)->period &&
++ first->pid < system_ceiling(srp)->pid);
++}
++
++static void srp_add_prio(struct srp* srp, struct srp_priority* prio)
++{
++ struct list_head *pos;
++ if (in_list(&prio->list)) {
++ TRACE_CUR("WARNING: SRP violation detected, prio is already in "
++ "ceiling list!\n");
++ return;
++ }
++ list_for_each(pos, &srp->ceiling)
++ if (unlikely(srp_higher_prio(prio, list2prio(pos)))) {
++ __list_add(&prio->list, pos->prev, pos);
++ return;
++ }
++
++ list_add_tail(&prio->list, &srp->ceiling);
++}
++
++/* struct for uniprocessor SRP "semaphore" */
++struct srp_semaphore {
++ struct srp_priority ceiling;
++ int cpu; /* cpu associated with this "semaphore" and resource */
++ int claimed; /* is the resource claimed (ceiling should be used)? */
++ int used; /* is the semaphore being used? */
++};
++
++
++struct srp_semaphore srp_sems[MAX_SRP_SEMAPHORES]; /* all SRP sems */
++typedef int srp_sema_id; /* Userspace ID of a srp_semaphore */
++
++DEFINE_PER_CPU(struct srp, srp);
++
++/* Initialize SRP semaphores at boot time. */
++static int __init srp_sema_boot_init(void)
++{
++ srp_sema_id sem_id;
++ int i;
++
++ printk("Initializing SRP semaphores...");
++ for (sem_id = 0; sem_id < MAX_SRP_SEMAPHORES; sem_id++) {
++ srp_sems[sem_id].used = 0;
++ srp_sems[sem_id].claimed = 0;
++ srp_sems[sem_id].cpu = -1;
++ INIT_LIST_HEAD(&srp_sems[sem_id].ceiling.list);
++ }
++ for (i = 0; i < NR_CPUS; i++) {
++ init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked);
++ INIT_LIST_HEAD(&per_cpu(srp, i).ceiling);
++ }
++ printk(" done!\n");
++
++ return 0;
++}
++__initcall(srp_sema_boot_init);
++
++/* Find a free semaphore and return. */
++asmlinkage long sys_srp_sema_init (void)
++{
++ srp_sema_id sem_id;
++
++ if (!is_realtime(current))
++ return -EPERM;
++
++ for (sem_id = 0; sem_id < MAX_SRP_SEMAPHORES; sem_id++) {
++ if (!cmpxchg(&srp_sems[sem_id].used, 0, 1)) {
++ srp_sems[sem_id].ceiling.period = 0;
++ srp_sems[sem_id].cpu = get_partition(current);
++ return sem_id;
++ }
++ }
++ return -ENOMEM;
++}
++
++/* SRP task priority comparison function. Smaller periods have highest
++ * priority, tie-break is PID.
++ */
++
++/* Adjust the system-wide priority ceiling if resource is claimed. */
++asmlinkage long sys_srp_down(srp_sema_id sem_id)
++{
++ int cpu;
++ int ret = -EINVAL;
++
++ /* disabling preemptions is sufficient protection since
++ * SRP is strictly per CPU and we don't interfere with any
++ * interrupt handlers
++ */
++ preempt_disable();
++ TS_SRP_DOWN_START;
++
++
++ cpu = smp_processor_id();
++
++ if (sem_id < 0 || sem_id >= MAX_SRP_SEMAPHORES ||
++ srp_sems[sem_id].cpu != cpu)
++ goto out;
++
++ if (!srp_sems[sem_id].used)
++ goto out;
++
++ /* claim... */
++ srp_sems[sem_id].claimed = 1;
++ /* ...and update ceiling */
++ srp_add_prio(&__get_cpu_var(srp), &srp_sems[sem_id].ceiling);
++
++ ret = 0;
++ out:
++ TS_SRP_DOWN_END;
++ preempt_enable();
++ return ret;
++}
++
++/* Adjust the system-wide priority ceiling if resource is freed. */
++asmlinkage long sys_srp_up(srp_sema_id sem_id)
++{
++ int cpu;
++ int ret = -EINVAL;
++
++ preempt_disable();
++ TS_SRP_UP_START;
++
++ cpu = smp_processor_id();
++
++ if (sem_id < 0 || sem_id >= MAX_SRP_SEMAPHORES ||
++ srp_sems[sem_id].cpu != cpu)
++ goto out;
++
++ if (!srp_sems[sem_id].used)
++ goto out;
++
++ srp_sems[sem_id].claimed = 0;
++ /* Determine new system priority ceiling for this CPU. */
++ if (in_list(&srp_sems[sem_id].ceiling.list))
++ list_del(&srp_sems[sem_id].ceiling.list);
++ else
++ TRACE_CUR("WARNING: SRP violation detected, prio not in ceiling"
++ " list!\n");
++
++ /* Wake tasks on this CPU, if they exceed current ceiling. */
++ wake_up_all(&__get_cpu_var(srp).ceiling_blocked);
++ ret = 0;
++ out:
++ TS_SRP_UP_END;
++ preempt_enable();
++ return ret;
++}
++
++/* Indicate that task will use a resource associated with a given
++ * semaphore. Should be done *a priori*, before the RT task system is
++ * executed, so this does *not* update the system priority
++ * ceiling! (The ceiling would be meaningless anyway, as the SRP
++ * breaks without this a priori knowledge.)
++ */
++asmlinkage long sys_reg_task_srp_sem(srp_sema_id sem_id, pid_t t_pid)
++{
++ struct pid *task_pid;
++ struct task_struct *t;
++ struct srp_priority t_prio;
++
++ if (sem_id < 0 || sem_id >= MAX_SRP_SEMAPHORES)
++ return -EINVAL;
++
++ task_pid = find_get_pid(t_pid);
++ if (!task_pid)
++ return -EINVAL;
++
++ t = get_pid_task(task_pid, PIDTYPE_PID);
++ if (!t)
++ return -EINVAL;
++
++ if (!is_realtime(t))
++ return -EPERM;
++
++ if (!srp_sems[sem_id].used)
++ return -EINVAL;
++
++ if (srp_sems[sem_id].cpu != get_partition(t))
++ return -EINVAL;
++
++ preempt_disable();
++ t->rt_param.subject_to_srp = 1;
++ t_prio.period = get_rt_period(t);
++ t_prio.pid = t->pid;
++ if (srp_higher_prio(&t_prio, &srp_sems[sem_id].ceiling)) {
++ srp_sems[sem_id].ceiling.period = t_prio.period;
++ srp_sems[sem_id].ceiling.pid = t_prio.pid;
++ }
++
++ preempt_enable();
++
++ return 0;
++}
++
++static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync,
++ void *key)
++{
++ int cpu = smp_processor_id();
++ struct task_struct *tsk = wait->private;
++ if (cpu != get_partition(tsk))
++ TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b",
++ get_partition(tsk));
++ else if (srp_exceeds_ceiling(tsk, &__get_cpu_var(srp)))
++ return default_wake_function(wait, mode, sync, key);
++ return 0;
++}
++
++
++/* Wait for current task priority to exceed system-wide priority ceiling.
++ * Can be used to determine when it is safe to run a job after its release.
++ */
++void srp_ceiling_block(void)
++{
++ struct task_struct *tsk = current;
++ wait_queue_t wait = {
++ .private = tsk,
++ .func = srp_wake_up,
++ .task_list = {NULL, NULL}
++ };
++
++ preempt_disable();
++ if (!srp_exceeds_ceiling(tsk, &__get_cpu_var(srp))) {
++ tsk->state = TASK_UNINTERRUPTIBLE;
++ add_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
++ TRACE_CUR("is priority ceiling blocked.\n");
++ preempt_enable_no_resched();
++ schedule();
++ /* Access to CPU var must occur with preemptions disabled, otherwise
++ * Linux debug code complains loudly, even if it is ok here.
++ */
++ preempt_disable();
++ TRACE_CUR("finally exceeds system ceiling.\n");
++ remove_wait_queue(&__get_cpu_var(srp).ceiling_blocked, &wait);
++ preempt_enable();
++ } else {
++ TRACE_CUR("is not priority ceiling blocked\n");
++ preempt_enable();
++ }
++}
++
++/* Free semaphore, adjusting the system-wide priority ceiling if necessary. */
++asmlinkage long sys_srp_sema_free(srp_sema_id sem_id)
++{
++ int cpu;
++ int ret = 0;
++
++ preempt_disable();
++ cpu = smp_processor_id();
++
++ if (sem_id < 0 || sem_id >= MAX_SRP_SEMAPHORES ||
++ srp_sems[sem_id].cpu != cpu) {
++ ret = -EINVAL;
++ goto out;
++ }
++
++ srp_sems[sem_id].claimed = 0;
++ srp_sems[sem_id].used = 0;
++
++out:
++ preempt_enable();
++ return ret;
++}
++
++
++
++/* ************************************************************************** */
++
++
++
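
The two comparison helpers in this file encode the usual SRP rules: a
semaphore's ceiling is the (period, PID) priority of the highest-priority
task registered for it, and a job may only proceed once its own priority
exceeds the current system ceiling. A worked example with made-up
parameters:

/*
 *   T1: period 10, pid 100        T2: period 20, pid 200
 *   Both register for semaphore S via sys_reg_task_srp_sem(), so S's
 *   ceiling becomes (period 10, pid 100), i.e. T1's priority.
 *
 *   While T2 holds S (sys_srp_down), the system ceiling is (10, 100).
 *   A newly released T1 does not exceed that ceiling (10 < 10 is false,
 *   and on the tie 100 < 100 is false), so srp_ceiling_block() suspends
 *   it until T2 calls sys_srp_up(); this is exactly the SRP blocking
 *   rule.
 */
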
+diff --git a/kernel/pfair_common.c b/kernel/pfair_common.c
+new file mode 100644
+index 0000000..c50fdab
+--- /dev/null
++++ b/kernel/pfair_common.c
+@@ -0,0 +1,237 @@
++/*
++ * Common functions for PFAIR-based schedulers.
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++#include
++#include
++/* Compare two tasks: returns true if
++ * the lhs has higher priority than the rhs. */
++int is_pfair_hp(struct task_struct *lhs, struct task_struct *rhs)
++{
++ /* Favor subtasks with earlier deadlines */
++ if(time_before(get_deadline(lhs), get_deadline(rhs)))
++ return 1;
++ if(get_deadline(lhs) == get_deadline(rhs)) {
++ /* If deadlines are equal,
++ * favor non-zero b-bit (a heavy task) */
++ if(lhs->rt_param.times.b_bit > rhs->rt_param.times.b_bit)
++ return 1;
++
++ if(lhs->rt_param.times.b_bit == rhs->rt_param.times.b_bit &&
++ lhs->rt_param.times.b_bit == 1)
++ /* If b-bit is 1, favor tasks with later
++ * group deadline */
++ return time_after(lhs->rt_param.times.group_deadline,
++ rhs->rt_param.times.group_deadline);
++
++ }
++ return 0;
++}
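++
++/* Worked example of the tie-breaking rules above (numbers are hypothetical):
++ * if both subtasks have deadline 100, the one with b-bit 1 beats the one with
++ * b-bit 0; if both have b-bit 1, the one with the later group deadline (say
++ * 108 vs. 104) is favored. is_pfair_hp() returns 1 only when lhs wins one of
++ * these comparisons; in all other cases (including complete ties) it returns 0.
++ */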
++
++void pfair_domain_init(pfair_domain_t *pfair)
++{
++ BUG_ON(!pfair);
++ INIT_LIST_HEAD(&pfair->ready_queue);
++ INIT_LIST_HEAD(&pfair->release_queue);
++ queue_lock_init(&pfair->pfair_lock);
++ cpus_setall(pfair->domain_cpus);
++ /* Use cpu 0 to keep the system alive
++ * TODO: Remove later or make it configurable
++ */
++ cpu_clear(0, pfair->domain_cpus);
++}
++
++
++/* add_ready - add a real-time task to the PFAIR ready queue.
++ * It must be runnable. Global domain lock must be held before
++ * calling this function.
++ *
++ * @new: the newly released task
++ */
++void pfair_add_ready(pfair_domain_t* pfair, struct task_struct *new)
++{
++ struct list_head *pos;
++ struct task_struct *queued;
++
++ BUG_ON(!new);
++ /* find a spot where the new task has higher priority than the next */
++ list_for_each(pos, &pfair->ready_queue) {
++ queued = list_entry(pos, struct task_struct, rt_list);
++ if (unlikely(is_pfair_hp(new, queued))) {
++ /* the task at pos has a later deadline */
++ /* insert the new task in front of it */
++ __list_add(&new->rt_list, pos->prev, pos);
++ return;
++ }
++ }
++ /* if we get to this point either the list is empty or new has the
++ * lowest priority. Let's add it to the end. */
++ list_add_tail(&new->rt_list, &pfair->ready_queue);
++}
++/**
++ * Extraction function: take the highest-priority subtask off the
++ * ready queue, or return NULL if the queue is empty.
++ */
++struct task_struct* __pfair_take_ready(pfair_domain_t* pfair)
++{
++ struct task_struct *t = NULL;
++ /* either not yet released, preempted, or non-rt */
++ if (!list_empty(&pfair->ready_queue)) {
++
++ /* take next rt task */
++ t = list_entry(pfair->ready_queue.next, struct task_struct,
++ rt_list);
++
++ /* kick it out of the ready list */
++ list_del(&t->rt_list);
++ }
++ return t;
++}
++
++
++/* add_release - add a real-time task to the PFAIR release queue.
++ * Domain lock must be acquired before the function is called.
++ *
++ * @task: the sleeping task
++ */
++void pfair_add_release(pfair_domain_t* pfair, struct task_struct *task)
++{
++ struct list_head *pos;
++ struct task_struct *queued;
++
++ BUG_ON(!task);
++ /* find a spot where our release time is later than the next */
++ list_for_each_prev(pos, &pfair->release_queue) {
++ queued = list_entry(pos, struct task_struct, rt_list);
++ if ((unlikely(time_before(queued->rt_param.times.release,
++ task->rt_param.times.release)))) {
++ /* the task at pos has an earlier release */
++ /* insert the new task behind it */
++ __list_add(&task->rt_list, pos, pos->next);
++ return;
++ }
++ }
++ /* if we get to this point either the list is empty or task has the
++ * earliest release. Let's add it to the front. */
++ list_add(&task->rt_list, &pfair->release_queue);
++}
++/**
++ * This function is called from the tick handler; it acquires the lock
++ * itself. Only one processor effectively merges the queues.
++ */
++void pfair_try_release_pending(pfair_domain_t* pfair)
++{
++ unsigned long flags;
++ struct list_head *pos, *save;
++ struct task_struct *queued;
++ queue_lock_irqsave(&pfair->pfair_lock, flags);
++
++ list_for_each_safe(pos, save, &pfair->release_queue) {
++ queued = list_entry(pos, struct task_struct, rt_list);
++ if (likely(time_before_eq(
++ queued->rt_param.times.release, jiffies))) {
++ /* this one is ready to go*/
++ list_del(pos);
++ set_rt_flags(queued, RT_F_RUNNING);
++
++ sched_trace_job_release(queued);
++ /* now it can be picked up */
++ barrier();
++ pfair_add_ready(pfair, queued);
++ }
++ else
++ /* the release queue is ordered */
++ break;
++ }
++ queue_unlock_irqrestore(&pfair->pfair_lock, flags);
++}
++/*
++ * Subtask preparation. Assuming that last_release
++ * denotes the time when the job was released.
++ */
++void pfair_prepare_next_subtask(struct task_struct *t)
++{
++ BUG_ON(!t);
++ /* assign subtask release time, deadline, b-bit,
++ * and group deadline
++ */
++ t->rt_param.times.release = t->rt_param.times.last_release
++ +release_time(t);
++ t->rt_param.times.deadline = t->rt_param.times.last_release
++ +pfair_deadline(t);
++ t->rt_param.times.b_bit = b_bit(t);
++ t->rt_param.times.group_deadline = t->rt_param.times.last_release
++ +group_deadline(t);
++}
++
++void pfair_prepare_next_job(struct task_struct *t)
++{
++ BUG_ON(!t);
++
++ /* prepare next job release */
++ /* reset the count of consumed quanta so that we can compute new release
++ * times and deadlines for subtasks correctly
++ */
++ t->rt_param.times.exec_time = 0;
++ /* assign job-wide release time,
++ * this is the starting point to
++ * compute subtask releases, deadlines and group deadlines
++ */
++ t->rt_param.times.last_release = t->rt_param.times.last_release
++ +get_rt_period(t);
++ /* Release the first subtask. */
++ pfair_prepare_next_subtask(t);
++ t->first_time_slice = 0;
++ /* Increase job sequence number */
++ t->rt_param.times.job_no++;
++}
++
++void __pfair_prepare_new_release(struct task_struct *t, jiffie_t start)
++{
++ t->rt_param.times.release = start;
++ t->rt_param.times.last_release = start;
++ t->rt_param.times.exec_time = 0;
++ t->first_time_slice = 0;
++ pfair_prepare_next_subtask(t);
++ set_rt_flags(t, RT_F_RUNNING);
++}
++
++void pfair_prepare_new_releases(pfair_domain_t *pfair, jiffie_t start)
++{
++ unsigned long flags;
++ struct list_head tmp_list;
++ struct list_head *pos, *n;
++ struct task_struct *t;
++
++ INIT_LIST_HEAD(&tmp_list);
++
++ queue_lock_irqsave(&pfair->pfair_lock, flags);
++
++
++ while (!list_empty(&pfair->release_queue)) {
++ pos = pfair->release_queue.next;
++ list_del(pos);
++ list_add(pos, &tmp_list);
++ }
++ while (!list_empty(&pfair->ready_queue)) {
++ pos = pfair->ready_queue.next;
++ list_del(pos);
++ list_add(pos, &tmp_list);
++ }
++
++ list_for_each_safe(pos, n, &tmp_list) {
++ t = list_entry(pos, struct task_struct, rt_list);
++ list_del(pos);
++ __pfair_prepare_new_release(t, start);
++ pfair_add_release(pfair, t);
++ }
++ queue_unlock_irqrestore(&pfair->pfair_lock, flags);
++}
++
+diff --git a/kernel/rt_domain.c b/kernel/rt_domain.c
+new file mode 100644
+index 0000000..4875c53
+--- /dev/null
++++ b/kernel/rt_domain.c
+@@ -0,0 +1,185 @@
++/*
++ * kernel/rt_domain.c
++ *
++ * LITMUS real-time infrastructure. This file contains the
++ * functions that manipulate RT domains. RT domains are an abstraction
++ * of a ready queue and a release queue.
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++#include
++
++
++static int dummy_resched(rt_domain_t *rt)
++{
++ return 0;
++}
++
++static int dummy_order(struct list_head* a, struct list_head* b)
++{
++ return 0;
++}
++
++int release_order(struct list_head* a, struct list_head* b)
++{
++ return earlier_release(
++ list_entry(a, struct task_struct, rt_list),
++ list_entry(b, struct task_struct, rt_list));
++}
++
++
++void rt_domain_init(rt_domain_t *rt,
++ check_resched_needed_t f,
++ list_cmp_t order)
++{
++ BUG_ON(!rt);
++ if (!f)
++ f = dummy_resched;
++ if (!order)
++ order = dummy_order;
++ INIT_LIST_HEAD(&rt->ready_queue);
++ INIT_LIST_HEAD(&rt->release_queue);
++ rt->ready_lock = RW_LOCK_UNLOCKED;
++ rt->release_lock = SPIN_LOCK_UNLOCKED;
++ rt->check_resched = f;
++ rt->order = order;
++}
++
++/* add_ready - add a real-time task to the rt ready queue. It must be runnable.
++ * @new: the newly released task
++ */
++void __add_ready(rt_domain_t* rt, struct task_struct *new)
++{
++ TRACE("rt: adding %s/%d (%u, %u) to ready queue\n",
++ new->comm, new->pid, get_exec_cost(new), get_rt_period(new));
++
++ if (!list_insert(&new->rt_list, &rt->ready_queue, rt->order))
++ rt->check_resched(rt);
++}
++
++struct task_struct* __take_ready(rt_domain_t* rt)
++{
++ struct task_struct *t = __peek_ready(rt);
++
++ /* kick it out of the ready list */
++ if (t)
++ list_del(&t->rt_list);
++ return t;
++}
++
++struct task_struct* __peek_ready(rt_domain_t* rt)
++{
++ if (!list_empty(&rt->ready_queue))
++ return next_ready(rt);
++ else
++ return NULL;
++}
++
++struct task_struct* __take_ready_rq(rt_domain_t* rt, runqueue_t* rq, int cpu)
++{
++ struct task_struct *task = __take_ready(rt);
++
++ if (task) {
++ set_task_cpu(task, cpu);
++ __activate_task(task, rq);
++ }
++ return task;
++}
++
++/* add_release - add a real-time task to the rt release queue.
++ * @task: the sleeping task
++ */
++void __add_release(rt_domain_t* rt, struct task_struct *task)
++{
++ TRACE("rt: adding %s/%d (%u, %u) rel=%d to release queue\n",
++ task->comm, task->pid, get_exec_cost(task), get_rt_period(task),
++ get_release(task));
++
++ list_insert(&task->rt_list, &rt->release_queue, release_order);
++}
++
++void __release_pending(rt_domain_t* rt)
++{
++ struct list_head *pos, *save;
++ struct task_struct *queued;
++ list_for_each_safe(pos, save, &rt->release_queue) {
++ queued = list_entry(pos, struct task_struct, rt_list);
++ if (likely(is_released(queued))) {
++ /* this one is ready to go*/
++ list_del(pos);
++ set_rt_flags(queued, RT_F_RUNNING);
++
++ sched_trace_job_release(queued);
++
++ /* now it can be picked up */
++ barrier();
++ add_ready(rt, queued);
++ }
++ else
++ /* the release queue is ordered */
++ break;
++ }
++}
++
++void try_release_pending(rt_domain_t* rt)
++{
++ unsigned long flags;
++
++ if (spin_trylock_irqsave(&rt->release_lock, flags)) {
++ __release_pending(rt);
++ spin_unlock_irqrestore(&rt->release_lock, flags);
++ }
++}
++
++void rerelease_all(rt_domain_t *rt,
++ release_at_t release)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&rt->release_lock, flags);
++ write_lock(&rt->ready_lock);
++
++ __rerelease_all(rt, release);
++
++ write_unlock(&rt->ready_lock);
++ spin_unlock_irqrestore(&rt->release_lock, flags);
++}
++
++void __rerelease_all(rt_domain_t *rt,
++ release_at_t release)
++{
++ jiffie_t start = jiffies + 10;
++ struct list_head tmp_list;
++ struct list_head *pos, *n;
++ struct task_struct *t;
++
++ INIT_LIST_HEAD(&tmp_list);
++
++ while (!list_empty(&rt->release_queue)) {
++ pos = rt->release_queue.next;
++ list_del(pos);
++ list_add(pos, &tmp_list);
++ }
++ while (!list_empty(&rt->ready_queue)) {
++ pos = rt->ready_queue.next;
++ list_del(pos);
++ list_add(pos, &tmp_list);
++ }
++
++ list_for_each_safe(pos, n, &tmp_list) {
++ t = list_entry(pos, struct task_struct, rt_list);
++ list_del(pos);
++ release(t, start);
++ __add_release(rt, t);
++ }
++
++}
++
++
+diff --git a/kernel/sched.c b/kernel/sched.c
+index cca93cc..5ad4276 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -56,6 +56,16 @@
+
+ #include
+
++#include
++#define __SCHED_C__
++#include
++#include
++#include
++#include
++
++/* LITMUS: avoid races with multiple task wake-ups */
++DEFINE_SPINLOCK(litmus_task_set_lock);
++
+ /*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+@@ -836,7 +846,7 @@ static int effective_prio(struct task_struct *p)
+ * keep the priority unchanged. Otherwise, update priority
+ * to the normal priority:
+ */
+- if (!rt_prio(p->prio))
++ if (!rt_prio(p->prio) && !is_realtime(p))
+ return p->normal_prio;
+ return p->prio;
+ }
+@@ -844,7 +854,7 @@ static int effective_prio(struct task_struct *p)
+ /*
+ * __activate_task - move a task to the runqueue.
+ */
+-static void __activate_task(struct task_struct *p, struct rq *rq)
++void __activate_task(struct task_struct *p, struct rq *rq)
+ {
+ struct prio_array *target = rq->active;
+
+@@ -999,7 +1009,7 @@ out:
+ /*
+ * deactivate_task - remove a task from the runqueue.
+ */
+-static void deactivate_task(struct task_struct *p, struct rq *rq)
++void deactivate_task(struct task_struct *p, struct rq *rq)
+ {
+ dec_nr_running(p, rq);
+ dequeue_task(p, p->array);
+@@ -1408,13 +1418,44 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
+ #endif
+
+ rq = task_rq_lock(p, &flags);
++
++ if (is_realtime(p))
++ TRACE("try_to_wake_up(%s/%d)\n", p->comm, p->pid);
++
+ old_state = p->state;
+ if (!(old_state & state))
+- goto out;
++ goto out;
+
+ if (p->array)
+ goto out_running;
+
++
++ spin_lock(&litmus_task_set_lock);
++ if (p->rt_param.litmus_controlled) {
++ /* Already included. This can happen
++ * if the task dropped all locks to call
++ * schedule() but a wake up raced and came in
++ * early.
++ */
++
++ spin_unlock(&litmus_task_set_lock);
++ goto out_running;
++ }
++
++ sched_trace_task_arrival(p);
++ if (is_realtime(p)) {
++ p->rt_param.litmus_controlled = 1;
++ curr_sched_plugin->wake_up_task(p);
++
++ spin_unlock(&litmus_task_set_lock);
++ goto out_running;
++ }
++
++ p->rt_param.litmus_controlled = 0;
++ spin_unlock(&litmus_task_set_lock);
++
++
++
+ cpu = task_cpu(p);
+ this_cpu = smp_processor_id();
+
+@@ -1580,6 +1621,7 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
+ cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
+ #endif
+ set_task_cpu(p, cpu);
++ clear_rt_params(p);
+
+ /*
+ * We mark the process as running here, but have not actually
+@@ -1595,6 +1637,10 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
+ p->prio = current->normal_prio;
+
+ INIT_LIST_HEAD(&p->run_list);
++ INIT_LIST_HEAD(&p->rt_list);
++ p->rt_param.basic_params.class = RT_CLASS_BEST_EFFORT;
++ p->rt_param.litmus_controlled = 0;
++ p->rt_param.inh_task = NULL;
+ p->array = NULL;
+ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+ if (unlikely(sched_info_on()))
+@@ -1647,6 +1693,12 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+ unsigned long flags;
+ int this_cpu, cpu;
+
++ if (clone_flags & CLONE_REALTIME) {
++ /* just mark the task as stopped */
++ p->state = TASK_STOPPED;
++ return;
++ }
++
+ rq = task_rq_lock(p, &flags);
+ BUG_ON(p->state != TASK_RUNNING);
+ this_cpu = smp_processor_id();
+@@ -1730,6 +1782,9 @@ void fastcall sched_exit(struct task_struct *p)
+ unsigned long flags;
+ struct rq *rq;
+
++ if (is_realtime(p))
++ return;
++
+ /*
+ * If the child was a (relative-) CPU hog then decrease
+ * the sleep_avg of the parent as well.
+@@ -1801,6 +1856,13 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
+ */
+ prev_state = prev->state;
+ finish_arch_switch(prev);
++ /* Requeue previous real-time task before we drop the rq lock, because
++ * that may lead to a preemption.
++ */
++ curr_sched_plugin->finish_switch(prev);
++ sched_trace_task_scheduled(current);
++ /* trace before IRQs are enabled */
++ TS_CXS_END;
+ finish_lock_switch(rq, prev);
+ if (mm)
+ mmdrop(mm);
+@@ -1811,7 +1873,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
+ */
+ kprobe_flush_task(prev);
+ put_task_struct(prev);
+- }
++ }
+ }
+
+ /**
+@@ -2990,7 +3052,7 @@ static inline void idle_balance(int cpu, struct rq *rq)
+ static inline void wake_priority_sleeper(struct rq *rq)
+ {
+ #ifdef CONFIG_SCHED_SMT
+- if (!rq->nr_running)
++ if (!rq->nr_running || get_rt_mode() == MODE_RT_RUN)
+ return;
+
+ spin_lock(&rq->lock);
+@@ -3220,14 +3282,30 @@ void scheduler_tick(void)
+
+ update_cpu_clock(p, rq, now);
+
+- if (p == rq->idle)
+- /* Task on the idle queue */
+- wake_priority_sleeper(rq);
+- else
+- task_running_tick(rq, p);
++ /* check whether the RT scheduler plugin requires a call to
++ * schedule
++ */
++ TS_PLUGIN_TICK_START;
++ if (rt_scheduler_tick() == FORCE_RESCHED)
++ set_tsk_need_resched(p);
++ TS_PLUGIN_TICK_END;
++
++ /* real-time accounting is done by the plugin
++ * call linux functions only for background tasks
++ */
++ if (!is_realtime(p)) {
++ if (p == rq->idle)
++ /* Task on the idle queue */
++ wake_priority_sleeper(rq);
++ else
++ task_running_tick(rq, p);
++ }
++ send_scheduler_signals();
++
+ #ifdef CONFIG_SMP
+ update_load(rq);
+- if (time_after_eq(jiffies, rq->next_balance))
++ if (time_after_eq(jiffies, rq->next_balance) &&
++ get_rt_mode() == MODE_NON_RT)
+ raise_softirq(SCHED_SOFTIRQ);
+ #endif
+ }
+@@ -3420,6 +3498,7 @@ asmlinkage void __sched schedule(void)
+ long *switch_count;
+ struct rq *rq;
+
++
+ /*
+ * Test if we are atomic. Since do_exit() needs to call into
+ * schedule() atomically, we ignore that path for now.
+@@ -3427,8 +3506,9 @@ asmlinkage void __sched schedule(void)
+ */
+ if (unlikely(in_atomic() && !current->exit_state)) {
+ printk(KERN_ERR "BUG: scheduling while atomic: "
+- "%s/0x%08x/%d\n",
+- current->comm, preempt_count(), current->pid);
++ "%s/0x%08x/%d %s\n",
++ current->comm, preempt_count(), current->pid,
++ is_realtime(current) ? "rt" : "non-rt");
+ debug_show_held_locks(current);
+ if (irqs_disabled())
+ print_irqtrace_events(current);
+@@ -3438,6 +3518,7 @@ asmlinkage void __sched schedule(void)
+
+ need_resched:
+ preempt_disable();
++ TS_SCHED_START;
+ prev = current;
+ release_kernel_lock(prev);
+ need_resched_nonpreemptible:
+@@ -3470,6 +3551,7 @@ need_resched_nonpreemptible:
+ spin_lock_irq(&rq->lock);
+
+ switch_count = &prev->nivcsw;
++ /* check for blocking tasks */
+ if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
+ switch_count = &prev->nvcsw;
+ if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
+@@ -3478,13 +3560,66 @@ need_resched_nonpreemptible:
+ else {
+ if (prev->state == TASK_UNINTERRUPTIBLE)
+ rq->nr_uninterruptible++;
++ /* we need to remove real-time tasks from the runqueue */
++
++ /* protect against races with signal delivery and IO
++ * interrupts on other CPUs
++ *
++ * FIXME: This is probably not sufficient, as (in theory)
++ * after unlocking the task_set_lock this task could be
++ * scheduled elsewhere before we switched away from it.
++ * This has not been observed yet. Getting this locking
++ * right is tricky.
++ */
++ spin_lock(&litmus_task_set_lock);
++ if (prev->rt_param.litmus_controlled)
++ prev->rt_param.litmus_controlled = 0;
++ spin_unlock(&litmus_task_set_lock);
++
++ if (is_realtime(prev)) {
++ TRACE("schedule: %s/%d blocks. state = %d\n",
++ prev->comm, prev->pid, prev->state);
++ curr_sched_plugin->task_blocks(prev);
++ /* Enable this for all tasks to get _a lot_ of
++ * data. Can be helpful for debugging.
++ */
++ sched_trace_task_departure(prev);
++ }
++
++ /* only indirect switching is supported in the current
++ * version of LITMUS
++ */
+ deactivate_task(prev, rq);
+ }
+ }
+
++ next = NULL;
++
++ /* consult the real-time plugin */
++ TS_PLUGIN_SCHED_START;
++ curr_sched_plugin->schedule(prev, &next, rq);
++ TS_PLUGIN_SCHED_END;
++ /* If the real-time plugin wants to switch to a specific task
++ * it'll be on the rq and have the highest priority. There will
++ * be exactly one such task, thus the selection of the next task
++ * is unambiguous and the following code can only get
++ * triggered if there are no RT tasks pending (on this CPU). Thus,
++ * we may as well skip it.
++ */
++ if (next)
++ goto switch_tasks;
++
+ cpu = smp_processor_id();
+ if (unlikely(!rq->nr_running)) {
+- idle_balance(cpu, rq);
++ /* only load-balance if we are not in RT mode
++ *
++ * TODO: Maybe this can be relaxed by modifying the
++ * load-balancing routines in such a way that they never touch
++ * real-time tasks.
++ */
++ if (get_rt_mode() == MODE_NON_RT)
++ idle_balance(cpu, rq);
+ if (!rq->nr_running) {
+ next = rq->idle;
+ rq->expired_timestamp = 0;
+@@ -3528,7 +3663,7 @@ need_resched_nonpreemptible:
+ }
+ }
+ next->sleep_type = SLEEP_NORMAL;
+- if (dependent_sleeper(cpu, rq, next))
++ if (get_rt_mode() == MODE_NON_RT && dependent_sleeper(cpu, rq, next))
+ next = rq->idle;
+ switch_tasks:
+ if (next == rq->idle)
+@@ -3546,7 +3681,11 @@ switch_tasks:
+ prev->timestamp = prev->last_ran = now;
+
+ sched_info_switch(prev, next);
++ TS_SCHED_END;
+ if (likely(prev != next)) {
++ TS_CXS_START;
++ if (is_running(prev))
++ sched_trace_task_preemption(prev, next);
+ next->timestamp = now;
+ rq->nr_switches++;
+ rq->curr = next;
+@@ -3560,9 +3699,12 @@ switch_tasks:
+ * CPUs since it called schedule(), thus the 'rq' on its stack
+ * frame will be invalid.
+ */
+- finish_task_switch(this_rq(), prev);
+- } else
++ finish_task_switch(this_rq(), prev);
++ } else {
+ spin_unlock_irq(&rq->lock);
++ }
++
++ send_scheduler_signals();
+
+ prev = current;
+ if (unlikely(reacquire_kernel_lock(prev) < 0))
+@@ -3691,6 +3833,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ }
+ }
+
++
+ /**
+ * __wake_up - wake up threads blocked on a waitqueue.
+ * @q: the waitqueue
+@@ -3709,6 +3852,7 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
+ }
+ EXPORT_SYMBOL(__wake_up);
+
++
+ /*
+ * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
+ */
+@@ -3717,6 +3861,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
+ __wake_up_common(q, mode, 1, 0, NULL);
+ }
+
++
+ /**
+ * __wake_up_sync - wake up threads blocked on a waitqueue.
+ * @q: the waitqueue
+@@ -4175,7 +4320,7 @@ static inline struct task_struct *find_process_by_pid(pid_t pid)
+ }
+
+ /* Actually do priority change: must hold rq lock. */
+-static void __setscheduler(struct task_struct *p, int policy, int prio)
++void __setscheduler(struct task_struct *p, int policy, int prio)
+ {
+ BUG_ON(p->array);
+
+@@ -6877,7 +7022,7 @@ void __init sched_init_smp(void)
+ BUG();
+ }
+ #else
+-void __init sched_init_smp(void)
++void __init linux_sched_init_smp(void)
+ {
+ }
+ #endif /* CONFIG_SMP */
+diff --git a/kernel/sched_adaptive.c b/kernel/sched_adaptive.c
+new file mode 100644
+index 0000000..319ebbc
+--- /dev/null
++++ b/kernel/sched_adaptive.c
+@@ -0,0 +1,1454 @@
++
++
++/*
++ * kernel/sched_adaptive.c
++ *
++ * Implementation of Aaron's adaptive global EDF scheduling algorithm. It is
++ * based on the GSN-EDF scheduler. However, it does not support synchronization
++ * primitives.
++ *
++ * It implements a version of FC-GEDF with a bunch of linearity assumptions for
++ * the optimizer and the weight-transfer function. The code is meant to be
++ * clear; however, you really need to read the paper if you want to understand
++ * what is going on here.
++ *
++ * Block et al., "Feedback-Controlled Adaptive Multiprocessor Real-Time
++ * Systems", submitted to RTAS 2008.
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include
++
++/* Overview of GSN-EDF operations.
++ *
++ * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
++ * description only covers how the individual operations are implemented in
++ * LITMUS.
++ *
++ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
++ * structure (NOT the actually scheduled
++ * task). If there is another linked task To
++ * already it will set To->linked_on = NO_CPU
++ * (thereby removing its association with this
++ * CPU). However, it will not requeue the
++ * previously linked task (if any). It will set
++ * T's state to RT_F_RUNNING and check whether
++ * it is already running somewhere else. If T
++ * is scheduled somewhere else it will link
++ * it to that CPU instead (and pull the linked
++ * task to cpu). T may be NULL.
++ *
++ * unlink(T) - Unlink removes T from all scheduler data
++ * structures. If it is linked to some CPU it
++ * will link NULL to that CPU. If it is
++ * currently queued in the gsnedf queue it will
++ * be removed from the T->rt_list. It is safe to
++ * call unlink(T) if T is not linked. T may not
++ * be NULL.
++ *
++ * requeue(T) - Requeue will insert T into the appropriate
++ * queue. If the system is in real-time mode and
++ * T is released already, it will go into the
++ * ready queue. If the system is not in
++ * real-time mode, then T will go into the
++ * release queue. If T's release time is in the
++ * future, it will go into the release
++ * queue. That means that T's release time/job
++ * no/etc. has to be updated before requeue(T) is
++ * called. It is not safe to call requeue(T)
++ * when T is already queued. T may not be NULL.
++ *
++ * gsnedf_job_arrival(T) - This is the catch all function when T enters
++ * the system after either a suspension or at a
++ * job release. It will queue T (which means it
++ * is not safe to call gsnedf_job_arrival(T) if
++ * T is already queued) and then check whether a
++ * preemption is necessary. If a preemption is
++ * necessary it will update the linkage
++ * accordingly and cause scheduled to be called
++ * (either with an IPI or need_resched). It is
++ * safe to call gsnedf_job_arrival(T) if T's
++ * next job has not been actually released yet
++ * (release time in the future). T will be put
++ * on the release queue in that case.
++ *
++ * job_completion(T) - Take care of everything that needs to be done
++ * to prepare T for its next release and place
++ * it in the right queue with
++ * gsnedf_job_arrival().
++ *
++ *
++ * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU) is
++ * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
++ * the functions will automatically propagate pending tasks from the ready queue
++ * to a linked task. This is the job of the calling function (by means of
++ * __take_ready).
++ */
++
++static void unlink(struct task_struct* t);
++static void adaptive_job_arrival(struct task_struct* task);
++
++/* cpu_entry_t - maintain the linked and scheduled state
++ */
++typedef struct {
++ int cpu;
++ struct task_struct* linked; /* only RT tasks */
++ struct task_struct* scheduled; /* only RT tasks */
++ struct list_head list;
++ atomic_t will_schedule; /* prevent unneeded IPIs */
++} cpu_entry_t;
++DEFINE_PER_CPU(cpu_entry_t, adaptive_cpu_entries);
++
++#define set_will_schedule() \
++ (atomic_set(&__get_cpu_var(adaptive_cpu_entries).will_schedule, 1))
++#define clear_will_schedule() \
++ (atomic_set(&__get_cpu_var(adaptive_cpu_entries).will_schedule, 0))
++#define test_will_schedule(cpu) \
++ (atomic_read(&per_cpu(adaptive_cpu_entries, cpu).will_schedule))
++
++
++#define NO_CPU 0xffffffff
++
++/* The adaptive_lock is used to serialize all scheduling events.
++ * It protects the CPU queue, the rt domain, and the per-CPU linkage state.
++ */
++static queuelock_t adaptive_lock;
++/* the cpus queue themselves according to priority in here */
++static LIST_HEAD(adaptive_cpu_queue);
++
++static rt_domain_t adaptive;
++
++/* feedback control parameters */
++static fp_t fc_a, fc_b;
++
++/* optimizer trigger */
++static jiffie_t last_optimizer_run;
++static jiffie_t optimizer_min_invocation_sep;
++static jiffie_t optimizer_period;
++static fp_t task_error_threshold;
++
++static fp_t system_capacity;
++/* total actual weight of the task system */
++static fp_t total_weight;
++
++/* optimizer time snapshot */
++jiffie_t opt_time;
++
++/* Delayed weight increase notification list.
++ * This list gets clobbered on each optimizer run.
++ */
++static LIST_HEAD(adaptive_inc_list);
++
++/* comment out to disable optimizer debugging */
++#define ENABLE_OPTIMIZER_DEBUGGING
++
++#ifdef ENABLE_OPTIMIZER_DEBUGGING
++#define OPT_DBG TRACE
++#define OPT_DBG_T TRACE_TASK
++#else
++#define OPT_DBG(fmt, arg...) do { } while (0)
++#define OPT_DBG_T(t, fmt, arg...) do { } while (0)
++#endif
++
++/******************************************************************************/
++/* OPTIMIZER MATH */
++/******************************************************************************/
++
++/* All time dependent functions
++ * rely on opt_time.
++ * Update in the optimizer before use!
++ */
++
++static inline fp_t ideal(fp_t weight, jiffie_t delta_t)
++{
++ return _mul(weight, FP(delta_t));
++}
++
++static noinline long ideal_exec_time(struct task_struct* t)
++{
++ jiffie_t delta = opt_time - get_last_release(t);
++ return _round(ideal(get_est_weight(t), delta));
++}
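++
++/* Worked example (hypothetical numbers): a task with an estimated weight of
++ * 1/4 whose current job was released 40 jiffies before opt_time has an ideal
++ * allocation of ideal(1/4, 40) = 10 jiffies of execution; ideal_exec_time()
++ * returns that value rounded to an integer.
++ */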
++
++/* this makes a whole bunch of linearity assumptions */
++static noinline fp_t weight_transfer(struct task_struct* t,
++ unsigned int from, unsigned int to,
++ fp_t act_weight)
++{
++ fp_t rel_from, rel_to, ret;
++ rel_from = get_sl(t, from).weight;
++ rel_to = get_sl(t, to).weight;
++ ret.val = (act_weight.val * rel_to.val) / rel_from.val;
++ OPT_DBG("weight_transfer(%ld, %ld, %ld) => %ld to=%u from=%u\n",
++ rel_from.val, rel_to.val, act_weight.val, ret.val, from, to);
++
++ return ret;
++}
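++
++/* Worked example (hypothetical numbers): if the current service level has
++ * relative weight 0.50, the target level has relative weight 0.25, and the
++ * actual weight is 0.30, the transferred weight is 0.30 * 0.25 / 0.50 = 0.15.
++ */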
++
++static noinline fp_t est_weight_at(struct task_struct* t, unsigned int level)
++{
++ if (t->rt_param.no_service_levels)
++ return weight_transfer(t, get_cur_sl(t), level,
++ get_est_weight(t));
++ else
++ return get_est_weight(t);
++
++}
++
++static noinline void update_estimate(predictor_state_t *state, fp_t actual_weight,
++ fp_t a, fp_t b)
++{
++ fp_t err, new;
++
++ OPT_DBG("OLD ESTIMATE Weight" _FP_ " ActWt " _FP_ " A:" _FP_ ", B:" _FP_
++ "\n", fp2str(state->estimate), fp2str(actual_weight), fp2str(a),
++ fp2str(b));
++ err = _sub(actual_weight, state->estimate);
++ new = _add(_mul(a, err),
++ _mul(b, state->accumulated));
++
++ total_weight = _sub(total_weight, state->estimate);
++ state->estimate = new;
++ total_weight = _add(total_weight, state->estimate);
++
++ state->accumulated = _add(state->accumulated, err);
++ OPT_DBG("ERROR " _FP_ ", NEW " _FP_ ", ACC" _FP_ "\n", fp2str(err),
++ fp2str(new), fp2str(state->accumulated));
++
++}
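++
++/* Worked example (all numbers hypothetical): with a = 0.5, b = 0.25, a
++ * previous estimate of 0.20, an accumulated error of 0.10, and an actual
++ * weight of 0.30, the error is 0.10, the new estimate becomes
++ * 0.5 * 0.10 + 0.25 * 0.10 = 0.075, and the accumulated error grows to 0.20;
++ * total_weight is adjusted by the difference between the old and new estimate.
++ */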
++
++static noinline fp_t linear_metric(struct task_struct* t)
++{
++ fp_t v1, vmax, g1, gmax;
++ fp_t est_w;
++ unsigned int l = t->rt_param.no_service_levels;
++ unsigned int lcur;
++
++ if (l <= 1)
++ return FP(0);
++
++ lcur = get_cur_sl(t);
++ est_w = get_est_weight(t);
++
++ OPT_DBG_T(t, " linear_metric: lcur=%u l=%u est_w=" _FP_ "\n",
++ lcur, l, est_w);
++ OPT_DBG_T(t, " linear_metric: est_w.val=%ld\n", est_w.val);
++
++
++ v1 = t->rt_param.service_level[0].value;
++ vmax = t->rt_param.service_level[l - 1].value;
++
++ OPT_DBG_T(t, " linear_metric: v1=" _FP_ " vmax=" _FP_ "\n", v1, vmax);
++ OPT_DBG_T(t, " linear_metric: v1=%ld vmax=%ld\n", v1.val, vmax.val);
++
++
++ g1 = weight_transfer(t, lcur, 0, est_w);
++ gmax = weight_transfer(t, lcur, l - 1, est_w);
++
++ OPT_DBG_T(t, " linear_metric: g1=" _FP_ " gmax=" _FP_ "\n", g1, gmax);
++ OPT_DBG_T(t, " linear_metric: g1=%ld gmax=%ld\n", g1.val, gmax.val);
++
++
++ TRACE_BUG_ON(_eq(_sub(gmax, g1), FP(0)));
++ if (_eq(_sub(gmax, g1), FP(0)))
++ return FP(0);
++ return _div(_sub(vmax, v1),
++ _sub(gmax, g1));
++}
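++
++/* Worked example (hypothetical numbers): for a task whose lowest and highest
++ * service levels have values v1 = 1.0 and vmax = 4.0 and transferred weights
++ * g1 = 0.10 and gmax = 0.40, the metric is (4.0 - 1.0) / (0.40 - 0.10) = 10,
++ * i.e., roughly ten units of value gained per unit of weight invested.
++ */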
++
++static noinline unsigned long reweighted_period(fp_t ow, fp_t nw,
++ unsigned long alloc,
++ jiffie_t deadline,
++ jiffie_t release)
++{
++ fp_t dl;
++ dl = _mul(FP(deadline - release), ow);
++ dl = _sub(dl, FP(alloc));
++ if(_eq(nw, FP(0)))
++ return 0;
++ dl = _div(dl, nw);
++ return _round(dl);
++}
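++
++/* Worked example (hypothetical numbers): with old weight ow = 0.5, new weight
++ * nw = 0.25, 2 quanta already allocated, and deadline - release = 10, the
++ * reweighted period is (10 * 0.5 - 2) / 0.25 = 12 jiffies.
++ */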
++
++static noinline int is_under_allocated(struct task_struct* t)
++{
++ return ideal_exec_time(t) >= t->rt_param.times.exec_time;
++}
++
++static noinline jiffie_t dec_equal_point_delay(struct task_struct* t)
++{
++ if (_lt(FP(0), get_est_weight(t)))
++ /* when t was released plus time needed to equalize
++ * minus now
++ */
++ return get_last_release(t) +
++ _round(_div( FP(t->rt_param.times.exec_time),
++ get_est_weight(t))) -
++ opt_time;
++ else
++ /* if the weight is zero we just take the
++ * deadline
++ */
++ return t->rt_param.times.deadline;
++}
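++
++/* Worked example (hypothetical numbers): a job that has executed for 6 quanta
++ * with an estimated weight of 0.5 reaches its equal point 6 / 0.5 = 12 jiffies
++ * after its last release; if the optimizer runs 8 jiffies after that release,
++ * dec_equal_point_delay() returns a delay of 12 - 8 = 4 jiffies.
++ */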
++
++static noinline jiffie_t inc_equal_point_delay(struct task_struct* t)
++{
++ if (_lt(FP(0), t->rt_param.opt_nw))
++ /* when t was released plus time needed to equalize
++ * minus now
++ */
++ return get_last_release(t) +
++ _round(_div( FP(t->rt_param.times.exec_time),
++ t->rt_param.opt_nw)) -
++ opt_time;
++ else
++ /* if the weight is zero we just take the
++ * deadline
++ */
++ return t->rt_param.times.deadline;
++}
++
++static noinline jiffie_t decrease_delay(struct task_struct* t)
++{
++ if (has_active_job(t) && !is_under_allocated(t))
++ return dec_equal_point_delay(t);
++ return 0;
++}
++
++
++
++/******************************************************************************/
++/* SORT ORDERS */
++/******************************************************************************/
++
++static int by_linear_metric(struct list_head* a, struct list_head* b)
++{
++ struct task_struct *ta, *tb;
++ ta = list_entry(a, struct task_struct, rt_param.opt_list);
++ tb = list_entry(b, struct task_struct, rt_param.opt_list);
++ return _gt(ta->rt_param.opt_order, tb->rt_param.opt_order);
++}
++
++static int by_delta_weight(struct list_head* a, struct list_head* b)
++{
++ struct task_struct *ta, *tb;
++ ta = list_entry(a, struct task_struct, rt_param.opt_list);
++ tb = list_entry(b, struct task_struct, rt_param.opt_list);
++ return _lt(ta->rt_param.opt_dw, tb->rt_param.opt_dw);
++}
++
++static int by_enactment_time(struct list_head* a, struct list_head* b)
++{
++ struct task_struct *ta, *tb;
++ ta = list_entry(a, struct task_struct, rt_param.opt_list);
++ tb = list_entry(b, struct task_struct, rt_param.opt_list);
++ return ta->rt_param.opt_change < tb->rt_param.opt_change;
++}
++
++/******************************************************************************/
++/* WEIGHT CHANGE MECHANICS */
++/******************************************************************************/
++
++static void set_service_level(struct task_struct* t, unsigned int level)
++{
++ service_level_t *new;
++ unsigned int old;
++ BUG_ON(!t);
++ BUG_ON(t->rt_param.no_service_levels <= level);
++
++ old = t->rt_param.cur_service_level;
++ t->rt_param.cur_service_level = level;
++ new = t->rt_param.service_level + level;
++ t->rt_param.basic_params.period = new->period;
++ t->rt_param.basic_params.exec_cost = _round(_mul(new->weight,
++ FP(new->period)));
++
++ scheduler_signal(t, SIGUSR1);
++
++ sched_trace_service_level_change(t, old, level);
++ OPT_DBG_T(t, "service level %u activated\n", level);
++}
++
++/* call this _before_ updating deadline and release of t */
++static void update_weight_estimate(struct task_struct* t)
++{
++ fp_t nw, ow;
++ jiffie_t sl_period, exec_time;
++
++ ow = get_est_weight(t);
++ nw = t->rt_param.opt_nw;
++ exec_time = t->rt_param.times.exec_time;
++ sl_period = get_sl(t, get_opt_sl(t)).period;
++
++ OPT_DBG("ow=" _FP_ " nw=" _FP_ ", r-d " _FP_
++ ", deadline %d, release %d, exec_time=%ld sl_period=%lu\n",
++ fp2str(ow), fp2str(nw),
++ fp2str(FP(get_deadline(t) - get_last_release(t))),
++ get_deadline(t), get_last_release(t), exec_time, sl_period);
++
++ total_weight = _sub(total_weight, get_est_weight(t));
++ t->rt_param.predictor_state.estimate = nw;
++ OPT_DBG_T(t, "update_weight_estimate from " _FP_ " to "_FP_"\n",
++ fp2str(ow), fp2str(nw));
++ total_weight = _add(total_weight, get_est_weight(t));
++
++ OPT_DBG_T(t, " update_weight_estimate: " _FP_ " => " _FP_ "\n",
++ fp2str(ow), fp2str(get_est_weight(t)));
++}
++
++
++static void decrease_weight(struct task_struct* t)
++{
++ fp_t ow, nw;
++ jiffie_t last, period, delay;
++
++ ow = get_sl(t, get_cur_sl(t)).weight;
++ nw = get_sl(t, get_opt_sl(t)).weight;
++ last = t->rt_param.times.last_release;
++ period = reweighted_period(ow, nw, t->rt_param.times.exec_time,
++ t->rt_param.times.deadline, last);
++
++ /* necessary delay has already been computed by optimizer */
++ delay = t->rt_param.opt_change;
++
++ update_weight_estimate(t);
++
++ if (!delay)
++ t->rt_param.times.last_release = opt_time;
++ t->rt_param.times.release = opt_time + delay;
++ t->rt_param.times.deadline = opt_time + delay + period;
++
++ set_service_level(t, get_opt_sl(t));
++
++ /* take out of queue/link structure */
++ unlink(t);
++ /* present as a new job */
++ adaptive_job_arrival(t);
++}
++
++
++static void increase_weight(struct task_struct* t)
++{
++ fp_t ow, nw;
++ jiffie_t last, period, delay;
++
++ ow = get_sl(t, get_cur_sl(t)).weight;
++ nw = get_sl(t, get_opt_sl(t)).weight;
++ last = t->rt_param.times.last_release;
++ period = reweighted_period(ow, nw, t->rt_param.times.exec_time,
++ t->rt_param.times.deadline, last);
++
++ if (t->rt_param.opt_change == 0) {
++ /* can be enacted now */
++ if (is_under_allocated(t) ||
++ time_before(opt_time + period, get_deadline(t)))
++ /* do it now */
++ delay = 0;
++ else {
++ if (is_under_allocated(t)) {
++ t->rt_param.opt_change += opt_time;
++ /* The next job release will notice that opt !=
++ * sl and initiate a weight change.
++ */
++ return;
++ } else
++ /* nope, wait for equal point */
++ delay = inc_equal_point_delay(t);
++ }
++
++ update_weight_estimate(t);
++
++ if (!delay)
++ t->rt_param.times.last_release = opt_time;
++ t->rt_param.times.release = opt_time + delay;
++ t->rt_param.times.deadline = opt_time + delay + period;
++
++ set_service_level(t, get_opt_sl(t));
++
++ /* take out of queue/link structure */
++ unlink(t);
++ /* present as a new job */
++ adaptive_job_arrival(t);
++
++ } else {
++ /* must wait until capacity is released */
++ t->rt_param.opt_change += opt_time;
++ list_insert(&t->rt_param.opt_list, &adaptive_inc_list,
++ by_enactment_time);
++ }
++}
++
++static void delayed_increase_weight(void)
++{
++ struct list_head *p, *extra;
++ struct task_struct* t;
++
++ opt_time = jiffies;
++ list_for_each_safe(p, extra, &adaptive_inc_list) {
++ t = list_entry(p, struct task_struct, rt_param.opt_list);
++ if (time_before_eq(t->rt_param.opt_change, opt_time)) {
++ list_del(p);
++ /* prevent recursion */
++ t->rt_param.opt_change = 0;
++ /* this takes care of everything */
++ increase_weight(t);
++ } else
++ /* list is sorted */
++ break;
++ }
++}
++
++static void change_weight(struct task_struct* t)
++{
++ if (get_cur_sl(t) < get_opt_sl(t))
++ increase_weight(t);
++ else
++ decrease_weight(t);
++ OPT_DBG_T(t, "after change_weight: last_rel:%d rel:%d dl:%d\n",
++ get_last_release(t),
++ get_release(t),
++ get_deadline(t));
++}
++
++/******************************************************************************/
++/* OPTIMIZER */
++/******************************************************************************/
++
++/* only invoke with adaptive_lock held */
++void adaptive_optimize(void)
++{
++ struct list_head list;
++ struct list_head inc, dec;
++ struct list_head *p, *extra;
++ cpu_entry_t *cpu;
++ struct task_struct* t;
++ fp_t M = FP(0), w0, wl, tmp, estU = FP(0);
++ unsigned int l;
++ jiffie_t enactment_time;
++
++ if (time_before(jiffies,
++ last_optimizer_run + optimizer_min_invocation_sep))
++ return;
++
++ OPT_DBG(":::::: running adaptive optimizer\n");
++ opt_time = jiffies;
++
++ INIT_LIST_HEAD(&list);
++
++ /* 1) gather all tasks */
++ list_for_each(p, &adaptive.ready_queue)
++ list_add(&(rt_list2task(p)->rt_param.opt_list), &list);
++ list_for_each(p, &adaptive.release_queue)
++ list_add(&(rt_list2task(p)->rt_param.opt_list), &list);
++ list_for_each(p, &adaptive_cpu_queue) {
++ cpu = list_entry(p, cpu_entry_t, list);
++ if (cpu->linked)
++ list_add(&cpu->linked->rt_param.opt_list, &list);
++ }
++
++ /* 2) determine current system capacity */
++ M = system_capacity;
++ OPT_DBG("opt: system capacity: " _FP_ "\n", fp2str(M));
++
++ /* 3) Compute L value for all tasks,
++ * and set tasks to service level 0,
++ * also account for weight.
++ * Also establish current estimated utilization
++ */
++ list_for_each_safe(p, extra, &list) {
++ t = list_entry(p, struct task_struct, rt_param.opt_list);
++ if (time_before(opt_time, get_last_release(t))) {
++ list_del(p);
++ continue;
++ }
++ t->rt_param.opt_order = linear_metric(t);
++ OPT_DBG_T(t, "est_w = " _FP_ " L = " _FP_ "\n",
++ get_est_weight(t),
++ fp2str(t->rt_param.opt_order));
++ t->rt_param.opt_level = 0;
++ M = _sub(M, est_weight_at(t, 0));
++ estU = _add(estU, get_est_weight(t));
++ }
++ OPT_DBG("opt: estimated utilization: " _FP_ "\n", fp2str(estU));
++ OPT_DBG("opt: estimated capacity at all sl=0: " _FP_ "\n", fp2str(M));
++
++
++ /* 4) sort list by decreasing linear metric */
++ list_qsort(&list, by_linear_metric);
++
++ /* 5) assign each task a service level */
++ list_for_each(p, &list) {
++ t = list_entry(p, struct task_struct, rt_param.opt_list);
++ l = t->rt_param.no_service_levels;
++ w0 = est_weight_at(t, 0);
++ while (l > 1) {
++ l--;
++ wl = est_weight_at(t, l);
++ tmp = _sub(M, _sub(wl, w0));
++ if (_leq(FP(0), tmp)) {
++ /* this level fits in */
++ M = tmp;
++ t->rt_param.opt_level = l;
++ t->rt_param.opt_dw = _sub(wl,
++ get_est_weight(t));
++ t->rt_param.opt_nw = wl;
++ break; /* proceed to next task */
++ }
++ }
++ OPT_DBG_T(t, " will run at sl=%u, prior=%u dw=" _FP_ "\n",
++ l, get_cur_sl(t), fp2str(t->rt_param.opt_dw));
++
++ }
++
++ /* 6) filter tasks that reweight */
++ INIT_LIST_HEAD(&inc);
++ INIT_LIST_HEAD(&dec);
++ list_for_each_safe(p, extra, &list) {
++ t = list_entry(p, struct task_struct, rt_param.opt_list);
++ list_del(p);
++ if (t->rt_param.opt_level < get_cur_sl(t)) {
++ list_add(p, &dec);
++ t->rt_param.opt_change = decrease_delay(t);
++ } else if (t->rt_param.opt_level > get_cur_sl(t)) {
++ list_add(p, &inc);
++ t->rt_param.opt_change = 0;
++ }
++ /* if t doesn't change we can ignore it from now on */
++ }
++
++ /* 7) sort dec and inc list */
++ list_qsort(&dec, by_enactment_time);
++ list_qsort(&inc, by_delta_weight);
++
++ /* 8) now figure out when we can enact weight increases
++ * It works like this: We know the current system utilization.
++ * Thus, we know the remaining capacity. We also know when
++ * decreases are going to be enacted (=> capacity increases).
++ * Now we only need to find a spot where the weight increase will
++ * not drive the system into overload.
++ */
++
++ /* Very ugly jump, but we need to force enactment_time = 0
++ * during the first iteration.
++ */
++ M = system_capacity;
++ enactment_time = 0;
++ goto first_iteration;
++
++ while (!list_empty(&inc)) {
++ enactment_time = list_entry(dec.next, struct task_struct,
++ rt_param.opt_list)
++ ->rt_param.opt_change;
++ first_iteration:
++ /* Start by collapsing the next decrease.
++ * Except for in the first iteration, it will always
++ * pick off at least one task.
++ */
++ list_for_each_safe(p, extra, &dec) {
++ t = list_entry(p, struct task_struct,
++ rt_param.opt_list);
++ if (t->rt_param.opt_change == enactment_time) {
++ list_del(p);
++ /* opt_dw is negative */
++ estU = _add(estU, t->rt_param.opt_dw);
++ list_add(p, &list);
++
++ OPT_DBG_T(t, " weight decrease at %ld => estU="
++ _FP_ "\n", enactment_time,
++ fp2str(estU));
++
++ } else
++ /* stop decrease loop */
++ break;
++ }
++
++ /* now start setting enactment times for increases */
++ while (!list_empty(&inc)) {
++ p = inc.next;
++ t = list_entry(p, struct task_struct,
++ rt_param.opt_list);
++ tmp = _add(estU, t->rt_param.opt_dw);
++ if (_leq(tmp, M)) {
++ /* it fits */
++ estU = tmp;
++ t->rt_param.opt_change = enactment_time;
++ list_del(p);
++ list_add(p, &list);
++
++ OPT_DBG_T(t, " weight increase at %ld => estU="
++ _FP_ "\n", enactment_time,
++ fp2str(estU));
++
++ } else
++ /* stop increase loop */
++ break;
++ }
++
++ TRACE_BUG_ON(list_empty(&dec) && !list_empty(&inc));
++ if (list_empty(&dec) && !list_empty(&inc))
++ /* break out in case of bug */
++ break;
++ }
++
++ /* 9) Wow. We made it. Every task now has a new service level
++ * assigned, together with a correct (earliest) enactment time.
++ * All we have left to do now is to enact changes that did not get
++ * delayed. Also convert change fields to actual timestamps to be
++ * nice to scheduler_tick().
++ */
++ INIT_LIST_HEAD(&adaptive_inc_list);
++ list_for_each_safe(p, extra, &list) {
++ t = list_entry(p, struct task_struct, rt_param.opt_list);
++ list_del(p);
++ change_weight(t);
++ }
++
++ last_optimizer_run = jiffies;
++ OPT_DBG(":::::: optimizer run complete\n");
++}
++
++/* update_cpu_position - Move the cpu entry to the correct place to maintain
++ * order in the cpu queue. Caller must hold adaptive lock.
++ */
++static void update_cpu_position(cpu_entry_t *entry)
++{
++ cpu_entry_t *other;
++ struct list_head *pos;
++ list_del(&entry->list);
++ /* if we do not execute real-time jobs we just move
++ * to the end of the queue
++ */
++ if (entry->linked) {
++ list_for_each(pos, &adaptive_cpu_queue) {
++ other = list_entry(pos, cpu_entry_t, list);
++ if (edf_higher_prio(entry->linked, other->linked)) {
++ __list_add(&entry->list, pos->prev, pos);
++ return;
++ }
++ }
++ }
++ /* if we get this far we have the lowest priority job */
++ list_add_tail(&entry->list, &adaptive_cpu_queue);
++}
++
++/* link_task_to_cpu - Update the link of a CPU.
++ * Handles the case where the to-be-linked task is already
++ * scheduled on a different CPU.
++ */
++static noinline void link_task_to_cpu(struct task_struct* linked,
++ cpu_entry_t *entry)
++
++{
++ cpu_entry_t *sched;
++ struct task_struct* tmp;
++ int on_cpu;
++
++ BUG_ON(linked && !is_realtime(linked));
++
++ /* Currently linked task is set to be unlinked. */
++ if (entry->linked)
++ entry->linked->rt_param.linked_on = NO_CPU;
++
++ /* Link new task to CPU. */
++ if (linked) {
++ set_rt_flags(linked, RT_F_RUNNING);
++ /* handle the case where the task is already scheduled somewhere else */
++ on_cpu = linked->rt_param.scheduled_on;
++ if (on_cpu != NO_CPU) {
++ sched = &per_cpu(adaptive_cpu_entries, on_cpu);
++ /* this should only happen if not linked already */
++ BUG_ON(sched->linked == linked);
++
++ /* If we are already scheduled on the CPU to which we
++ * wanted to link, we don't need to do the swap --
++ * we just link ourselves to the CPU and depend on
++ * the caller to get things right.
++ */
++ if (entry != sched) {
++ tmp = sched->linked;
++ linked->rt_param.linked_on = sched->cpu;
++ sched->linked = linked;
++ update_cpu_position(sched);
++ linked = tmp;
++ }
++ }
++ if (linked) /* might be NULL due to swap */
++ linked->rt_param.linked_on = entry->cpu;
++ }
++ entry->linked = linked;
++ update_cpu_position(entry);
++}
++
++/* unlink - Make sure a task is not linked any longer to an entry
++ * where it was linked before. Must hold adaptive_lock.
++ */
++static void unlink(struct task_struct* t)
++{
++ cpu_entry_t *entry;
++
++ if (unlikely(!t)) {
++ TRACE_BUG_ON(!t);
++ return;
++ }
++
++ if (t->rt_param.linked_on != NO_CPU) {
++ /* unlink */
++ entry = &per_cpu(adaptive_cpu_entries, t->rt_param.linked_on);
++ t->rt_param.linked_on = NO_CPU;
++ link_task_to_cpu(NULL, entry);
++ } else if (in_list(&t->rt_list)) {
++ /* This is an interesting situation: t is scheduled,
++ * but was just recently unlinked. It cannot be
++ * linked anywhere else (because then it would have
++ * been relinked to this CPU), thus it must be in some
++ * queue. We must remove it from the list in this
++ * case.
++ */
++ list_del(&t->rt_list);
++ }
++}
++
++
++/* preempt - force a CPU to reschedule
++ */
++static noinline void preempt(cpu_entry_t *entry)
++{
++ /* We cannot make the is_np() decision here if it is a remote CPU
++ * because requesting exit_np() requires that we currently use the
++ * address space of the task. Thus, in the remote case we just send
++ * the IPI and let schedule() handle the problem.
++ */
++
++ if (smp_processor_id() == entry->cpu) {
++ if (entry->scheduled && is_np(entry->scheduled))
++ request_exit_np(entry->scheduled);
++ else
++ set_tsk_need_resched(current);
++ } else
++ /* in case that it is a remote CPU we have to defer the
++ * the decision to the remote CPU
++ */
++ if (!test_will_schedule(entry->cpu))
++ smp_send_reschedule(entry->cpu);
++}
++
++/* requeue - Put an unlinked task into the adaptive domain.
++ * Caller must hold adaptive_lock.
++ */
++static noinline void requeue(struct task_struct* task)
++{
++ BUG_ON(!task);
++ /* sanity check rt_list before insertion */
++ BUG_ON(in_list(&task->rt_list));
++
++ if (get_rt_flags(task) == RT_F_SLEEP ||
++ get_rt_mode() != MODE_RT_RUN) {
++ /* this task has expired:
++ * _schedule has already taken care of updating the release
++ * and deadline. We just need to check if it has been released.
++ */
++ if (is_released(task) && get_rt_mode() == MODE_RT_RUN)
++ __add_ready(&adaptive, task);
++ else {
++ /* it has got to wait */
++ __add_release(&adaptive, task);
++ }
++
++ } else
++ /* this is a forced preemption
++ * thus the task stays in the ready_queue
++ * we only need to make it available to others
++ */
++ __add_ready(&adaptive, task);
++}
++
++/* adaptive_job_arrival: task is either resumed or released */
++static void adaptive_job_arrival(struct task_struct* task)
++{
++ cpu_entry_t* last;
++
++ BUG_ON(list_empty(&adaptive_cpu_queue));
++ BUG_ON(!task);
++
++ TRACE_TASK(task, "job_arrival: last_rel=%d rel=%d dl=%d now=%d\n",
++ get_last_release(task), get_release(task),
++ get_deadline(task),
++ jiffies);
++
++
++ /* first queue arriving job */
++ requeue(task);
++
++ /* then check for any necessary preemptions */
++ last = list_entry(adaptive_cpu_queue.prev, cpu_entry_t, list);
++ if (edf_preemption_needed(&adaptive, last->linked)) {
++ /* preemption necessary */
++ task = __take_ready(&adaptive);
++
++ TRACE("job_arrival: task %d linked to %d\n",
++ task->pid, last->cpu);
++
++ if (last->linked)
++ requeue(last->linked);
++
++ link_task_to_cpu(task, last);
++ preempt(last);
++ }
++}
++
++/* check for current job releases */
++static noinline void adaptive_release_jobs(void)
++{
++ struct list_head *pos, *save;
++ struct task_struct *queued;
++
++ list_for_each_safe(pos, save, &adaptive.release_queue) {
++ queued = list_entry(pos, struct task_struct, rt_list);
++ if (likely(is_released(queued))) {
++ TRACE_TASK(queued, "released rel=%d now=%d\n",
++ get_release(queued), jiffies);
++ /* this one is ready to go*/
++ list_del(pos);
++ set_rt_flags(queued, RT_F_RUNNING);
++ queued->rt_param.times.last_release =
++ queued->rt_param.times.release;
++
++ /* check for delayed weight increase */
++ if (get_opt_sl(queued) != get_cur_sl(queued) &&
++ time_before_eq(queued->rt_param.opt_change, jiffies)) {
++ opt_time = jiffies;
++ set_service_level(queued, get_opt_sl(queued));
++ queued->rt_param.times.deadline =
++ get_last_release(queued) +
++ get_rt_period(queued);
++ total_weight = _sub(total_weight, get_est_weight(queued));
++ queued->rt_param.predictor_state.estimate =
++ queued->rt_param.opt_nw;
++ total_weight = _add(total_weight, get_est_weight(queued));
++ }
++
++ sched_trace_job_release(queued);
++ adaptive_job_arrival(queued);
++ }
++ else
++ /* the release queue is ordered */
++ break;
++ }
++}
++
++/* adaptive_scheduler_tick - this function is called for every local timer
++ * interrupt.
++ *
++ * checks whether the current task has expired and checks
++ * whether we need to preempt it if it has not expired
++ */
++static reschedule_check_t adaptive_scheduler_tick(void)
++{
++ unsigned long flags;
++ struct task_struct* t = current;
++ reschedule_check_t want_resched = NO_RESCHED;
++
++ /* Account for exec time.
++ * Since we don't preempt forcefully, nothing else needs to be done.
++ */
++ if (is_realtime(t))
++ t->rt_param.times.exec_time++;
++
++ /* only the first CPU needs to release jobs */
++ if (get_rt_mode() == MODE_RT_RUN) {
++ queue_lock_irqsave(&adaptive_lock, flags);
++
++ /* (1) run the optimizer if it did not trigger often enough */
++ if (time_before_eq(last_optimizer_run + optimizer_period, jiffies)) {
++
++ OPT_DBG("adaptive: optimizing due to period threshold\n");
++
++ adaptive_optimize();
++ }
++
++ /* (2) enact delayed weight increases */
++ delayed_increase_weight();
++
++ /* (3) try to release pending jobs */
++ adaptive_release_jobs();
++
++ /* we don't need to check linked != scheduled since
++ * set_tsk_need_resched has been set by preempt() if necessary
++ */
++
++ queue_unlock_irqrestore(&adaptive_lock, flags);
++ }
++
++ return want_resched;
++}
++
++/* caller holds adaptive_lock */
++static noinline void job_completion(struct task_struct *t)
++{
++ long delta;
++ fp_t actual_weight, old_estimate;
++ unsigned int lcurr = get_cur_sl(t);
++ fp_t v = t->rt_param.service_level[lcurr].value;
++
++ int non_zero_weight;
++ fp_t error_percentage;
++ int exceeds_threshold;
++
++ BUG_ON(!t);
++
++ TRACE_TASK(t, " completion, last_rel=%d rel=%d dl=%d now=%d "
++ "period=%d\n",
++ get_last_release(t), get_release(t), get_deadline(t),
++ jiffies, get_rt_period(t));
++
++ sched_trace_job_completion(t);
++ delta = t->rt_param.times.exec_time -
++ t->rt_param.basic_params.exec_cost;
++
++ OPT_DBG_T(t, "job %d completes, delta WCET = %d\n",
++ t->rt_param.times.job_no, delta);
++
++ actual_weight = _frac(t->rt_param.times.exec_time,
++ t->rt_param.basic_params.period);
++ sched_trace_weight_error(t, actual_weight);
++ old_estimate = get_est_weight(t);
++ update_estimate(&t->rt_param.predictor_state, actual_weight,
++ fc_a, fc_b);
++
++ OPT_DBG_T(t, "Job %d completes. Current value " _FP_
++ ", Weight estimation: error=" _FP_ " weight="
++ _FP_ " => " _FP_ "\n",t->rt_param.times.job_no, v,
++ _sub(get_est_weight(t), old_estimate),
++ old_estimate, get_est_weight(t));
++
++ /* Now we have determined the task error.
++ * Next we release the next job.
++ * Then we optimize. It's easier for the optimizer to deal
++ * with just-released jobs.
++ */
++
++ /* prepare for next period */
++ edf_prepare_for_next_period(t);
++
++ TRACE_TASK(t, " prepped, last_rel=%d rel=%d dl=%d now=%d\n",
++ get_last_release(t), get_release(t), get_deadline(t),
++ jiffies);
++
++ if (is_released(t)) {
++ /* set flags */
++ /* prevent fake completions */
++ set_rt_flags(t, RT_F_RUNNING);
++ t->rt_param.times.last_release =
++ t->rt_param.times.release;
++ }
++
++
++ non_zero_weight = !_eq(get_est_weight(t),FP(0));
++ if (non_zero_weight)
++ error_percentage = _div(_abs(_sub(get_est_weight(t),
++ old_estimate)),
++ get_est_weight(t));
++ else
++ error_percentage = FP(0);
++ exceeds_threshold = _gt(error_percentage, task_error_threshold);
++
++
++ if (exceeds_threshold) {
++ OPT_DBG("adaptive: optimizing due to task error threshold\n");
++ adaptive_optimize();
++ } else if (_gt(total_weight, system_capacity)) {
++ OPT_DBG("adaptive: optimizing due to system capacity exceeded\n");
++ adaptive_optimize();
++ }
++
++
++ /* unlink */
++ unlink(t);
++ /* requeue
++ * But don't requeue a blocking task. */
++ if (is_running(t))
++ adaptive_job_arrival(t);
++}
++
++
++/* Getting schedule() right is a bit tricky. schedule() may not make any
++ * assumptions on the state of the current task since it may be called for a
++ * number of reasons: because scheduler_tick() determined that it
++ * was necessary, because sys_exit_np() was called, because some Linux
++ * subsystem determined so, or even (in the worst case) because there is a bug
++ * hidden somewhere. Thus, we must take extreme care to determine what the
++ * current state is.
++ *
++ * The CPU could currently be scheduling a task (or not), be linked (or not).
++ *
++ * The following assertions for the scheduled task could hold:
++ *
++ * - !is_running(scheduled) // the job blocks
++ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
++ * - get_rt_flags() == RT_F_SLEEP // the job completed (by syscall)
++ *
++ * Any of these can occur together.
++ */
++static int adaptive_schedule(struct task_struct * prev,
++ struct task_struct ** next,
++ runqueue_t * rq)
++{
++ cpu_entry_t* entry = &__get_cpu_var(adaptive_cpu_entries);
++ int sleep, preempt, exists,
++ rt, blocks;
++ struct task_struct* linked;
++
++ /* Will be released in finish_switch. */
++ queue_lock(&adaptive_lock);
++ clear_will_schedule();
++
++ /* sanity checking */
++ BUG_ON(entry->scheduled && entry->scheduled != prev);
++ BUG_ON(entry->scheduled && !is_realtime(prev));
++
++ /* (0) Determine state */
++ exists = entry->scheduled != NULL;
++ blocks = exists && !is_running(entry->scheduled);
++ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
++ preempt = entry->scheduled != entry->linked;
++ rt = get_rt_mode() == MODE_RT_RUN;
++
++ /* If a task blocks we have no choice but to reschedule.
++ */
++ if (blocks)
++ unlink(entry->scheduled);
++
++ /* Task wants to sleep -> job is done.
++ */
++ if (sleep)
++ job_completion(entry->scheduled);
++
++ /* Stop real-time tasks when we leave real-time mode
++ */
++ if (!rt && entry->linked) {
++ /* task will be preempted once it is preemptable
++ * (which it may be already)
++ */
++ linked = entry->linked;
++ unlink(linked);
++ requeue(linked);
++ }
++
++ /* Link pending task if we became unlinked.
++ */
++ if (rt && !entry->linked)
++ link_task_to_cpu(__take_ready(&adaptive), entry);
++
++ /* The final scheduling decision. Do we need to switch for some reason?
++ * If linked different from scheduled select linked as next.
++ */
++ if (entry->linked != entry->scheduled) {
++ /* Take care of a previously scheduled
++ * job by taking it out of the Linux runqueue.
++ */
++ if (entry->scheduled)
++ if (prev->array)
++ /* take it out of the run queue */
++ deactivate_task(prev, rq);
++
++ /* Schedule a linked job? */
++ if (entry->linked) {
++ *next = entry->linked;
++ /* mark the task as executing on this cpu */
++ set_task_cpu(*next, smp_processor_id());
++ /* stick the task into the runqueue */
++ __activate_task(*next, rq);
++ }
++ } else
++ /* Only override Linux scheduler if we have real-time task
++ * scheduled that needs to continue.
++ */
++ if (exists)
++ *next = prev;
++
++ /* Unlock in case that we don't affect real-time tasks or
++ * if nothing changed and finish_switch won't be called.
++ */
++ if (prev == *next || (!is_realtime(prev) && !*next))
++ queue_unlock(&adaptive_lock);
++
++ return 0;
++}
++
++
++/* _finish_switch - we just finished the switch away from prev
++ */
++static void adaptive_finish_switch(struct task_struct *prev)
++{
++ cpu_entry_t* entry = &__get_cpu_var(adaptive_cpu_entries);
++
++ if (is_realtime(current))
++ entry->scheduled = current;
++ else
++ entry->scheduled = NULL;
++
++ prev->rt_param.scheduled_on = NO_CPU;
++ current->rt_param.scheduled_on = smp_processor_id();
++
++ /* unlock in case schedule() left it locked */
++ if (is_realtime(current) || is_realtime(prev))
++ queue_unlock(&adaptive_lock);
++}
++
++
++/* Prepare a task for running in RT mode
++ * Enqueues the task into master queue data structure
++ * returns
++ * -EPERM if task is not TASK_STOPPED
++ */
++static long adaptive_prepare_task(struct task_struct * t)
++{
++ unsigned long flags;
++
++ TRACE("adaptive: prepare task %d\n", t->pid);
++
++ if (t->state == TASK_STOPPED) {
++ __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);
++
++ t->rt_param.scheduled_on = NO_CPU;
++ t->rt_param.linked_on = NO_CPU;
++ if (t->rt_param.no_service_levels) {
++ t->rt_param.predictor_state.estimate =
++ get_sl(t, 0).weight;
++ } else
++ t->rt_param.predictor_state.estimate =
++ _frac(get_exec_cost(t), get_rt_period(t));
++
++ TRACE_TASK(t, "est_weight=" _FP_ "\n", get_est_weight(t));
++
++ if (get_rt_mode() == MODE_RT_RUN)
++ /* The action is already on.
++ * Prepare immediate release
++ */
++ edf_release_now(t);
++ /* The task should be running in the queue, otherwise signal
++ * code will try to wake it up with fatal consequences.
++ */
++ t->state = TASK_RUNNING;
++
++ queue_lock_irqsave(&adaptive_lock, flags);
++ total_weight = _add(total_weight, get_est_weight(t));
++ requeue(t);
++ queue_unlock_irqrestore(&adaptive_lock, flags);
++ return 0;
++ }
++ else
++ return -EPERM;
++}
++
++static void adaptive_wake_up_task(struct task_struct *task)
++{
++ unsigned long flags;
++ /* We must determine whether task should go into the release
++ * queue or into the ready queue. It may enter the ready queue
++ * if it has credit left in its time slice and has not yet reached
++ * its deadline. If it is now past its deadline we assume this is the
++ * arrival of a new sporadic job and thus put it in the ready queue
++ * anyway. If it has zero budget and the next release is in the future
++ * it has to go to the release queue.
++ */
++
++ TRACE("adaptive: %d unsuspends\n", task->pid);
++
++ task->state = TASK_RUNNING;
++
++ if (is_tardy(task)) {
++ /* new sporadic release */
++ edf_release_now(task);
++ sched_trace_job_release(task);
++ }
++ else if (task->time_slice)
++ /* came back in time before deadline */
++ set_rt_flags(task, RT_F_RUNNING);
++
++ queue_lock_irqsave(&adaptive_lock, flags);
++ total_weight = _add(total_weight, get_est_weight(task));
++ adaptive_job_arrival(task);
++ queue_unlock_irqrestore(&adaptive_lock, flags);
++}
++
++static void adaptive_task_blocks(struct task_struct *t)
++{
++ unsigned long flags;
++
++ /* unlink if necessary */
++ queue_lock_irqsave(&adaptive_lock, flags);
++ total_weight = _sub(total_weight, get_est_weight(t));
++ unlink(t);
++ queue_unlock_irqrestore(&adaptive_lock, flags);
++
++ BUG_ON(!is_realtime(t));
++
++ TRACE("task %d suspends\n", t->pid);
++
++ BUG_ON(t->rt_list.next != LIST_POISON1);
++ BUG_ON(t->rt_list.prev != LIST_POISON2);
++}
++
++
++/* When _tear_down is called, the task should not be in any queue any more
++ * as it must have blocked first. We don't have any internal state for the task,
++ * it is all in the task_struct.
++ */
++static long adaptive_tear_down(struct task_struct * t)
++{
++ BUG_ON(!is_realtime(t));
++ TRACE_TASK(t, "RIP\n");
++ BUG_ON(t->array);
++ BUG_ON(t->rt_list.next != LIST_POISON1);
++ BUG_ON(t->rt_list.prev != LIST_POISON2);
++ return 0;
++}
++
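++/* adaptive_mode_change - on entering real-time mode, recompute the system
++ * capacity from the online CPUs, re-release all tasks, recompute the total
++ * estimated weight, rebuild the CPU queue, and invoke the optimizer once.
++ */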
++static int adaptive_mode_change(int new_mode)
++{
++ unsigned long flags;
++ int cpu;
++ cpu_entry_t *entry;
++ struct task_struct* t;
++ struct list_head* pos;
++
++ if (new_mode == MODE_RT_RUN) {
++ queue_lock_irqsave(&adaptive_lock, flags);
++
++ system_capacity = FP(0);
++ for_each_online_cpu(cpu)
++ system_capacity = _add(system_capacity, FP(1));
++
++ __rerelease_all(&adaptive, edf_release_at);
++
++ total_weight = FP(0);
++ list_for_each(pos, &adaptive.release_queue) {
++ t = list_entry(pos, struct task_struct, rt_list);
++ total_weight = _add(total_weight, get_est_weight(t));
++ }
++ TRACE("adaptive: total weight: " _FP_
++ " (at mode change)\n", total_weight);
++
++
++ /* get old cruft out of the way in case we reenter real-time
++ * mode for a second time
++ */
++ while (!list_empty(&adaptive_cpu_queue))
++ list_del(adaptive_cpu_queue.next);
++ /* reinitialize */
++ for_each_online_cpu(cpu) {
++ entry = &per_cpu(adaptive_cpu_entries, cpu);
++ atomic_set(&entry->will_schedule, 0);
++ entry->linked = NULL;
++ entry->scheduled = NULL;
++ list_add(&entry->list, &adaptive_cpu_queue);
++ }
++
++ adaptive_optimize();
++
++ queue_unlock_irqrestore(&adaptive_lock, flags);
++
++ }
++ return 0;
++}
++
++
++typedef enum {
++ ADAPTIVE_SET_MIN_OPT_SEP = 1
++} adaptive_cmds_t;
++
++
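++/* adaptive_setup - handle scheduler_setup() requests from user space.
++ * Currently only ADAPTIVE_SET_MIN_OPT_SEP is supported, which sets the
++ * minimum separation between optimizer invocations.
++ */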
++static int adaptive_setup(int cmd, void __user *up)
++{
++ unsigned int error = -EINVAL;
++ unsigned int val;
++
++ if (copy_from_user(&val, up, sizeof(unsigned int))) {
++ error = -EFAULT;
++ goto out;
++ }
++
++ switch (cmd) {
++ case ADAPTIVE_SET_MIN_OPT_SEP:
++ optimizer_min_invocation_sep = val;
++ TRACE("adaptive: min opt sep set to %d\n",
++ optimizer_min_invocation_sep);
++ return 0;
++ break;
++ }
++
++out:
++ return error;
++}
++
++
++/* Plugin object */
++static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
++ .ready_to_use = 0
++};
++
++
++/*
++ * Plugin initialization code.
++ */
++#define INIT_SCHED_PLUGIN (struct sched_plugin){ \
++ .plugin_name = "ADAPTIVE", \
++ .ready_to_use = 1, \
++ .scheduler_tick = adaptive_scheduler_tick, \
++ .prepare_task = adaptive_prepare_task, \
++ .sleep_next_period = edf_sleep_next_period, \
++ .tear_down = adaptive_tear_down, \
++ .schedule = adaptive_schedule, \
++ .finish_switch = adaptive_finish_switch, \
++ .mode_change = adaptive_mode_change, \
++ .wake_up_task = adaptive_wake_up_task, \
++ .task_blocks = adaptive_task_blocks, \
++ .scheduler_setup = adaptive_setup \
++}
++
++
++sched_plugin_t *__init init_adaptive_plugin(void)
++{
++ int cpu;
++ cpu_entry_t *entry;
++
++ /* magic values given in the paper */
++ fc_a = _frac( 102, 1000);
++ fc_b = _frac( 303, 1000);
++
++ optimizer_period = 1000;
++ optimizer_min_invocation_sep = 200;
++ task_error_threshold = _frac(1, 2);
++
++ if (!s_plugin.ready_to_use)
++ {
++ /* initialize CPU state */
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ entry = &per_cpu(adaptive_cpu_entries, cpu);
++ atomic_set(&entry->will_schedule, 0);
++ entry->linked = NULL;
++ entry->scheduled = NULL;
++ entry->cpu = cpu;
++ }
++
++ queue_lock_init(&adaptive_lock);
++ edf_domain_init(&adaptive, NULL);
++ s_plugin = INIT_SCHED_PLUGIN;
++ }
++ return &s_plugin;
++}
++
++
+diff --git a/kernel/sched_edf_hsb.c b/kernel/sched_edf_hsb.c
+new file mode 100644
+index 0000000..a2f670d
+--- /dev/null
++++ b/kernel/sched_edf_hsb.c
+@@ -0,0 +1,1724 @@
++/*
++ * kernel/sched_edf_hsb.c
++ *
++ * Implementation of the EDF-HSB scheduler plugin.
++ *
++ */
++
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++#include
++
++/* undefine to remove capacity sharing */
++#define HSB_CAP_SHARE_ENABLED
++
++/* fake server PIDs */
++#define HRT_BASE_PID 50000
++#define SRT_BASE_PID 60000
++
++
++/******************************************************************************/
++/* Capacity queue */
++/******************************************************************************/
++
++int cap_check_resched(jiffie_t deadline);
++
++typedef struct {
++ int budget;
++ jiffie_t deadline;
++ pid_t donor;
++
++ struct list_head list;
++} capacity_t;
++
++typedef struct {
++ spinlock_t lock;
++ struct list_head queue;
++} capacity_queue_t;
++
++#define next_cap(q) list_entry((q)->queue.next, capacity_t, list)
++
++void capacity_queue_init(capacity_queue_t* queue)
++{
++ queue->lock = SPIN_LOCK_UNLOCKED;
++ INIT_LIST_HEAD(&queue->queue);
++}
++
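++/* __add_capacity - insert a spare capacity into the queue in order of
++ * non-decreasing deadlines. The list is scanned from the tail and the
++ * capacity is placed after the last entry with an earlier or equal
++ * deadline. Callers are expected to hold queue->lock.
++ */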
++void __add_capacity(capacity_queue_t* queue, capacity_t *cap)
++{
++ struct list_head* pos;
++ capacity_t* queued;
++
++ list_for_each_prev(pos, &queue->queue) {
++ queued = list_entry(pos, capacity_t, list);
++ if ( time_before_eq(queued->deadline, cap->deadline)) {
++ __list_add(&cap->list, pos, pos->next);
++ return;
++ }
++ }
++ list_add(&cap->list, &queue->queue);
++}
++
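++/* __capacity_available - discard capacities whose deadlines have already
++ * passed and report whether any usable spare capacity remains.
++ */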
++int __capacity_available(capacity_queue_t* queue)
++{
++ capacity_t *cap;
++
++ while (!list_empty(&queue->queue)) {
++ cap = list_entry(queue->queue.next, capacity_t, list);
++
++
++ if (time_before_eq(cap->deadline, jiffies)) {
++ list_del(queue->queue.next);
++ kfree(cap);
++ cap = NULL;
++ } else
++ break;
++ }
++
++ return !list_empty(&queue->queue);
++}
++
++void __return_capacity(capacity_queue_t* queue, capacity_t *cap)
++{
++ if (!cap->budget || time_before_eq(cap->deadline, jiffies))
++ kfree(cap);
++ else
++ __add_capacity(queue, cap);
++}
++
++
++void return_capacity(capacity_queue_t* queue, capacity_t *cap)
++{
++ unsigned long flags;
++
++ if (!cap->budget || time_before_eq(cap->deadline, jiffies))
++ kfree(cap);
++ else {
++ spin_lock_irqsave(&queue->lock, flags);
++ __add_capacity(queue, cap);
++ spin_unlock_irqrestore(&queue->lock, flags);
++ }
++}
++
++
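++/* Capacities with less than MIN_BUDGET left, or whose deadline is less than
++ * MIN_TIME_DELTA jiffies in the future, are too small to be useful and are
++ * not stored.
++ */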
++#define MIN_TIME_DELTA 1
++#define MIN_BUDGET 1
++
++#ifdef HSB_CAP_SHARE_ENABLED
++void release_capacity(capacity_queue_t* queue, unsigned int budget,
++ jiffie_t deadline, struct task_struct* t)
++{
++ capacity_t* cap;
++ unsigned long flags;
++
++ if (deadline >= jiffies + MIN_TIME_DELTA && budget >= MIN_BUDGET) {
++ cap = kmalloc(sizeof(capacity_t), GFP_ATOMIC);
++ if (cap) {
++ cap->budget = budget;
++ cap->deadline = deadline;
++ if (t)
++ cap->donor = t->pid;
++ else
++ cap->donor = 0;
++ spin_lock_irqsave(&queue->lock, flags);
++ __add_capacity(queue, cap);
++ cap_check_resched(next_cap(queue)->deadline);
++ spin_unlock_irqrestore(&queue->lock, flags);
++ if (t)
++ sched_trace_capacity_release(t);
++ }
++ }
++}
++
++void __release_capacity(capacity_queue_t* queue, unsigned int budget,
++ jiffie_t deadline, struct task_struct* t)
++{
++ capacity_t* cap;
++
++ if (deadline >= jiffies + MIN_TIME_DELTA && budget >= MIN_BUDGET) {
++ cap = kmalloc(sizeof(capacity_t), GFP_ATOMIC);
++ if (cap) {
++ cap->budget = budget;
++ cap->deadline = deadline;
++ if (t)
++ cap->donor = t->pid;
++ else
++ cap->donor = 0;
++ /* no locking, no resched check -- called from schedule */
++ __add_capacity(queue, cap);
++ if (t)
++ sched_trace_capacity_release(t);
++ }
++ }
++}
++
++
++capacity_t* __take_capacity(capacity_queue_t* queue, jiffie_t deadline, int deadline_matters)
++{
++ capacity_t* cap = NULL;
++
++ while (!list_empty(&queue->queue)) {
++ cap = list_entry(queue->queue.next, capacity_t, list);
++
++ if (deadline_matters && time_before(deadline, cap->deadline)) {
++ cap = NULL;
++ break;
++ }
++
++ list_del(queue->queue.next);
++ if (cap->deadline > jiffies) {
++ if (cap->deadline - jiffies < cap->budget)
++ cap->budget = cap->deadline - jiffies;
++ break;
++ }
++ kfree(cap);
++ cap = NULL;
++ }
++
++ return cap;
++}
++#else
++
++/* no capacity sharing */
++void release_capacity(capacity_queue_t* queue, unsigned int budget,
++ jiffie_t deadline, struct task_struct* t)
++{
++}
++
++capacity_t* __take_capacity(capacity_queue_t* queue, jiffie_t deadline, int deadline_matters)
++{
++ return NULL;
++}
++#endif
++
++
++/******************************************************************************/
++/* server abstractions */
++/******************************************************************************/
++
++
++/* hrt_server_t - Abstraction of a hard real-time server.
++ *
++ * One HRT server per CPU. If it is unused, period and wcet may be zero.
++ * HRT servers are strictly periodic and retain their budget.
++ */
++typedef struct {
++ rt_domain_t domain;
++
++ unsigned int period;
++ unsigned int wcet;
++
++ jiffie_t deadline;
++ int budget;
++} hrt_server_t;
++
++/* be_server_t - Abstraction of best-effort server.
++ *
++ * This is pretty much only an accounting abstraction.
++ */
++typedef struct {
++ unsigned int period;
++ unsigned int wcet;
++
++ jiffie_t deadline;
++ jiffie_t release;
++ int budget;
++
++ struct list_head list;
++ pid_t pid;
++} be_server_t;
++
++/* cast to int to allow for negative slack, i.e. tardiness */
++#define server_slack(srv) \
++ ( ((int) (srv)->deadline - (int) jiffies) - (int) (srv)->budget )
++
++typedef struct {
++ int cpu;
++
++ hrt_server_t hrt;
++ be_server_t* be;
++ capacity_t* cap;
++
++ task_class_t exec_class;
++ jiffie_t cur_deadline;
++ atomic_t will_schedule;
++
++ struct list_head list;
++ spinlock_t lock;
++} cpu_state_t;
++
++
++DEFINE_PER_CPU(cpu_state_t, hsb_cpu_state);
++
++#define hrt_dom(cpu) (&per_cpu(hsb_cpu_state, cpu).hrt.domain)
++
++#define set_will_schedule() \
++ (atomic_set(&__get_cpu_var(hsb_cpu_state).will_schedule, 1))
++#define clear_will_schedule() \
++ (atomic_set(&__get_cpu_var(hsb_cpu_state).will_schedule, 0))
++#define test_will_schedule(cpu) \
++ (atomic_read(&per_cpu(hsb_cpu_state, cpu).will_schedule))
++
++
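++/* prepare_hrt_release - reset a configured HRT server so that
++ * check_for_hrt_release() issues a fresh budget at time 'start'.
++ */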
++static void prepare_hrt_release(hrt_server_t *srv, jiffie_t start)
++{
++ if (srv->period && srv->wcet) {
++ srv->deadline = start;
++ srv->budget = 0;
++ }
++}
++
++static void check_for_hrt_release(hrt_server_t *srv) {
++ if (srv->wcet && srv->period &&
++ time_before_eq(srv->deadline, jiffies)) {
++ srv->deadline += srv->period;
++ srv->budget = srv->wcet;
++ sched_trace_server_release(HRT_BASE_PID + smp_processor_id(),
++ srv->budget, srv->period, RT_CLASS_HARD);
++ }
++}
++
++/* An HRT client is eligible if either its deadline is before the
++ * server deadline or if the server has zero slack. The server
++ * must have budget left.
++ */
++static inline int hrt_client_eligible(hrt_server_t *srv)
++{
++ if (!list_empty(&srv->domain.ready_queue))
++ return srv->budget && (
++ time_before(get_deadline(next_ready(&srv->domain)),
++ srv->deadline)
++ || server_slack(srv) <= 0);
++ else
++ return 0;
++}
++
++static void hsb_cpu_state_init(cpu_state_t* cpu_state,
++ check_resched_needed_t check,
++ int cpu)
++{
++ edf_domain_init(&cpu_state->hrt.domain, check);
++ cpu_state->hrt.budget = 0;
++ cpu_state->hrt.deadline = 0;
++ cpu_state->hrt.period = 0;
++ cpu_state->hrt.wcet = 0;
++
++ cpu_state->be = NULL;
++ cpu_state->cap = NULL;
++
++ cpu_state->cur_deadline = 0;
++ cpu_state->cpu = cpu;
++ cpu_state->lock = SPIN_LOCK_UNLOCKED;
++ cpu_state->exec_class = RT_CLASS_BEST_EFFORT;
++
++ atomic_set(&cpu_state->will_schedule, 0);
++ INIT_LIST_HEAD(&cpu_state->list);
++}
++
++/******************************************************************************/
++/* BE queue functions - mostly like edf_common.c */
++/******************************************************************************/
++
++#define be_earlier_deadline(a, b) (time_before(\
++ (a)->deadline, (b)->deadline))
++#define be_earlier_release(a, b) (time_before(\
++ (a)->release, (b)->release))
++
++
++static void be_add_ready(rt_domain_t* edf, be_server_t *new)
++{
++ unsigned long flags;
++ struct list_head *pos;
++ be_server_t *queued;
++ unsigned int passed = 0;
++
++ BUG_ON(!new);
++ /* first we need the write lock for rt_ready_queue */
++ write_lock_irqsave(&edf->ready_lock, flags);
++ /* find a spot where our deadline is earlier than the next */
++ list_for_each(pos, &edf->ready_queue) {
++ queued = list_entry(pos, be_server_t, list);
++ if (unlikely(be_earlier_deadline(new, queued))) {
++ __list_add(&new->list, pos->prev, pos);
++ goto out;
++ }
++ passed++;
++ }
++ /* if we get to this point either the list is empty or new has the
++ * lowest priority. Let's add it to the end. */
++ list_add_tail(&new->list, &edf->ready_queue);
++ out:
++ if (!passed)
++ edf->check_resched(edf);
++ write_unlock_irqrestore(&edf->ready_lock, flags);
++}
++
++static be_server_t* be_take_ready(rt_domain_t* edf)
++{
++ be_server_t *t = NULL;
++
++ if (!list_empty(&edf->ready_queue)) {
++ t = list_entry(edf->ready_queue.next, be_server_t, list);
++ /* kick it out of the ready list */
++ list_del(&t->list);
++ }
++ return t;
++}
++
++/*static be_server_t* get_be_server(rt_domain_t* edf)
++{
++ be_server_t *t = NULL;
++
++ spin_lock(&edf->release_lock);
++ write_lock(&edf->ready_lock);
++ t = be_take_ready(edf);
++
++ if (!t && !list_empty(&edf->release_queue)) {
++ t = list_entry(edf->release_queue.next, be_server_t, list);
++
++ list_del(&t->list);
++ }
++
++ write_unlock(&edf->ready_lock);
++ spin_unlock(&edf->release_lock);
++ return t;
++}*/
++
++static void be_add_release(rt_domain_t* edf, be_server_t *srv)
++{
++ unsigned long flags;
++ struct list_head *pos;
++ be_server_t *queued;
++
++ spin_lock_irqsave(&edf->release_lock, flags);
++ list_for_each_prev(pos, &edf->release_queue) {
++ queued = list_entry(pos, be_server_t, list);
++ if ((unlikely(be_earlier_release(queued, srv)))) {
++ /* the server at pos has an earlier release */
++ /* insert the new server behind it */
++ __list_add(&srv->list, pos, pos->next);
++ goto out;
++ }
++ }
++
++ list_add(&srv->list, &edf->release_queue);
++ out:
++ spin_unlock_irqrestore(&edf->release_lock, flags);
++}
++
++static void be_try_release_pending(rt_domain_t* edf)
++{
++ unsigned long flags;
++ struct list_head *pos, *save;
++ be_server_t *queued;
++
++ if (spin_trylock_irqsave(&edf->release_lock, flags)) {
++ list_for_each_safe(pos, save, &edf->release_queue) {
++ queued = list_entry(pos, be_server_t, list);
++ if (likely(time_before_eq(
++ queued->release,
++ jiffies))) {
++ list_del(pos);
++ be_add_ready(edf, queued);
++ sched_trace_server_release(
++ queued->pid, queued->budget,
++ queued->period, RT_CLASS_BEST_EFFORT);
++ } else
++ /* the release queue is ordered */
++ break;
++ }
++ spin_unlock_irqrestore(&edf->release_lock, flags);
++ }
++}
++
++static void be_prepare_new_release(be_server_t *t, jiffie_t start) {
++ t->release = start;
++ t->deadline = t->release + t->period;
++ t->budget = t->wcet;
++}
++
++static void be_prepare_new_releases(rt_domain_t *edf, jiffie_t start)
++{
++ unsigned long flags;
++ struct list_head tmp_list;
++ struct list_head *pos, *n;
++ be_server_t *t;
++
++ INIT_LIST_HEAD(&tmp_list);
++
++ spin_lock_irqsave(&edf->release_lock, flags);
++ write_lock(&edf->ready_lock);
++
++
++ while (!list_empty(&edf->release_queue)) {
++ pos = edf->release_queue.next;
++ list_del(pos);
++ list_add(pos, &tmp_list);
++ }
++
++ while (!list_empty(&edf->ready_queue)) {
++ pos = edf->ready_queue.next;
++ list_del(pos);
++ list_add(pos, &tmp_list);
++
++ }
++
++ write_unlock(&edf->ready_lock);
++ spin_unlock_irqrestore(&edf->release_lock, flags);
++
++ list_for_each_safe(pos, n, &tmp_list) {
++ t = list_entry(pos, be_server_t, list);
++ list_del(pos);
++ be_prepare_new_release(t, start);
++ be_add_release(edf, t);
++ }
++
++}
++
++static void be_prepare_for_next_period(be_server_t *t)
++{
++ BUG_ON(!t);
++ /* prepare next release */
++ t->release = t->deadline;
++ t->deadline += t->period;
++ t->budget = t->wcet;
++}
++
++#define be_next_ready(edf) \
++ list_entry((edf)->ready_queue.next, be_server_t, list)
++
++
++/* be_preemption_needed - check whether the given CPU's current work needs
++ * to be preempted by a best-effort server.
++ */
++static inline int be_preemption_needed(rt_domain_t* edf, cpu_state_t* state)
++{
++ /* we need the read lock for rt_ready_queue */
++ if (!list_empty(&edf->ready_queue))
++ {
++
++ if (state->exec_class == RT_CLASS_SOFT) {
++ if (state->cap)
++ return time_before(
++ be_next_ready(edf)->deadline,
++ state->cap->deadline);
++ else
++ return time_before(
++ be_next_ready(edf)->deadline,
++ state->cur_deadline);
++ } else
++ return 1;
++ }
++ return 0;
++}
++
++static void be_enqueue(rt_domain_t* edf, be_server_t* srv)
++{
++ int new_release = 0;
++ if (!srv->budget) {
++ be_prepare_for_next_period(srv);
++ new_release = 1;
++ }
++
++ if (time_before_eq(srv->release, jiffies) &&
++ get_rt_mode() == MODE_RT_RUN) {
++ be_add_ready(edf, srv);
++ if (new_release)
++ sched_trace_server_release(
++ srv->pid, srv->budget,
++ srv->period, RT_CLASS_BEST_EFFORT);
++ } else
++ be_add_release(edf, srv);
++}
++
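++/* be_preempt - detach the BE server currently assigned to this CPU (if any)
++ * and put it back into the BE domain so that it can be scheduled again.
++ */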
++static void be_preempt(rt_domain_t *be, cpu_state_t *state)
++{
++ be_server_t *srv;
++
++ spin_lock(&state->lock);
++ srv = state->be;
++ state->be = NULL;
++ spin_unlock(&state->lock);
++
++ /* add outside of lock to avoid deadlock */
++ if (srv)
++ be_enqueue(be, srv);
++}
++
++
++/******************************************************************************/
++/* Actual HSB implementation */
++/******************************************************************************/
++
++/* always acquire the cpu lock as the last lock to avoid deadlocks */
++static spinlock_t hsb_cpu_lock = SPIN_LOCK_UNLOCKED;
++/* the cpus queue themselves according to priority in here */
++static LIST_HEAD(hsb_cpu_queue);
++
++
++/* the global soft real-time domain */
++static rt_domain_t srt;
++/* the global best-effort server domain
++ * belongs conceptually to the srt domain, but has
++ * be_server_t* queued instead of task_t*
++ */
++static rt_domain_t be;
++
++static rt_domain_t hsb_fifo;
++
++static capacity_queue_t cap_queue;
++
++
++/* adjust_cpu_queue - Move the cpu entry to the correct place to maintain
++ * order in the cpu queue.
++ *
++ */
++static void adjust_cpu_queue(task_class_t class, jiffie_t deadline,
++ be_server_t *be)
++{
++ struct list_head *pos;
++ cpu_state_t *other;
++ cpu_state_t *entry;
++
++ spin_lock(&hsb_cpu_lock);
++
++ entry = &__get_cpu_var(hsb_cpu_state);
++
++ spin_lock(&entry->lock);
++ entry->exec_class = class;
++ entry->cur_deadline = deadline;
++ entry->be = be;
++
++ spin_unlock(&entry->lock);
++
++ if (be)
++ sched_trace_server_scheduled(
++ be->pid, RT_CLASS_BEST_EFFORT, be->budget,
++ be->deadline);
++ else if (class == RT_CLASS_HARD)
++ sched_trace_server_scheduled(
++ HRT_BASE_PID + smp_processor_id(), RT_CLASS_HARD,
++ entry->hrt.budget, entry->hrt.deadline);
++
++ list_del(&entry->list);
++ /* If we do not execute real-time jobs we just move
++ * to the end of the queue.
++ * If we execute hard real-time jobs we move to the start
++ * of the queue.
++ */
++
++ switch (entry->exec_class) {
++ case RT_CLASS_HARD:
++ list_add(&entry->list, &hsb_cpu_queue);
++ break;
++
++ case RT_CLASS_SOFT:
++ list_for_each(pos, &hsb_cpu_queue) {
++ other = list_entry(pos, cpu_state_t, list);
++ if (other->exec_class > RT_CLASS_SOFT ||
++ time_before_eq(entry->cur_deadline,
++ other->cur_deadline))
++ {
++ __list_add(&entry->list, pos->prev, pos);
++ goto out;
++ }
++ }
++ /* possible fall through if lowest SRT priority */
++
++ case RT_CLASS_BEST_EFFORT:
++ list_add_tail(&entry->list, &hsb_cpu_queue);
++ break;
++
++ default:
++ /* something wrong in the variable */
++ BUG();
++ }
++ out:
++ spin_unlock(&hsb_cpu_lock);
++}
++
++
++/* hrt_check_resched - check whether the HRT server on given CPU needs to
++ * preempt the running task.
++ */
++static int hrt_check_resched(rt_domain_t *edf)
++{
++ hrt_server_t *srv = container_of(edf, hrt_server_t, domain);
++ cpu_state_t *state = container_of(srv, cpu_state_t, hrt);
++ int ret = 0;
++
++ spin_lock(&state->lock);
++
++ if (hrt_client_eligible(srv)) {
++ if (state->exec_class > RT_CLASS_HARD ||
++ time_before(
++ get_deadline(next_ready(edf)),
++ state->cur_deadline)
++ ) {
++ if (state->cpu == smp_processor_id())
++ set_tsk_need_resched(current);
++ else
++ smp_send_reschedule(state->cpu);
++ }
++ }
++
++ spin_unlock(&state->lock);
++ return ret;
++}
++
++
++/* srt_check_resched - Check whether another CPU needs to switch to a SRT task.
++ *
++ * The function only checks and kicks the last CPU. It will reschedule and
++ * kick the next if necessary, and so on. The caller is responsible for making
++ * sure that it is not the last entry or that a reschedule is not necessary.
++ *
++ * Caller must hold edf->ready_lock!
++ */
++static int srt_check_resched(rt_domain_t *edf)
++{
++ cpu_state_t *last;
++ int ret = 0;
++
++ spin_lock(&hsb_cpu_lock);
++
++ if (!list_empty(&srt.ready_queue)) {
++ last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
++ /* guard against concurrent updates */
++ spin_lock(&last->lock);
++ if (last->exec_class == RT_CLASS_BEST_EFFORT || (
++ last->exec_class == RT_CLASS_SOFT &&
++ time_before(get_deadline(next_ready(&srt)),
++ last->cur_deadline)))
++ {
++ if (smp_processor_id() == last->cpu)
++ set_tsk_need_resched(current);
++ else
++ if (!test_will_schedule(last->cpu))
++ smp_send_reschedule(last->cpu);
++ ret = 1;
++ }
++ spin_unlock(&last->lock);
++ }
++
++ spin_unlock(&hsb_cpu_lock);
++ return ret;
++}
++
++
++/* be_check_resched - Check whether another CPU needs to switch to a BE server.
++ *
++ * Caller must hold edf->ready_lock!
++ */
++static int be_check_resched(rt_domain_t *edf)
++{
++ cpu_state_t *last;
++ int soft, bg;
++ int ret = 0;
++
++ spin_lock(&hsb_cpu_lock);
++
++ if (!list_empty(&be.ready_queue)) {
++ last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
++ /* guard against concurrent updates */
++ spin_lock(&last->lock);
++
++ bg = last->exec_class == RT_CLASS_BEST_EFFORT;
++ soft = last->exec_class == RT_CLASS_SOFT;
++
++ if (bg || (soft && time_before(be_next_ready(&be)->deadline,
++ last->cur_deadline)))
++ {
++ if (smp_processor_id() == last->cpu)
++ set_tsk_need_resched(current);
++ else
++ if (!test_will_schedule(last->cpu))
++ smp_send_reschedule(last->cpu);
++ ret = 1;
++ }
++
++ spin_unlock(&last->lock);
++ }
++
++ spin_unlock(&hsb_cpu_lock);
++ return ret;
++}
++
++
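++/* cap_check_resched - check whether the lowest-priority CPU (the tail of the
++ * CPU queue) should be preempted because a spare capacity with the given
++ * deadline has become available.
++ */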
++int cap_check_resched(jiffie_t deadline)
++{
++ unsigned long flags;
++ cpu_state_t *last;
++ int soft, bg;
++ int ret = 0;
++
++ if (get_rt_mode() == MODE_RT_RUN) {
++ spin_lock_irqsave(&hsb_cpu_lock, flags);
++
++ last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
++ /* guard against concurrent updates */
++ spin_lock(&last->lock);
++
++ bg = last->exec_class == RT_CLASS_BEST_EFFORT;
++ soft = last->exec_class == RT_CLASS_SOFT;
++
++ if (bg || (soft && time_before(deadline,
++ last->cur_deadline)))
++ {
++ if (smp_processor_id() == last->cpu)
++ set_tsk_need_resched(current);
++ else
++ if (!test_will_schedule(last->cpu))
++ smp_send_reschedule(last->cpu);
++ ret = 1;
++ }
++
++ spin_unlock(&last->lock);
++
++ spin_unlock_irqrestore(&hsb_cpu_lock, flags);
++ }
++ return ret;
++}
++
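++/* fifo_check_resched - check whether the lowest-priority CPU is only serving
++ * best-effort work and, if so, kick it so that it picks up newly queued
++ * FIFO jobs.
++ */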
++int fifo_check_resched(void)
++{
++ unsigned long flags;
++ cpu_state_t *last;
++ int ret = 0;
++
++ if (get_rt_mode() == MODE_RT_RUN) {
++ spin_lock_irqsave(&hsb_cpu_lock, flags);
++
++
++ last = list_entry(hsb_cpu_queue.prev, cpu_state_t, list);
++ /* guard against concurrent updates */
++
++ spin_lock(&last->lock);
++
++ if (last->exec_class == RT_CLASS_BEST_EFFORT)
++ {
++ if (smp_processor_id() == last->cpu)
++ set_tsk_need_resched(current);
++ else
++ if (!test_will_schedule(last->cpu))
++ smp_send_reschedule(last->cpu);
++ ret = 1;
++ }
++
++ spin_unlock(&last->lock);
++
++ spin_unlock_irqrestore(&hsb_cpu_lock, flags);
++ }
++ return ret;
++}
++
++
++
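++/* hsb_preemption_needed - check whether the earliest-deadline ready job in
++ * the given domain should preempt what this CPU currently serves: in soft
++ * real-time mode deadlines are compared (against the capacity deadline if
++ * spare capacity is in use), otherwise any ready job wins.
++ */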
++static inline int hsb_preemption_needed(rt_domain_t* edf, cpu_state_t* state)
++{
++ /* we need the read lock for rt_ready_queue */
++ if (!list_empty(&edf->ready_queue))
++ {
++ if (state->exec_class == RT_CLASS_SOFT) {
++ if (state->cap)
++ return time_before(get_deadline(next_ready(edf))
++ , state->cap->deadline);
++ else
++ return time_before(get_deadline(next_ready(edf))
++ , state->cur_deadline);
++ } else
++ return 1;
++ }
++ return 0;
++}
++
++static inline int cap_preemption_needed(capacity_queue_t* q, cpu_state_t* state)
++{
++ /* we need the read lock for rt_ready_queue */
++ if (!list_empty(&q->queue))
++ {
++ if (state->exec_class == RT_CLASS_SOFT) {
++ if (state->cap)
++ return time_before(next_cap(q)->deadline
++ , state->cap->deadline);
++ else
++ return time_before(next_cap(q)->deadline
++ , state->cur_deadline);
++ } else
++ return 1;
++ }
++ return 0;
++}
++
++/* hsb_scheduler_tick - this function is called for every local timer
++ * interrupt.
++ *
++ * checks whether the current task has expired and checks
++ * whether we need to preempt it if it has not expired
++ */
++static reschedule_check_t hsb_scheduler_tick(void)
++{
++ unsigned long flags;
++ struct task_struct *t = current;
++ int resched = 0;
++
++ cpu_state_t *state = &__get_cpu_var(hsb_cpu_state);
++
++ /* expire tasks even if not in real-time mode
++ * this makes sure that at the end of real-time mode
++ * no tasks "run away forever".
++ */
++
++ /* charge BE server only if we are not running on a spare capacity */
++ if (state->be && !state->cap && --state->be->budget <= 0) {
++ sched_trace_server_completion(state->be->pid, 0,
++ state->be->deadline,
++ RT_CLASS_BEST_EFFORT);
++ be_preempt(&be, state);
++ resched = 1;
++ }
++
++ if (state->cap)
++ if (--state->cap->budget <= 0 ||
++ time_before_eq(state->cap->deadline, jiffies)) {
++ kfree(state->cap);
++ state->cap = NULL;
++ resched = 1;
++ }
++
++ if (is_realtime(t)) {
++ if (is_hrt(t) && (--state->hrt.budget <= 0)) {
++ sched_trace_server_completion(
++ HRT_BASE_PID + smp_processor_id(), 0,
++ state->hrt.deadline, RT_CLASS_HARD);
++ resched = 1;
++ }
++
++ /* account for received service... */
++ t->rt_param.times.exec_time++;
++
++ /* ...and charge current budget */
++ if (!state->cap) {
++ --t->time_slice;
++ /* a task always should be able to finish its job */
++ BUG_ON(!is_be(t) && !t->time_slice && !job_completed(t));
++ }
++
++ if (job_completed(t) || (is_be(t) && !t->time_slice)) {
++ sched_trace_job_completion(t);
++ set_rt_flags(t, RT_F_SLEEP);
++ resched = 1;
++ }
++ }
++
++
++ if (get_rt_mode() == MODE_RT_RUN)
++ {
++ try_release_pending(&state->hrt.domain);
++ check_for_hrt_release(&state->hrt);
++ try_release_pending(&srt);
++ be_try_release_pending(&be);
++
++ if (!resched)
++ switch (state->exec_class) {
++ case RT_CLASS_HARD:
++ read_lock_irqsave(&state->hrt.domain.ready_lock,
++ flags);
++ resched = edf_preemption_needed(
++ &state->hrt.domain,
++ t);
++ read_unlock_irqrestore(
++ &state->hrt.domain.ready_lock, flags);
++ break;
++
++ case RT_CLASS_SOFT:
++ case RT_CLASS_BEST_EFFORT:
++ local_irq_save(flags);
++
++ /* check for HRT jobs */
++ read_lock(&state->hrt.domain.ready_lock);
++ resched = hrt_client_eligible(&state->hrt);
++ read_unlock(&state->hrt.domain.ready_lock);
++
++ /* check for spare capacities */
++ if (!resched) {
++ spin_lock(&cap_queue.lock);
++ resched =
++ cap_preemption_needed(&cap_queue,
++ state);
++ spin_unlock(&cap_queue.lock);
++ }
++
++ /* check for SRT jobs */
++ if (!resched) {
++ read_lock(&srt.ready_lock);
++ resched = hsb_preemption_needed(
++ &srt, state);
++ read_unlock(&srt.ready_lock);
++ }
++
++ /* check for BE jobs */
++ if (!resched) {
++ read_lock(&be.ready_lock);
++ resched = be_preemption_needed(
++ &be, state);
++ read_unlock(&be.ready_lock);
++ }
++
++ /* check for background jobs */
++ if (!resched && !is_realtime(current))
++ resched = jobs_pending(&hsb_fifo);
++ local_irq_restore(flags);
++ break;
++
++ default:
++ /* something wrong in the variable */
++ BUG();
++ }
++ }
++
++ if (resched) {
++ set_will_schedule();
++ return FORCE_RESCHED;
++ } else
++ return NO_RESCHED;
++}
++
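++/* schedule_hrt - pick the next hard real-time job for this CPU from its
++ * local HRT server, or keep prev if it is still the highest-priority
++ * released HRT job. Any spare capacity held by this CPU is returned to the
++ * capacity queue first.
++ */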
++static int schedule_hrt(struct task_struct * prev,
++ struct task_struct ** next, runqueue_t * rq)
++{
++ unsigned long flags;
++ int deactivate = 1;
++ cpu_state_t *state;
++
++
++ state = &__get_cpu_var(hsb_cpu_state);
++
++ write_lock_irqsave(&state->hrt.domain.ready_lock, flags);
++
++
++ if (state->cap) {
++ /* schedule_hrt does not hold the cap_queue lock */
++ return_capacity(&cap_queue, state->cap);
++ state->cap = NULL;
++ }
++
++ if (is_hrt(prev) && is_released(prev) && is_running(prev)
++ && !edf_preemption_needed(&state->hrt.domain, prev)) {
++ /* This really should only happen if the task has
++ * 100% utilization or when we got a bogus/delayed
++ * resched IPI.
++ */
++ TRACE("HRT: prev will be next, already released\n");
++ *next = prev;
++ deactivate = 0;
++ } else {
++ /* either not yet released, preempted, or non-rt */
++ *next = __take_ready(&state->hrt.domain);
++ /* the logic in hsb_schedule makes sure *next must exist
++ * if we get here */
++ BUG_ON(!*next);
++ /* stick the task into the runqueue */
++ __activate_task(*next, rq);
++ set_task_cpu(*next, smp_processor_id());
++ }
++
++ set_rt_flags(*next, RT_F_RUNNING);
++ adjust_cpu_queue(RT_CLASS_HARD, get_deadline(*next), NULL);
++ clear_will_schedule();
++
++ write_unlock_irqrestore(&state->hrt.domain.ready_lock, flags);
++ return deactivate;
++}
++
++
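++/* find_min_slack_task - among prev (if it is still runnable) and all ready
++ * SRT jobs, pick the task with the least slack, i.e., the one most likely
++ * to become tardy.
++ */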
++static struct task_struct* find_min_slack_task(struct task_struct *prev,
++ rt_domain_t* edf)
++{
++ struct list_head *pos;
++ struct task_struct* tsk = NULL;
++ struct task_struct* cur;
++
++ if (is_realtime(prev) && is_running(prev) &&
++ get_rt_flags(prev) != RT_F_SLEEP)
++ tsk = prev;
++ list_for_each(pos, &edf->ready_queue) {
++ cur = list_entry(pos, struct task_struct, rt_list);
++ if (!tsk || task_slack(tsk) > task_slack(cur))
++ tsk = cur;
++ }
++ return tsk;
++}
++
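++/* null_heuristic - return NULL if best-effort jobs are pending (so that the
++ * caller gives the spare capacity to them), otherwise the earliest-deadline
++ * ready SRT job, if any.
++ */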
++static struct task_struct* null_heuristic(struct task_struct *prev,
++ rt_domain_t* edf,
++ rt_domain_t* fifo)
++{
++ if (jobs_pending(fifo))
++ return NULL;
++ else if (!list_empty(&edf->ready_queue))
++ return list_entry(edf->ready_queue.next,
++ struct task_struct, rt_list);
++ else
++ return NULL;
++}
++
++/* caller holds all locks
++ */
++
++static int schedule_capacity(struct task_struct *prev,
++ struct task_struct **next, runqueue_t *rq)
++{
++ cpu_state_t *state = &__get_cpu_var(hsb_cpu_state);
++ capacity_t* old;
++
++ if (state->cap) {
++ old = state->cap;
++ state->cap = __take_capacity(&cap_queue, old->deadline, 1);
++ if (!state->cap)
++ state->cap = old;
++ else
++ __return_capacity(&cap_queue, old);
++ } else
++ state->cap = __take_capacity(&cap_queue, 0, 0);
++
++
++ /* pick a task likely to be tardy */
++ *next = find_min_slack_task(prev, &srt);
++
++ /* only give away spare capacities if there is no task that
++ * is going to be tardy
++ */
++ if (*next && task_slack(*next) >= 0)
++ *next = null_heuristic(prev, &srt, &hsb_fifo);
++ if (*next && *next != prev)
++ list_del(&(*next)->rt_list);
++
++
++ /* if there is none pick a BE job */
++ if (!*next) {
++ if (is_realtime(prev) && is_be(prev) && is_running(prev) &&
++ get_rt_flags(prev) != RT_F_SLEEP)
++ *next = prev;
++ else
++ *next = take_ready(&hsb_fifo);
++ }
++
++ if (state->be)
++ be_preempt(&be, state);
++ BUG_ON(!state->cap);
++ if (*next && state->cap->donor) {
++ sched_trace_capacity_allocation(
++ *next, state->cap->budget, state->cap->deadline,
++ state->cap->donor);
++ }
++
++ return *next != prev;
++}
++
++
++
++#define BG 0
++#define SRT 1
++#define BE 2
++#define CAP 3
++
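++/* what_first - determine what should be served next: a spare capacity (CAP),
++ * an SRT job (SRT), or a BE server (BE), whichever has the earliest
++ * deadline; BG (background mode) if nothing is ready.
++ */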
++static inline int what_first(rt_domain_t *be, rt_domain_t *srt, capacity_queue_t* q)
++{
++ jiffie_t sdl = 0, bdl= 0, cdl = 0, cur;
++ int _srt = !list_empty(&srt->ready_queue);
++ int _be = !list_empty(&be->ready_queue);
++ int _cap = __capacity_available(q);
++
++
++ int ret = BG; /* nothing ready => background mode*/
++ cur = 0;
++
++ if (_srt)
++ sdl = get_deadline(next_ready(srt));
++ if (_be)
++ bdl = be_next_ready(be)->deadline;
++ if (_cap)
++ cdl = next_cap(q)->deadline;
++
++ if (_cap) {
++ ret = CAP;
++ cur = cdl;
++ }
++ if (_srt && (time_before(sdl, cur) || !ret)) {
++ ret = SRT;
++ cur = sdl;
++ }
++ if (_be && (time_before(bdl, cur) || !ret)) {
++ ret = BE;
++ cur = bdl;
++ }
++ return ret;
++}
++
++
++
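++/* schedule_srt_be_cap - scheduling decision for everything that is not hard
++ * real-time: keep prev if its next job is already released and it still has
++ * the highest priority, otherwise serve whatever what_first() selects
++ * (spare capacity, BE server, SRT job, or background work).
++ */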
++static int schedule_srt_be_cap(struct task_struct *prev,
++ struct task_struct **next, runqueue_t *rq)
++{
++ task_class_t class = RT_CLASS_BEST_EFFORT;
++ jiffie_t deadline = 0;
++ unsigned long flags;
++ int deactivate = 1;
++ be_server_t* bes;
++ cpu_state_t* state;
++ int type = BG;
++
++reschedule:
++ write_lock_irqsave(&srt.ready_lock, flags);
++ write_lock(&be.ready_lock);
++ spin_lock(&cap_queue.lock);
++
++
++ state = &__get_cpu_var(hsb_cpu_state);
++ bes = NULL;
++
++ clear_will_schedule();
++
++ if (is_realtime(prev) && (is_released(prev) || is_be(prev)) &&
++ is_running(prev) && !hsb_preemption_needed(&srt, state) &&
++ !be_preemption_needed(&be, state)
++ ) {
++ /* Our current task's next job has already been
++ * released and has higher priority than the highest
++ * priority waiting task; in other words: it is tardy.
++ * We just keep it.
++ */
++ TRACE("prev will be next, already released\n");
++ *next = prev;
++ class = prev->rt_param.basic_params.class;
++ deadline = get_deadline(*next);
++ deactivate = 0;
++ } else {
++ /* either not yet released, preempted, or non-rt */
++ type = what_first(&be, &srt, &cap_queue);
++ switch (type) {
++ case CAP:
++ /* capacity */
++ deactivate = schedule_capacity(prev, next, rq);
++ deadline = state->cap->deadline;
++ if (*next)
++ class = RT_CLASS_SOFT;
++ else
++ class = RT_CLASS_BEST_EFFORT;
++ break;
++ case BE:
++ /* be */
++ *next = NULL;
++ bes = be_take_ready(&be);
++ if (bes) {
++ class = RT_CLASS_SOFT;
++ deadline = bes->deadline;
++ *next = take_ready(&hsb_fifo);
++ if (!*next) {
++ /* deactivate */
++ __release_capacity(&cap_queue,
++ bes->budget,
++ bes->deadline, NULL);
++ bes->budget = 0;
++ barrier();
++ spin_unlock(&cap_queue.lock);
++ write_unlock(&be.ready_lock);
++ write_unlock_irqrestore(&srt.ready_lock,
++ flags);
++ be_enqueue(&be, bes);
++ goto reschedule;
++ }
++ }
++ break;
++ case SRT:
++ /* srt */
++ *next = __take_ready(&srt);
++ if (*next) {
++ class = RT_CLASS_SOFT;
++ deadline = get_deadline(*next);
++ }
++ break;
++ case BG:
++ /* background server mode */
++ class = RT_CLASS_BEST_EFFORT;
++ deadline = 0;
++ *next = take_ready(&hsb_fifo);
++ break;
++ }
++
++
++ /* give back capacities */
++ if (type != CAP && state->cap) {
++ __return_capacity(&cap_queue, state->cap);
++ state->cap = NULL;
++ }
++ if (*next && deactivate) {
++ /* mark the task as executing on this cpu */
++ set_task_cpu(*next, smp_processor_id());
++ /* stick the task into the runqueue */
++ __activate_task(*next, rq);
++ }
++ }
++
++ adjust_cpu_queue(class, deadline, bes);
++
++ switch (type) {
++ case BG:
++ break;
++ case BE:
++ be.check_resched(&be);
++ break;
++ case SRT:
++ srt.check_resched(&srt);
++ break;
++ case CAP:
++ if (!list_empty(&cap_queue.queue))
++ cap_check_resched(list_entry(cap_queue.queue.next,
++ capacity_t, list)->deadline);
++ break;
++ }
++
++
++ if(*next)
++ set_rt_flags(*next, RT_F_RUNNING);
++
++ spin_unlock(&cap_queue.lock);
++ write_unlock(&be.ready_lock);
++ write_unlock_irqrestore(&srt.ready_lock, flags);
++ return deactivate;
++}
++
++
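++/* hsb_schedule - top-level schedule() hook: if prev completed its job,
++ * donate its unused budget as spare capacity and prepare its next period;
++ * then schedule HRT work if any is eligible on this CPU, otherwise defer to
++ * schedule_srt_be_cap().
++ */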
++static int hsb_schedule(struct task_struct * prev, struct task_struct ** next,
++ runqueue_t * rq)
++{
++ int need_deactivate = 1;
++ cpu_state_t *state = NULL;
++
++ preempt_disable();
++
++ state = &__get_cpu_var(hsb_cpu_state);
++
++ be_preempt(&be, state);
++
++
++ if (is_realtime(prev) && !is_be(prev) &&
++ get_rt_flags(prev) == RT_F_SLEEP)
++ {
++ TRACE("preparing %d for next period\n", prev->pid);
++ release_capacity(&cap_queue, prev->time_slice,
++ prev->rt_param.times.deadline, prev);
++ edf_prepare_for_next_period(prev);
++ }
++
++ if (get_rt_mode() == MODE_RT_RUN) {
++ /* we need to schedule hrt if an hrt job is pending or when
++ * we have a non-expired hrt job on the cpu
++ */
++
++ if (hrt_client_eligible(&state->hrt) ||
++ unlikely((is_hrt(prev) && is_running(prev) &&
++ get_rt_flags(prev) != RT_F_SLEEP))) {
++ if (state->cap) {
++ return_capacity(&cap_queue, state->cap);
++ state->cap = NULL;
++ }
++ need_deactivate = schedule_hrt(prev, next, rq);
++ } else
++ need_deactivate = schedule_srt_be_cap(prev, next, rq);
++
++ }
++
++ if (is_realtime(prev) && need_deactivate && prev->array) {
++ /* take it out of the run queue */
++ deactivate_task(prev, rq);
++ }
++
++ preempt_enable();
++
++ return 0;
++}
++
++/* put task into correct queue */
++static inline void hsb_add_release(struct task_struct *t)
++{
++ if (is_hrt(t))
++ add_release(hrt_dom(get_partition(t)), t);
++ else if (is_srt(t))
++ add_release(&srt, t);
++ else if (is_be(t)) {
++ t->time_slice = 0;
++ add_ready(&hsb_fifo, t);
++ fifo_check_resched();
++ } else
++ BUG();
++
++}
++
++/* put task into correct queue */
++static inline void hsb_add_ready(struct task_struct *t)
++{
++ if (is_hrt(t))
++ add_ready(hrt_dom(get_partition(t)), t);
++ else if (is_srt(t))
++ add_ready(&srt, t);
++ else if (is_be(t)) {
++ add_ready(&hsb_fifo, t);
++ fifo_check_resched();
++ }
++ else
++ BUG();
++}
++
++
++/* _finish_switch - we just finished the switch away from prev
++ * it is now safe to requeue the task
++ */
++static void hsb_finish_switch(struct task_struct *prev)
++{
++ if (!is_realtime(prev) || !is_running(prev))
++ return;
++
++ TRACE("finish switch for %d\n", prev->pid);
++
++ if (is_be(prev)) {
++ add_ready(&hsb_fifo, prev);
++ return;
++ }
++
++ if (get_rt_flags(prev) == RT_F_SLEEP ||
++ get_rt_mode() != MODE_RT_RUN) {
++ /* this task has expired
++ * _schedule has already taken care of updating
++ * the release and
++ * deadline. We just must check whether it has been released.
++ */
++ if (is_released(prev) && get_rt_mode() == MODE_RT_RUN) {
++ sched_trace_job_release(prev);
++ hsb_add_ready(prev);
++ TRACE("%d goes straight to ready queue\n", prev->pid);
++ }
++ else
++ /* it has got to wait */
++ hsb_add_release(prev);
++ }
++ else {
++ /* this is a forced preemption
++ * thus the task stays in the ready_queue
++ * we only must make it available to other cpus
++ */
++ hsb_add_ready(prev);
++ }
++}
++
++
++/* Prepare a task for running in RT mode
++ * Enqueues the task into master queue data structure
++ * returns
++ * -EPERM if task is not TASK_STOPPED
++ */
++static long hsb_prepare_task(struct task_struct * t)
++{
++ TRACE("edf-hsb: prepare task %d\n", t->pid);
++
++ if (t->state == TASK_STOPPED) {
++ __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);
++
++ if (get_rt_mode() == MODE_RT_RUN && !is_be(t))
++ /* The action is already on.
++ * Prepare immediate release
++ */
++ edf_release_now(t);
++ /* The task should be running in the queue, otherwise signal
++ * code will try to wake it up with fatal consequences.
++ */
++ t->state = TASK_RUNNING;
++ if (is_be(t))
++ t->rt_param.times.deadline = 0;
++ hsb_add_release(t);
++ return 0;
++ }
++ else
++ return -EPERM;
++}
++
++static void hsb_wake_up_task(struct task_struct *task)
++{
++ /* We must determine whether task should go into the release
++ * queue or into the ready queue. It may enter the ready queue
++ * if it has credit left in its time slice and has not yet reached
++ * its deadline. If it is now past its deadline we assume this is the
++ * arrival of a new sporadic job and thus put it in the ready queue
++ * anyway. If it has zero budget and the next release is in the future
++ * it has to go to the release queue.
++ */
++ TRACE("edf-hsb: wake up %d with budget=%d\n",
++ task->pid, task->time_slice);
++ task->state = TASK_RUNNING;
++
++ if (is_be(task)) {
++ task->rt_param.times.last_release = jiffies;
++ hsb_add_release(task);
++ }
++ else if (is_tardy(task)) {
++ /* new sporadic release */
++ edf_release_now(task);
++ sched_trace_job_release(task);
++ hsb_add_ready(task);
++ }
++ else if (task->time_slice) {
++ /* came back in time before deadline
++ */
++ set_rt_flags(task, RT_F_RUNNING);
++ hsb_add_ready(task);
++ }
++ else {
++ hsb_add_release(task);
++ }
++
++}
++
++static void hsb_task_blocks(struct task_struct *t)
++{
++ /* not really anything to do since it can only block if
++ * it is running, and when it is not running it is not in any
++ * queue anyway.
++ */
++ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
++ if (is_be(t))
++ sched_trace_job_completion(t);
++}
++
++
++static int hsb_mode_change(int new_mode)
++{
++ int cpu;
++ cpu_state_t *entry;
++ jiffie_t start;
++
++ TRACE("[%d] edf-hsb: mode changed to %d\n", smp_processor_id(),
++ new_mode);
++ if (new_mode == MODE_RT_RUN) {
++ start = jiffies + 20;
++ rerelease_all(&srt, edf_release_at);
++ be_prepare_new_releases(&be, start);
++
++ /* initialize per CPU state
++ * we can't do this at boot time because we don't know
++ * which CPUs will be online and we can't put non-existing
++ * cpus into the queue
++ */
++ spin_lock(&hsb_cpu_lock);
++ /* get old cruft out of the way in case we reenter real-time
++ * mode for a second time
++ */
++ while (!list_empty(&hsb_cpu_queue))
++ list_del(hsb_cpu_queue.next);
++ /* reinitialize */
++ for_each_online_cpu(cpu) {
++ entry = &per_cpu(hsb_cpu_state, cpu);
++ atomic_set(&entry->will_schedule, 0);
++ entry->exec_class = RT_CLASS_BEST_EFFORT;
++ entry->cur_deadline = 0;
++ list_add(&entry->list, &hsb_cpu_queue);
++
++ rerelease_all(&entry->hrt.domain, edf_release_at);
++ prepare_hrt_release(&entry->hrt, start);
++ }
++ spin_unlock(&hsb_cpu_lock);
++
++ }
++ TRACE("[%d] edf-hsb: mode change done\n", smp_processor_id());
++ return 0;
++}
++
++
++typedef enum {
++ EDF_HSB_SET_HRT,
++ EDF_HSB_GET_HRT,
++ EDF_HSB_CREATE_BE
++} edf_hsb_setup_cmds_t;
++
++typedef struct {
++ int cpu;
++ unsigned int wcet;
++ unsigned int period;
++} setup_hrt_param_t;
++
++typedef struct {
++ unsigned int wcet;
++ unsigned int period;
++} create_be_param_t;
++
++typedef struct {
++ union {
++ setup_hrt_param_t setup_hrt;
++ create_be_param_t create_be;
++ };
++} param_t;
++
++static pid_t next_be_server_pid = SRT_BASE_PID;
++
++static int hsb_scheduler_setup(int cmd, void __user* up)
++{
++ unsigned long flags;
++ int error = -EINVAL;
++ cpu_state_t* state;
++ be_server_t* srv;
++ param_t param;
++
++ switch (cmd) {
++ case EDF_HSB_SET_HRT:
++ if (copy_from_user(¶m, up, sizeof(setup_hrt_param_t))) {
++ error = -EFAULT;
++ goto out;
++ }
++ if (!cpu_online(param.setup_hrt.cpu)) {
++ printk(KERN_WARNING "scheduler setup: "
++ "CPU %d is not online!\n", param.setup_hrt.cpu);
++ error = -EINVAL;
++ goto out;
++ }
++ if (param.setup_hrt.period < param.setup_hrt.wcet) {
++ printk(KERN_WARNING "period < wcet!\n");
++ error = -EINVAL;
++ goto out;
++ }
++
++ state = &per_cpu(hsb_cpu_state, param.setup_hrt.cpu);
++ spin_lock_irqsave(&state->lock, flags);
++
++ state->hrt.wcet = param.setup_hrt.wcet;
++ state->hrt.period = param.setup_hrt.period;
++
++ spin_unlock_irqrestore(&state->lock, flags);
++
++ printk(KERN_WARNING "edf-hsb: set HRT #%d to (%d, %d)\n",
++ param.setup_hrt.cpu, param.setup_hrt.wcet,
++ param.setup_hrt.period);
++
++ error = 0;
++
++ break;
++
++ case EDF_HSB_GET_HRT:
++ if (copy_from_user(¶m, up, sizeof(setup_hrt_param_t))) {
++ error = -EFAULT;
++ goto out;
++ }
++ if (!cpu_online(param.setup_hrt.cpu)) {
++ error = -EINVAL;
++ goto out;
++ }
++ state = &per_cpu(hsb_cpu_state, param.setup_hrt.cpu);
++ spin_lock_irqsave(&state->lock, flags);
++
++ param.setup_hrt.wcet = state->hrt.wcet;
++ param.setup_hrt.period = state->hrt.period;
++
++ spin_unlock_irqrestore(&state->lock, flags);
++
++ if (copy_to_user(up, ¶m, sizeof(setup_hrt_param_t))) {
++ error = -EFAULT;
++ goto out;
++ }
++ error = 0;
++ break;
++
++ case EDF_HSB_CREATE_BE:
++ if (copy_from_user(¶m, up, sizeof(create_be_param_t))) {
++ error = -EFAULT;
++ goto out;
++ }
++ if (param.create_be.period < param.create_be.wcet ||
++ !param.create_be.period || !param.create_be.wcet) {
++ error = -EINVAL;
++ goto out;
++ }
++ srv = (be_server_t*) kmalloc(sizeof(be_server_t), GFP_KERNEL);
++ if (!srv) {
++ error = -ENOMEM;
++ goto out;
++ }
++ srv->wcet = param.create_be.wcet;
++ srv->period = param.create_be.period;
++ srv->pid = next_be_server_pid++;
++ INIT_LIST_HEAD(&srv->list);
++ be_prepare_new_release(srv, jiffies);
++ be_enqueue(&be, srv);
++
++ printk(KERN_WARNING "edf-hsb: created a BE with (%d, %d)\n",
++ param.create_be.wcet, param.create_be.period);
++
++ error = 0;
++ break;
++
++ default:
++ printk(KERN_WARNING "edf-hsb: unknown command %d\n", cmd);
++ }
++
++out:
++ return error;
++}
++
++/* Plugin object */
++static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
++ .ready_to_use = 0
++};
++
++
++/*
++ * Plugin initialization code.
++ */
++#define INIT_SCHED_PLUGIN (struct sched_plugin){\
++ .plugin_name = "EDF-HSB",\
++ .ready_to_use = 1,\
++ .scheduler_tick = hsb_scheduler_tick,\
++ .prepare_task = hsb_prepare_task,\
++ .sleep_next_period = edf_sleep_next_period,\
++ .schedule = hsb_schedule,\
++ .finish_switch = hsb_finish_switch,\
++ .mode_change = hsb_mode_change,\
++ .wake_up_task = hsb_wake_up_task,\
++ .task_blocks = hsb_task_blocks, \
++ .scheduler_setup = hsb_scheduler_setup \
++}
++
++
++sched_plugin_t *__init init_edf_hsb_plugin(void)
++{
++ int i;
++
++ if (!s_plugin.ready_to_use)
++ {
++ capacity_queue_init(&cap_queue);
++ edf_domain_init(&srt, srt_check_resched);
++ edf_domain_init(&be, be_check_resched);
++ fifo_domain_init(&hsb_fifo, NULL);
++ for (i = 0; i < NR_CPUS; i++)
++ {
++ hsb_cpu_state_init(&per_cpu(hsb_cpu_state, i),
++ hrt_check_resched, i);
++ printk("HRT server %d initialized.\n", i);
++ }
++ s_plugin = INIT_SCHED_PLUGIN;
++ }
++ return &s_plugin;
++}
+diff --git a/kernel/sched_global_edf.c b/kernel/sched_global_edf.c
+new file mode 100644
+index 0000000..4b36bc5
+--- /dev/null
++++ b/kernel/sched_global_edf.c
+@@ -0,0 +1,550 @@
++/*
++ * kernel/sched_global_edf.c
++ *
++ * Re-Implementation of the Global EDF scheduler.
++ *
++ * This version works without using the struct queue. It uses the
++ * built-in kernel lists.
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++
++#include
++#include
++
++
++/* cpu_entry_t - maintain state of the priority of cpu's current task
++ * this is needed to check for priority inversions.
++ */
++typedef struct {
++ int cpu;
++ int executes_realtime;
++ jiffie_t cur_deadline;
++ struct list_head list;
++ atomic_t will_schedule;
++} cpu_entry_t;
++DEFINE_PER_CPU(cpu_entry_t, gedf_cpu_entries);
++
++#define set_will_schedule() \
++ (atomic_set(&__get_cpu_var(gedf_cpu_entries).will_schedule, 1))
++#define clear_will_schedule() \
++ (atomic_set(&__get_cpu_var(gedf_cpu_entries).will_schedule, 0))
++#define test_will_schedule(cpu) \
++ (atomic_read(&per_cpu(gedf_cpu_entries, cpu).will_schedule))
++
++
++/* always acquire the cpu lock as the last lock to avoid deadlocks */
++static spinlock_t gedf_cpu_lock = SPIN_LOCK_UNLOCKED;
++/* the cpus queue themselves according to priority in here */
++static LIST_HEAD(gedf_cpu_queue);
++
++
++static rt_domain_t gedf;
++
++#define DUMP(args...) TRACE(args)
++
++/* adjust_cpu_queue - Move the cpu entry to the correct place to maintain
++ * order in the cpu queue. Caller must hold ready write lock.
++ *
++ */
++static void adjust_cpu_queue(int exec_rt, jiffie_t deadline)
++{
++ struct list_head *pos;
++ cpu_entry_t *other;
++ cpu_entry_t *entry;
++
++ spin_lock(&gedf_cpu_lock);
++
++ entry = &__get_cpu_var(gedf_cpu_entries);
++ entry->executes_realtime = exec_rt;
++ entry->cur_deadline = deadline;
++
++ list_del(&entry->list);
++ /* if we do not execute real-time jobs we just move
++ * to the end of the queue
++ */
++ if (entry->executes_realtime)
++ list_for_each(pos, &gedf_cpu_queue) {
++ other = list_entry(pos, cpu_entry_t, list);
++ if (!other->executes_realtime ||
++ time_before_eq(entry->cur_deadline,
++ other->cur_deadline))
++ {
++ __list_add(&entry->list, pos->prev, pos);
++ goto out;
++ }
++ }
++ /* if we get this far we have the lowest priority task */
++ list_add_tail(&entry->list, &gedf_cpu_queue);
++
++ out:
++ spin_unlock(&gedf_cpu_lock);
++}
++
++
++/* gedf_check_resched - Check whether another CPU needs to reschedule.
++ *
++ * The function only checks and kicks the last CPU. It will reschedule and
++ * kick the next if necessary, and so on. The caller is responsible for making
++ * sure that it is not the last entry or that a reschedule is not necessary.
++ *
++ */
++static int gedf_check_resched(rt_domain_t *edf)
++{
++ cpu_entry_t *last;
++ int ret = 0;
++
++ spin_lock(&gedf_cpu_lock);
++
++ if (!list_empty(&edf->ready_queue)) {
++ last = list_entry(gedf_cpu_queue.prev, cpu_entry_t, list);
++ if (!last->executes_realtime ||
++ time_before(next_ready(edf)->rt_param.times.deadline,
++ last->cur_deadline))
++ {
++ if (smp_processor_id() == last->cpu)
++ set_tsk_need_resched(current);
++ else
++ if (!test_will_schedule(last->cpu))
++ smp_send_reschedule(last->cpu);
++ ret = 1;
++ }
++ }
++
++ spin_unlock(&gedf_cpu_lock);
++ return ret;
++}
++
++
++
++/* gedf_scheduler_tick - this function is called for every local timer
++ * interrupt.
++ *
++ * checks whether the current task has expired and checks
++ * whether we need to preempt it if it has not expired
++ */
++static reschedule_check_t gedf_scheduler_tick(void)
++{
++ unsigned long flags;
++ struct task_struct *t = current;
++ reschedule_check_t want_resched = NO_RESCHED;
++
++ /* expire tasks even if not in real-time mode
++ * this makes sure that at the end of real-time mode
++ * no tasks "run away forever".
++ */
++ BUG_ON(is_realtime(t) && t->time_slice > 100000);
++ if (is_realtime(t) && (!--t->time_slice)) {
++ /* this task has exhausted its budget in this period */
++ set_rt_flags(t, RT_F_SLEEP);
++ want_resched = FORCE_RESCHED;
++ set_will_schedule();
++ sched_trace_job_completion(t);
++ }
++ if (get_rt_mode() == MODE_RT_RUN)
++ {
++ /* check whether anything is waiting to be released
++ * this could probably be moved to the global timer
++ * interrupt handler since the state will only change
++ * once per jiffie
++ */
++ try_release_pending(&gedf);
++ if (want_resched != FORCE_RESCHED)
++ {
++ read_lock_irqsave(&gedf.ready_lock, flags);
++ if (edf_preemption_needed(&gedf, t))
++ {
++ want_resched = FORCE_RESCHED;
++ set_will_schedule();
++ }
++ read_unlock_irqrestore(&gedf.ready_lock, flags);
++ }
++ }
++ return want_resched;
++}
++
++/* This is the main Global EDF schedule function
++ *
++ * Assumes the caller holds the lock for rq and that irqs are disabled.
++ * This function only works for indirect switching.
++ */
++static int gedf_schedule(struct task_struct * prev,
++ struct task_struct ** next,
++ runqueue_t * rq)
++{
++ int need_deactivate = 1;
++ int rt;
++ jiffie_t deadline;
++ unsigned long flags;
++
++
++ if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP)
++ {
++ DUMP("preparing %d for next period\n", prev->pid);
++ edf_prepare_for_next_period(prev);
++ }
++
++ if (get_rt_mode() == MODE_RT_RUN) {
++ write_lock_irqsave(&gedf.ready_lock, flags);
++
++ clear_will_schedule();
++
++ if (is_realtime(prev) && is_released(prev) && is_running(prev)
++ && !edf_preemption_needed(&gedf, prev)) {
++ /* Our current task's next job has already been
++ * released and has higher priority than the highest
++ * priority waiting task; in other words: it is tardy.
++ * We just keep it.
++ */
++ DUMP("prev will be next, already released\n");
++ *next = prev;
++ rt = 1;
++ deadline = prev->rt_param.times.deadline;
++ need_deactivate = 0;
++ } else {
++ /* either not yet released, preempted, or non-rt */
++ *next = __take_ready(&gedf);
++ if (*next) {
++ /* mark the task as executing on this cpu */
++ set_task_cpu(*next, smp_processor_id());
++
++ /* stick the task into the runqueue */
++ __activate_task(*next, rq);
++ rt = 1;
++ deadline = (*next)->rt_param.times.deadline;
++ }
++ else
++ rt = deadline = 0;
++ }
++
++ adjust_cpu_queue(rt, deadline);
++
++ if (rt) {
++ set_rt_flags(*next, RT_F_RUNNING);
++ gedf.check_resched(&gedf);
++ }
++ write_unlock_irqrestore(&gedf.ready_lock, flags);
++ }
++
++ if (is_realtime(prev) && need_deactivate && prev->array) {
++ /* take it out of the run queue */
++ deactivate_task(prev, rq);
++ }
++
++ /* don't put back into release yet.
++ * We first need to actually switch
++ * stacks before we can execute it
++ * on a different CPU */
++
++ /* in the current implementation nobody cares about the return value */
++ return 0;
++}
++
++
++/* _finish_switch - we just finished the switch away from prev
++ * it is now safe to requeue the task
++ */
++static void gedf_finish_switch(struct task_struct *prev)
++{
++ if (!is_realtime(prev) || !is_running(prev))
++ return;
++
++ /*printk(KERN_INFO "gedf finish switch for %d\n", prev->pid);*/
++ if (get_rt_flags(prev) == RT_F_SLEEP ||
++ get_rt_mode() != MODE_RT_RUN) {
++ /* this task has expired
++ * _schedule has already taken care of updating
++ * the release and
++ * deadline. We just must check whether it has been released.
++ */
++ if (time_before_eq(prev->rt_param.times.release, jiffies)
++ && get_rt_mode() == MODE_RT_RUN) {
++ /* already released */
++ add_ready(&gedf, prev);
++ DUMP("%d goes straight to ready queue\n", prev->pid);
++ }
++ else
++ /* it has got to wait */
++ add_release(&gedf, prev);
++ }
++ else {
++ /* this is a forced preemption
++ * thus the task stays in the ready_queue
++ * we only must make it available to others
++ */
++ add_ready(&gedf, prev);
++ }
++}
++
++
++/* Prepare a task for running in RT mode
++ * Enqueues the task into master queue data structure
++ * returns
++ * -EPERM if task is not TASK_STOPPED
++ */
++static long gedf_prepare_task(struct task_struct * t)
++{
++ TRACE("global edf: prepare task %d\n", t->pid);
++
++ if (t->state == TASK_STOPPED) {
++ __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);
++
++ if (get_rt_mode() == MODE_RT_RUN)
++ /* The action is already on.
++ * Prepare immediate release
++ */
++ edf_release_now(t);
++ /* The task should be running in the queue, otherwise signal
++ * code will try to wake it up with fatal consequences.
++ */
++ t->state = TASK_RUNNING;
++ add_release(&gedf, t);
++ return 0;
++ }
++ else
++ return -EPERM;
++}
++
++static void gedf_wake_up_task(struct task_struct *task)
++{
++ /* We must determine whether task should go into the release
++ * queue or into the ready queue. It may enter the ready queue
++ * if it has credit left in its time slice and has not yet reached
++	 * its deadline. If it is now past its deadline, we assume this is the
++	 * arrival of a new sporadic job and thus put it in the ready queue
++	 * anyway. If it has zero budget and the next release is in the future,
++	 * it has to go to the release queue.
++ */
++ TRACE("global edf: wake up %d with budget=%d\n",
++ task->pid, task->time_slice);
++ task->state = TASK_RUNNING;
++ if (is_tardy(task)) {
++ /* new sporadic release */
++ edf_release_now(task);
++ sched_trace_job_release(task);
++ add_ready(&gedf, task);
++ }
++ else if (task->time_slice) {
++ /* came back in time before deadline
++ */
++ set_rt_flags(task, RT_F_RUNNING);
++ add_ready(&gedf, task);
++ }
++ else {
++ add_release(&gedf, task);
++ }
++
++}
++
++static void gedf_task_blocks(struct task_struct *t)
++{
++ BUG_ON(!is_realtime(t));
++ /* not really anything to do since it can only block if
++ * it is running, and when it is not running it is not in any
++ * queue anyway.
++ *
++ */
++ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
++ BUG_ON(t->rt_list.next != LIST_POISON1);
++ BUG_ON(t->rt_list.prev != LIST_POISON2);
++}
++
++
++/* When _tear_down is called, the task should not be in any queue any more
++ * as it must have blocked first. We don't have any internal state for the task,
++ * it is all in the task_struct.
++ */
++static long gedf_tear_down(struct task_struct * t)
++{
++ BUG_ON(!is_realtime(t));
++ TRACE("global edf: tear down called for %d \n", t->pid);
++ BUG_ON(t->array);
++ BUG_ON(t->rt_list.next != LIST_POISON1);
++ BUG_ON(t->rt_list.prev != LIST_POISON2);
++ return 0;
++}
++
++
++static int gedf_mode_change(int new_mode)
++{
++ int cpu;
++ cpu_entry_t *entry;
++
++/* printk(KERN_INFO "[%d] global edf: mode changed to %d\n", smp_processor_id(),
++ new_mode);*/
++ if (new_mode == MODE_RT_RUN) {
++ rerelease_all(&gedf, edf_release_at);
++
++ /* initialize per CPU state
++ * we can't do this at boot time because we don't know
++ * which CPUs will be online and we can't put non-existing
++ * cpus into the queue
++ */
++ spin_lock(&gedf_cpu_lock);
++ /* get old cruft out of the way in case we reenter real-time
++ * mode for a second time
++ */
++ while (!list_empty(&gedf_cpu_queue))
++ list_del(gedf_cpu_queue.next);
++ /* reinitialize */
++ for_each_online_cpu(cpu) {
++ entry = &per_cpu(gedf_cpu_entries, cpu);
++ atomic_set(&entry->will_schedule, 0);
++ entry->executes_realtime = 0;
++ entry->cur_deadline = 0;
++ entry->cpu = cpu;
++ list_add(&entry->list, &gedf_cpu_queue);
++ }
++ spin_unlock(&gedf_cpu_lock);
++ }
++ /*printk(KERN_INFO "[%d] global edf: mode change done\n", smp_processor_id()); */
++ return 0;
++}
++
++
++/* Plugin object */
++static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
++ .ready_to_use = 0
++};
++
++
++/*
++ * Plugin initialization code.
++ */
++#define INIT_SCHED_PLUGIN (struct sched_plugin){\
++ .plugin_name = "Global EDF",\
++ .ready_to_use = 1,\
++ .scheduler_tick = gedf_scheduler_tick,\
++ .prepare_task = gedf_prepare_task,\
++ .sleep_next_period = edf_sleep_next_period,\
++ .tear_down = gedf_tear_down,\
++ .schedule = gedf_schedule,\
++ .finish_switch = gedf_finish_switch,\
++ .mode_change = gedf_mode_change,\
++ .wake_up_task = gedf_wake_up_task,\
++ .task_blocks = gedf_task_blocks \
++ }
++
++
++sched_plugin_t *__init init_global_edf_plugin(void)
++{
++ if (!s_plugin.ready_to_use)
++ {
++ edf_domain_init(&gedf, gedf_check_resched);
++ s_plugin = INIT_SCHED_PLUGIN;
++ }
++ return &s_plugin;
++}
++
++
++
++/*****************************************************************************/
++/*****************************************************************************/
++/*****************************************************************************/
++/* NON-PREEMPTIVE GLOBAL EDF */
++
++
++/* gedf_np_scheduler_tick - this function is called for every local timer
++ * interrupt.
++ *
++ *			checks whether anything is pending release and whether
++ *			the current task has exhausted its budget; no preemption
++ *			check is performed since this plugin is non-preemptive
++ */
++static reschedule_check_t gedf_np_scheduler_tick(void)
++{
++ if (get_rt_mode() == MODE_RT_RUN)
++ {
++ /* check whether anything is waiting to be released
++ * this could probably be moved to the global timer
++ * interrupt handler since the state will only change
++ * once per jiffie
++ */
++ try_release_pending(&gedf);
++ }
++
++ /* expire tasks even if not in real-time mode
++ * this makes sure that at the end of real-time mode
++ * no tasks "run away forever".
++ */
++ BUG_ON(current->time_slice > 1000);
++ if (is_realtime(current) && (!--current->time_slice)) {
++ /* this task has exhausted its budget in this period */
++ set_rt_flags(current, RT_F_SLEEP);
++ return FORCE_RESCHED;
++ }
++ else
++ return NO_RESCHED;
++}
++
++/* gedf_np_check_resched - Check whether another CPU needs to reschedule.
++ *
++ * The function only checks and kicks the last CPU. It will reschedule and
++ * kick the next if necessary, and so on. The caller is responsible for making
++ * sure that it is not the last entry or that a reschedule is not necessary.
++ *
++ */
++static int gedf_np_check_resched(rt_domain_t *edf)
++{
++ cpu_entry_t *last;
++ int ret = 0;
++
++ spin_lock(&gedf_cpu_lock);
++
++ if (!list_empty(&edf->ready_queue)) {
++ last = list_entry(gedf_cpu_queue.prev, cpu_entry_t, list);
++ /* preemption happens only for non-realtime tasks */
++ if (!last->executes_realtime)
++ {
++ if (smp_processor_id() == last->cpu)
++ set_tsk_need_resched(current);
++ else
++ smp_send_reschedule(last->cpu);
++ ret = 1;
++ goto out;
++ }
++ }
++
++ out:
++ spin_unlock(&gedf_cpu_lock);
++ return ret;
++}
++
++
++/* non-preemptive global EDF
++ *
++ * Non-preemptive EDF is almost the same as normal EDF. We only have to
++ * adjust the scheduler tick and the resched function.
++ */
++#define INIT_SCHED_PLUGIN_NP (struct sched_plugin){\
++ .plugin_name = "Non-Preemptive Global EDF",\
++ .ready_to_use = 1,\
++ .scheduler_tick = gedf_np_scheduler_tick,\
++ .prepare_task = gedf_prepare_task,\
++ .sleep_next_period = edf_sleep_next_period,\
++ .tear_down = gedf_tear_down,\
++ .schedule = gedf_schedule,\
++ .finish_switch = gedf_finish_switch,\
++ .mode_change = gedf_mode_change,\
++ .wake_up_task = gedf_wake_up_task,\
++ .task_blocks = gedf_task_blocks \
++ }
++
++
++/* As we only set the plugin at boot time,
++ * we can use the same structure as preemptive EDF. This simplifies a lot
++ * of the functions.
++ */
++sched_plugin_t* __init init_global_edf_np_plugin(void)
++{
++ if (!s_plugin.ready_to_use)
++ {
++ edf_domain_init(&gedf, gedf_np_check_resched);
++ s_plugin = INIT_SCHED_PLUGIN_NP;
++ }
++ return &s_plugin;
++}
+diff --git a/kernel/sched_gsn_edf.c b/kernel/sched_gsn_edf.c
+new file mode 100644
+index 0000000..27d1b37
+--- /dev/null
++++ b/kernel/sched_gsn_edf.c
+@@ -0,0 +1,814 @@
++/*
++ * kernel/sched_gsn_edf.c
++ *
++ * Implementation of the GSN-EDF scheduling algorithm.
++ *
++ * This version uses the simple approach and serializes all scheduling
++ * decisions by the use of a queue lock. This is probably not the
++ * best way to do it, but it should suffice for now. It should not
++ * affect the benchmarks since all synchronization primitives will
++ * take the same performance hit, if any.
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++#include
++
++/* Overview of GSN-EDF operations.
++ *
++ * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
++ * description only covers how the individual operations are implemented in
++ * LITMUS.
++ *
++ * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
++ * structure (NOT the actually scheduled
++ * task). If there is another linked task To
++ * already it will set To->linked_on = NO_CPU
++ * (thereby removing its association with this
++ * CPU). However, it will not requeue the
++ * previously linked task (if any). It will set
++ * T's state to RT_F_RUNNING and check whether
++ * it is already running somewhere else. If T
++ * is scheduled somewhere else it will link
++ * it to that CPU instead (and pull the linked
++ * task to cpu). T may be NULL.
++ *
++ * unlink(T) - Unlink removes T from all scheduler data
++ * structures. If it is linked to some CPU it
++ * will link NULL to that CPU. If it is
++ * currently queued in the gsnedf queue it will
++ * be removed from the T->rt_list. It is safe to
++ * call unlink(T) if T is not linked. T may not
++ * be NULL.
++ *
++ * requeue(T) - Requeue will insert T into the appropriate
++ * queue. If the system is in real-time mode and
++ *			  T is released already, it will go into the
++ *			  ready queue. If the system is not in
++ *			  real-time mode, then T will go into the
++ * release queue. If T's release time is in the
++ * future, it will go into the release
++ * queue. That means that T's release time/job
++ *			  no/etc. has to be updated before requeue(T) is
++ * called. It is not safe to call requeue(T)
++ * when T is already queued. T may not be NULL.
++ *
++ * gsnedf_job_arrival(T) - This is the catch all function when T enters
++ * the system after either a suspension or at a
++ * job release. It will queue T (which means it
++ * is not safe to call gsnedf_job_arrival(T) if
++ * T is already queued) and then check whether a
++ * preemption is necessary. If a preemption is
++ * necessary it will update the linkage
++ *			  accordingly and cause schedule() to be called
++ * (either with an IPI or need_resched). It is
++ * safe to call gsnedf_job_arrival(T) if T's
++ * next job has not been actually released yet
++ *			  (release time in the future). T will be put
++ * on the release queue in that case.
++ *
++ * job_completion(T) - Take care of everything that needs to be done
++ * to prepare T for its next release and place
++ * it in the right queue with
++ * gsnedf_job_arrival().
++ *
++ *
++ * When we know that T is linked to a CPU, then link_task_to_cpu(NULL, CPU)
++ * is equivalent to unlink(T). Note that if you unlink a task from a CPU,
++ * none of these functions will automatically propagate a pending task from
++ * the ready queue into the now-empty link. This is the job of the calling
++ * function (by means of __take_ready).
++ */
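++
++/* Typical flow of a job release in this file (informal sketch):
++ *
++ *   gsnedf_scheduler_tick()           - on CPU 0, at the quantum boundary
++ *     -> gsnedf_release_jobs()        - moves released jobs off the release queue
++ *        -> gsnedf_job_arrival(T)     - requeue(T) and check for a preemption
++ *           -> link_task_to_cpu(T, c) - if T beats the lowest-priority linked task
++ *              -> preempt(c)          - IPI or need_resched so that schedule() runs
++ */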
++
++
++/* cpu_entry_t - maintain the linked and scheduled state
++ */
++typedef struct {
++ int cpu;
++ struct task_struct* linked; /* only RT tasks */
++ struct task_struct* scheduled; /* only RT tasks */
++ struct list_head list;
++ atomic_t will_schedule; /* prevent unneeded IPIs */
++} cpu_entry_t;
++DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
++
++#define set_will_schedule() \
++ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
++#define clear_will_schedule() \
++ (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
++#define test_will_schedule(cpu) \
++ (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
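++
++/* The will_schedule flag is a per-CPU hint: a CPU sets it (via
++ * set_will_schedule()) once it has decided to call schedule() anyway, and
++ * clears it at the start of gsnedf_schedule(). preempt() below consults
++ * test_will_schedule(cpu) before sending an IPI, so a remote CPU that is
++ * already about to reschedule is not interrupted a second time.
++ */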
++
++
++#define NO_CPU 0xffffffff
++
++/* The gsnedf_lock is used to serialize all scheduling events.
++ * It protects the gsnedf rt_domain (ready and release queues) as well as
++ * the per-CPU linkage state in gsnedf_cpu_queue.
++ */
++static queuelock_t gsnedf_lock;
++/* the cpus queue themselves according to priority in here */
++static LIST_HEAD(gsnedf_cpu_queue);
++
++static rt_domain_t gsnedf;
++
++
++/* update_cpu_position - Move the cpu entry to the correct place to maintain
++ * order in the cpu queue. Caller must hold gsnedf lock.
++ */
++static void update_cpu_position(cpu_entry_t *entry)
++{
++ cpu_entry_t *other;
++ struct list_head *pos;
++ list_del(&entry->list);
++	/* if this CPU has no linked real-time job it simply moves
++	 * to the end of the queue
++ */
++ if (entry->linked) {
++ list_for_each(pos, &gsnedf_cpu_queue) {
++ other = list_entry(pos, cpu_entry_t, list);
++ if (edf_higher_prio(entry->linked, other->linked)) {
++ __list_add(&entry->list, pos->prev, pos);
++ return;
++ }
++ }
++ }
++ /* if we get this far we have the lowest priority job */
++ list_add_tail(&entry->list, &gsnedf_cpu_queue);
++}
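++
++/* Invariant maintained by update_cpu_position(): gsnedf_cpu_queue is ordered
++ * by the priority of each CPU's linked task, highest first; CPUs without a
++ * linked real-time task sit at the tail. Hence gsnedf_cpu_queue.prev always
++ * names the cheapest preemption target, which is what gsnedf_job_arrival()
++ * compares against.
++ */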
++
++/* link_task_to_cpu - Update the link of a CPU.
++ * Handles the case where the to-be-linked task is already
++ * scheduled on a different CPU.
++ */
++static noinline void link_task_to_cpu(struct task_struct* linked,
++ cpu_entry_t *entry)
++
++{
++ cpu_entry_t *sched;
++ struct task_struct* tmp;
++ int on_cpu;
++
++ BUG_ON(linked && !is_realtime(linked));
++
++ /* Currently linked task is set to be unlinked. */
++ if (entry->linked) {
++ entry->linked->rt_param.linked_on = NO_CPU;
++ }
++
++ /* Link new task to CPU. */
++ if (linked) {
++ set_rt_flags(linked, RT_F_RUNNING);
++ /* handle task is already scheduled somewhere! */
++ on_cpu = linked->rt_param.scheduled_on;
++ if (on_cpu != NO_CPU) {
++ sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
++ /* this should only happen if not linked already */
++ BUG_ON(sched->linked == linked);
++
++ /* If we are already scheduled on the CPU to which we
++ * wanted to link, we don't need to do the swap --
++ * we just link ourselves to the CPU and depend on
++ * the caller to get things right.
++ */
++ if (entry != sched) {
++ tmp = sched->linked;
++ linked->rt_param.linked_on = sched->cpu;
++ sched->linked = linked;
++ update_cpu_position(sched);
++ linked = tmp;
++ }
++ }
++ if (linked) /* might be NULL due to swap */
++ linked->rt_param.linked_on = entry->cpu;
++ }
++ entry->linked = linked;
++ update_cpu_position(entry);
++}
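++
++/* Example of the swap handled above: suppose T is picked for CPU 0 but is
++ * still scheduled (i.e., still running) on CPU 1. Instead of forcing a
++ * migration while T's stack is in use, T is linked to CPU 1 and whatever
++ * was linked to CPU 1 before is linked to CPU 0 instead.
++ */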
++
++/* unlink - Make sure a task is not linked any longer to an entry
++ * where it was linked before. Must hold gsnedf_lock.
++ */
++static noinline void unlink(struct task_struct* t)
++{
++ cpu_entry_t *entry;
++
++ if (unlikely(!t)) {
++ TRACE_BUG_ON(!t);
++ return;
++ }
++
++ if (t->rt_param.linked_on != NO_CPU) {
++ /* unlink */
++ entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
++ t->rt_param.linked_on = NO_CPU;
++ link_task_to_cpu(NULL, entry);
++ } else if (in_list(&t->rt_list)) {
++ /* This is an interesting situation: t is scheduled,
++ * but was just recently unlinked. It cannot be
++ * linked anywhere else (because then it would have
++ * been relinked to this CPU), thus it must be in some
++ * queue. We must remove it from the list in this
++ * case.
++ */
++ list_del(&t->rt_list);
++ }
++}
++
++
++/* preempt - force a CPU to reschedule
++ */
++static noinline void preempt(cpu_entry_t *entry)
++{
++ /* We cannot make the is_np() decision here if it is a remote CPU
++ * because requesting exit_np() requires that we currently use the
++ * address space of the task. Thus, in the remote case we just send
++ * the IPI and let schedule() handle the problem.
++ */
++
++ if (smp_processor_id() == entry->cpu) {
++ if (entry->scheduled && is_np(entry->scheduled))
++ request_exit_np(entry->scheduled);
++ else
++ set_tsk_need_resched(current);
++ } else
++		/* in case that it is a remote CPU we have to defer
++		 * the decision to the remote CPU
++ * FIXME: We could save a few IPI's here if we leave the flag
++ * set when we are waiting for a np_exit().
++ */
++ if (!test_will_schedule(entry->cpu))
++ smp_send_reschedule(entry->cpu);
++}
++
++/* requeue - Put an unlinked task into gsn-edf domain.
++ * Caller must hold gsnedf_lock.
++ */
++static noinline void requeue(struct task_struct* task)
++{
++ BUG_ON(!task);
++ /* sanity check rt_list before insertion */
++ BUG_ON(in_list(&task->rt_list));
++
++ if (get_rt_flags(task) == RT_F_SLEEP ||
++ get_rt_mode() != MODE_RT_RUN) {
++ /* this task has expired
++ * _schedule has already taken care of updating
++ * the release and
++		 * deadline. We only have to check whether it has been released.
++ */
++ if (is_released(task) && get_rt_mode() == MODE_RT_RUN)
++ __add_ready(&gsnedf, task);
++ else {
++ /* it has got to wait */
++ __add_release(&gsnedf, task);
++ }
++
++ } else
++ /* this is a forced preemption
++ * thus the task stays in the ready_queue
++ * we only must make it available to others
++ */
++ __add_ready(&gsnedf, task);
++}
++
++/* gsnedf_job_arrival: task is either resumed or released */
++static noinline void gsnedf_job_arrival(struct task_struct* task)
++{
++ cpu_entry_t* last;
++
++ BUG_ON(list_empty(&gsnedf_cpu_queue));
++ BUG_ON(!task);
++
++ /* first queue arriving job */
++ requeue(task);
++
++ /* then check for any necessary preemptions */
++ last = list_entry(gsnedf_cpu_queue.prev, cpu_entry_t, list);
++ if (edf_preemption_needed(&gsnedf, last->linked)) {
++ /* preemption necessary */
++ task = __take_ready(&gsnedf);
++ TRACE("job_arrival: task %d linked to %d\n",
++ task->pid, last->cpu);
++ if (last->linked)
++ requeue(last->linked);
++
++ link_task_to_cpu(task, last);
++ preempt(last);
++ }
++}
++
++/* check for current job releases */
++static noinline void gsnedf_release_jobs(void)
++{
++ struct list_head *pos, *save;
++ struct task_struct *queued;
++
++ list_for_each_safe(pos, save, &gsnedf.release_queue) {
++ queued = list_entry(pos, struct task_struct, rt_list);
++ if (likely(is_released(queued))) {
++ /* this one is ready to go*/
++ list_del(pos);
++ set_rt_flags(queued, RT_F_RUNNING);
++
++ sched_trace_job_release(queued);
++ gsnedf_job_arrival(queued);
++ }
++ else
++ /* the release queue is ordered */
++ break;
++ }
++}
++
++/* gsnedf_scheduler_tick - this function is called for every local timer
++ * interrupt.
++ *
++ * checks whether the current task has expired and checks
++ * whether we need to preempt it if it has not expired
++ */
++static reschedule_check_t gsnedf_scheduler_tick(void)
++{
++ unsigned long flags;
++ struct task_struct* t = current;
++ reschedule_check_t want_resched = NO_RESCHED;
++
++ /* expire tasks even if not in real-time mode
++ * this makes sure that at the end of real-time mode
++ * no task "runs away forever".
++ */
++ if (is_realtime(t))
++ TRACE_CUR("before dec: time_slice == %u\n", t->time_slice);
++
++ if (is_realtime(t) && t->time_slice && !--t->time_slice) {
++ if (!is_np(t)) { /* np tasks will be preempted when they become
++ preemptable again */
++ want_resched = FORCE_RESCHED;
++ set_will_schedule();
++ TRACE("gsnedf_scheduler_tick: "
++ "%d is preemptable "
++ " => FORCE_RESCHED\n", t->pid);
++ } else {
++ TRACE("gsnedf_scheduler_tick: "
++ "%d is non-preemptable, "
++ "preemption delayed.\n", t->pid);
++ request_exit_np(t);
++ }
++ }
++
++ /* only the first CPU needs to release jobs */
++ if (get_rt_mode() == MODE_RT_RUN && smp_processor_id() == 0) {
++ queue_lock_irqsave(&gsnedf_lock, flags);
++
++ /* (1) try to release pending jobs */
++ gsnedf_release_jobs();
++
++ /* we don't need to check linked != scheduled since
++ * set_tsk_need_resched has been set by preempt() if necessary
++ */
++
++ queue_unlock_irqrestore(&gsnedf_lock, flags);
++ }
++
++ return want_resched;
++}
++
++/* caller holds gsnedf_lock */
++static noinline void job_completion(struct task_struct *t)
++{
++ BUG_ON(!t);
++
++ sched_trace_job_completion(t);
++
++ TRACE_TASK(t, "job_completion().\n");
++
++ /* set flags */
++ set_rt_flags(t, RT_F_SLEEP);
++ /* prepare for next period */
++ edf_prepare_for_next_period(t);
++ /* unlink */
++ unlink(t);
++ /* requeue
++ * But don't requeue a blocking task. */
++ if (is_running(t))
++ gsnedf_job_arrival(t);
++}
++
++
++/* Getting schedule() right is a bit tricky. schedule() may not make any
++ * assumptions on the state of the current task since it may be called for a
++ * number of reasons. The reasons include a scheduler_tick() determined that it
++ * was necessary, because sys_exit_np() was called, because some Linux
++ * subsystem determined so, or even (in the worst case) because there is a bug
++ * hidden somewhere. Thus, we must take extreme care to determine what the
++ * current state is.
++ *
++ * The CPU could currently be scheduling a task (or not), be linked (or not).
++ *
++ * The following assertions for the scheduled task could hold:
++ *
++ * - !is_running(scheduled) // the job blocks
++ * - scheduled->timeslice == 0 // the job completed (forcefully)
++ * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
++ * - linked != scheduled // we need to reschedule (for any reason)
++ * - is_np(scheduled) // rescheduling must be delayed,
++ * sys_exit_np must be requested
++ *
++ * Any of these can occur together.
++ */
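++
++/* In code, the cases above are handled in this order: a blocking task is
++ * unlinked; a non-preemptable task that should stop is asked to call
++ * sys_exit_np(); a preemptable task that is out of budget or sleeping goes
++ * through job_completion(); leaving real-time mode unlinks and requeues the
++ * linked task; an unlinked CPU pulls the next ready job; finally, if linked
++ * and scheduled differ, the runqueue is updated to switch to the linked job.
++ */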
++static int gsnedf_schedule(struct task_struct * prev,
++ struct task_struct ** next,
++ runqueue_t * rq)
++{
++ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
++ int out_of_time, sleep, preempt, np, exists,
++ rt, blocks;
++ struct task_struct* linked;
++
++ /* Will be released in finish_switch. */
++ queue_lock(&gsnedf_lock);
++ clear_will_schedule();
++
++ /* sanity checking */
++ BUG_ON(entry->scheduled && entry->scheduled != prev);
++ BUG_ON(entry->scheduled && !is_realtime(prev));
++
++ /* (0) Determine state */
++ exists = entry->scheduled != NULL;
++ blocks = exists && !is_running(entry->scheduled);
++ out_of_time = exists && !entry->scheduled->time_slice;
++ np = exists && is_np(entry->scheduled);
++ sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
++ preempt = entry->scheduled != entry->linked;
++ rt = get_rt_mode() == MODE_RT_RUN;
++
++ /* If a task blocks we have no choice but to reschedule.
++ */
++ if (blocks)
++ unlink(entry->scheduled);
++
++ /* Request a sys_exit_np() call if we would like to preempt but cannot.
++ * We need to make sure to update the link structure anyway in case
++ * that we are still linked. Multiple calls to request_exit_np() don't
++ * hurt.
++ */
++ if (np && (out_of_time || preempt || sleep)) {
++ unlink(entry->scheduled);
++ request_exit_np(entry->scheduled);
++ }
++
++ /* Any task that is preemptable and either exhausts its execution
++ * budget or wants to sleep completes. We may have to reschedule after
++ * this.
++ */
++ if (!np && (out_of_time || sleep))
++ job_completion(entry->scheduled);
++
++ /* Stop real-time tasks when we leave real-time mode
++ */
++ if (!rt && entry->linked) {
++ /* task will be preempted once it is preemptable
++ * (which it may be already)
++ */
++ linked = entry->linked;
++ unlink(linked);
++ requeue(linked);
++ }
++
++ /* Link pending task if we became unlinked.
++ */
++ if (rt && !entry->linked)
++ link_task_to_cpu(__take_ready(&gsnedf), entry);
++
++ /* The final scheduling decision. Do we need to switch for some reason?
++	 * If linked is different from scheduled, select linked as next.
++ */
++ if ((!np || blocks) &&
++ entry->linked != entry->scheduled) {
++ /* Take care of a previously scheduled
++ * job by taking it out of the Linux runqueue.
++ */
++ if (entry->scheduled) {
++ if (prev->array)
++ /* take it out of the run queue */
++ deactivate_task(prev, rq);
++ }
++
++ /* Schedule a linked job? */
++ if (entry->linked) {
++ *next = entry->linked;
++ /* mark the task as executing on this cpu */
++ set_task_cpu(*next, smp_processor_id());
++ /* stick the task into the runqueue */
++ __activate_task(*next, rq);
++ }
++ } else
++ /* Only override Linux scheduler if we have real-time task
++ * scheduled that needs to continue.
++ */
++ if (exists)
++ *next = prev;
++
++ /* Unlock in case that we don't affect real-time tasks or
++ * if nothing changed and finish_switch won't be called.
++ */
++ if (prev == *next || (!is_realtime(prev) && !*next))
++ queue_unlock(&gsnedf_lock);
++
++ return 0;
++}
++
++
++/* _finish_switch - we just finished the switch away from prev
++ */
++static void gsnedf_finish_switch(struct task_struct *prev)
++{
++ cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
++
++ if (is_realtime(current))
++ entry->scheduled = current;
++ else
++ entry->scheduled = NULL;
++
++ prev->rt_param.scheduled_on = NO_CPU;
++ current->rt_param.scheduled_on = smp_processor_id();
++
++ /* unlock in case schedule() left it locked */
++ if (is_realtime(current) || is_realtime(prev))
++ queue_unlock(&gsnedf_lock);
++}
++
++
++/* Prepare a task for running in RT mode
++ * Enqueues the task into master queue data structure
++ * returns
++ * -EPERM if task is not TASK_STOPPED
++ */
++static long gsnedf_prepare_task(struct task_struct * t)
++{
++ unsigned long flags;
++ TRACE("gsn edf: prepare task %d\n", t->pid);
++
++ if (t->state == TASK_STOPPED) {
++ __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);
++
++ t->rt_param.scheduled_on = NO_CPU;
++ t->rt_param.linked_on = NO_CPU;
++ if (get_rt_mode() == MODE_RT_RUN)
++ /* The action is already on.
++ * Prepare immediate release
++ */
++ edf_release_now(t);
++ /* The task should be running in the queue, otherwise signal
++ * code will try to wake it up with fatal consequences.
++ */
++ t->state = TASK_RUNNING;
++
++ queue_lock_irqsave(&gsnedf_lock, flags);
++ requeue(t);
++ queue_unlock_irqrestore(&gsnedf_lock, flags);
++ return 0;
++ }
++ else
++ return -EPERM;
++}
++
++static void gsnedf_wake_up_task(struct task_struct *task)
++{
++ unsigned long flags;
++ /* We must determine whether task should go into the release
++ * queue or into the ready queue. It may enter the ready queue
++ * if it has credit left in its time slice and has not yet reached
++	 * its deadline. If it is now past its deadline, we assume this is the
++	 * arrival of a new sporadic job and thus put it in the ready queue
++	 * anyway. If it has zero budget and the next release is in the future,
++	 * it has to go to the release queue.
++ */
++ TRACE("gsnedf: %d unsuspends with budget=%d\n",
++ task->pid, task->time_slice);
++ task->state = TASK_RUNNING;
++
++ /* We need to take suspensions because of semaphores into
++ * account! If a job resumes after being suspended due to acquiring
++ * a semaphore, it should never be treated as a new job release.
++ */
++ if (get_rt_flags(task) == RT_F_EXIT_SEM) {
++ set_rt_flags(task, RT_F_RUNNING);
++ } else {
++ if (is_tardy(task)) {
++ /* new sporadic release */
++ edf_release_now(task);
++ sched_trace_job_release(task);
++ }
++ else if (task->time_slice)
++ /* came back in time before deadline
++ */
++ set_rt_flags(task, RT_F_RUNNING);
++ }
++
++ queue_lock_irqsave(&gsnedf_lock, flags);
++ gsnedf_job_arrival(task);
++ queue_unlock_irqrestore(&gsnedf_lock, flags);
++}
++
++static void gsnedf_task_blocks(struct task_struct *t)
++{
++ unsigned long flags;
++
++ /* unlink if necessary */
++ queue_lock_irqsave(&gsnedf_lock, flags);
++ unlink(t);
++ queue_unlock_irqrestore(&gsnedf_lock, flags);
++
++ BUG_ON(!is_realtime(t));
++ TRACE("task %d suspends with budget=%d\n", t->pid, t->time_slice);
++ BUG_ON(t->rt_list.next != LIST_POISON1);
++ BUG_ON(t->rt_list.prev != LIST_POISON2);
++}
++
++
++/* When _tear_down is called, the task should not be in any queue any more
++ * as it must have blocked first. We don't have any internal state for the task,
++ * it is all in the task_struct.
++ */
++static long gsnedf_tear_down(struct task_struct * t)
++{
++ BUG_ON(!is_realtime(t));
++ TRACE_TASK(t, "RIP\n");
++ BUG_ON(t->array);
++ BUG_ON(t->rt_list.next != LIST_POISON1);
++ BUG_ON(t->rt_list.prev != LIST_POISON2);
++ return 0;
++}
++
++static long gsnedf_pi_block(struct pi_semaphore *sem,
++ struct task_struct *new_waiter)
++{
++ /* This callback has to handle the situation where a new waiter is
++ * added to the wait queue of the semaphore.
++ *
++	 * We must check whether it has a higher priority than the currently
++ * highest-priority task, and then potentially reschedule.
++ */
++
++ BUG_ON(!new_waiter);
++
++ if (edf_higher_prio(new_waiter, sem->hp.task)) {
++ TRACE_TASK(new_waiter, " boosts priority\n");
++ /* called with IRQs disabled */
++ queue_lock(&gsnedf_lock);
++ /* store new highest-priority task */
++ sem->hp.task = new_waiter;
++ if (sem->holder) {
++ /* let holder inherit */
++ sem->holder->rt_param.inh_task = new_waiter;
++ unlink(sem->holder);
++ gsnedf_job_arrival(sem->holder);
++ }
++ queue_unlock(&gsnedf_lock);
++ }
++
++ return 0;
++}
++
++static long gsnedf_inherit_priority(struct pi_semaphore *sem,
++ struct task_struct *new_owner)
++{
++ /* We don't need to acquire the gsnedf_lock since at the time of this
++ * call new_owner isn't actually scheduled yet (it's still sleeping)
++ * and since the calling function already holds sem->wait.lock, which
++ * prevents concurrent sem->hp.task changes.
++ */
++
++ if (sem->hp.task && sem->hp.task != new_owner) {
++ new_owner->rt_param.inh_task = sem->hp.task;
++ TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
++ sem->hp.task->comm, sem->hp.task->pid);
++ } else
++ TRACE_TASK(new_owner,
++ "cannot inherit priority, "
++ "no higher priority job waits.\n");
++ return 0;
++}
++
++/* This function is called on a semaphore release, and assumes that
++ * the current task is also the semaphore holder.
++ */
++static long gsnedf_return_priority(struct pi_semaphore *sem)
++{
++ struct task_struct* t = current;
++ int ret = 0;
++
++ /* Find new highest-priority semaphore task
++ * if holder task is the current hp.task.
++ *
++ * Calling function holds sem->wait.lock.
++ */
++ if (t == sem->hp.task)
++ edf_set_hp_task(sem);
++
++ TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
++
++ if (t->rt_param.inh_task) {
++ /* interrupts already disabled by PI code */
++ queue_lock(&gsnedf_lock);
++
++ /* Reset inh_task to NULL. */
++ t->rt_param.inh_task = NULL;
++
++ /* Check if rescheduling is necessary */
++ unlink(t);
++ gsnedf_job_arrival(t);
++ queue_unlock(&gsnedf_lock);
++ }
++
++ return ret;
++}
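++
++/* Summary of the priority-inheritance callbacks above: pi_block() records the
++ * highest-priority waiter and lets the current holder inherit its priority,
++ * inherit_priority() passes that priority on to a task that just acquired the
++ * semaphore, and return_priority() drops the inherited priority when the
++ * holder releases the semaphore. pi_block() and return_priority() follow the
++ * change with unlink() + gsnedf_job_arrival() so the linkage reflects the new
++ * effective priority; inherit_priority() does not need to, since the new
++ * owner is still suspended at that point.
++ */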
++
++static int gsnedf_mode_change(int new_mode)
++{
++ unsigned long flags;
++ int cpu;
++ cpu_entry_t *entry;
++
++ if (new_mode == MODE_RT_RUN) {
++ queue_lock_irqsave(&gsnedf_lock, flags);
++
++ __rerelease_all(&gsnedf, edf_release_at);
++
++ /* get old cruft out of the way in case we reenter real-time
++ * mode for a second time
++ */
++ while (!list_empty(&gsnedf_cpu_queue))
++ list_del(gsnedf_cpu_queue.next);
++ /* reinitialize */
++ for_each_online_cpu(cpu) {
++ entry = &per_cpu(gsnedf_cpu_entries, cpu);
++ atomic_set(&entry->will_schedule, 0);
++ entry->linked = NULL;
++ entry->scheduled = NULL;
++ list_add(&entry->list, &gsnedf_cpu_queue);
++ }
++
++ queue_unlock_irqrestore(&gsnedf_lock, flags);
++
++ }
++ return 0;
++}
++
++
++/* Plugin object */
++static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
++ .ready_to_use = 0
++};
++
++
++/*
++ * Plugin initialization code.
++ */
++#define INIT_SCHED_PLUGIN (struct sched_plugin){ \
++ .plugin_name = "GSN-EDF", \
++ .ready_to_use = 1, \
++ .scheduler_tick = gsnedf_scheduler_tick, \
++ .prepare_task = gsnedf_prepare_task, \
++ .sleep_next_period = edf_sleep_next_period, \
++ .tear_down = gsnedf_tear_down, \
++ .schedule = gsnedf_schedule, \
++ .finish_switch = gsnedf_finish_switch, \
++ .mode_change = gsnedf_mode_change, \
++ .wake_up_task = gsnedf_wake_up_task, \
++ .task_blocks = gsnedf_task_blocks, \
++ .inherit_priority = gsnedf_inherit_priority, \
++ .return_priority = gsnedf_return_priority, \
++ .pi_block = gsnedf_pi_block \
++}
++
++
++sched_plugin_t *__init init_gsn_edf_plugin(void)
++{
++ int cpu;
++ cpu_entry_t *entry;
++
++ if (!s_plugin.ready_to_use)
++ {
++ /* initialize CPU state */
++ for (cpu = 0; cpu < NR_CPUS; cpu++) {
++ entry = &per_cpu(gsnedf_cpu_entries, cpu);
++ atomic_set(&entry->will_schedule, 0);
++ entry->linked = NULL;
++ entry->scheduled = NULL;
++ entry->cpu = cpu;
++ }
++
++ queue_lock_init(&gsnedf_lock);
++ edf_domain_init(&gsnedf, NULL);
++ s_plugin = INIT_SCHED_PLUGIN;
++ }
++ return &s_plugin;
++}
++
++
+diff --git a/kernel/sched_part_edf.c b/kernel/sched_part_edf.c
+new file mode 100644
+index 0000000..a792ac5
+--- /dev/null
++++ b/kernel/sched_part_edf.c
+@@ -0,0 +1,340 @@
++/*
++ * kernel/sched_part_edf.c
++ *
++ * Implementation of the partitioned EDF scheduler plugin.
++ */
++
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++
++typedef struct {
++ rt_domain_t domain;
++ int cpu;
++ struct task_struct* scheduled; /* only RT tasks */
++ spinlock_t lock;
++} part_edf_domain_t;
++
++
++#define local_edf (&__get_cpu_var(part_edf_domains).domain)
++#define local_pedf (&__get_cpu_var(part_edf_domains))
++#define remote_edf(cpu) (&per_cpu(part_edf_domains, cpu).domain)
++#define remote_pedf(cpu) (&per_cpu(part_edf_domains, cpu))
++#define task_edf(task) remote_edf(get_partition(task))
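++
++/* These helpers map a task to the state of its assigned partition; e.g.,
++ * task_edf(t) yields the rt_domain_t of the CPU returned by get_partition(t),
++ * so every queue operation below is directed at the domain of the task's
++ * own partition.
++ */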
++
++static void part_edf_domain_init(part_edf_domain_t* pedf,
++ check_resched_needed_t check,
++ int cpu)
++{
++ edf_domain_init(&pedf->domain, check);
++ pedf->cpu = cpu;
++ pedf->lock = SPIN_LOCK_UNLOCKED;
++ pedf->scheduled = NULL;
++}
++
++DEFINE_PER_CPU(part_edf_domain_t, part_edf_domains);
++
++/* This check is trivial in partitioned systems as we only have to consider
++ * the CPU of the partition.
++ *
++ */
++static int part_edf_check_resched(rt_domain_t *edf)
++{
++ part_edf_domain_t *pedf = container_of(edf, part_edf_domain_t, domain);
++ int ret = 0;
++
++ spin_lock(&pedf->lock);
++
++ /* because this is a callback from rt_domain_t we already hold
++ * the necessary lock for the ready queue
++ */
++ if (edf_preemption_needed(edf, pedf->scheduled)) {
++ if (pedf->cpu == smp_processor_id())
++ set_tsk_need_resched(current);
++ else
++ smp_send_reschedule(pedf->cpu);
++ ret = 1;
++ }
++ spin_unlock(&pedf->lock);
++ return ret;
++}
++
++
++static reschedule_check_t part_edf_scheduler_tick(void)
++{
++ unsigned long flags;
++ struct task_struct *t = current;
++ reschedule_check_t want_resched = NO_RESCHED;
++ rt_domain_t *edf = local_edf;
++ part_edf_domain_t *pedf = local_pedf;
++
++ /* Check for inconsistency. We don't need the lock for this since
++ * ->scheduled is only changed in schedule, which obviously is not
++ * executing in parallel on this CPU
++ */
++ BUG_ON(is_realtime(t) && t != pedf->scheduled);
++
++ /* expire tasks even if not in real-time mode
++ * this makes sure that at the end of real-time mode
++ * no tasks "run away forever".
++ */
++ if (is_realtime(t) && (!--t->time_slice)) {
++ /* this task has exhausted its budget in this period */
++ set_rt_flags(t, RT_F_SLEEP);
++ want_resched = FORCE_RESCHED;
++ }
++ if (get_rt_mode() == MODE_RT_RUN)
++ {
++ /* check whether anything is waiting to be released
++ * this could probably be moved to the global timer
++ * interrupt handler since the state will only change
++ * once per jiffie
++ */
++ try_release_pending(edf);
++ if (want_resched != FORCE_RESCHED)
++ {
++ read_lock_irqsave(&edf->ready_lock, flags);
++ if (edf_preemption_needed(edf, t))
++ want_resched = FORCE_RESCHED;
++ read_unlock_irqrestore(&edf->ready_lock, flags);
++ }
++ }
++ return want_resched;
++}
++
++static int part_edf_schedule(struct task_struct * prev,
++ struct task_struct ** next,
++ runqueue_t * rq)
++{
++ int need_deactivate = 1;
++ part_edf_domain_t* pedf = local_pedf;
++ rt_domain_t* edf = &pedf->domain;
++
++
++ if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP)
++ edf_prepare_for_next_period(prev);
++
++ if (get_rt_mode() == MODE_RT_RUN) {
++ write_lock(&edf->ready_lock);
++ if (is_realtime(prev) && is_released(prev) && is_running(prev)
++ && !edf_preemption_needed(edf, prev)) {
++ /* this really should only happen if the task has
++ * 100% utilization...
++ */
++ TRACE("prev will be next, already released\n");
++ *next = prev;
++ need_deactivate = 0;
++ } else {
++ /* either not yet released, preempted, or non-rt */
++ *next = __take_ready(edf);
++ if (*next) {
++ /* stick the task into the runqueue */
++ __activate_task(*next, rq);
++ set_task_cpu(*next, smp_processor_id());
++ }
++ }
++ spin_lock(&pedf->lock);
++ pedf->scheduled = *next;
++ spin_unlock(&pedf->lock);
++ if (*next)
++ set_rt_flags(*next, RT_F_RUNNING);
++
++ write_unlock(&edf->ready_lock);
++ }
++
++ if (is_realtime(prev) && need_deactivate && prev->array) {
++ /* take it out of the run queue */
++ deactivate_task(prev, rq);
++ }
++
++ return 0;
++}
++
++
++static void part_edf_finish_switch(struct task_struct *prev)
++{
++ rt_domain_t* edf = local_edf;
++
++ if (!is_realtime(prev) || !is_running(prev))
++ return;
++
++ if (get_rt_flags(prev) == RT_F_SLEEP ||
++ get_rt_mode() != MODE_RT_RUN) {
++ /* this task has expired
++ * _schedule has already taken care of updating
++ * the release and
++		 * the release and deadline. We only have to check whether
++		 * it has been released.
++ if (is_released(prev) && get_rt_mode() == MODE_RT_RUN) {
++ /* already released */
++ add_ready(edf, prev);
++ TRACE("%d goes straight to ready queue\n", prev->pid);
++ } else
++ /* it has got to wait */
++ add_release(edf, prev);
++ } else {
++ /* this is a forced preemption
++ * thus the task stays in the ready_queue
++ * we only must make it available to others
++ */
++ add_ready(edf, prev);
++ }
++}
++
++
++/* Prepare a task for running in RT mode
++ * Enqueues the task into master queue data structure
++ * returns
++ * -EPERM if task is not TASK_STOPPED
++ */
++static long part_edf_prepare_task(struct task_struct * t)
++{
++ rt_domain_t* edf = task_edf(t);
++
++
++ TRACE("[%d] part edf: prepare task %d on CPU %d\n",
++ smp_processor_id(), t->pid, get_partition(t));
++ if (t->state == TASK_STOPPED) {
++ __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);
++
++ if (get_rt_mode() == MODE_RT_RUN)
++ /* The action is already on.
++ * Prepare immediate release.
++ */
++ edf_release_now(t);
++ /* The task should be running in the queue, otherwise signal
++ * code will try to wake it up with fatal consequences.
++ */
++ t->state = TASK_RUNNING;
++ add_release(edf, t);
++ return 0;
++ } else
++ return -EPERM;
++}
++
++static void part_edf_wake_up_task(struct task_struct *task)
++{
++ rt_domain_t* edf;
++
++ edf = task_edf(task);
++
++ /* We must determine whether task should go into the release
++ * queue or into the ready queue. It may enter the ready queue
++ * if it has credit left in its time slice and has not yet reached
++	 * its deadline. If it is now past its deadline, we assume this is the
++	 * arrival of a new sporadic job and thus put it in the ready queue
++	 * anyway. If it has zero budget and the next release is in the future,
++	 * it has to go to the release queue.
++ */
++ TRACE("part edf: wake up %d with budget=%d for cpu %d\n",
++ task->pid, task->time_slice, get_partition(task));
++ task->state = TASK_RUNNING;
++ if (is_tardy(task)) {
++ /* new sporadic release */
++ edf_release_now(task);
++ add_ready(edf, task);
++
++ } else if (task->time_slice) {
++ /* Came back in time before deadline. This may cause
++		 * deadline overruns, but since we don't handle suspensions
++		 * in the analytical model, we don't care: we can't
++		 * guarantee anything at all if tasks block.
++ */
++ set_rt_flags(task, RT_F_RUNNING);
++ add_ready(edf, task);
++
++ } else {
++ add_release(edf, task);
++ }
++
++}
++
++static void part_edf_task_blocks(struct task_struct *t)
++{
++ BUG_ON(!is_realtime(t));
++ /* not really anything to do since it can only block if
++ * it is running, and when it is not running it is not in any
++ * queue anyway.
++ *
++ */
++ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
++ BUG_ON(in_list(&t->rt_list));
++}
++
++
++/* When _tear_down is called, the task should not be in any queue any more
++ * as it must have blocked first. We don't have any internal state for the task,
++ * it is all in the task_struct.
++ */
++static long part_edf_tear_down(struct task_struct * t)
++{
++ BUG_ON(!is_realtime(t));
++ TRACE("part edf: tear down called for %d \n", t->pid);
++ BUG_ON(t->array);
++ BUG_ON(in_list(&t->rt_list));
++ return 0;
++}
++
++
++static int part_edf_mode_change(int new_mode)
++{
++ int cpu;
++
++ if (new_mode == MODE_RT_RUN)
++ for_each_online_cpu(cpu)
++ rerelease_all(remote_edf(cpu), edf_release_at);
++ TRACE("[%d] part edf: mode changed to %d\n",
++ smp_processor_id(), new_mode);
++ return 0;
++}
++
++
++/* Plugin object */
++static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
++ .ready_to_use = 0
++};
++
++
++/*
++ * Plugin initialization code.
++ */
++#define INIT_SCHED_PLUGIN (struct sched_plugin) {\
++ .plugin_name = "Partitioned EDF",\
++ .ready_to_use = 1,\
++ .scheduler_tick = part_edf_scheduler_tick,\
++ .prepare_task = part_edf_prepare_task,\
++ .sleep_next_period = edf_sleep_next_period,\
++ .tear_down = part_edf_tear_down,\
++ .schedule = part_edf_schedule,\
++ .finish_switch = part_edf_finish_switch,\
++ .mode_change = part_edf_mode_change,\
++ .wake_up_task = part_edf_wake_up_task,\
++ .task_blocks = part_edf_task_blocks \
++}
++
++
++sched_plugin_t *__init init_part_edf_plugin(void)
++{
++ int i;
++
++ if (!s_plugin.ready_to_use)
++ {
++ for (i = 0; i < NR_CPUS; i++)
++ {
++ part_edf_domain_init(remote_pedf(i),
++ part_edf_check_resched, i);
++			printk(KERN_INFO "CPU partition %d initialized.\n", i);
++ }
++ s_plugin = INIT_SCHED_PLUGIN;
++ }
++ return &s_plugin;
++}
++
++
++
+diff --git a/kernel/sched_pfair.c b/kernel/sched_pfair.c
+new file mode 100644
+index 0000000..1a6a790
+--- /dev/null
++++ b/kernel/sched_pfair.c
+@@ -0,0 +1,503 @@
++/*
++ *
++ * Implementation of synchronized PFAIR PD2 scheduler
++ *
++ */
++
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++#include
++
++struct cpu_state {
++ struct task_struct * t;
++ volatile jiffie_t jiffie_marker;
++};
++/* PFAIR scheduling domain, release and ready queues */
++static pfair_domain_t pfair __cacheline_aligned_in_smp;
++
++/* An indicator that a quantum boundary was crossed
++ * and a decision has to be made
++ */
++static int sync_go[NR_CPUS];
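++
++/* Handshake between tick and schedule(): pfair_scheduler_tick() sets
++ * sync_go[cpu] to 1 at a quantum boundary (and forces a reschedule);
++ * pfair_schedule() decrements it and only makes a PFAIR decision when it
++ * observes the value 1, i.e., on the invocation directly following the tick.
++ */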
++
++
++/* A collection of CPU states protected by pfair lock */
++DEFINE_PER_CPU(struct cpu_state, states);
++
++/*
++ * This function gets called by the timer code, with HZ frequency
++ * with interrupts disabled.
++ *
++ * The function merges the release queue with the ready queue
++ * and indicates that quantum boundary was crossed.
++ *
++ * It also suggests to schedule off currently running
++ * real-time task if the mode is non-real-time.
++ */
++static reschedule_check_t pfair_scheduler_tick(void)
++{
++ int want_resched = NO_RESCHED;
++ sync_go[smp_processor_id()] = 0;
++ if (!cpu_isset(smp_processor_id(), pfair.domain_cpus))
++ goto out;
++ /* Now determine if we want current task to be preempted */
++ if (get_rt_mode() == MODE_RT_RUN) {
++ pfair_try_release_pending(&pfair);
++ want_resched = FORCE_RESCHED;
++ /* indicate that the interrupt fired */
++ sync_go[smp_processor_id()] = 1;
++ barrier();
++ } else if (is_realtime(current) && is_running(current)) {
++ /* In non real-time mode we want to
++ * schedule off real-time tasks */
++ want_resched = FORCE_RESCHED;
++ } else if (is_realtime(current) && !is_running(current)) {
++		TRACE("[%d] %d Timer interrupt on not running %d\n",
++ smp_processor_id(),
++ jiffies-rt_start_time, current->pid);
++ }
++out:
++ return want_resched;
++}
++
++/**
++ * This function is called by the processor
++ * that performs rescheduling. It saves the timing
++ * parameters of currently running jobs that were not rescheduled yet
++ * and releases the next subtask of these jobs, placing them into the
++ * release and ready queues.
++ */
++static void pretend_release(cpumask_t p)
++{
++ int i = 0;
++ struct task_struct * t = NULL;
++ /* for all the tasks increment the number of used quanta
++ * and release next subtask or job depending on the number
++ * of used quanta
++ */
++ for_each_cpu_mask(i, p) {
++ t = per_cpu(states, i).t;
++ if (t != NULL) {
++ backup_times(t);
++ inc_passed_quanta(t);
++ if ( get_passed_quanta(t) == get_exec_cost(t)) {
++ pfair_prepare_next_job(t);
++ } else {
++ pfair_prepare_next_subtask(t);
++ }
++ /*
++ TRACE("[%d] %d pretending release %d with (%d, %d)\n",
++ smp_processor_id(),
++ jiffies-rt_start_time,t->pid,
++ get_release(t)-rt_start_time,
++ get_deadline(t)-rt_start_time);*/
++ /* detect if the job or subtask has to be released now*/
++ if (time_before_eq(get_release(t), jiffies))
++ pfair_add_ready(&pfair, t);
++ else
++ pfair_add_release(&pfair, t);
++ }
++ }
++}
++/*
++ * Roll back the pretended release of tasks.
++ * Timing parameters are restored and tasks are removed
++ * from the queues, as they were before calling the schedule() function.
++ *
++ */
++static void rollback_release(cpumask_t p)
++{
++ int i = -1;
++ struct task_struct * t = NULL;
++ /*
++ * Rollback the pretended changes
++ */
++ for_each_cpu_mask(i, p) {
++ t = per_cpu(states, i).t;
++ if (t != NULL) {
++ restore_times(t);
++			if(t->rt_list.next != LIST_POISON1 ||
++			   t->rt_list.prev != LIST_POISON2) {
++ /* Delete the task from a queue */
++ list_del(&t->rt_list);
++ }
++ }
++ }
++}
++
++/*
++ * The procedure creates a list of CPUs whose tasks have not been
++ * rescheduled yet. These are CPUs with a jiffie marker different from
++ * the value of jiffies.
++ */
++static void find_participants(cpumask_t * target)
++{
++ cpumask_t res;int i;
++ cpus_clear(res);
++ for_each_online_cpu(i) {
++ if(per_cpu(states, i).jiffie_marker != jiffies)
++ cpu_set(i, res);
++ }
++ /* Examine only cpus in the domain */
++ cpus_and(res, pfair.domain_cpus, res);
++ (*target) = res;
++}
++
++/*
++ * This is the main PFAIR schedule function:
++ * each processor pretends that some currently running tasks are
++ * released in the next quantum and determines whether it should
++ * keep the task that is currently running (this is usually the case
++ * for heavy tasks).
++ */
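++
++/* Rough outline of the quantum-boundary decision below: collect the CPUs
++ * that have not rescheduled in this jiffie (find_participants), pretend that
++ * their current subtasks were released (pretend_release), draw tasks from the
++ * ready queue to see whether the local task would be picked again, put the
++ * drawn tasks back and undo the pretended release (rollback_release), and
++ * only then either keep the current task or take a new one for real.
++ */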
++static int pfair_schedule(struct task_struct *prev,
++ struct task_struct **next,
++ runqueue_t * rq)
++{
++ int cpu =-1;
++ int k =-1;
++ int need_deactivate = 1;
++ int keep =0;
++ unsigned long flags;
++ cpumask_t participants;
++ /* A temporary array */
++ struct task_struct * rs_old_ptr[NR_CPUS];
++
++ *next = NULL;
++ cpu = smp_processor_id();
++ /* CPU's not in the domain just bypass */
++ if (!cpu_isset(cpu, pfair.domain_cpus)) {
++ goto out;
++ }
++ queue_lock_irqsave(&pfair.pfair_lock, flags);
++
++ /* If we happen to run in non-realtime mode
++ * then we have to schedule off currently running tasks
++	 */
++ if (get_rt_mode() != MODE_RT_RUN) {
++ if (is_realtime(prev)) {
++ per_cpu(states, cpu).t = NULL;
++ TRACE("[%d] %d Suspending %d\n",
++ cpu, jiffies - rt_start_time,
++ prev->pid);
++ /* Move the task to the
++ * release queue for future runs
++ * FIXME: Do something smarter.
++ * For example create a set where
++ * prepared or inactive tasks are placed
++ * and then released.
++ * */
++ set_release(prev, get_release(prev) + 1000);
++ pfair_add_release(&pfair, prev);
++ }
++ goto out_deactivate;
++ }
++ /* If the current task stops or dies */
++ if (is_realtime(prev) && !is_running(prev)) {
++ /* remove it from the running set */
++ per_cpu(states, cpu).t = NULL;
++ }
++ /* Make pfair decisions at quantum boundaries only,
++ * but schedule off stopped or dead tasks */
++
++ if ((sync_go[cpu]--) != 1)
++ goto out_deactivate;
++
++ /*TRACE("[%d] %d Scheduler activation", cpu, jiffies-rt_start_time);
++ cpus_and(res, pfair.domain_cpus, cpu_online_map);
++ for_each_cpu_mask(k, res) {
++ TRACE("%d" ,(per_cpu(states, k).jiffie_marker!=jiffies));
++ }
++ TRACE("\n");*/
++
++ /* Find processors that have not rescheduled yet */
++ find_participants(&participants);
++ /* For each task on remote cpu's pretend release */
++ pretend_release(participants);
++ /* Clear temporary array */
++ for_each_possible_cpu(k) { rs_old_ptr[k] = NULL; }
++ /* Select a new subset of eligible tasks */
++ for_each_cpu_mask(k, participants) {
++ rs_old_ptr[k] = __pfair_take_ready (&pfair);
++ /* Check if our current task must be scheduled in the next quantum */
++ if (rs_old_ptr[k] == per_cpu(states, cpu).t) {
++ /* this is our current task, keep it */
++ *next = per_cpu(states, cpu).t;
++ need_deactivate = 0;
++ keep = 1;
++ break;
++ }
++ }
++ /* Put all the extracted tasks back into the ready queue */
++ for_each_cpu_mask(k, participants) {
++ if (rs_old_ptr[k] != NULL){
++ pfair_add_ready(&pfair, rs_old_ptr[k]);
++ rs_old_ptr[k] = NULL;
++ }
++ }
++ /* Rollback the pretended release,
++ * task parameters are restored and running tasks are removed
++ * from queues */
++ rollback_release(participants);
++ /*
++ * If the current task is not scheduled in the next quantum
++ * then select a new pfair task
++ */
++ if(!keep) {
++ *next = per_cpu(states, cpu).t = __pfair_take_ready(&pfair);
++ if (*next != NULL) {
++ /*TRACE("[%d] %d Scheduling %d with (%d, %d)\n",
++ cpu, jiffies-rt_start_time,
++ get_release(*next),
++ get_deadline(*next));
++ */
++ set_task_cpu(*next, cpu);
++ __activate_task(*next, rq);
++ }
++ } else {
++ if (is_realtime(prev)) {
++ /*TRACE("[%d] %d prev==next %d\n",
++ cpu,jiffies-rt_start_time,
++ (prev)->pid);*/
++
++ /* The task will not be switched off but we
++ * need to track the execution time
++ */
++ inc_passed_quanta(prev);
++ }
++ }
++
++	/* Show that our task does not participate in subsequent selections */
++ __get_cpu_var(states).jiffie_marker = jiffies;
++
++out_deactivate:
++ if ( is_realtime(prev) && need_deactivate && prev->array) {
++ /* take prev out of the linux run queue */
++ deactivate_task(prev, rq);
++ }
++ queue_unlock_irqrestore(&pfair.pfair_lock, flags);
++out:
++ return 0;
++}
++
++static void pfair_finish_task_switch(struct task_struct *t)
++{
++ if (!is_realtime(t) || !is_running(t))
++ return;
++
++ queue_lock(&pfair.pfair_lock);
++ /* Release in real-time mode only,
++ * if the mode is non real-time, then
++ * the task is already in the release queue
++ * with the time far in the future
++ */
++ if (get_rt_mode() == MODE_RT_RUN) {
++ inc_passed_quanta(t);
++ if ( get_passed_quanta(t) == get_exec_cost(t)) {
++ sched_trace_job_completion(t);
++ pfair_prepare_next_job(t);
++ } else {
++ pfair_prepare_next_subtask(t);
++ }
++ /*TRACE("[%d] %d releasing %d with (%d, %d)\n",
++ smp_processor_id(),
++ jiffies-rt_start_time,
++ t->pid,
++ get_release(t)-rt_start_time,
++ get_deadline(t)-rt_start_time);*/
++ if (time_before_eq(get_release(t), jiffies))
++ pfair_add_ready(&pfair, t);
++ else
++ pfair_add_release(&pfair, t);
++ }
++ queue_unlock(&pfair.pfair_lock);
++}
++
++/* Prepare a task for running in RT mode
++ * Enqueues the task into master queue data structure
++ * returns
++ * -EPERM if task is not TASK_STOPPED
++ */
++static long pfair_prepare_task(struct task_struct * t)
++{
++ unsigned long flags;
++ TRACE("pfair: prepare task %d\n", t->pid);
++ if (t->state == TASK_STOPPED) {
++ __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);
++
++ if (get_rt_mode() == MODE_RT_RUN)
++ /* The action is already on.
++ * Prepare immediate release
++ */
++ __pfair_prepare_new_release(t, jiffies);
++ /* The task should be running in the queue, otherwise signal
++ * code will try to wake it up with fatal consequences.
++ */
++ t->state = TASK_RUNNING;
++ queue_lock_irqsave(&pfair.pfair_lock, flags);
++ pfair_add_release(&pfair, t);
++ queue_unlock_irqrestore(&pfair.pfair_lock, flags);
++ return 0;
++ } else
++ return -EPERM;
++}
++
++
++
++static void pfair_wake_up_task(struct task_struct *task)
++{
++
++ unsigned long flags;
++
++ /* We must determine whether task should go into the release
++ * queue or into the ready queue.
++ * The task enters the ready queue if the previous deadline was missed,
++ * so we treat the invoked job as a new sporadic release.
++ *
++ * The job can also enter the ready queue if it was invoked before its
++	 * global deadline, but its budget must be clipped down to one quantum.
++ */
++ task->state = TASK_RUNNING;
++ if (time_after_eq(jiffies, task->rt_param.times.last_release
++ + get_rt_period(task))) {
++ /* new sporadic release */
++ TRACE("[%d] Sporadic release of %d at %d\n",
++ smp_processor_id(),
++ jiffies-rt_start_time,
++ task->pid);
++ __pfair_prepare_new_release(task, jiffies);
++ queue_lock_irqsave(&pfair.pfair_lock, flags);
++ sched_trace_job_release(task);
++ pfair_add_ready(&pfair, task);
++ queue_unlock_irqrestore(&pfair.pfair_lock, flags);
++ } else if (task->time_slice) {
++ /* came back in time before deadline
++ * clip the budget to be the last subtask of a job or
++ * the new job.
++ */
++ task->rt_param.times.exec_time = get_exec_cost(task) - 1;
++ if (task->rt_param.times.exec_time == 0) {
++ pfair_prepare_next_job(task);
++ } else {
++ pfair_prepare_next_subtask(task);
++ }
++ TRACE("[%d] %d Resume of %d with %d, %d, %d\n",
++ smp_processor_id(), jiffies-rt_start_time,
++ task->pid, get_release(task)-rt_start_time,
++ get_deadline(task)-rt_start_time,
++ get_passed_quanta(task));
++
++ set_rt_flags(task, RT_F_RUNNING);
++ queue_lock_irqsave(&pfair.pfair_lock, flags);
++ sched_trace_job_release(task);
++ if (time_after_eq(jiffies, get_release(task))) {
++ pfair_add_ready(&pfair, task);
++ } else {
++ pfair_add_release(&pfair, task);
++ }
++ queue_unlock_irqrestore(&pfair.pfair_lock, flags);
++
++ } else {
++ TRACE("[%d] %d Strange release of %d with %d, %d, %d\n",
++ smp_processor_id(), jiffies-rt_start_time,
++ task->pid,
++ get_release(task), get_deadline(task),
++ get_passed_quanta(task));
++
++ queue_lock_irqsave(&pfair.pfair_lock, flags);
++ pfair_add_release(&pfair, task);
++ queue_unlock_irqrestore(&pfair.pfair_lock, flags);
++ }
++}
++
++
++static void pfair_task_blocks(struct task_struct *t)
++{
++ unsigned long flags;
++ int i;
++ cpumask_t res;
++ BUG_ON(!is_realtime(t));
++ /* If the task blocks, then it must be removed from the running set */
++ queue_lock_irqsave(&pfair.pfair_lock, flags);
++ cpus_and(res,pfair.domain_cpus, cpu_online_map);
++ for_each_cpu_mask(i, res) {
++ if (per_cpu(states, i).t == t)
++ per_cpu(states, i).t = NULL;
++ }
++ /* If the task is running and in some
++ * list it might have been released by another
++ * processor
++ */
++ if((t->rt_list.next != LIST_POISON1 ||
++ t->rt_list.prev != LIST_POISON2)) {
++ TRACE("[%d] %d task %d is deleted from the list\n",
++ smp_processor_id(),
++ jiffies-rt_start_time, t->pid);
++ list_del(&t->rt_list);
++ }
++ queue_unlock_irqrestore(&pfair.pfair_lock, flags);
++ TRACE("[%d] %d task %d blocks with budget=%d state=%d\n",
++ smp_processor_id(), jiffies-rt_start_time,
++ t->pid, t->time_slice, t->state);
++}
++
++static long pfair_tear_down(struct task_struct * t)
++{
++ BUG_ON(!is_realtime(t));
++ TRACE("pfair: tear down called for %d \n", t->pid);
++ BUG_ON(t->array);
++ BUG_ON(t->rt_list.next != LIST_POISON1);
++ BUG_ON(t->rt_list.prev != LIST_POISON2);
++ return 0;
++}
++
++static int pfair_mode_change(int new_mode)
++{
++ printk(KERN_INFO "[%d] pfair mode change %d\n",
++ smp_processor_id(), new_mode);
++ if (new_mode == MODE_RT_RUN) {
++ pfair_prepare_new_releases(&pfair, jiffies + 10);
++ }
++ printk(KERN_INFO "[%d] pfair: mode change done\n", smp_processor_id());
++ return 0;
++}
++
++/* Plugin object */
++static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
++ .ready_to_use = 0
++};
++/*
++ * PFAIR plugin initialization macro.
++ */
++#define INIT_PFAIR_PLUGIN (struct sched_plugin){\
++ .plugin_name = "PFAIR",\
++ .ready_to_use = 1,\
++ .scheduler_tick = pfair_scheduler_tick,\
++ .prepare_task = pfair_prepare_task,\
++ .tear_down = pfair_tear_down,\
++ .schedule = pfair_schedule,\
++ .finish_switch = pfair_finish_task_switch,\
++ .mode_change = pfair_mode_change,\
++ .wake_up_task = pfair_wake_up_task,\
++ .task_blocks = pfair_task_blocks \
++ }
++
++sched_plugin_t* __init init_pfair_plugin(void)
++{
++ int i=0;
++ if (!s_plugin.ready_to_use) {
++ pfair_domain_init(&pfair);
++		for (i=0; i<NR_CPUS; i++)
++			sync_go[i] = 0;
++		s_plugin = INIT_PFAIR_PLUGIN;
++	}
++	return &s_plugin;
++}
++#include
++
++
++/*************************************************************
++ * Dummy plugin functions *
++ *************************************************************/
++
++void litmus_dummy_finish_switch(struct task_struct * prev)
++{
++}
++
++int litmus_dummy_schedule(struct task_struct * prev,
++ struct task_struct** next,
++ runqueue_t* q)
++{
++ return 0;
++}
++
++reschedule_check_t litmus_dummy_scheduler_tick(void)
++{
++ return NO_RESCHED;
++}
++
++
++long litmus_dummy_prepare_task(struct task_struct *t)
++{
++ return 0;
++}
++
++void litmus_dummy_wake_up_task(struct task_struct *task)
++{
++ printk(KERN_WARNING "task %d: unhandled real-time wake up!\n",
++ task->pid);
++}
++
++void litmus_dummy_task_blocks(struct task_struct *task)
++{
++}
++
++long litmus_dummy_tear_down(struct task_struct *task)
++{
++ return 0;
++}
++
++int litmus_dummy_scheduler_setup(int cmd, void __user *parameter)
++{
++ return -EPERM;
++}
++
++long litmus_dummy_sleep_next_period(void)
++{
++ return -EPERM;
++}
++
++long litmus_dummy_inherit_priority(struct pi_semaphore *sem,
++ struct task_struct *new_owner)
++{
++ return -EPERM;
++}
++
++long litmus_dummy_return_priority(struct pi_semaphore *sem)
++{
++ return -EPERM;
++}
++
++long litmus_dummy_pi_block(struct pi_semaphore *sem,
++ struct task_struct *new_waiter)
++{
++ return -EPERM;
++}
++
++
++/* The default scheduler plugin. It doesn't do anything and lets Linux do its
++ * job.
++ */
++
++sched_plugin_t linux_sched_plugin = {
++ .plugin_name = "Linux",
++ .ready_to_use = 1,
++ .scheduler_tick = litmus_dummy_scheduler_tick,
++ .prepare_task = litmus_dummy_prepare_task,
++ .tear_down = litmus_dummy_tear_down,
++ .wake_up_task = litmus_dummy_wake_up_task,
++ .task_blocks = litmus_dummy_task_blocks,
++ .sleep_next_period = litmus_dummy_sleep_next_period,
++ .schedule = litmus_dummy_schedule,
++ .finish_switch = litmus_dummy_finish_switch,
++ .scheduler_setup = litmus_dummy_scheduler_setup,
++ .inherit_priority = litmus_dummy_inherit_priority,
++ .return_priority = litmus_dummy_return_priority,
++ .pi_block = litmus_dummy_pi_block
++};
++
++/*
++ * The reference to current plugin that is used to schedule tasks within
++ * the system. It stores references to actual function implementations
++ * Should be initialized by calling "init_***_plugin()"
++ */
++sched_plugin_t *curr_sched_plugin = &linux_sched_plugin;
++
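A minimal sketch, not taken from this patch, of how curr_sched_plugin could be switched
away from the Linux default: the init_*_plugin() functions elsewhere in the patch
(e.g. init_pfair_plugin(), init_psn_edf_plugin()) each return a ready-to-use
sched_plugin_t*, and a boot-time hook driven by the rtsched= parameter (documented in
the index.html changes below) might assign the result. The function setup_rtsched()
and its __setup() wiring are assumptions for illustration only.

static int __init setup_rtsched(char *str)
{
	/* pick a scheduler plugin by name; the default stays &linux_sched_plugin */
	if (!strcmp(str, "psn_edf"))
		curr_sched_plugin = init_psn_edf_plugin();
	else if (!strcmp(str, "pfair"))
		curr_sched_plugin = init_pfair_plugin();
	printk(KERN_INFO "Using %s scheduler plugin\n",
	       curr_sched_plugin->plugin_name);
	return 1;
}
__setup("rtsched=", setup_rtsched);
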
+diff --git a/kernel/sched_psn_edf.c b/kernel/sched_psn_edf.c
+new file mode 100644
+index 0000000..9e4f4ab
+--- /dev/null
++++ b/kernel/sched_psn_edf.c
+@@ -0,0 +1,523 @@
++
++/*
++ * kernel/sched_psn_edf.c
++ *
++ * Implementation of the PSN-EDF scheduler plugin.
++ * Based on kern/sched_part_edf.c and kern/sched_gsn_edf.c.
++ *
++ * Suspensions and non-preemptable sections are supported.
++ * Priority inheritance is not supported.
++ */
++
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++
++typedef struct {
++ rt_domain_t domain;
++ int cpu;
++ struct task_struct* scheduled; /* only RT tasks */
++ spinlock_t lock; /* protects the domain and
++ * serializes scheduling decisions
++ */
++} psnedf_domain_t;
++
++DEFINE_PER_CPU(psnedf_domain_t, psnedf_domains);
++
++#define local_edf (&__get_cpu_var(psnedf_domains).domain)
++#define local_pedf (&__get_cpu_var(psnedf_domains))
++#define remote_edf(cpu) (&per_cpu(psnedf_domains, cpu).domain)
++#define remote_pedf(cpu) (&per_cpu(psnedf_domains, cpu))
++#define task_edf(task) remote_edf(get_partition(task))
++#define task_pedf(task) remote_pedf(get_partition(task))
++
++
++static void psnedf_domain_init(psnedf_domain_t* pedf,
++ check_resched_needed_t check,
++ int cpu)
++{
++ edf_domain_init(&pedf->domain, check);
++ pedf->cpu = cpu;
++ pedf->lock = SPIN_LOCK_UNLOCKED;
++ pedf->scheduled = NULL;
++}
++
++static void requeue(struct task_struct* t, rt_domain_t *edf)
++{
++ /* only requeue if t is actually running */
++ BUG_ON(!is_running(t));
++
++ if (t->state != TASK_RUNNING)
++ TRACE_TASK(t, "requeue: !TASK_RUNNING");
++
++ set_rt_flags(t, RT_F_RUNNING);
++ if (!is_released(t) ||
++ get_rt_mode() != MODE_RT_RUN)
++ __add_release(edf, t); /* it has got to wait */
++ else
++ __add_ready(edf, t);
++}
++
++/* we assume the lock is being held */
++static void preempt(psnedf_domain_t *pedf)
++{
++ if (smp_processor_id() == pedf->cpu) {
++ if (pedf->scheduled && is_np(pedf->scheduled))
++ request_exit_np(pedf->scheduled);
++ else
++ set_tsk_need_resched(current);
++ } else
++ /* in case it is a remote CPU we have to defer the
++ * decision to the remote CPU
++ */
++ smp_send_reschedule(pedf->cpu);
++}
++
++/* This check is trivial in partitioned systems as we only have to consider
++ * the CPU of the partition.
++ */
++static int psnedf_check_resched(rt_domain_t *edf)
++{
++ psnedf_domain_t *pedf = container_of(edf, psnedf_domain_t, domain);
++ int ret = 0;
++
++ /* because this is a callback from rt_domain_t we already hold
++ * the necessary lock for the ready queue
++ */
++ if (edf_preemption_needed(edf, pedf->scheduled)) {
++ preempt(pedf);
++ ret = 1;
++ }
++ return ret;
++}
++
++
++static reschedule_check_t psnedf_scheduler_tick(void)
++{
++ unsigned long flags;
++ struct task_struct *t = current;
++ reschedule_check_t want_resched = NO_RESCHED;
++ rt_domain_t *edf = local_edf;
++ psnedf_domain_t *pedf = local_pedf;
++
++ /* Check for inconsistency. We don't need the lock for this since
++ * ->scheduled is only changed in schedule, which obviously is not
++ * executing in parallel on this CPU
++ */
++ BUG_ON(is_realtime(t) && t != pedf->scheduled);
++
++ if (is_realtime(t))
++ TRACE("%s/%d was hit by scheduler tick\n", t->comm, t->pid);
++
++ /* expire tasks even if not in real-time mode
++ * this makes sure that at the end of real-time mode
++ * no tasks "run away forever".
++ */
++ if (is_realtime(t) && t->time_slice && !--t->time_slice) {
++ if (!is_np(t)) {
++ want_resched = FORCE_RESCHED;
++ } else {
++ TRACE("psnedf_scheduler_tick: "
++ "%d is non-preemptable, "
++ "preemption delayed.\n", t->pid);
++ request_exit_np(t);
++ }
++ }
++
++ if (get_rt_mode() == MODE_RT_RUN)
++ {
++ /* check whether anything is waiting to be released
++ * this could probably be moved to the global timer
++ * interrupt handler since the state will only change
++ * once per jiffie
++ */
++ spin_lock_irqsave(&pedf->lock, flags);
++ __release_pending(edf);
++ if (want_resched != FORCE_RESCHED &&
++ edf_preemption_needed(edf, t))
++ want_resched = FORCE_RESCHED;
++
++ spin_unlock_irqrestore(&pedf->lock, flags);
++
++ }
++ return want_resched;
++}
++
++static void job_completion(struct task_struct* t)
++{
++ TRACE_TASK(t, "job_completion().\n");
++ set_rt_flags(t, RT_F_SLEEP);
++ edf_prepare_for_next_period(t);
++}
++
++static int psnedf_schedule(struct task_struct * prev,
++ struct task_struct ** next,
++ runqueue_t * rq)
++{
++ psnedf_domain_t* pedf = local_pedf;
++ rt_domain_t* edf = &pedf->domain;
++
++ int out_of_time, sleep, preempt,
++ np, exists, rt, blocks, resched;
++
++ spin_lock(&pedf->lock);
++
++ /* sanity checking */
++ BUG_ON(pedf->scheduled && pedf->scheduled != prev);
++ BUG_ON(pedf->scheduled && !is_realtime(prev));
++
++ /* (0) Determine state */
++ exists = pedf->scheduled != NULL;
++ blocks = exists && !is_running(pedf->scheduled);
++ out_of_time = exists && !pedf->scheduled->time_slice;
++ np = exists && is_np(pedf->scheduled);
++ sleep = exists && get_rt_flags(pedf->scheduled) == RT_F_SLEEP;
++ preempt = edf_preemption_needed(edf, prev);
++ rt = get_rt_mode() == MODE_RT_RUN;
++
++
++ /* If we need to preempt do so.
++ * The following checks set resched to 1 in case of special
++ * circumstances.
++ */
++ resched = preempt;
++
++ /* If a task blocks we have no choice but to reschedule.
++ */
++ if (blocks)
++ resched = 1;
++
++ /* Request a sys_exit_np() call if we would like to preempt but cannot.
++ * Multiple calls to request_exit_np() don't hurt.
++ */
++ if (np && (out_of_time || preempt || sleep))
++ request_exit_np(pedf->scheduled);
++
++ /* Any task that is preemptable and either exhausts its execution
++ * budget or wants to sleep completes. We may have to reschedule after
++ * this.
++ */
++ if (!np && (out_of_time || sleep)) {
++ job_completion(pedf->scheduled);
++ resched = 1;
++ }
++
++ /* Stop real-time tasks when we leave real-time mode
++ */
++ if (!rt && exists)
++ resched = 1;
++
++ /* The final scheduling decision. Do we need to switch for some reason?
++ * Switch if we are in RT mode and have no task or if we need to
++ * resched.
++ */
++ *next = NULL;
++ if ((!np || blocks) && (resched || (!exists && rt))) {
++ /* Take care of a previously scheduled
++ * job by taking it out of the Linux runqueue.
++ */
++ if (pedf->scheduled) {
++ /* as opposed to global schedulers that switch without
++ * a lock being held we can requeue already here since
++ * no other CPU will schedule from this domain.
++ */
++ if (!blocks)
++ requeue(pedf->scheduled, edf);
++ if (prev->array)
++ /* take it out of the run queue */
++ deactivate_task(prev, rq);
++ }
++
++ /* only pick tasks if we are actually in RT mode */
++ if (rt)
++ *next = __take_ready(edf);
++ if (*next) {
++ /* stick the task into the runqueue */
++ __activate_task(*next, rq);
++ set_task_cpu(*next, smp_processor_id());
++ }
++
++ } else
++ /* Only override Linux scheduler if we have a real-time task
++ * scheduled that needs to continue.
++ */
++ if (exists)
++ *next = prev;
++
++ if (*next)
++ set_rt_flags(*next, RT_F_RUNNING);
++
++ pedf->scheduled = *next;
++ spin_unlock(&pedf->lock);
++ return 0;
++}
++
++
++/* Prepare a task for running in RT mode
++ * Enqueues the task into master queue data structure
++ * returns
++ * -EPERM if task is not TASK_STOPPED
++ */
++static long psnedf_prepare_task(struct task_struct * t)
++{
++ rt_domain_t* edf = task_edf(t);
++ psnedf_domain_t* pedf = task_pedf(t);
++ unsigned long flags;
++
++ TRACE("[%d] psn edf: prepare task %d on CPU %d\n",
++ smp_processor_id(), t->pid, get_partition(t));
++ if (t->state == TASK_STOPPED) {
++ __setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);
++
++ if (get_rt_mode() == MODE_RT_RUN)
++ /* Real-time mode is already active.
++ * Prepare an immediate release.
++ */
++ edf_release_now(t);
++ /* The task should be running in the queue, otherwise signal
++ * code will try to wake it up with fatal consequences.
++ */
++ t->state = TASK_RUNNING;
++ spin_lock_irqsave(&pedf->lock, flags);
++ __add_release(edf, t);
++ spin_unlock_irqrestore(&pedf->lock, flags);
++ return 0;
++ } else
++ return -EPERM;
++}
++
++static void psnedf_wake_up_task(struct task_struct *task)
++{
++ unsigned long flags;
++ psnedf_domain_t* pedf = task_pedf(task);
++ rt_domain_t* edf = task_edf(task);
++
++ TRACE("psnedf: %d unsuspends with budget=%d\n",
++ task->pid, task->time_slice);
++
++ /* After fixing the litmus_controlled bug,
++ * this should hold again.
++ */
++ BUG_ON(in_list(&task->rt_list));
++
++ task->state = TASK_RUNNING;
++
++ /* We need to take suspensions because of semaphores into
++ * account! If a job resumes after being suspended due to acquiring
++ * a semaphore, it should never be treated as a new job release.
++ */
++ if (is_tardy(task) && get_rt_flags(task) != RT_F_EXIT_SEM) {
++ /* new sporadic release */
++ edf_release_now(task);
++ sched_trace_job_release(task);
++ }
++
++ spin_lock_irqsave(&pedf->lock, flags);
++ requeue(task, edf);
++ spin_unlock_irqrestore(&pedf->lock, flags);
++}
++
++static void psnedf_task_blocks(struct task_struct *t)
++{
++ BUG_ON(!is_realtime(t));
++ /* not really anything to do since it can only block if
++ * it is running, and when it is not running it is not in any
++ * queue anyway.
++ */
++ TRACE("task %d blocks with budget=%d\n", t->pid, t->time_slice);
++ BUG_ON(in_list(&t->rt_list));
++}
++
++
++/* When _tear_down is called, the task should not be in any queue any more
++ * as it must have blocked first. We don't have any internal state for the task,
++ * it is all in the task_struct.
++ */
++static long psnedf_tear_down(struct task_struct * t)
++{
++ BUG_ON(!is_realtime(t));
++ TRACE_TASK(t, "tear down called");
++ BUG_ON(t->array);
++ BUG_ON(in_list(&t->rt_list));
++ return 0;
++}
++
++static long psnedf_pi_block(struct pi_semaphore *sem,
++ struct task_struct *new_waiter)
++{
++ psnedf_domain_t* pedf;
++ rt_domain_t* edf;
++ struct task_struct* t;
++ int cpu = get_partition(new_waiter);
++
++ BUG_ON(!new_waiter);
++
++ if (edf_higher_prio(new_waiter, sem->hp.cpu_task[cpu])) {
++ TRACE_TASK(new_waiter, " boosts priority\n");
++ pedf = task_pedf(new_waiter);
++ edf = task_edf(new_waiter);
++
++ /* interrupts already disabled */
++ spin_lock(&pedf->lock);
++
++ /* store new highest-priority task */
++ sem->hp.cpu_task[cpu] = new_waiter;
++ if (sem->holder &&
++ get_partition(sem->holder) == get_partition(new_waiter)) {
++ /* let holder inherit */
++ sem->holder->rt_param.inh_task = new_waiter;
++ t = sem->holder;
++ if (in_list(&t->rt_list)) {
++ /* queued in domain*/
++ list_del(&t->rt_list);
++ /* readd to make priority change take place */
++ if (is_released(t))
++ __add_ready(edf, t);
++ else
++ __add_release(edf, t);
++ }
++ }
++
++ /* check if we need to reschedule */
++ if (edf_preemption_needed(edf, current))
++ preempt(pedf);
++
++ spin_unlock(&pedf->lock);
++ }
++
++ return 0;
++}
++
++static long psnedf_inherit_priority(struct pi_semaphore *sem,
++ struct task_struct *new_owner)
++{
++ int cpu = get_partition(new_owner);
++
++ new_owner->rt_param.inh_task = sem->hp.cpu_task[cpu];
++ if (sem->hp.cpu_task[cpu] && new_owner != sem->hp.cpu_task[cpu]) {
++ TRACE_TASK(new_owner,
++ "inherited priority from %s/%d\n",
++ sem->hp.cpu_task[cpu]->comm,
++ sem->hp.cpu_task[cpu]->pid);
++ } else
++ TRACE_TASK(new_owner,
++ "cannot inherit priority: "
++ "no higher priority job waits on this CPU!\n");
++ /* make new owner non-preemptable as required by FMLP under
++ * PSN-EDF.
++ */
++ make_np(new_owner);
++ return 0;
++}
++
++
++/* This function is called on a semaphore release, and assumes that
++ * the current task is also the semaphore holder.
++ */
++static long psnedf_return_priority(struct pi_semaphore *sem)
++{
++ struct task_struct* t = current;
++ psnedf_domain_t* pedf = task_pedf(t);
++ rt_domain_t* edf = task_edf(t);
++ int ret = 0;
++ int cpu = get_partition(current);
++
++
++ /* Find new highest-priority semaphore task
++ * if holder task is the current hp.cpu_task[cpu].
++ *
++ * Calling function holds sem->wait.lock.
++ */
++ if (t == sem->hp.cpu_task[cpu])
++ edf_set_hp_cpu_task(sem, cpu);
++
++ take_np(t);
++ if (current->rt_param.inh_task) {
++ TRACE_CUR("return priority of %s/%d\n",
++ current->rt_param.inh_task->comm,
++ current->rt_param.inh_task->pid);
++ spin_lock(&pedf->lock);
++
++ /* Reset inh_task to NULL. */
++ current->rt_param.inh_task = NULL;
++
++ /* check if we need to reschedule */
++ if (edf_preemption_needed(edf, current))
++ preempt(pedf);
++
++ spin_unlock(&pedf->lock);
++ } else
++ TRACE_CUR(" no priority to return %p\n", sem);
++
++ return ret;
++}
++
++
++static int psnedf_mode_change(int new_mode)
++{
++ int cpu;
++
++ if (new_mode == MODE_RT_RUN)
++ for_each_online_cpu(cpu) {
++ spin_lock(&remote_pedf(cpu)->lock);
++ __rerelease_all(remote_edf(cpu), edf_release_at);
++ spin_unlock(&remote_pedf(cpu)->lock);
++ }
++
++ TRACE("[%d] psn edf: mode changed to %d\n",
++ smp_processor_id(), new_mode);
++ return 0;
++}
++
++
++/* Plugin object */
++static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
++ .ready_to_use = 0
++};
++
++
++/*
++ * Plugin initialization code.
++ */
++#define INIT_SCHED_PLUGIN (struct sched_plugin) {\
++ .plugin_name = "PSN-EDF",\
++ .ready_to_use = 1,\
++ .scheduler_tick = psnedf_scheduler_tick,\
++ .prepare_task = psnedf_prepare_task,\
++ .sleep_next_period = edf_sleep_next_period,\
++ .tear_down = psnedf_tear_down,\
++ .schedule = psnedf_schedule,\
++ .mode_change = psnedf_mode_change,\
++ .wake_up_task = psnedf_wake_up_task,\
++ .task_blocks = psnedf_task_blocks, \
++ .pi_block = psnedf_pi_block, \
++ .inherit_priority = psnedf_inherit_priority, \
++ .return_priority = psnedf_return_priority \
++}
++
++
++sched_plugin_t *__init init_psn_edf_plugin(void)
++{
++ int i;
++
++ if (!s_plugin.ready_to_use)
++ {
++ for (i = 0; i < NR_CPUS; i++)
++ {
++ psnedf_domain_init(remote_pedf(i),
++ psnedf_check_resched, i);
++ printk("PSN-EDF: CPU partition %d initialized.\n", i);
++ }
++ s_plugin = INIT_SCHED_PLUGIN;
++ }
++ return &s_plugin;
++}
++
++
++
+diff --git a/kernel/sched_trace.c b/kernel/sched_trace.c
+new file mode 100644
+index 0000000..4cfe0c4
+--- /dev/null
++++ b/kernel/sched_trace.c
+@@ -0,0 +1,755 @@
++/* sched_trace.c -- record scheduling events to a byte stream.
++ *
++ * TODO: Move ring buffer to a lockfree implementation.
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++
++
++typedef struct {
++ /* guard read and write pointers */
++ spinlock_t lock;
++ /* guard against concurrent freeing of buffer */
++ rwlock_t del_lock;
++
++ /* memory allocated for ring buffer */
++ unsigned long order;
++ char* buf;
++ char* end;
++
++ /* Read/write pointer. May not cross.
++ * They point to the position of next write and
++ * last read.
++ */
++ char* writep;
++ char* readp;
++
++} ring_buffer_t;
++
++#define EMPTY_RING_BUFFER { \
++ .lock = SPIN_LOCK_UNLOCKED, \
++ .del_lock = RW_LOCK_UNLOCKED, \
++ .buf = NULL, \
++ .end = NULL, \
++ .writep = NULL, \
++ .readp = NULL \
++}
++
++void rb_init(ring_buffer_t* buf)
++{
++ *buf = (ring_buffer_t) EMPTY_RING_BUFFER;
++}
++
++int rb_alloc_buf(ring_buffer_t* buf, unsigned long order)
++{
++ unsigned long flags;
++ int error = 0;
++ char *mem;
++
++ /* do memory allocation while not atomic */
++ mem = (char *) __get_free_pages(GFP_KERNEL, order);
++ if (!mem)
++ return -ENOMEM;
++ write_lock_irqsave(&buf->del_lock, flags);
++ BUG_ON(buf->buf);
++ buf->buf = mem;
++ buf->end = buf->buf + PAGE_SIZE * (1 << order) - 1;
++ memset(buf->buf, 0xff, buf->end - buf->buf);
++ buf->order = order;
++ buf->writep = buf->buf + 1;
++ buf->readp = buf->buf;
++ write_unlock_irqrestore(&buf->del_lock, flags);
++ return error;
++}
++
++int rb_free_buf(ring_buffer_t* buf)
++{
++ unsigned long flags;
++ int error = 0;
++ write_lock_irqsave(&buf->del_lock, flags);
++ BUG_ON(!buf->buf);
++ free_pages((unsigned long) buf->buf, buf->order);
++ buf->buf = NULL;
++ buf->end = NULL;
++ buf->writep = NULL;
++ buf->readp = NULL;
++ write_unlock_irqrestore(&buf->del_lock, flags);
++ return error;
++}
++
++/* Assumption: concurrent writes are serialized externally
++ *
++ * Will only succeed if there is enough space for all len bytes.
++ */
++int rb_put(ring_buffer_t* buf, char* mem, size_t len)
++{
++ unsigned long flags;
++ char* r , *w;
++ int error = 0;
++ read_lock_irqsave(&buf->del_lock, flags);
++ if (!buf->buf) {
++ error = -ENODEV;
++ goto out;
++ }
++ spin_lock(&buf->lock);
++ r = buf->readp;
++ w = buf->writep;
++ spin_unlock(&buf->lock);
++ if (r < w && buf->end - w >= len - 1) {
++ /* easy case: there is enough space in the buffer
++ * to write it in one continuous chunk */
++ memcpy(w, mem, len);
++ w += len;
++ if (w > buf->end)
++ /* special case: fit exactly into buffer
++ * w is now buf->end + 1
++ */
++ w = buf->buf;
++ } else if (w < r && r - w >= len) { /* >= len because may not cross */
++ /* we are constrained by the read pointer but there
++ * is enough space
++ */
++ memcpy(w, mem, len);
++ w += len;
++ } else if (r <= w && buf->end - w < len - 1) {
++ /* the wrap around case: there may or may not be space */
++ if ((buf->end - w) + (r - buf->buf) >= len - 1) {
++ /* copy chunk that fits at the end */
++ memcpy(w, mem, buf->end - w + 1);
++ mem += buf->end - w + 1;
++ len -= (buf->end - w + 1);
++ w = buf->buf;
++ /* copy the rest */
++ memcpy(w, mem, len);
++ w += len;
++ }
++ else
++ error = -ENOMEM;
++ } else {
++ error = -ENOMEM;
++ }
++ if (!error) {
++ spin_lock(&buf->lock);
++ buf->writep = w;
++ spin_unlock(&buf->lock);
++ }
++ out:
++ read_unlock_irqrestore(&buf->del_lock, flags);
++ return error;
++}
++
++/* Assumption: concurrent reads are serialized externally */
++int rb_get(ring_buffer_t* buf, char* mem, size_t len)
++{
++ unsigned long flags;
++ char* r , *w;
++ int error = 0;
++ read_lock_irqsave(&buf->del_lock, flags);
++ if (!buf->buf) {
++ error = -ENODEV;
++ goto out;
++ }
++ spin_lock(&buf->lock);
++ r = buf->readp;
++ w = buf->writep;
++ spin_unlock(&buf->lock);
++
++ if (w <= r && buf->end - r >= len) {
++ /* easy case: there is enough data in the buffer
++ * to get it in one chunk*/
++ memcpy(mem, r + 1, len);
++ r += len;
++ error = len;
++
++ } else if (r + 1 < w && w - r - 1 >= len) {
++ /* we are constrained by the write pointer but
++ * there is enough data
++ */
++ memcpy(mem, r + 1, len);
++ r += len;
++ error = len;
++
++ } else if (r + 1 < w && w - r - 1 < len) {
++ /* we are constrained by the write pointer and there
++ * is not enough data
++ */
++ memcpy(mem, r + 1, w - r - 1);
++ error = w - r - 1;
++ r += w - r - 1;
++
++ } else if (w <= r && buf->end - r < len) {
++ /* the wrap around case: there may or may not be enough data
++ * first let's get what is available
++ */
++ memcpy(mem, r + 1, buf->end - r);
++ error += (buf->end - r);
++ mem += (buf->end - r);
++ len -= (buf->end - r);
++ r += (buf->end - r);
++
++ if (w > buf->buf) {
++ /* there is more to get */
++ r = buf->buf - 1;
++ if (w - r >= len) {
++ /* plenty */
++ memcpy(mem, r + 1, len);
++ error += len;
++ r += len;
++ } else {
++ memcpy(mem, r + 1, w - r - 1);
++ error += w - r - 1;
++ r += w - r - 1;
++ }
++ }
++ } /* nothing available */
++
++ if (error > 0) {
++ spin_lock(&buf->lock);
++ buf->readp = r;
++ spin_unlock(&buf->lock);
++ }
++ out:
++ read_unlock_irqrestore(&buf->del_lock, flags);
++ return error;
++}
++
++
++
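A minimal sketch, not taken from this patch, of how the ring buffer above is meant to
be used from kernel code: rb_put() is all-or-nothing and returns -ENOMEM when a record
does not fit, while rb_get() returns the number of bytes actually copied. The name
example_rb and the payload are placeholders, and process context is assumed.

static ring_buffer_t example_rb;

static int __init example_rb_usage(void)
{
	char out[16];
	int n;

	rb_init(&example_rb);
	if (rb_alloc_buf(&example_rb, 2))	/* 2^2 = 4 pages */
		return -ENOMEM;

	/* all-or-nothing write: non-zero return means the record was dropped */
	if (rb_put(&example_rb, (char *) "hello", 5))
		printk(KERN_WARNING "example: record did not fit\n");

	/* returns the number of bytes copied out, 0 if the buffer is empty */
	n = rb_get(&example_rb, out, sizeof(out));
	printk(KERN_INFO "example: read %d bytes\n", n);

	rb_free_buf(&example_rb);
	return 0;
}
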
++/******************************************************************************/
++/* DEVICE FILE DRIVER */
++/******************************************************************************/
++
++
++
++/* Allocate a buffer of about 1 MB per CPU:
++ * 2^8 = 256 pages, i.e., 1 MB with 4 KB pages.
++ */
++#define BUFFER_ORDER 8
++
++typedef struct {
++ ring_buffer_t buf;
++ atomic_t reader_cnt;
++ struct semaphore reader_mutex;
++} trace_buffer_t;
++
++
++/* This does not initialize the semaphore!! */
++
++#define EMPTY_TRACE_BUFFER \
++ { .buf = EMPTY_RING_BUFFER, .reader_cnt = ATOMIC_INIT(0)}
++
++static DEFINE_PER_CPU(trace_buffer_t, trace_buffer);
++
++#ifdef CONFIG_SCHED_DEBUG_TRACE
++static spinlock_t log_buffer_lock = SPIN_LOCK_UNLOCKED;
++#endif
++static trace_buffer_t log_buffer = EMPTY_TRACE_BUFFER;
++
++static void init_buffers(void)
++{
++ int i;
++
++ for (i = 0; i < NR_CPUS; i++) {
++ rb_init(&per_cpu(trace_buffer, i).buf);
++ init_MUTEX(&per_cpu(trace_buffer, i).reader_mutex);
++ atomic_set(&per_cpu(trace_buffer, i).reader_cnt, 0);
++ }
++ /* only initialize the mutex, the rest was initialized as part
++ * of the static initialization macro
++ */
++ init_MUTEX(&log_buffer.reader_mutex);
++}
++
++static int trace_release(struct inode *in, struct file *filp)
++{
++ int error = -EINVAL;
++ trace_buffer_t* buf = filp->private_data;
++
++ BUG_ON(!filp->private_data);
++
++ if (down_interruptible(&buf->reader_mutex)) {
++ error = -ERESTARTSYS;
++ goto out;
++ }
++
++ /* last release must deallocate buffers */
++ if (atomic_dec_return(&buf->reader_cnt) == 0) {
++ error = rb_free_buf(&buf->buf);
++ }
++
++ up(&buf->reader_mutex);
++ out:
++ return error;
++}
++
++static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
++ loff_t *f_pos)
++{
++ /* we ignore f_pos, this is strictly sequential */
++
++ ssize_t error = -EINVAL;
++ char* mem;
++ trace_buffer_t *buf = filp->private_data;
++
++ if (down_interruptible(&buf->reader_mutex)) {
++ error = -ERESTARTSYS;
++ goto out;
++ }
++
++ if (len > 64 * 1024)
++ len = 64 * 1024;
++ mem = kmalloc(len, GFP_KERNEL);
++ if (!mem) {
++ error = -ENOMEM;
++ goto out_unlock;
++ }
++
++ error = rb_get(&buf->buf, mem, len);
++ while (!error) {
++ set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(110);
++ if (signal_pending(current))
++ error = -ERESTARTSYS;
++ else
++ error = rb_get(&buf->buf, mem, len);
++ }
++
++ if (error > 0 && copy_to_user(to, mem, error))
++ error = -EFAULT;
++
++ kfree(mem);
++ out_unlock:
++ up(&buf->reader_mutex);
++ out:
++ return error;
++}
++
++
++/* trace_open - Open one of the per-CPU sched_trace buffers.
++ */
++static int trace_open(struct inode *in, struct file *filp)
++{
++ int error = -EINVAL;
++ int cpu = MINOR(in->i_rdev);
++ trace_buffer_t* buf;
++
++ if (!cpu_online(cpu)) {
++ printk(KERN_WARNING "sched trace: "
++ "CPU #%d is not online. (open failed)\n", cpu);
++ error = -ENODEV;
++ goto out;
++ }
++
++ buf = &per_cpu(trace_buffer, cpu);
++
++ if (down_interruptible(&buf->reader_mutex)) {
++ error = -ERESTARTSYS;
++ goto out;
++ }
++
++ /* first open must allocate buffers */
++ if (atomic_inc_return(&buf->reader_cnt) == 1) {
++ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
++ {
++ atomic_dec(&buf->reader_cnt);
++ goto out_unlock;
++ }
++ }
++
++ error = 0;
++ filp->private_data = buf;
++
++ out_unlock:
++ up(&buf->reader_mutex);
++ out:
++ return error;
++}
++
++/* log_open - open the global log message ring buffer.
++ */
++static int log_open(struct inode *in, struct file *filp)
++{
++ int error = -EINVAL;
++ trace_buffer_t* buf;
++
++ buf = &log_buffer;
++
++ if (down_interruptible(&buf->reader_mutex)) {
++ error = -ERESTARTSYS;
++ goto out;
++ }
++
++ /* first open must allocate buffers */
++ if (atomic_inc_return(&buf->reader_cnt) == 1) {
++ if ((error = rb_alloc_buf(&buf->buf, BUFFER_ORDER)))
++ {
++ atomic_dec(&buf->reader_cnt);
++ goto out_unlock;
++ }
++ }
++
++ error = 0;
++ filp->private_data = buf;
++
++ out_unlock:
++ up(&buf->reader_mutex);
++ out:
++ return error;
++}
++
++/******************************************************************************/
++/* Device Registration */
++/******************************************************************************/
++
++/* the major numbers are from the unassigned/local use block
++ *
++ * This should be converted to dynamic allocation at some point...
++ */
++#define TRACE_MAJOR 250
++#define LOG_MAJOR 251
++
++/* trace_fops - The file operations for accessing the per-CPU scheduling event
++ * trace buffers.
++ */
++struct file_operations trace_fops = {
++ .owner = THIS_MODULE,
++ .open = trace_open,
++ .release = trace_release,
++ .read = trace_read,
++};
++
++/* log_fops - The file operations for accessing the global LITMUS log message
++ * buffer.
++ *
++ * Except for opening the device file it uses the same operations as trace_fops.
++ */
++struct file_operations log_fops = {
++ .owner = THIS_MODULE,
++ .open = log_open,
++ .release = trace_release,
++ .read = trace_read,
++};
++
++static int __init register_buffer_dev(const char* name,
++ struct file_operations* fops,
++ int major, int count)
++{
++ dev_t trace_dev;
++ struct cdev *cdev;
++ int error = 0;
++
++ trace_dev = MKDEV(major, 0);
++ error = register_chrdev_region(trace_dev, count, name);
++ if (error)
++ {
++ printk(KERN_WARNING "sched trace: "
++ "Could not register major/minor number %d\n", major);
++ return error;
++ }
++ cdev = cdev_alloc();
++ if (!cdev) {
++ printk(KERN_WARNING "sched trace: "
++ "Could not get a cdev for %s.\n", name);
++ return -ENOMEM;
++ }
++ cdev->owner = THIS_MODULE;
++ cdev->ops = fops;
++ error = cdev_add(cdev, trace_dev, count);
++ if (error) {
++ printk(KERN_WARNING "sched trace: "
++ "add_cdev failed for %s.\n", name);
++ return -ENOMEM;
++ }
++ return error;
++
++}
++
++static int __init init_sched_trace(void)
++{
++ int error1 = 0, error2 = 0;
++
++ printk("Initializing scheduler trace device\n");
++ init_buffers();
++
++ error1 = register_buffer_dev("schedtrace", &trace_fops,
++ TRACE_MAJOR, NR_CPUS);
++
++ error2 = register_buffer_dev("litmus_log", &log_fops,
++ LOG_MAJOR, 1);
++ if (error1 || error2)
++ return min(error1, error2);
++ else
++ return 0;
++}
++
++module_init(init_sched_trace);
++
++/******************************************************************************/
++/* KERNEL API */
++/******************************************************************************/
++
++/* The per-CPU LITMUS log buffer. Don't put it on the stack, it is too big for
++ * that and the kernel gets very picky with nested interrupts and small stacks.
++ */
++
++#ifdef CONFIG_SCHED_DEBUG_TRACE
++
++#define MSG_SIZE 255
++static DEFINE_PER_CPU(char[MSG_SIZE], fmt_buffer);
++
++/* sched_trace_log_message - This is the only function that accesses the
++ * log buffer inside the kernel for writing.
++ * Concurrent access to it is serialized via the
++ * log_buffer_lock.
++ *
++ * The maximum length of a formatted message is 255.
++ */
++void sched_trace_log_message(const char* fmt, ...)
++{
++ unsigned long flags;
++ va_list args;
++ size_t len;
++ char* buf;
++
++ va_start(args, fmt);
++ local_irq_save(flags);
++
++ /* format message */
++ buf = __get_cpu_var(fmt_buffer);
++ len = vscnprintf(buf, MSG_SIZE, fmt, args);
++
++ spin_lock(&log_buffer_lock);
++ /* Don't copy the trailing null byte, we don't want null bytes
++ * in a text file.
++ */
++ rb_put(&log_buffer.buf, buf, len);
++ spin_unlock(&log_buffer_lock);
++
++ local_irq_restore(flags);
++ va_end(args);
++}
++
++#endif
++
++#ifdef CONFIG_SCHED_TASK_TRACE
++
++static inline void __put_trace(char* mem, size_t size)
++{
++ trace_buffer_t* buf = &__get_cpu_var(trace_buffer);
++ rb_put(&buf->buf, mem, size);
++}
++
++#define put_trace(obj) \
++ if (get_rt_mode() == MODE_RT_RUN) \
++ __put_trace((char *) &obj, sizeof(obj))
++
++#define header(rec, type) \
++{ \
++ rec.header.trace = type; \
++ rec.header.timestamp = sched_clock(); \
++ rec.header.size = sizeof(rec); \
++}
++
++#define tinfo(info, t) \
++{ \
++ info.is_rt = is_realtime(t); \
++ info.is_server = 0; \
++ info.class = get_class(t); \
++ info.budget = (t)->time_slice; \
++ info.pid = (t)->pid; \
++ info.deadline = (t)->rt_param.times.deadline; \
++}
++
++#define rtinfo(info, t) \
++{ \
++ info.wcet = get_exec_cost(t); \
++ info.period = get_rt_period(t); \
++}
++
++void sched_trace_scheduler_invocation(void)
++{
++ invocation_record_t rec;
++ header(rec, ST_INVOCATION);
++ rec.flags = current->flags;
++ put_trace(rec);
++}
++
++void sched_trace_task_arrival(struct task_struct *t)
++{
++ arrival_record_t rec;
++ header(rec, ST_ARRIVAL);
++ tinfo(rec.task, t);
++ put_trace(rec);
++}
++
++
++void sched_trace_task_departure(struct task_struct *t)
++{
++ departure_record_t rec;
++ header(rec, ST_DEPARTURE);
++ tinfo(rec.task, t);
++ put_trace(rec);
++}
++
++void sched_trace_task_preemption(struct task_struct *t, struct task_struct* by)
++{
++ preemption_record_t rec;
++ header(rec, ST_PREEMPTION);
++ tinfo(rec.task, t);
++ tinfo(rec.by, by);
++ put_trace(rec);
++}
++
++
++void sched_trace_task_scheduled(struct task_struct *t)
++{
++ scheduled_record_t rec;
++ header(rec, ST_SCHEDULED);
++ tinfo(rec.task, t);
++ put_trace(rec);
++}
++
++
++void sched_trace_job_release(struct task_struct *t)
++{
++ release_record_t rec;
++ header(rec, ST_JOB_RELEASE);
++ tinfo(rec.task, t);
++ rtinfo(rec, t);
++ put_trace(rec);
++}
++
++void sched_trace_job_completion(struct task_struct *t)
++{
++ completion_record_t rec;
++ header(rec, ST_JOB_COMPLETION);
++ tinfo(rec.task, t);
++ rtinfo(rec, t);
++ rec.tardiness = jiffies - t->rt_param.times.deadline;
++ rec.job_no = t->rt_param.times.job_no;
++ TRACE_TASK(t, "AAATardiness : %d\n", rec.tardiness);
++ put_trace(rec);
++}
++
++
++void sched_trace_server_scheduled(int id, task_class_t class,
++ unsigned int budget, jiffie_t deadline)
++{
++ scheduled_record_t rec;
++ header(rec, ST_SCHEDULED);
++ rec.task.pid = id;
++ rec.task.is_rt = 1;
++ rec.task.is_server = 1;
++ rec.task.class = class;
++ rec.task.budget = budget;
++ rec.task.deadline = deadline;
++ put_trace(rec);
++}
++
++void sched_trace_server_release(int id, unsigned int wcet,
++ unsigned int period, task_class_t class)
++{
++ release_record_t rec;
++ header(rec, ST_JOB_RELEASE);
++ rec.task.pid = id;
++ rec.task.is_rt = 1;
++ rec.task.is_server = 1;
++ rec.task.class = class;
++ rec.task.budget = wcet;
++ rec.period = period;
++ rec.wcet = wcet;
++ put_trace(rec);
++}
++
++void sched_trace_server_completion(int id, unsigned int budget,
++ jiffie_t deadline, task_class_t class)
++{
++ completion_record_t rec;
++ header(rec, ST_JOB_COMPLETION);
++ rec.task.pid = id;
++ rec.task.is_rt = 1;
++ rec.task.is_server = 1;
++ rec.task.class = class;
++ rec.task.budget = budget;
++ rec.task.deadline = deadline;
++ rec.period = 0;
++ rec.tardiness = jiffies - deadline;
++ put_trace(rec);
++
++}
++
++void sched_trace_capacity_release(struct task_struct *t)
++{
++ cap_release_record_t rec;
++ header(rec, ST_CAPACITY_RELEASE);
++ tinfo(rec.task, t);
++ put_trace(rec);
++}
++
++void sched_trace_capacity_allocation(struct task_struct *t, u16 budget, u32 deadline,
++ pid_t donor)
++{
++ cap_allocation_record_t rec;
++ header(rec, ST_CAPACITY_ALLOCATION);
++ tinfo(rec.task, t);
++ rec.donor = donor;
++ rec.budget = budget;
++ rec.deadline = deadline;
++ put_trace(rec);
++}
++
++void sched_trace_capacity_alloc_srv(pid_t srv, u32 srv_dl, task_class_t cls,
++ u16 srv_budget,
++ u16 budget, u32 deadline, pid_t donor)
++{
++ cap_allocation_record_t rec;
++ header(rec, ST_CAPACITY_ALLOCATION);
++ rec.task.pid = srv;
++ rec.task.is_rt = 1;
++ rec.task.is_server = 1;
++ rec.task.class = cls;
++ rec.task.budget = srv_budget;
++ rec.task.deadline = srv_dl;
++ rec.donor = donor;
++ rec.budget = budget;
++ rec.deadline = deadline;
++ put_trace(rec);
++}
++
++void sched_trace_service_level_change(struct task_struct *t,
++ unsigned int from,
++ unsigned int to)
++{
++ service_level_change_record_t rec;
++ header(rec, ST_SERVICE_LEVEL_CHANGE);
++ tinfo(rec.task, t);
++ rec.to = to;
++ rec.from = from;
++ rec.new_level =
++ t->rt_param.service_level[to];
++ rec.old_level =
++ t->rt_param.service_level[from];
++ put_trace(rec);
++}
++
++void sched_trace_weight_error(struct task_struct* t, fp_t actual)
++{
++ weight_error_record_t rec;
++ header(rec, ST_WEIGHT_ERROR);
++ rec.task = t->pid;
++ rec.actual = actual;
++ rec.estimate = get_est_weight(t);
++ put_trace(rec);
++}
++
++
++#endif
+diff --git a/kernel/timer.c b/kernel/timer.c
+index c2a8ccf..77a1b6b 100644
+--- a/kernel/timer.c
++++ b/kernel/timer.c
+@@ -737,6 +737,27 @@ static inline s64 __get_nsec_offset(void)
+ return ns_offset;
+ }
+
++/* Non-static, non-inline, public version of function above.
++ * It's up to the programmer to decide how to use it; no guarantees
++ * about anything are made here.
++ */
++s64 get_nsec_offset(void)
++{
++ cycle_t cycle_now, cycle_delta;
++ s64 ns_offset;
++
++ /* read clocksource: */
++ cycle_now = clocksource_read(clock);
++
++ /* calculate the delta since the last update_wall_time: */
++ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
++
++ /* convert to nanoseconds: */
++ ns_offset = cyc2ns(clock, cycle_delta);
++
++ return ns_offset;
++}
++
+ /**
+ * __get_realtime_clock_ts - Returns the time of day in a timespec
+ * @ts: pointer to the timespec to be set
+@@ -789,6 +810,7 @@ void do_gettimeofday(struct timeval *tv)
+ }
+
+ EXPORT_SYMBOL(do_gettimeofday);
++
+ /**
+ * do_settimeofday - Sets the time of day
+ * @tv: pointer to the timespec variable containing the new time
+diff --git a/kernel/trace.c b/kernel/trace.c
+new file mode 100644
+index 0000000..6119574
+--- /dev/null
++++ b/kernel/trace.c
+@@ -0,0 +1,302 @@
++#include
++#include
++#include
++#include
++#include
++
++#include
++
++/******************************************************************************/
++/* Allocation */
++/******************************************************************************/
++
++struct ft_buffer* trace_ts_buf = NULL;
++
++static unsigned int ts_seq_no = 0;
++
++feather_callback void save_timestamp(unsigned long event)
++{
++ unsigned int seq_no = fetch_and_inc((int *) &ts_seq_no);
++ struct timestamp *ts;
++ if (ft_buffer_start_write(trace_ts_buf, (void**) &ts)) {
++ ts->event = event;
++ ts->timestamp = ft_read_tsc();
++ ts->seq_no = seq_no;
++ ts->cpu = raw_smp_processor_id();
++ ft_buffer_finish_write(trace_ts_buf, ts);
++ }
++}
++
++static struct ft_buffer* alloc_ft_buffer(unsigned int count, size_t size)
++{
++ struct ft_buffer* buf;
++ size_t total = (size + 1) * count;
++ char* mem;
++ int order = 0, pages = 1;
++
++ buf = kmalloc(sizeof(struct ft_buffer), GFP_KERNEL);
++ if (!buf)
++ return NULL;
++
++ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
++ while (pages < total) {
++ order++;
++ pages *= 2;
++ }
++
++ mem = (char*) __get_free_pages(GFP_KERNEL, order);
++ if (!mem) {
++ kfree(buf);
++ return NULL;
++ }
++
++ if (!init_ft_buffer(buf, count, size,
++ mem + (count * size), /* markers at the end */
++ mem)) { /* buffer objects */
++ free_pages((unsigned long) mem, order);
++ kfree(buf);
++ return NULL;
++ }
++ return buf;
++}
++
++static void free_ft_buffer(struct ft_buffer* buf)
++{
++ int order = 0, pages = 1;
++ size_t total;
++
++ if (buf) {
++ total = (buf->slot_size + 1) * buf->slot_count;
++ total = (total / PAGE_SIZE) + (total % PAGE_SIZE != 0);
++ while (pages < total) {
++ order++;
++ pages *= 2;
++ }
++ free_pages((unsigned long) buf->buffer_mem, order);
++ kfree(buf);
++ }
++}
++
++
++/******************************************************************************/
++/* DEVICE FILE DRIVER */
++/******************************************************************************/
++
++#define NO_TIMESTAMPS 262144
++
++static DECLARE_MUTEX(feather_lock);
++static int use_count = 0;
++
++static int trace_release(struct inode *in, struct file *filp)
++{
++ int err = -EINVAL;
++
++ if (down_interruptible(&feather_lock)) {
++ err = -ERESTARTSYS;
++ goto out;
++ }
++
++ printk(KERN_ALERT "%s/%d disconnects from feather trace device. "
++ "use_count=%d\n",
++ current->comm, current->pid, use_count);
++
++ if (use_count == 1) {
++ /* disable events */
++ ft_disable_all_events();
++
++ /* wait for any pending events to complete */
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ schedule_timeout(HZ);
++
++ printk(KERN_ALERT "Failed trace writes: %u\n",
++ trace_ts_buf->failed_writes);
++
++ free_ft_buffer(trace_ts_buf);
++ trace_ts_buf = NULL;
++ }
++
++ use_count--;
++ up(&feather_lock);
++out:
++ return err;
++}
++
++
++static ssize_t trace_read(struct file *filp, char __user *to, size_t len,
++ loff_t *f_pos)
++{
++ /* we ignore f_pos, this is strictly sequential */
++ ssize_t error = 0;
++ struct timestamp ts;
++
++ if (down_interruptible(&feather_lock)) {
++ error = -ERESTARTSYS;
++ goto out;
++ }
++
++
++ while (len >= sizeof(struct timestamp)) {
++ if (ft_buffer_read(trace_ts_buf, &ts)) {
++ if (copy_to_user(to, &ts, sizeof(struct timestamp))) {
++ error = -EFAULT;
++ break;
++ } else {
++ len -= sizeof(struct timestamp);
++ to += sizeof(struct timestamp);
++ error += sizeof(struct timestamp);
++ }
++ } else {
++ set_current_state(TASK_INTERRUPTIBLE);
++ schedule_timeout(50);
++ if (signal_pending(current)) {
++ error = -ERESTARTSYS;
++ break;
++ }
++ }
++ }
++ up(&feather_lock);
++out:
++ return error;
++}
++
++#define ENABLE_CMD 0
++#define DISABLE_CMD 1
++
++static ssize_t trace_write(struct file *filp, const char __user *from,
++ size_t len, loff_t *f_pos)
++{
++ ssize_t error = -EINVAL;
++ unsigned long cmd;
++ unsigned long id;
++
++ if (len % sizeof(long) || len < 2 * sizeof(long))
++ goto out;
++
++ if (copy_from_user(&cmd, from, sizeof(long))) {
++ error = -EFAULT;
++ goto out;
++ }
++ len -= sizeof(long);
++ from += sizeof(long);
++
++ if (cmd != ENABLE_CMD && cmd != DISABLE_CMD)
++ goto out;
++
++ if (down_interruptible(&feather_lock)) {
++ error = -ERESTARTSYS;
++ goto out;
++ }
++
++ error = sizeof(long);
++ while (len) {
++ if (copy_from_user(&id, from, sizeof(long))) {
++ error = -EFAULT;
++ goto out;
++ }
++ len -= sizeof(long);
++ from += sizeof(long);
++ if (cmd) {
++ printk(KERN_INFO
++ "Disabling feather-trace event %lu.\n", id);
++ ft_disable_event(id);
++ } else {
++ printk(KERN_INFO
++ "Enabling feather-trace event %lu.\n", id);
++ ft_enable_event(id);
++ }
++ error += sizeof(long);
++ }
++
++ up(&feather_lock);
++ out:
++ return error;
++}
++
++static int trace_open(struct inode *in, struct file *filp)
++{
++ int err = 0;
++ unsigned int count = NO_TIMESTAMPS;
++
++ if (down_interruptible(&feather_lock)) {
++ err = -ERESTARTSYS;
++ goto out;
++ }
++
++ while (count && !trace_ts_buf) {
++ printk("trace: trying to allocate %u time stamps.\n", count);
++ trace_ts_buf = alloc_ft_buffer(count, sizeof(struct timestamp));
++ count /= 2;
++ }
++ if (!trace_ts_buf)
++ err = -ENOMEM;
++ else
++ use_count++;
++
++ up(&feather_lock);
++out:
++ return err;
++}
++
++/******************************************************************************/
++/* Device Registration */
++/******************************************************************************/
++
++#define FT_TRACE_MAJOR 252
++
++struct file_operations ft_trace_fops = {
++ .owner = THIS_MODULE,
++ .open = trace_open,
++ .release = trace_release,
++ .write = trace_write,
++ .read = trace_read,
++};
++
++
++static int __init register_buffer_dev(const char* name,
++ struct file_operations* fops,
++ int major, int count)
++{
++ dev_t trace_dev;
++ struct cdev *cdev;
++ int error = 0;
++
++ trace_dev = MKDEV(major, 0);
++ error = register_chrdev_region(trace_dev, count, name);
++ if (error)
++ {
++ printk(KERN_WARNING "trace: "
++ "Could not register major/minor number %d\n", major);
++ return error;
++ }
++ cdev = cdev_alloc();
++ if (!cdev) {
++ printk(KERN_WARNING "trace: "
++ "Could not get a cdev for %s.\n", name);
++ return -ENOMEM;
++ }
++ cdev->owner = THIS_MODULE;
++ cdev->ops = fops;
++ error = cdev_add(cdev, trace_dev, count);
++ if (error) {
++ printk(KERN_WARNING "trace: "
++ "add_cdev failed for %s.\n", name);
++ return -ENOMEM;
++ }
++ return error;
++
++}
++
++static int __init init_sched_trace(void)
++{
++ int error = 0;
++
++ printk("Initializing Feather-Trace device\n");
++ /* dummy entry to make linker happy */
++ ft_event0(666, save_timestamp);
++
++ error = register_buffer_dev("ft_trace", &ft_trace_fops,
++ FT_TRACE_MAJOR, 1);
++ return error;
++}
++
++module_init(init_sched_trace);
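
A user-space sketch, not taken from this patch, of driving the Feather-Trace device
registered above, assuming a node created with mknod /dev/ft_trace c 252 0. The write
protocol follows trace_write(): one command long (0 = enable, 1 = disable) followed by
one long per event ID, after which trace_read() returns raw struct timestamp records.
The event ID 100 is an arbitrary placeholder.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long cmd[2] = { 0, 100 };	/* ENABLE_CMD followed by one event ID */
	char ts[128];			/* room for a few raw timestamp records */
	ssize_t n;
	int fd = open("/dev/ft_trace", O_RDWR);

	if (fd < 0) {
		perror("open /dev/ft_trace");
		return 1;
	}
	if (write(fd, cmd, sizeof(cmd)) != sizeof(cmd))
		perror("write");

	/* blocks until the request is filled with whole timestamp records */
	n = read(fd, ts, sizeof(ts));
	printf("read %zd bytes of timestamp data\n", n);

	close(fd);
	return 0;
}
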
+diff --git a/lib/semaphore-sleepers.c b/lib/semaphore-sleepers.c
+index 1281805..3f4d543 100644
+--- a/lib/semaphore-sleepers.c
++++ b/lib/semaphore-sleepers.c
+@@ -108,7 +108,7 @@ fastcall int __sched __down_interruptible(struct semaphore * sem)
+ /*
+ * With signals pending, this turns into
+ * the trylock failure case - we won't be
+- * sleeping, and we* can't get the lock as
++ * sleeping, and we can't get the lock as
+ * it has contention. Just correct the count
+ * and exit.
+ */
diff --git a/index.html b/index.html
index bbdcf1a..623a80f 100644
--- a/index.html
+++ b/index.html
@@ -30,13 +30,26 @@
kernel with focus on multiprocessor real-time scheduling and
synchronization. The Linux kernel is modified to support the sporadic task
model and modular scheduler plugins. Both partitioned and global scheduling
- is supported. In the current version (2007.1), scheduler plugins that
- implement various EDF variants and PFAIR scheduling are included.
+ are supported. In the current version (2007.2), plugins for the following
+ scheduling policies are included:
+
+ - Partitioned EDF (P-EDF)
+ - Partitioned EDF with synchronization support (PSN-EDF)
+ - Global EDF (G-EDF)
+ - Global EDF with synchronization support (GSN-EDF)
+ - Global non-preemptive EDF (G-NP-EDF)
+ - Global Feedback-Controlled EDF (FC-EDF)
+ - EDF for heterogeneous task systems (EDF-HSB)
+ - PFAIR (both staggered and aligned quanta are supported)
+
+
+ The latest public release of LITMUSRT occurred on 10/29/2007.
-
+
Support
@@ -142,7 +155,8 @@
General Public License (GPL).
- The current release (2007.1) consists of
+ The latest version of LITMUSRT is 2007.2 and was released on 10/29/2007.
+ It consists of
our Linux kernel modifications in the form of
a patch against Linux 2.6.20,
liblitmus, the user-space API for real-time tasks,
@@ -152,32 +166,47 @@
- Please note that the current implementation is a prototype with
- certain limitations. Most notably, it is not safe in a multiuser context,
- i.e., real-time system calls do not check for superuser
+ Please note that the current implementation is a prototype with
+ certain limitations. Most notably, it is not secure in a multiuser context,
+ i.e., real-time system calls do not require superuser
privileges. Further, some resources (e.g. semaphores) that
should be dynamically allocated are allocated statically in the current version.
+
+
+ Old releases:
+
+
Installation
- The current release of LITMUSRT, version 2007.1, consists of an
+ The current release of LITMUSRT, version 2007.2, consists of an
extension of the Linux kernel that adds support for the sporadic task
model, a scheduler plugin infrastructure, and some scheduler plugins, as
well as two user-space libraries that provide the LITMUSRT
@@ -202,11 +231,11 @@ cd $DIR
# get Linux 2.6.20
wget http://www.kernel.org/pub/linux/kernel/v2.6/linux-2.6.20.tar.bz2
tar xjf linux-2.6.20.tar.bz2
-wget http://www.cs.unc.edu/~anderson/litmus-rt/download/litmus-rt-2007.1.patch
+wget http://www.cs.unc.edu/~anderson/litmus-rt/download/litmus-rt-2007.2.patch
mv linux-2.6.20 litmus-rt
# apply the LITMUS RT patch
cd litmus-rt
-patch -p1 < ../litmus-rt-2007.1.patch
+patch -p1 < ../litmus-rt-2007.2.patch
# create a working kernel configuration with HZ=1000
make gconfig
# compile the kernel
@@ -223,7 +252,7 @@ make modules
class="src">rtsched kernel parameter.
-rtsched={linux, pfair, part_edf, global_edf, global_edf_np, edf_hsb, gsn_edf, psn_edf}
+rtsched={linux, pfair, part_edf, global_edf, global_edf_np, edf_hsb, gsn_edf, psn_edf, adaptive}
For example, on our test machine, we use the
@@ -246,8 +275,8 @@ initrd /boot/kernel-2.6.20-LITMUSRT.img
cd $DIR
-wget http://www.cs.unc.edu/~anderson/litmus-rt/download/liblitmus-2007.1.tgz
-tar xzf liblitmus-2007.1.tgz
+wget http://www.cs.unc.edu/~anderson/litmus-rt/download/liblitmus-2007.2.tgz
+tar xzf liblitmus-2007.2.tgz
cd liblitmus
make
@@ -260,8 +289,8 @@ make
cd $DIR
-wget http://www.cs.unc.edu/~anderson/litmus-rt/download/libso-2007.1.tgz
-tar xzf libso-2007.1.tgz
+wget http://www.cs.unc.edu/~anderson/litmus-rt/download/libso-2007.2.tgz
+tar xzf libso-2007.2.tgz
cd libso
make
make tests
@@ -277,13 +306,27 @@ make tests
Documentation
+
+ Most of the documentation has yet to be written. To get an overview of
+ the architecture of the kernel extension, we recommend reading the paper
+ “LITMUSRT:
+ A Status Report”.
+
+
+ Please contact bbb[AT]cs.unc.edu if you have any
+ questions.
+
+
+